author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-03 19:13:28 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-03 19:13:28 -0400 |
commit | 1a4a2bc460721bc8f91e4c1294d39b38e5af132f (patch) | |
tree | fe646d05f6e17f05601e0a32cc796bec718ab6e7 | |
parent | 110a9e42b68719f584879c5c5c727bbae90d15f9 (diff) | |
parent | 1ef55be16ed69538f89e0a6508be5e62fdc9851c (diff) |
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull low-level x86 updates from Ingo Molnar:
"In this cycle this topic tree has become one of those 'super topics'
that accumulated a lot of changes:
- Add CONFIG_VMAP_STACK=y support to the core kernel and enable it on
x86 - preceded by an array of changes. v4.8 saw preparatory changes
in this area already - this is the rest of the work. Includes the
thread stack caching performance optimization. (Andy Lutomirski)
- switch_to() cleanups and all around enhancements. (Brian Gerst)
- A large number of dumpstack infrastructure enhancements and an
unwinder abstraction. The secret long term plan is safe(r) live
patching plus maybe another attempt at debuginfo based unwinding -
but all these current bits are standalone enhancements in a frame
pointer based debug environment as well. (Josh Poimboeuf)
- More __ro_after_init and const annotations. (Kees Cook)
- Enable KASLR for the vmemmap memory region. (Thomas Garnier)"
[ The virtually mapped stack changes are pretty fundamental, and not
x86-specific per se, even if they are only used on x86 right now. ]
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (70 commits)
x86/asm: Get rid of __read_cr4_safe()
thread_info: Use unsigned long for flags
x86/alternatives: Add stack frame dependency to alternative_call_2()
x86/dumpstack: Fix show_stack() task pointer regression
x86/dumpstack: Remove dump_trace() and related callbacks
x86/dumpstack: Convert show_trace_log_lvl() to use the new unwinder
oprofile/x86: Convert x86_backtrace() to use the new unwinder
x86/stacktrace: Convert save_stack_trace_*() to use the new unwinder
perf/x86: Convert perf_callchain_kernel() to use the new unwinder
x86/unwind: Add new unwind interface and implementations
x86/dumpstack: Remove NULL task pointer convention
fork: Optimize task creation by caching two thread stacks per CPU if CONFIG_VMAP_STACK=y
sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK
lib/syscall: Pin the task stack in collect_syscall()
x86/process: Pin the target stack in get_wchan()
x86/dumpstack: Pin the target stack when dumping it
kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function
sched/core: Add try_get_task_stack() and put_task_stack()
x86/entry/64: Fix a minor comment rebase error
iommu/amd: Don't put completion-wait semaphore on stack
...
114 files changed, 1722 insertions, 1108 deletions
diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index dd5f916b351d..a273dd0bbaaa 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt | |||
@@ -203,6 +203,17 @@ along to ftrace_push_return_trace() instead of a stub value of 0. | |||
203 | 203 | ||
204 | Similarly, when you call ftrace_return_to_handler(), pass it the frame pointer. | 204 | Similarly, when you call ftrace_return_to_handler(), pass it the frame pointer. |
205 | 205 | ||
206 | HAVE_FUNCTION_GRAPH_RET_ADDR_PTR | ||
207 | -------------------------------- | ||
208 | |||
209 | An arch may pass in a pointer to the return address on the stack. This | ||
210 | prevents potential stack unwinding issues where the unwinder gets out of | ||
211 | sync with ret_stack and the wrong addresses are reported by | ||
212 | ftrace_graph_ret_addr(). | ||
213 | |||
214 | Adding support for it is easy: just define the macro in asm/ftrace.h and | ||
215 | pass the return address pointer as the 'retp' argument to | ||
216 | ftrace_push_return_trace(). | ||
206 | 217 | ||
207 | HAVE_FTRACE_NMI_ENTER | 218 | HAVE_FTRACE_NMI_ENTER |
208 | --------------------- | 219 | --------------------- |
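The HAVE_FUNCTION_GRAPH_RET_ADDR_PTR hook documented above is what the per-arch hunks below prepare for: every ftrace_push_return_trace() caller gains a 'retp' argument, and arches that cannot supply the pointer yet simply pass NULL. A minimal hedged sketch of a converted arch callback, assuming a generic prepare_ftrace_return() whose 'parent' argument is already the address of the return address on the stack:

```c
#include <linux/ftrace.h>
#include <linux/sched.h>

/* The arch's return trampoline; every graph-tracer arch provides one. */
extern void return_to_handler(void);

/*
 * Hypothetical sketch, not taken verbatim from any arch in this series:
 * a prepare_ftrace_return() that forwards the location of the return
 * address as the new 'retp' argument instead of the NULL passed by the
 * unconverted arches below.
 */
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
			   unsigned long frame_pointer)
{
	unsigned long return_hooker = (unsigned long)&return_to_handler;
	unsigned long old = *parent;
	struct ftrace_graph_ent trace;

	trace.func = self_addr;
	trace.depth = current->curr_ret_stack + 1;

	/* Only trace if the calling function expects to. */
	if (!ftrace_graph_entry(&trace))
		return;

	/* 'parent' is the address of the return address on the stack. */
	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
				     frame_pointer, parent) == -EBUSY)
		return;

	*parent = return_hooker;
}
```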
diff --git a/arch/Kconfig b/arch/Kconfig index fd6e9712af81..180ea33164dc 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
@@ -696,4 +696,38 @@ config ARCH_NO_COHERENT_DMA_MMAP | |||
696 | config CPU_NO_EFFICIENT_FFS | 696 | config CPU_NO_EFFICIENT_FFS |
697 | def_bool n | 697 | def_bool n |
698 | 698 | ||
699 | config HAVE_ARCH_VMAP_STACK | ||
700 | def_bool n | ||
701 | help | ||
702 | An arch should select this symbol if it can support kernel stacks | ||
703 | in vmalloc space. This means: | ||
704 | |||
705 | - vmalloc space must be large enough to hold many kernel stacks. | ||
706 | This may rule out many 32-bit architectures. | ||
707 | |||
708 | - Stacks in vmalloc space need to work reliably. For example, if | ||
709 | vmap page tables are created on demand, either this mechanism | ||
710 | needs to work while the stack points to a virtual address with | ||
711 | unpopulated page tables or arch code (switch_to() and switch_mm(), | ||
712 | most likely) needs to ensure that the stack's page table entries | ||
713 | are populated before running on a possibly unpopulated stack. | ||
714 | |||
715 | - If the stack overflows into a guard page, something reasonable | ||
716 | should happen. The definition of "reasonable" is flexible, but | ||
717 | instantly rebooting without logging anything would be unfriendly. | ||
718 | |||
719 | config VMAP_STACK | ||
720 | default y | ||
721 | bool "Use a virtually-mapped stack" | ||
722 | depends on HAVE_ARCH_VMAP_STACK && !KASAN | ||
723 | ---help--- | ||
724 | Enable this if you want to use virtually-mapped kernel stacks | ||
725 | with guard pages. This causes kernel stack overflows to be | ||
726 | caught immediately rather than causing difficult-to-diagnose | ||
727 | corruption. | ||
728 | |||
729 | This is presently incompatible with KASAN because KASAN expects | ||
730 | the stack to map directly to the KASAN shadow map using a formula | ||
731 | that is incorrect if the stack is in vmalloc space. | ||
732 | |||
699 | source "kernel/gcov/Kconfig" | 733 | source "kernel/gcov/Kconfig" |
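The VMAP_STACK option added in this hunk is consumed by the generic fork code elsewhere in this pull (kernel/fork.c is not among the hunks shown here). A rough sketch, assuming that side of the series, of what a vmalloc-backed stack allocation with implicit guard pages looks like:

```c
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>

/*
 * Rough sketch, assuming the kernel/fork.c side of this series: with
 * CONFIG_VMAP_STACK the thread stack comes from vmalloc space, so it is
 * automatically followed by a guard page and an overflow faults instead
 * of silently corrupting a neighbouring allocation.
 */
static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
{
#ifdef CONFIG_VMAP_STACK
	void *stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
					   VMALLOC_START, VMALLOC_END,
					   THREADINFO_GFP | __GFP_HIGHMEM,
					   PAGE_KERNEL, 0, node,
					   __builtin_return_address(0));

	/* stack_vm_area is the task_struct field this series introduces. */
	tsk->stack_vm_area = stack ? find_vm_area(stack) : NULL;
	return stack;
#else
	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
					     THREAD_SIZE_ORDER);

	return page ? page_address(page) : NULL;
#endif
}
```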
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 709ee1d6d4df..3f1759411d51 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c | |||
@@ -218,7 +218,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, | |||
218 | } | 218 | } |
219 | 219 | ||
220 | err = ftrace_push_return_trace(old, self_addr, &trace.depth, | 220 | err = ftrace_push_return_trace(old, self_addr, &trace.depth, |
221 | frame_pointer); | 221 | frame_pointer, NULL); |
222 | if (err == -EBUSY) { | 222 | if (err == -EBUSY) { |
223 | *parent = old; | 223 | *parent = old; |
224 | return; | 224 | return; |
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 0f03a8fe2314..aef02d2af3b5 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S | |||
@@ -219,7 +219,7 @@ ENDPROC(ftrace_graph_caller) | |||
219 | * | 219 | * |
220 | * Run ftrace_return_to_handler() before going back to parent. | 220 | * Run ftrace_return_to_handler() before going back to parent. |
221 | * @fp is checked against the value passed by ftrace_graph_caller() | 221 | * @fp is checked against the value passed by ftrace_graph_caller() |
222 | * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled. | 222 | * only when HAVE_FUNCTION_GRAPH_FP_TEST is enabled. |
223 | */ | 223 | */ |
224 | ENTRY(return_to_handler) | 224 | ENTRY(return_to_handler) |
225 | save_return_regs | 225 | save_return_regs |
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index ebecf9aa33d1..40ad08ac569a 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c | |||
@@ -138,7 +138,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, | |||
138 | return; | 138 | return; |
139 | 139 | ||
140 | err = ftrace_push_return_trace(old, self_addr, &trace.depth, | 140 | err = ftrace_push_return_trace(old, self_addr, &trace.depth, |
141 | frame_pointer); | 141 | frame_pointer, NULL); |
142 | if (err == -EBUSY) | 142 | if (err == -EBUSY) |
143 | return; | 143 | return; |
144 | else | 144 | else |
diff --git a/arch/blackfin/kernel/ftrace-entry.S b/arch/blackfin/kernel/ftrace-entry.S index 28d059540424..3b8bdcbb7da3 100644 --- a/arch/blackfin/kernel/ftrace-entry.S +++ b/arch/blackfin/kernel/ftrace-entry.S | |||
@@ -169,7 +169,7 @@ ENTRY(_ftrace_graph_caller) | |||
169 | r0 = sp; /* unsigned long *parent */ | 169 | r0 = sp; /* unsigned long *parent */ |
170 | r1 = [sp]; /* unsigned long self_addr */ | 170 | r1 = [sp]; /* unsigned long self_addr */ |
171 | # endif | 171 | # endif |
172 | # ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST | 172 | # ifdef HAVE_FUNCTION_GRAPH_FP_TEST |
173 | r2 = fp; /* unsigned long frame_pointer */ | 173 | r2 = fp; /* unsigned long frame_pointer */ |
174 | # endif | 174 | # endif |
175 | r0 += 16; /* skip the 4 local regs on stack */ | 175 | r0 += 16; /* skip the 4 local regs on stack */ |
@@ -190,7 +190,7 @@ ENTRY(_return_to_handler) | |||
190 | [--sp] = r1; | 190 | [--sp] = r1; |
191 | 191 | ||
192 | /* get original return address */ | 192 | /* get original return address */ |
193 | # ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST | 193 | # ifdef HAVE_FUNCTION_GRAPH_FP_TEST |
194 | r0 = fp; /* Blackfin is sane, so omit this */ | 194 | r0 = fp; /* Blackfin is sane, so omit this */ |
195 | # endif | 195 | # endif |
196 | call _ftrace_return_to_handler; | 196 | call _ftrace_return_to_handler; |
diff --git a/arch/blackfin/kernel/ftrace.c b/arch/blackfin/kernel/ftrace.c index 095de0fa044d..8dad7589b843 100644 --- a/arch/blackfin/kernel/ftrace.c +++ b/arch/blackfin/kernel/ftrace.c | |||
@@ -107,7 +107,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, | |||
107 | return; | 107 | return; |
108 | 108 | ||
109 | if (ftrace_push_return_trace(*parent, self_addr, &trace.depth, | 109 | if (ftrace_push_return_trace(*parent, self_addr, &trace.depth, |
110 | frame_pointer) == -EBUSY) | 110 | frame_pointer, NULL) == -EBUSY) |
111 | return; | 111 | return; |
112 | 112 | ||
113 | trace.func = self_addr; | 113 | trace.func = self_addr; |
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index 29bd59790d6c..c7026429816b 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h | |||
@@ -56,7 +56,7 @@ struct thread_info { | |||
56 | #define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) | 56 | #define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) |
57 | #define task_thread_info(tsk) ((struct thread_info *) 0) | 57 | #define task_thread_info(tsk) ((struct thread_info *) 0) |
58 | #endif | 58 | #endif |
59 | #define free_thread_stack(ti) /* nothing */ | 59 | #define free_thread_stack(tsk) /* nothing */ |
60 | #define task_stack_page(tsk) ((void *)(tsk)) | 60 | #define task_stack_page(tsk) ((void *)(tsk)) |
61 | 61 | ||
62 | #define __HAVE_THREAD_FUNCTIONS | 62 | #define __HAVE_THREAD_FUNCTIONS |
diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c index fc7b48a52cd5..d57563c58a26 100644 --- a/arch/microblaze/kernel/ftrace.c +++ b/arch/microblaze/kernel/ftrace.c | |||
@@ -63,7 +63,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) | |||
63 | return; | 63 | return; |
64 | } | 64 | } |
65 | 65 | ||
66 | err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0); | 66 | err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL); |
67 | if (err == -EBUSY) { | 67 | if (err == -EBUSY) { |
68 | *parent = old; | 68 | *parent = old; |
69 | return; | 69 | return; |
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index 937c54bc8ccc..30a3b75e88eb 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c | |||
@@ -382,8 +382,8 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, | |||
382 | if (unlikely(faulted)) | 382 | if (unlikely(faulted)) |
383 | goto out; | 383 | goto out; |
384 | 384 | ||
385 | if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp) | 385 | if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp, |
386 | == -EBUSY) { | 386 | NULL) == -EBUSY) { |
387 | *parent_ra_addr = old_parent_ra; | 387 | *parent_ra_addr = old_parent_ra; |
388 | return; | 388 | return; |
389 | } | 389 | } |
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index a828a0adf52c..5a5506a35395 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c | |||
@@ -48,7 +48,7 @@ static void __hot prepare_ftrace_return(unsigned long *parent, | |||
48 | return; | 48 | return; |
49 | 49 | ||
50 | if (ftrace_push_return_trace(old, self_addr, &trace.depth, | 50 | if (ftrace_push_return_trace(old, self_addr, &trace.depth, |
51 | 0 ) == -EBUSY) | 51 | 0, NULL) == -EBUSY) |
52 | return; | 52 | return; |
53 | 53 | ||
54 | /* activate parisc_return_to_handler() as return point */ | 54 | /* activate parisc_return_to_handler() as return point */ |
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index cc52d9795f88..a95639b8d4ac 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c | |||
@@ -593,7 +593,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) | |||
593 | if (!ftrace_graph_entry(&trace)) | 593 | if (!ftrace_graph_entry(&trace)) |
594 | goto out; | 594 | goto out; |
595 | 595 | ||
596 | if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) | 596 | if (ftrace_push_return_trace(parent, ip, &trace.depth, 0, |
597 | NULL) == -EBUSY) | ||
597 | goto out; | 598 | goto out; |
598 | 599 | ||
599 | parent = return_hooker; | 600 | parent = return_hooker; |
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 0f7bfeba6da6..60a8a4e207ed 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c | |||
@@ -209,7 +209,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) | |||
209 | /* Only trace if the calling function expects to. */ | 209 | /* Only trace if the calling function expects to. */ |
210 | if (!ftrace_graph_entry(&trace)) | 210 | if (!ftrace_graph_entry(&trace)) |
211 | goto out; | 211 | goto out; |
212 | if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) | 212 | if (ftrace_push_return_trace(parent, ip, &trace.depth, 0, |
213 | NULL) == -EBUSY) | ||
213 | goto out; | 214 | goto out; |
214 | parent = (unsigned long) return_to_handler; | 215 | parent = (unsigned long) return_to_handler; |
215 | out: | 216 | out: |
diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 38993e09ef03..95eccd49672f 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c | |||
@@ -382,7 +382,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) | |||
382 | return; | 382 | return; |
383 | } | 383 | } |
384 | 384 | ||
385 | err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0); | 385 | err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL); |
386 | if (err == -EBUSY) { | 386 | if (err == -EBUSY) { |
387 | __raw_writel(old, parent); | 387 | __raw_writel(old, parent); |
388 | return; | 388 | return; |
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 59b09600dd32..f5d60f14a0bc 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig | |||
@@ -56,7 +56,6 @@ config SPARC64 | |||
56 | def_bool 64BIT | 56 | def_bool 64BIT |
57 | select HAVE_FUNCTION_TRACER | 57 | select HAVE_FUNCTION_TRACER |
58 | select HAVE_FUNCTION_GRAPH_TRACER | 58 | select HAVE_FUNCTION_GRAPH_TRACER |
59 | select HAVE_FUNCTION_GRAPH_FP_TEST | ||
60 | select HAVE_KRETPROBES | 59 | select HAVE_KRETPROBES |
61 | select HAVE_KPROBES | 60 | select HAVE_KPROBES |
62 | select HAVE_RCU_TABLE_FREE if SMP | 61 | select HAVE_RCU_TABLE_FREE if SMP |
diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h index 3192a8e42fd6..62755a339a59 100644 --- a/arch/sparc/include/asm/ftrace.h +++ b/arch/sparc/include/asm/ftrace.h | |||
@@ -9,6 +9,10 @@ | |||
9 | void _mcount(void); | 9 | void _mcount(void); |
10 | #endif | 10 | #endif |
11 | 11 | ||
12 | #endif /* CONFIG_MCOUNT */ | ||
13 | |||
14 | #if defined(CONFIG_SPARC64) && !defined(CC_USE_FENTRY) | ||
15 | #define HAVE_FUNCTION_GRAPH_FP_TEST | ||
12 | #endif | 16 | #endif |
13 | 17 | ||
14 | #ifdef CONFIG_DYNAMIC_FTRACE | 18 | #ifdef CONFIG_DYNAMIC_FTRACE |
diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c index 0a2d2ddff543..6bcff698069b 100644 --- a/arch/sparc/kernel/ftrace.c +++ b/arch/sparc/kernel/ftrace.c | |||
@@ -131,7 +131,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, | |||
131 | return parent + 8UL; | 131 | return parent + 8UL; |
132 | 132 | ||
133 | if (ftrace_push_return_trace(parent, self_addr, &trace.depth, | 133 | if (ftrace_push_return_trace(parent, self_addr, &trace.depth, |
134 | frame_pointer) == -EBUSY) | 134 | frame_pointer, NULL) == -EBUSY) |
135 | return parent + 8UL; | 135 | return parent + 8UL; |
136 | 136 | ||
137 | trace.func = self_addr; | 137 | trace.func = self_addr; |
diff --git a/arch/tile/kernel/ftrace.c b/arch/tile/kernel/ftrace.c index 4a572088b270..b827a418b155 100644 --- a/arch/tile/kernel/ftrace.c +++ b/arch/tile/kernel/ftrace.c | |||
@@ -184,7 +184,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, | |||
184 | *parent = return_hooker; | 184 | *parent = return_hooker; |
185 | 185 | ||
186 | err = ftrace_push_return_trace(old, self_addr, &trace.depth, | 186 | err = ftrace_push_return_trace(old, self_addr, &trace.depth, |
187 | frame_pointer); | 187 | frame_pointer, NULL); |
188 | if (err == -EBUSY) { | 188 | if (err == -EBUSY) { |
189 | *parent = old; | 189 | *parent = old; |
190 | return; | 190 | return; |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0cc8811af4e0..ac5944fa6da2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -93,6 +93,7 @@ config X86 | |||
93 | select HAVE_ARCH_TRANSPARENT_HUGEPAGE | 93 | select HAVE_ARCH_TRANSPARENT_HUGEPAGE |
94 | select HAVE_ARCH_WITHIN_STACK_FRAMES | 94 | select HAVE_ARCH_WITHIN_STACK_FRAMES |
95 | select HAVE_EBPF_JIT if X86_64 | 95 | select HAVE_EBPF_JIT if X86_64 |
96 | select HAVE_ARCH_VMAP_STACK if X86_64 | ||
96 | select HAVE_CC_STACKPROTECTOR | 97 | select HAVE_CC_STACKPROTECTOR |
97 | select HAVE_CMPXCHG_DOUBLE | 98 | select HAVE_CMPXCHG_DOUBLE |
98 | select HAVE_CMPXCHG_LOCAL | 99 | select HAVE_CMPXCHG_LOCAL |
@@ -109,7 +110,6 @@ config X86 | |||
109 | select HAVE_EXIT_THREAD | 110 | select HAVE_EXIT_THREAD |
110 | select HAVE_FENTRY if X86_64 | 111 | select HAVE_FENTRY if X86_64 |
111 | select HAVE_FTRACE_MCOUNT_RECORD | 112 | select HAVE_FTRACE_MCOUNT_RECORD |
112 | select HAVE_FUNCTION_GRAPH_FP_TEST | ||
113 | select HAVE_FUNCTION_GRAPH_TRACER | 113 | select HAVE_FUNCTION_GRAPH_TRACER |
114 | select HAVE_FUNCTION_TRACER | 114 | select HAVE_FUNCTION_TRACER |
115 | select HAVE_GCC_PLUGINS | 115 | select HAVE_GCC_PLUGINS |
@@ -157,6 +157,7 @@ config X86 | |||
157 | select SPARSE_IRQ | 157 | select SPARSE_IRQ |
158 | select SRCU | 158 | select SRCU |
159 | select SYSCTL_EXCEPTION_TRACE | 159 | select SYSCTL_EXCEPTION_TRACE |
160 | select THREAD_INFO_IN_TASK | ||
160 | select USER_STACKTRACE_SUPPORT | 161 | select USER_STACKTRACE_SUPPORT |
161 | select VIRT_TO_BUS | 162 | select VIRT_TO_BUS |
162 | select X86_DEV_DMA_OPS if X86_64 | 163 | select X86_DEV_DMA_OPS if X86_64 |
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 1433f6b4607d..bdd9cc59d20f 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c | |||
@@ -31,13 +31,6 @@ | |||
31 | #define CREATE_TRACE_POINTS | 31 | #define CREATE_TRACE_POINTS |
32 | #include <trace/events/syscalls.h> | 32 | #include <trace/events/syscalls.h> |
33 | 33 | ||
34 | static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs) | ||
35 | { | ||
36 | unsigned long top_of_stack = | ||
37 | (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING; | ||
38 | return (struct thread_info *)(top_of_stack - THREAD_SIZE); | ||
39 | } | ||
40 | |||
41 | #ifdef CONFIG_CONTEXT_TRACKING | 34 | #ifdef CONFIG_CONTEXT_TRACKING |
42 | /* Called on entry from user mode with IRQs off. */ | 35 | /* Called on entry from user mode with IRQs off. */ |
43 | __visible inline void enter_from_user_mode(void) | 36 | __visible inline void enter_from_user_mode(void) |
@@ -71,7 +64,7 @@ static long syscall_trace_enter(struct pt_regs *regs) | |||
71 | { | 64 | { |
72 | u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; | 65 | u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; |
73 | 66 | ||
74 | struct thread_info *ti = pt_regs_to_thread_info(regs); | 67 | struct thread_info *ti = current_thread_info(); |
75 | unsigned long ret = 0; | 68 | unsigned long ret = 0; |
76 | bool emulated = false; | 69 | bool emulated = false; |
77 | u32 work; | 70 | u32 work; |
@@ -173,18 +166,17 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) | |||
173 | /* Disable IRQs and retry */ | 166 | /* Disable IRQs and retry */ |
174 | local_irq_disable(); | 167 | local_irq_disable(); |
175 | 168 | ||
176 | cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags); | 169 | cached_flags = READ_ONCE(current_thread_info()->flags); |
177 | 170 | ||
178 | if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) | 171 | if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) |
179 | break; | 172 | break; |
180 | |||
181 | } | 173 | } |
182 | } | 174 | } |
183 | 175 | ||
184 | /* Called with IRQs disabled. */ | 176 | /* Called with IRQs disabled. */ |
185 | __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) | 177 | __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) |
186 | { | 178 | { |
187 | struct thread_info *ti = pt_regs_to_thread_info(regs); | 179 | struct thread_info *ti = current_thread_info(); |
188 | u32 cached_flags; | 180 | u32 cached_flags; |
189 | 181 | ||
190 | if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled())) | 182 | if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled())) |
@@ -209,7 +201,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) | |||
209 | * special case only applies after poking regs and before the | 201 | * special case only applies after poking regs and before the |
210 | * very next return to user mode. | 202 | * very next return to user mode. |
211 | */ | 203 | */ |
212 | ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); | 204 | current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED); |
213 | #endif | 205 | #endif |
214 | 206 | ||
215 | user_enter_irqoff(); | 207 | user_enter_irqoff(); |
@@ -247,7 +239,7 @@ static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags) | |||
247 | */ | 239 | */ |
248 | __visible inline void syscall_return_slowpath(struct pt_regs *regs) | 240 | __visible inline void syscall_return_slowpath(struct pt_regs *regs) |
249 | { | 241 | { |
250 | struct thread_info *ti = pt_regs_to_thread_info(regs); | 242 | struct thread_info *ti = current_thread_info(); |
251 | u32 cached_flags = READ_ONCE(ti->flags); | 243 | u32 cached_flags = READ_ONCE(ti->flags); |
252 | 244 | ||
253 | CT_WARN_ON(ct_state() != CONTEXT_KERNEL); | 245 | CT_WARN_ON(ct_state() != CONTEXT_KERNEL); |
@@ -270,7 +262,7 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs) | |||
270 | #ifdef CONFIG_X86_64 | 262 | #ifdef CONFIG_X86_64 |
271 | __visible void do_syscall_64(struct pt_regs *regs) | 263 | __visible void do_syscall_64(struct pt_regs *regs) |
272 | { | 264 | { |
273 | struct thread_info *ti = pt_regs_to_thread_info(regs); | 265 | struct thread_info *ti = current_thread_info(); |
274 | unsigned long nr = regs->orig_ax; | 266 | unsigned long nr = regs->orig_ax; |
275 | 267 | ||
276 | enter_from_user_mode(); | 268 | enter_from_user_mode(); |
@@ -303,11 +295,11 @@ __visible void do_syscall_64(struct pt_regs *regs) | |||
303 | */ | 295 | */ |
304 | static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) | 296 | static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) |
305 | { | 297 | { |
306 | struct thread_info *ti = pt_regs_to_thread_info(regs); | 298 | struct thread_info *ti = current_thread_info(); |
307 | unsigned int nr = (unsigned int)regs->orig_ax; | 299 | unsigned int nr = (unsigned int)regs->orig_ax; |
308 | 300 | ||
309 | #ifdef CONFIG_IA32_EMULATION | 301 | #ifdef CONFIG_IA32_EMULATION |
310 | ti->status |= TS_COMPAT; | 302 | current->thread.status |= TS_COMPAT; |
311 | #endif | 303 | #endif |
312 | 304 | ||
313 | if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { | 305 | if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { |
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 0b56666e6039..b75a8bcd2d23 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S | |||
@@ -204,34 +204,70 @@ | |||
204 | POP_GS_EX | 204 | POP_GS_EX |
205 | .endm | 205 | .endm |
206 | 206 | ||
207 | /* | ||
208 | * %eax: prev task | ||
209 | * %edx: next task | ||
210 | */ | ||
211 | ENTRY(__switch_to_asm) | ||
212 | /* | ||
213 | * Save callee-saved registers | ||
214 | * This must match the order in struct inactive_task_frame | ||
215 | */ | ||
216 | pushl %ebp | ||
217 | pushl %ebx | ||
218 | pushl %edi | ||
219 | pushl %esi | ||
220 | |||
221 | /* switch stack */ | ||
222 | movl %esp, TASK_threadsp(%eax) | ||
223 | movl TASK_threadsp(%edx), %esp | ||
224 | |||
225 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
226 | movl TASK_stack_canary(%edx), %ebx | ||
227 | movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset | ||
228 | #endif | ||
229 | |||
230 | /* restore callee-saved registers */ | ||
231 | popl %esi | ||
232 | popl %edi | ||
233 | popl %ebx | ||
234 | popl %ebp | ||
235 | |||
236 | jmp __switch_to | ||
237 | END(__switch_to_asm) | ||
238 | |||
239 | /* | ||
240 | * A newly forked process directly context switches into this address. | ||
241 | * | ||
242 | * eax: prev task we switched from | ||
243 | * ebx: kernel thread func (NULL for user thread) | ||
244 | * edi: kernel thread arg | ||
245 | */ | ||
207 | ENTRY(ret_from_fork) | 246 | ENTRY(ret_from_fork) |
208 | pushl %eax | 247 | pushl %eax |
209 | call schedule_tail | 248 | call schedule_tail |
210 | popl %eax | 249 | popl %eax |
211 | 250 | ||
251 | testl %ebx, %ebx | ||
252 | jnz 1f /* kernel threads are uncommon */ | ||
253 | |||
254 | 2: | ||
212 | /* When we fork, we trace the syscall return in the child, too. */ | 255 | /* When we fork, we trace the syscall return in the child, too. */ |
213 | movl %esp, %eax | 256 | movl %esp, %eax |
214 | call syscall_return_slowpath | 257 | call syscall_return_slowpath |
215 | jmp restore_all | 258 | jmp restore_all |
216 | END(ret_from_fork) | ||
217 | |||
218 | ENTRY(ret_from_kernel_thread) | ||
219 | pushl %eax | ||
220 | call schedule_tail | ||
221 | popl %eax | ||
222 | movl PT_EBP(%esp), %eax | ||
223 | call *PT_EBX(%esp) | ||
224 | movl $0, PT_EAX(%esp) | ||
225 | 259 | ||
260 | /* kernel thread */ | ||
261 | 1: movl %edi, %eax | ||
262 | call *%ebx | ||
226 | /* | 263 | /* |
227 | * Kernel threads return to userspace as if returning from a syscall. | 264 | * A kernel thread is allowed to return here after successfully |
228 | * We should check whether anything actually uses this path and, if so, | 265 | * calling do_execve(). Exit to userspace to complete the execve() |
229 | * consider switching it over to ret_from_fork. | 266 | * syscall. |
230 | */ | 267 | */ |
231 | movl %esp, %eax | 268 | movl $0, PT_EAX(%esp) |
232 | call syscall_return_slowpath | 269 | jmp 2b |
233 | jmp restore_all | 270 | END(ret_from_fork) |
234 | ENDPROC(ret_from_kernel_thread) | ||
235 | 271 | ||
236 | /* | 272 | /* |
237 | * Return to user mode is not as complex as all this looks, | 273 | * Return to user mode is not as complex as all this looks, |
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 02fff3ebfb87..fee1d95902b5 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S | |||
@@ -179,7 +179,8 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) | |||
179 | * If we need to do entry work or if we guess we'll need to do | 179 | * If we need to do entry work or if we guess we'll need to do |
180 | * exit work, go straight to the slow path. | 180 | * exit work, go straight to the slow path. |
181 | */ | 181 | */ |
182 | testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) | 182 | movq PER_CPU_VAR(current_task), %r11 |
183 | testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) | ||
183 | jnz entry_SYSCALL64_slow_path | 184 | jnz entry_SYSCALL64_slow_path |
184 | 185 | ||
185 | entry_SYSCALL_64_fastpath: | 186 | entry_SYSCALL_64_fastpath: |
@@ -217,7 +218,8 @@ entry_SYSCALL_64_fastpath: | |||
217 | */ | 218 | */ |
218 | DISABLE_INTERRUPTS(CLBR_NONE) | 219 | DISABLE_INTERRUPTS(CLBR_NONE) |
219 | TRACE_IRQS_OFF | 220 | TRACE_IRQS_OFF |
220 | testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) | 221 | movq PER_CPU_VAR(current_task), %r11 |
222 | testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) | ||
221 | jnz 1f | 223 | jnz 1f |
222 | 224 | ||
223 | LOCKDEP_SYS_EXIT | 225 | LOCKDEP_SYS_EXIT |
@@ -351,8 +353,7 @@ ENTRY(stub_ptregs_64) | |||
351 | jmp entry_SYSCALL64_slow_path | 353 | jmp entry_SYSCALL64_slow_path |
352 | 354 | ||
353 | 1: | 355 | 1: |
354 | /* Called from C */ | 356 | jmp *%rax /* Called from C */ |
355 | jmp *%rax /* called from C */ | ||
356 | END(stub_ptregs_64) | 357 | END(stub_ptregs_64) |
357 | 358 | ||
358 | .macro ptregs_stub func | 359 | .macro ptregs_stub func |
@@ -369,41 +370,73 @@ END(ptregs_\func) | |||
369 | #include <asm/syscalls_64.h> | 370 | #include <asm/syscalls_64.h> |
370 | 371 | ||
371 | /* | 372 | /* |
373 | * %rdi: prev task | ||
374 | * %rsi: next task | ||
375 | */ | ||
376 | ENTRY(__switch_to_asm) | ||
377 | /* | ||
378 | * Save callee-saved registers | ||
379 | * This must match the order in inactive_task_frame | ||
380 | */ | ||
381 | pushq %rbp | ||
382 | pushq %rbx | ||
383 | pushq %r12 | ||
384 | pushq %r13 | ||
385 | pushq %r14 | ||
386 | pushq %r15 | ||
387 | |||
388 | /* switch stack */ | ||
389 | movq %rsp, TASK_threadsp(%rdi) | ||
390 | movq TASK_threadsp(%rsi), %rsp | ||
391 | |||
392 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
393 | movq TASK_stack_canary(%rsi), %rbx | ||
394 | movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset | ||
395 | #endif | ||
396 | |||
397 | /* restore callee-saved registers */ | ||
398 | popq %r15 | ||
399 | popq %r14 | ||
400 | popq %r13 | ||
401 | popq %r12 | ||
402 | popq %rbx | ||
403 | popq %rbp | ||
404 | |||
405 | jmp __switch_to | ||
406 | END(__switch_to_asm) | ||
407 | |||
408 | /* | ||
372 | * A newly forked process directly context switches into this address. | 409 | * A newly forked process directly context switches into this address. |
373 | * | 410 | * |
374 | * rdi: prev task we switched from | 411 | * rax: prev task we switched from |
412 | * rbx: kernel thread func (NULL for user thread) | ||
413 | * r12: kernel thread arg | ||
375 | */ | 414 | */ |
376 | ENTRY(ret_from_fork) | 415 | ENTRY(ret_from_fork) |
377 | LOCK ; btr $TIF_FORK, TI_flags(%r8) | 416 | movq %rax, %rdi |
378 | |||
379 | call schedule_tail /* rdi: 'prev' task parameter */ | 417 | call schedule_tail /* rdi: 'prev' task parameter */ |
380 | 418 | ||
381 | testb $3, CS(%rsp) /* from kernel_thread? */ | 419 | testq %rbx, %rbx /* from kernel_thread? */ |
382 | jnz 1f | 420 | jnz 1f /* kernel threads are uncommon */ |
383 | 421 | ||
384 | /* | 422 | 2: |
385 | * We came from kernel_thread. This code path is quite twisted, and | ||
386 | * someone should clean it up. | ||
387 | * | ||
388 | * copy_thread_tls stashes the function pointer in RBX and the | ||
389 | * parameter to be passed in RBP. The called function is permitted | ||
390 | * to call do_execve and thereby jump to user mode. | ||
391 | */ | ||
392 | movq RBP(%rsp), %rdi | ||
393 | call *RBX(%rsp) | ||
394 | movl $0, RAX(%rsp) | ||
395 | |||
396 | /* | ||
397 | * Fall through as though we're exiting a syscall. This makes a | ||
398 | * twisted sort of sense if we just called do_execve. | ||
399 | */ | ||
400 | |||
401 | 1: | ||
402 | movq %rsp, %rdi | 423 | movq %rsp, %rdi |
403 | call syscall_return_slowpath /* returns with IRQs disabled */ | 424 | call syscall_return_slowpath /* returns with IRQs disabled */ |
404 | TRACE_IRQS_ON /* user mode is traced as IRQS on */ | 425 | TRACE_IRQS_ON /* user mode is traced as IRQS on */ |
405 | SWAPGS | 426 | SWAPGS |
406 | jmp restore_regs_and_iret | 427 | jmp restore_regs_and_iret |
428 | |||
429 | 1: | ||
430 | /* kernel thread */ | ||
431 | movq %r12, %rdi | ||
432 | call *%rbx | ||
433 | /* | ||
434 | * A kernel thread is allowed to return here after successfully | ||
435 | * calling do_execve(). Exit to userspace to complete the execve() | ||
436 | * syscall. | ||
437 | */ | ||
438 | movq $0, RAX(%rsp) | ||
439 | jmp 2b | ||
407 | END(ret_from_fork) | 440 | END(ret_from_fork) |
408 | 441 | ||
409 | /* | 442 | /* |
@@ -555,27 +588,69 @@ native_irq_return_iret: | |||
555 | 588 | ||
556 | #ifdef CONFIG_X86_ESPFIX64 | 589 | #ifdef CONFIG_X86_ESPFIX64 |
557 | native_irq_return_ldt: | 590 | native_irq_return_ldt: |
558 | pushq %rax | 591 | /* |
559 | pushq %rdi | 592 | * We are running with user GSBASE. All GPRs contain their user |
593 | * values. We have a percpu ESPFIX stack that is eight slots | ||
594 | * long (see ESPFIX_STACK_SIZE). espfix_waddr points to the bottom | ||
595 | * of the ESPFIX stack. | ||
596 | * | ||
597 | * We clobber RAX and RDI in this code. We stash RDI on the | ||
598 | * normal stack and RAX on the ESPFIX stack. | ||
599 | * | ||
600 | * The ESPFIX stack layout we set up looks like this: | ||
601 | * | ||
602 | * --- top of ESPFIX stack --- | ||
603 | * SS | ||
604 | * RSP | ||
605 | * RFLAGS | ||
606 | * CS | ||
607 | * RIP <-- RSP points here when we're done | ||
608 | * RAX <-- espfix_waddr points here | ||
609 | * --- bottom of ESPFIX stack --- | ||
610 | */ | ||
611 | |||
612 | pushq %rdi /* Stash user RDI */ | ||
560 | SWAPGS | 613 | SWAPGS |
561 | movq PER_CPU_VAR(espfix_waddr), %rdi | 614 | movq PER_CPU_VAR(espfix_waddr), %rdi |
562 | movq %rax, (0*8)(%rdi) /* RAX */ | 615 | movq %rax, (0*8)(%rdi) /* user RAX */ |
563 | movq (2*8)(%rsp), %rax /* RIP */ | 616 | movq (1*8)(%rsp), %rax /* user RIP */ |
564 | movq %rax, (1*8)(%rdi) | 617 | movq %rax, (1*8)(%rdi) |
565 | movq (3*8)(%rsp), %rax /* CS */ | 618 | movq (2*8)(%rsp), %rax /* user CS */ |
566 | movq %rax, (2*8)(%rdi) | 619 | movq %rax, (2*8)(%rdi) |
567 | movq (4*8)(%rsp), %rax /* RFLAGS */ | 620 | movq (3*8)(%rsp), %rax /* user RFLAGS */ |
568 | movq %rax, (3*8)(%rdi) | 621 | movq %rax, (3*8)(%rdi) |
569 | movq (6*8)(%rsp), %rax /* SS */ | 622 | movq (5*8)(%rsp), %rax /* user SS */ |
570 | movq %rax, (5*8)(%rdi) | 623 | movq %rax, (5*8)(%rdi) |
571 | movq (5*8)(%rsp), %rax /* RSP */ | 624 | movq (4*8)(%rsp), %rax /* user RSP */ |
572 | movq %rax, (4*8)(%rdi) | 625 | movq %rax, (4*8)(%rdi) |
573 | andl $0xffff0000, %eax | 626 | /* Now RAX == RSP. */ |
574 | popq %rdi | 627 | |
628 | andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */ | ||
629 | popq %rdi /* Restore user RDI */ | ||
630 | |||
631 | /* | ||
632 | * espfix_stack[31:16] == 0. The page tables are set up such that | ||
633 | * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of | ||
634 | * espfix_waddr for any X. That is, there are 65536 RO aliases of | ||
635 | * the same page. Set up RSP so that RSP[31:16] contains the | ||
636 | * respective 16 bits of the /userspace/ RSP and RSP nonetheless | ||
637 | * still points to an RO alias of the ESPFIX stack. | ||
638 | */ | ||
575 | orq PER_CPU_VAR(espfix_stack), %rax | 639 | orq PER_CPU_VAR(espfix_stack), %rax |
576 | SWAPGS | 640 | SWAPGS |
577 | movq %rax, %rsp | 641 | movq %rax, %rsp |
578 | popq %rax | 642 | |
643 | /* | ||
644 | * At this point, we cannot write to the stack any more, but we can | ||
645 | * still read. | ||
646 | */ | ||
647 | popq %rax /* Restore user RAX */ | ||
648 | |||
649 | /* | ||
650 | * RSP now points to an ordinary IRET frame, except that the page | ||
651 | * is read-only and RSP[31:16] are preloaded with the userspace | ||
652 | * values. We can now IRET back to userspace. | ||
653 | */ | ||
579 | jmp native_irq_return_iret | 654 | jmp native_irq_return_iret |
580 | #endif | 655 | #endif |
581 | END(common_interrupt) | 656 | END(common_interrupt) |
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 18a1acf86c90..d31735f37ed7 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <asm/timer.h> | 37 | #include <asm/timer.h> |
38 | #include <asm/desc.h> | 38 | #include <asm/desc.h> |
39 | #include <asm/ldt.h> | 39 | #include <asm/ldt.h> |
40 | #include <asm/unwind.h> | ||
40 | 41 | ||
41 | #include "perf_event.h" | 42 | #include "perf_event.h" |
42 | 43 | ||
@@ -2267,39 +2268,26 @@ void arch_perf_update_userpage(struct perf_event *event, | |||
2267 | cyc2ns_read_end(data); | 2268 | cyc2ns_read_end(data); |
2268 | } | 2269 | } |
2269 | 2270 | ||
2270 | /* | ||
2271 | * callchain support | ||
2272 | */ | ||
2273 | |||
2274 | static int backtrace_stack(void *data, char *name) | ||
2275 | { | ||
2276 | return 0; | ||
2277 | } | ||
2278 | |||
2279 | static int backtrace_address(void *data, unsigned long addr, int reliable) | ||
2280 | { | ||
2281 | struct perf_callchain_entry_ctx *entry = data; | ||
2282 | |||
2283 | return perf_callchain_store(entry, addr); | ||
2284 | } | ||
2285 | |||
2286 | static const struct stacktrace_ops backtrace_ops = { | ||
2287 | .stack = backtrace_stack, | ||
2288 | .address = backtrace_address, | ||
2289 | .walk_stack = print_context_stack_bp, | ||
2290 | }; | ||
2291 | |||
2292 | void | 2271 | void |
2293 | perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) | 2272 | perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) |
2294 | { | 2273 | { |
2274 | struct unwind_state state; | ||
2275 | unsigned long addr; | ||
2276 | |||
2295 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | 2277 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { |
2296 | /* TODO: We don't support guest os callchain now */ | 2278 | /* TODO: We don't support guest os callchain now */ |
2297 | return; | 2279 | return; |
2298 | } | 2280 | } |
2299 | 2281 | ||
2300 | perf_callchain_store(entry, regs->ip); | 2282 | if (perf_callchain_store(entry, regs->ip)) |
2283 | return; | ||
2301 | 2284 | ||
2302 | dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); | 2285 | for (unwind_start(&state, current, regs, NULL); !unwind_done(&state); |
2286 | unwind_next_frame(&state)) { | ||
2287 | addr = unwind_get_return_address(&state); | ||
2288 | if (!addr || perf_callchain_store(entry, addr)) | ||
2289 | return; | ||
2290 | } | ||
2303 | } | 2291 | } |
2304 | 2292 | ||
2305 | static inline int | 2293 | static inline int |
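The perf conversion above is one instance of the idiom shared by all the call sites this series converts. A minimal sketch of the new interface from <asm/unwind.h>, following the loop shown in the hunk:

```c
#include <asm/unwind.h>

/*
 * Minimal sketch of the new unwinder idiom, as used above and in the other
 * converted call sites (save_stack_trace_*(), x86_backtrace(),
 * show_trace_log_lvl()); walk_kernel_stack() is an illustrative name.
 */
static void walk_kernel_stack(struct task_struct *task, struct pt_regs *regs)
{
	struct unwind_state state;

	for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
	     unwind_next_frame(&state)) {
		unsigned long addr = unwind_get_return_address(&state);

		if (!addr)
			break;
		/* Consume 'addr': store it, print it, filter it, ... */
	}
}
```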
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index e77a6443104f..1b020381ab38 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h | |||
@@ -217,10 +217,14 @@ static inline int alternatives_text_reserved(void *start, void *end) | |||
217 | */ | 217 | */ |
218 | #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ | 218 | #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ |
219 | output, input...) \ | 219 | output, input...) \ |
220 | { \ | ||
221 | register void *__sp asm(_ASM_SP); \ | ||
220 | asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ | 222 | asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ |
221 | "call %P[new2]", feature2) \ | 223 | "call %P[new2]", feature2) \ |
222 | : output : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ | 224 | : output, "+r" (__sp) \ |
223 | [new2] "i" (newfunc2), ## input) | 225 | : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ |
226 | [new2] "i" (newfunc2), ## input); \ | ||
227 | } | ||
224 | 228 | ||
225 | /* | 229 | /* |
226 | * use this macro(s) if you need more than one output parameter | 230 | * use this macro(s) if you need more than one output parameter |
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 4e10d73cf018..12080d87da3b 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h | |||
@@ -36,7 +36,7 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in | |||
36 | 36 | ||
37 | extern struct desc_ptr idt_descr; | 37 | extern struct desc_ptr idt_descr; |
38 | extern gate_desc idt_table[]; | 38 | extern gate_desc idt_table[]; |
39 | extern struct desc_ptr debug_idt_descr; | 39 | extern const struct desc_ptr debug_idt_descr; |
40 | extern gate_desc debug_idt_table[]; | 40 | extern gate_desc debug_idt_table[]; |
41 | 41 | ||
42 | struct gdt_page { | 42 | struct gdt_page { |
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index ae55a43e09c0..d4957ac72b48 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h | |||
@@ -45,7 +45,8 @@ | |||
45 | extern u64 xfeatures_mask; | 45 | extern u64 xfeatures_mask; |
46 | extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; | 46 | extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; |
47 | 47 | ||
48 | extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); | 48 | extern void __init update_regset_xstate_info(unsigned int size, |
49 | u64 xstate_mask); | ||
49 | 50 | ||
50 | void fpu__xstate_clear_all_cpu_caps(void); | 51 | void fpu__xstate_clear_all_cpu_caps(void); |
51 | void *get_xsave_addr(struct xregs_state *xsave, int xstate); | 52 | void *get_xsave_addr(struct xregs_state *xsave, int xstate); |
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index a4820d4df617..eccd0ac6bc38 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h | |||
@@ -6,6 +6,7 @@ | |||
6 | # define MCOUNT_ADDR ((unsigned long)(__fentry__)) | 6 | # define MCOUNT_ADDR ((unsigned long)(__fentry__)) |
7 | #else | 7 | #else |
8 | # define MCOUNT_ADDR ((unsigned long)(mcount)) | 8 | # define MCOUNT_ADDR ((unsigned long)(mcount)) |
9 | # define HAVE_FUNCTION_GRAPH_FP_TEST | ||
9 | #endif | 10 | #endif |
10 | #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ | 11 | #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ |
11 | 12 | ||
@@ -13,6 +14,8 @@ | |||
13 | #define ARCH_SUPPORTS_FTRACE_OPS 1 | 14 | #define ARCH_SUPPORTS_FTRACE_OPS 1 |
14 | #endif | 15 | #endif |
15 | 16 | ||
17 | #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR | ||
18 | |||
16 | #ifndef __ASSEMBLY__ | 19 | #ifndef __ASSEMBLY__ |
17 | extern void mcount(void); | 20 | extern void mcount(void); |
18 | extern atomic_t modifying_ftrace_code; | 21 | extern atomic_t modifying_ftrace_code; |
diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index 2674ee3de748..1052a797d71d 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h | |||
@@ -6,6 +6,7 @@ unsigned long kaslr_get_random_long(const char *purpose); | |||
6 | #ifdef CONFIG_RANDOMIZE_MEMORY | 6 | #ifdef CONFIG_RANDOMIZE_MEMORY |
7 | extern unsigned long page_offset_base; | 7 | extern unsigned long page_offset_base; |
8 | extern unsigned long vmalloc_base; | 8 | extern unsigned long vmalloc_base; |
9 | extern unsigned long vmemmap_base; | ||
9 | 10 | ||
10 | void kernel_randomize_memory(void); | 11 | void kernel_randomize_memory(void); |
11 | #else | 12 | #else |
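The newly exported vmemmap_base is the hook for the "Enable KASLR for the vmemmap memory region" item in the merge summary; the randomization itself lives in arch/x86/mm/kaslr.c, which is not part of the hunks shown here. A sketch of the region table that vmemmap_base plausibly joins (the size values are illustrative):

```c
/*
 * Sketch of the KASLR memory-region table in arch/x86/mm/kaslr.c (not shown
 * in this excerpt). Each base pointer is shifted by a randomized offset
 * during kernel_randomize_memory().
 */
static __initdata struct kaslr_memory_region {
	unsigned long *base;
	unsigned long size_tb;
} kaslr_regions[] = {
	{ &page_offset_base, 0 },		/* direct mapping, sized at boot */
	{ &vmalloc_base, VMALLOC_SIZE_TB },
	{ &vmemmap_base, 1 },			/* newly randomized by this series */
};
```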
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 1ef9d581b5d9..d31881188431 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h | |||
@@ -24,8 +24,6 @@ enum die_val { | |||
24 | extern void printk_address(unsigned long address); | 24 | extern void printk_address(unsigned long address); |
25 | extern void die(const char *, struct pt_regs *,long); | 25 | extern void die(const char *, struct pt_regs *,long); |
26 | extern int __must_check __die(const char *, struct pt_regs *, long); | 26 | extern int __must_check __die(const char *, struct pt_regs *, long); |
27 | extern void show_trace(struct task_struct *t, struct pt_regs *regs, | ||
28 | unsigned long *sp, unsigned long bp); | ||
29 | extern void show_stack_regs(struct pt_regs *regs); | 27 | extern void show_stack_regs(struct pt_regs *regs); |
30 | extern void __show_regs(struct pt_regs *regs, int all); | 28 | extern void __show_regs(struct pt_regs *regs, int all); |
31 | extern unsigned long oops_begin(void); | 29 | extern unsigned long oops_begin(void); |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 4cd8db05301f..ce932812f142 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -80,10 +80,6 @@ static inline unsigned long __read_cr4(void) | |||
80 | { | 80 | { |
81 | return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4); | 81 | return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4); |
82 | } | 82 | } |
83 | static inline unsigned long __read_cr4_safe(void) | ||
84 | { | ||
85 | return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe); | ||
86 | } | ||
87 | 83 | ||
88 | static inline void __write_cr4(unsigned long x) | 84 | static inline void __write_cr4(unsigned long x) |
89 | { | 85 | { |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 60aac60ba25f..0f400c0e4979 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -108,7 +108,6 @@ struct pv_cpu_ops { | |||
108 | unsigned long (*read_cr0)(void); | 108 | unsigned long (*read_cr0)(void); |
109 | void (*write_cr0)(unsigned long); | 109 | void (*write_cr0)(unsigned long); |
110 | 110 | ||
111 | unsigned long (*read_cr4_safe)(void); | ||
112 | unsigned long (*read_cr4)(void); | 111 | unsigned long (*read_cr4)(void); |
113 | void (*write_cr4)(unsigned long); | 112 | void (*write_cr4)(unsigned long); |
114 | 113 | ||
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 6fdef9eef2d5..3a264200c62f 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h | |||
@@ -57,11 +57,13 @@ typedef struct { pteval_t pte; } pte_t; | |||
57 | #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) | 57 | #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) |
58 | #define VMALLOC_SIZE_TB _AC(32, UL) | 58 | #define VMALLOC_SIZE_TB _AC(32, UL) |
59 | #define __VMALLOC_BASE _AC(0xffffc90000000000, UL) | 59 | #define __VMALLOC_BASE _AC(0xffffc90000000000, UL) |
60 | #define VMEMMAP_START _AC(0xffffea0000000000, UL) | 60 | #define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) |
61 | #ifdef CONFIG_RANDOMIZE_MEMORY | 61 | #ifdef CONFIG_RANDOMIZE_MEMORY |
62 | #define VMALLOC_START vmalloc_base | 62 | #define VMALLOC_START vmalloc_base |
63 | #define VMEMMAP_START vmemmap_base | ||
63 | #else | 64 | #else |
64 | #define VMALLOC_START __VMALLOC_BASE | 65 | #define VMALLOC_START __VMALLOC_BASE |
66 | #define VMEMMAP_START __VMEMMAP_BASE | ||
65 | #endif /* CONFIG_RANDOMIZE_MEMORY */ | 67 | #endif /* CONFIG_RANDOMIZE_MEMORY */ |
66 | #define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) | 68 | #define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) |
67 | #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) | 69 | #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 63def9537a2d..984a7bf17f6a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -389,9 +389,9 @@ struct thread_struct { | |||
389 | unsigned short fsindex; | 389 | unsigned short fsindex; |
390 | unsigned short gsindex; | 390 | unsigned short gsindex; |
391 | #endif | 391 | #endif |
392 | #ifdef CONFIG_X86_32 | 392 | |
393 | unsigned long ip; | 393 | u32 status; /* thread synchronous flags */ |
394 | #endif | 394 | |
395 | #ifdef CONFIG_X86_64 | 395 | #ifdef CONFIG_X86_64 |
396 | unsigned long fsbase; | 396 | unsigned long fsbase; |
397 | unsigned long gsbase; | 397 | unsigned long gsbase; |
@@ -438,6 +438,15 @@ struct thread_struct { | |||
438 | }; | 438 | }; |
439 | 439 | ||
440 | /* | 440 | /* |
441 | * Thread-synchronous status. | ||
442 | * | ||
443 | * This is different from the flags in that nobody else | ||
444 | * ever touches our thread-synchronous status, so we don't | ||
445 | * have to worry about atomic accesses. | ||
446 | */ | ||
447 | #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ | ||
448 | |||
449 | /* | ||
441 | * Set IOPL bits in EFLAGS from given mask | 450 | * Set IOPL bits in EFLAGS from given mask |
442 | */ | 451 | */ |
443 | static inline void native_set_iopl_mask(unsigned mask) | 452 | static inline void native_set_iopl_mask(unsigned mask) |
@@ -724,8 +733,6 @@ static inline void spin_lock_prefetch(const void *x) | |||
724 | .addr_limit = KERNEL_DS, \ | 733 | .addr_limit = KERNEL_DS, \ |
725 | } | 734 | } |
726 | 735 | ||
727 | extern unsigned long thread_saved_pc(struct task_struct *tsk); | ||
728 | |||
729 | /* | 736 | /* |
730 | * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack. | 737 | * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack. |
731 | * This is necessary to guarantee that the entire "struct pt_regs" | 738 | * This is necessary to guarantee that the entire "struct pt_regs" |
@@ -776,17 +783,13 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); | |||
776 | .addr_limit = KERNEL_DS, \ | 783 | .addr_limit = KERNEL_DS, \ |
777 | } | 784 | } |
778 | 785 | ||
779 | /* | ||
780 | * Return saved PC of a blocked thread. | ||
781 | * What is this good for? it will be always the scheduler or ret_from_fork. | ||
782 | */ | ||
783 | #define thread_saved_pc(t) READ_ONCE_NOCHECK(*(unsigned long *)((t)->thread.sp - 8)) | ||
784 | |||
785 | #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) | 786 | #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) |
786 | extern unsigned long KSTK_ESP(struct task_struct *task); | 787 | extern unsigned long KSTK_ESP(struct task_struct *task); |
787 | 788 | ||
788 | #endif /* CONFIG_X86_64 */ | 789 | #endif /* CONFIG_X86_64 */ |
789 | 790 | ||
791 | extern unsigned long thread_saved_pc(struct task_struct *tsk); | ||
792 | |||
790 | extern void start_thread(struct pt_regs *regs, unsigned long new_ip, | 793 | extern void start_thread(struct pt_regs *regs, unsigned long new_ip, |
791 | unsigned long new_sp); | 794 | unsigned long new_sp); |
792 | 795 | ||
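Moving the thread-synchronous status word from thread_info into thread_struct is what lets the entry code above replace ti->status with current->thread.status. A minimal hedged sketch of how a TS_COMPAT test reads after the move:

```c
/*
 * Minimal sketch: with 'status' now living in thread_struct, the 32-bit
 * syscall check reads the flag through current rather than thread_info.
 * is_compat_syscall_active() is an illustrative name, not an API added by
 * this series.
 */
static inline bool is_compat_syscall_active(void)
{
#ifdef CONFIG_IA32_EMULATION
	return current->thread.status & TS_COMPAT;
#else
	return false;
#endif
}
```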
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index b2988c0ed829..230e1903acf0 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h | |||
@@ -44,9 +44,9 @@ struct trampoline_header { | |||
44 | extern struct real_mode_header *real_mode_header; | 44 | extern struct real_mode_header *real_mode_header; |
45 | extern unsigned char real_mode_blob_end[]; | 45 | extern unsigned char real_mode_blob_end[]; |
46 | 46 | ||
47 | extern unsigned long init_rsp; | ||
48 | extern unsigned long initial_code; | 47 | extern unsigned long initial_code; |
49 | extern unsigned long initial_gs; | 48 | extern unsigned long initial_gs; |
49 | extern unsigned long initial_stack; | ||
50 | 50 | ||
51 | extern unsigned char real_mode_blob[]; | 51 | extern unsigned char real_mode_blob[]; |
52 | extern unsigned char real_mode_relocs[]; | 52 | extern unsigned char real_mode_relocs[]; |
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index ebd0c164cd4e..19980b36f394 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -39,9 +39,6 @@ DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); | |||
39 | DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid); | 39 | DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid); |
40 | #endif | 40 | #endif |
41 | 41 | ||
42 | /* Static state in head.S used to set up a CPU */ | ||
43 | extern unsigned long stack_start; /* Initial stack pointer address */ | ||
44 | |||
45 | struct task_struct; | 42 | struct task_struct; |
46 | 43 | ||
47 | struct smp_ops { | 44 | struct smp_ops { |
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 587d7914ea4b..19a2224f9e16 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h | |||
@@ -59,22 +59,19 @@ static inline void native_write_cr3(unsigned long val) | |||
59 | static inline unsigned long native_read_cr4(void) | 59 | static inline unsigned long native_read_cr4(void) |
60 | { | 60 | { |
61 | unsigned long val; | 61 | unsigned long val; |
62 | asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); | ||
63 | return val; | ||
64 | } | ||
65 | |||
66 | static inline unsigned long native_read_cr4_safe(void) | ||
67 | { | ||
68 | unsigned long val; | ||
69 | /* This could fault if %cr4 does not exist. In x86_64, a cr4 always | ||
70 | * exists, so it will never fail. */ | ||
71 | #ifdef CONFIG_X86_32 | 62 | #ifdef CONFIG_X86_32 |
63 | /* | ||
64 | * This could fault if CR4 does not exist. Non-existent CR4 | ||
65 | * is functionally equivalent to CR4 == 0. Keep it simple and pretend | ||
66 | * that CR4 == 0 on CPUs that don't have CR4. | ||
67 | */ | ||
72 | asm volatile("1: mov %%cr4, %0\n" | 68 | asm volatile("1: mov %%cr4, %0\n" |
73 | "2:\n" | 69 | "2:\n" |
74 | _ASM_EXTABLE(1b, 2b) | 70 | _ASM_EXTABLE(1b, 2b) |
75 | : "=r" (val), "=m" (__force_order) : "0" (0)); | 71 | : "=r" (val), "=m" (__force_order) : "0" (0)); |
76 | #else | 72 | #else |
77 | val = native_read_cr4(); | 73 | /* CR4 always exists on x86_64. */ |
74 | asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); | ||
78 | #endif | 75 | #endif |
79 | return val; | 76 | return val; |
80 | } | 77 | } |
@@ -182,11 +179,6 @@ static inline unsigned long __read_cr4(void) | |||
182 | return native_read_cr4(); | 179 | return native_read_cr4(); |
183 | } | 180 | } |
184 | 181 | ||
185 | static inline unsigned long __read_cr4_safe(void) | ||
186 | { | ||
187 | return native_read_cr4_safe(); | ||
188 | } | ||
189 | |||
190 | static inline void __write_cr4(unsigned long x) | 182 | static inline void __write_cr4(unsigned long x) |
191 | { | 183 | { |
192 | native_write_cr4(x); | 184 | native_write_cr4(x); |
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 0944218af9e2..37f2e0b377ad 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h | |||
@@ -8,86 +8,86 @@ | |||
8 | 8 | ||
9 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
10 | #include <linux/ptrace.h> | 10 | #include <linux/ptrace.h> |
11 | #include <asm/switch_to.h> | ||
12 | |||
13 | enum stack_type { | ||
14 | STACK_TYPE_UNKNOWN, | ||
15 | STACK_TYPE_TASK, | ||
16 | STACK_TYPE_IRQ, | ||
17 | STACK_TYPE_SOFTIRQ, | ||
18 | STACK_TYPE_EXCEPTION, | ||
19 | STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, | ||
20 | }; | ||
11 | 21 | ||
12 | extern int kstack_depth_to_print; | 22 | struct stack_info { |
13 | 23 | enum stack_type type; | |
14 | struct thread_info; | 24 | unsigned long *begin, *end, *next_sp; |
15 | struct stacktrace_ops; | ||
16 | |||
17 | typedef unsigned long (*walk_stack_t)(struct task_struct *task, | ||
18 | unsigned long *stack, | ||
19 | unsigned long bp, | ||
20 | const struct stacktrace_ops *ops, | ||
21 | void *data, | ||
22 | unsigned long *end, | ||
23 | int *graph); | ||
24 | |||
25 | extern unsigned long | ||
26 | print_context_stack(struct task_struct *task, | ||
27 | unsigned long *stack, unsigned long bp, | ||
28 | const struct stacktrace_ops *ops, void *data, | ||
29 | unsigned long *end, int *graph); | ||
30 | |||
31 | extern unsigned long | ||
32 | print_context_stack_bp(struct task_struct *task, | ||
33 | unsigned long *stack, unsigned long bp, | ||
34 | const struct stacktrace_ops *ops, void *data, | ||
35 | unsigned long *end, int *graph); | ||
36 | |||
37 | /* Generic stack tracer with callbacks */ | ||
38 | |||
39 | struct stacktrace_ops { | ||
40 | int (*address)(void *data, unsigned long address, int reliable); | ||
41 | /* On negative return stop dumping */ | ||
42 | int (*stack)(void *data, char *name); | ||
43 | walk_stack_t walk_stack; | ||
44 | }; | 25 | }; |
45 | 26 | ||
46 | void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | 27 | bool in_task_stack(unsigned long *stack, struct task_struct *task, |
47 | unsigned long *stack, unsigned long bp, | 28 | struct stack_info *info); |
48 | const struct stacktrace_ops *ops, void *data); | 29 | |
30 | int get_stack_info(unsigned long *stack, struct task_struct *task, | ||
31 | struct stack_info *info, unsigned long *visit_mask); | ||
32 | |||
33 | void stack_type_str(enum stack_type type, const char **begin, | ||
34 | const char **end); | ||
35 | |||
36 | static inline bool on_stack(struct stack_info *info, void *addr, size_t len) | ||
37 | { | ||
38 | void *begin = info->begin; | ||
39 | void *end = info->end; | ||
40 | |||
41 | return (info->type != STACK_TYPE_UNKNOWN && | ||
42 | addr >= begin && addr < end && | ||
43 | addr + len > begin && addr + len <= end); | ||
44 | } | ||
45 | |||
46 | extern int kstack_depth_to_print; | ||
49 | 47 | ||
50 | #ifdef CONFIG_X86_32 | 48 | #ifdef CONFIG_X86_32 |
51 | #define STACKSLOTS_PER_LINE 8 | 49 | #define STACKSLOTS_PER_LINE 8 |
52 | #define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) | ||
53 | #else | 50 | #else |
54 | #define STACKSLOTS_PER_LINE 4 | 51 | #define STACKSLOTS_PER_LINE 4 |
55 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | ||
56 | #endif | 52 | #endif |
57 | 53 | ||
58 | #ifdef CONFIG_FRAME_POINTER | 54 | #ifdef CONFIG_FRAME_POINTER |
59 | static inline unsigned long | 55 | static inline unsigned long * |
60 | stack_frame(struct task_struct *task, struct pt_regs *regs) | 56 | get_frame_pointer(struct task_struct *task, struct pt_regs *regs) |
61 | { | 57 | { |
62 | unsigned long bp; | ||
63 | |||
64 | if (regs) | 58 | if (regs) |
65 | return regs->bp; | 59 | return (unsigned long *)regs->bp; |
66 | 60 | ||
67 | if (task == current) { | 61 | if (task == current) |
68 | /* Grab bp right from our regs */ | 62 | return __builtin_frame_address(0); |
69 | get_bp(bp); | ||
70 | return bp; | ||
71 | } | ||
72 | 63 | ||
73 | /* bp is the last reg pushed by switch_to */ | 64 | return (unsigned long *)((struct inactive_task_frame *)task->thread.sp)->bp; |
74 | return *(unsigned long *)task->thread.sp; | ||
75 | } | 65 | } |
76 | #else | 66 | #else |
77 | static inline unsigned long | 67 | static inline unsigned long * |
78 | stack_frame(struct task_struct *task, struct pt_regs *regs) | 68 | get_frame_pointer(struct task_struct *task, struct pt_regs *regs) |
79 | { | 69 | { |
80 | return 0; | 70 | return NULL; |
71 | } | ||
72 | #endif /* CONFIG_FRAME_POINTER */ | ||
73 | |||
74 | static inline unsigned long * | ||
75 | get_stack_pointer(struct task_struct *task, struct pt_regs *regs) | ||
76 | { | ||
77 | if (regs) | ||
78 | return (unsigned long *)kernel_stack_pointer(regs); | ||
79 | |||
80 | if (task == current) | ||
81 | return __builtin_frame_address(0); | ||
82 | |||
83 | return (unsigned long *)task->thread.sp; | ||
81 | } | 84 | } |
82 | #endif | ||
83 | 85 | ||
84 | extern void | 86 | void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
85 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 87 | unsigned long *stack, char *log_lvl); |
86 | unsigned long *stack, unsigned long bp, char *log_lvl); | ||
87 | 88 | ||
88 | extern void | 89 | void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
89 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 90 | unsigned long *sp, char *log_lvl); |
90 | unsigned long *sp, unsigned long bp, char *log_lvl); | ||
91 | 91 | ||
92 | extern unsigned int code_bytes; | 92 | extern unsigned int code_bytes; |
93 | 93 | ||
@@ -106,7 +106,7 @@ static inline unsigned long caller_frame_pointer(void) | |||
106 | { | 106 | { |
107 | struct stack_frame *frame; | 107 | struct stack_frame *frame; |
108 | 108 | ||
109 | get_bp(frame); | 109 | frame = __builtin_frame_address(0); |
110 | 110 | ||
111 | #ifdef CONFIG_FRAME_POINTER | 111 | #ifdef CONFIG_FRAME_POINTER |
112 | frame = frame->next_frame; | 112 | frame = frame->next_frame; |
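For context on the stacktrace.h rework above: the old dump_trace()/stacktrace_ops callbacks give way to a small stack_info API. A minimal usage sketch built only from the prototypes shown in this hunk — the helper name object_is_on_stack_of is illustrative, not part of the patch:

	/*
	 * Illustrative only (assumes <asm/stacktrace.h> from this patch):
	 * check whether an object of 'len' bytes lies entirely on whichever
	 * stack currently contains 'sp', using get_stack_info() to classify
	 * the stack and on_stack() to do the bounds check.
	 */
	static bool object_is_on_stack_of(unsigned long *sp, void *obj, size_t len)
	{
		struct stack_info info;
		unsigned long visit_mask = 0;

		if (get_stack_info(sp, current, &info, &visit_mask))
			return false;		/* STACK_TYPE_UNKNOWN */

		return on_stack(&info, obj, len);
	}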
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 8f321a1b03a1..5cb436acd463 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h | |||
@@ -2,130 +2,66 @@ | |||
2 | #define _ASM_X86_SWITCH_TO_H | 2 | #define _ASM_X86_SWITCH_TO_H |
3 | 3 | ||
4 | struct task_struct; /* one of the stranger aspects of C forward declarations */ | 4 | struct task_struct; /* one of the stranger aspects of C forward declarations */ |
5 | |||
6 | struct task_struct *__switch_to_asm(struct task_struct *prev, | ||
7 | struct task_struct *next); | ||
8 | |||
5 | __visible struct task_struct *__switch_to(struct task_struct *prev, | 9 | __visible struct task_struct *__switch_to(struct task_struct *prev, |
6 | struct task_struct *next); | 10 | struct task_struct *next); |
7 | struct tss_struct; | 11 | struct tss_struct; |
8 | void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | 12 | void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, |
9 | struct tss_struct *tss); | 13 | struct tss_struct *tss); |
10 | 14 | ||
11 | #ifdef CONFIG_X86_32 | 15 | /* This runs on the previous thread's stack. */ |

16 | static inline void prepare_switch_to(struct task_struct *prev, | ||
17 | struct task_struct *next) | ||
18 | { | ||
19 | #ifdef CONFIG_VMAP_STACK | ||
20 | /* | ||
21 | * If we switch to a stack that has a top-level paging entry | ||
22 | * that is not present in the current mm, the resulting #PF | ||
23 | * will be promoted to a double-fault and we'll panic. Probe | ||
24 | * the new stack now so that vmalloc_fault can fix up the page | ||
25 | * tables if needed. This can only happen if we use a stack | ||
26 | * in vmap space. | ||
27 | * | ||
28 | * We assume that the stack is aligned so that it never spans | ||
29 | * more than one top-level paging entry. | ||
30 | * | ||
31 | * To minimize cache pollution, just follow the stack pointer. | ||
32 | */ | ||
33 | READ_ONCE(*(unsigned char *)next->thread.sp); | ||
34 | #endif | ||
35 | } | ||
36 | |||
37 | asmlinkage void ret_from_fork(void); | ||
38 | |||
39 | /* data that is pointed to by thread.sp */ | ||
40 | struct inactive_task_frame { | ||
41 | #ifdef CONFIG_X86_64 | ||
42 | unsigned long r15; | ||
43 | unsigned long r14; | ||
44 | unsigned long r13; | ||
45 | unsigned long r12; | ||
46 | #else | ||
47 | unsigned long si; | ||
48 | unsigned long di; | ||
49 | #endif | ||
50 | unsigned long bx; | ||
51 | unsigned long bp; | ||
52 | unsigned long ret_addr; | ||
53 | }; | ||
12 | 54 | ||
13 | #ifdef CONFIG_CC_STACKPROTECTOR | 55 | struct fork_frame { |
14 | #define __switch_canary \ | 56 | struct inactive_task_frame frame; |
15 | "movl %P[task_canary](%[next]), %%ebx\n\t" \ | 57 | struct pt_regs regs; |
16 | "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" | 58 | }; |
17 | #define __switch_canary_oparam \ | ||
18 | , [stack_canary] "=m" (stack_canary.canary) | ||
19 | #define __switch_canary_iparam \ | ||
20 | , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) | ||
21 | #else /* CC_STACKPROTECTOR */ | ||
22 | #define __switch_canary | ||
23 | #define __switch_canary_oparam | ||
24 | #define __switch_canary_iparam | ||
25 | #endif /* CC_STACKPROTECTOR */ | ||
26 | 59 | ||
27 | /* | ||
28 | * Saving eflags is important. It switches not only IOPL between tasks, | ||
29 | * it also protects other tasks from NT leaking through sysenter etc. | ||
30 | */ | ||
31 | #define switch_to(prev, next, last) \ | 60 | #define switch_to(prev, next, last) \ |
32 | do { \ | 61 | do { \ |
33 | /* \ | 62 | prepare_switch_to(prev, next); \ |
34 | * Context-switching clobbers all registers, so we clobber \ | ||
35 | * them explicitly, via unused output variables. \ | ||
36 | * (EAX and EBP is not listed because EBP is saved/restored \ | ||
37 | * explicitly for wchan access and EAX is the return value of \ | ||
38 | * __switch_to()) \ | ||
39 | */ \ | ||
40 | unsigned long ebx, ecx, edx, esi, edi; \ | ||
41 | \ | ||
42 | asm volatile("pushl %%ebp\n\t" /* save EBP */ \ | ||
43 | "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ | ||
44 | "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ | ||
45 | "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ | ||
46 | "pushl %[next_ip]\n\t" /* restore EIP */ \ | ||
47 | __switch_canary \ | ||
48 | "jmp __switch_to\n" /* regparm call */ \ | ||
49 | "1:\t" \ | ||
50 | "popl %%ebp\n\t" /* restore EBP */ \ | ||
51 | \ | ||
52 | /* output parameters */ \ | ||
53 | : [prev_sp] "=m" (prev->thread.sp), \ | ||
54 | [prev_ip] "=m" (prev->thread.ip), \ | ||
55 | "=a" (last), \ | ||
56 | \ | ||
57 | /* clobbered output registers: */ \ | ||
58 | "=b" (ebx), "=c" (ecx), "=d" (edx), \ | ||
59 | "=S" (esi), "=D" (edi) \ | ||
60 | \ | ||
61 | __switch_canary_oparam \ | ||
62 | \ | ||
63 | /* input parameters: */ \ | ||
64 | : [next_sp] "m" (next->thread.sp), \ | ||
65 | [next_ip] "m" (next->thread.ip), \ | ||
66 | \ | ||
67 | /* regparm parameters for __switch_to(): */ \ | ||
68 | [prev] "a" (prev), \ | ||
69 | [next] "d" (next) \ | ||
70 | \ | 63 | \ |
71 | __switch_canary_iparam \ | 64 | ((last) = __switch_to_asm((prev), (next))); \ |
72 | \ | ||
73 | : /* reloaded segment registers */ \ | ||
74 | "memory"); \ | ||
75 | } while (0) | 65 | } while (0) |
76 | 66 | ||
77 | #else /* CONFIG_X86_32 */ | ||
78 | |||
79 | /* frame pointer must be last for get_wchan */ | ||
80 | #define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t" | ||
81 | #define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t" | ||
82 | |||
83 | #define __EXTRA_CLOBBER \ | ||
84 | , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ | ||
85 | "r12", "r13", "r14", "r15", "flags" | ||
86 | |||
87 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
88 | #define __switch_canary \ | ||
89 | "movq %P[task_canary](%%rsi),%%r8\n\t" \ | ||
90 | "movq %%r8,"__percpu_arg([gs_canary])"\n\t" | ||
91 | #define __switch_canary_oparam \ | ||
92 | , [gs_canary] "=m" (irq_stack_union.stack_canary) | ||
93 | #define __switch_canary_iparam \ | ||
94 | , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) | ||
95 | #else /* CC_STACKPROTECTOR */ | ||
96 | #define __switch_canary | ||
97 | #define __switch_canary_oparam | ||
98 | #define __switch_canary_iparam | ||
99 | #endif /* CC_STACKPROTECTOR */ | ||
100 | |||
101 | /* | ||
102 | * There is no need to save or restore flags, because flags are always | ||
103 | * clean in kernel mode, with the possible exception of IOPL. Kernel IOPL | ||
104 | * has no effect. | ||
105 | */ | ||
106 | #define switch_to(prev, next, last) \ | ||
107 | asm volatile(SAVE_CONTEXT \ | ||
108 | "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ | ||
109 | "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ | ||
110 | "call __switch_to\n\t" \ | ||
111 | "movq "__percpu_arg([current_task])",%%rsi\n\t" \ | ||
112 | __switch_canary \ | ||
113 | "movq %P[thread_info](%%rsi),%%r8\n\t" \ | ||
114 | "movq %%rax,%%rdi\n\t" \ | ||
115 | "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ | ||
116 | "jnz ret_from_fork\n\t" \ | ||
117 | RESTORE_CONTEXT \ | ||
118 | : "=a" (last) \ | ||
119 | __switch_canary_oparam \ | ||
120 | : [next] "S" (next), [prev] "D" (prev), \ | ||
121 | [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ | ||
122 | [ti_flags] "i" (offsetof(struct thread_info, flags)), \ | ||
123 | [_tif_fork] "i" (_TIF_FORK), \ | ||
124 | [thread_info] "i" (offsetof(struct task_struct, stack)), \ | ||
125 | [current_task] "m" (current_task) \ | ||
126 | __switch_canary_iparam \ | ||
127 | : "memory", "cc" __EXTRA_CLOBBER) | ||
128 | |||
129 | #endif /* CONFIG_X86_32 */ | ||
130 | |||
131 | #endif /* _ASM_X86_SWITCH_TO_H */ | 67 | #endif /* _ASM_X86_SWITCH_TO_H */ |
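The inactive_task_frame layout introduced above defines what thread.sp points at for a task that is not running. A hedged sketch of what that buys consumers such as get_wchan() or the frame-pointer unwinder — the helper below is illustrative, not from this patch; compare get_frame_pointer() in the stacktrace.h hunk:

	/*
	 * Illustrative only: with the inactive_task_frame layout above, a
	 * sleeping task's saved frame pointer can be read through thread.sp
	 * as a typed struct instead of a raw stack-slot dereference.
	 */
	static unsigned long saved_frame_pointer(struct task_struct *task)
	{
		struct inactive_task_frame *frame =
			(struct inactive_task_frame *)task->thread.sp;

		return frame->bp;
	}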
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 4e23dd15c661..e3c95e8e61c5 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h | |||
@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, | |||
60 | * TS_COMPAT is set for 32-bit syscall entries and then | 60 | * TS_COMPAT is set for 32-bit syscall entries and then |
61 | * remains set until we return to user mode. | 61 | * remains set until we return to user mode. |
62 | */ | 62 | */ |
63 | if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED)) | 63 | if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) |
64 | /* | 64 | /* |
65 | * Sign-extend the value so (int)-EFOO becomes (long)-EFOO | 65 | * Sign-extend the value so (int)-EFOO becomes (long)-EFOO |
66 | * and will match correctly in comparisons. | 66 | * and will match correctly in comparisons. |
@@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task, | |||
116 | unsigned long *args) | 116 | unsigned long *args) |
117 | { | 117 | { |
118 | # ifdef CONFIG_IA32_EMULATION | 118 | # ifdef CONFIG_IA32_EMULATION |
119 | if (task_thread_info(task)->status & TS_COMPAT) | 119 | if (task->thread.status & TS_COMPAT) |
120 | switch (i) { | 120 | switch (i) { |
121 | case 0: | 121 | case 0: |
122 | if (!n--) break; | 122 | if (!n--) break; |
@@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task, | |||
177 | const unsigned long *args) | 177 | const unsigned long *args) |
178 | { | 178 | { |
179 | # ifdef CONFIG_IA32_EMULATION | 179 | # ifdef CONFIG_IA32_EMULATION |
180 | if (task_thread_info(task)->status & TS_COMPAT) | 180 | if (task->thread.status & TS_COMPAT) |
181 | switch (i) { | 181 | switch (i) { |
182 | case 0: | 182 | case 0: |
183 | if (!n--) break; | 183 | if (!n--) break; |
@@ -234,18 +234,8 @@ static inline void syscall_set_arguments(struct task_struct *task, | |||
234 | 234 | ||
235 | static inline int syscall_get_arch(void) | 235 | static inline int syscall_get_arch(void) |
236 | { | 236 | { |
237 | #ifdef CONFIG_IA32_EMULATION | 237 | /* x32 tasks should be considered AUDIT_ARCH_X86_64. */ |
238 | /* | 238 | return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; |
239 | * TS_COMPAT is set for 32-bit syscall entry and then | ||
240 | * remains set until we return to user mode. | ||
241 | * | ||
242 | * x32 tasks should be considered AUDIT_ARCH_X86_64. | ||
243 | */ | ||
244 | if (task_thread_info(current)->status & TS_COMPAT) | ||
245 | return AUDIT_ARCH_I386; | ||
246 | #endif | ||
247 | /* Both x32 and x86_64 are considered "64-bit". */ | ||
248 | return AUDIT_ARCH_X86_64; | ||
249 | } | 239 | } |
250 | #endif /* CONFIG_X86_32 */ | 240 | #endif /* CONFIG_X86_32 */ |
251 | 241 | ||
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8b7c8d8e0852..2aaca53c0974 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -52,21 +52,6 @@ struct task_struct; | |||
52 | #include <asm/cpufeature.h> | 52 | #include <asm/cpufeature.h> |
53 | #include <linux/atomic.h> | 53 | #include <linux/atomic.h> |
54 | 54 | ||
55 | struct thread_info { | ||
56 | struct task_struct *task; /* main task structure */ | ||
57 | __u32 flags; /* low level flags */ | ||
58 | __u32 status; /* thread synchronous flags */ | ||
59 | __u32 cpu; /* current CPU */ | ||
60 | }; | ||
61 | |||
62 | #define INIT_THREAD_INFO(tsk) \ | ||
63 | { \ | ||
64 | .task = &tsk, \ | ||
65 | .flags = 0, \ | ||
66 | .cpu = 0, \ | ||
67 | } | ||
68 | |||
69 | #define init_thread_info (init_thread_union.thread_info) | ||
70 | #define init_stack (init_thread_union.stack) | 55 | #define init_stack (init_thread_union.stack) |
71 | 56 | ||
72 | #else /* !__ASSEMBLY__ */ | 57 | #else /* !__ASSEMBLY__ */ |
@@ -95,7 +80,6 @@ struct thread_info { | |||
95 | #define TIF_UPROBE 12 /* breakpointed or singlestepping */ | 80 | #define TIF_UPROBE 12 /* breakpointed or singlestepping */ |
96 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ | 81 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ |
97 | #define TIF_IA32 17 /* IA32 compatibility process */ | 82 | #define TIF_IA32 17 /* IA32 compatibility process */ |
98 | #define TIF_FORK 18 /* ret_from_fork */ | ||
99 | #define TIF_NOHZ 19 /* in adaptive nohz mode */ | 83 | #define TIF_NOHZ 19 /* in adaptive nohz mode */ |
100 | #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ | 84 | #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ |
101 | #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ | 85 | #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ |
@@ -119,7 +103,6 @@ struct thread_info { | |||
119 | #define _TIF_UPROBE (1 << TIF_UPROBE) | 103 | #define _TIF_UPROBE (1 << TIF_UPROBE) |
120 | #define _TIF_NOTSC (1 << TIF_NOTSC) | 104 | #define _TIF_NOTSC (1 << TIF_NOTSC) |
121 | #define _TIF_IA32 (1 << TIF_IA32) | 105 | #define _TIF_IA32 (1 << TIF_IA32) |
122 | #define _TIF_FORK (1 << TIF_FORK) | ||
123 | #define _TIF_NOHZ (1 << TIF_NOHZ) | 106 | #define _TIF_NOHZ (1 << TIF_NOHZ) |
124 | #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) | 107 | #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) |
125 | #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) | 108 | #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) |
@@ -160,11 +143,6 @@ struct thread_info { | |||
160 | */ | 143 | */ |
161 | #ifndef __ASSEMBLY__ | 144 | #ifndef __ASSEMBLY__ |
162 | 145 | ||
163 | static inline struct thread_info *current_thread_info(void) | ||
164 | { | ||
165 | return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); | ||
166 | } | ||
167 | |||
168 | static inline unsigned long current_stack_pointer(void) | 146 | static inline unsigned long current_stack_pointer(void) |
169 | { | 147 | { |
170 | unsigned long sp; | 148 | unsigned long sp; |
@@ -226,60 +204,19 @@ static inline int arch_within_stack_frames(const void * const stack, | |||
226 | # define cpu_current_top_of_stack (cpu_tss + TSS_sp0) | 204 | # define cpu_current_top_of_stack (cpu_tss + TSS_sp0) |
227 | #endif | 205 | #endif |
228 | 206 | ||
229 | /* | ||
230 | * ASM operand which evaluates to a 'thread_info' address of | ||
231 | * the current task, if it is known that "reg" is exactly "off" | ||
232 | * bytes below the top of the stack currently. | ||
233 | * | ||
234 | * ( The kernel stack's size is known at build time, it is usually | ||
235 | * 2 or 4 pages, and the bottom of the kernel stack contains | ||
236 | * the thread_info structure. So to access the thread_info very | ||
237 | * quickly from assembly code we can calculate down from the | ||
238 | * top of the kernel stack to the bottom, using constant, | ||
239 | * build-time calculations only. ) | ||
240 | * | ||
241 | * For example, to fetch the current thread_info->flags value into %eax | ||
242 | * on x86-64 defconfig kernels, in syscall entry code where RSP is | ||
243 | * currently at exactly SIZEOF_PTREGS bytes away from the top of the | ||
244 | * stack: | ||
245 | * | ||
246 | * mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax | ||
247 | * | ||
248 | * will translate to: | ||
249 | * | ||
250 | * 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax | ||
251 | * | ||
252 | * which is below the current RSP by almost 16K. | ||
253 | */ | ||
254 | #define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg) | ||
255 | |||
256 | #endif | 207 | #endif |
257 | 208 | ||
258 | /* | ||
259 | * Thread-synchronous status. | ||
260 | * | ||
261 | * This is different from the flags in that nobody else | ||
262 | * ever touches our thread-synchronous status, so we don't | ||
263 | * have to worry about atomic accesses. | ||
264 | */ | ||
265 | #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ | ||
266 | #ifdef CONFIG_COMPAT | 209 | #ifdef CONFIG_COMPAT |
267 | #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ | 210 | #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ |
268 | #endif | 211 | #endif |
269 | |||
270 | #ifndef __ASSEMBLY__ | 212 | #ifndef __ASSEMBLY__ |
271 | 213 | ||
272 | static inline bool in_ia32_syscall(void) | ||
273 | { | ||
274 | #ifdef CONFIG_X86_32 | 214 | #ifdef CONFIG_X86_32 |
275 | return true; | 215 | #define in_ia32_syscall() true |
276 | #endif | 216 | #else |
277 | #ifdef CONFIG_IA32_EMULATION | 217 | #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ |
278 | if (current_thread_info()->status & TS_COMPAT) | 218 | current->thread.status & TS_COMPAT) |
279 | return true; | ||
280 | #endif | 219 | #endif |
281 | return false; | ||
282 | } | ||
283 | 220 | ||
284 | /* | 221 | /* |
285 | * Force syscall return via IRET by making it look as if there was | 222 | * Force syscall return via IRET by making it look as if there was |
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index dee8a70382ba..6fa85944af83 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h | |||
@@ -81,7 +81,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); | |||
81 | /* Initialize cr4 shadow for this CPU. */ | 81 | /* Initialize cr4 shadow for this CPU. */ |
82 | static inline void cr4_init_shadow(void) | 82 | static inline void cr4_init_shadow(void) |
83 | { | 83 | { |
84 | this_cpu_write(cpu_tlbstate.cr4, __read_cr4_safe()); | 84 | this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); |
85 | } | 85 | } |
86 | 86 | ||
87 | /* Set in this cpu's CR4. */ | 87 | /* Set in this cpu's CR4. */ |
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index c3496619740a..01fd0a7f48cd 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -117,6 +117,12 @@ extern void ist_exit(struct pt_regs *regs); | |||
117 | extern void ist_begin_non_atomic(struct pt_regs *regs); | 117 | extern void ist_begin_non_atomic(struct pt_regs *regs); |
118 | extern void ist_end_non_atomic(void); | 118 | extern void ist_end_non_atomic(void); |
119 | 119 | ||
120 | #ifdef CONFIG_VMAP_STACK | ||
121 | void __noreturn handle_stack_overflow(const char *message, | ||
122 | struct pt_regs *regs, | ||
123 | unsigned long fault_address); | ||
124 | #endif | ||
125 | |||
120 | /* Interrupts/Exceptions */ | 126 | /* Interrupts/Exceptions */ |
121 | enum { | 127 | enum { |
122 | X86_TRAP_DE = 0, /* 0, Divide-by-zero */ | 128 | X86_TRAP_DE = 0, /* 0, Divide-by-zero */ |
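handle_stack_overflow() is only declared in this hunk; it is presumably called from the fault-handling paths touched elsewhere in this series. Purely as a hypothetical illustration of the idea (none of the code below is from the patch): with CONFIG_VMAP_STACK the task stack is vmalloc'ed and surrounded by unmapped guard pages, so a fault address landing in that window is a strong hint of stack overflow.

	/*
	 * Hypothetical sketch, not from this patch: report whether a faulting
	 * address falls in the guard page immediately below or above the
	 * task's vmapped stack.
	 */
	static bool fault_in_stack_guard_page(struct task_struct *task,
					      unsigned long address)
	{
		unsigned long stack = (unsigned long)task_stack_page(task);

		return (address >= stack - PAGE_SIZE && address < stack) ||
		       (address >= stack + THREAD_SIZE &&
			address < stack + THREAD_SIZE + PAGE_SIZE);
	}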
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h new file mode 100644 index 000000000000..c4b6d1cafa46 --- /dev/null +++ b/arch/x86/include/asm/unwind.h | |||
@@ -0,0 +1,73 @@ | |||
1 | #ifndef _ASM_X86_UNWIND_H | ||
2 | #define _ASM_X86_UNWIND_H | ||
3 | |||
4 | #include <linux/sched.h> | ||
5 | #include <linux/ftrace.h> | ||
6 | #include <asm/ptrace.h> | ||
7 | #include <asm/stacktrace.h> | ||
8 | |||
9 | struct unwind_state { | ||
10 | struct stack_info stack_info; | ||
11 | unsigned long stack_mask; | ||
12 | struct task_struct *task; | ||
13 | int graph_idx; | ||
14 | #ifdef CONFIG_FRAME_POINTER | ||
15 | unsigned long *bp; | ||
16 | #else | ||
17 | unsigned long *sp; | ||
18 | #endif | ||
19 | }; | ||
20 | |||
21 | void __unwind_start(struct unwind_state *state, struct task_struct *task, | ||
22 | struct pt_regs *regs, unsigned long *first_frame); | ||
23 | |||
24 | bool unwind_next_frame(struct unwind_state *state); | ||
25 | |||
26 | static inline bool unwind_done(struct unwind_state *state) | ||
27 | { | ||
28 | return state->stack_info.type == STACK_TYPE_UNKNOWN; | ||
29 | } | ||
30 | |||
31 | static inline | ||
32 | void unwind_start(struct unwind_state *state, struct task_struct *task, | ||
33 | struct pt_regs *regs, unsigned long *first_frame) | ||
34 | { | ||
35 | first_frame = first_frame ? : get_stack_pointer(task, regs); | ||
36 | |||
37 | __unwind_start(state, task, regs, first_frame); | ||
38 | } | ||
39 | |||
40 | #ifdef CONFIG_FRAME_POINTER | ||
41 | |||
42 | static inline | ||
43 | unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) | ||
44 | { | ||
45 | if (unwind_done(state)) | ||
46 | return NULL; | ||
47 | |||
48 | return state->bp + 1; | ||
49 | } | ||
50 | |||
51 | unsigned long unwind_get_return_address(struct unwind_state *state); | ||
52 | |||
53 | #else /* !CONFIG_FRAME_POINTER */ | ||
54 | |||
55 | static inline | ||
56 | unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) | ||
57 | { | ||
58 | return NULL; | ||
59 | } | ||
60 | |||
61 | static inline | ||
62 | unsigned long unwind_get_return_address(struct unwind_state *state) | ||
63 | { | ||
64 | if (unwind_done(state)) | ||
65 | return 0; | ||
66 | |||
67 | return ftrace_graph_ret_addr(state->task, &state->graph_idx, | ||
68 | *state->sp, state->sp); | ||
69 | } | ||
70 | |||
71 | #endif /* CONFIG_FRAME_POINTER */ | ||
72 | |||
73 | #endif /* _ASM_X86_UNWIND_H */ | ||
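The unwind.h interface above is what the converted callers (dumpstack, stacktrace, perf, oprofile) use later in this series. A minimal sketch of the intended loop, using only the functions declared in this header — the function name is illustrative, not part of the patch:

	/*
	 * Minimal sketch of the unwind API's intended use: walk from the
	 * current frame of 'task' (or from 'regs' if non-NULL) and print
	 * every return address until the unwinder reaches the end of the
	 * last stack.
	 */
	static void sketch_dump_return_addresses(struct task_struct *task,
						 struct pt_regs *regs)
	{
		struct unwind_state state;
		unsigned long addr;

		for (unwind_start(&state, task, regs, NULL);
		     !unwind_done(&state);
		     unwind_next_frame(&state)) {
			addr = unwind_get_return_address(&state);
			if (!addr)
				break;
			printk(KERN_DEFAULT "  [<%p>] %pS\n",
			       (void *)addr, (void *)addr);
		}
	}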
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0503f5bfb18d..45257cf84370 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -125,6 +125,12 @@ obj-$(CONFIG_EFI) += sysfb_efi.o | |||
125 | obj-$(CONFIG_PERF_EVENTS) += perf_regs.o | 125 | obj-$(CONFIG_PERF_EVENTS) += perf_regs.o |
126 | obj-$(CONFIG_TRACING) += tracepoint.o | 126 | obj-$(CONFIG_TRACING) += tracepoint.o |
127 | 127 | ||
128 | ifdef CONFIG_FRAME_POINTER | ||
129 | obj-y += unwind_frame.o | ||
130 | else | ||
131 | obj-y += unwind_guess.o | ||
132 | endif | ||
133 | |||
128 | ### | 134 | ### |
129 | # 64 bit specific files | 135 | # 64 bit specific files |
130 | ifeq ($(CONFIG_X86_64),y) | 136 | ifeq ($(CONFIG_X86_64),y) |
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index adb3eaf8fe2a..48587335ede8 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -99,7 +99,7 @@ int x86_acpi_suspend_lowlevel(void) | |||
99 | saved_magic = 0x12345678; | 99 | saved_magic = 0x12345678; |
100 | #else /* CONFIG_64BIT */ | 100 | #else /* CONFIG_64BIT */ |
101 | #ifdef CONFIG_SMP | 101 | #ifdef CONFIG_SMP |
102 | stack_start = (unsigned long)temp_stack + sizeof(temp_stack); | 102 | initial_stack = (unsigned long)temp_stack + sizeof(temp_stack); |
103 | early_gdt_descr.address = | 103 | early_gdt_descr.address = |
104 | (unsigned long)get_cpu_gdt_table(smp_processor_id()); | 104 | (unsigned long)get_cpu_gdt_table(smp_processor_id()); |
105 | initial_gs = per_cpu_offset(smp_processor_id()); | 105 | initial_gs = per_cpu_offset(smp_processor_id()); |
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 5b2ae106bd4a..8862da76ef6f 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c | |||
@@ -25,7 +25,7 @@ | |||
25 | static struct apic apic_physflat; | 25 | static struct apic apic_physflat; |
26 | static struct apic apic_flat; | 26 | static struct apic apic_flat; |
27 | 27 | ||
28 | struct apic __read_mostly *apic = &apic_flat; | 28 | struct apic *apic __ro_after_init = &apic_flat; |
29 | EXPORT_SYMBOL_GPL(apic); | 29 | EXPORT_SYMBOL_GPL(apic); |
30 | 30 | ||
31 | static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 31 | static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
@@ -154,7 +154,7 @@ static int flat_probe(void) | |||
154 | return 1; | 154 | return 1; |
155 | } | 155 | } |
156 | 156 | ||
157 | static struct apic apic_flat = { | 157 | static struct apic apic_flat __ro_after_init = { |
158 | .name = "flat", | 158 | .name = "flat", |
159 | .probe = flat_probe, | 159 | .probe = flat_probe, |
160 | .acpi_madt_oem_check = flat_acpi_madt_oem_check, | 160 | .acpi_madt_oem_check = flat_acpi_madt_oem_check, |
@@ -248,7 +248,7 @@ static int physflat_probe(void) | |||
248 | return 0; | 248 | return 0; |
249 | } | 249 | } |
250 | 250 | ||
251 | static struct apic apic_physflat = { | 251 | static struct apic apic_physflat __ro_after_init = { |
252 | 252 | ||
253 | .name = "physical flat", | 253 | .name = "physical flat", |
254 | .probe = physflat_probe, | 254 | .probe = physflat_probe, |
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index c05688b2deff..b109e4389c92 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c | |||
@@ -108,7 +108,7 @@ static void noop_apic_write(u32 reg, u32 v) | |||
108 | WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); | 108 | WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); |
109 | } | 109 | } |
110 | 110 | ||
111 | struct apic apic_noop = { | 111 | struct apic apic_noop __ro_after_init = { |
112 | .name = "noop", | 112 | .name = "noop", |
113 | .probe = noop_probe, | 113 | .probe = noop_probe, |
114 | .acpi_madt_oem_check = NULL, | 114 | .acpi_madt_oem_check = NULL, |
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 06dbaa458bfe..56012010332c 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c | |||
@@ -142,7 +142,7 @@ static int probe_bigsmp(void) | |||
142 | return dmi_bigsmp; | 142 | return dmi_bigsmp; |
143 | } | 143 | } |
144 | 144 | ||
145 | static struct apic apic_bigsmp = { | 145 | static struct apic apic_bigsmp __ro_after_init = { |
146 | 146 | ||
147 | .name = "bigsmp", | 147 | .name = "bigsmp", |
148 | .probe = probe_bigsmp, | 148 | .probe = probe_bigsmp, |
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index ade25320df96..015bbf30e3e3 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c | |||
@@ -269,7 +269,7 @@ static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg) | |||
269 | hpet_msi_write(irq_data_get_irq_handler_data(data), msg); | 269 | hpet_msi_write(irq_data_get_irq_handler_data(data), msg); |
270 | } | 270 | } |
271 | 271 | ||
272 | static struct irq_chip hpet_msi_controller = { | 272 | static struct irq_chip hpet_msi_controller __ro_after_init = { |
273 | .name = "HPET-MSI", | 273 | .name = "HPET-MSI", |
274 | .irq_unmask = hpet_msi_unmask, | 274 | .irq_unmask = hpet_msi_unmask, |
275 | .irq_mask = hpet_msi_mask, | 275 | .irq_mask = hpet_msi_mask, |
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 563096267ca2..c48264e202fd 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c | |||
@@ -72,7 +72,7 @@ static int probe_default(void) | |||
72 | return 1; | 72 | return 1; |
73 | } | 73 | } |
74 | 74 | ||
75 | static struct apic apic_default = { | 75 | static struct apic apic_default __ro_after_init = { |
76 | 76 | ||
77 | .name = "default", | 77 | .name = "default", |
78 | .probe = probe_default, | 78 | .probe = probe_default, |
@@ -126,7 +126,7 @@ static struct apic apic_default = { | |||
126 | 126 | ||
127 | apic_driver(apic_default); | 127 | apic_driver(apic_default); |
128 | 128 | ||
129 | struct apic *apic = &apic_default; | 129 | struct apic *apic __ro_after_init = &apic_default; |
130 | EXPORT_SYMBOL_GPL(apic); | 130 | EXPORT_SYMBOL_GPL(apic); |
131 | 131 | ||
132 | static int cmdline_apic __initdata; | 132 | static int cmdline_apic __initdata; |
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 54f35d988025..200af5ae9662 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
@@ -227,7 +227,7 @@ static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, | |||
227 | cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); | 227 | cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); |
228 | } | 228 | } |
229 | 229 | ||
230 | static struct apic apic_x2apic_cluster = { | 230 | static struct apic apic_x2apic_cluster __ro_after_init = { |
231 | 231 | ||
232 | .name = "cluster x2apic", | 232 | .name = "cluster x2apic", |
233 | .probe = x2apic_cluster_probe, | 233 | .probe = x2apic_cluster_probe, |
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 4f13f54f1b1f..ff111f05a314 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c | |||
@@ -98,7 +98,7 @@ static int x2apic_phys_probe(void) | |||
98 | return apic == &apic_x2apic_phys; | 98 | return apic == &apic_x2apic_phys; |
99 | } | 99 | } |
100 | 100 | ||
101 | static struct apic apic_x2apic_phys = { | 101 | static struct apic apic_x2apic_phys __ro_after_init = { |
102 | 102 | ||
103 | .name = "physical x2apic", | 103 | .name = "physical x2apic", |
104 | .probe = x2apic_phys_probe, | 104 | .probe = x2apic_phys_probe, |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index cb0673c1e940..b9f6157d4271 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -560,7 +560,7 @@ static int uv_probe(void) | |||
560 | return apic == &apic_x2apic_uv_x; | 560 | return apic == &apic_x2apic_uv_x; |
561 | } | 561 | } |
562 | 562 | ||
563 | static struct apic __refdata apic_x2apic_uv_x = { | 563 | static struct apic apic_x2apic_uv_x __ro_after_init = { |
564 | 564 | ||
565 | .name = "UV large system", | 565 | .name = "UV large system", |
566 | .probe = uv_probe, | 566 | .probe = uv_probe, |
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 2bd5c6ff7ee7..c62e015b126c 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c | |||
@@ -29,10 +29,13 @@ | |||
29 | 29 | ||
30 | void common(void) { | 30 | void common(void) { |
31 | BLANK(); | 31 | BLANK(); |
32 | OFFSET(TI_flags, thread_info, flags); | 32 | OFFSET(TASK_threadsp, task_struct, thread.sp); |
33 | OFFSET(TI_status, thread_info, status); | 33 | #ifdef CONFIG_CC_STACKPROTECTOR |
34 | OFFSET(TASK_stack_canary, task_struct, stack_canary); | ||
35 | #endif | ||
34 | 36 | ||
35 | BLANK(); | 37 | BLANK(); |
38 | OFFSET(TASK_TI_flags, task_struct, thread_info.flags); | ||
36 | OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); | 39 | OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); |
37 | 40 | ||
38 | BLANK(); | 41 | BLANK(); |
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index ecdc1d217dc0..880aa093268d 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -57,6 +57,11 @@ void foo(void) | |||
57 | /* Size of SYSENTER_stack */ | 57 | /* Size of SYSENTER_stack */ |
58 | DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); | 58 | DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); |
59 | 59 | ||
60 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
61 | BLANK(); | ||
62 | OFFSET(stack_canary_offset, stack_canary, canary); | ||
63 | #endif | ||
64 | |||
60 | #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) | 65 | #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) |
61 | BLANK(); | 66 | BLANK(); |
62 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); | 67 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); |
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index d875f97d4e0b..210927ee2e74 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -56,6 +56,11 @@ int main(void) | |||
56 | OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); | 56 | OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); |
57 | BLANK(); | 57 | BLANK(); |
58 | 58 | ||
59 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
60 | DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary)); | ||
61 | BLANK(); | ||
62 | #endif | ||
63 | |||
59 | DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); | 64 | DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); |
60 | DEFINE(NR_syscalls, sizeof(syscalls_64)); | 65 | DEFINE(NR_syscalls, sizeof(syscalls_64)); |
61 | 66 | ||
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index bcc9ccc220c9..9bd910a7dd0a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1264,9 +1264,14 @@ static __init int setup_disablecpuid(char *arg) | |||
1264 | __setup("clearcpuid=", setup_disablecpuid); | 1264 | __setup("clearcpuid=", setup_disablecpuid); |
1265 | 1265 | ||
1266 | #ifdef CONFIG_X86_64 | 1266 | #ifdef CONFIG_X86_64 |
1267 | struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; | 1267 | struct desc_ptr idt_descr __ro_after_init = { |
1268 | struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, | 1268 | .size = NR_VECTORS * 16 - 1, |
1269 | (unsigned long) debug_idt_table }; | 1269 | .address = (unsigned long) idt_table, |
1270 | }; | ||
1271 | const struct desc_ptr debug_idt_descr = { | ||
1272 | .size = NR_VECTORS * 16 - 1, | ||
1273 | .address = (unsigned long) debug_idt_table, | ||
1274 | }; | ||
1270 | 1275 | ||
1271 | DEFINE_PER_CPU_FIRST(union irq_stack_union, | 1276 | DEFINE_PER_CPU_FIRST(union irq_stack_union, |
1272 | irq_stack_union) __aligned(PAGE_SIZE) __visible; | 1277 | irq_stack_union) __aligned(PAGE_SIZE) __visible; |
@@ -1280,7 +1285,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = | |||
1280 | EXPORT_PER_CPU_SYMBOL(current_task); | 1285 | EXPORT_PER_CPU_SYMBOL(current_task); |
1281 | 1286 | ||
1282 | DEFINE_PER_CPU(char *, irq_stack_ptr) = | 1287 | DEFINE_PER_CPU(char *, irq_stack_ptr) = |
1283 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; | 1288 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE; |
1284 | 1289 | ||
1285 | DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; | 1290 | DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; |
1286 | 1291 | ||
@@ -1304,11 +1309,6 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks | |||
1304 | /* May not be marked __init: used by software suspend */ | 1309 | /* May not be marked __init: used by software suspend */ |
1305 | void syscall_init(void) | 1310 | void syscall_init(void) |
1306 | { | 1311 | { |
1307 | /* | ||
1308 | * LSTAR and STAR live in a bit strange symbiosis. | ||
1309 | * They both write to the same internal register. STAR allows to | ||
1310 | * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. | ||
1311 | */ | ||
1312 | wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); | 1312 | wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); |
1313 | wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); | 1313 | wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); |
1314 | 1314 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 28f1b54b7fad..24e87e74990d 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -72,14 +72,14 @@ static DEFINE_MUTEX(mtrr_mutex); | |||
72 | u64 size_or_mask, size_and_mask; | 72 | u64 size_or_mask, size_and_mask; |
73 | static bool mtrr_aps_delayed_init; | 73 | static bool mtrr_aps_delayed_init; |
74 | 74 | ||
75 | static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; | 75 | static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM] __ro_after_init; |
76 | 76 | ||
77 | const struct mtrr_ops *mtrr_if; | 77 | const struct mtrr_ops *mtrr_if; |
78 | 78 | ||
79 | static void set_mtrr(unsigned int reg, unsigned long base, | 79 | static void set_mtrr(unsigned int reg, unsigned long base, |
80 | unsigned long size, mtrr_type type); | 80 | unsigned long size, mtrr_type type); |
81 | 81 | ||
82 | void set_mtrr_ops(const struct mtrr_ops *ops) | 82 | void __init set_mtrr_ops(const struct mtrr_ops *ops) |
83 | { | 83 | { |
84 | if (ops->vendor && ops->vendor < X86_VENDOR_NUM) | 84 | if (ops->vendor && ops->vendor < X86_VENDOR_NUM) |
85 | mtrr_ops[ops->vendor] = ops; | 85 | mtrr_ops[ops->vendor] = ops; |
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 6c7ced07d16d..ad8bd763efa5 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h | |||
@@ -54,7 +54,7 @@ void fill_mtrr_var_range(unsigned int index, | |||
54 | bool get_mtrr_state(void); | 54 | bool get_mtrr_state(void); |
55 | void mtrr_bp_pat_init(void); | 55 | void mtrr_bp_pat_init(void); |
56 | 56 | ||
57 | extern void set_mtrr_ops(const struct mtrr_ops *ops); | 57 | extern void __init set_mtrr_ops(const struct mtrr_ops *ops); |
58 | 58 | ||
59 | extern u64 size_or_mask, size_and_mask; | 59 | extern u64 size_or_mask, size_and_mask; |
60 | extern const struct mtrr_ops *mtrr_if; | 60 | extern const struct mtrr_ops *mtrr_if; |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 92e8f0a7159c..9b7cf5c28f5f 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -17,7 +17,7 @@ | |||
17 | #include <linux/sysfs.h> | 17 | #include <linux/sysfs.h> |
18 | 18 | ||
19 | #include <asm/stacktrace.h> | 19 | #include <asm/stacktrace.h> |
20 | 20 | #include <asm/unwind.h> | |
21 | 21 | ||
22 | int panic_on_unrecovered_nmi; | 22 | int panic_on_unrecovered_nmi; |
23 | int panic_on_io_nmi; | 23 | int panic_on_io_nmi; |
@@ -25,11 +25,29 @@ unsigned int code_bytes = 64; | |||
25 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | 25 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; |
26 | static int die_counter; | 26 | static int die_counter; |
27 | 27 | ||
28 | bool in_task_stack(unsigned long *stack, struct task_struct *task, | ||
29 | struct stack_info *info) | ||
30 | { | ||
31 | unsigned long *begin = task_stack_page(task); | ||
32 | unsigned long *end = task_stack_page(task) + THREAD_SIZE; | ||
33 | |||
34 | if (stack < begin || stack >= end) | ||
35 | return false; | ||
36 | |||
37 | info->type = STACK_TYPE_TASK; | ||
38 | info->begin = begin; | ||
39 | info->end = end; | ||
40 | info->next_sp = NULL; | ||
41 | |||
42 | return true; | ||
43 | } | ||
44 | |||
28 | static void printk_stack_address(unsigned long address, int reliable, | 45 | static void printk_stack_address(unsigned long address, int reliable, |
29 | void *data) | 46 | char *log_lvl) |
30 | { | 47 | { |
48 | touch_nmi_watchdog(); | ||
31 | printk("%s [<%p>] %s%pB\n", | 49 | printk("%s [<%p>] %s%pB\n", |
32 | (char *)data, (void *)address, reliable ? "" : "? ", | 50 | log_lvl, (void *)address, reliable ? "" : "? ", |
33 | (void *)address); | 51 | (void *)address); |
34 | } | 52 | } |
35 | 53 | ||
@@ -38,176 +56,120 @@ void printk_address(unsigned long address) | |||
38 | pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address); | 56 | pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address); |
39 | } | 57 | } |
40 | 58 | ||
41 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 59 | void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
42 | static void | 60 | unsigned long *stack, char *log_lvl) |
43 | print_ftrace_graph_addr(unsigned long addr, void *data, | ||
44 | const struct stacktrace_ops *ops, | ||
45 | struct task_struct *task, int *graph) | ||
46 | { | 61 | { |
47 | unsigned long ret_addr; | 62 | struct unwind_state state; |
48 | int index; | 63 | struct stack_info stack_info = {0}; |
49 | 64 | unsigned long visit_mask = 0; | |
50 | if (addr != (unsigned long)return_to_handler) | 65 | int graph_idx = 0; |
51 | return; | ||
52 | |||
53 | index = task->curr_ret_stack; | ||
54 | |||
55 | if (!task->ret_stack || index < *graph) | ||
56 | return; | ||
57 | |||
58 | index -= *graph; | ||
59 | ret_addr = task->ret_stack[index].ret; | ||
60 | |||
61 | ops->address(data, ret_addr, 1); | ||
62 | 66 | ||
63 | (*graph)++; | 67 | printk("%sCall Trace:\n", log_lvl); |
64 | } | ||
65 | #else | ||
66 | static inline void | ||
67 | print_ftrace_graph_addr(unsigned long addr, void *data, | ||
68 | const struct stacktrace_ops *ops, | ||
69 | struct task_struct *task, int *graph) | ||
70 | { } | ||
71 | #endif | ||
72 | |||
73 | /* | ||
74 | * x86-64 can have up to three kernel stacks: | ||
75 | * process stack | ||
76 | * interrupt stack | ||
77 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | ||
78 | */ | ||
79 | |||
80 | static inline int valid_stack_ptr(struct task_struct *task, | ||
81 | void *p, unsigned int size, void *end) | ||
82 | { | ||
83 | void *t = task_stack_page(task); | ||
84 | if (end) { | ||
85 | if (p < end && p >= (end-THREAD_SIZE)) | ||
86 | return 1; | ||
87 | else | ||
88 | return 0; | ||
89 | } | ||
90 | return p >= t && p < t + THREAD_SIZE - size; | ||
91 | } | ||
92 | 68 | ||
93 | unsigned long | 69 | unwind_start(&state, task, regs, stack); |
94 | print_context_stack(struct task_struct *task, | ||
95 | unsigned long *stack, unsigned long bp, | ||
96 | const struct stacktrace_ops *ops, void *data, | ||
97 | unsigned long *end, int *graph) | ||
98 | { | ||
99 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
100 | 70 | ||
101 | /* | 71 | /* |
102 | * If we overflowed the stack into a guard page, jump back to the | 72 | * Iterate through the stacks, starting with the current stack pointer. |
103 | * bottom of the usable stack. | 73 | * Each stack has a pointer to the next one. |
74 | * | ||
75 | * x86-64 can have several stacks: | ||
76 | * - task stack | ||
77 | * - interrupt stack | ||
78 | * - HW exception stacks (double fault, nmi, debug, mce) | ||
79 | * | ||
80 | * x86-32 can have up to three stacks: | ||
81 | * - task stack | ||
82 | * - softirq stack | ||
83 | * - hardirq stack | ||
104 | */ | 84 | */ |
105 | if ((unsigned long)task_stack_page(task) - (unsigned long)stack < | 85 | for (; stack; stack = stack_info.next_sp) { |
106 | PAGE_SIZE) | 86 | const char *str_begin, *str_end; |
107 | stack = (unsigned long *)task_stack_page(task); | ||
108 | |||
109 | while (valid_stack_ptr(task, stack, sizeof(*stack), end)) { | ||
110 | unsigned long addr; | ||
111 | |||
112 | addr = *stack; | ||
113 | if (__kernel_text_address(addr)) { | ||
114 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
115 | ops->address(data, addr, 1); | ||
116 | frame = frame->next_frame; | ||
117 | bp = (unsigned long) frame; | ||
118 | } else { | ||
119 | ops->address(data, addr, 0); | ||
120 | } | ||
121 | print_ftrace_graph_addr(addr, data, ops, task, graph); | ||
122 | } | ||
123 | stack++; | ||
124 | } | ||
125 | return bp; | ||
126 | } | ||
127 | EXPORT_SYMBOL_GPL(print_context_stack); | ||
128 | |||
129 | unsigned long | ||
130 | print_context_stack_bp(struct task_struct *task, | ||
131 | unsigned long *stack, unsigned long bp, | ||
132 | const struct stacktrace_ops *ops, void *data, | ||
133 | unsigned long *end, int *graph) | ||
134 | { | ||
135 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
136 | unsigned long *ret_addr = &frame->return_address; | ||
137 | 87 | ||
138 | while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) { | 88 | /* |
139 | unsigned long addr = *ret_addr; | 89 | * If we overflowed the task stack into a guard page, jump back |
90 | * to the bottom of the usable stack. | ||
91 | */ | ||
92 | if (task_stack_page(task) - (void *)stack < PAGE_SIZE) | ||
93 | stack = task_stack_page(task); | ||
140 | 94 | ||
141 | if (!__kernel_text_address(addr)) | 95 | if (get_stack_info(stack, task, &stack_info, &visit_mask)) |
142 | break; | 96 | break; |
143 | 97 | ||
144 | if (ops->address(data, addr, 1)) | 98 | stack_type_str(stack_info.type, &str_begin, &str_end); |
145 | break; | 99 | if (str_begin) |
146 | frame = frame->next_frame; | 100 | printk("%s <%s> ", log_lvl, str_begin); |
147 | ret_addr = &frame->return_address; | 101 | |
148 | print_ftrace_graph_addr(addr, data, ops, task, graph); | 102 | /* |
149 | } | 103 | * Scan the stack, printing any text addresses we find. At the |
150 | 104 | * same time, follow proper stack frames with the unwinder. | |
151 | return (unsigned long)frame; | 105 | * |
152 | } | 106 | * Addresses found during the scan which are not reported by |
153 | EXPORT_SYMBOL_GPL(print_context_stack_bp); | 107 | * the unwinder are considered to be additional clues which are |
154 | 108 | * sometimes useful for debugging and are prefixed with '?'. | |
155 | static int print_trace_stack(void *data, char *name) | 109 | * This also serves as a failsafe option in case the unwinder |
156 | { | 110 | * goes off in the weeds. |
157 | printk("%s <%s> ", (char *)data, name); | 111 | */ |
158 | return 0; | 112 | for (; stack < stack_info.end; stack++) { |
159 | } | 113 | unsigned long real_addr; |
160 | 114 | int reliable = 0; | |
161 | /* | 115 | unsigned long addr = *stack; |
162 | * Print one address/symbol entries per line. | 116 | unsigned long *ret_addr_p = |
163 | */ | 117 | unwind_get_return_address_ptr(&state); |
164 | static int print_trace_address(void *data, unsigned long addr, int reliable) | 118 | |
165 | { | 119 | if (!__kernel_text_address(addr)) |
166 | touch_nmi_watchdog(); | 120 | continue; |
167 | printk_stack_address(addr, reliable, data); | 121 | |
168 | return 0; | 122 | if (stack == ret_addr_p) |
169 | } | 123 | reliable = 1; |
170 | 124 | ||
171 | static const struct stacktrace_ops print_trace_ops = { | 125 | /* |
172 | .stack = print_trace_stack, | 126 | * When function graph tracing is enabled for a |
173 | .address = print_trace_address, | 127 | * function, its return address on the stack is |
174 | .walk_stack = print_context_stack, | 128 | * replaced with the address of an ftrace handler |
175 | }; | 129 | * (return_to_handler). In that case, before printing |
176 | 130 | * the "real" address, we want to print the handler | |
177 | void | 131 | * address as an "unreliable" hint that function graph |
178 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 132 | * tracing was involved. |
179 | unsigned long *stack, unsigned long bp, char *log_lvl) | 133 | */ |
180 | { | 134 | real_addr = ftrace_graph_ret_addr(task, &graph_idx, |
181 | printk("%sCall Trace:\n", log_lvl); | 135 | addr, stack); |
182 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | 136 | if (real_addr != addr) |
183 | } | 137 | printk_stack_address(addr, 0, log_lvl); |
138 | printk_stack_address(real_addr, reliable, log_lvl); | ||
139 | |||
140 | if (!reliable) | ||
141 | continue; | ||
142 | |||
143 | /* | ||
144 | * Get the next frame from the unwinder. No need to | ||
145 | * check for an error: if anything goes wrong, the rest | ||
146 | * of the addresses will just be printed as unreliable. | ||
147 | */ | ||
148 | unwind_next_frame(&state); | ||
149 | } | ||
184 | 150 | ||
185 | void show_trace(struct task_struct *task, struct pt_regs *regs, | 151 | if (str_end) |
186 | unsigned long *stack, unsigned long bp) | 152 | printk("%s <%s> ", log_lvl, str_end); |
187 | { | 153 | } |
188 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
189 | } | 154 | } |
190 | 155 | ||
191 | void show_stack(struct task_struct *task, unsigned long *sp) | 156 | void show_stack(struct task_struct *task, unsigned long *sp) |
192 | { | 157 | { |
193 | unsigned long bp = 0; | 158 | task = task ? : current; |
194 | unsigned long stack; | ||
195 | 159 | ||
196 | /* | 160 | /* |
197 | * Stack frames below this one aren't interesting. Don't show them | 161 | * Stack frames below this one aren't interesting. Don't show them |
198 | * if we're printing for %current. | 162 | * if we're printing for %current. |
199 | */ | 163 | */ |
200 | if (!sp && (!task || task == current)) { | 164 | if (!sp && task == current) |
201 | sp = &stack; | 165 | sp = get_stack_pointer(current, NULL); |
202 | bp = stack_frame(current, NULL); | ||
203 | } | ||
204 | 166 | ||
205 | show_stack_log_lvl(task, NULL, sp, bp, ""); | 167 | show_stack_log_lvl(task, NULL, sp, ""); |
206 | } | 168 | } |
207 | 169 | ||
208 | void show_stack_regs(struct pt_regs *regs) | 170 | void show_stack_regs(struct pt_regs *regs) |
209 | { | 171 | { |
210 | show_stack_log_lvl(current, regs, (unsigned long *)regs->sp, regs->bp, ""); | 172 | show_stack_log_lvl(current, regs, NULL, ""); |
211 | } | 173 | } |
212 | 174 | ||
213 | static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; | 175 | static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; |
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 09675712eba8..06eb322b5f9f 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -16,93 +16,121 @@ | |||
16 | 16 | ||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
19 | static void *is_irq_stack(void *p, void *irq) | 19 | void stack_type_str(enum stack_type type, const char **begin, const char **end) |
20 | { | 20 | { |
21 | if (p < irq || p >= (irq + THREAD_SIZE)) | 21 | switch (type) { |
22 | return NULL; | 22 | case STACK_TYPE_IRQ: |
23 | return irq + THREAD_SIZE; | 23 | case STACK_TYPE_SOFTIRQ: |
24 | *begin = "IRQ"; | ||
25 | *end = "EOI"; | ||
26 | break; | ||
27 | default: | ||
28 | *begin = NULL; | ||
29 | *end = NULL; | ||
30 | } | ||
24 | } | 31 | } |
25 | 32 | ||
26 | 33 | static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) | |
27 | static void *is_hardirq_stack(unsigned long *stack, int cpu) | ||
28 | { | 34 | { |
29 | void *irq = per_cpu(hardirq_stack, cpu); | 35 | unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack); |
36 | unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); | ||
30 | 37 | ||
31 | return is_irq_stack(stack, irq); | 38 | /* |
32 | } | 39 | * This is a software stack, so 'end' can be a valid stack pointer. |
40 | * It just means the stack is empty. | ||
41 | */ | ||
42 | if (stack < begin || stack > end) | ||
43 | return false; | ||
33 | 44 | ||
34 | static void *is_softirq_stack(unsigned long *stack, int cpu) | 45 | info->type = STACK_TYPE_IRQ; |
35 | { | 46 | info->begin = begin; |
36 | void *irq = per_cpu(softirq_stack, cpu); | 47 | info->end = end; |
37 | 48 | ||
38 | return is_irq_stack(stack, irq); | 49 | /* |
50 | * See irq_32.c -- the next stack pointer is stored at the beginning of | ||
51 | * the stack. | ||
52 | */ | ||
53 | info->next_sp = (unsigned long *)*begin; | ||
54 | |||
55 | return true; | ||
39 | } | 56 | } |
40 | 57 | ||
41 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 58 | static bool in_softirq_stack(unsigned long *stack, struct stack_info *info) |
42 | unsigned long *stack, unsigned long bp, | ||
43 | const struct stacktrace_ops *ops, void *data) | ||
44 | { | 59 | { |
45 | const unsigned cpu = get_cpu(); | 60 | unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack); |
46 | int graph = 0; | 61 | unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); |
47 | u32 *prev_esp; | ||
48 | 62 | ||
49 | if (!task) | 63 | /* |
50 | task = current; | 64 | * This is a software stack, so 'end' can be a valid stack pointer. |
65 | * It just means the stack is empty. | ||
66 | */ | ||
67 | if (stack < begin || stack > end) | ||
68 | return false; | ||
51 | 69 | ||
52 | if (!stack) { | 70 | info->type = STACK_TYPE_SOFTIRQ; |
53 | unsigned long dummy; | 71 | info->begin = begin; |
72 | info->end = end; | ||
54 | 73 | ||
55 | stack = &dummy; | 74 | /* |
56 | if (task != current) | 75 | * The next stack pointer is stored at the beginning of the stack. |
57 | stack = (unsigned long *)task->thread.sp; | 76 | * See irq_32.c. |
58 | } | 77 | */ |
78 | info->next_sp = (unsigned long *)*begin; | ||
59 | 79 | ||
60 | if (!bp) | 80 | return true; |
61 | bp = stack_frame(task, regs); | 81 | } |
62 | 82 | ||
63 | for (;;) { | 83 | int get_stack_info(unsigned long *stack, struct task_struct *task, |
64 | void *end_stack; | 84 | struct stack_info *info, unsigned long *visit_mask) |
85 | { | ||
86 | if (!stack) | ||
87 | goto unknown; | ||
65 | 88 | ||
66 | end_stack = is_hardirq_stack(stack, cpu); | 89 | task = task ? : current; |
67 | if (!end_stack) | ||
68 | end_stack = is_softirq_stack(stack, cpu); | ||
69 | 90 | ||
70 | bp = ops->walk_stack(task, stack, bp, ops, data, | 91 | if (in_task_stack(stack, task, info)) |
71 | end_stack, &graph); | 92 | goto recursion_check; |
72 | 93 | ||
73 | /* Stop if not on irq stack */ | 94 | if (task != current) |
74 | if (!end_stack) | 95 | goto unknown; |
75 | break; | ||
76 | 96 | ||
77 | /* The previous esp is saved on the bottom of the stack */ | 97 | if (in_hardirq_stack(stack, info)) |
78 | prev_esp = (u32 *)(end_stack - THREAD_SIZE); | 98 | goto recursion_check; |
79 | stack = (unsigned long *)*prev_esp; | ||
80 | if (!stack) | ||
81 | break; | ||
82 | 99 | ||
83 | if (ops->stack(data, "IRQ") < 0) | 100 | if (in_softirq_stack(stack, info)) |
84 | break; | 101 | goto recursion_check; |
85 | touch_nmi_watchdog(); | 102 | |
103 | goto unknown; | ||
104 | |||
105 | recursion_check: | ||
106 | /* | ||
107 | * Make sure we don't iterate through any given stack more than once. | ||
108 | * If it comes up a second time then there's something wrong going on: | ||
109 | * just break out and report an unknown stack type. | ||
110 | */ | ||
111 | if (visit_mask) { | ||
112 | if (*visit_mask & (1UL << info->type)) | ||
113 | goto unknown; | ||
114 | *visit_mask |= 1UL << info->type; | ||
86 | } | 115 | } |
87 | put_cpu(); | 116 | |
117 | return 0; | ||
118 | |||
119 | unknown: | ||
120 | info->type = STACK_TYPE_UNKNOWN; | ||
121 | return -EINVAL; | ||
88 | } | 122 | } |
89 | EXPORT_SYMBOL(dump_trace); | ||
90 | 123 | ||
91 | void | 124 | void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
92 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 125 | unsigned long *sp, char *log_lvl) |
93 | unsigned long *sp, unsigned long bp, char *log_lvl) | ||
94 | { | 126 | { |
95 | unsigned long *stack; | 127 | unsigned long *stack; |
96 | int i; | 128 | int i; |
97 | 129 | ||
98 | if (sp == NULL) { | 130 | if (!try_get_task_stack(task)) |
99 | if (regs) | 131 | return; |
100 | sp = (unsigned long *)regs->sp; | 132 | |
101 | else if (task) | 133 | sp = sp ? : get_stack_pointer(task, regs); |
102 | sp = (unsigned long *)task->thread.sp; | ||
103 | else | ||
104 | sp = (unsigned long *)&sp; | ||
105 | } | ||
106 | 134 | ||
107 | stack = sp; | 135 | stack = sp; |
108 | for (i = 0; i < kstack_depth_to_print; i++) { | 136 | for (i = 0; i < kstack_depth_to_print; i++) { |
@@ -117,7 +145,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
117 | touch_nmi_watchdog(); | 145 | touch_nmi_watchdog(); |
118 | } | 146 | } |
119 | pr_cont("\n"); | 147 | pr_cont("\n"); |
120 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 148 | show_trace_log_lvl(task, regs, sp, log_lvl); |
149 | |||
150 | put_task_stack(task); | ||
121 | } | 151 | } |
122 | 152 | ||
123 | 153 | ||
@@ -139,7 +169,7 @@ void show_regs(struct pt_regs *regs) | |||
139 | u8 *ip; | 169 | u8 *ip; |
140 | 170 | ||
141 | pr_emerg("Stack:\n"); | 171 | pr_emerg("Stack:\n"); |
142 | show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG); | 172 | show_stack_log_lvl(current, regs, NULL, KERN_EMERG); |
143 | 173 | ||
144 | pr_emerg("Code:"); | 174 | pr_emerg("Code:"); |
145 | 175 | ||
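
The dumpstack_32.c hunks above replace the old dump_trace() callback walker with get_stack_info(), which classifies a stack pointer and fills a struct stack_info (type, begin, end, next_sp) describing the stack that contains it. A minimal caller sketch of that interface follows; the function and field names are taken from the diff, while the walker itself and the assumption that the task stack reports a NULL next_sp are illustrative only.

#include <linux/sched.h>
#include <asm/stacktrace.h>

/* Illustrative only: visit every stack reachable from 'sp' for 'task'. */
static void walk_linked_stacks(struct task_struct *task, unsigned long *sp)
{
	struct stack_info info;
	unsigned long visit_mask = 0;

	/* get_stack_info() returns 0 on success, -EINVAL for an unknown stack. */
	while (sp && !get_stack_info(sp, task, &info, &visit_mask)) {
		/* ... dump the words from info.begin up to info.end ... */
		sp = info.next_sp;	/* previous stack; NULL ends the walk */
	}
}
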
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 9ee4520ce83c..36cf1a498227 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -16,261 +16,145 @@ | |||
16 | 16 | ||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
19 | static char *exception_stack_names[N_EXCEPTION_STACKS] = { | ||
20 | [ DOUBLEFAULT_STACK-1 ] = "#DF", | ||
21 | [ NMI_STACK-1 ] = "NMI", | ||
22 | [ DEBUG_STACK-1 ] = "#DB", | ||
23 | [ MCE_STACK-1 ] = "#MC", | ||
24 | }; | ||
19 | 25 | ||
20 | #define N_EXCEPTION_STACKS_END \ | 26 | static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = { |
21 | (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) | 27 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, |
22 | 28 | [DEBUG_STACK - 1] = DEBUG_STKSZ | |
23 | static char x86_stack_ids[][8] = { | ||
24 | [ DEBUG_STACK-1 ] = "#DB", | ||
25 | [ NMI_STACK-1 ] = "NMI", | ||
26 | [ DOUBLEFAULT_STACK-1 ] = "#DF", | ||
27 | [ MCE_STACK-1 ] = "#MC", | ||
28 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
29 | [ N_EXCEPTION_STACKS ... | ||
30 | N_EXCEPTION_STACKS_END ] = "#DB[?]" | ||
31 | #endif | ||
32 | }; | 29 | }; |
33 | 30 | ||
34 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 31 | void stack_type_str(enum stack_type type, const char **begin, const char **end) |
35 | unsigned *usedp, char **idp) | ||
36 | { | 32 | { |
37 | unsigned k; | 33 | BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); |
38 | 34 | ||
39 | /* | 35 | switch (type) { |
40 | * Iterate over all exception stacks, and figure out whether | 36 | case STACK_TYPE_IRQ: |
41 | * 'stack' is in one of them: | 37 | *begin = "IRQ"; |
42 | */ | 38 | *end = "EOI"; |
43 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { | 39 | break; |
44 | unsigned long end = per_cpu(orig_ist, cpu).ist[k]; | 40 | case STACK_TYPE_EXCEPTION ... STACK_TYPE_EXCEPTION_LAST: |
45 | /* | 41 | *begin = exception_stack_names[type - STACK_TYPE_EXCEPTION]; |
46 | * Is 'stack' above this exception frame's end? | 42 | *end = "EOE"; |
47 | * If yes then skip to the next frame. | 43 | break; |
48 | */ | 44 | default: |
49 | if (stack >= end) | 45 | *begin = NULL; |
50 | continue; | 46 | *end = NULL; |
51 | /* | ||
52 | * Is 'stack' above this exception frame's start address? | ||
53 | * If yes then we found the right frame. | ||
54 | */ | ||
55 | if (stack >= end - EXCEPTION_STKSZ) { | ||
56 | /* | ||
57 | * Make sure we only iterate through an exception | ||
58 | * stack once. If it comes up for the second time | ||
59 | * then there's something wrong going on - just | ||
60 | * break out and return NULL: | ||
61 | */ | ||
62 | if (*usedp & (1U << k)) | ||
63 | break; | ||
64 | *usedp |= 1U << k; | ||
65 | *idp = x86_stack_ids[k]; | ||
66 | return (unsigned long *)end; | ||
67 | } | ||
68 | /* | ||
69 | * If this is a debug stack, and if it has a larger size than | ||
70 | * the usual exception stacks, then 'stack' might still | ||
71 | * be within the lower portion of the debug stack: | ||
72 | */ | ||
73 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
74 | if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { | ||
75 | unsigned j = N_EXCEPTION_STACKS - 1; | ||
76 | |||
77 | /* | ||
78 | * Black magic. A large debug stack is composed of | ||
79 | * multiple exception stack entries, which we | ||
80 | * iterate through now. Dont look: | ||
81 | */ | ||
82 | do { | ||
83 | ++j; | ||
84 | end -= EXCEPTION_STKSZ; | ||
85 | x86_stack_ids[j][4] = '1' + | ||
86 | (j - N_EXCEPTION_STACKS); | ||
87 | } while (stack < end - EXCEPTION_STKSZ); | ||
88 | if (*usedp & (1U << j)) | ||
89 | break; | ||
90 | *usedp |= 1U << j; | ||
91 | *idp = x86_stack_ids[j]; | ||
92 | return (unsigned long *)end; | ||
93 | } | ||
94 | #endif | ||
95 | } | 47 | } |
96 | return NULL; | ||
97 | } | 48 | } |
98 | 49 | ||
99 | static inline int | 50 | static bool in_exception_stack(unsigned long *stack, struct stack_info *info) |
100 | in_irq_stack(unsigned long *stack, unsigned long *irq_stack, | ||
101 | unsigned long *irq_stack_end) | ||
102 | { | 51 | { |
103 | return (stack >= irq_stack && stack < irq_stack_end); | 52 | unsigned long *begin, *end; |
104 | } | 53 | struct pt_regs *regs; |
105 | 54 | unsigned k; | |
106 | static const unsigned long irq_stack_size = | ||
107 | (IRQ_STACK_SIZE - 64) / sizeof(unsigned long); | ||
108 | |||
109 | enum stack_type { | ||
110 | STACK_IS_UNKNOWN, | ||
111 | STACK_IS_NORMAL, | ||
112 | STACK_IS_EXCEPTION, | ||
113 | STACK_IS_IRQ, | ||
114 | }; | ||
115 | |||
116 | static enum stack_type | ||
117 | analyze_stack(int cpu, struct task_struct *task, unsigned long *stack, | ||
118 | unsigned long **stack_end, unsigned long *irq_stack, | ||
119 | unsigned *used, char **id) | ||
120 | { | ||
121 | unsigned long addr; | ||
122 | 55 | ||
123 | addr = ((unsigned long)stack & (~(THREAD_SIZE - 1))); | 56 | BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); |
124 | if ((unsigned long)task_stack_page(task) == addr) | ||
125 | return STACK_IS_NORMAL; | ||
126 | 57 | ||
127 | *stack_end = in_exception_stack(cpu, (unsigned long)stack, | 58 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { |
128 | used, id); | 59 | end = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k]; |
129 | if (*stack_end) | 60 | begin = end - (exception_stack_sizes[k] / sizeof(long)); |
130 | return STACK_IS_EXCEPTION; | 61 | regs = (struct pt_regs *)end - 1; |
131 | 62 | ||
132 | if (!irq_stack) | 63 | if (stack < begin || stack >= end) |
133 | return STACK_IS_NORMAL; | 64 | continue; |
134 | 65 | ||
135 | *stack_end = irq_stack; | 66 | info->type = STACK_TYPE_EXCEPTION + k; |
136 | irq_stack = irq_stack - irq_stack_size; | 67 | info->begin = begin; |
68 | info->end = end; | ||
69 | info->next_sp = (unsigned long *)regs->sp; | ||
137 | 70 | ||
138 | if (in_irq_stack(stack, irq_stack, *stack_end)) | 71 | return true; |
139 | return STACK_IS_IRQ; | 72 | } |
140 | 73 | ||
141 | return STACK_IS_UNKNOWN; | 74 | return false; |
142 | } | 75 | } |
143 | 76 | ||
144 | /* | 77 | static bool in_irq_stack(unsigned long *stack, struct stack_info *info) |
145 | * x86-64 can have up to three kernel stacks: | ||
146 | * process stack | ||
147 | * interrupt stack | ||
148 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | ||
149 | */ | ||
150 | |||
151 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | ||
152 | unsigned long *stack, unsigned long bp, | ||
153 | const struct stacktrace_ops *ops, void *data) | ||
154 | { | 78 | { |
155 | const unsigned cpu = get_cpu(); | 79 | unsigned long *end = (unsigned long *)this_cpu_read(irq_stack_ptr); |
156 | unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu); | 80 | unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long)); |
157 | unsigned long dummy; | ||
158 | unsigned used = 0; | ||
159 | int graph = 0; | ||
160 | int done = 0; | ||
161 | |||
162 | if (!task) | ||
163 | task = current; | ||
164 | |||
165 | if (!stack) { | ||
166 | if (regs) | ||
167 | stack = (unsigned long *)regs->sp; | ||
168 | else if (task != current) | ||
169 | stack = (unsigned long *)task->thread.sp; | ||
170 | else | ||
171 | stack = &dummy; | ||
172 | } | ||
173 | 81 | ||
174 | if (!bp) | ||
175 | bp = stack_frame(task, regs); | ||
176 | /* | 82 | /* |
177 | * Print function call entries in all stacks, starting at the | 83 | * This is a software stack, so 'end' can be a valid stack pointer. |
178 | * current stack address. If the stacks consist of nested | 84 | * It just means the stack is empty. |
179 | * exceptions | ||
180 | */ | 85 | */ |
181 | while (!done) { | 86 | if (stack < begin || stack > end) |
182 | unsigned long *stack_end; | 87 | return false; |
183 | enum stack_type stype; | ||
184 | char *id; | ||
185 | 88 | ||
186 | stype = analyze_stack(cpu, task, stack, &stack_end, | 89 | info->type = STACK_TYPE_IRQ; |
187 | irq_stack, &used, &id); | 90 | info->begin = begin; |
91 | info->end = end; | ||
188 | 92 | ||
189 | /* Default finish unless specified to continue */ | 93 | /* |
190 | done = 1; | 94 | * The next stack pointer is the first thing pushed by the entry code |
95 | * after switching to the irq stack. | ||
96 | */ | ||
97 | info->next_sp = (unsigned long *)*(end - 1); | ||
191 | 98 | ||
192 | switch (stype) { | 99 | return true; |
100 | } | ||
193 | 101 | ||
194 | /* Break out early if we are on the thread stack */ | 102 | int get_stack_info(unsigned long *stack, struct task_struct *task, |
195 | case STACK_IS_NORMAL: | 103 | struct stack_info *info, unsigned long *visit_mask) |
196 | break; | 104 | { |
105 | if (!stack) | ||
106 | goto unknown; | ||
197 | 107 | ||
198 | case STACK_IS_EXCEPTION: | 108 | task = task ? : current; |
199 | 109 | ||
200 | if (ops->stack(data, id) < 0) | 110 | if (in_task_stack(stack, task, info)) |
201 | break; | 111 | goto recursion_check; |
202 | 112 | ||
203 | bp = ops->walk_stack(task, stack, bp, ops, | 113 | if (task != current) |
204 | data, stack_end, &graph); | 114 | goto unknown; |
205 | ops->stack(data, "<EOE>"); | ||
206 | /* | ||
207 | * We link to the next stack via the | ||
208 | * second-to-last pointer (index -2 to end) in the | ||
209 | * exception stack: | ||
210 | */ | ||
211 | stack = (unsigned long *) stack_end[-2]; | ||
212 | done = 0; | ||
213 | break; | ||
214 | 115 | ||
215 | case STACK_IS_IRQ: | 116 | if (in_exception_stack(stack, info)) |
117 | goto recursion_check; | ||
216 | 118 | ||
217 | if (ops->stack(data, "IRQ") < 0) | 119 | if (in_irq_stack(stack, info)) |
218 | break; | 120 | goto recursion_check; |
219 | bp = ops->walk_stack(task, stack, bp, | ||
220 | ops, data, stack_end, &graph); | ||
221 | /* | ||
222 | * We link to the next stack (which would be | ||
223 | * the process stack normally) the last | ||
224 | * pointer (index -1 to end) in the IRQ stack: | ||
225 | */ | ||
226 | stack = (unsigned long *) (stack_end[-1]); | ||
227 | irq_stack = NULL; | ||
228 | ops->stack(data, "EOI"); | ||
229 | done = 0; | ||
230 | break; | ||
231 | 121 | ||
232 | case STACK_IS_UNKNOWN: | 122 | goto unknown; |
233 | ops->stack(data, "UNK"); | ||
234 | break; | ||
235 | } | ||
236 | } | ||
237 | 123 | ||
124 | recursion_check: | ||
238 | /* | 125 | /* |
239 | * This handles the process stack: | 126 | * Make sure we don't iterate through any given stack more than once. |
127 | * If it comes up a second time then there's something wrong going on: | ||
128 | * just break out and report an unknown stack type. | ||
240 | */ | 129 | */ |
241 | bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph); | 130 | if (visit_mask) { |
242 | put_cpu(); | 131 | if (*visit_mask & (1UL << info->type)) |
132 | goto unknown; | ||
133 | *visit_mask |= 1UL << info->type; | ||
134 | } | ||
135 | |||
136 | return 0; | ||
137 | |||
138 | unknown: | ||
139 | info->type = STACK_TYPE_UNKNOWN; | ||
140 | return -EINVAL; | ||
243 | } | 141 | } |
244 | EXPORT_SYMBOL(dump_trace); | ||
245 | 142 | ||
246 | void | 143 | void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
247 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 144 | unsigned long *sp, char *log_lvl) |
248 | unsigned long *sp, unsigned long bp, char *log_lvl) | ||
249 | { | 145 | { |
250 | unsigned long *irq_stack_end; | 146 | unsigned long *irq_stack_end; |
251 | unsigned long *irq_stack; | 147 | unsigned long *irq_stack; |
252 | unsigned long *stack; | 148 | unsigned long *stack; |
253 | int cpu; | ||
254 | int i; | 149 | int i; |
255 | 150 | ||
256 | preempt_disable(); | 151 | if (!try_get_task_stack(task)) |
257 | cpu = smp_processor_id(); | 152 | return; |
258 | 153 | ||
259 | irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); | 154 | irq_stack_end = (unsigned long *)this_cpu_read(irq_stack_ptr); |
260 | irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); | 155 | irq_stack = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long)); |
261 | 156 | ||
262 | /* | 157 | sp = sp ? : get_stack_pointer(task, regs); |
263 | * Debugging aid: "show_stack(NULL, NULL);" prints the | ||
264 | * back trace for this cpu: | ||
265 | */ | ||
266 | if (sp == NULL) { | ||
267 | if (regs) | ||
268 | sp = (unsigned long *)regs->sp; | ||
269 | else if (task) | ||
270 | sp = (unsigned long *)task->thread.sp; | ||
271 | else | ||
272 | sp = (unsigned long *)&sp; | ||
273 | } | ||
274 | 158 | ||
275 | stack = sp; | 159 | stack = sp; |
276 | for (i = 0; i < kstack_depth_to_print; i++) { | 160 | for (i = 0; i < kstack_depth_to_print; i++) { |
@@ -299,18 +183,17 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
299 | stack++; | 183 | stack++; |
300 | touch_nmi_watchdog(); | 184 | touch_nmi_watchdog(); |
301 | } | 185 | } |
302 | preempt_enable(); | ||
303 | 186 | ||
304 | pr_cont("\n"); | 187 | pr_cont("\n"); |
305 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 188 | show_trace_log_lvl(task, regs, sp, log_lvl); |
189 | |||
190 | put_task_stack(task); | ||
306 | } | 191 | } |
307 | 192 | ||
308 | void show_regs(struct pt_regs *regs) | 193 | void show_regs(struct pt_regs *regs) |
309 | { | 194 | { |
310 | int i; | 195 | int i; |
311 | unsigned long sp; | ||
312 | 196 | ||
313 | sp = regs->sp; | ||
314 | show_regs_print_info(KERN_DEFAULT); | 197 | show_regs_print_info(KERN_DEFAULT); |
315 | __show_regs(regs, 1); | 198 | __show_regs(regs, 1); |
316 | 199 | ||
@@ -325,8 +208,7 @@ void show_regs(struct pt_regs *regs) | |||
325 | u8 *ip; | 208 | u8 *ip; |
326 | 209 | ||
327 | printk(KERN_DEFAULT "Stack:\n"); | 210 | printk(KERN_DEFAULT "Stack:\n"); |
328 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, | 211 | show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT); |
329 | 0, KERN_DEFAULT); | ||
330 | 212 | ||
331 | printk(KERN_DEFAULT "Code: "); | 213 | printk(KERN_DEFAULT "Code: "); |
332 | 214 | ||
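
Both the 32-bit and 64-bit get_stack_info() implementations above guard against loops in the stack linkage with a visit_mask bitmap, so each stack type can be entered at most once per walk. The same idea in isolation, as a hedged sketch: the helper name is invented, while enum stack_type and the bit manipulation come straight from the hunks.

#include <asm/stacktrace.h>

/* Illustrative helper: allow each stack type to be visited only once. */
static bool stack_not_yet_visited(unsigned long *visit_mask, enum stack_type type)
{
	if (*visit_mask & (1UL << type))
		return false;		/* already walked this stack: linkage loop */
	*visit_mask |= 1UL << type;
	return true;
}
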
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 93982aebb398..2f2b8c7ccb85 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c | |||
@@ -317,7 +317,6 @@ static void __init fpu__init_system_ctx_switch(void) | |||
317 | on_boot_cpu = 0; | 317 | on_boot_cpu = 0; |
318 | 318 | ||
319 | WARN_ON_FPU(current->thread.fpu.fpstate_active); | 319 | WARN_ON_FPU(current->thread.fpu.fpstate_active); |
320 | current_thread_info()->status = 0; | ||
321 | 320 | ||
322 | if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE) | 321 | if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE) |
323 | eagerfpu = ENABLE; | 322 | eagerfpu = ENABLE; |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index d036cfb4495d..8639bb2ae058 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -1029,7 +1029,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, | |||
1029 | } | 1029 | } |
1030 | 1030 | ||
1031 | if (ftrace_push_return_trace(old, self_addr, &trace.depth, | 1031 | if (ftrace_push_return_trace(old, self_addr, &trace.depth, |
1032 | frame_pointer) == -EBUSY) { | 1032 | frame_pointer, parent) == -EBUSY) { |
1033 | *parent = old; | 1033 | *parent = old; |
1034 | return; | 1034 | return; |
1035 | } | 1035 | } |
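
The ftrace.c hunk above now hands `parent`, the stack slot whose return address the function-graph tracer is about to hijack, down to ftrace_push_return_trace(). The new unwinder (unwind_frame.c at the end of this series) uses that recorded slot to map the trampoline back to the real caller through ftrace_graph_ret_addr(). A small consumer-side sketch, with the call signature copied from the unwind_get_return_address() hunk below; the wrapper name is invented.

#include <linux/ftrace.h>

/*
 * Sketch: given a return-address value and the stack slot it was read from,
 * undo any function-graph trampoline substitution.
 */
static unsigned long real_return_address(struct task_struct *task, int *graph_idx,
					 unsigned long addr, unsigned long *addr_p)
{
	return ftrace_graph_ret_addr(task, graph_idx, addr, addr_p);
}
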
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 6f8902b0d151..5f401262f12d 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -94,7 +94,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE) | |||
94 | */ | 94 | */ |
95 | __HEAD | 95 | __HEAD |
96 | ENTRY(startup_32) | 96 | ENTRY(startup_32) |
97 | movl pa(stack_start),%ecx | 97 | movl pa(initial_stack),%ecx |
98 | 98 | ||
99 | /* test KEEP_SEGMENTS flag to see if the bootloader is asking | 99 | /* test KEEP_SEGMENTS flag to see if the bootloader is asking |
100 | us to not reload segments */ | 100 | us to not reload segments */ |
@@ -286,7 +286,7 @@ num_subarch_entries = (. - subarch_entries) / 4 | |||
286 | * start_secondary(). | 286 | * start_secondary(). |
287 | */ | 287 | */ |
288 | ENTRY(start_cpu0) | 288 | ENTRY(start_cpu0) |
289 | movl stack_start, %ecx | 289 | movl initial_stack, %ecx |
290 | movl %ecx, %esp | 290 | movl %ecx, %esp |
291 | jmp *(initial_code) | 291 | jmp *(initial_code) |
292 | ENDPROC(start_cpu0) | 292 | ENDPROC(start_cpu0) |
@@ -307,7 +307,7 @@ ENTRY(startup_32_smp) | |||
307 | movl %eax,%es | 307 | movl %eax,%es |
308 | movl %eax,%fs | 308 | movl %eax,%fs |
309 | movl %eax,%gs | 309 | movl %eax,%gs |
310 | movl pa(stack_start),%ecx | 310 | movl pa(initial_stack),%ecx |
311 | movl %eax,%ss | 311 | movl %eax,%ss |
312 | leal -__PAGE_OFFSET(%ecx),%esp | 312 | leal -__PAGE_OFFSET(%ecx),%esp |
313 | 313 | ||
@@ -703,7 +703,7 @@ ENTRY(initial_page_table) | |||
703 | 703 | ||
704 | .data | 704 | .data |
705 | .balign 4 | 705 | .balign 4 |
706 | ENTRY(stack_start) | 706 | ENTRY(initial_stack) |
707 | .long init_thread_union+THREAD_SIZE | 707 | .long init_thread_union+THREAD_SIZE |
708 | 708 | ||
709 | __INITRODATA | 709 | __INITRODATA |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 9f8efc9f0075..c98a559c346e 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -66,7 +66,7 @@ startup_64: | |||
66 | */ | 66 | */ |
67 | 67 | ||
68 | /* | 68 | /* |
69 | * Setup stack for verify_cpu(). "-8" because stack_start is defined | 69 | * Setup stack for verify_cpu(). "-8" because initial_stack is defined |
70 | * this way, see below. Our best guess is a NULL ptr for stack | 70 | * this way, see below. Our best guess is a NULL ptr for stack |
71 | * termination heuristics and we don't want to break anything which | 71 | * termination heuristics and we don't want to break anything which |
72 | * might depend on it (kgdb, ...). | 72 | * might depend on it (kgdb, ...). |
@@ -226,7 +226,7 @@ ENTRY(secondary_startup_64) | |||
226 | movq %rax, %cr0 | 226 | movq %rax, %cr0 |
227 | 227 | ||
228 | /* Setup a boot time stack */ | 228 | /* Setup a boot time stack */ |
229 | movq stack_start(%rip), %rsp | 229 | movq initial_stack(%rip), %rsp |
230 | 230 | ||
231 | /* zero EFLAGS after setting rsp */ | 231 | /* zero EFLAGS after setting rsp */ |
232 | pushq $0 | 232 | pushq $0 |
@@ -310,7 +310,7 @@ ENDPROC(secondary_startup_64) | |||
310 | * start_secondary(). | 310 | * start_secondary(). |
311 | */ | 311 | */ |
312 | ENTRY(start_cpu0) | 312 | ENTRY(start_cpu0) |
313 | movq stack_start(%rip),%rsp | 313 | movq initial_stack(%rip),%rsp |
314 | movq initial_code(%rip),%rax | 314 | movq initial_code(%rip),%rax |
315 | pushq $0 # fake return address to stop unwinder | 315 | pushq $0 # fake return address to stop unwinder |
316 | pushq $__KERNEL_CS # set correct cs | 316 | pushq $__KERNEL_CS # set correct cs |
@@ -319,17 +319,15 @@ ENTRY(start_cpu0) | |||
319 | ENDPROC(start_cpu0) | 319 | ENDPROC(start_cpu0) |
320 | #endif | 320 | #endif |
321 | 321 | ||
322 | /* SMP bootup changes these two */ | 322 | /* Both SMP bootup and ACPI suspend change these variables */ |
323 | __REFDATA | 323 | __REFDATA |
324 | .balign 8 | 324 | .balign 8 |
325 | GLOBAL(initial_code) | 325 | GLOBAL(initial_code) |
326 | .quad x86_64_start_kernel | 326 | .quad x86_64_start_kernel |
327 | GLOBAL(initial_gs) | 327 | GLOBAL(initial_gs) |
328 | .quad INIT_PER_CPU_VAR(irq_stack_union) | 328 | .quad INIT_PER_CPU_VAR(irq_stack_union) |
329 | 329 | GLOBAL(initial_stack) | |
330 | GLOBAL(stack_start) | ||
331 | .quad init_thread_union+THREAD_SIZE-8 | 330 | .quad init_thread_union+THREAD_SIZE-8 |
332 | .word 0 | ||
333 | __FINITDATA | 331 | __FINITDATA |
334 | 332 | ||
335 | bad_address: | 333 | bad_address: |
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 4a7903714065..9ebd0b0e73d9 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -40,8 +40,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) | |||
40 | if (user_mode(regs)) | 40 | if (user_mode(regs)) |
41 | return; | 41 | return; |
42 | 42 | ||
43 | if (regs->sp >= curbase + sizeof(struct thread_info) + | 43 | if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN && |
44 | sizeof(struct pt_regs) + STACK_TOP_MARGIN && | ||
45 | regs->sp <= curbase + THREAD_SIZE) | 44 | regs->sp <= curbase + THREAD_SIZE) |
46 | return; | 45 | return; |
47 | 46 | ||
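
The irq_64.c hunk drops sizeof(struct thread_info) from stack_overflow_check() because thread_info is no longer kept at the bottom of the task stack (compare the get_wchan() stack-layout comment change further down). What remains is a plain bounds check on the interrupted RSP; a sketch of that check, assuming curbase is the stack base and reusing the local STACK_TOP_MARGIN macro from the hunk.

#include <asm/ptrace.h>

/* Sketch: is the interrupted RSP still within the current task stack? */
static bool sp_within_task_stack(unsigned long sp, unsigned long curbase)
{
	/* STACK_TOP_MARGIN is the local slack macro defined in irq_64.c. */
	unsigned long lowest  = curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN;
	unsigned long highest = curbase + THREAD_SIZE;

	return sp >= lowest && sp <= highest;
}
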
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 04cde527d728..8e36f249646e 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <asm/apicdef.h> | 50 | #include <asm/apicdef.h> |
51 | #include <asm/apic.h> | 51 | #include <asm/apic.h> |
52 | #include <asm/nmi.h> | 52 | #include <asm/nmi.h> |
53 | #include <asm/switch_to.h> | ||
53 | 54 | ||
54 | struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = | 55 | struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = |
55 | { | 56 | { |
@@ -166,21 +167,19 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | |||
166 | gdb_regs[GDB_DX] = 0; | 167 | gdb_regs[GDB_DX] = 0; |
167 | gdb_regs[GDB_SI] = 0; | 168 | gdb_regs[GDB_SI] = 0; |
168 | gdb_regs[GDB_DI] = 0; | 169 | gdb_regs[GDB_DI] = 0; |
169 | gdb_regs[GDB_BP] = *(unsigned long *)p->thread.sp; | 170 | gdb_regs[GDB_BP] = ((struct inactive_task_frame *)p->thread.sp)->bp; |
170 | #ifdef CONFIG_X86_32 | 171 | #ifdef CONFIG_X86_32 |
171 | gdb_regs[GDB_DS] = __KERNEL_DS; | 172 | gdb_regs[GDB_DS] = __KERNEL_DS; |
172 | gdb_regs[GDB_ES] = __KERNEL_DS; | 173 | gdb_regs[GDB_ES] = __KERNEL_DS; |
173 | gdb_regs[GDB_PS] = 0; | 174 | gdb_regs[GDB_PS] = 0; |
174 | gdb_regs[GDB_CS] = __KERNEL_CS; | 175 | gdb_regs[GDB_CS] = __KERNEL_CS; |
175 | gdb_regs[GDB_PC] = p->thread.ip; | ||
176 | gdb_regs[GDB_SS] = __KERNEL_DS; | 176 | gdb_regs[GDB_SS] = __KERNEL_DS; |
177 | gdb_regs[GDB_FS] = 0xFFFF; | 177 | gdb_regs[GDB_FS] = 0xFFFF; |
178 | gdb_regs[GDB_GS] = 0xFFFF; | 178 | gdb_regs[GDB_GS] = 0xFFFF; |
179 | #else | 179 | #else |
180 | gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); | 180 | gdb_regs32[GDB_PS] = 0; |
181 | gdb_regs32[GDB_CS] = __KERNEL_CS; | 181 | gdb_regs32[GDB_CS] = __KERNEL_CS; |
182 | gdb_regs32[GDB_SS] = __KERNEL_DS; | 182 | gdb_regs32[GDB_SS] = __KERNEL_DS; |
183 | gdb_regs[GDB_PC] = 0; | ||
184 | gdb_regs[GDB_R8] = 0; | 183 | gdb_regs[GDB_R8] = 0; |
185 | gdb_regs[GDB_R9] = 0; | 184 | gdb_regs[GDB_R9] = 0; |
186 | gdb_regs[GDB_R10] = 0; | 185 | gdb_regs[GDB_R10] = 0; |
@@ -190,6 +189,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | |||
190 | gdb_regs[GDB_R14] = 0; | 189 | gdb_regs[GDB_R14] = 0; |
191 | gdb_regs[GDB_R15] = 0; | 190 | gdb_regs[GDB_R15] = 0; |
192 | #endif | 191 | #endif |
192 | gdb_regs[GDB_PC] = 0; | ||
193 | gdb_regs[GDB_SP] = p->thread.sp; | 193 | gdb_regs[GDB_SP] = p->thread.sp; |
194 | } | 194 | } |
195 | 195 | ||
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c index c2bedaea11f7..4afc67f5facc 100644 --- a/arch/x86/kernel/ksysfs.c +++ b/arch/x86/kernel/ksysfs.c | |||
@@ -184,7 +184,7 @@ out: | |||
184 | 184 | ||
185 | static struct kobj_attribute type_attr = __ATTR_RO(type); | 185 | static struct kobj_attribute type_attr = __ATTR_RO(type); |
186 | 186 | ||
187 | static struct bin_attribute data_attr = { | 187 | static struct bin_attribute data_attr __ro_after_init = { |
188 | .attr = { | 188 | .attr = { |
189 | .name = "data", | 189 | .name = "data", |
190 | .mode = S_IRUGO, | 190 | .mode = S_IRUGO, |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 3692249a70f1..60b9949f1e65 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -29,7 +29,7 @@ | |||
29 | #include <asm/x86_init.h> | 29 | #include <asm/x86_init.h> |
30 | #include <asm/reboot.h> | 30 | #include <asm/reboot.h> |
31 | 31 | ||
32 | static int kvmclock = 1; | 32 | static int kvmclock __ro_after_init = 1; |
33 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; | 33 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; |
34 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; | 34 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; |
35 | static cycle_t kvm_sched_clock_offset; | 35 | static cycle_t kvm_sched_clock_offset; |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1acfd76e3e26..bbf3d5933eaa 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -332,7 +332,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = { | |||
332 | .read_cr0 = native_read_cr0, | 332 | .read_cr0 = native_read_cr0, |
333 | .write_cr0 = native_write_cr0, | 333 | .write_cr0 = native_write_cr0, |
334 | .read_cr4 = native_read_cr4, | 334 | .read_cr4 = native_read_cr4, |
335 | .read_cr4_safe = native_read_cr4_safe, | ||
336 | .write_cr4 = native_write_cr4, | 335 | .write_cr4 = native_write_cr4, |
337 | #ifdef CONFIG_X86_64 | 336 | #ifdef CONFIG_X86_64 |
338 | .read_cr8 = native_read_cr8, | 337 | .read_cr8 = native_read_cr8, |
@@ -389,7 +388,7 @@ NOKPROBE_SYMBOL(native_load_idt); | |||
389 | #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) | 388 | #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) |
390 | #endif | 389 | #endif |
391 | 390 | ||
392 | struct pv_mmu_ops pv_mmu_ops = { | 391 | struct pv_mmu_ops pv_mmu_ops __ro_after_init = { |
393 | 392 | ||
394 | .read_cr2 = native_read_cr2, | 393 | .read_cr2 = native_read_cr2, |
395 | .write_cr2 = native_write_cr2, | 394 | .write_cr2 = native_write_cr2, |
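
Several hunks in this series (ksysfs.c, kvmclock.c and paravirt.c above, plus ptrace.c, reboot.c, setup.c and setup_percpu.c below) move boot-time-only data to __ro_after_init, which leaves the object writable during init and write-protects it afterwards. A minimal usage sketch with invented names, assuming the data is only ever written from __init context.

#include <linux/cache.h>	/* __ro_after_init */
#include <linux/init.h>

/* Written once during boot, read-only for the rest of the system's life. */
static unsigned long boot_feature_mask __ro_after_init;

static int __init record_boot_features(void)
{
	boot_feature_mask = 0x1;	/* illustrative value */
	return 0;
}
early_initcall(record_boot_features);
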
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 62c0b0ea2ce4..4002b475171c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <asm/tlbflush.h> | 32 | #include <asm/tlbflush.h> |
33 | #include <asm/mce.h> | 33 | #include <asm/mce.h> |
34 | #include <asm/vm86.h> | 34 | #include <asm/vm86.h> |
35 | #include <asm/switch_to.h> | ||
35 | 36 | ||
36 | /* | 37 | /* |
37 | * per-CPU TSS segments. Threads are completely 'soft' on Linux, | 38 | * per-CPU TSS segments. Threads are completely 'soft' on Linux, |
@@ -513,6 +514,17 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) | |||
513 | } | 514 | } |
514 | 515 | ||
515 | /* | 516 | /* |
517 | * Return saved PC of a blocked thread. | ||
518 | * What is this good for? It will always be the scheduler or ret_from_fork. | ||
519 | */ | ||
520 | unsigned long thread_saved_pc(struct task_struct *tsk) | ||
521 | { | ||
522 | struct inactive_task_frame *frame = | ||
523 | (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp); | ||
524 | return READ_ONCE_NOCHECK(frame->ret_addr); | ||
525 | } | ||
526 | |||
527 | /* | ||
516 | * Called from fs/proc with a reference on @p to find the function | 528 | * Called from fs/proc with a reference on @p to find the function |
517 | * which called into schedule(). This needs to be done carefully | 529 | * which called into schedule(). This needs to be done carefully |
518 | * because the task might wake up and we might look at a stack | 530 | * because the task might wake up and we might look at a stack |
@@ -520,15 +532,18 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) | |||
520 | */ | 532 | */ |
521 | unsigned long get_wchan(struct task_struct *p) | 533 | unsigned long get_wchan(struct task_struct *p) |
522 | { | 534 | { |
523 | unsigned long start, bottom, top, sp, fp, ip; | 535 | unsigned long start, bottom, top, sp, fp, ip, ret = 0; |
524 | int count = 0; | 536 | int count = 0; |
525 | 537 | ||
526 | if (!p || p == current || p->state == TASK_RUNNING) | 538 | if (!p || p == current || p->state == TASK_RUNNING) |
527 | return 0; | 539 | return 0; |
528 | 540 | ||
541 | if (!try_get_task_stack(p)) | ||
542 | return 0; | ||
543 | |||
529 | start = (unsigned long)task_stack_page(p); | 544 | start = (unsigned long)task_stack_page(p); |
530 | if (!start) | 545 | if (!start) |
531 | return 0; | 546 | goto out; |
532 | 547 | ||
533 | /* | 548 | /* |
534 | * Layout of the stack page: | 549 | * Layout of the stack page: |
@@ -537,9 +552,7 @@ unsigned long get_wchan(struct task_struct *p) | |||
537 | * PADDING | 552 | * PADDING |
538 | * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING | 553 | * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING |
539 | * stack | 554 | * stack |
540 | * ----------- bottom = start + sizeof(thread_info) | 555 | * ----------- bottom = start |
541 | * thread_info | ||
542 | * ----------- start | ||
543 | * | 556 | * |
544 | * The tasks stack pointer points at the location where the | 557 | * The tasks stack pointer points at the location where the |
545 | * framepointer is stored. The data on the stack is: | 558 | * framepointer is stored. The data on the stack is: |
@@ -550,20 +563,25 @@ unsigned long get_wchan(struct task_struct *p) | |||
550 | */ | 563 | */ |
551 | top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; | 564 | top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; |
552 | top -= 2 * sizeof(unsigned long); | 565 | top -= 2 * sizeof(unsigned long); |
553 | bottom = start + sizeof(struct thread_info); | 566 | bottom = start; |
554 | 567 | ||
555 | sp = READ_ONCE(p->thread.sp); | 568 | sp = READ_ONCE(p->thread.sp); |
556 | if (sp < bottom || sp > top) | 569 | if (sp < bottom || sp > top) |
557 | return 0; | 570 | goto out; |
558 | 571 | ||
559 | fp = READ_ONCE_NOCHECK(*(unsigned long *)sp); | 572 | fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp); |
560 | do { | 573 | do { |
561 | if (fp < bottom || fp > top) | 574 | if (fp < bottom || fp > top) |
562 | return 0; | 575 | goto out; |
563 | ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long))); | 576 | ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long))); |
564 | if (!in_sched_functions(ip)) | 577 | if (!in_sched_functions(ip)) { |
565 | return ip; | 578 | ret = ip; |
579 | goto out; | ||
580 | } | ||
566 | fp = READ_ONCE_NOCHECK(*(unsigned long *)fp); | 581 | fp = READ_ONCE_NOCHECK(*(unsigned long *)fp); |
567 | } while (count++ < 16 && p->state != TASK_RUNNING); | 582 | } while (count++ < 16 && p->state != TASK_RUNNING); |
568 | return 0; | 583 | |
584 | out: | ||
585 | put_task_stack(p); | ||
586 | return ret; | ||
569 | } | 587 | } |
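
With thread.sp now pointing at a struct inactive_task_frame, thread_saved_pc() above simply reads frame->ret_addr, and get_wchan() starts its frame-pointer walk from frame->bp while pinning the stack with try_get_task_stack()/put_task_stack(). A condensed sketch of that walk follows; it reuses the field names from the hunks above but omits the stack-bounds and iteration-count checks the real code performs, so treat it as illustrative only.

#include <linux/sched.h>
#include <asm/switch_to.h>

/* Sketch: first return address of a blocked task outside the scheduler. */
static unsigned long first_caller_outside_sched(struct task_struct *p)
{
	struct inactive_task_frame *frame;
	unsigned long fp, ip, ret = 0;

	if (!try_get_task_stack(p))	/* pin a possibly vmap'd stack */
		return 0;

	frame = (struct inactive_task_frame *)READ_ONCE(p->thread.sp);
	fp = READ_ONCE_NOCHECK(frame->bp);

	while (fp) {
		ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(long)));
		if (!in_sched_functions(ip)) {
			ret = ip;
			break;
		}
		fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);	/* next frame */
	}

	put_task_stack(p);
	return ret;
}
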
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d86be29c38c7..bd7be8efdc4c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -55,17 +55,6 @@ | |||
55 | #include <asm/switch_to.h> | 55 | #include <asm/switch_to.h> |
56 | #include <asm/vm86.h> | 56 | #include <asm/vm86.h> |
57 | 57 | ||
58 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | ||
59 | asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); | ||
60 | |||
61 | /* | ||
62 | * Return saved PC of a blocked thread. | ||
63 | */ | ||
64 | unsigned long thread_saved_pc(struct task_struct *tsk) | ||
65 | { | ||
66 | return ((unsigned long *)tsk->thread.sp)[3]; | ||
67 | } | ||
68 | |||
69 | void __show_regs(struct pt_regs *regs, int all) | 58 | void __show_regs(struct pt_regs *regs, int all) |
70 | { | 59 | { |
71 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; | 60 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; |
@@ -101,7 +90,7 @@ void __show_regs(struct pt_regs *regs, int all) | |||
101 | cr0 = read_cr0(); | 90 | cr0 = read_cr0(); |
102 | cr2 = read_cr2(); | 91 | cr2 = read_cr2(); |
103 | cr3 = read_cr3(); | 92 | cr3 = read_cr3(); |
104 | cr4 = __read_cr4_safe(); | 93 | cr4 = __read_cr4(); |
105 | printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", | 94 | printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", |
106 | cr0, cr2, cr3, cr4); | 95 | cr0, cr2, cr3, cr4); |
107 | 96 | ||
@@ -133,35 +122,31 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, | |||
133 | unsigned long arg, struct task_struct *p, unsigned long tls) | 122 | unsigned long arg, struct task_struct *p, unsigned long tls) |
134 | { | 123 | { |
135 | struct pt_regs *childregs = task_pt_regs(p); | 124 | struct pt_regs *childregs = task_pt_regs(p); |
125 | struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs); | ||
126 | struct inactive_task_frame *frame = &fork_frame->frame; | ||
136 | struct task_struct *tsk; | 127 | struct task_struct *tsk; |
137 | int err; | 128 | int err; |
138 | 129 | ||
139 | p->thread.sp = (unsigned long) childregs; | 130 | frame->bp = 0; |
131 | frame->ret_addr = (unsigned long) ret_from_fork; | ||
132 | p->thread.sp = (unsigned long) fork_frame; | ||
140 | p->thread.sp0 = (unsigned long) (childregs+1); | 133 | p->thread.sp0 = (unsigned long) (childregs+1); |
141 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | 134 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); |
142 | 135 | ||
143 | if (unlikely(p->flags & PF_KTHREAD)) { | 136 | if (unlikely(p->flags & PF_KTHREAD)) { |
144 | /* kernel thread */ | 137 | /* kernel thread */ |
145 | memset(childregs, 0, sizeof(struct pt_regs)); | 138 | memset(childregs, 0, sizeof(struct pt_regs)); |
146 | p->thread.ip = (unsigned long) ret_from_kernel_thread; | 139 | frame->bx = sp; /* function */ |
147 | task_user_gs(p) = __KERNEL_STACK_CANARY; | 140 | frame->di = arg; |
148 | childregs->ds = __USER_DS; | ||
149 | childregs->es = __USER_DS; | ||
150 | childregs->fs = __KERNEL_PERCPU; | ||
151 | childregs->bx = sp; /* function */ | ||
152 | childregs->bp = arg; | ||
153 | childregs->orig_ax = -1; | ||
154 | childregs->cs = __KERNEL_CS | get_kernel_rpl(); | ||
155 | childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; | ||
156 | p->thread.io_bitmap_ptr = NULL; | 141 | p->thread.io_bitmap_ptr = NULL; |
157 | return 0; | 142 | return 0; |
158 | } | 143 | } |
144 | frame->bx = 0; | ||
159 | *childregs = *current_pt_regs(); | 145 | *childregs = *current_pt_regs(); |
160 | childregs->ax = 0; | 146 | childregs->ax = 0; |
161 | if (sp) | 147 | if (sp) |
162 | childregs->sp = sp; | 148 | childregs->sp = sp; |
163 | 149 | ||
164 | p->thread.ip = (unsigned long) ret_from_fork; | ||
165 | task_user_gs(p) = get_user_gs(current_pt_regs()); | 150 | task_user_gs(p) = get_user_gs(current_pt_regs()); |
166 | 151 | ||
167 | p->thread.io_bitmap_ptr = NULL; | 152 | p->thread.io_bitmap_ptr = NULL; |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 63236d8f84bf..de9acaf2d371 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -50,8 +50,6 @@ | |||
50 | #include <asm/switch_to.h> | 50 | #include <asm/switch_to.h> |
51 | #include <asm/xen/hypervisor.h> | 51 | #include <asm/xen/hypervisor.h> |
52 | 52 | ||
53 | asmlinkage extern void ret_from_fork(void); | ||
54 | |||
55 | __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); | 53 | __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); |
56 | 54 | ||
57 | /* Prints also some state that isn't saved in the pt_regs */ | 55 | /* Prints also some state that isn't saved in the pt_regs */ |
@@ -141,12 +139,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, | |||
141 | { | 139 | { |
142 | int err; | 140 | int err; |
143 | struct pt_regs *childregs; | 141 | struct pt_regs *childregs; |
142 | struct fork_frame *fork_frame; | ||
143 | struct inactive_task_frame *frame; | ||
144 | struct task_struct *me = current; | 144 | struct task_struct *me = current; |
145 | 145 | ||
146 | p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; | 146 | p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; |
147 | childregs = task_pt_regs(p); | 147 | childregs = task_pt_regs(p); |
148 | p->thread.sp = (unsigned long) childregs; | 148 | fork_frame = container_of(childregs, struct fork_frame, regs); |
149 | set_tsk_thread_flag(p, TIF_FORK); | 149 | frame = &fork_frame->frame; |
150 | frame->bp = 0; | ||
151 | frame->ret_addr = (unsigned long) ret_from_fork; | ||
152 | p->thread.sp = (unsigned long) fork_frame; | ||
150 | p->thread.io_bitmap_ptr = NULL; | 153 | p->thread.io_bitmap_ptr = NULL; |
151 | 154 | ||
152 | savesegment(gs, p->thread.gsindex); | 155 | savesegment(gs, p->thread.gsindex); |
@@ -160,15 +163,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, | |||
160 | if (unlikely(p->flags & PF_KTHREAD)) { | 163 | if (unlikely(p->flags & PF_KTHREAD)) { |
161 | /* kernel thread */ | 164 | /* kernel thread */ |
162 | memset(childregs, 0, sizeof(struct pt_regs)); | 165 | memset(childregs, 0, sizeof(struct pt_regs)); |
163 | childregs->sp = (unsigned long)childregs; | 166 | frame->bx = sp; /* function */ |
164 | childregs->ss = __KERNEL_DS; | 167 | frame->r12 = arg; |
165 | childregs->bx = sp; /* function */ | ||
166 | childregs->bp = arg; | ||
167 | childregs->orig_ax = -1; | ||
168 | childregs->cs = __KERNEL_CS | get_kernel_rpl(); | ||
169 | childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; | ||
170 | return 0; | 168 | return 0; |
171 | } | 169 | } |
170 | frame->bx = 0; | ||
172 | *childregs = *current_pt_regs(); | 171 | *childregs = *current_pt_regs(); |
173 | 172 | ||
174 | childregs->ax = 0; | 173 | childregs->ax = 0; |
@@ -511,7 +510,7 @@ void set_personality_ia32(bool x32) | |||
511 | current->personality &= ~READ_IMPLIES_EXEC; | 510 | current->personality &= ~READ_IMPLIES_EXEC; |
512 | /* in_compat_syscall() uses the presence of the x32 | 511 | /* in_compat_syscall() uses the presence of the x32 |
513 | syscall bit flag to determine compat status */ | 512 | syscall bit flag to determine compat status */ |
514 | current_thread_info()->status &= ~TS_COMPAT; | 513 | current->thread.status &= ~TS_COMPAT; |
515 | } else { | 514 | } else { |
516 | set_thread_flag(TIF_IA32); | 515 | set_thread_flag(TIF_IA32); |
517 | clear_thread_flag(TIF_X32); | 516 | clear_thread_flag(TIF_X32); |
@@ -519,7 +518,7 @@ void set_personality_ia32(bool x32) | |||
519 | current->mm->context.ia32_compat = TIF_IA32; | 518 | current->mm->context.ia32_compat = TIF_IA32; |
520 | current->personality |= force_personality32; | 519 | current->personality |= force_personality32; |
521 | /* Prepare the first "return" to user space */ | 520 | /* Prepare the first "return" to user space */ |
522 | current_thread_info()->status |= TS_COMPAT; | 521 | current->thread.status |= TS_COMPAT; |
523 | } | 522 | } |
524 | } | 523 | } |
525 | EXPORT_SYMBOL_GPL(set_personality_ia32); | 524 | EXPORT_SYMBOL_GPL(set_personality_ia32); |
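
Both copy_thread_tls() variants now build a struct fork_frame at the top of the child stack: an inactive_task_frame whose ret_addr points at ret_from_fork, followed by the child's pt_regs. For kernel threads the function and its argument go into callee-saved registers (bx/di on 32-bit, bx/r12 on 64-bit) instead of a fabricated user frame. A rough 64-bit layout sketch under the assumption that the authoritative definitions live in asm/switch_to.h and may contain additional saved registers; the _sketch names below are deliberately not the real type names.

#include <asm/ptrace.h>

/* Rough 64-bit layout sketch; not the authoritative definition. */
struct inactive_task_frame_sketch {
	unsigned long r12;		/* kernel thread: argument */
	unsigned long bx;		/* kernel thread: function to call */
	unsigned long bp;
	unsigned long ret_addr;		/* ret_from_fork */
};

struct fork_frame_sketch {
	struct inactive_task_frame_sketch frame;
	struct pt_regs regs;		/* task_pt_regs(p) points here */
};
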
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index f79576a541ff..ce94c38cf4d6 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -173,8 +173,8 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs) | |||
173 | return sp; | 173 | return sp; |
174 | 174 | ||
175 | prev_esp = (u32 *)(context); | 175 | prev_esp = (u32 *)(context); |
176 | if (prev_esp) | 176 | if (*prev_esp) |
177 | return (unsigned long)prev_esp; | 177 | return (unsigned long)*prev_esp; |
178 | 178 | ||
179 | return (unsigned long)regs; | 179 | return (unsigned long)regs; |
180 | } | 180 | } |
@@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) | |||
934 | */ | 934 | */ |
935 | regs->orig_ax = value; | 935 | regs->orig_ax = value; |
936 | if (syscall_get_nr(child, regs) >= 0) | 936 | if (syscall_get_nr(child, regs) >= 0) |
937 | task_thread_info(child)->status |= TS_I386_REGS_POKED; | 937 | child->thread.status |= TS_I386_REGS_POKED; |
938 | break; | 938 | break; |
939 | 939 | ||
940 | case offsetof(struct user32, regs.eflags): | 940 | case offsetof(struct user32, regs.eflags): |
@@ -1250,7 +1250,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, | |||
1250 | 1250 | ||
1251 | #ifdef CONFIG_X86_64 | 1251 | #ifdef CONFIG_X86_64 |
1252 | 1252 | ||
1253 | static struct user_regset x86_64_regsets[] __read_mostly = { | 1253 | static struct user_regset x86_64_regsets[] __ro_after_init = { |
1254 | [REGSET_GENERAL] = { | 1254 | [REGSET_GENERAL] = { |
1255 | .core_note_type = NT_PRSTATUS, | 1255 | .core_note_type = NT_PRSTATUS, |
1256 | .n = sizeof(struct user_regs_struct) / sizeof(long), | 1256 | .n = sizeof(struct user_regs_struct) / sizeof(long), |
@@ -1291,7 +1291,7 @@ static const struct user_regset_view user_x86_64_view = { | |||
1291 | #endif /* CONFIG_X86_64 */ | 1291 | #endif /* CONFIG_X86_64 */ |
1292 | 1292 | ||
1293 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 1293 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
1294 | static struct user_regset x86_32_regsets[] __read_mostly = { | 1294 | static struct user_regset x86_32_regsets[] __ro_after_init = { |
1295 | [REGSET_GENERAL] = { | 1295 | [REGSET_GENERAL] = { |
1296 | .core_note_type = NT_PRSTATUS, | 1296 | .core_note_type = NT_PRSTATUS, |
1297 | .n = sizeof(struct user_regs_struct32) / sizeof(u32), | 1297 | .n = sizeof(struct user_regs_struct32) / sizeof(u32), |
@@ -1344,7 +1344,7 @@ static const struct user_regset_view user_x86_32_view = { | |||
1344 | */ | 1344 | */ |
1345 | u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; | 1345 | u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; |
1346 | 1346 | ||
1347 | void update_regset_xstate_info(unsigned int size, u64 xstate_mask) | 1347 | void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask) |
1348 | { | 1348 | { |
1349 | #ifdef CONFIG_X86_64 | 1349 | #ifdef CONFIG_X86_64 |
1350 | x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64); | 1350 | x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64); |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 63bf27d972b7..e244c19a2451 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -705,7 +705,7 @@ static void native_machine_power_off(void) | |||
705 | tboot_shutdown(TB_SHUTDOWN_HALT); | 705 | tboot_shutdown(TB_SHUTDOWN_HALT); |
706 | } | 706 | } |
707 | 707 | ||
708 | struct machine_ops machine_ops = { | 708 | struct machine_ops machine_ops __ro_after_init = { |
709 | .power_off = native_machine_power_off, | 709 | .power_off = native_machine_power_off, |
710 | .shutdown = native_machine_shutdown, | 710 | .shutdown = native_machine_shutdown, |
711 | .emergency_restart = native_machine_emergency_restart, | 711 | .emergency_restart = native_machine_emergency_restart, |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2c4bc85dfe90..eeb094ea794a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -210,9 +210,9 @@ EXPORT_SYMBOL(boot_cpu_data); | |||
210 | 210 | ||
211 | 211 | ||
212 | #if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) | 212 | #if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) |
213 | __visible unsigned long mmu_cr4_features; | 213 | __visible unsigned long mmu_cr4_features __ro_after_init; |
214 | #else | 214 | #else |
215 | __visible unsigned long mmu_cr4_features = X86_CR4_PAE; | 215 | __visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE; |
216 | #endif | 216 | #endif |
217 | 217 | ||
218 | /* Boot loader ID and version as integers, for the benefit of proc_dointvec */ | 218 | /* Boot loader ID and version as integers, for the benefit of proc_dointvec */ |
@@ -1137,7 +1137,7 @@ void __init setup_arch(char **cmdline_p) | |||
1137 | * auditing all the early-boot CR4 manipulation would be needed to | 1137 | * auditing all the early-boot CR4 manipulation would be needed to |
1138 | * rule it out. | 1138 | * rule it out. |
1139 | */ | 1139 | */ |
1140 | mmu_cr4_features = __read_cr4_safe(); | 1140 | mmu_cr4_features = __read_cr4(); |
1141 | 1141 | ||
1142 | memblock_set_current_limit(get_max_mapped()); | 1142 | memblock_set_current_limit(get_max_mapped()); |
1143 | 1143 | ||
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 7a40e068302d..2bbd27f89802 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -33,7 +33,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number); | |||
33 | DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; | 33 | DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; |
34 | EXPORT_PER_CPU_SYMBOL(this_cpu_off); | 34 | EXPORT_PER_CPU_SYMBOL(this_cpu_off); |
35 | 35 | ||
36 | unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { | 36 | unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = { |
37 | [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, | 37 | [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, |
38 | }; | 38 | }; |
39 | EXPORT_SYMBOL(__per_cpu_offset); | 39 | EXPORT_SYMBOL(__per_cpu_offset); |
@@ -246,7 +246,7 @@ void __init setup_per_cpu_areas(void) | |||
246 | #ifdef CONFIG_X86_64 | 246 | #ifdef CONFIG_X86_64 |
247 | per_cpu(irq_stack_ptr, cpu) = | 247 | per_cpu(irq_stack_ptr, cpu) = |
248 | per_cpu(irq_stack_union.irq_stack, cpu) + | 248 | per_cpu(irq_stack_union.irq_stack, cpu) + |
249 | IRQ_STACK_SIZE - 64; | 249 | IRQ_STACK_SIZE; |
250 | #endif | 250 | #endif |
251 | #ifdef CONFIG_NUMA | 251 | #ifdef CONFIG_NUMA |
252 | per_cpu(x86_cpu_to_node_map, cpu) = | 252 | per_cpu(x86_cpu_to_node_map, cpu) = |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 04cb3212db2d..da20ecb5397a 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -783,7 +783,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) | |||
783 | * than the tracee. | 783 | * than the tracee. |
784 | */ | 784 | */ |
785 | #ifdef CONFIG_IA32_EMULATION | 785 | #ifdef CONFIG_IA32_EMULATION |
786 | if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) | 786 | if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) |
787 | return __NR_ia32_restart_syscall; | 787 | return __NR_ia32_restart_syscall; |
788 | #endif | 788 | #endif |
789 | #ifdef CONFIG_X86_X32_ABI | 789 | #ifdef CONFIG_X86_X32_ABI |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 54e2f1a968a4..7249dcf2cbcb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -943,7 +943,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) | |||
943 | per_cpu(cpu_current_top_of_stack, cpu) = | 943 | per_cpu(cpu_current_top_of_stack, cpu) = |
944 | (unsigned long)task_stack_page(idle) + THREAD_SIZE; | 944 | (unsigned long)task_stack_page(idle) + THREAD_SIZE; |
945 | #else | 945 | #else |
946 | clear_tsk_thread_flag(idle, TIF_FORK); | ||
947 | initial_gs = per_cpu_offset(cpu); | 946 | initial_gs = per_cpu_offset(cpu); |
948 | #endif | 947 | #endif |
949 | } | 948 | } |
@@ -970,7 +969,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
970 | 969 | ||
971 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); | 970 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); |
972 | initial_code = (unsigned long)start_secondary; | 971 | initial_code = (unsigned long)start_secondary; |
973 | stack_start = idle->thread.sp; | 972 | initial_stack = idle->thread.sp; |
974 | 973 | ||
975 | /* | 974 | /* |
976 | * Enable the espfix hack for this CPU | 975 | * Enable the espfix hack for this CPU |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 4738f5e0f2ab..0653788026e2 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -8,80 +8,69 @@ | |||
8 | #include <linux/export.h> | 8 | #include <linux/export.h> |
9 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
10 | #include <asm/stacktrace.h> | 10 | #include <asm/stacktrace.h> |
11 | #include <asm/unwind.h> | ||
11 | 12 | ||
12 | static int save_stack_stack(void *data, char *name) | 13 | static int save_stack_address(struct stack_trace *trace, unsigned long addr, |
14 | bool nosched) | ||
13 | { | 15 | { |
14 | return 0; | ||
15 | } | ||
16 | |||
17 | static int | ||
18 | __save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched) | ||
19 | { | ||
20 | struct stack_trace *trace = data; | ||
21 | #ifdef CONFIG_FRAME_POINTER | ||
22 | if (!reliable) | ||
23 | return 0; | ||
24 | #endif | ||
25 | if (nosched && in_sched_functions(addr)) | 16 | if (nosched && in_sched_functions(addr)) |
26 | return 0; | 17 | return 0; |
18 | |||
27 | if (trace->skip > 0) { | 19 | if (trace->skip > 0) { |
28 | trace->skip--; | 20 | trace->skip--; |
29 | return 0; | 21 | return 0; |
30 | } | 22 | } |
31 | if (trace->nr_entries < trace->max_entries) { | ||
32 | trace->entries[trace->nr_entries++] = addr; | ||
33 | return 0; | ||
34 | } else { | ||
35 | return -1; /* no more room, stop walking the stack */ | ||
36 | } | ||
37 | } | ||
38 | 23 | ||
39 | static int save_stack_address(void *data, unsigned long addr, int reliable) | 24 | if (trace->nr_entries >= trace->max_entries) |
40 | { | 25 | return -1; |
41 | return __save_stack_address(data, addr, reliable, false); | 26 | |
27 | trace->entries[trace->nr_entries++] = addr; | ||
28 | return 0; | ||
42 | } | 29 | } |
43 | 30 | ||
44 | static int | 31 | static void __save_stack_trace(struct stack_trace *trace, |
45 | save_stack_address_nosched(void *data, unsigned long addr, int reliable) | 32 | struct task_struct *task, struct pt_regs *regs, |
33 | bool nosched) | ||
46 | { | 34 | { |
47 | return __save_stack_address(data, addr, reliable, true); | 35 | struct unwind_state state; |
48 | } | 36 | unsigned long addr; |
49 | 37 | ||
50 | static const struct stacktrace_ops save_stack_ops = { | 38 | if (regs) |
51 | .stack = save_stack_stack, | 39 | save_stack_address(trace, regs->ip, nosched); |
52 | .address = save_stack_address, | ||
53 | .walk_stack = print_context_stack, | ||
54 | }; | ||
55 | 40 | ||
56 | static const struct stacktrace_ops save_stack_ops_nosched = { | 41 | for (unwind_start(&state, task, regs, NULL); !unwind_done(&state); |
57 | .stack = save_stack_stack, | 42 | unwind_next_frame(&state)) { |
58 | .address = save_stack_address_nosched, | 43 | addr = unwind_get_return_address(&state); |
59 | .walk_stack = print_context_stack, | 44 | if (!addr || save_stack_address(trace, addr, nosched)) |
60 | }; | 45 | break; |
46 | } | ||
47 | |||
48 | if (trace->nr_entries < trace->max_entries) | ||
49 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
50 | } | ||
61 | 51 | ||
62 | /* | 52 | /* |
63 | * Save stack-backtrace addresses into a stack_trace buffer. | 53 | * Save stack-backtrace addresses into a stack_trace buffer. |
64 | */ | 54 | */ |
65 | void save_stack_trace(struct stack_trace *trace) | 55 | void save_stack_trace(struct stack_trace *trace) |
66 | { | 56 | { |
67 | dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); | 57 | __save_stack_trace(trace, current, NULL, false); |
68 | if (trace->nr_entries < trace->max_entries) | ||
69 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
70 | } | 58 | } |
71 | EXPORT_SYMBOL_GPL(save_stack_trace); | 59 | EXPORT_SYMBOL_GPL(save_stack_trace); |
72 | 60 | ||
73 | void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) | 61 | void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) |
74 | { | 62 | { |
75 | dump_trace(current, regs, NULL, 0, &save_stack_ops, trace); | 63 | __save_stack_trace(trace, current, regs, false); |
76 | if (trace->nr_entries < trace->max_entries) | ||
77 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
78 | } | 64 | } |
79 | 65 | ||
80 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | 66 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) |
81 | { | 67 | { |
82 | dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); | 68 | if (!try_get_task_stack(tsk)) |
83 | if (trace->nr_entries < trace->max_entries) | 69 | return; |
84 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 70 | |
71 | __save_stack_trace(trace, tsk, NULL, true); | ||
72 | |||
73 | put_task_stack(tsk); | ||
85 | } | 74 | } |
86 | EXPORT_SYMBOL_GPL(save_stack_trace_tsk); | 75 | EXPORT_SYMBOL_GPL(save_stack_trace_tsk); |
87 | 76 | ||
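
The rewritten __save_stack_trace() above is the model consumer of the new unwind API: unwind_start() primes the state for a task (and optionally a pt_regs), unwind_next_frame() advances one frame, and unwind_get_return_address() yields a kernel text address or 0. A minimal caller sketch following the same pattern; only the printing helper is invented.

#include <linux/printk.h>
#include <linux/sched.h>
#include <asm/unwind.h>

/* Sketch: print every return address on the current task's stack. */
static void print_current_trace(void)
{
	struct unwind_state state;
	unsigned long addr;

	for (unwind_start(&state, current, NULL, NULL); !unwind_done(&state);
	     unwind_next_frame(&state)) {
		addr = unwind_get_return_address(&state);
		if (!addr)
			break;
		pr_info("  %pS\n", (void *)addr);
	}
}
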
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b70ca12dd389..bd4e3d4d3625 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -292,12 +292,30 @@ DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) | |||
292 | DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) | 292 | DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) |
293 | DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) | 293 | DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) |
294 | 294 | ||
295 | #ifdef CONFIG_VMAP_STACK | ||
296 | __visible void __noreturn handle_stack_overflow(const char *message, | ||
297 | struct pt_regs *regs, | ||
298 | unsigned long fault_address) | ||
299 | { | ||
300 | printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n", | ||
301 | (void *)fault_address, current->stack, | ||
302 | (char *)current->stack + THREAD_SIZE - 1); | ||
303 | die(message, regs, 0); | ||
304 | |||
305 | /* Be absolutely certain we don't return. */ | ||
306 | panic(message); | ||
307 | } | ||
308 | #endif | ||
309 | |||
295 | #ifdef CONFIG_X86_64 | 310 | #ifdef CONFIG_X86_64 |
296 | /* Runs on IST stack */ | 311 | /* Runs on IST stack */ |
297 | dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | 312 | dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) |
298 | { | 313 | { |
299 | static const char str[] = "double fault"; | 314 | static const char str[] = "double fault"; |
300 | struct task_struct *tsk = current; | 315 | struct task_struct *tsk = current; |
316 | #ifdef CONFIG_VMAP_STACK | ||
317 | unsigned long cr2; | ||
318 | #endif | ||
301 | 319 | ||
302 | #ifdef CONFIG_X86_ESPFIX64 | 320 | #ifdef CONFIG_X86_ESPFIX64 |
303 | extern unsigned char native_irq_return_iret[]; | 321 | extern unsigned char native_irq_return_iret[]; |
@@ -332,6 +350,49 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | |||
332 | tsk->thread.error_code = error_code; | 350 | tsk->thread.error_code = error_code; |
333 | tsk->thread.trap_nr = X86_TRAP_DF; | 351 | tsk->thread.trap_nr = X86_TRAP_DF; |
334 | 352 | ||
353 | #ifdef CONFIG_VMAP_STACK | ||
354 | /* | ||
355 | * If we overflow the stack into a guard page, the CPU will fail | ||
356 | * to deliver #PF and will send #DF instead. Similarly, if we | ||
357 | * take any non-IST exception while too close to the bottom of | ||
358 | * the stack, the processor will get a page fault while | ||
359 | * delivering the exception and will generate a double fault. | ||
360 | * | ||
361 | * According to the SDM (footnote in 6.15 under "Interrupt 14 - | ||
362 | * Page-Fault Exception (#PF)"): | ||
363 | * | ||
364 | * Processors update CR2 whenever a page fault is detected. If a | ||
365 | * second page fault occurs while an earlier page fault is being | ||
366 | * deliv- ered, the faulting linear address of the second fault will | ||
367 | * overwrite the contents of CR2 (replacing the previous | ||
368 | * address). These updates to CR2 occur even if the page fault | ||
369 | * results in a double fault or occurs during the delivery of a | ||
370 | * double fault. | ||
371 | * | ||
372 | * The logic below has a small possibility of incorrectly diagnosing | ||
373 | * some errors as stack overflows. For example, if the IDT or GDT | ||
374 | * gets corrupted such that #GP delivery fails due to a bad descriptor | ||
375 | * causing #GP and we hit this condition while CR2 coincidentally | ||
376 | * points to the stack guard page, we'll think we overflowed the | ||
377 | * stack. Given that we're going to panic one way or another | ||
378 | * if this happens, this isn't necessarily worth fixing. | ||
379 | * | ||
380 | * If necessary, we could improve the test by only diagnosing | ||
381 | * a stack overflow if the saved RSP points within 47 bytes of | ||
382 | * the bottom of the stack: if RSP == tsk_stack + 48 and we | ||
383 | * take an exception, the stack is already aligned and there | ||
384 | * will be enough room for SS, RSP, RFLAGS, CS, RIP, and a | ||
385 | * possible error code, so a stack overflow would *not* double | ||
386 | * fault. With any less space left, exception delivery could | ||
387 | * fail, and, as a practical matter, we've overflowed the | ||
388 | * stack even if the actual trigger for the double fault was | ||
389 | * something else. | ||
390 | */ | ||
391 | cr2 = read_cr2(); | ||
392 | if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE) | ||
393 | handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2); | ||
394 | #endif | ||
395 | |||
335 | #ifdef CONFIG_DOUBLEFAULT | 396 | #ifdef CONFIG_DOUBLEFAULT |
336 | df_debug(regs, error_code); | 397 | df_debug(regs, error_code); |
337 | #endif | 398 | #endif |
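The comment above deliberately leaves the stricter diagnosis unimplemented. As an illustrative sketch only (not part of the patch; the helper name is made up), the described test would treat the double fault as an overflow only when the saved RSP leaves less than the 48 bytes the CPU needs to push SS, RSP, RFLAGS, CS, RIP and an error code:

	static inline bool rsp_too_low_for_exception(struct task_struct *tsk,
						     struct pt_regs *regs)
	{
		unsigned long bottom = (unsigned long)task_stack_page(tsk);

		/* 6 * 8 = 48 bytes: SS, RSP, RFLAGS, CS, RIP, error code */
		return regs->sp - bottom < 6 * sizeof(unsigned long);
	}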
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c new file mode 100644 index 000000000000..a2456d4d286a --- /dev/null +++ b/arch/x86/kernel/unwind_frame.c | |||
@@ -0,0 +1,93 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <asm/ptrace.h> | ||
3 | #include <asm/bitops.h> | ||
4 | #include <asm/stacktrace.h> | ||
5 | #include <asm/unwind.h> | ||
6 | |||
7 | #define FRAME_HEADER_SIZE (sizeof(long) * 2) | ||
8 | |||
9 | unsigned long unwind_get_return_address(struct unwind_state *state) | ||
10 | { | ||
11 | unsigned long addr; | ||
12 | unsigned long *addr_p = unwind_get_return_address_ptr(state); | ||
13 | |||
14 | if (unwind_done(state)) | ||
15 | return 0; | ||
16 | |||
17 | addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p, | ||
18 | addr_p); | ||
19 | |||
20 | return __kernel_text_address(addr) ? addr : 0; | ||
21 | } | ||
22 | EXPORT_SYMBOL_GPL(unwind_get_return_address); | ||
23 | |||
24 | static bool update_stack_state(struct unwind_state *state, void *addr, | ||
25 | size_t len) | ||
26 | { | ||
27 | struct stack_info *info = &state->stack_info; | ||
28 | |||
29 | /* | ||
30 | * If addr isn't on the current stack, switch to the next one. | ||
31 | * | ||
32 | * We may have to traverse multiple stacks to deal with the possibility | ||
33 | * that 'info->next_sp' could point to an empty stack and 'addr' could | ||
34 | * be on a subsequent stack. | ||
35 | */ | ||
36 | while (!on_stack(info, addr, len)) | ||
37 | if (get_stack_info(info->next_sp, state->task, info, | ||
38 | &state->stack_mask)) | ||
39 | return false; | ||
40 | |||
41 | return true; | ||
42 | } | ||
43 | |||
44 | bool unwind_next_frame(struct unwind_state *state) | ||
45 | { | ||
46 | unsigned long *next_bp; | ||
47 | |||
48 | if (unwind_done(state)) | ||
49 | return false; | ||
50 | |||
51 | next_bp = (unsigned long *)*state->bp; | ||
52 | |||
53 | /* make sure the next frame's data is accessible */ | ||
54 | if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE)) | ||
55 | return false; | ||
56 | |||
57 | /* move to the next frame */ | ||
58 | state->bp = next_bp; | ||
59 | return true; | ||
60 | } | ||
61 | EXPORT_SYMBOL_GPL(unwind_next_frame); | ||
62 | |||
63 | void __unwind_start(struct unwind_state *state, struct task_struct *task, | ||
64 | struct pt_regs *regs, unsigned long *first_frame) | ||
65 | { | ||
66 | memset(state, 0, sizeof(*state)); | ||
67 | state->task = task; | ||
68 | |||
69 | /* don't even attempt to start from user mode regs */ | ||
70 | if (regs && user_mode(regs)) { | ||
71 | state->stack_info.type = STACK_TYPE_UNKNOWN; | ||
72 | return; | ||
73 | } | ||
74 | |||
75 | /* set up the starting stack frame */ | ||
76 | state->bp = get_frame_pointer(task, regs); | ||
77 | |||
78 | /* initialize stack info and make sure the frame data is accessible */ | ||
79 | get_stack_info(state->bp, state->task, &state->stack_info, | ||
80 | &state->stack_mask); | ||
81 | update_stack_state(state, state->bp, FRAME_HEADER_SIZE); | ||
82 | |||
83 | /* | ||
84 | * The caller can provide the address of the first frame directly | ||
85 | * (first_frame) or indirectly (regs->sp) to indicate which stack frame | ||
86 | * to start unwinding at. Skip ahead until we reach it. | ||
87 | */ | ||
88 | while (!unwind_done(state) && | ||
89 | (!on_stack(&state->stack_info, first_frame, sizeof(long)) || | ||
90 | state->bp < first_frame)) | ||
91 | unwind_next_frame(state); | ||
92 | } | ||
93 | EXPORT_SYMBOL_GPL(__unwind_start); | ||
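For orientation, a consumer of this interface typically walks frames in a loop like the sketch below (illustrative only; it assumes the unwind_start() wrapper from <asm/unwind.h> introduced elsewhere in this series, and dump_return_addresses() is a made-up name):

	static void dump_return_addresses(struct task_struct *task,
					  struct pt_regs *regs)
	{
		struct unwind_state state;
		unsigned long addr;

		for (unwind_start(&state, task, regs, NULL);
		     !unwind_done(&state);
		     unwind_next_frame(&state)) {
			addr = unwind_get_return_address(&state);
			if (!addr)
				break;
			printk("%pB\n", (void *)addr);
		}
	}

The oprofile conversion further down in this merge follows exactly this pattern.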
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c new file mode 100644 index 000000000000..b5a834c93065 --- /dev/null +++ b/arch/x86/kernel/unwind_guess.c | |||
@@ -0,0 +1,43 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/ftrace.h> | ||
3 | #include <asm/ptrace.h> | ||
4 | #include <asm/bitops.h> | ||
5 | #include <asm/stacktrace.h> | ||
6 | #include <asm/unwind.h> | ||
7 | |||
8 | bool unwind_next_frame(struct unwind_state *state) | ||
9 | { | ||
10 | struct stack_info *info = &state->stack_info; | ||
11 | |||
12 | if (unwind_done(state)) | ||
13 | return false; | ||
14 | |||
15 | do { | ||
16 | for (state->sp++; state->sp < info->end; state->sp++) | ||
17 | if (__kernel_text_address(*state->sp)) | ||
18 | return true; | ||
19 | |||
20 | state->sp = info->next_sp; | ||
21 | |||
22 | } while (!get_stack_info(state->sp, state->task, info, | ||
23 | &state->stack_mask)); | ||
24 | |||
25 | return false; | ||
26 | } | ||
27 | EXPORT_SYMBOL_GPL(unwind_next_frame); | ||
28 | |||
29 | void __unwind_start(struct unwind_state *state, struct task_struct *task, | ||
30 | struct pt_regs *regs, unsigned long *first_frame) | ||
31 | { | ||
32 | memset(state, 0, sizeof(*state)); | ||
33 | |||
34 | state->task = task; | ||
35 | state->sp = first_frame; | ||
36 | |||
37 | get_stack_info(first_frame, state->task, &state->stack_info, | ||
38 | &state->stack_mask); | ||
39 | |||
40 | if (!__kernel_text_address(*first_frame)) | ||
41 | unwind_next_frame(state); | ||
42 | } | ||
43 | EXPORT_SYMBOL_GPL(__unwind_start); | ||
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 76c5e52436c4..0bd9f1287f39 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -91,7 +91,7 @@ struct x86_cpuinit_ops x86_cpuinit = { | |||
91 | static void default_nmi_init(void) { }; | 91 | static void default_nmi_init(void) { }; |
92 | static int default_i8042_detect(void) { return 1; }; | 92 | static int default_i8042_detect(void) { return 1; }; |
93 | 93 | ||
94 | struct x86_platform_ops x86_platform = { | 94 | struct x86_platform_ops x86_platform __ro_after_init = { |
95 | .calibrate_cpu = native_calibrate_cpu, | 95 | .calibrate_cpu = native_calibrate_cpu, |
96 | .calibrate_tsc = native_calibrate_tsc, | 96 | .calibrate_tsc = native_calibrate_tsc, |
97 | .get_wallclock = mach_get_cmos_time, | 97 | .get_wallclock = mach_get_cmos_time, |
@@ -108,7 +108,7 @@ struct x86_platform_ops x86_platform = { | |||
108 | EXPORT_SYMBOL_GPL(x86_platform); | 108 | EXPORT_SYMBOL_GPL(x86_platform); |
109 | 109 | ||
110 | #if defined(CONFIG_PCI_MSI) | 110 | #if defined(CONFIG_PCI_MSI) |
111 | struct x86_msi_ops x86_msi = { | 111 | struct x86_msi_ops x86_msi __ro_after_init = { |
112 | .setup_msi_irqs = native_setup_msi_irqs, | 112 | .setup_msi_irqs = native_setup_msi_irqs, |
113 | .teardown_msi_irq = native_teardown_msi_irq, | 113 | .teardown_msi_irq = native_teardown_msi_irq, |
114 | .teardown_msi_irqs = default_teardown_msi_irqs, | 114 | .teardown_msi_irqs = default_teardown_msi_irqs, |
@@ -137,7 +137,7 @@ void arch_restore_msi_irqs(struct pci_dev *dev) | |||
137 | } | 137 | } |
138 | #endif | 138 | #endif |
139 | 139 | ||
140 | struct x86_io_apic_ops x86_io_apic_ops = { | 140 | struct x86_io_apic_ops x86_io_apic_ops __ro_after_init = { |
141 | .read = native_io_apic_read, | 141 | .read = native_io_apic_read, |
142 | .disable = native_disable_io_apic, | 142 | .disable = native_disable_io_apic, |
143 | }; | 143 | }; |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index af523d84d102..1e6b84b96ea6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -4961,7 +4961,7 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) | |||
4961 | avic_handle_ldr_update(vcpu); | 4961 | avic_handle_ldr_update(vcpu); |
4962 | } | 4962 | } |
4963 | 4963 | ||
4964 | static struct kvm_x86_ops svm_x86_ops = { | 4964 | static struct kvm_x86_ops svm_x86_ops __ro_after_init = { |
4965 | .cpu_has_kvm_support = has_svm, | 4965 | .cpu_has_kvm_support = has_svm, |
4966 | .disabled_by_bios = is_disabled, | 4966 | .disabled_by_bios = is_disabled, |
4967 | .hardware_setup = svm_hardware_setup, | 4967 | .hardware_setup = svm_hardware_setup, |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5cede40e2552..121fdf6e9ed0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -11177,7 +11177,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu) | |||
11177 | ~FEATURE_CONTROL_LMCE; | 11177 | ~FEATURE_CONTROL_LMCE; |
11178 | } | 11178 | } |
11179 | 11179 | ||
11180 | static struct kvm_x86_ops vmx_x86_ops = { | 11180 | static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { |
11181 | .cpu_has_kvm_support = cpu_has_kvm_support, | 11181 | .cpu_has_kvm_support = cpu_has_kvm_support, |
11182 | .disabled_by_bios = vmx_disabled_by_bios, | 11182 | .disabled_by_bios = vmx_disabled_by_bios, |
11183 | .hardware_setup = hardware_setup, | 11183 | .hardware_setup = hardware_setup, |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index dc8023060456..0b92fce3e6c0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -753,6 +753,38 @@ no_context(struct pt_regs *regs, unsigned long error_code, | |||
753 | return; | 753 | return; |
754 | } | 754 | } |
755 | 755 | ||
756 | #ifdef CONFIG_VMAP_STACK | ||
757 | /* | ||
758 | * Stack overflow? During boot, we can fault near the initial | ||
759 | * stack in the direct map, but that's not an overflow -- check | ||
760 | * that we're in vmalloc space to avoid this. | ||
761 | */ | ||
762 | if (is_vmalloc_addr((void *)address) && | ||
763 | (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) || | ||
764 | address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) { | ||
765 | register void *__sp asm("rsp"); | ||
766 | unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *); | ||
767 | /* | ||
768 | * We're likely to be running with very little stack space | ||
769 | * left. It's plausible that we'd hit this condition but | ||
770 | * double-fault even before we get this far, in which case | ||
771 | * we're fine: the double-fault handler will deal with it. | ||
772 | * | ||
773 | * We don't want to make it all the way into the oops code | ||
774 | * and then double-fault, though, because we're likely to | ||
775 | * break the console driver and lose most of the stack dump. | ||
776 | */ | ||
777 | asm volatile ("movq %[stack], %%rsp\n\t" | ||
778 | "call handle_stack_overflow\n\t" | ||
779 | "1: jmp 1b" | ||
780 | : "+r" (__sp) | ||
781 | : "D" ("kernel stack overflow (page fault)"), | ||
782 | "S" (regs), "d" (address), | ||
783 | [stack] "rm" (stack)); | ||
784 | unreachable(); | ||
785 | } | ||
786 | #endif | ||
787 | |||
756 | /* | 788 | /* |
757 | * 32-bit: | 789 | * 32-bit: |
758 | * | 790 | * |
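As a visual aid for the two unsigned comparisons above (illustrative sketch, not part of the patch): a vmalloc-backed stack sits between unmapped guard regions, and a fault address in either one is reported as an overflow.

	/*
	 *   [ guard page ][ stack: THREAD_SIZE bytes ][ guard page ]
	 *    ^ tsk->stack - PAGE_SIZE                  ^ tsk->stack + THREAD_SIZE
	 */
	static inline bool fault_in_stack_guard(struct task_struct *tsk,
						unsigned long address)
	{
		unsigned long base = (unsigned long)tsk->stack;

		return (base - 1 - address < PAGE_SIZE) ||
		       (address - (base + THREAD_SIZE) < PAGE_SIZE);
	}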
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index bda8d5eef04d..ddd2661c4502 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c | |||
@@ -40,17 +40,26 @@ | |||
40 | * You need to add an if/def entry if you introduce a new memory region | 40 | * You need to add an if/def entry if you introduce a new memory region |
41 | * compatible with KASLR. Your entry must be in logical order with memory | 41 | * compatible with KASLR. Your entry must be in logical order with memory |
42 | * layout. For example, ESPFIX is before EFI because its virtual address is | 42 | * layout. For example, ESPFIX is before EFI because its virtual address is |
43 | * before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to | 43 | * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to |
44 | * ensure that this order is correct and won't be changed. | 44 | * ensure that this order is correct and won't be changed. |
45 | */ | 45 | */ |
46 | static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; | 46 | static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; |
47 | static const unsigned long vaddr_end = VMEMMAP_START; | 47 | |
48 | #if defined(CONFIG_X86_ESPFIX64) | ||
49 | static const unsigned long vaddr_end = ESPFIX_BASE_ADDR; | ||
50 | #elif defined(CONFIG_EFI) | ||
51 | static const unsigned long vaddr_end = EFI_VA_START; | ||
52 | #else | ||
53 | static const unsigned long vaddr_end = __START_KERNEL_map; | ||
54 | #endif | ||
48 | 55 | ||
49 | /* Default values */ | 56 | /* Default values */ |
50 | unsigned long page_offset_base = __PAGE_OFFSET_BASE; | 57 | unsigned long page_offset_base = __PAGE_OFFSET_BASE; |
51 | EXPORT_SYMBOL(page_offset_base); | 58 | EXPORT_SYMBOL(page_offset_base); |
52 | unsigned long vmalloc_base = __VMALLOC_BASE; | 59 | unsigned long vmalloc_base = __VMALLOC_BASE; |
53 | EXPORT_SYMBOL(vmalloc_base); | 60 | EXPORT_SYMBOL(vmalloc_base); |
61 | unsigned long vmemmap_base = __VMEMMAP_BASE; | ||
62 | EXPORT_SYMBOL(vmemmap_base); | ||
54 | 63 | ||
55 | /* | 64 | /* |
56 | * Memory regions randomized by KASLR (except modules that use a separate logic | 65 | * Memory regions randomized by KASLR (except modules that use a separate logic |
@@ -63,6 +72,7 @@ static __initdata struct kaslr_memory_region { | |||
63 | } kaslr_regions[] = { | 72 | } kaslr_regions[] = { |
64 | { &page_offset_base, 64/* Maximum */ }, | 73 | { &page_offset_base, 64/* Maximum */ }, |
65 | { &vmalloc_base, VMALLOC_SIZE_TB }, | 74 | { &vmalloc_base, VMALLOC_SIZE_TB }, |
75 | { &vmemmap_base, 1 }, | ||
66 | }; | 76 | }; |
67 | 77 | ||
68 | /* Get size in bytes used by the memory region */ | 78 | /* Get size in bytes used by the memory region */ |
@@ -89,6 +99,18 @@ void __init kernel_randomize_memory(void) | |||
89 | struct rnd_state rand_state; | 99 | struct rnd_state rand_state; |
90 | unsigned long remain_entropy; | 100 | unsigned long remain_entropy; |
91 | 101 | ||
102 | /* | ||
103 | * All these BUILD_BUG_ON checks ensure the memory layout is | ||
104 | * consistent with the vaddr_start/vaddr_end variables. | ||
105 | */ | ||
106 | BUILD_BUG_ON(vaddr_start >= vaddr_end); | ||
107 | BUILD_BUG_ON(config_enabled(CONFIG_X86_ESPFIX64) && | ||
108 | vaddr_end >= EFI_VA_START); | ||
109 | BUILD_BUG_ON((config_enabled(CONFIG_X86_ESPFIX64) || | ||
110 | config_enabled(CONFIG_EFI)) && | ||
111 | vaddr_end >= __START_KERNEL_map); | ||
112 | BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); | ||
113 | |||
92 | if (!kaslr_memory_enabled()) | 114 | if (!kaslr_memory_enabled()) |
93 | return; | 115 | return; |
94 | 116 | ||
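A quick back-of-the-envelope check on the new one-terabyte vmemmap entry (editorial arithmetic; the 64-byte struct page size and the 64 TB physical limit are assumptions typical for x86-64 of this era):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long region   = 1ULL << 40; /* 1 TB vmemmap window */
		unsigned long long per_page = 64;         /* assumed sizeof(struct page) */
		unsigned long long pagesz   = 4096;

		/* prints 64: one terabyte of vmemmap describes 64 TB of RAM */
		printf("describes %llu TB of RAM\n",
		       region / per_page * pagesz >> 40);
		return 0;
	}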
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 4dbe65622810..a7655f6caf7d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -77,10 +77,25 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, | |||
77 | unsigned cpu = smp_processor_id(); | 77 | unsigned cpu = smp_processor_id(); |
78 | 78 | ||
79 | if (likely(prev != next)) { | 79 | if (likely(prev != next)) { |
80 | if (IS_ENABLED(CONFIG_VMAP_STACK)) { | ||
81 | /* | ||
82 | * If our current stack is in vmalloc space and isn't | ||
83 | * mapped in the new pgd, we'll double-fault. Forcibly | ||
84 | * map it. | ||
85 | */ | ||
86 | unsigned int stack_pgd_index = pgd_index(current_stack_pointer()); | ||
87 | |||
88 | pgd_t *pgd = next->pgd + stack_pgd_index; | ||
89 | |||
90 | if (unlikely(pgd_none(*pgd))) | ||
91 | set_pgd(pgd, init_mm.pgd[stack_pgd_index]); | ||
92 | } | ||
93 | |||
80 | #ifdef CONFIG_SMP | 94 | #ifdef CONFIG_SMP |
81 | this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); | 95 | this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); |
82 | this_cpu_write(cpu_tlbstate.active_mm, next); | 96 | this_cpu_write(cpu_tlbstate.active_mm, next); |
83 | #endif | 97 | #endif |
98 | |||
84 | cpumask_set_cpu(cpu, mm_cpumask(next)); | 99 | cpumask_set_cpu(cpu, mm_cpumask(next)); |
85 | 100 | ||
86 | /* | 101 | /* |
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index cb31a4440e58..a2488b6e27d6 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c | |||
@@ -16,27 +16,7 @@ | |||
16 | 16 | ||
17 | #include <asm/ptrace.h> | 17 | #include <asm/ptrace.h> |
18 | #include <asm/stacktrace.h> | 18 | #include <asm/stacktrace.h> |
19 | 19 | #include <asm/unwind.h> | |
20 | static int backtrace_stack(void *data, char *name) | ||
21 | { | ||
22 | /* Yes, we want all stacks */ | ||
23 | return 0; | ||
24 | } | ||
25 | |||
26 | static int backtrace_address(void *data, unsigned long addr, int reliable) | ||
27 | { | ||
28 | unsigned int *depth = data; | ||
29 | |||
30 | if ((*depth)--) | ||
31 | oprofile_add_trace(addr); | ||
32 | return 0; | ||
33 | } | ||
34 | |||
35 | static struct stacktrace_ops backtrace_ops = { | ||
36 | .stack = backtrace_stack, | ||
37 | .address = backtrace_address, | ||
38 | .walk_stack = print_context_stack, | ||
39 | }; | ||
40 | 20 | ||
41 | #ifdef CONFIG_COMPAT | 21 | #ifdef CONFIG_COMPAT |
42 | static struct stack_frame_ia32 * | 22 | static struct stack_frame_ia32 * |
@@ -113,10 +93,29 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) | |||
113 | struct stack_frame *head = (struct stack_frame *)frame_pointer(regs); | 93 | struct stack_frame *head = (struct stack_frame *)frame_pointer(regs); |
114 | 94 | ||
115 | if (!user_mode(regs)) { | 95 | if (!user_mode(regs)) { |
116 | unsigned long stack = kernel_stack_pointer(regs); | 96 | struct unwind_state state; |
117 | if (depth) | 97 | unsigned long addr; |
118 | dump_trace(NULL, regs, (unsigned long *)stack, 0, | 98 | |
119 | &backtrace_ops, &depth); | 99 | if (!depth) |
100 | return; | ||
101 | |||
102 | oprofile_add_trace(regs->ip); | ||
103 | |||
104 | if (!--depth) | ||
105 | return; | ||
106 | |||
107 | for (unwind_start(&state, current, regs, NULL); | ||
108 | !unwind_done(&state); unwind_next_frame(&state)) { | ||
109 | addr = unwind_get_return_address(&state); | ||
110 | if (!addr) | ||
111 | break; | ||
112 | |||
113 | oprofile_add_trace(addr); | ||
114 | |||
115 | if (!--depth) | ||
116 | break; | ||
117 | } | ||
118 | |||
120 | return; | 119 | return; |
121 | } | 120 | } |
122 | 121 | ||
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 9770e55e768f..1d97cea3b3a4 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c | |||
@@ -120,9 +120,12 @@ static unsigned long __init bios32_service(unsigned long service) | |||
120 | static struct { | 120 | static struct { |
121 | unsigned long address; | 121 | unsigned long address; |
122 | unsigned short segment; | 122 | unsigned short segment; |
123 | } pci_indirect = { 0, __KERNEL_CS }; | 123 | } pci_indirect __ro_after_init = { |
124 | .address = 0, | ||
125 | .segment = __KERNEL_CS, | ||
126 | }; | ||
124 | 127 | ||
125 | static int pci_bios_present; | 128 | static int pci_bios_present __ro_after_init; |
126 | 129 | ||
127 | static int __init check_pcibios(void) | 130 | static int __init check_pcibios(void) |
128 | { | 131 | { |
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index b12c26e2e309..53cace2ec0e2 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c | |||
@@ -130,7 +130,7 @@ static void __save_processor_state(struct saved_context *ctxt) | |||
130 | ctxt->cr0 = read_cr0(); | 130 | ctxt->cr0 = read_cr0(); |
131 | ctxt->cr2 = read_cr2(); | 131 | ctxt->cr2 = read_cr2(); |
132 | ctxt->cr3 = read_cr3(); | 132 | ctxt->cr3 = read_cr3(); |
133 | ctxt->cr4 = __read_cr4_safe(); | 133 | ctxt->cr4 = __read_cr4(); |
134 | #ifdef CONFIG_X86_64 | 134 | #ifdef CONFIG_X86_64 |
135 | ctxt->cr8 = read_cr8(); | 135 | ctxt->cr8 = read_cr8(); |
136 | #endif | 136 | #endif |
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index a7ef7b131e25..5766ead6fdb9 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c | |||
@@ -194,7 +194,7 @@ int peek_user(struct task_struct *child, long addr, long data) | |||
194 | 194 | ||
195 | static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) | 195 | static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) |
196 | { | 196 | { |
197 | int err, n, cpu = ((struct thread_info *) child->stack)->cpu; | 197 | int err, n, cpu = task_cpu(child); |
198 | struct user_i387_struct fpregs; | 198 | struct user_i387_struct fpregs; |
199 | 199 | ||
200 | err = save_i387_registers(userspace_pid[cpu], | 200 | err = save_i387_registers(userspace_pid[cpu], |
@@ -211,7 +211,7 @@ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *c | |||
211 | 211 | ||
212 | static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) | 212 | static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) |
213 | { | 213 | { |
214 | int n, cpu = ((struct thread_info *) child->stack)->cpu; | 214 | int n, cpu = task_cpu(child); |
215 | struct user_i387_struct fpregs; | 215 | struct user_i387_struct fpregs; |
216 | 216 | ||
217 | n = copy_from_user(&fpregs, buf, sizeof(fpregs)); | 217 | n = copy_from_user(&fpregs, buf, sizeof(fpregs)); |
@@ -224,7 +224,7 @@ static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *c | |||
224 | 224 | ||
225 | static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) | 225 | static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) |
226 | { | 226 | { |
227 | int err, n, cpu = ((struct thread_info *) child->stack)->cpu; | 227 | int err, n, cpu = task_cpu(child); |
228 | struct user_fxsr_struct fpregs; | 228 | struct user_fxsr_struct fpregs; |
229 | 229 | ||
230 | err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs); | 230 | err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs); |
@@ -240,7 +240,7 @@ static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct * | |||
240 | 240 | ||
241 | static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) | 241 | static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) |
242 | { | 242 | { |
243 | int n, cpu = ((struct thread_info *) child->stack)->cpu; | 243 | int n, cpu = task_cpu(child); |
244 | struct user_fxsr_struct fpregs; | 244 | struct user_fxsr_struct fpregs; |
245 | 245 | ||
246 | n = copy_from_user(&fpregs, buf, sizeof(fpregs)); | 246 | n = copy_from_user(&fpregs, buf, sizeof(fpregs)); |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bc9aaba01a22..f1d2182e071f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -1237,7 +1237,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { | |||
1237 | .write_cr0 = xen_write_cr0, | 1237 | .write_cr0 = xen_write_cr0, |
1238 | 1238 | ||
1239 | .read_cr4 = native_read_cr4, | 1239 | .read_cr4 = native_read_cr4, |
1240 | .read_cr4_safe = native_read_cr4_safe, | ||
1241 | .write_cr4 = xen_write_cr4, | 1240 | .write_cr4 = xen_write_cr4, |
1242 | 1241 | ||
1243 | #ifdef CONFIG_X86_64 | 1242 | #ifdef CONFIG_X86_64 |
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 96de97a46079..4025291ea0ae 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c | |||
@@ -940,15 +940,13 @@ static void build_inv_irt(struct iommu_cmd *cmd, u16 devid) | |||
940 | * Writes the command to the IOMMUs command buffer and informs the | 940 | * Writes the command to the IOMMUs command buffer and informs the |
941 | * hardware about the new command. | 941 | * hardware about the new command. |
942 | */ | 942 | */ |
943 | static int iommu_queue_command_sync(struct amd_iommu *iommu, | 943 | static int __iommu_queue_command_sync(struct amd_iommu *iommu, |
944 | struct iommu_cmd *cmd, | 944 | struct iommu_cmd *cmd, |
945 | bool sync) | 945 | bool sync) |
946 | { | 946 | { |
947 | u32 left, tail, head, next_tail; | 947 | u32 left, tail, head, next_tail; |
948 | unsigned long flags; | ||
949 | 948 | ||
950 | again: | 949 | again: |
951 | spin_lock_irqsave(&iommu->lock, flags); | ||
952 | 950 | ||
953 | head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | 951 | head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); |
954 | tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | 952 | tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); |
@@ -957,15 +955,14 @@ again: | |||
957 | 955 | ||
958 | if (left <= 2) { | 956 | if (left <= 2) { |
959 | struct iommu_cmd sync_cmd; | 957 | struct iommu_cmd sync_cmd; |
960 | volatile u64 sem = 0; | ||
961 | int ret; | 958 | int ret; |
962 | 959 | ||
963 | build_completion_wait(&sync_cmd, (u64)&sem); | 960 | iommu->cmd_sem = 0; |
964 | copy_cmd_to_buffer(iommu, &sync_cmd, tail); | ||
965 | 961 | ||
966 | spin_unlock_irqrestore(&iommu->lock, flags); | 962 | build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem); |
963 | copy_cmd_to_buffer(iommu, &sync_cmd, tail); | ||
967 | 964 | ||
968 | if ((ret = wait_on_sem(&sem)) != 0) | 965 | if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0) |
969 | return ret; | 966 | return ret; |
970 | 967 | ||
971 | goto again; | 968 | goto again; |
@@ -976,9 +973,21 @@ again: | |||
976 | /* We need to sync now to make sure all commands are processed */ | 973 | /* We need to sync now to make sure all commands are processed */ |
977 | iommu->need_sync = sync; | 974 | iommu->need_sync = sync; |
978 | 975 | ||
976 | return 0; | ||
977 | } | ||
978 | |||
979 | static int iommu_queue_command_sync(struct amd_iommu *iommu, | ||
980 | struct iommu_cmd *cmd, | ||
981 | bool sync) | ||
982 | { | ||
983 | unsigned long flags; | ||
984 | int ret; | ||
985 | |||
986 | spin_lock_irqsave(&iommu->lock, flags); | ||
987 | ret = __iommu_queue_command_sync(iommu, cmd, sync); | ||
979 | spin_unlock_irqrestore(&iommu->lock, flags); | 988 | spin_unlock_irqrestore(&iommu->lock, flags); |
980 | 989 | ||
981 | return 0; | 990 | return ret; |
982 | } | 991 | } |
983 | 992 | ||
984 | static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | 993 | static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) |
@@ -993,19 +1002,29 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | |||
993 | static int iommu_completion_wait(struct amd_iommu *iommu) | 1002 | static int iommu_completion_wait(struct amd_iommu *iommu) |
994 | { | 1003 | { |
995 | struct iommu_cmd cmd; | 1004 | struct iommu_cmd cmd; |
996 | volatile u64 sem = 0; | 1005 | unsigned long flags; |
997 | int ret; | 1006 | int ret; |
998 | 1007 | ||
999 | if (!iommu->need_sync) | 1008 | if (!iommu->need_sync) |
1000 | return 0; | 1009 | return 0; |
1001 | 1010 | ||
1002 | build_completion_wait(&cmd, (u64)&sem); | ||
1003 | 1011 | ||
1004 | ret = iommu_queue_command_sync(iommu, &cmd, false); | 1012 | build_completion_wait(&cmd, (u64)&iommu->cmd_sem); |
1013 | |||
1014 | spin_lock_irqsave(&iommu->lock, flags); | ||
1015 | |||
1016 | iommu->cmd_sem = 0; | ||
1017 | |||
1018 | ret = __iommu_queue_command_sync(iommu, &cmd, false); | ||
1005 | if (ret) | 1019 | if (ret) |
1006 | return ret; | 1020 | goto out_unlock; |
1021 | |||
1022 | ret = wait_on_sem(&iommu->cmd_sem); | ||
1007 | 1023 | ||
1008 | return wait_on_sem(&sem); | 1024 | out_unlock: |
1025 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
1026 | |||
1027 | return ret; | ||
1009 | } | 1028 | } |
1010 | 1029 | ||
1011 | static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) | 1030 | static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) |
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index caf5e3822715..9652848e3155 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h | |||
@@ -524,6 +524,8 @@ struct amd_iommu { | |||
524 | struct irq_domain *ir_domain; | 524 | struct irq_domain *ir_domain; |
525 | struct irq_domain *msi_domain; | 525 | struct irq_domain *msi_domain; |
526 | #endif | 526 | #endif |
527 | |||
528 | volatile u64 __aligned(8) cmd_sem; | ||
527 | }; | 529 | }; |
528 | 530 | ||
529 | #define ACPIHID_UID_LEN 256 | 531 | #define ACPIHID_UID_LEN 256 |
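The underlying problem the cmd_sem move fixes: the completion-wait command makes the IOMMU hardware write to the semaphore, so the driver must be able to derive a stable physical address for it, which is no longer straightforward once the stack can live in vmalloc space (CONFIG_VMAP_STACK). For reference, the waiting side polls the semaphore roughly as sketched below (illustrative reconstruction of wait_on_sem(); the timeout constant and message are approximations):

	static int wait_on_sem(volatile u64 *sem)
	{
		int i = 0;

		while (*sem == 0 && i < LOOP_TIMEOUT) {
			udelay(1);
			i += 1;
		}

		if (i == LOOP_TIMEOUT) {
			pr_alert("Completion-wait loop timed out\n");
			return -EIO;
		}

		return 0;
	}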
diff --git a/fs/proc/base.c b/fs/proc/base.c index ac0df4dde823..3b792ab3c0dc 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -483,7 +483,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, | |||
483 | save_stack_trace_tsk(task, &trace); | 483 | save_stack_trace_tsk(task, &trace); |
484 | 484 | ||
485 | for (i = 0; i < trace.nr_entries; i++) { | 485 | for (i = 0; i < trace.nr_entries; i++) { |
486 | seq_printf(m, "[<%pK>] %pS\n", | 486 | seq_printf(m, "[<%pK>] %pB\n", |
487 | (void *)entries[i], (void *)entries[i]); | 487 | (void *)entries[i], (void *)entries[i]); |
488 | } | 488 | } |
489 | unlock_trace(task); | 489 | unlock_trace(task); |
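The %pS to %pB switch matters because the saved entries are return addresses: %pB symbolizes the address as a backtrace entry (effectively looking up address - 1), so a call that is the last instruction of its caller is still attributed to the caller rather than to the next function in memory. Illustrative only:

	static void show_return_address(unsigned long ret)
	{
		/* %pS resolves 'ret' as-is; %pB treats it as a return address */
		printk("%pS vs %pB\n", (void *)ret, (void *)ret);
	}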
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7d565afe35d2..6f93ac46e7f0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h | |||
@@ -795,7 +795,12 @@ struct ftrace_ret_stack { | |||
795 | unsigned long func; | 795 | unsigned long func; |
796 | unsigned long long calltime; | 796 | unsigned long long calltime; |
797 | unsigned long long subtime; | 797 | unsigned long long subtime; |
798 | #ifdef HAVE_FUNCTION_GRAPH_FP_TEST | ||
798 | unsigned long fp; | 799 | unsigned long fp; |
800 | #endif | ||
801 | #ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR | ||
802 | unsigned long *retp; | ||
803 | #endif | ||
799 | }; | 804 | }; |
800 | 805 | ||
801 | /* | 806 | /* |
@@ -807,7 +812,10 @@ extern void return_to_handler(void); | |||
807 | 812 | ||
808 | extern int | 813 | extern int |
809 | ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, | 814 | ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, |
810 | unsigned long frame_pointer); | 815 | unsigned long frame_pointer, unsigned long *retp); |
816 | |||
817 | unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, | ||
818 | unsigned long ret, unsigned long *retp); | ||
811 | 819 | ||
812 | /* | 820 | /* |
813 | * Sometimes we don't want to trace a function with the function | 821 | * Sometimes we don't want to trace a function with the function |
@@ -870,6 +878,13 @@ static inline int task_curr_ret_stack(struct task_struct *tsk) | |||
870 | return -1; | 878 | return -1; |
871 | } | 879 | } |
872 | 880 | ||
881 | static inline unsigned long | ||
882 | ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, | ||
883 | unsigned long *retp) | ||
884 | { | ||
885 | return ret; | ||
886 | } | ||
887 | |||
873 | static inline void pause_graph_tracing(void) { } | 888 | static inline void pause_graph_tracing(void) { } |
874 | static inline void unpause_graph_tracing(void) { } | 889 | static inline void unpause_graph_tracing(void) { } |
875 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | 890 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ |
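To make the new retp plumbing concrete: an architecture selecting HAVE_FUNCTION_GRAPH_RET_ADDR_PTR passes the location of the return address it is about to hijack, so that ftrace_graph_ret_addr() can later map the return_to_handler trampoline back to the real caller. A heavily simplified, illustrative entry hook (error paths and the ftrace_graph_entry() callback omitted):

	void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
				   unsigned long frame_pointer)
	{
		unsigned long old = *parent;
		int depth;

		/* record the original return address and where it is stored */
		if (ftrace_push_return_trace(old, self_addr, &depth,
					     frame_pointer, parent) == -EBUSY)
			return;

		/* divert the return path through the tracer's trampoline */
		*parent = (unsigned long)&return_to_handler;
	}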
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index f8834f820ec2..325f649d77ff 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -15,6 +15,8 @@ | |||
15 | #include <net/net_namespace.h> | 15 | #include <net/net_namespace.h> |
16 | #include <linux/sched/rt.h> | 16 | #include <linux/sched/rt.h> |
17 | 17 | ||
18 | #include <asm/thread_info.h> | ||
19 | |||
18 | #ifdef CONFIG_SMP | 20 | #ifdef CONFIG_SMP |
19 | # define INIT_PUSHABLE_TASKS(tsk) \ | 21 | # define INIT_PUSHABLE_TASKS(tsk) \ |
20 | .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), | 22 | .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), |
@@ -183,12 +185,21 @@ extern struct task_group root_task_group; | |||
183 | # define INIT_KASAN(tsk) | 185 | # define INIT_KASAN(tsk) |
184 | #endif | 186 | #endif |
185 | 187 | ||
188 | #ifdef CONFIG_THREAD_INFO_IN_TASK | ||
189 | # define INIT_TASK_TI(tsk) \ | ||
190 | .thread_info = INIT_THREAD_INFO(tsk), \ | ||
191 | .stack_refcount = ATOMIC_INIT(1), | ||
192 | #else | ||
193 | # define INIT_TASK_TI(tsk) | ||
194 | #endif | ||
195 | |||
186 | /* | 196 | /* |
187 | * INIT_TASK is used to set up the first task table, touch at | 197 | * INIT_TASK is used to set up the first task table, touch at |
188 | * your own risk! Base=0, limit=0x1fffff (=2MB) | 198 | * your own risk! Base=0, limit=0x1fffff (=2MB) |
189 | */ | 199 | */ |
190 | #define INIT_TASK(tsk) \ | 200 | #define INIT_TASK(tsk) \ |
191 | { \ | 201 | { \ |
202 | INIT_TASK_TI(tsk) \ | ||
192 | .state = 0, \ | 203 | .state = 0, \ |
193 | .stack = init_stack, \ | 204 | .stack = init_stack, \ |
194 | .usage = ATOMIC_INIT(2), \ | 205 | .usage = ATOMIC_INIT(2), \ |
diff --git a/include/linux/sched.h b/include/linux/sched.h index f76d75fc9eaf..7543a476178b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -1471,6 +1471,13 @@ struct tlbflush_unmap_batch { | |||
1471 | }; | 1471 | }; |
1472 | 1472 | ||
1473 | struct task_struct { | 1473 | struct task_struct { |
1474 | #ifdef CONFIG_THREAD_INFO_IN_TASK | ||
1475 | /* | ||
1476 | * For reasons of header soup (see current_thread_info()), this | ||
1477 | * must be the first element of task_struct. | ||
1478 | */ | ||
1479 | struct thread_info thread_info; | ||
1480 | #endif | ||
1474 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ | 1481 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ |
1475 | void *stack; | 1482 | void *stack; |
1476 | atomic_t usage; | 1483 | atomic_t usage; |
@@ -1480,6 +1487,9 @@ struct task_struct { | |||
1480 | #ifdef CONFIG_SMP | 1487 | #ifdef CONFIG_SMP |
1481 | struct llist_node wake_entry; | 1488 | struct llist_node wake_entry; |
1482 | int on_cpu; | 1489 | int on_cpu; |
1490 | #ifdef CONFIG_THREAD_INFO_IN_TASK | ||
1491 | unsigned int cpu; /* current CPU */ | ||
1492 | #endif | ||
1483 | unsigned int wakee_flips; | 1493 | unsigned int wakee_flips; |
1484 | unsigned long wakee_flip_decay_ts; | 1494 | unsigned long wakee_flip_decay_ts; |
1485 | struct task_struct *last_wakee; | 1495 | struct task_struct *last_wakee; |
@@ -1936,6 +1946,13 @@ struct task_struct { | |||
1936 | #ifdef CONFIG_MMU | 1946 | #ifdef CONFIG_MMU |
1937 | struct task_struct *oom_reaper_list; | 1947 | struct task_struct *oom_reaper_list; |
1938 | #endif | 1948 | #endif |
1949 | #ifdef CONFIG_VMAP_STACK | ||
1950 | struct vm_struct *stack_vm_area; | ||
1951 | #endif | ||
1952 | #ifdef CONFIG_THREAD_INFO_IN_TASK | ||
1953 | /* A live task holds one reference. */ | ||
1954 | atomic_t stack_refcount; | ||
1955 | #endif | ||
1939 | /* CPU-specific state of this task */ | 1956 | /* CPU-specific state of this task */ |
1940 | struct thread_struct thread; | 1957 | struct thread_struct thread; |
1941 | /* | 1958 | /* |
@@ -1952,6 +1969,18 @@ extern int arch_task_struct_size __read_mostly; | |||
1952 | # define arch_task_struct_size (sizeof(struct task_struct)) | 1969 | # define arch_task_struct_size (sizeof(struct task_struct)) |
1953 | #endif | 1970 | #endif |
1954 | 1971 | ||
1972 | #ifdef CONFIG_VMAP_STACK | ||
1973 | static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) | ||
1974 | { | ||
1975 | return t->stack_vm_area; | ||
1976 | } | ||
1977 | #else | ||
1978 | static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) | ||
1979 | { | ||
1980 | return NULL; | ||
1981 | } | ||
1982 | #endif | ||
1983 | |||
1955 | /* Future-safe accessor for struct task_struct's cpus_allowed. */ | 1984 | /* Future-safe accessor for struct task_struct's cpus_allowed. */ |
1956 | #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) | 1985 | #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) |
1957 | 1986 | ||
@@ -2586,7 +2615,9 @@ extern void ia64_set_curr_task(int cpu, struct task_struct *p); | |||
2586 | void yield(void); | 2615 | void yield(void); |
2587 | 2616 | ||
2588 | union thread_union { | 2617 | union thread_union { |
2618 | #ifndef CONFIG_THREAD_INFO_IN_TASK | ||
2589 | struct thread_info thread_info; | 2619 | struct thread_info thread_info; |
2620 | #endif | ||
2590 | unsigned long stack[THREAD_SIZE/sizeof(long)]; | 2621 | unsigned long stack[THREAD_SIZE/sizeof(long)]; |
2591 | }; | 2622 | }; |
2592 | 2623 | ||
@@ -3074,10 +3105,34 @@ static inline void threadgroup_change_end(struct task_struct *tsk) | |||
3074 | cgroup_threadgroup_change_end(tsk); | 3105 | cgroup_threadgroup_change_end(tsk); |
3075 | } | 3106 | } |
3076 | 3107 | ||
3077 | #ifndef __HAVE_THREAD_FUNCTIONS | 3108 | #ifdef CONFIG_THREAD_INFO_IN_TASK |
3109 | |||
3110 | static inline struct thread_info *task_thread_info(struct task_struct *task) | ||
3111 | { | ||
3112 | return &task->thread_info; | ||
3113 | } | ||
3114 | |||
3115 | /* | ||
3116 | * When accessing the stack of a non-current task that might exit, use | ||
3117 | * try_get_task_stack() instead. task_stack_page will return a pointer | ||
3118 | * that could get freed out from under you. | ||
3119 | */ | ||
3120 | static inline void *task_stack_page(const struct task_struct *task) | ||
3121 | { | ||
3122 | return task->stack; | ||
3123 | } | ||
3124 | |||
3125 | #define setup_thread_stack(new,old) do { } while(0) | ||
3126 | |||
3127 | static inline unsigned long *end_of_stack(const struct task_struct *task) | ||
3128 | { | ||
3129 | return task->stack; | ||
3130 | } | ||
3131 | |||
3132 | #elif !defined(__HAVE_THREAD_FUNCTIONS) | ||
3078 | 3133 | ||
3079 | #define task_thread_info(task) ((struct thread_info *)(task)->stack) | 3134 | #define task_thread_info(task) ((struct thread_info *)(task)->stack) |
3080 | #define task_stack_page(task) ((task)->stack) | 3135 | #define task_stack_page(task) ((void *)(task)->stack) |
3081 | 3136 | ||
3082 | static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) | 3137 | static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) |
3083 | { | 3138 | { |
@@ -3104,6 +3159,24 @@ static inline unsigned long *end_of_stack(struct task_struct *p) | |||
3104 | } | 3159 | } |
3105 | 3160 | ||
3106 | #endif | 3161 | #endif |
3162 | |||
3163 | #ifdef CONFIG_THREAD_INFO_IN_TASK | ||
3164 | static inline void *try_get_task_stack(struct task_struct *tsk) | ||
3165 | { | ||
3166 | return atomic_inc_not_zero(&tsk->stack_refcount) ? | ||
3167 | task_stack_page(tsk) : NULL; | ||
3168 | } | ||
3169 | |||
3170 | extern void put_task_stack(struct task_struct *tsk); | ||
3171 | #else | ||
3172 | static inline void *try_get_task_stack(struct task_struct *tsk) | ||
3173 | { | ||
3174 | return task_stack_page(tsk); | ||
3175 | } | ||
3176 | |||
3177 | static inline void put_task_stack(struct task_struct *tsk) {} | ||
3178 | #endif | ||
3179 | |||
3107 | #define task_stack_end_corrupted(task) \ | 3180 | #define task_stack_end_corrupted(task) \ |
3108 | (*(end_of_stack(task)) != STACK_END_MAGIC) | 3181 | (*(end_of_stack(task)) != STACK_END_MAGIC) |
3109 | 3182 | ||
@@ -3390,7 +3463,11 @@ static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) | |||
3390 | 3463 | ||
3391 | static inline unsigned int task_cpu(const struct task_struct *p) | 3464 | static inline unsigned int task_cpu(const struct task_struct *p) |
3392 | { | 3465 | { |
3466 | #ifdef CONFIG_THREAD_INFO_IN_TASK | ||
3467 | return p->cpu; | ||
3468 | #else | ||
3393 | return task_thread_info(p)->cpu; | 3469 | return task_thread_info(p)->cpu; |
3470 | #endif | ||
3394 | } | 3471 | } |
3395 | 3472 | ||
3396 | static inline int task_node(const struct task_struct *p) | 3473 | static inline int task_node(const struct task_struct *p) |
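The intended usage pattern for the new pair is pin, inspect, release whenever another task's stack is accessed (illustrative sketch; inspect_remote_stack() is a made-up name):

	static void inspect_remote_stack(struct task_struct *tsk)
	{
		void *stack = try_get_task_stack(tsk);

		if (!stack)
			return;	/* task has exited and its stack is already gone */

		/* ... safe to read the stack pages here ... */

		put_task_stack(tsk);
	}

The save_stack_trace_tsk(), get_wchan() and kthread changes elsewhere in this merge follow exactly this shape.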
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 2b5b10eed74f..45f004e9cc59 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h | |||
@@ -13,6 +13,21 @@ | |||
13 | struct timespec; | 13 | struct timespec; |
14 | struct compat_timespec; | 14 | struct compat_timespec; |
15 | 15 | ||
16 | #ifdef CONFIG_THREAD_INFO_IN_TASK | ||
17 | struct thread_info { | ||
18 | unsigned long flags; /* low level flags */ | ||
19 | }; | ||
20 | |||
21 | #define INIT_THREAD_INFO(tsk) \ | ||
22 | { \ | ||
23 | .flags = 0, \ | ||
24 | } | ||
25 | #endif | ||
26 | |||
27 | #ifdef CONFIG_THREAD_INFO_IN_TASK | ||
28 | #define current_thread_info() ((struct thread_info *)current) | ||
29 | #endif | ||
30 | |||
16 | /* | 31 | /* |
17 | * System call restart block. | 32 | * System call restart block. |
18 | */ | 33 | */ |
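The bare cast in current_thread_info() is only valid because thread_info must be the first member of task_struct under this option, as the sched.h comment above states. A build-time assertion of that layout could look like the sketch below (illustrative; the kernel relies on the documented convention rather than this exact check):

	static void __init check_thread_info_layout(void)
	{
		/* the cast in current_thread_info() relies on offset zero */
		BUILD_BUG_ON(offsetof(struct task_struct, thread_info) != 0);
	}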
diff --git a/init/Kconfig b/init/Kconfig index cac3f096050d..3b9a47fe843b 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -26,6 +26,16 @@ config IRQ_WORK | |||
26 | config BUILDTIME_EXTABLE_SORT | 26 | config BUILDTIME_EXTABLE_SORT |
27 | bool | 27 | bool |
28 | 28 | ||
29 | config THREAD_INFO_IN_TASK | ||
30 | bool | ||
31 | help | ||
32 | Select this to move thread_info off the stack into task_struct. To | ||
33 | make this work, an arch will need to remove all thread_info fields | ||
34 | except flags and fix any runtime bugs. | ||
35 | |||
36 | One subtle change that will be needed is to use try_get_task_stack() | ||
37 | and put_task_stack() in save_thread_stack_tsk() and get_wchan(). | ||
38 | |||
29 | menu "General setup" | 39 | menu "General setup" |
30 | 40 | ||
31 | config BROKEN | 41 | config BROKEN |
diff --git a/init/init_task.c b/init/init_task.c index ba0a7f362d9e..11f83be1fa79 100644 --- a/init/init_task.c +++ b/init/init_task.c | |||
@@ -22,5 +22,8 @@ EXPORT_SYMBOL(init_task); | |||
22 | * Initial thread structure. Alignment of this is handled by a special | 22 | * Initial thread structure. Alignment of this is handled by a special |
23 | * linker map entry. | 23 | * linker map entry. |
24 | */ | 24 | */ |
25 | union thread_union init_thread_union __init_task_data = | 25 | union thread_union init_thread_union __init_task_data = { |
26 | { INIT_THREAD_INFO(init_task) }; | 26 | #ifndef CONFIG_THREAD_INFO_IN_TASK |
27 | INIT_THREAD_INFO(init_task) | ||
28 | #endif | ||
29 | }; | ||
diff --git a/kernel/fork.c b/kernel/fork.c index beb31725f7e2..c060c7e7c247 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -158,19 +158,83 @@ void __weak arch_release_thread_stack(unsigned long *stack) | |||
158 | * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a | 158 | * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a |
159 | * kmemcache based allocator. | 159 | * kmemcache based allocator. |
160 | */ | 160 | */ |
161 | # if THREAD_SIZE >= PAGE_SIZE | 161 | # if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) |
162 | static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, | 162 | |
163 | int node) | 163 | #ifdef CONFIG_VMAP_STACK |
164 | /* | ||
165 | * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB | ||
166 | * flush. Try to minimize the number of calls by caching stacks. | ||
167 | */ | ||
168 | #define NR_CACHED_STACKS 2 | ||
169 | static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]); | ||
170 | #endif | ||
171 | |||
172 | static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) | ||
164 | { | 173 | { |
174 | #ifdef CONFIG_VMAP_STACK | ||
175 | void *stack; | ||
176 | int i; | ||
177 | |||
178 | local_irq_disable(); | ||
179 | for (i = 0; i < NR_CACHED_STACKS; i++) { | ||
180 | struct vm_struct *s = this_cpu_read(cached_stacks[i]); | ||
181 | |||
182 | if (!s) | ||
183 | continue; | ||
184 | this_cpu_write(cached_stacks[i], NULL); | ||
185 | |||
186 | tsk->stack_vm_area = s; | ||
187 | local_irq_enable(); | ||
188 | return s->addr; | ||
189 | } | ||
190 | local_irq_enable(); | ||
191 | |||
192 | stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE, | ||
193 | VMALLOC_START, VMALLOC_END, | ||
194 | THREADINFO_GFP | __GFP_HIGHMEM, | ||
195 | PAGE_KERNEL, | ||
196 | 0, node, __builtin_return_address(0)); | ||
197 | |||
198 | /* | ||
199 | * We can't call find_vm_area() in interrupt context, and | ||
200 | * free_thread_stack() can be called in interrupt context, | ||
201 | * so cache the vm_struct. | ||
202 | */ | ||
203 | if (stack) | ||
204 | tsk->stack_vm_area = find_vm_area(stack); | ||
205 | return stack; | ||
206 | #else | ||
165 | struct page *page = alloc_pages_node(node, THREADINFO_GFP, | 207 | struct page *page = alloc_pages_node(node, THREADINFO_GFP, |
166 | THREAD_SIZE_ORDER); | 208 | THREAD_SIZE_ORDER); |
167 | 209 | ||
168 | return page ? page_address(page) : NULL; | 210 | return page ? page_address(page) : NULL; |
211 | #endif | ||
169 | } | 212 | } |
170 | 213 | ||
171 | static inline void free_thread_stack(unsigned long *stack) | 214 | static inline void free_thread_stack(struct task_struct *tsk) |
172 | { | 215 | { |
173 | __free_pages(virt_to_page(stack), THREAD_SIZE_ORDER); | 216 | #ifdef CONFIG_VMAP_STACK |
217 | if (task_stack_vm_area(tsk)) { | ||
218 | unsigned long flags; | ||
219 | int i; | ||
220 | |||
221 | local_irq_save(flags); | ||
222 | for (i = 0; i < NR_CACHED_STACKS; i++) { | ||
223 | if (this_cpu_read(cached_stacks[i])) | ||
224 | continue; | ||
225 | |||
226 | this_cpu_write(cached_stacks[i], tsk->stack_vm_area); | ||
227 | local_irq_restore(flags); | ||
228 | return; | ||
229 | } | ||
230 | local_irq_restore(flags); | ||
231 | |||
232 | vfree(tsk->stack); | ||
233 | return; | ||
234 | } | ||
235 | #endif | ||
236 | |||
237 | __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER); | ||
174 | } | 238 | } |
175 | # else | 239 | # else |
176 | static struct kmem_cache *thread_stack_cache; | 240 | static struct kmem_cache *thread_stack_cache; |
@@ -181,9 +245,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, | |||
181 | return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); | 245 | return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); |
182 | } | 246 | } |
183 | 247 | ||
184 | static void free_thread_stack(unsigned long *stack) | 248 | static void free_thread_stack(struct task_struct *tsk) |
185 | { | 249 | { |
186 | kmem_cache_free(thread_stack_cache, stack); | 250 | kmem_cache_free(thread_stack_cache, tsk->stack); |
187 | } | 251 | } |
188 | 252 | ||
189 | void thread_stack_cache_init(void) | 253 | void thread_stack_cache_init(void) |
@@ -213,24 +277,76 @@ struct kmem_cache *vm_area_cachep; | |||
213 | /* SLAB cache for mm_struct structures (tsk->mm) */ | 277 | /* SLAB cache for mm_struct structures (tsk->mm) */ |
214 | static struct kmem_cache *mm_cachep; | 278 | static struct kmem_cache *mm_cachep; |
215 | 279 | ||
216 | static void account_kernel_stack(unsigned long *stack, int account) | 280 | static void account_kernel_stack(struct task_struct *tsk, int account) |
217 | { | 281 | { |
218 | /* All stack pages are in the same zone and belong to the same memcg. */ | 282 | void *stack = task_stack_page(tsk); |
219 | struct page *first_page = virt_to_page(stack); | 283 | struct vm_struct *vm = task_stack_vm_area(tsk); |
284 | |||
285 | BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0); | ||
286 | |||
287 | if (vm) { | ||
288 | int i; | ||
220 | 289 | ||
221 | mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, | 290 | BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); |
222 | THREAD_SIZE / 1024 * account); | ||
223 | 291 | ||
224 | memcg_kmem_update_page_stat( | 292 | for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { |
225 | first_page, MEMCG_KERNEL_STACK_KB, | 293 | mod_zone_page_state(page_zone(vm->pages[i]), |
226 | account * (THREAD_SIZE / 1024)); | 294 | NR_KERNEL_STACK_KB, |
295 | PAGE_SIZE / 1024 * account); | ||
296 | } | ||
297 | |||
298 | /* All stack pages belong to the same memcg. */ | ||
299 | memcg_kmem_update_page_stat(vm->pages[0], MEMCG_KERNEL_STACK_KB, | ||
300 | account * (THREAD_SIZE / 1024)); | ||
301 | } else { | ||
302 | /* | ||
303 | * All stack pages are in the same zone and belong to the | ||
304 | * same memcg. | ||
305 | */ | ||
306 | struct page *first_page = virt_to_page(stack); | ||
307 | |||
308 | mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, | ||
309 | THREAD_SIZE / 1024 * account); | ||
310 | |||
311 | memcg_kmem_update_page_stat(first_page, MEMCG_KERNEL_STACK_KB, | ||
312 | account * (THREAD_SIZE / 1024)); | ||
313 | } | ||
227 | } | 314 | } |
228 | 315 | ||
229 | void free_task(struct task_struct *tsk) | 316 | static void release_task_stack(struct task_struct *tsk) |
230 | { | 317 | { |
231 | account_kernel_stack(tsk->stack, -1); | 318 | account_kernel_stack(tsk, -1); |
232 | arch_release_thread_stack(tsk->stack); | 319 | arch_release_thread_stack(tsk->stack); |
233 | free_thread_stack(tsk->stack); | 320 | free_thread_stack(tsk); |
321 | tsk->stack = NULL; | ||
322 | #ifdef CONFIG_VMAP_STACK | ||
323 | tsk->stack_vm_area = NULL; | ||
324 | #endif | ||
325 | } | ||
326 | |||
327 | #ifdef CONFIG_THREAD_INFO_IN_TASK | ||
328 | void put_task_stack(struct task_struct *tsk) | ||
329 | { | ||
330 | if (atomic_dec_and_test(&tsk->stack_refcount)) | ||
331 | release_task_stack(tsk); | ||
332 | } | ||
333 | #endif | ||
334 | |||
335 | void free_task(struct task_struct *tsk) | ||
336 | { | ||
337 | #ifndef CONFIG_THREAD_INFO_IN_TASK | ||
338 | /* | ||
339 | * The task is finally done with both the stack and thread_info, | ||
340 | * so free both. | ||
341 | */ | ||
342 | release_task_stack(tsk); | ||
343 | #else | ||
344 | /* | ||
345 | * If the task had a separate stack allocation, it should be gone | ||
346 | * by now. | ||
347 | */ | ||
348 | WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0); | ||
349 | #endif | ||
234 | rt_mutex_debug_task_free(tsk); | 350 | rt_mutex_debug_task_free(tsk); |
235 | ftrace_graph_exit_task(tsk); | 351 | ftrace_graph_exit_task(tsk); |
236 | put_seccomp_filter(tsk); | 352 | put_seccomp_filter(tsk); |
@@ -342,6 +458,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) | |||
342 | { | 458 | { |
343 | struct task_struct *tsk; | 459 | struct task_struct *tsk; |
344 | unsigned long *stack; | 460 | unsigned long *stack; |
461 | struct vm_struct *stack_vm_area; | ||
345 | int err; | 462 | int err; |
346 | 463 | ||
347 | if (node == NUMA_NO_NODE) | 464 | if (node == NUMA_NO_NODE) |
@@ -354,11 +471,26 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) | |||
354 | if (!stack) | 471 | if (!stack) |
355 | goto free_tsk; | 472 | goto free_tsk; |
356 | 473 | ||
474 | stack_vm_area = task_stack_vm_area(tsk); | ||
475 | |||
357 | err = arch_dup_task_struct(tsk, orig); | 476 | err = arch_dup_task_struct(tsk, orig); |
477 | |||
478 | /* | ||
479 | * arch_dup_task_struct() clobbers the stack-related fields. Make | ||
480 | * sure they're properly initialized before using any stack-related | ||
481 | * functions again. | ||
482 | */ | ||
483 | tsk->stack = stack; | ||
484 | #ifdef CONFIG_VMAP_STACK | ||
485 | tsk->stack_vm_area = stack_vm_area; | ||
486 | #endif | ||
487 | #ifdef CONFIG_THREAD_INFO_IN_TASK | ||
488 | atomic_set(&tsk->stack_refcount, 1); | ||
489 | #endif | ||
490 | |||
358 | if (err) | 491 | if (err) |
359 | goto free_stack; | 492 | goto free_stack; |
360 | 493 | ||
361 | tsk->stack = stack; | ||
362 | #ifdef CONFIG_SECCOMP | 494 | #ifdef CONFIG_SECCOMP |
363 | /* | 495 | /* |
364 | * We must handle setting up seccomp filters once we're under | 496 | * We must handle setting up seccomp filters once we're under |
@@ -390,14 +522,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) | |||
390 | tsk->task_frag.page = NULL; | 522 | tsk->task_frag.page = NULL; |
391 | tsk->wake_q.next = NULL; | 523 | tsk->wake_q.next = NULL; |
392 | 524 | ||
393 | account_kernel_stack(stack, 1); | 525 | account_kernel_stack(tsk, 1); |
394 | 526 | ||
395 | kcov_task_init(tsk); | 527 | kcov_task_init(tsk); |
396 | 528 | ||
397 | return tsk; | 529 | return tsk; |
398 | 530 | ||
399 | free_stack: | 531 | free_stack: |
400 | free_thread_stack(stack); | 532 | free_thread_stack(tsk); |
401 | free_tsk: | 533 | free_tsk: |
402 | free_task_struct(tsk); | 534 | free_task_struct(tsk); |
403 | return NULL; | 535 | return NULL; |
@@ -1715,6 +1847,7 @@ bad_fork_cleanup_count: | |||
1715 | atomic_dec(&p->cred->user->processes); | 1847 | atomic_dec(&p->cred->user->processes); |
1716 | exit_creds(p); | 1848 | exit_creds(p); |
1717 | bad_fork_free: | 1849 | bad_fork_free: |
1850 | put_task_stack(p); | ||
1718 | free_task(p); | 1851 | free_task(p); |
1719 | fork_out: | 1852 | fork_out: |
1720 | return ERR_PTR(retval); | 1853 | return ERR_PTR(retval); |
diff --git a/kernel/kthread.c b/kernel/kthread.c index 9ff173dca1ae..4ab4c3766a80 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -64,7 +64,7 @@ static inline struct kthread *to_kthread(struct task_struct *k) | |||
64 | static struct kthread *to_live_kthread(struct task_struct *k) | 64 | static struct kthread *to_live_kthread(struct task_struct *k) |
65 | { | 65 | { |
66 | struct completion *vfork = ACCESS_ONCE(k->vfork_done); | 66 | struct completion *vfork = ACCESS_ONCE(k->vfork_done); |
67 | if (likely(vfork)) | 67 | if (likely(vfork) && try_get_task_stack(k)) |
68 | return __to_kthread(vfork); | 68 | return __to_kthread(vfork); |
69 | return NULL; | 69 | return NULL; |
70 | } | 70 | } |
@@ -425,8 +425,10 @@ void kthread_unpark(struct task_struct *k) | |||
425 | { | 425 | { |
426 | struct kthread *kthread = to_live_kthread(k); | 426 | struct kthread *kthread = to_live_kthread(k); |
427 | 427 | ||
428 | if (kthread) | 428 | if (kthread) { |
429 | __kthread_unpark(k, kthread); | 429 | __kthread_unpark(k, kthread); |
430 | put_task_stack(k); | ||
431 | } | ||
430 | } | 432 | } |
431 | EXPORT_SYMBOL_GPL(kthread_unpark); | 433 | EXPORT_SYMBOL_GPL(kthread_unpark); |
432 | 434 | ||
@@ -455,6 +457,7 @@ int kthread_park(struct task_struct *k) | |||
455 | wait_for_completion(&kthread->parked); | 457 | wait_for_completion(&kthread->parked); |
456 | } | 458 | } |
457 | } | 459 | } |
460 | put_task_stack(k); | ||
458 | ret = 0; | 461 | ret = 0; |
459 | } | 462 | } |
460 | return ret; | 463 | return ret; |
@@ -490,6 +493,7 @@ int kthread_stop(struct task_struct *k) | |||
490 | __kthread_unpark(k, kthread); | 493 | __kthread_unpark(k, kthread); |
491 | wake_up_process(k); | 494 | wake_up_process(k); |
492 | wait_for_completion(&kthread->exited); | 495 | wait_for_completion(&kthread->exited); |
496 | put_task_stack(k); | ||
493 | } | 497 | } |
494 | ret = k->exit_code; | 498 | ret = k->exit_code; |
495 | put_task_struct(k); | 499 | put_task_struct(k); |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fac6492f0b98..94732d1ab00a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -2781,6 +2781,10 @@ static struct rq *finish_task_switch(struct task_struct *prev) | |||
2781 | * task and put them back on the free list. | 2781 | * task and put them back on the free list. |
2782 | */ | 2782 | */ |
2783 | kprobe_flush_task(prev); | 2783 | kprobe_flush_task(prev); |
2784 | |||
2785 | /* Task is done with its stack. */ | ||
2786 | put_task_stack(prev); | ||
2787 | |||
2784 | put_task_struct(prev); | 2788 | put_task_struct(prev); |
2785 | } | 2789 | } |
2786 | 2790 | ||
@@ -3403,7 +3407,6 @@ static void __sched notrace __schedule(bool preempt) | |||
3403 | 3407 | ||
3404 | balance_callback(rq); | 3408 | balance_callback(rq); |
3405 | } | 3409 | } |
3406 | STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */ | ||
3407 | 3410 | ||
3408 | void __noreturn do_task_dead(void) | 3411 | void __noreturn do_task_dead(void) |
3409 | { | 3412 | { |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 58df5590d028..055f935d4421 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -1021,7 +1021,11 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | |||
1021 | * per-task data have been completed by this moment. | 1021 | * per-task data have been completed by this moment. |
1022 | */ | 1022 | */ |
1023 | smp_wmb(); | 1023 | smp_wmb(); |
1024 | #ifdef CONFIG_THREAD_INFO_IN_TASK | ||
1025 | p->cpu = cpu; | ||
1026 | #else | ||
1024 | task_thread_info(p)->cpu = cpu; | 1027 | task_thread_info(p)->cpu = cpu; |
1028 | #endif | ||
1025 | p->wake_cpu = cpu; | 1029 | p->wake_cpu = cpu; |
1026 | #endif | 1030 | #endif |
1027 | } | 1031 | } |
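With CONFIG_THREAD_INFO_IN_TASK the CPU number lives in task_struct rather than in thread_info at the base of the stack, hence the #ifdef on the writer above. The reader side follows the same split; a sketch mirroring the task_cpu() accessor (not part of this hunk):

static inline unsigned int task_cpu(const struct task_struct *p)
{
#ifdef CONFIG_THREAD_INFO_IN_TASK
        return p->cpu;                          /* field embedded in task_struct */
#else
        return task_thread_info(p)->cpu;        /* legacy: thread_info sits on the stack */
#endif
}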
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index f4b86e8ca1e7..ba3326785ca4 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -24,11 +24,6 @@ config HAVE_FUNCTION_GRAPH_TRACER | |||
24 | help | 24 | help |
25 | See Documentation/trace/ftrace-design.txt | 25 | See Documentation/trace/ftrace-design.txt |
26 | 26 | ||
27 | config HAVE_FUNCTION_GRAPH_FP_TEST | ||
28 | bool | ||
29 | help | ||
30 | See Documentation/trace/ftrace-design.txt | ||
31 | |||
32 | config HAVE_DYNAMIC_FTRACE | 27 | config HAVE_DYNAMIC_FTRACE |
33 | bool | 28 | bool |
34 | help | 29 | help |
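HAVE_FUNCTION_GRAPH_FP_TEST stops being a Kconfig symbol here; as the trace_functions_graph.c hunks below show, it becomes a plain preprocessor define that an architecture supplies from its own ftrace header, alongside the new HAVE_FUNCTION_GRAPH_RET_ADDR_PTR. A sketch of the opt-in (the header path is illustrative):

/* arch/<arch>/include/asm/ftrace.h */
#define HAVE_FUNCTION_GRAPH_FP_TEST             /* verify the recorded frame pointer on return */
#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR        /* record the address of each return-address slot */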
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 7363ccf79512..0cbe38a844fa 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
@@ -119,7 +119,7 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration, | |||
119 | /* Add a function return address to the trace stack on thread info.*/ | 119 | /* Add a function return address to the trace stack on thread info.*/ |
120 | int | 120 | int |
121 | ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, | 121 | ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, |
122 | unsigned long frame_pointer) | 122 | unsigned long frame_pointer, unsigned long *retp) |
123 | { | 123 | { |
124 | unsigned long long calltime; | 124 | unsigned long long calltime; |
125 | int index; | 125 | int index; |
@@ -171,7 +171,12 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, | |||
171 | current->ret_stack[index].func = func; | 171 | current->ret_stack[index].func = func; |
172 | current->ret_stack[index].calltime = calltime; | 172 | current->ret_stack[index].calltime = calltime; |
173 | current->ret_stack[index].subtime = 0; | 173 | current->ret_stack[index].subtime = 0; |
174 | #ifdef HAVE_FUNCTION_GRAPH_FP_TEST | ||
174 | current->ret_stack[index].fp = frame_pointer; | 175 | current->ret_stack[index].fp = frame_pointer; |
176 | #endif | ||
177 | #ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR | ||
178 | current->ret_stack[index].retp = retp; | ||
179 | #endif | ||
175 | *depth = current->curr_ret_stack; | 180 | *depth = current->curr_ret_stack; |
176 | 181 | ||
177 | return 0; | 182 | return 0; |
@@ -204,7 +209,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, | |||
204 | return; | 209 | return; |
205 | } | 210 | } |
206 | 211 | ||
207 | #if defined(CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST) && !defined(CC_USING_FENTRY) | 212 | #ifdef HAVE_FUNCTION_GRAPH_FP_TEST |
208 | /* | 213 | /* |
209 | * The arch may choose to record the frame pointer used | 214 | * The arch may choose to record the frame pointer used |
210 | * and check it here to make sure that it is what we expect it | 215 | * and check it here to make sure that it is what we expect it |
@@ -279,6 +284,64 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) | |||
279 | return ret; | 284 | return ret; |
280 | } | 285 | } |
281 | 286 | ||
287 | /** | ||
288 | * ftrace_graph_ret_addr - convert a potentially modified stack return address | ||
289 | * to its original value | ||
290 | * | ||
291 | * This function can be called by stack unwinding code to convert a found stack | ||
292 | * return address ('ret') to its original value, in case the function graph | ||
293 | * tracer has modified it to be 'return_to_handler'. If the address hasn't | ||
294 | * been modified, the unchanged value of 'ret' is returned. | ||
295 | * | ||
296 | * 'idx' is a state variable which should be initialized by the caller to zero | ||
297 | * before the first call. | ||
298 | * | ||
299 | * 'retp' is a pointer to the return address on the stack. It's ignored if | ||
300 | * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined. | ||
301 | */ | ||
302 | #ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR | ||
303 | unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, | ||
304 | unsigned long ret, unsigned long *retp) | ||
305 | { | ||
306 | int index = task->curr_ret_stack; | ||
307 | int i; | ||
308 | |||
309 | if (ret != (unsigned long)return_to_handler) | ||
310 | return ret; | ||
311 | |||
312 | if (index < -1) | ||
313 | index += FTRACE_NOTRACE_DEPTH; | ||
314 | |||
315 | if (index < 0) | ||
316 | return ret; | ||
317 | |||
318 | for (i = 0; i <= index; i++) | ||
319 | if (task->ret_stack[i].retp == retp) | ||
320 | return task->ret_stack[i].ret; | ||
321 | |||
322 | return ret; | ||
323 | } | ||
324 | #else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */ | ||
325 | unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, | ||
326 | unsigned long ret, unsigned long *retp) | ||
327 | { | ||
328 | int task_idx; | ||
329 | |||
330 | if (ret != (unsigned long)return_to_handler) | ||
331 | return ret; | ||
332 | |||
333 | task_idx = task->curr_ret_stack; | ||
334 | |||
335 | if (!task->ret_stack || task_idx < *idx) | ||
336 | return ret; | ||
337 | |||
338 | task_idx -= *idx; | ||
339 | (*idx)++; | ||
340 | |||
341 | return task->ret_stack[task_idx].ret; | ||
342 | } | ||
343 | #endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */ | ||
344 | |||
282 | int __trace_graph_entry(struct trace_array *tr, | 345 | int __trace_graph_entry(struct trace_array *tr, |
283 | struct ftrace_graph_ent *trace, | 346 | struct ftrace_graph_ent *trace, |
284 | unsigned long flags, | 347 | unsigned long flags, |
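The kernel-doc above spells out the calling convention: the unwinder keeps a per-walk 'idx' initialized to zero and passes every candidate return address along with the stack slot it was read from. A hedged sketch of a frame-pointer walk using the helper (the loop and the printing are illustrative; only ftrace_graph_ret_addr() comes from this diff):

static void dump_frames(struct task_struct *task, unsigned long *bp)
{
        int graph_idx = 0;                      /* state for ftrace_graph_ret_addr() */

        while (bp) {
                unsigned long *retp = bp + 1;   /* slot holding the saved return address */
                unsigned long addr = *retp;

                /* Undo the graph tracer's rewrite to return_to_handler, if any. */
                addr = ftrace_graph_ret_addr(task, &graph_idx, addr, retp);

                printk("  %pS\n", (void *)addr);
                bp = (unsigned long *)*bp;      /* follow the saved frame pointer */
        }
}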
diff --git a/lib/dma-debug.c b/lib/dma-debug.c index fcfa1939ac41..06f02f6aecd2 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/stacktrace.h> | 22 | #include <linux/stacktrace.h> |
23 | #include <linux/dma-debug.h> | 23 | #include <linux/dma-debug.h> |
24 | #include <linux/spinlock.h> | 24 | #include <linux/spinlock.h> |
25 | #include <linux/vmalloc.h> | ||
25 | #include <linux/debugfs.h> | 26 | #include <linux/debugfs.h> |
26 | #include <linux/uaccess.h> | 27 | #include <linux/uaccess.h> |
27 | #include <linux/export.h> | 28 | #include <linux/export.h> |
@@ -1164,11 +1165,32 @@ static void check_unmap(struct dma_debug_entry *ref) | |||
1164 | put_hash_bucket(bucket, &flags); | 1165 | put_hash_bucket(bucket, &flags); |
1165 | } | 1166 | } |
1166 | 1167 | ||
1167 | static void check_for_stack(struct device *dev, void *addr) | 1168 | static void check_for_stack(struct device *dev, |
1169 | struct page *page, size_t offset) | ||
1168 | { | 1170 | { |
1169 | if (object_is_on_stack(addr)) | 1171 | void *addr; |
1170 | err_printk(dev, NULL, "DMA-API: device driver maps memory from " | 1172 | struct vm_struct *stack_vm_area = task_stack_vm_area(current); |
1171 | "stack [addr=%p]\n", addr); | 1173 | |
1174 | if (!stack_vm_area) { | ||
1175 | /* Stack is direct-mapped. */ | ||
1176 | if (PageHighMem(page)) | ||
1177 | return; | ||
1178 | addr = page_address(page) + offset; | ||
1179 | if (object_is_on_stack(addr)) | ||
1180 | err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr); | ||
1181 | } else { | ||
1182 | /* Stack is vmalloced. */ | ||
1183 | int i; | ||
1184 | |||
1185 | for (i = 0; i < stack_vm_area->nr_pages; i++) { | ||
1186 | if (page != stack_vm_area->pages[i]) | ||
1187 | continue; | ||
1188 | |||
1189 | addr = (u8 *)current->stack + i * PAGE_SIZE + offset; | ||
1190 | err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr); | ||
1191 | break; | ||
1192 | } | ||
1193 | } | ||
1172 | } | 1194 | } |
1173 | 1195 | ||
1174 | static inline bool overlap(void *addr, unsigned long len, void *start, void *end) | 1196 | static inline bool overlap(void *addr, unsigned long len, void *start, void *end) |
@@ -1291,10 +1313,11 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, | |||
1291 | if (map_single) | 1313 | if (map_single) |
1292 | entry->type = dma_debug_single; | 1314 | entry->type = dma_debug_single; |
1293 | 1315 | ||
1316 | check_for_stack(dev, page, offset); | ||
1317 | |||
1294 | if (!PageHighMem(page)) { | 1318 | if (!PageHighMem(page)) { |
1295 | void *addr = page_address(page) + offset; | 1319 | void *addr = page_address(page) + offset; |
1296 | 1320 | ||
1297 | check_for_stack(dev, addr); | ||
1298 | check_for_illegal_area(dev, addr, size); | 1321 | check_for_illegal_area(dev, addr, size); |
1299 | } | 1322 | } |
1300 | 1323 | ||
@@ -1386,8 +1409,9 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, | |||
1386 | entry->sg_call_ents = nents; | 1409 | entry->sg_call_ents = nents; |
1387 | entry->sg_mapped_ents = mapped_ents; | 1410 | entry->sg_mapped_ents = mapped_ents; |
1388 | 1411 | ||
1412 | check_for_stack(dev, sg_page(s), s->offset); | ||
1413 | |||
1389 | if (!PageHighMem(sg_page(s))) { | 1414 | if (!PageHighMem(sg_page(s))) { |
1390 | check_for_stack(dev, sg_virt(s)); | ||
1391 | check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); | 1415 | check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); |
1392 | } | 1416 | } |
1393 | 1417 | ||
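check_for_stack() now works from the page rather than a virtual address: with CONFIG_VMAP_STACK a stack page need not have a linear-map alias, so the vmalloc case scans stack_vm_area->pages instead. The class of driver bug it reports looks roughly like this (a deliberately broken sketch; the device and direction are placeholders):

static int send_cmd(struct device *dev)
{
        u8 cmd[16];                             /* DMA buffer on the kernel stack: wrong */
        dma_addr_t handle;

        handle = dma_map_single(dev, cmd, sizeof(cmd), DMA_TO_DEVICE);
        if (dma_mapping_error(dev, handle))
                return -ENOMEM;
        /* dma-debug now warns: "device driver maps memory from stack". */
        dma_unmap_single(dev, handle, sizeof(cmd), DMA_TO_DEVICE);
        return 0;
}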
diff --git a/lib/syscall.c b/lib/syscall.c index e30e03932480..63239e097b13 100644 --- a/lib/syscall.c +++ b/lib/syscall.c | |||
@@ -7,9 +7,19 @@ static int collect_syscall(struct task_struct *target, long *callno, | |||
7 | unsigned long args[6], unsigned int maxargs, | 7 | unsigned long args[6], unsigned int maxargs, |
8 | unsigned long *sp, unsigned long *pc) | 8 | unsigned long *sp, unsigned long *pc) |
9 | { | 9 | { |
10 | struct pt_regs *regs = task_pt_regs(target); | 10 | struct pt_regs *regs; |
11 | if (unlikely(!regs)) | 11 | |
12 | if (!try_get_task_stack(target)) { | ||
13 | /* Task has no stack, so the task isn't in a syscall. */ | ||
14 | *callno = -1; | ||
15 | return 0; | ||
16 | } | ||
17 | |||
18 | regs = task_pt_regs(target); | ||
19 | if (unlikely(!regs)) { | ||
20 | put_task_stack(target); | ||
12 | return -EAGAIN; | 21 | return -EAGAIN; |
22 | } | ||
13 | 23 | ||
14 | *sp = user_stack_pointer(regs); | 24 | *sp = user_stack_pointer(regs); |
15 | *pc = instruction_pointer(regs); | 25 | *pc = instruction_pointer(regs); |
@@ -18,6 +28,7 @@ static int collect_syscall(struct task_struct *target, long *callno, | |||
18 | if (*callno != -1L && maxargs > 0) | 28 | if (*callno != -1L && maxargs > 0) |
19 | syscall_get_arguments(target, regs, 0, maxargs, args); | 29 | syscall_get_arguments(target, regs, 0, maxargs, args); |
20 | 30 | ||
31 | put_task_stack(target); | ||
21 | return 0; | 32 | return 0; |
22 | } | 33 | } |
23 | 34 | ||
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index 421456784bc6..b037ce9cf116 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c | |||
@@ -147,7 +147,7 @@ static void test_sys32_regs(void (*do_syscall)(struct syscall_args32 *)) | |||
147 | if (args.nr != getpid() || | 147 | if (args.nr != getpid() || |
148 | args.arg0 != 10 || args.arg1 != 11 || args.arg2 != 12 || | 148 | args.arg0 != 10 || args.arg1 != 11 || args.arg2 != 12 || |
149 | args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) { | 149 | args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) { |
150 | printf("[FAIL]\tgetpid() failed to preseve regs\n"); | 150 | printf("[FAIL]\tgetpid() failed to preserve regs\n"); |
151 | nerrs++; | 151 | nerrs++; |
152 | } else { | 152 | } else { |
153 | printf("[OK]\tgetpid() preserves regs\n"); | 153 | printf("[OK]\tgetpid() preserves regs\n"); |
@@ -162,7 +162,7 @@ static void test_sys32_regs(void (*do_syscall)(struct syscall_args32 *)) | |||
162 | if (args.nr != 0 || | 162 | if (args.nr != 0 || |
163 | args.arg0 != getpid() || args.arg1 != SIGUSR1 || args.arg2 != 12 || | 163 | args.arg0 != getpid() || args.arg1 != SIGUSR1 || args.arg2 != 12 || |
164 | args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) { | 164 | args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) { |
165 | printf("[FAIL]\tkill(getpid(), SIGUSR1) failed to preseve regs\n"); | 165 | printf("[FAIL]\tkill(getpid(), SIGUSR1) failed to preserve regs\n"); |
166 | nerrs++; | 166 | nerrs++; |
167 | } else { | 167 | } else { |
168 | printf("[OK]\tkill(getpid(), SIGUSR1) preserves regs\n"); | 168 | printf("[OK]\tkill(getpid(), SIGUSR1) preserves regs\n"); |
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c index 8a577e7070c6..246145b84a12 100644 --- a/tools/testing/selftests/x86/sigreturn.c +++ b/tools/testing/selftests/x86/sigreturn.c | |||
@@ -106,7 +106,7 @@ asm (".pushsection .text\n\t" | |||
106 | ".type int3, @function\n\t" | 106 | ".type int3, @function\n\t" |
107 | ".align 4096\n\t" | 107 | ".align 4096\n\t" |
108 | "int3:\n\t" | 108 | "int3:\n\t" |
109 | "mov %ss,%eax\n\t" | 109 | "mov %ss,%ecx\n\t" |
110 | "int3\n\t" | 110 | "int3\n\t" |
111 | ".size int3, . - int3\n\t" | 111 | ".size int3, . - int3\n\t" |
112 | ".align 4096, 0xcc\n\t" | 112 | ".align 4096, 0xcc\n\t" |
@@ -306,7 +306,7 @@ static volatile sig_atomic_t sig_corrupt_final_ss; | |||
306 | #ifdef __x86_64__ | 306 | #ifdef __x86_64__ |
307 | # define REG_IP REG_RIP | 307 | # define REG_IP REG_RIP |
308 | # define REG_SP REG_RSP | 308 | # define REG_SP REG_RSP |
309 | # define REG_AX REG_RAX | 309 | # define REG_CX REG_RCX |
310 | 310 | ||
311 | struct selectors { | 311 | struct selectors { |
312 | unsigned short cs, gs, fs, ss; | 312 | unsigned short cs, gs, fs, ss; |
@@ -326,7 +326,7 @@ static unsigned short *csptr(ucontext_t *ctx) | |||
326 | #else | 326 | #else |
327 | # define REG_IP REG_EIP | 327 | # define REG_IP REG_EIP |
328 | # define REG_SP REG_ESP | 328 | # define REG_SP REG_ESP |
329 | # define REG_AX REG_EAX | 329 | # define REG_CX REG_ECX |
330 | 330 | ||
331 | static greg_t *ssptr(ucontext_t *ctx) | 331 | static greg_t *ssptr(ucontext_t *ctx) |
332 | { | 332 | { |
@@ -457,10 +457,10 @@ static void sigusr1(int sig, siginfo_t *info, void *ctx_void) | |||
457 | ctx->uc_mcontext.gregs[REG_IP] = | 457 | ctx->uc_mcontext.gregs[REG_IP] = |
458 | sig_cs == code16_sel ? 0 : (unsigned long)&int3; | 458 | sig_cs == code16_sel ? 0 : (unsigned long)&int3; |
459 | ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL; | 459 | ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL; |
460 | ctx->uc_mcontext.gregs[REG_AX] = 0; | 460 | ctx->uc_mcontext.gregs[REG_CX] = 0; |
461 | 461 | ||
462 | memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); | 462 | memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); |
463 | requested_regs[REG_AX] = *ssptr(ctx); /* The asm code does this. */ | 463 | requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. */ |
464 | 464 | ||
465 | return; | 465 | return; |
466 | } | 466 | } |
@@ -482,7 +482,7 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void) | |||
482 | unsigned short ss; | 482 | unsigned short ss; |
483 | asm ("mov %%ss,%0" : "=r" (ss)); | 483 | asm ("mov %%ss,%0" : "=r" (ss)); |
484 | 484 | ||
485 | greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX]; | 485 | greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX]; |
486 | if (asm_ss != sig_ss && sig == SIGTRAP) { | 486 | if (asm_ss != sig_ss && sig == SIGTRAP) { |
487 | /* Sanity check failure. */ | 487 | /* Sanity check failure. */ |
488 | printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n", | 488 | printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n", |
@@ -654,8 +654,8 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) | |||
654 | #endif | 654 | #endif |
655 | 655 | ||
656 | /* Sanity check on the kernel */ | 656 | /* Sanity check on the kernel */ |
657 | if (i == REG_AX && requested_regs[i] != resulting_regs[i]) { | 657 | if (i == REG_CX && requested_regs[i] != resulting_regs[i]) { |
658 | printf("[FAIL]\tAX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n", | 658 | printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n", |
659 | (unsigned long long)requested_regs[i], | 659 | (unsigned long long)requested_regs[i], |
660 | (unsigned long long)resulting_regs[i]); | 660 | (unsigned long long)resulting_regs[i]); |
661 | nerrs++; | 661 | nerrs++; |