diff options
author | Alexander Popov <alex.popov@linux.com> | 2018-08-16 18:16:58 -0400 |
---|---|---|
committer | Kees Cook <keescook@chromium.org> | 2018-09-04 13:35:47 -0400 |
commit | afaef01c001537fa97a25092d7f54d764dc7d8c1 (patch) | |
tree | 199a05427ea4c1e0c735058f322a5b21625b9ecd | |
parent | 57361846b52bc686112da6ca5368d11210796804 (diff) |
x86/entry: Add STACKLEAK erasing the kernel stack at the end of syscalls
The STACKLEAK feature (initially developed by PaX Team) has the following
benefits:
1. Reduces the information that can be revealed through kernel stack leak
bugs. The idea of erasing the thread stack at the end of syscalls is
similar to CONFIG_PAGE_POISONING and memzero_explicit() in kernel
crypto, which all comply with FDP_RIP.2 (Full Residual Information
Protection) of the Common Criteria standard.
2. Blocks some uninitialized stack variable attacks (e.g. CVE-2017-17712,
CVE-2010-2963). That kind of bug should be killed by improving C
compilers in the future, which might take a long time.
This commit introduces the code that fills the used part of the kernel
stack with a poison value before returning to userspace. The full
STACKLEAK feature also contains the gcc plugin, which comes in a
separate commit.
The STACKLEAK feature is ported from grsecurity/PaX. More information at:
https://grsecurity.net/
https://pax.grsecurity.net/
This code is modified from Brad Spengler/PaX Team's code in the last
public patch of grsecurity/PaX based on our understanding of the code.
Changes or omissions from the original code are ours and don't reflect
the original grsecurity/PaX code.
Performance impact:
Hardware: Intel Core i7-4770, 16 GB RAM
Test #1: building the Linux kernel on a single core
0.91% slowdown
Test #2: hackbench -s 4096 -l 2000 -g 15 -f 25 -P
4.2% slowdown
So the STACKLEAK description in Kconfig includes: "The tradeoff is the
performance impact: on a single CPU system kernel compilation sees a 1%
slowdown, other systems and workloads may vary and you are advised to
test this feature on your expected workload before deploying it".
Signed-off-by: Alexander Popov <alex.popov@linux.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
-rw-r--r-- | Documentation/x86/x86_64/mm.txt | 2 | ||||
-rw-r--r-- | arch/Kconfig | 7 | ||||
-rw-r--r-- | arch/x86/Kconfig | 1 | ||||
-rw-r--r-- | arch/x86/entry/calling.h | 14 | ||||
-rw-r--r-- | arch/x86/entry/entry_32.S | 7 | ||||
-rw-r--r-- | arch/x86/entry/entry_64.S | 3 | ||||
-rw-r--r-- | arch/x86/entry/entry_64_compat.S | 5 | ||||
-rw-r--r-- | include/linux/sched.h | 4 | ||||
-rw-r--r-- | include/linux/stackleak.h | 26 | ||||
-rw-r--r-- | kernel/Makefile | 4 | ||||
-rw-r--r-- | kernel/fork.c | 3 | ||||
-rw-r--r-- | kernel/stackleak.c | 62 | ||||
-rw-r--r-- | scripts/gcc-plugins/Kconfig | 19 |
13 files changed, 157 insertions, 0 deletions
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 5432a96d31ff..600bc2afa27d 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt | |||
@@ -24,6 +24,7 @@ ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space | |||
24 | [fixmap start] - ffffffffff5fffff kernel-internal fixmap range | 24 | [fixmap start] - ffffffffff5fffff kernel-internal fixmap range |
25 | ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI | 25 | ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI |
26 | ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole | 26 | ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole |
27 | STACKLEAK_POISON value in this last hole: ffffffffffff4111 | ||
27 | 28 | ||
28 | Virtual memory map with 5 level page tables: | 29 | Virtual memory map with 5 level page tables: |
29 | 30 | ||
@@ -50,6 +51,7 @@ ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space | |||
50 | [fixmap start] - ffffffffff5fffff kernel-internal fixmap range | 51 | [fixmap start] - ffffffffff5fffff kernel-internal fixmap range |
51 | ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI | 52 | ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI |
52 | ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole | 53 | ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole |
54 | STACKLEAK_POISON value in this last hole: ffffffffffff4111 | ||
53 | 55 | ||
54 | Architecture defines a 64-bit virtual address. Implementations can support | 56 | Architecture defines a 64-bit virtual address. Implementations can support |
55 | less. Currently supported are 48- and 57-bit virtual addresses. Bits 63 | 57 | less. Currently supported are 48- and 57-bit virtual addresses. Bits 63 |
diff --git a/arch/Kconfig b/arch/Kconfig index 6801123932a5..ee79ff56faab 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
@@ -419,6 +419,13 @@ config SECCOMP_FILTER | |||
419 | 419 | ||
420 | See Documentation/userspace-api/seccomp_filter.rst for details. | 420 | See Documentation/userspace-api/seccomp_filter.rst for details. |
421 | 421 | ||
422 | config HAVE_ARCH_STACKLEAK | ||
423 | bool | ||
424 | help | ||
425 | An architecture should select this if it has the code which | ||
426 | fills the used part of the kernel stack with the STACKLEAK_POISON | ||
427 | value before returning from system calls. | ||
428 | |||
422 | config HAVE_STACKPROTECTOR | 429 | config HAVE_STACKPROTECTOR |
423 | bool | 430 | bool |
424 | help | 431 | help |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1a0be022f91d..662cb2cc9630 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -127,6 +127,7 @@ config X86 | |||
127 | select HAVE_ARCH_PREL32_RELOCATIONS | 127 | select HAVE_ARCH_PREL32_RELOCATIONS |
128 | select HAVE_ARCH_SECCOMP_FILTER | 128 | select HAVE_ARCH_SECCOMP_FILTER |
129 | select HAVE_ARCH_THREAD_STRUCT_WHITELIST | 129 | select HAVE_ARCH_THREAD_STRUCT_WHITELIST |
130 | select HAVE_ARCH_STACKLEAK | ||
130 | select HAVE_ARCH_TRACEHOOK | 131 | select HAVE_ARCH_TRACEHOOK |
131 | select HAVE_ARCH_TRANSPARENT_HUGEPAGE | 132 | select HAVE_ARCH_TRANSPARENT_HUGEPAGE |
132 | select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 | 133 | select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 |
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 352e70cd33e8..20d0885b00fb 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h | |||
@@ -329,8 +329,22 @@ For 32-bit we have the following conventions - kernel is built with | |||
329 | 329 | ||
330 | #endif | 330 | #endif |
331 | 331 | ||
332 | .macro STACKLEAK_ERASE_NOCLOBBER | ||
333 | #ifdef CONFIG_GCC_PLUGIN_STACKLEAK | ||
334 | PUSH_AND_CLEAR_REGS | ||
335 | call stackleak_erase | ||
336 | POP_REGS | ||
337 | #endif | ||
338 | .endm | ||
339 | |||
332 | #endif /* CONFIG_X86_64 */ | 340 | #endif /* CONFIG_X86_64 */ |
333 | 341 | ||
342 | .macro STACKLEAK_ERASE | ||
343 | #ifdef CONFIG_GCC_PLUGIN_STACKLEAK | ||
344 | call stackleak_erase | ||
345 | #endif | ||
346 | .endm | ||
347 | |||
334 | /* | 348 | /* |
335 | * This does 'call enter_from_user_mode' unless we can avoid it based on | 349 | * This does 'call enter_from_user_mode' unless we can avoid it based on |
336 | * kernel config or using the static jump infrastructure. | 350 | * kernel config or using the static jump infrastructure. |
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2767c625a52c..dfb975b4c981 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S | |||
@@ -46,6 +46,8 @@ | |||
46 | #include <asm/frame.h> | 46 | #include <asm/frame.h> |
47 | #include <asm/nospec-branch.h> | 47 | #include <asm/nospec-branch.h> |
48 | 48 | ||
49 | #include "calling.h" | ||
50 | |||
49 | .section .entry.text, "ax" | 51 | .section .entry.text, "ax" |
50 | 52 | ||
51 | /* | 53 | /* |
@@ -711,6 +713,7 @@ ENTRY(ret_from_fork) | |||
711 | /* When we fork, we trace the syscall return in the child, too. */ | 713 | /* When we fork, we trace the syscall return in the child, too. */ |
712 | movl %esp, %eax | 714 | movl %esp, %eax |
713 | call syscall_return_slowpath | 715 | call syscall_return_slowpath |
716 | STACKLEAK_ERASE | ||
714 | jmp restore_all | 717 | jmp restore_all |
715 | 718 | ||
716 | /* kernel thread */ | 719 | /* kernel thread */ |
@@ -885,6 +888,8 @@ ENTRY(entry_SYSENTER_32) | |||
885 | ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ | 888 | ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ |
886 | "jmp .Lsyscall_32_done", X86_FEATURE_XENPV | 889 | "jmp .Lsyscall_32_done", X86_FEATURE_XENPV |
887 | 890 | ||
891 | STACKLEAK_ERASE | ||
892 | |||
888 | /* Opportunistic SYSEXIT */ | 893 | /* Opportunistic SYSEXIT */ |
889 | TRACE_IRQS_ON /* User mode traces as IRQs on. */ | 894 | TRACE_IRQS_ON /* User mode traces as IRQs on. */ |
890 | 895 | ||
@@ -996,6 +1001,8 @@ ENTRY(entry_INT80_32) | |||
996 | call do_int80_syscall_32 | 1001 | call do_int80_syscall_32 |
997 | .Lsyscall_32_done: | 1002 | .Lsyscall_32_done: |
998 | 1003 | ||
1004 | STACKLEAK_ERASE | ||
1005 | |||
999 | restore_all: | 1006 | restore_all: |
1000 | TRACE_IRQS_IRET | 1007 | TRACE_IRQS_IRET |
1001 | SWITCH_TO_ENTRY_STACK | 1008 | SWITCH_TO_ENTRY_STACK |
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 957dfb693ecc..a5dd28093020 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S | |||
@@ -329,6 +329,8 @@ syscall_return_via_sysret: | |||
329 | * We are on the trampoline stack. All regs except RDI are live. | 329 | * We are on the trampoline stack. All regs except RDI are live. |
330 | * We can do future final exit work right here. | 330 | * We can do future final exit work right here. |
331 | */ | 331 | */ |
332 | STACKLEAK_ERASE_NOCLOBBER | ||
333 | |||
332 | SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi | 334 | SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi |
333 | 335 | ||
334 | popq %rdi | 336 | popq %rdi |
@@ -688,6 +690,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode) | |||
688 | * We are on the trampoline stack. All regs except RDI are live. | 690 | * We are on the trampoline stack. All regs except RDI are live. |
689 | * We can do future final exit work right here. | 691 | * We can do future final exit work right here. |
690 | */ | 692 | */ |
693 | STACKLEAK_ERASE_NOCLOBBER | ||
691 | 694 | ||
692 | SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi | 695 | SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi |
693 | 696 | ||
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 7d0df78db727..8eaf8952c408 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S | |||
@@ -261,6 +261,11 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe) | |||
261 | 261 | ||
262 | /* Opportunistic SYSRET */ | 262 | /* Opportunistic SYSRET */ |
263 | sysret32_from_system_call: | 263 | sysret32_from_system_call: |
264 | /* | ||
265 | * We are not going to return to userspace from the trampoline | ||
266 | * stack. So let's erase the thread stack right now. | ||
267 | */ | ||
268 | STACKLEAK_ERASE | ||
264 | TRACE_IRQS_ON /* User mode traces as IRQs on. */ | 269 | TRACE_IRQS_ON /* User mode traces as IRQs on. */ |
265 | movq RBX(%rsp), %rbx /* pt_regs->rbx */ | 270 | movq RBX(%rsp), %rbx /* pt_regs->rbx */ |
266 | movq RBP(%rsp), %rbp /* pt_regs->rbp */ | 271 | movq RBP(%rsp), %rbp /* pt_regs->rbp */ |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 977cb57d7bc9..c1a23acd24e7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -1192,6 +1192,10 @@ struct task_struct { | |||
1192 | void *security; | 1192 | void *security; |
1193 | #endif | 1193 | #endif |
1194 | 1194 | ||
1195 | #ifdef CONFIG_GCC_PLUGIN_STACKLEAK | ||
1196 | unsigned long lowest_stack; | ||
1197 | #endif | ||
1198 | |||
1195 | /* | 1199 | /* |
1196 | * New fields for task_struct should be added above here, so that | 1200 | * New fields for task_struct should be added above here, so that |
1197 | * they are included in the randomized portion of task_struct. | 1201 | * they are included in the randomized portion of task_struct. |
diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h new file mode 100644 index 000000000000..628c2b947b89 --- /dev/null +++ b/include/linux/stackleak.h | |||
@@ -0,0 +1,26 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | #ifndef _LINUX_STACKLEAK_H | ||
3 | #define _LINUX_STACKLEAK_H | ||
4 | |||
5 | #include <linux/sched.h> | ||
6 | #include <linux/sched/task_stack.h> | ||
7 | |||
8 | /* | ||
9 | * Check that the poison value points to the unused hole in the | ||
10 | * virtual memory map for your platform. | ||
11 | */ | ||
12 | #define STACKLEAK_POISON -0xBEEF | ||
13 | #define STACKLEAK_SEARCH_DEPTH 128 | ||
14 | |||
15 | #ifdef CONFIG_GCC_PLUGIN_STACKLEAK | ||
16 | #include <asm/stacktrace.h> | ||
17 | |||
18 | static inline void stackleak_task_init(struct task_struct *t) | ||
19 | { | ||
20 | t->lowest_stack = (unsigned long)end_of_stack(t) + sizeof(unsigned long); | ||
21 | } | ||
22 | #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */ | ||
23 | static inline void stackleak_task_init(struct task_struct *t) { } | ||
24 | #endif | ||
25 | |||
26 | #endif | ||
diff --git a/kernel/Makefile b/kernel/Makefile index 7a63d567fdb5..7343b3a9bff0 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -117,6 +117,10 @@ obj-$(CONFIG_HAS_IOMEM) += iomem.o | |||
117 | obj-$(CONFIG_ZONE_DEVICE) += memremap.o | 117 | obj-$(CONFIG_ZONE_DEVICE) += memremap.o |
118 | obj-$(CONFIG_RSEQ) += rseq.o | 118 | obj-$(CONFIG_RSEQ) += rseq.o |
119 | 119 | ||
120 | obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o | ||
121 | KASAN_SANITIZE_stackleak.o := n | ||
122 | KCOV_INSTRUMENT_stackleak.o := n | ||
123 | |||
120 | $(obj)/configs.o: $(obj)/config_data.h | 124 | $(obj)/configs.o: $(obj)/config_data.h |
121 | 125 | ||
122 | targets += config_data.gz | 126 | targets += config_data.gz |
diff --git a/kernel/fork.c b/kernel/fork.c index d896e9ca38b0..47911e49c2b1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -91,6 +91,7 @@ | |||
91 | #include <linux/kcov.h> | 91 | #include <linux/kcov.h> |
92 | #include <linux/livepatch.h> | 92 | #include <linux/livepatch.h> |
93 | #include <linux/thread_info.h> | 93 | #include <linux/thread_info.h> |
94 | #include <linux/stackleak.h> | ||
94 | 95 | ||
95 | #include <asm/pgtable.h> | 96 | #include <asm/pgtable.h> |
96 | #include <asm/pgalloc.h> | 97 | #include <asm/pgalloc.h> |
@@ -1880,6 +1881,8 @@ static __latent_entropy struct task_struct *copy_process( | |||
1880 | if (retval) | 1881 | if (retval) |
1881 | goto bad_fork_cleanup_io; | 1882 | goto bad_fork_cleanup_io; |
1882 | 1883 | ||
1884 | stackleak_task_init(p); | ||
1885 | |||
1883 | if (pid != &init_struct_pid) { | 1886 | if (pid != &init_struct_pid) { |
1884 | pid = alloc_pid(p->nsproxy->pid_ns_for_children); | 1887 | pid = alloc_pid(p->nsproxy->pid_ns_for_children); |
1885 | if (IS_ERR(pid)) { | 1888 | if (IS_ERR(pid)) { |
diff --git a/kernel/stackleak.c b/kernel/stackleak.c new file mode 100644 index 000000000000..deba0d8992f9 --- /dev/null +++ b/kernel/stackleak.c | |||
@@ -0,0 +1,62 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * This code fills the used part of the kernel stack with a poison value | ||
4 | * before returning to userspace. It's part of the STACKLEAK feature | ||
5 | * ported from grsecurity/PaX. | ||
6 | * | ||
7 | * Author: Alexander Popov <alex.popov@linux.com> | ||
8 | * | ||
9 | * STACKLEAK reduces the information which kernel stack leak bugs can | ||
10 | * reveal and blocks some uninitialized stack variable attacks. | ||
11 | */ | ||
12 | |||
13 | #include <linux/stackleak.h> | ||
14 | |||
15 | asmlinkage void stackleak_erase(void) | ||
16 | { | ||
17 | /* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */ | ||
18 | unsigned long kstack_ptr = current->lowest_stack; | ||
19 | unsigned long boundary = (unsigned long)end_of_stack(current); | ||
20 | unsigned int poison_count = 0; | ||
21 | const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long); | ||
22 | |||
23 | /* Check that 'lowest_stack' value is sane */ | ||
24 | if (unlikely(kstack_ptr - boundary >= THREAD_SIZE)) | ||
25 | kstack_ptr = boundary; | ||
26 | |||
27 | /* Search for the poison value in the kernel stack */ | ||
28 | while (kstack_ptr > boundary && poison_count <= depth) { | ||
29 | if (*(unsigned long *)kstack_ptr == STACKLEAK_POISON) | ||
30 | poison_count++; | ||
31 | else | ||
32 | poison_count = 0; | ||
33 | |||
34 | kstack_ptr -= sizeof(unsigned long); | ||
35 | } | ||
36 | |||
37 | /* | ||
38 | * One 'long int' at the bottom of the thread stack is reserved and | ||
39 | * should not be poisoned (see CONFIG_SCHED_STACK_END_CHECK=y). | ||
40 | */ | ||
41 | if (kstack_ptr == boundary) | ||
42 | kstack_ptr += sizeof(unsigned long); | ||
43 | |||
44 | /* | ||
45 | * Now write the poison value to the kernel stack. Start from | ||
46 | * 'kstack_ptr' and move up till the new 'boundary'. We assume that | ||
47 | * the stack pointer doesn't change when we write poison. | ||
48 | */ | ||
49 | if (on_thread_stack()) | ||
50 | boundary = current_stack_pointer; | ||
51 | else | ||
52 | boundary = current_top_of_stack(); | ||
53 | |||
54 | while (kstack_ptr < boundary) { | ||
55 | *(unsigned long *)kstack_ptr = STACKLEAK_POISON; | ||
56 | kstack_ptr += sizeof(unsigned long); | ||
57 | } | ||
58 | |||
59 | /* Reset the 'lowest_stack' value for the next syscall */ | ||
60 | current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64; | ||
61 | } | ||
62 | |||
diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig index cb0c889e13aa..977b84e69787 100644 --- a/scripts/gcc-plugins/Kconfig +++ b/scripts/gcc-plugins/Kconfig | |||
@@ -139,4 +139,23 @@ config GCC_PLUGIN_RANDSTRUCT_PERFORMANCE | |||
139 | in structures. This reduces the performance hit of RANDSTRUCT | 139 | in structures. This reduces the performance hit of RANDSTRUCT |
140 | at the cost of weakened randomization. | 140 | at the cost of weakened randomization. |
141 | 141 | ||
142 | config GCC_PLUGIN_STACKLEAK | ||
143 | bool "Erase the kernel stack before returning from syscalls" | ||
144 | depends on GCC_PLUGINS | ||
145 | depends on HAVE_ARCH_STACKLEAK | ||
146 | help | ||
147 | This option makes the kernel erase the kernel stack before | ||
148 | returning from system calls. That reduces the information which | ||
149 | kernel stack leak bugs can reveal and blocks some uninitialized | ||
150 | stack variable attacks. | ||
151 | |||
152 | The tradeoff is the performance impact: on a single CPU system kernel | ||
153 | compilation sees a 1% slowdown, other systems and workloads may vary | ||
154 | and you are advised to test this feature on your expected workload | ||
155 | before deploying it. | ||
156 | |||
157 | This plugin was ported from grsecurity/PaX. More information at: | ||
158 | * https://grsecurity.net/ | ||
159 | * https://pax.grsecurity.net/ | ||
160 | |||
142 | endif | 161 | endif |