aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexander Popov <alex.popov@linux.com>2018-08-16 18:16:58 -0400
committerKees Cook <keescook@chromium.org>2018-09-04 13:35:47 -0400
commitafaef01c001537fa97a25092d7f54d764dc7d8c1 (patch)
tree199a05427ea4c1e0c735058f322a5b21625b9ecd
parent57361846b52bc686112da6ca5368d11210796804 (diff)
x86/entry: Add STACKLEAK erasing the kernel stack at the end of syscalls
The STACKLEAK feature (initially developed by PaX Team) has the following benefits: 1. Reduces the information that can be revealed through kernel stack leak bugs. The idea of erasing the thread stack at the end of syscalls is similar to CONFIG_PAGE_POISONING and memzero_explicit() in kernel crypto, which all comply with FDP_RIP.2 (Full Residual Information Protection) of the Common Criteria standard. 2. Blocks some uninitialized stack variable attacks (e.g. CVE-2017-17712, CVE-2010-2963). That kind of bugs should be killed by improving C compilers in future, which might take a long time. This commit introduces the code filling the used part of the kernel stack with a poison value before returning to userspace. Full STACKLEAK feature also contains the gcc plugin which comes in a separate commit. The STACKLEAK feature is ported from grsecurity/PaX. More information at: https://grsecurity.net/ https://pax.grsecurity.net/ This code is modified from Brad Spengler/PaX Team's code in the last public patch of grsecurity/PaX based on our understanding of the code. Changes or omissions from the original code are ours and don't reflect the original grsecurity/PaX code. Performance impact: Hardware: Intel Core i7-4770, 16 GB RAM Test #1: building the Linux kernel on a single core 0.91% slowdown Test #2: hackbench -s 4096 -l 2000 -g 15 -f 25 -P 4.2% slowdown So the STACKLEAK description in Kconfig includes: "The tradeoff is the performance impact: on a single CPU system kernel compilation sees a 1% slowdown, other systems and workloads may vary and you are advised to test this feature on your expected workload before deploying it". Signed-off-by: Alexander Popov <alex.popov@linux.com> Acked-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com> Acked-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Kees Cook <keescook@chromium.org>
-rw-r--r--Documentation/x86/x86_64/mm.txt2
-rw-r--r--arch/Kconfig7
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/entry/calling.h14
-rw-r--r--arch/x86/entry/entry_32.S7
-rw-r--r--arch/x86/entry/entry_64.S3
-rw-r--r--arch/x86/entry/entry_64_compat.S5
-rw-r--r--include/linux/sched.h4
-rw-r--r--include/linux/stackleak.h26
-rw-r--r--kernel/Makefile4
-rw-r--r--kernel/fork.c3
-rw-r--r--kernel/stackleak.c62
-rw-r--r--scripts/gcc-plugins/Kconfig19
13 files changed, 157 insertions, 0 deletions
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 5432a96d31ff..600bc2afa27d 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -24,6 +24,7 @@ ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
24[fixmap start] - ffffffffff5fffff kernel-internal fixmap range 24[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
25ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI 25ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
26ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole 26ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
27STACKLEAK_POISON value in this last hole: ffffffffffff4111
27 28
28Virtual memory map with 5 level page tables: 29Virtual memory map with 5 level page tables:
29 30
@@ -50,6 +51,7 @@ ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
50[fixmap start] - ffffffffff5fffff kernel-internal fixmap range 51[fixmap start] - ffffffffff5fffff kernel-internal fixmap range
51ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI 52ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
52ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole 53ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
54STACKLEAK_POISON value in this last hole: ffffffffffff4111
53 55
54Architecture defines a 64-bit virtual address. Implementations can support 56Architecture defines a 64-bit virtual address. Implementations can support
55less. Currently supported are 48- and 57-bit virtual addresses. Bits 63 57less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
diff --git a/arch/Kconfig b/arch/Kconfig
index 6801123932a5..ee79ff56faab 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -419,6 +419,13 @@ config SECCOMP_FILTER
419 419
420 See Documentation/userspace-api/seccomp_filter.rst for details. 420 See Documentation/userspace-api/seccomp_filter.rst for details.
421 421
422config HAVE_ARCH_STACKLEAK
423 bool
424 help
425 An architecture should select this if it has the code which
426 fills the used part of the kernel stack with the STACKLEAK_POISON
427 value before returning from system calls.
428
422config HAVE_STACKPROTECTOR 429config HAVE_STACKPROTECTOR
423 bool 430 bool
424 help 431 help
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1a0be022f91d..662cb2cc9630 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -127,6 +127,7 @@ config X86
127 select HAVE_ARCH_PREL32_RELOCATIONS 127 select HAVE_ARCH_PREL32_RELOCATIONS
128 select HAVE_ARCH_SECCOMP_FILTER 128 select HAVE_ARCH_SECCOMP_FILTER
129 select HAVE_ARCH_THREAD_STRUCT_WHITELIST 129 select HAVE_ARCH_THREAD_STRUCT_WHITELIST
130 select HAVE_ARCH_STACKLEAK
130 select HAVE_ARCH_TRACEHOOK 131 select HAVE_ARCH_TRACEHOOK
131 select HAVE_ARCH_TRANSPARENT_HUGEPAGE 132 select HAVE_ARCH_TRANSPARENT_HUGEPAGE
132 select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 133 select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 352e70cd33e8..20d0885b00fb 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -329,8 +329,22 @@ For 32-bit we have the following conventions - kernel is built with
329 329
330#endif 330#endif
331 331
332.macro STACKLEAK_ERASE_NOCLOBBER
333#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
334 PUSH_AND_CLEAR_REGS
335 call stackleak_erase
336 POP_REGS
337#endif
338.endm
339
332#endif /* CONFIG_X86_64 */ 340#endif /* CONFIG_X86_64 */
333 341
342.macro STACKLEAK_ERASE
343#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
344 call stackleak_erase
345#endif
346.endm
347
334/* 348/*
335 * This does 'call enter_from_user_mode' unless we can avoid it based on 349 * This does 'call enter_from_user_mode' unless we can avoid it based on
336 * kernel config or using the static jump infrastructure. 350 * kernel config or using the static jump infrastructure.
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2767c625a52c..dfb975b4c981 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -46,6 +46,8 @@
46#include <asm/frame.h> 46#include <asm/frame.h>
47#include <asm/nospec-branch.h> 47#include <asm/nospec-branch.h>
48 48
49#include "calling.h"
50
49 .section .entry.text, "ax" 51 .section .entry.text, "ax"
50 52
51/* 53/*
@@ -711,6 +713,7 @@ ENTRY(ret_from_fork)
711 /* When we fork, we trace the syscall return in the child, too. */ 713 /* When we fork, we trace the syscall return in the child, too. */
712 movl %esp, %eax 714 movl %esp, %eax
713 call syscall_return_slowpath 715 call syscall_return_slowpath
716 STACKLEAK_ERASE
714 jmp restore_all 717 jmp restore_all
715 718
716 /* kernel thread */ 719 /* kernel thread */
@@ -885,6 +888,8 @@ ENTRY(entry_SYSENTER_32)
885 ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ 888 ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
886 "jmp .Lsyscall_32_done", X86_FEATURE_XENPV 889 "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
887 890
891 STACKLEAK_ERASE
892
888/* Opportunistic SYSEXIT */ 893/* Opportunistic SYSEXIT */
889 TRACE_IRQS_ON /* User mode traces as IRQs on. */ 894 TRACE_IRQS_ON /* User mode traces as IRQs on. */
890 895
@@ -996,6 +1001,8 @@ ENTRY(entry_INT80_32)
996 call do_int80_syscall_32 1001 call do_int80_syscall_32
997.Lsyscall_32_done: 1002.Lsyscall_32_done:
998 1003
1004 STACKLEAK_ERASE
1005
999restore_all: 1006restore_all:
1000 TRACE_IRQS_IRET 1007 TRACE_IRQS_IRET
1001 SWITCH_TO_ENTRY_STACK 1008 SWITCH_TO_ENTRY_STACK
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 957dfb693ecc..a5dd28093020 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -329,6 +329,8 @@ syscall_return_via_sysret:
329 * We are on the trampoline stack. All regs except RDI are live. 329 * We are on the trampoline stack. All regs except RDI are live.
330 * We can do future final exit work right here. 330 * We can do future final exit work right here.
331 */ 331 */
332 STACKLEAK_ERASE_NOCLOBBER
333
332 SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi 334 SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
333 335
334 popq %rdi 336 popq %rdi
@@ -688,6 +690,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
688 * We are on the trampoline stack. All regs except RDI are live. 690 * We are on the trampoline stack. All regs except RDI are live.
689 * We can do future final exit work right here. 691 * We can do future final exit work right here.
690 */ 692 */
693 STACKLEAK_ERASE_NOCLOBBER
691 694
692 SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi 695 SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
693 696
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 7d0df78db727..8eaf8952c408 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -261,6 +261,11 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
261 261
262 /* Opportunistic SYSRET */ 262 /* Opportunistic SYSRET */
263sysret32_from_system_call: 263sysret32_from_system_call:
264 /*
265 * We are not going to return to userspace from the trampoline
266 * stack. So let's erase the thread stack right now.
267 */
268 STACKLEAK_ERASE
264 TRACE_IRQS_ON /* User mode traces as IRQs on. */ 269 TRACE_IRQS_ON /* User mode traces as IRQs on. */
265 movq RBX(%rsp), %rbx /* pt_regs->rbx */ 270 movq RBX(%rsp), %rbx /* pt_regs->rbx */
266 movq RBP(%rsp), %rbp /* pt_regs->rbp */ 271 movq RBP(%rsp), %rbp /* pt_regs->rbp */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 977cb57d7bc9..c1a23acd24e7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1192,6 +1192,10 @@ struct task_struct {
1192 void *security; 1192 void *security;
1193#endif 1193#endif
1194 1194
1195#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
1196 unsigned long lowest_stack;
1197#endif
1198
1195 /* 1199 /*
1196 * New fields for task_struct should be added above here, so that 1200 * New fields for task_struct should be added above here, so that
1197 * they are included in the randomized portion of task_struct. 1201 * they are included in the randomized portion of task_struct.
diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h
new file mode 100644
index 000000000000..628c2b947b89
--- /dev/null
+++ b/include/linux/stackleak.h
@@ -0,0 +1,26 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _LINUX_STACKLEAK_H
3#define _LINUX_STACKLEAK_H
4
5#include <linux/sched.h>
6#include <linux/sched/task_stack.h>
7
8/*
9 * Check that the poison value points to the unused hole in the
10 * virtual memory map for your platform.
11 */
12#define STACKLEAK_POISON -0xBEEF
13#define STACKLEAK_SEARCH_DEPTH 128
14
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
#include <asm/stacktrace.h>

/*
 * stackleak_task_init() - set the initial 'lowest_stack' mark of a task.
 * @t: task whose 'lowest_stack' field is initialised.
 *
 * The mark is placed one 'unsigned long' above end_of_stack(t).
 */
static inline void stackleak_task_init(struct task_struct *t)
{
	unsigned long stack_end = (unsigned long)end_of_stack(t);

	t->lowest_stack = stack_end + sizeof(unsigned long);
}
#else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
/* STACKLEAK is compiled out: there is no 'lowest_stack' field to set up. */
static inline void stackleak_task_init(struct task_struct *t)
{
}
#endif
25
26#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 7a63d567fdb5..7343b3a9bff0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -117,6 +117,10 @@ obj-$(CONFIG_HAS_IOMEM) += iomem.o
117obj-$(CONFIG_ZONE_DEVICE) += memremap.o 117obj-$(CONFIG_ZONE_DEVICE) += memremap.o
118obj-$(CONFIG_RSEQ) += rseq.o 118obj-$(CONFIG_RSEQ) += rseq.o
119 119
120obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o
121KASAN_SANITIZE_stackleak.o := n
122KCOV_INSTRUMENT_stackleak.o := n
123
120$(obj)/configs.o: $(obj)/config_data.h 124$(obj)/configs.o: $(obj)/config_data.h
121 125
122targets += config_data.gz 126targets += config_data.gz
diff --git a/kernel/fork.c b/kernel/fork.c
index d896e9ca38b0..47911e49c2b1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -91,6 +91,7 @@
91#include <linux/kcov.h> 91#include <linux/kcov.h>
92#include <linux/livepatch.h> 92#include <linux/livepatch.h>
93#include <linux/thread_info.h> 93#include <linux/thread_info.h>
94#include <linux/stackleak.h>
94 95
95#include <asm/pgtable.h> 96#include <asm/pgtable.h>
96#include <asm/pgalloc.h> 97#include <asm/pgalloc.h>
@@ -1880,6 +1881,8 @@ static __latent_entropy struct task_struct *copy_process(
1880 if (retval) 1881 if (retval)
1881 goto bad_fork_cleanup_io; 1882 goto bad_fork_cleanup_io;
1882 1883
1884 stackleak_task_init(p);
1885
1883 if (pid != &init_struct_pid) { 1886 if (pid != &init_struct_pid) {
1884 pid = alloc_pid(p->nsproxy->pid_ns_for_children); 1887 pid = alloc_pid(p->nsproxy->pid_ns_for_children);
1885 if (IS_ERR(pid)) { 1888 if (IS_ERR(pid)) {
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
new file mode 100644
index 000000000000..deba0d8992f9
--- /dev/null
+++ b/kernel/stackleak.c
@@ -0,0 +1,62 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * This code fills the used part of the kernel stack with a poison value
4 * before returning to userspace. It's part of the STACKLEAK feature
5 * ported from grsecurity/PaX.
6 *
7 * Author: Alexander Popov <alex.popov@linux.com>
8 *
9 * STACKLEAK reduces the information which kernel stack leak bugs can
10 * reveal and blocks some uninitialized stack variable attacks.
11 */
12
13#include <linux/stackleak.h>
14
15asmlinkage void stackleak_erase(void)
16{
17 /* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
18 unsigned long kstack_ptr = current->lowest_stack;
19 unsigned long boundary = (unsigned long)end_of_stack(current);
20 unsigned int poison_count = 0;
21 const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
22
23 /* Check that 'lowest_stack' value is sane */
24 if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))
25 kstack_ptr = boundary;
26
27 /* Search for the poison value in the kernel stack */
28 while (kstack_ptr > boundary && poison_count <= depth) {
29 if (*(unsigned long *)kstack_ptr == STACKLEAK_POISON)
30 poison_count++;
31 else
32 poison_count = 0;
33
34 kstack_ptr -= sizeof(unsigned long);
35 }
36
37 /*
38 * One 'long int' at the bottom of the thread stack is reserved and
39 * should not be poisoned (see CONFIG_SCHED_STACK_END_CHECK=y).
40 */
41 if (kstack_ptr == boundary)
42 kstack_ptr += sizeof(unsigned long);
43
44 /*
45 * Now write the poison value to the kernel stack. Start from
46 * 'kstack_ptr' and move up till the new 'boundary'. We assume that
47 * the stack pointer doesn't change when we write poison.
48 */
49 if (on_thread_stack())
50 boundary = current_stack_pointer;
51 else
52 boundary = current_top_of_stack();
53
54 while (kstack_ptr < boundary) {
55 *(unsigned long *)kstack_ptr = STACKLEAK_POISON;
56 kstack_ptr += sizeof(unsigned long);
57 }
58
59 /* Reset the 'lowest_stack' value for the next syscall */
60 current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
61}
62
diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
index cb0c889e13aa..977b84e69787 100644
--- a/scripts/gcc-plugins/Kconfig
+++ b/scripts/gcc-plugins/Kconfig
@@ -139,4 +139,23 @@ config GCC_PLUGIN_RANDSTRUCT_PERFORMANCE
139 in structures. This reduces the performance hit of RANDSTRUCT 139 in structures. This reduces the performance hit of RANDSTRUCT
140 at the cost of weakened randomization. 140 at the cost of weakened randomization.
141 141
142config GCC_PLUGIN_STACKLEAK
143 bool "Erase the kernel stack before returning from syscalls"
144 depends on GCC_PLUGINS
145 depends on HAVE_ARCH_STACKLEAK
146 help
147 This option makes the kernel erase the kernel stack before
148 returning from system calls. That reduces the information which
149 kernel stack leak bugs can reveal and blocks some uninitialized
150 stack variable attacks.
151
152 The tradeoff is the performance impact: on a single CPU system kernel
153 compilation sees a 1% slowdown, other systems and workloads may vary
154 and you are advised to test this feature on your expected workload
155 before deploying it.
156
157 This plugin was ported from grsecurity/PaX. More information at:
158 * https://grsecurity.net/
159 * https://pax.grsecurity.net/
160
142endif 161endif