aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndy Lutomirski <luto@kernel.org>2016-12-26 22:20:50 -0500
committerIngo Molnar <mingo@kernel.org>2017-01-05 03:20:02 -0500
commit6606021401032919c559a829a5d273ba1741b434 (patch)
treec8f79c28b0522c3e6969b7fff81ecc7f8af05645
parente02003b515e8d95f40f20f213622bb82510873d2 (diff)
selftests/x86: Add a selftest for SYSRET to noncanonical addresses
SYSRET to a noncanonical address will blow up on Intel CPUs. Linux needs to prevent this from happening in two major cases, and the criteria will become more complicated when support for larger virtual address spaces is added. A fast-path SYSCALL will fall through to the following instruction using SYSRET without any particular checking. To prevent fall-through to a noncanonical address, Linux prevents the highest canonical page from being mapped. This test case checks a variety of possible maximum addresses to make sure that either we can't map code there or that SYSCALL fall-through works. A slow-path system call can return anywhere. Linux needs to make sure that, if the return address is non-canonical, it won't use SYSRET. This test case causes sigreturn() to return to a variety of addresses (with RCX == RIP) and makes sure that nothing explodes. Some of this code comes from Kirill Shutemov. Kirill reported the following output with 5-level paging enabled: [RUN] sigreturn to 0x800000000000 [OK] Got SIGSEGV at RIP=0x800000000000 [RUN] sigreturn to 0x1000000000000 [OK] Got SIGSEGV at RIP=0x1000000000000 [RUN] sigreturn to 0x2000000000000 [OK] Got SIGSEGV at RIP=0x2000000000000 [RUN] sigreturn to 0x4000000000000 [OK] Got SIGSEGV at RIP=0x4000000000000 [RUN] sigreturn to 0x8000000000000 [OK] Got SIGSEGV at RIP=0x8000000000000 [RUN] sigreturn to 0x10000000000000 [OK] Got SIGSEGV at RIP=0x10000000000000 [RUN] sigreturn to 0x20000000000000 [OK] Got SIGSEGV at RIP=0x20000000000000 [RUN] sigreturn to 0x40000000000000 [OK] Got SIGSEGV at RIP=0x40000000000000 [RUN] sigreturn to 0x80000000000000 [OK] Got SIGSEGV at RIP=0x80000000000000 [RUN] sigreturn to 0x100000000000000 [OK] Got SIGSEGV at RIP=0x100000000000000 [RUN] sigreturn to 0x200000000000000 [OK] Got SIGSEGV at RIP=0x200000000000000 [RUN] sigreturn to 0x400000000000000 [OK] Got SIGSEGV at RIP=0x400000000000000 [RUN] sigreturn to 0x800000000000000 [OK] Got SIGSEGV at RIP=0x800000000000000 [RUN] sigreturn to 
0x1000000000000000 [OK] Got SIGSEGV at RIP=0x1000000000000000 [RUN] sigreturn to 0x2000000000000000 [OK] Got SIGSEGV at RIP=0x2000000000000000 [RUN] sigreturn to 0x4000000000000000 [OK] Got SIGSEGV at RIP=0x4000000000000000 [RUN] sigreturn to 0x8000000000000000 [OK] Got SIGSEGV at RIP=0x8000000000000000 [RUN] Trying a SYSCALL that falls through to 0x7fffffffe000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x7ffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x800000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0xfffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x1000000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x1fffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x2000000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x3fffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x4000000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x7fffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x8000000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0xffffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x10000000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x1ffffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x20000000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x3ffffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x40000000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x7ffffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x80000000000000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0xfffffffffff000 [OK] We survived [RUN] Trying a SYSCALL that falls through to 0x100000000000000 [OK] mremap to 0xfffffffffff000 failed [RUN] Trying a SYSCALL that 
falls through to 0x1fffffffffff000 [OK] mremap to 0x1ffffffffffe000 failed [RUN] Trying a SYSCALL that falls through to 0x200000000000000 [OK] mremap to 0x1fffffffffff000 failed [RUN] Trying a SYSCALL that falls through to 0x3fffffffffff000 [OK] mremap to 0x3ffffffffffe000 failed [RUN] Trying a SYSCALL that falls through to 0x400000000000000 [OK] mremap to 0x3fffffffffff000 failed [RUN] Trying a SYSCALL that falls through to 0x7fffffffffff000 [OK] mremap to 0x7ffffffffffe000 failed [RUN] Trying a SYSCALL that falls through to 0x800000000000000 [OK] mremap to 0x7fffffffffff000 failed [RUN] Trying a SYSCALL that falls through to 0xffffffffffff000 [OK] mremap to 0xfffffffffffe000 failed [RUN] Trying a SYSCALL that falls through to 0x1000000000000000 [OK] mremap to 0xffffffffffff000 failed [RUN] Trying a SYSCALL that falls through to 0x1ffffffffffff000 [OK] mremap to 0x1fffffffffffe000 failed [RUN] Trying a SYSCALL that falls through to 0x2000000000000000 [OK] mremap to 0x1ffffffffffff000 failed [RUN] Trying a SYSCALL that falls through to 0x3ffffffffffff000 [OK] mremap to 0x3fffffffffffe000 failed [RUN] Trying a SYSCALL that falls through to 0x4000000000000000 [OK] mremap to 0x3ffffffffffff000 failed [RUN] Trying a SYSCALL that falls through to 0x7ffffffffffff000 [OK] mremap to 0x7fffffffffffe000 failed [RUN] Trying a SYSCALL that falls through to 0x8000000000000000 [OK] mremap to 0x7ffffffffffff000 failed Signed-off-by: Andy Lutomirski <luto@kernel.org> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. 
Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Shuah Khan <shuahkh@osg.samsung.com> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/e70bd9a3f90657ba47b755100a20475d038fa26b.1482808435.git.luto@kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--tools/testing/selftests/x86/Makefile2
-rw-r--r--tools/testing/selftests/x86/sysret_rip.c195
2 files changed, 196 insertions, 1 deletions
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 8c1cb423cfe6..25d4067c11e4 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -10,7 +10,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_sysc
10TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ 10TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
11 test_FCMOV test_FCOMI test_FISTTP \ 11 test_FCMOV test_FCOMI test_FISTTP \
12 vdso_restorer 12 vdso_restorer
13TARGETS_C_64BIT_ONLY := fsgsbase 13TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
14 14
15TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) 15TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
16TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY) 16TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c
new file mode 100644
index 000000000000..d85ec5b3671c
--- /dev/null
+++ b/tools/testing/selftests/x86/sysret_rip.c
@@ -0,0 +1,195 @@
1/*
2 * sysret_rip.c - tests that x86 avoids Intel SYSRET pitfalls
3 * Copyright (c) 2014-2016 Andrew Lutomirski
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 */
14
15#define _GNU_SOURCE
16
17#include <stdlib.h>
18#include <unistd.h>
19#include <stdio.h>
20#include <string.h>
21#include <inttypes.h>
22#include <sys/signal.h>
23#include <sys/ucontext.h>
24#include <sys/syscall.h>
25#include <err.h>
26#include <stddef.h>
27#include <stdbool.h>
28#include <setjmp.h>
29#include <sys/user.h>
30#include <sys/mman.h>
31#include <assert.h>
32
33
/*
 * Emit a dedicated, 4096-byte-aligned page of code named test_page into
 * .text.  The first 4094 bytes are filled with 0xcc (the x86 int3 opcode)
 * and the page ends with a SYSCALL instruction at label test_syscall_insn,
 * so the instruction that follows SYSCALL is the first byte of whatever
 * comes after this page.  The .ifne/.error check makes the assembler fail
 * the build unless the page is exactly 4096 bytes long.
 */
34asm (
35 ".pushsection \".text\", \"ax\"\n\t"
36 ".balign 4096\n\t"
37 "test_page: .globl test_page\n\t"
38 ".fill 4094,1,0xcc\n\t"
39 "test_syscall_insn:\n\t"
40 "syscall\n\t"
41 ".ifne . - test_page - 4096\n\t"
42 ".error \"test page is not one page long\"\n\t"
43 ".endif\n\t"
44 ".popsection"
45 );
46
/* Start of the 4096-byte test page defined by the file-scope asm block. */
47extern const char test_page[];
/*
 * Address the test page is currently mapped at.  It starts out at the
 * page's link-time location and is updated by
 * test_syscall_fallthrough_to() after each successful mremap().
 */
48static void const *current_test_page_addr = test_page;
49
/*
 * Install @handler as the three-argument (SA_SIGINFO) handler for @sig.
 *
 * @flags is OR-ed into sa_flags on top of SA_SIGINFO, and the handler runs
 * with an empty additional signal mask.  If sigaction() fails the program
 * exits through err().
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction action;

	memset(&action, 0, sizeof(action));
	sigemptyset(&action.sa_mask);
	action.sa_flags = flags | SA_SIGINFO;
	action.sa_sigaction = handler;

	if (sigaction(sig, &action, NULL) != 0)
		err(1, "sigaction");
}
61
/*
 * Put @sig back to its default disposition (SIG_DFL), with no flags and an
 * empty signal mask.  If sigaction() fails the program exits through err().
 */
static void clearhandler(int sig)
{
	struct sigaction dfl;

	memset(&dfl, 0, sizeof(dfl));
	sigemptyset(&dfl.sa_mask);
	dfl.sa_handler = SIG_DFL;

	if (sigaction(sig, &dfl, NULL) != 0)
		err(1, "sigaction");
}
71
72/*
 * State used by our signal handlers.
 *
 * initial_regs holds the general-purpose register set that sigusr1()
 * copies out of its signal context before redirecting the return address;
 * sigsegv_for_sigreturn_test() copies it back into its own context.
 */
73static gregset_t initial_regs;
74
/*
 * Address the test currently in progress is aimed at.  It is written by
 * the test_*() drivers and compared against the saved RIP in the SIGSEGV
 * handlers.
 */
75static volatile unsigned long rip;
76
77static void sigsegv_for_sigreturn_test(int sig, siginfo_t *info, void *ctx_void)
78{
79 ucontext_t *ctx = (ucontext_t*)ctx_void;
80
81 if (rip != ctx->uc_mcontext.gregs[REG_RIP]) {
82 printf("[FAIL]\tRequested RIP=0x%lx but got RIP=0x%lx\n",
83 rip, (unsigned long)ctx->uc_mcontext.gregs[REG_RIP]);
84 fflush(stdout);
85 _exit(1);
86 }
87
88 memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
89
90 printf("[OK]\tGot SIGSEGV at RIP=0x%lx\n", rip);
91}
92
93static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
94{
95 ucontext_t *ctx = (ucontext_t*)ctx_void;
96
97 memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
98
99 /* Set IP and CX to match so that SYSRET can happen. */
100 ctx->uc_mcontext.gregs[REG_RIP] = rip;
101 ctx->uc_mcontext.gregs[REG_RCX] = rip;
102
103 /* R11 and EFLAGS should already match. */
104 assert(ctx->uc_mcontext.gregs[REG_EFL] ==
105 ctx->uc_mcontext.gregs[REG_R11]);
106
107 sethandler(SIGSEGV, sigsegv_for_sigreturn_test, SA_RESETHAND);
108
109 return;
110}
111
112static void test_sigreturn_to(unsigned long ip)
113{
114 rip = ip;
115 printf("[RUN]\tsigreturn to 0x%lx\n", ip);
116 raise(SIGUSR1);
117}
118
/*
 * Jump target used to leave sigsegv_for_fallthrough().  It is only ever
 * passed to sigsetjmp()/siglongjmp(), whose argument type is sigjmp_buf.
 */
static sigjmp_buf jmpbuf;
120
121static void sigsegv_for_fallthrough(int sig, siginfo_t *info, void *ctx_void)
122{
123 ucontext_t *ctx = (ucontext_t*)ctx_void;
124
125 if (rip != ctx->uc_mcontext.gregs[REG_RIP]) {
126 printf("[FAIL]\tExpected SIGSEGV at 0x%lx but got RIP=0x%lx\n",
127 rip, (unsigned long)ctx->uc_mcontext.gregs[REG_RIP]);
128 fflush(stdout);
129 _exit(1);
130 }
131
132 siglongjmp(jmpbuf, 1);
133}
134
135static void test_syscall_fallthrough_to(unsigned long ip)
136{
137 void *new_address = (void *)(ip - 4096);
138 void *ret;
139
140 printf("[RUN]\tTrying a SYSCALL that falls through to 0x%lx\n", ip);
141
142 ret = mremap((void *)current_test_page_addr, 4096, 4096,
143 MREMAP_MAYMOVE | MREMAP_FIXED, new_address);
144 if (ret == MAP_FAILED) {
145 if (ip <= (1UL << 47) - PAGE_SIZE) {
146 err(1, "mremap to %p", new_address);
147 } else {
148 printf("[OK]\tmremap to %p failed\n", new_address);
149 return;
150 }
151 }
152
153 if (ret != new_address)
154 errx(1, "mremap malfunctioned: asked for %p but got %p\n",
155 new_address, ret);
156
157 current_test_page_addr = new_address;
158 rip = ip;
159
160 if (sigsetjmp(jmpbuf, 1) == 0) {
161 asm volatile ("call *%[syscall_insn]" :: "a" (SYS_getpid),
162 [syscall_insn] "rm" (ip - 2));
163 errx(1, "[FAIL]\tSyscall trampoline returned");
164 }
165
166 printf("[OK]\tWe survived\n");
167}
168
169int main()
170{
171 /*
172 * When the kernel returns from a slow-path syscall, it will
173 * detect whether SYSRET is appropriate. If it incorrectly
174 * thinks that SYSRET is appropriate when RIP is noncanonical,
175 * it'll crash on Intel CPUs.
176 */
177 sethandler(SIGUSR1, sigusr1, 0);
178 for (int i = 47; i < 64; i++)
179 test_sigreturn_to(1UL<<i);
180
181 clearhandler(SIGUSR1);
182
183 sethandler(SIGSEGV, sigsegv_for_fallthrough, 0);
184
185 /* One extra test to check that we didn't screw up the mremap logic. */
186 test_syscall_fallthrough_to((1UL << 47) - 2*PAGE_SIZE);
187
188 /* These are the interesting cases. */
189 for (int i = 47; i < 64; i++) {
190 test_syscall_fallthrough_to((1UL<<i) - PAGE_SIZE);
191 test_syscall_fallthrough_to(1UL<<i);
192 }
193
194 return 0;
195}