diff options
author | Andy Lutomirski <luto@mit.edu> | 2011-08-10 11:15:32 -0400 |
---|---|---|
committer | H. Peter Anvin <hpa@linux.intel.com> | 2011-08-10 20:26:46 -0400 |
commit | 3ae36655b97a03fa1decf72f04078ef945647c1a (patch) | |
tree | f6f5fbc743909d66e6823c9325ddba9d9d3646ad /arch/x86/kernel | |
parent | fce8dc06423d6fb2709469dc5c55b04e09c1d126 (diff) |
x86-64: Rework vsyscall emulation and add vsyscall= parameter
There are three choices for the new vsyscall= kernel parameter:
vsyscall=native: Vsyscalls are native code that issues the
corresponding syscalls.
vsyscall=emulate (default): Vsyscalls are emulated by instruction
fault traps, tested in the bad_area path. The actual contents of
the vsyscall page are the same as in the vsyscall=native case except
that it's marked NX. This way programs that make assumptions about
what the code in the page does will not be confused when they read
that code.
vsyscall=none: Trying to execute a vsyscall will segfault.
Signed-off-by: Andy Lutomirski <luto@mit.edu>
Link: http://lkml.kernel.org/r/8449fb3abf89851fd6b2260972666a6f82542284.1312988155.git.luto@mit.edu
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/entry_64.S | 1 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 33 | ||||
-rw-r--r-- | arch/x86/kernel/vsyscall_64.c | 79 | ||||
-rw-r--r-- | arch/x86/kernel/vsyscall_emu_64.S | 36 |
5 files changed, 72 insertions, 83 deletions
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e949793d6b93..46792d900018 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1123,7 +1123,6 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug | |||
1123 | zeroentry coprocessor_error do_coprocessor_error | 1123 | zeroentry coprocessor_error do_coprocessor_error |
1124 | errorentry alignment_check do_alignment_check | 1124 | errorentry alignment_check do_alignment_check |
1125 | zeroentry simd_coprocessor_error do_simd_coprocessor_error | 1125 | zeroentry simd_coprocessor_error do_simd_coprocessor_error |
1126 | zeroentry emulate_vsyscall do_emulate_vsyscall | ||
1127 | 1126 | ||
1128 | 1127 | ||
1129 | /* Reload gs selector with exception handling */ | 1128 | /* Reload gs selector with exception handling */ |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index fbc097a085ca..b9b67166f9de 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -872,12 +872,6 @@ void __init trap_init(void) | |||
872 | set_bit(SYSCALL_VECTOR, used_vectors); | 872 | set_bit(SYSCALL_VECTOR, used_vectors); |
873 | #endif | 873 | #endif |
874 | 874 | ||
875 | #ifdef CONFIG_X86_64 | ||
876 | BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors)); | ||
877 | set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall); | ||
878 | set_bit(VSYSCALL_EMU_VECTOR, used_vectors); | ||
879 | #endif | ||
880 | |||
881 | /* | 875 | /* |
882 | * Should be a barrier for any external CPU state: | 876 | * Should be a barrier for any external CPU state: |
883 | */ | 877 | */ |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 8f3a265476d7..0f703f10901a 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -71,7 +71,6 @@ PHDRS { | |||
71 | text PT_LOAD FLAGS(5); /* R_E */ | 71 | text PT_LOAD FLAGS(5); /* R_E */ |
72 | data PT_LOAD FLAGS(6); /* RW_ */ | 72 | data PT_LOAD FLAGS(6); /* RW_ */ |
73 | #ifdef CONFIG_X86_64 | 73 | #ifdef CONFIG_X86_64 |
74 | user PT_LOAD FLAGS(5); /* R_E */ | ||
75 | #ifdef CONFIG_SMP | 74 | #ifdef CONFIG_SMP |
76 | percpu PT_LOAD FLAGS(6); /* RW_ */ | 75 | percpu PT_LOAD FLAGS(6); /* RW_ */ |
77 | #endif | 76 | #endif |
@@ -174,38 +173,6 @@ SECTIONS | |||
174 | 173 | ||
175 | . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE); | 174 | . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE); |
176 | 175 | ||
177 | #define VSYSCALL_ADDR (-10*1024*1024) | ||
178 | |||
179 | #define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET) | ||
180 | #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) | ||
181 | |||
182 | #define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0) | ||
183 | #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) | ||
184 | |||
185 | __vsyscall_0 = .; | ||
186 | |||
187 | . = VSYSCALL_ADDR; | ||
188 | .vsyscall : AT(VLOAD(.vsyscall)) { | ||
189 | /* work around gold bug 13023 */ | ||
190 | __vsyscall_beginning_hack = .; | ||
191 | *(.vsyscall_0) | ||
192 | |||
193 | . = __vsyscall_beginning_hack + 1024; | ||
194 | *(.vsyscall_1) | ||
195 | |||
196 | . = __vsyscall_beginning_hack + 2048; | ||
197 | *(.vsyscall_2) | ||
198 | |||
199 | . = __vsyscall_beginning_hack + 4096; /* Pad the whole page. */ | ||
200 | } :user =0xcc | ||
201 | . = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE); | ||
202 | |||
203 | #undef VSYSCALL_ADDR | ||
204 | #undef VLOAD_OFFSET | ||
205 | #undef VLOAD | ||
206 | #undef VVIRT_OFFSET | ||
207 | #undef VVIRT | ||
208 | |||
209 | #endif /* CONFIG_X86_64 */ | 176 | #endif /* CONFIG_X86_64 */ |
210 | 177 | ||
211 | /* Init code and data - will be freed after init */ | 178 | /* Init code and data - will be freed after init */ |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index bf8e9ffee6e9..18ae83dd1cd7 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -56,6 +56,27 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = | |||
56 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), | 56 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), |
57 | }; | 57 | }; |
58 | 58 | ||
59 | static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; | ||
60 | |||
61 | static int __init vsyscall_setup(char *str) | ||
62 | { | ||
63 | if (str) { | ||
64 | if (!strcmp("emulate", str)) | ||
65 | vsyscall_mode = EMULATE; | ||
66 | else if (!strcmp("native", str)) | ||
67 | vsyscall_mode = NATIVE; | ||
68 | else if (!strcmp("none", str)) | ||
69 | vsyscall_mode = NONE; | ||
70 | else | ||
71 | return -EINVAL; | ||
72 | |||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | return -EINVAL; | ||
77 | } | ||
78 | early_param("vsyscall", vsyscall_setup); | ||
79 | |||
59 | void update_vsyscall_tz(void) | 80 | void update_vsyscall_tz(void) |
60 | { | 81 | { |
61 | unsigned long flags; | 82 | unsigned long flags; |
@@ -100,7 +121,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, | |||
100 | 121 | ||
101 | printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", | 122 | printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", |
102 | level, tsk->comm, task_pid_nr(tsk), | 123 | level, tsk->comm, task_pid_nr(tsk), |
103 | message, regs->ip - 2, regs->cs, | 124 | message, regs->ip, regs->cs, |
104 | regs->sp, regs->ax, regs->si, regs->di); | 125 | regs->sp, regs->ax, regs->si, regs->di); |
105 | } | 126 | } |
106 | 127 | ||
@@ -118,45 +139,39 @@ static int addr_to_vsyscall_nr(unsigned long addr) | |||
118 | return nr; | 139 | return nr; |
119 | } | 140 | } |
120 | 141 | ||
121 | void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code) | 142 | bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) |
122 | { | 143 | { |
123 | struct task_struct *tsk; | 144 | struct task_struct *tsk; |
124 | unsigned long caller; | 145 | unsigned long caller; |
125 | int vsyscall_nr; | 146 | int vsyscall_nr; |
126 | long ret; | 147 | long ret; |
127 | 148 | ||
128 | local_irq_enable(); | 149 | /* |
150 | * No point in checking CS -- the only way to get here is a user mode | ||
151 | * trap to a high address, which means that we're in 64-bit user code. | ||
152 | */ | ||
129 | 153 | ||
130 | if (!user_64bit_mode(regs)) { | 154 | WARN_ON_ONCE(address != regs->ip); |
131 | /* | ||
132 | * If we trapped from kernel mode, we might as well OOPS now | ||
133 | * instead of returning to some random address and OOPSing | ||
134 | * then. | ||
135 | */ | ||
136 | BUG_ON(!user_mode(regs)); | ||
137 | 155 | ||
138 | /* Compat mode and non-compat 32-bit CS should both segfault. */ | 156 | if (vsyscall_mode == NONE) { |
139 | warn_bad_vsyscall(KERN_WARNING, regs, | 157 | warn_bad_vsyscall(KERN_INFO, regs, |
140 | "illegal int 0xcc from 32-bit mode"); | 158 | "vsyscall attempted with vsyscall=none"); |
141 | goto sigsegv; | 159 | return false; |
142 | } | 160 | } |
143 | 161 | ||
144 | /* | 162 | vsyscall_nr = addr_to_vsyscall_nr(address); |
145 | * x86-ism here: regs->ip points to the instruction after the int 0xcc, | ||
146 | * and int 0xcc is two bytes long. | ||
147 | */ | ||
148 | vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2); | ||
149 | 163 | ||
150 | trace_emulate_vsyscall(vsyscall_nr); | 164 | trace_emulate_vsyscall(vsyscall_nr); |
151 | 165 | ||
152 | if (vsyscall_nr < 0) { | 166 | if (vsyscall_nr < 0) { |
153 | warn_bad_vsyscall(KERN_WARNING, regs, | 167 | warn_bad_vsyscall(KERN_WARNING, regs, |
154 | "illegal int 0xcc (exploit attempt?)"); | 168 | "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround"); |
155 | goto sigsegv; | 169 | goto sigsegv; |
156 | } | 170 | } |
157 | 171 | ||
158 | if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { | 172 | if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { |
159 | warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)"); | 173 | warn_bad_vsyscall(KERN_WARNING, regs, |
174 | "vsyscall with bad stack (exploit attempt?)"); | ||
160 | goto sigsegv; | 175 | goto sigsegv; |
161 | } | 176 | } |
162 | 177 | ||
@@ -201,13 +216,11 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code) | |||
201 | regs->ip = caller; | 216 | regs->ip = caller; |
202 | regs->sp += 8; | 217 | regs->sp += 8; |
203 | 218 | ||
204 | local_irq_disable(); | 219 | return true; |
205 | return; | ||
206 | 220 | ||
207 | sigsegv: | 221 | sigsegv: |
208 | regs->ip -= 2; /* The faulting instruction should be the int 0xcc. */ | ||
209 | force_sig(SIGSEGV, current); | 222 | force_sig(SIGSEGV, current); |
210 | local_irq_disable(); | 223 | return true; |
211 | } | 224 | } |
212 | 225 | ||
213 | /* | 226 | /* |
@@ -255,15 +268,21 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) | |||
255 | 268 | ||
256 | void __init map_vsyscall(void) | 269 | void __init map_vsyscall(void) |
257 | { | 270 | { |
258 | extern char __vsyscall_0; | 271 | extern char __vsyscall_page; |
259 | unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); | 272 | unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); |
260 | extern char __vvar_page; | 273 | extern char __vvar_page; |
261 | unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); | 274 | unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); |
262 | 275 | ||
263 | /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ | 276 | __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall, |
264 | __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); | 277 | vsyscall_mode == NATIVE |
278 | ? PAGE_KERNEL_VSYSCALL | ||
279 | : PAGE_KERNEL_VVAR); | ||
280 | BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) != | ||
281 | (unsigned long)VSYSCALL_START); | ||
282 | |||
265 | __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); | 283 | __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); |
266 | BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS); | 284 | BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != |
285 | (unsigned long)VVAR_ADDRESS); | ||
267 | } | 286 | } |
268 | 287 | ||
269 | static int __init vsyscall_init(void) | 288 | static int __init vsyscall_init(void) |
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S index ffa845eae5ca..c9596a9af159 100644 --- a/arch/x86/kernel/vsyscall_emu_64.S +++ b/arch/x86/kernel/vsyscall_emu_64.S | |||
@@ -7,21 +7,31 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
10 | |||
10 | #include <asm/irq_vectors.h> | 11 | #include <asm/irq_vectors.h> |
12 | #include <asm/page_types.h> | ||
13 | #include <asm/unistd_64.h> | ||
14 | |||
15 | __PAGE_ALIGNED_DATA | ||
16 | .globl __vsyscall_page | ||
17 | .balign PAGE_SIZE, 0xcc | ||
18 | .type __vsyscall_page, @object | ||
19 | __vsyscall_page: | ||
20 | |||
21 | mov $__NR_gettimeofday, %rax | ||
22 | syscall | ||
23 | ret | ||
11 | 24 | ||
12 | /* The unused parts of the page are filled with 0xcc by the linker script. */ | 25 | .balign 1024, 0xcc |
26 | mov $__NR_time, %rax | ||
27 | syscall | ||
28 | ret | ||
13 | 29 | ||
14 | .section .vsyscall_0, "a" | 30 | .balign 1024, 0xcc |
15 | ENTRY(vsyscall_0) | 31 | mov $__NR_getcpu, %rax |
16 | int $VSYSCALL_EMU_VECTOR | 32 | syscall |
17 | END(vsyscall_0) | 33 | ret |
18 | 34 | ||
19 | .section .vsyscall_1, "a" | 35 | .balign 4096, 0xcc |
20 | ENTRY(vsyscall_1) | ||
21 | int $VSYSCALL_EMU_VECTOR | ||
22 | END(vsyscall_1) | ||
23 | 36 | ||
24 | .section .vsyscall_2, "a" | 37 | .size __vsyscall_page, 4096 |
25 | ENTRY(vsyscall_2) | ||
26 | int $VSYSCALL_EMU_VECTOR | ||
27 | END(vsyscall_2) | ||