diff options
Diffstat (limited to 'arch/x86/kernel/vsyscall_64.c')
| -rw-r--r-- | arch/x86/kernel/vsyscall_64.c | 110 |
1 file changed, 59 insertions, 51 deletions
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 3a3e8c9e280d..9a907a67be8f 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c | |||
| @@ -145,19 +145,6 @@ static int addr_to_vsyscall_nr(unsigned long addr) | |||
| 145 | return nr; | 145 | return nr; |
| 146 | } | 146 | } |
| 147 | 147 | ||
| 148 | #ifdef CONFIG_SECCOMP | ||
| 149 | static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr) | ||
| 150 | { | ||
| 151 | if (!seccomp_mode(&tsk->seccomp)) | ||
| 152 | return 0; | ||
| 153 | task_pt_regs(tsk)->orig_ax = syscall_nr; | ||
| 154 | task_pt_regs(tsk)->ax = syscall_nr; | ||
| 155 | return __secure_computing(syscall_nr); | ||
| 156 | } | ||
| 157 | #else | ||
| 158 | #define vsyscall_seccomp(_tsk, _nr) 0 | ||
| 159 | #endif | ||
| 160 | |||
| 161 | static bool write_ok_or_segv(unsigned long ptr, size_t size) | 148 | static bool write_ok_or_segv(unsigned long ptr, size_t size) |
| 162 | { | 149 | { |
| 163 | /* | 150 | /* |
| @@ -190,10 +177,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
| 190 | { | 177 | { |
| 191 | struct task_struct *tsk; | 178 | struct task_struct *tsk; |
| 192 | unsigned long caller; | 179 | unsigned long caller; |
| 193 | int vsyscall_nr; | 180 | int vsyscall_nr, syscall_nr, tmp; |
| 194 | int prev_sig_on_uaccess_error; | 181 | int prev_sig_on_uaccess_error; |
| 195 | long ret; | 182 | long ret; |
| 196 | int skip; | ||
| 197 | 183 | ||
| 198 | /* | 184 | /* |
| 199 | * No point in checking CS -- the only way to get here is a user mode | 185 | * No point in checking CS -- the only way to get here is a user mode |
| @@ -225,56 +211,84 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
| 225 | } | 211 | } |
| 226 | 212 | ||
| 227 | tsk = current; | 213 | tsk = current; |
| 228 | /* | ||
| 229 | * With a real vsyscall, page faults cause SIGSEGV. We want to | ||
| 230 | * preserve that behavior to make writing exploits harder. | ||
| 231 | */ | ||
| 232 | prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; | ||
| 233 | current_thread_info()->sig_on_uaccess_error = 1; | ||
| 234 | 214 | ||
| 235 | /* | 215 | /* |
| 216 | * Check for access_ok violations and find the syscall nr. | ||
| 217 | * | ||
| 236 | * NULL is a valid user pointer (in the access_ok sense) on 32-bit and | 218 | * NULL is a valid user pointer (in the access_ok sense) on 32-bit and |
| 237 | * 64-bit, so we don't need to special-case it here. For all the | 219 | * 64-bit, so we don't need to special-case it here. For all the |
| 238 | * vsyscalls, NULL means "don't write anything" not "write it at | 220 | * vsyscalls, NULL means "don't write anything" not "write it at |
| 239 | * address 0". | 221 | * address 0". |
| 240 | */ | 222 | */ |
| 241 | ret = -EFAULT; | ||
| 242 | skip = 0; | ||
| 243 | switch (vsyscall_nr) { | 223 | switch (vsyscall_nr) { |
| 244 | case 0: | 224 | case 0: |
| 245 | skip = vsyscall_seccomp(tsk, __NR_gettimeofday); | ||
| 246 | if (skip) | ||
| 247 | break; | ||
| 248 | |||
| 249 | if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || | 225 | if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || |
| 250 | !write_ok_or_segv(regs->si, sizeof(struct timezone))) | 226 | !write_ok_or_segv(regs->si, sizeof(struct timezone))) { |
| 251 | break; | 227 | ret = -EFAULT; |
| 228 | goto check_fault; | ||
| 229 | } | ||
| 230 | |||
| 231 | syscall_nr = __NR_gettimeofday; | ||
| 232 | break; | ||
| 233 | |||
| 234 | case 1: | ||
| 235 | if (!write_ok_or_segv(regs->di, sizeof(time_t))) { | ||
| 236 | ret = -EFAULT; | ||
| 237 | goto check_fault; | ||
| 238 | } | ||
| 239 | |||
| 240 | syscall_nr = __NR_time; | ||
| 241 | break; | ||
| 242 | |||
| 243 | case 2: | ||
| 244 | if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || | ||
| 245 | !write_ok_or_segv(regs->si, sizeof(unsigned))) { | ||
| 246 | ret = -EFAULT; | ||
| 247 | goto check_fault; | ||
| 248 | } | ||
| 249 | |||
| 250 | syscall_nr = __NR_getcpu; | ||
| 251 | break; | ||
| 252 | } | ||
| 253 | |||
| 254 | /* | ||
| 255 | * Handle seccomp. regs->ip must be the original value. | ||
| 256 | * See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt. | ||
| 257 | * | ||
| 258 | * We could optimize the seccomp disabled case, but performance | ||
| 259 | * here doesn't matter. | ||
| 260 | */ | ||
| 261 | regs->orig_ax = syscall_nr; | ||
| 262 | regs->ax = -ENOSYS; | ||
| 263 | tmp = secure_computing(syscall_nr); | ||
| 264 | if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { | ||
| 265 | warn_bad_vsyscall(KERN_DEBUG, regs, | ||
| 266 | "seccomp tried to change syscall nr or ip"); | ||
| 267 | do_exit(SIGSYS); | ||
| 268 | } | ||
| 269 | if (tmp) | ||
| 270 | goto do_ret; /* skip requested */ | ||
| 252 | 271 | ||
| 272 | /* | ||
| 273 | * With a real vsyscall, page faults cause SIGSEGV. We want to | ||
| 274 | * preserve that behavior to make writing exploits harder. | ||
| 275 | */ | ||
| 276 | prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; | ||
| 277 | current_thread_info()->sig_on_uaccess_error = 1; | ||
| 278 | |||
| 279 | ret = -EFAULT; | ||
| 280 | switch (vsyscall_nr) { | ||
| 281 | case 0: | ||
| 253 | ret = sys_gettimeofday( | 282 | ret = sys_gettimeofday( |
| 254 | (struct timeval __user *)regs->di, | 283 | (struct timeval __user *)regs->di, |
| 255 | (struct timezone __user *)regs->si); | 284 | (struct timezone __user *)regs->si); |
| 256 | break; | 285 | break; |
| 257 | 286 | ||
| 258 | case 1: | 287 | case 1: |
| 259 | skip = vsyscall_seccomp(tsk, __NR_time); | ||
| 260 | if (skip) | ||
| 261 | break; | ||
| 262 | |||
| 263 | if (!write_ok_or_segv(regs->di, sizeof(time_t))) | ||
| 264 | break; | ||
| 265 | |||
| 266 | ret = sys_time((time_t __user *)regs->di); | 288 | ret = sys_time((time_t __user *)regs->di); |
| 267 | break; | 289 | break; |
| 268 | 290 | ||
| 269 | case 2: | 291 | case 2: |
| 270 | skip = vsyscall_seccomp(tsk, __NR_getcpu); | ||
| 271 | if (skip) | ||
| 272 | break; | ||
| 273 | |||
| 274 | if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || | ||
| 275 | !write_ok_or_segv(regs->si, sizeof(unsigned))) | ||
| 276 | break; | ||
| 277 | |||
| 278 | ret = sys_getcpu((unsigned __user *)regs->di, | 292 | ret = sys_getcpu((unsigned __user *)regs->di, |
| 279 | (unsigned __user *)regs->si, | 293 | (unsigned __user *)regs->si, |
| 280 | NULL); | 294 | NULL); |
| @@ -283,12 +297,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
| 283 | 297 | ||
| 284 | current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; | 298 | current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; |
| 285 | 299 | ||
| 286 | if (skip) { | 300 | check_fault: |
| 287 | if ((long)regs->ax <= 0L) /* seccomp errno emulation */ | ||
| 288 | goto do_ret; | ||
| 289 | goto done; /* seccomp trace/trap */ | ||
| 290 | } | ||
| 291 | |||
| 292 | if (ret == -EFAULT) { | 301 | if (ret == -EFAULT) { |
| 293 | /* Bad news -- userspace fed a bad pointer to a vsyscall. */ | 302 | /* Bad news -- userspace fed a bad pointer to a vsyscall. */ |
| 294 | warn_bad_vsyscall(KERN_INFO, regs, | 303 | warn_bad_vsyscall(KERN_INFO, regs, |
| @@ -311,7 +320,6 @@ do_ret: | |||
| 311 | /* Emulate a ret instruction. */ | 320 | /* Emulate a ret instruction. */ |
| 312 | regs->ip = caller; | 321 | regs->ip = caller; |
| 313 | regs->sp += 8; | 322 | regs->sp += 8; |
| 314 | done: | ||
| 315 | return true; | 323 | return true; |
| 316 | 324 | ||
| 317 | sigsegv: | 325 | sigsegv: |
