aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-12-16 18:40:50 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-16 18:40:50 -0500
commit2a74dbb9a86e8102dcd07d284135b4530a84826e (patch)
treea54403e312b6062dfb57bd904ba8b8ce3b11e720 /arch
parent770b6cb4d21fb3e3df2a7a51e186a3c14db1ec30 (diff)
parente93072374112db9dc86635934ee761249be28370 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security
Pull security subsystem updates from James Morris: "A quiet cycle for the security subsystem with just a few maintenance updates." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security: Smack: create a sysfs mount point for smackfs Smack: use select not depends in Kconfig Yama: remove locking from delete path Yama: add RCU to drop read locking drivers/char/tpm: remove tasklet and cleanup KEYS: Use keyring_alloc() to create special keyrings KEYS: Reduce initial permissions on keys KEYS: Make the session and process keyrings per-thread seccomp: Make syscall skipping and nr changes more consistent key: Fix resource leak keys: Fix unreachable code KEYS: Add payload preparsing opportunity prior to key instantiate or update
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/kernel/vsyscall_64.c110
1 file changed, 59 insertions, 51 deletions
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 3a3e8c9e280d..9a907a67be8f 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -145,19 +145,6 @@ static int addr_to_vsyscall_nr(unsigned long addr)
145 return nr; 145 return nr;
146} 146}
147 147
148#ifdef CONFIG_SECCOMP
149static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr)
150{
151 if (!seccomp_mode(&tsk->seccomp))
152 return 0;
153 task_pt_regs(tsk)->orig_ax = syscall_nr;
154 task_pt_regs(tsk)->ax = syscall_nr;
155 return __secure_computing(syscall_nr);
156}
157#else
158#define vsyscall_seccomp(_tsk, _nr) 0
159#endif
160
161static bool write_ok_or_segv(unsigned long ptr, size_t size) 148static bool write_ok_or_segv(unsigned long ptr, size_t size)
162{ 149{
163 /* 150 /*
@@ -190,10 +177,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
190{ 177{
191 struct task_struct *tsk; 178 struct task_struct *tsk;
192 unsigned long caller; 179 unsigned long caller;
193 int vsyscall_nr; 180 int vsyscall_nr, syscall_nr, tmp;
194 int prev_sig_on_uaccess_error; 181 int prev_sig_on_uaccess_error;
195 long ret; 182 long ret;
196 int skip;
197 183
198 /* 184 /*
199 * No point in checking CS -- the only way to get here is a user mode 185 * No point in checking CS -- the only way to get here is a user mode
@@ -225,56 +211,84 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
225 } 211 }
226 212
227 tsk = current; 213 tsk = current;
228 /*
229 * With a real vsyscall, page faults cause SIGSEGV. We want to
230 * preserve that behavior to make writing exploits harder.
231 */
232 prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
233 current_thread_info()->sig_on_uaccess_error = 1;
234 214
235 /* 215 /*
216 * Check for access_ok violations and find the syscall nr.
217 *
236 * NULL is a valid user pointer (in the access_ok sense) on 32-bit and 218 * NULL is a valid user pointer (in the access_ok sense) on 32-bit and
237 * 64-bit, so we don't need to special-case it here. For all the 219 * 64-bit, so we don't need to special-case it here. For all the
238 * vsyscalls, NULL means "don't write anything" not "write it at 220 * vsyscalls, NULL means "don't write anything" not "write it at
239 * address 0". 221 * address 0".
240 */ 222 */
241 ret = -EFAULT;
242 skip = 0;
243 switch (vsyscall_nr) { 223 switch (vsyscall_nr) {
244 case 0: 224 case 0:
245 skip = vsyscall_seccomp(tsk, __NR_gettimeofday);
246 if (skip)
247 break;
248
249 if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || 225 if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
250 !write_ok_or_segv(regs->si, sizeof(struct timezone))) 226 !write_ok_or_segv(regs->si, sizeof(struct timezone))) {
251 break; 227 ret = -EFAULT;
228 goto check_fault;
229 }
230
231 syscall_nr = __NR_gettimeofday;
232 break;
233
234 case 1:
235 if (!write_ok_or_segv(regs->di, sizeof(time_t))) {
236 ret = -EFAULT;
237 goto check_fault;
238 }
239
240 syscall_nr = __NR_time;
241 break;
242
243 case 2:
244 if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
245 !write_ok_or_segv(regs->si, sizeof(unsigned))) {
246 ret = -EFAULT;
247 goto check_fault;
248 }
249
250 syscall_nr = __NR_getcpu;
251 break;
252 }
253
254 /*
255 * Handle seccomp. regs->ip must be the original value.
256 * See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt.
257 *
258 * We could optimize the seccomp disabled case, but performance
259 * here doesn't matter.
260 */
261 regs->orig_ax = syscall_nr;
262 regs->ax = -ENOSYS;
263 tmp = secure_computing(syscall_nr);
264 if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
265 warn_bad_vsyscall(KERN_DEBUG, regs,
266 "seccomp tried to change syscall nr or ip");
267 do_exit(SIGSYS);
268 }
269 if (tmp)
270 goto do_ret; /* skip requested */
252 271
272 /*
273 * With a real vsyscall, page faults cause SIGSEGV. We want to
274 * preserve that behavior to make writing exploits harder.
275 */
276 prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
277 current_thread_info()->sig_on_uaccess_error = 1;
278
279 ret = -EFAULT;
280 switch (vsyscall_nr) {
281 case 0:
253 ret = sys_gettimeofday( 282 ret = sys_gettimeofday(
254 (struct timeval __user *)regs->di, 283 (struct timeval __user *)regs->di,
255 (struct timezone __user *)regs->si); 284 (struct timezone __user *)regs->si);
256 break; 285 break;
257 286
258 case 1: 287 case 1:
259 skip = vsyscall_seccomp(tsk, __NR_time);
260 if (skip)
261 break;
262
263 if (!write_ok_or_segv(regs->di, sizeof(time_t)))
264 break;
265
266 ret = sys_time((time_t __user *)regs->di); 288 ret = sys_time((time_t __user *)regs->di);
267 break; 289 break;
268 290
269 case 2: 291 case 2:
270 skip = vsyscall_seccomp(tsk, __NR_getcpu);
271 if (skip)
272 break;
273
274 if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
275 !write_ok_or_segv(regs->si, sizeof(unsigned)))
276 break;
277
278 ret = sys_getcpu((unsigned __user *)regs->di, 292 ret = sys_getcpu((unsigned __user *)regs->di,
279 (unsigned __user *)regs->si, 293 (unsigned __user *)regs->si,
280 NULL); 294 NULL);
@@ -283,12 +297,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
283 297
284 current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; 298 current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
285 299
286 if (skip) { 300check_fault:
287 if ((long)regs->ax <= 0L) /* seccomp errno emulation */
288 goto do_ret;
289 goto done; /* seccomp trace/trap */
290 }
291
292 if (ret == -EFAULT) { 301 if (ret == -EFAULT) {
293 /* Bad news -- userspace fed a bad pointer to a vsyscall. */ 302 /* Bad news -- userspace fed a bad pointer to a vsyscall. */
294 warn_bad_vsyscall(KERN_INFO, regs, 303 warn_bad_vsyscall(KERN_INFO, regs,
@@ -311,7 +320,6 @@ do_ret:
311 /* Emulate a ret instruction. */ 320 /* Emulate a ret instruction. */
312 regs->ip = caller; 321 regs->ip = caller;
313 regs->sp += 8; 322 regs->sp += 8;
314done:
315 return true; 323 return true;
316 324
317sigsegv: 325sigsegv: