aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorKees Cook <keescook@chromium.org>2014-06-27 18:18:48 -0400
committerKees Cook <keescook@chromium.org>2014-07-18 15:13:39 -0400
commitdbd952127d11bb44a4ea30b08cc60531b6a23d71 (patch)
tree3e6bbec7041ed70dc0d015b0e7d01f1dcfed9a37 /kernel
parentc8bee430dc52cfca6c1aab27752a89275d78d50f (diff)
seccomp: introduce writer locking
Normally, task_struct.seccomp.filter is only ever read or modified by the task that owns it (current). This property aids in fast access during system call filtering as read access is lockless. Updating the pointer from another task, however, opens up race conditions. To allow cross-thread filter pointer updates, writes to the seccomp fields are now protected by the sighand spinlock (which is shared by all threads in the thread group). Read access remains lockless because pointer updates themselves are atomic. However, writes (or cloning) often entail additional checking (like maximum instruction counts) which requires locking to perform safely. In the case of cloning threads, the child is invisible to the system until it enters the task list. To make sure a child can't be cloned from a thread and left in a prior state, seccomp duplication is additionally moved under the sighand lock. Then parent and child are certain to have the same seccomp state when they exit the lock. Based on patches by Will Drewry and David Drysdale. Signed-off-by: Kees Cook <keescook@chromium.org> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Reviewed-by: Andy Lutomirski <luto@amacapital.net>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/fork.c49
-rw-r--r--kernel/seccomp.c16
2 files changed, 63 insertions, 2 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 6a13c46cd87d..ed4bc339c9dc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -315,6 +315,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
315 goto free_ti; 315 goto free_ti;
316 316
317 tsk->stack = ti; 317 tsk->stack = ti;
318#ifdef CONFIG_SECCOMP
319 /*
320 * We must handle setting up seccomp filters once we're under
321 * the sighand lock in case orig has changed between now and
322 * then. Until then, filter must be NULL to avoid messing up
323 * the usage counts on the error path calling free_task.
324 */
325 tsk->seccomp.filter = NULL;
326#endif
318 327
319 setup_thread_stack(tsk, orig); 328 setup_thread_stack(tsk, orig);
320 clear_user_return_notifier(tsk); 329 clear_user_return_notifier(tsk);
@@ -1081,6 +1090,39 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
1081 return 0; 1090 return 0;
1082} 1091}
1083 1092
/*
 * copy_seccomp - propagate the parent's seccomp state to a new task
 * @p: the task being created; it is not yet running.
 *
 * Copies current->seccomp into @p (taking a filter reference first) and
 * brings @p's no_new_privs bit and TIF_SECCOMP flag in line with it.
 * Compiles to an empty function when CONFIG_SECCOMP is not set.
 *
 * Context: caller must hold current->sighand->siglock.
 */
static void copy_seccomp(struct task_struct *p)
{
#ifdef CONFIG_SECCOMP
	/*
	 * Must be called with sighand->lock held, which is common to
	 * all threads in the group. Holding cred_guard_mutex is not
	 * needed because this new task is not yet running and cannot
	 * be racing exec.
	 *
	 * assert_spin_locked() is used instead of
	 * BUG_ON(!spin_is_locked()) because spin_is_locked() always
	 * returns 0 on uniprocessor builds without spinlock debugging,
	 * which would make the check fire unconditionally there.
	 */
	assert_spin_locked(&current->sighand->siglock);

	/* Ref-count the new filter user, and assign it. */
	get_seccomp_filter(current);
	p->seccomp = current->seccomp;

	/*
	 * Explicitly enable no_new_privs here in case it got set
	 * between the task_struct being duplicated and holding the
	 * sighand lock. The seccomp state and nnp must be in sync.
	 */
	if (task_no_new_privs(current))
		task_set_no_new_privs(p);

	/*
	 * If the parent gained a seccomp mode after the thread flags
	 * were copied but before we took the sighand lock, we have to
	 * manually enable the seccomp thread flag here.
	 */
	if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
		set_tsk_thread_flag(p, TIF_SECCOMP);
#endif
}
1125
1084SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) 1126SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
1085{ 1127{
1086 current->clear_child_tid = tidptr; 1128 current->clear_child_tid = tidptr;
@@ -1196,7 +1238,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1196 goto fork_out; 1238 goto fork_out;
1197 1239
1198 ftrace_graph_init_task(p); 1240 ftrace_graph_init_task(p);
1199 get_seccomp_filter(p);
1200 1241
1201 rt_mutex_init_task(p); 1242 rt_mutex_init_task(p);
1202 1243
@@ -1437,6 +1478,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1437 spin_lock(&current->sighand->siglock); 1478 spin_lock(&current->sighand->siglock);
1438 1479
1439 /* 1480 /*
1481 * Copy seccomp details explicitly here, in case they were changed
1482 * before holding sighand lock.
1483 */
1484 copy_seccomp(p);
1485
1486 /*
1440 * Process group and session signals need to be delivered to just the 1487 * Process group and session signals need to be delivered to just the
1441 * parent before the fork or both the parent and the child after the 1488 * parent before the fork or both the parent and the child after the
1442 * fork. Restart if a signal comes in before we add the new process to 1489 * fork. Restart if a signal comes in before we add the new process to
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 58125160417c..d5543e787e4e 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -199,6 +199,8 @@ static u32 seccomp_run_filters(int syscall)
199 199
200static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) 200static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
201{ 201{
202 BUG_ON(!spin_is_locked(&current->sighand->siglock));
203
202 if (current->seccomp.mode && current->seccomp.mode != seccomp_mode) 204 if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
203 return false; 205 return false;
204 206
@@ -207,6 +209,8 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
207 209
208static inline void seccomp_assign_mode(unsigned long seccomp_mode) 210static inline void seccomp_assign_mode(unsigned long seccomp_mode)
209{ 211{
212 BUG_ON(!spin_is_locked(&current->sighand->siglock));
213
210 current->seccomp.mode = seccomp_mode; 214 current->seccomp.mode = seccomp_mode;
211 set_tsk_thread_flag(current, TIF_SECCOMP); 215 set_tsk_thread_flag(current, TIF_SECCOMP);
212} 216}
@@ -332,6 +336,8 @@ out:
332 * @flags: flags to change filter behavior 336 * @flags: flags to change filter behavior
333 * @filter: seccomp filter to add to the current process 337 * @filter: seccomp filter to add to the current process
334 * 338 *
339 * Caller must be holding current->sighand->siglock lock.
340 *
335 * Returns 0 on success, -ve on error. 341 * Returns 0 on success, -ve on error.
336 */ 342 */
337static long seccomp_attach_filter(unsigned int flags, 343static long seccomp_attach_filter(unsigned int flags,
@@ -340,6 +346,8 @@ static long seccomp_attach_filter(unsigned int flags,
340 unsigned long total_insns; 346 unsigned long total_insns;
341 struct seccomp_filter *walker; 347 struct seccomp_filter *walker;
342 348
349 BUG_ON(!spin_is_locked(&current->sighand->siglock));
350
343 /* Validate resulting filter length. */ 351 /* Validate resulting filter length. */
344 total_insns = filter->prog->len; 352 total_insns = filter->prog->len;
345 for (walker = current->seccomp.filter; walker; walker = walker->prev) 353 for (walker = current->seccomp.filter; walker; walker = walker->prev)
@@ -529,6 +537,8 @@ static long seccomp_set_mode_strict(void)
529 const unsigned long seccomp_mode = SECCOMP_MODE_STRICT; 537 const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
530 long ret = -EINVAL; 538 long ret = -EINVAL;
531 539
540 spin_lock_irq(&current->sighand->siglock);
541
532 if (!seccomp_may_assign_mode(seccomp_mode)) 542 if (!seccomp_may_assign_mode(seccomp_mode))
533 goto out; 543 goto out;
534 544
@@ -539,6 +549,7 @@ static long seccomp_set_mode_strict(void)
539 ret = 0; 549 ret = 0;
540 550
541out: 551out:
552 spin_unlock_irq(&current->sighand->siglock);
542 553
543 return ret; 554 return ret;
544} 555}
@@ -566,13 +577,15 @@ static long seccomp_set_mode_filter(unsigned int flags,
566 577
567 /* Validate flags. */ 578 /* Validate flags. */
568 if (flags != 0) 579 if (flags != 0)
569 goto out; 580 return -EINVAL;
570 581
571 /* Prepare the new filter before holding any locks. */ 582 /* Prepare the new filter before holding any locks. */
572 prepared = seccomp_prepare_user_filter(filter); 583 prepared = seccomp_prepare_user_filter(filter);
573 if (IS_ERR(prepared)) 584 if (IS_ERR(prepared))
574 return PTR_ERR(prepared); 585 return PTR_ERR(prepared);
575 586
587 spin_lock_irq(&current->sighand->siglock);
588
576 if (!seccomp_may_assign_mode(seccomp_mode)) 589 if (!seccomp_may_assign_mode(seccomp_mode))
577 goto out; 590 goto out;
578 591
@@ -584,6 +597,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
584 597
585 seccomp_assign_mode(seccomp_mode); 598 seccomp_assign_mode(seccomp_mode);
586out: 599out:
600 spin_unlock_irq(&current->sighand->siglock);
587 seccomp_filter_free(prepared); 601 seccomp_filter_free(prepared);
588 return ret; 602 return ret;
589} 603}