aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Kees Cook <keescook@chromium.org> 2014-06-05 03:23:17 -0400
committer Kees Cook <keescook@chromium.org> 2014-07-18 15:13:40 -0400
commit c2e1f2e30daa551db3c670c0ccfeab20a540b9e1 (patch)
tree e18b30519856013f4ed0d6349a7a31325e896918
parent 3ba2530cc06eb4aee4f1f754f43d781e8a12ee09 (diff)
seccomp: implement SECCOMP_FILTER_FLAG_TSYNC
Applying restrictive seccomp filter programs to large or diverse codebases often requires handling threads which may be started early in the process lifetime (e.g., by code that is linked in). While it is possible to apply permissive programs prior to process start up, it is difficult to further restrict the kernel ABI to those threads after that point.

This change adds a new seccomp syscall flag to SECCOMP_SET_MODE_FILTER for synchronizing thread group seccomp filters at filter installation time.

When calling seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, filter) an attempt will be made to synchronize all threads in current's threadgroup to its new seccomp filter program. This is possible iff all threads are using a filter that is an ancestor to the filter current is attempting to synchronize to. NULL filters (where the task is running as SECCOMP_MODE_DISABLED) are also treated as ancestors, allowing threads to be transitioned into SECCOMP_MODE_FILTER. If prctl(PR_SET_NO_NEW_PRIVS, ...) has been set on the calling thread, no_new_privs will be set for all synchronized threads too. On success, 0 is returned. On failure, the pid of one of the failing threads will be returned and no filters will have been applied.

The race conditions against another thread are:
- requesting TSYNC (already handled by sighand lock)
- performing a clone (already handled by sighand lock)
- changing its filter (already handled by sighand lock)
- calling exec (handled by cred_guard_mutex)

The clone case is assisted by the fact that new threads will have their seccomp state duplicated from their parent before appearing on the tasklist.

Holding cred_guard_mutex means that seccomp filters cannot be assigned while in the middle of another thread's exec (potentially bypassing no_new_privs or similar). The call to de_thread() may kill threads waiting for the mutex.

Changes across threads to the filter pointer include a barrier.

Based on patches by Will Drewry.
Suggested-by: Julien Tinnes <jln@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
-rw-r--r--fs/exec.c2
-rw-r--r--include/linux/seccomp.h2
-rw-r--r--include/uapi/linux/seccomp.h3
-rw-r--r--kernel/seccomp.c135
4 files changed, 140 insertions, 2 deletions
diff --git a/fs/exec.c b/fs/exec.c
index 0f5c272410f6..ab1f1200ce5d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1216,7 +1216,7 @@ EXPORT_SYMBOL(install_exec_creds);
1216/* 1216/*
1217 * determine how safe it is to execute the proposed program 1217 * determine how safe it is to execute the proposed program
1218 * - the caller must hold ->cred_guard_mutex to protect against 1218 * - the caller must hold ->cred_guard_mutex to protect against
1219 * PTRACE_ATTACH 1219 * PTRACE_ATTACH or seccomp thread-sync
1220 */ 1220 */
1221static void check_unsafe_exec(struct linux_binprm *bprm) 1221static void check_unsafe_exec(struct linux_binprm *bprm)
1222{ 1222{
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 9ff98b4bfe2e..5d586a45a319 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -3,6 +3,8 @@
3 3
4#include <uapi/linux/seccomp.h> 4#include <uapi/linux/seccomp.h>
5 5
6#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC)
7
6#ifdef CONFIG_SECCOMP 8#ifdef CONFIG_SECCOMP
7 9
8#include <linux/thread_info.h> 10#include <linux/thread_info.h>
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index b258878ba754..0f238a43ff1e 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -14,6 +14,9 @@
14#define SECCOMP_SET_MODE_STRICT 0 14#define SECCOMP_SET_MODE_STRICT 0
15#define SECCOMP_SET_MODE_FILTER 1 15#define SECCOMP_SET_MODE_FILTER 1
16 16
17/* Valid flags for SECCOMP_SET_MODE_FILTER */
18#define SECCOMP_FILTER_FLAG_TSYNC 1
19
17/* 20/*
18 * All BPF programs must return a 32-bit value. 21 * All BPF programs must return a 32-bit value.
19 * The bottom 16-bits are for optional return data. 22 * The bottom 16-bits are for optional return data.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 9065d2c79c56..74f460179171 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -26,6 +26,7 @@
26#ifdef CONFIG_SECCOMP_FILTER 26#ifdef CONFIG_SECCOMP_FILTER
27#include <asm/syscall.h> 27#include <asm/syscall.h>
28#include <linux/filter.h> 28#include <linux/filter.h>
29#include <linux/pid.h>
29#include <linux/ptrace.h> 30#include <linux/ptrace.h>
30#include <linux/security.h> 31#include <linux/security.h>
31#include <linux/tracehook.h> 32#include <linux/tracehook.h>
@@ -225,6 +226,114 @@ static inline void seccomp_assign_mode(struct task_struct *task,
225} 226}
226 227
227#ifdef CONFIG_SECCOMP_FILTER 228#ifdef CONFIG_SECCOMP_FILTER
229/* Returns 1 if parent is NULL or is reachable from child by following ->prev (child itself counts), 0 otherwise. */
230static int is_ancestor(struct seccomp_filter *parent,
231 struct seccomp_filter *child)
232{
233 /* NULL is the root ancestor, so it precedes every filter chain. */
234 if (parent == NULL)
235 return 1;
236 for (; child; child = child->prev) /* walk child's chain via ->prev until it ends */
237 if (child == parent)
238 return 1;
239 return 0; /* chain ended without meeting parent */
240}
241
242/**
243 * seccomp_can_sync_threads: checks if all threads can be synchronized
244 *
245 * Expects sighand and cred_guard_mutex locks to be held; both are asserted with BUG_ON() on entry.
246 *
247 * Returns 0 on success, -ve on error, or the pid of a thread which was
248 * either not in the correct seccomp mode or did not have an ancestral
249 * seccomp filter. Only the first such thread found is reported, and -ESRCH is returned if its pid resolves to 0.
250 */
251static inline pid_t seccomp_can_sync_threads(void)
252{
253 struct task_struct *thread, *caller;
254
255 BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
256 BUG_ON(!spin_is_locked(&current->sighand->siglock));
257
258 /* Validate all threads being eligible for synchronization. */
259 caller = current;
260 for_each_thread(caller, thread) {
261 pid_t failed;
262
263 /* Skip current, since it is initiating the sync. */
264 if (thread == caller)
265 continue;
266 /* Eligible: seccomp disabled, or filter mode with a filter that is an ancestor of caller's current filter. */
267 if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
268 (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
269 is_ancestor(thread->seccomp.filter,
270 caller->seccomp.filter)))
271 continue;
272
273 /* Return the first thread that cannot be synchronized. */
274 failed = task_pid_vnr(thread);
275 /* If the pid cannot be resolved, then return -ESRCH (a warning is also raised via WARN_ON). */
276 if (unlikely(WARN_ON(failed == 0)))
277 failed = -ESRCH;
278 return failed;
279 }
280
281 return 0;
282}
283
284/**
285 * seccomp_sync_threads: sets all threads to use current's filter
286 *
287 * Expects sighand and cred_guard_mutex locks to be held, and for
288 * seccomp_can_sync_threads() to have returned success already
289 * without dropping the locks.
290 * Every thread other than current has its filter pointer replaced with current's filter; threads in SECCOMP_MODE_DISABLED are additionally switched to SECCOMP_MODE_FILTER.
291 */
292static inline void seccomp_sync_threads(void)
293{
294 struct task_struct *thread, *caller;
295
296 BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
297 BUG_ON(!spin_is_locked(&current->sighand->siglock));
298
299 /* Synchronize all threads. */
300 caller = current;
301 for_each_thread(caller, thread) {
302 /* Skip current, since it needs no changes. */
303 if (thread == caller)
304 continue;
305
306 /* Get a task reference for the new leaf node (taken once per synchronized thread). */
307 get_seccomp_filter(caller);
308 /*
309 * Drop the task reference to the shared ancestor since
310 * current's path will hold a reference. (This also
311 * allows a put before the assignment.) The new pointer is
312 * then published with smp_store_release().
313 */
314 put_seccomp_filter(thread);
315 smp_store_release(&thread->seccomp.filter,
316 caller->seccomp.filter);
317 /*
318 * Opt the other thread into seccomp if needed.
319 * As threads are considered to be trust-realm
320 * equivalent (see ptrace_may_access), it is safe to
321 * allow one thread to transition the other.
322 */
323 if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
324 /*
325 * Don't let an unprivileged task work around
326 * the no_new_privs restriction by creating
327 * a thread that sets it up, enters seccomp,
328 * then dies. NOTE(review): no_new_privs is copied only on this SECCOMP_MODE_DISABLED path; a thread already in filter mode keeps its own setting - confirm that is intended.
329 */
330 if (task_no_new_privs(caller))
331 task_set_no_new_privs(thread);
332
333 seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
334 }
335 }
336}
336
228/** 337/**
229 * seccomp_prepare_filter: Prepares a seccomp filter for use. 338 * seccomp_prepare_filter: Prepares a seccomp filter for use.
230 * @fprog: BPF program to install 339 * @fprog: BPF program to install
@@ -364,6 +473,15 @@ static long seccomp_attach_filter(unsigned int flags,
364 if (total_insns > MAX_INSNS_PER_PATH) 473 if (total_insns > MAX_INSNS_PER_PATH)
365 return -ENOMEM; 474 return -ENOMEM;
366 475
476 /* If thread sync has been requested, check that it is possible. */
477 if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
478 int ret;
479
480 ret = seccomp_can_sync_threads();
481 if (ret)
482 return ret;
483 }
484
367 /* 485 /*
368 * If there is an existing filter, make it the prev and don't drop its 486 * If there is an existing filter, make it the prev and don't drop its
369 * task reference. 487 * task reference.
@@ -371,6 +489,10 @@ static long seccomp_attach_filter(unsigned int flags,
371 filter->prev = current->seccomp.filter; 489 filter->prev = current->seccomp.filter;
372 current->seccomp.filter = filter; 490 current->seccomp.filter = filter;
373 491
492 /* Now that the new filter is in place, synchronize to all threads. */
493 if (flags & SECCOMP_FILTER_FLAG_TSYNC)
494 seccomp_sync_threads();
495
374 return 0; 496 return 0;
375} 497}
376 498
@@ -590,7 +712,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
590 long ret = -EINVAL; 712 long ret = -EINVAL;
591 713
592 /* Validate flags. */ 714 /* Validate flags. */
593 if (flags != 0) 715 if (flags & ~SECCOMP_FILTER_FLAG_MASK)
594 return -EINVAL; 716 return -EINVAL;
595 717
596 /* Prepare the new filter before holding any locks. */ 718 /* Prepare the new filter before holding any locks. */
@@ -598,6 +720,14 @@ static long seccomp_set_mode_filter(unsigned int flags,
598 if (IS_ERR(prepared)) 720 if (IS_ERR(prepared))
599 return PTR_ERR(prepared); 721 return PTR_ERR(prepared);
600 722
723 /*
724 * Make sure we cannot change seccomp or nnp state via TSYNC
725 * while another thread is in the middle of calling exec.
726 */
727 if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
728 mutex_lock_killable(&current->signal->cred_guard_mutex))
729 goto out_free;
730
601 spin_lock_irq(&current->sighand->siglock); 731 spin_lock_irq(&current->sighand->siglock);
602 732
603 if (!seccomp_may_assign_mode(seccomp_mode)) 733 if (!seccomp_may_assign_mode(seccomp_mode))
@@ -612,6 +742,9 @@ static long seccomp_set_mode_filter(unsigned int flags,
612 seccomp_assign_mode(current, seccomp_mode); 742 seccomp_assign_mode(current, seccomp_mode);
613out: 743out:
614 spin_unlock_irq(&current->sighand->siglock); 744 spin_unlock_irq(&current->sighand->siglock);
745 if (flags & SECCOMP_FILTER_FLAG_TSYNC)
746 mutex_unlock(&current->signal->cred_guard_mutex);
747out_free:
615 seccomp_filter_free(prepared); 748 seccomp_filter_free(prepared);
616 return ret; 749 return ret;
617} 750}