diff options
| author | Will Drewry <wad@chromium.org> | 2011-07-27 12:27:07 -0400 |
|---|---|---|
| committer | Leann Ogasawara <leann.ogasawara@canonical.com> | 2011-08-30 17:33:49 -0400 |
| commit | 1ee5facfde4bae032bc962c8eedf299c7c9ac3a7 (patch) | |
| tree | 60041bbe593849e58d6cb7cfbdd5655a368c2083 /include | |
| parent | c056eec1542e660c689b542290f89cecf6de6abc (diff) | |
UBUNTU: SAUCE: seccomp_filter: new mode with configurable syscall filters
This change adds a new seccomp mode which specifies the allowed system
calls dynamically. When in the new mode (13), all system calls are
checked against process-defined filters - first by system call number,
then by a filter string. If an entry exists for a given system call and
all filter predicates evaluate to true, then the task may proceed.
Otherwise, the task is killed.
Filter string parsing and evaluation is handled by the ftrace filter
engine. Related patches tweak the perf filter trace and free calls,
allowing them to be shared. Filters inherit their understanding of
types and arguments for each system call from the CONFIG_FTRACE_SYSCALLS
subsystem which already populates this information in syscall_metadata
associated enter_event (and exit_event) structures. If
CONFIG_FTRACE_SYSCALLS is not compiled in, only filter strings of "1"
will be allowed.
The net result is a process may have its system calls filtered using the
ftrace filter engine's inherent understanding of system calls. The set
of filters is specified through the PR_SET_SECCOMP_FILTER argument in
prctl(). For example, a filterset for a process, like pdftotext, that
should only process read-only input could (roughly) look like:
sprintf(rdonly, "flags == %u", O_RDONLY|O_LARGEFILE);
type = PR_SECCOMP_FILTER_SYSCALL;
prctl(PR_SET_SECCOMP_FILTER, type, __NR_open, rdonly);
prctl(PR_SET_SECCOMP_FILTER, type, __NR__llseek, "1");
prctl(PR_SET_SECCOMP_FILTER, type, __NR_brk, "1");
prctl(PR_SET_SECCOMP_FILTER, type, __NR_close, "1");
prctl(PR_SET_SECCOMP_FILTER, type, __NR_exit_group, "1");
prctl(PR_SET_SECCOMP_FILTER, type, __NR_fstat64, "1");
prctl(PR_SET_SECCOMP_FILTER, type, __NR_mmap2, "1");
prctl(PR_SET_SECCOMP_FILTER, type, __NR_munmap, "1");
prctl(PR_SET_SECCOMP_FILTER, type, __NR_read, "1");
prctl(PR_SET_SECCOMP_FILTER, type, __NR_write, "fd == 1 || fd == 2");
prctl(PR_SET_SECCOMP, 13);
Subsequent calls to PR_SET_SECCOMP_FILTER for the same system call will
be &&'d together to ensure that attack surface may only be reduced:
prctl(PR_SET_SECCOMP_FILTER, type, __NR_write, "fd != 2");
With the earlier example, the active filter becomes:
"(fd == 1 || fd == 2) && (fd != 2)"
The patch also adds PR_CLEAR_SECCOMP_FILTER and PR_GET_SECCOMP_FILTER.
The latter returns the current filter for a system call to userspace:
prctl(PR_GET_SECCOMP_FILTER, type, __NR_write, buf, bufsize);
while the former clears any filters for a given system call changing it
back to a default deny:
prctl(PR_CLEAR_SECCOMP_FILTER, type, __NR_write);
Note, type may be either PR_SECCOMP_FILTER_EVENT or
PR_SECCOMP_FILTER_SYSCALL. This allows for ftrace event ids to be used
in lieu of system call numbers. At present, only syscalls:sys_enter_*
event ids are supported, but this allows for potential future extension
of the backend.
v11: - Use mode "13" to avoid future overlap; with comment update
- Use kref; extra memset; other clean up from msb@chromium.org
- Cleaned up Makefile object merging since locally shared symbols are gone
v10: - Note that PERF_EVENTS are also needed for ftrace filter engine support.
- Removed dependency on ftrace code changes for event_filters
(wrapping with perf_events and violating opaqueness for the filter str)
- pulled in all the hacks to get access to syscall_metadata and build
call objects for filter evaluation.
v9: - rebase on to de505e709ffb09a7382ca8e0d8c7dbb171ba5
- disallow PR_SECCOMP_FILTER_EVENT when a compat task is calling
as ftrace has no compat_syscalls awareness yet.
- return -ENOSYS when filter engine strings are used on a compat call
as there are no compat_syscalls events to reference yet.
v8: - expand parenthetical use during SET_SECCOMP_FILTER to avoid operator
precedence undermining attack surface reduction (caught by
segoon@openwall.com). Opted to waste bytes on () rather than reparse to
avoid OP_OR precedence overriding extend_filter's intentions.
- remove more lingering references to @state
- fix incorrect compat mismatch check (anyone up for a Tested-By?)
v7: - disallow seccomp_filter inheritance across fork except when seccomp
is active. This avoids filters leaking across processes when they
are not actively in use but ensure an allowed fork/clone doesn't drop
filters.
- remove the Mode: print from show as it reflected current and not the
filters holder.
v6: - clean up minor unnecessary changes (empty lines, ordering, etc)
- fix one overly long line
- add refcount overflow BUG_ON
v5: - drop mutex usage when the task_struct is safe to access directly
v4: - move off of RCU to a read/write guarding mutex after
paulmck@linux.vnet.ibm.com's feedback (mem leak, rcu fail)
- stopped inc/dec refcounts in mutex guard sections
- added required changes to init the mutex in INIT_TASK and safely
lock around fork inheritance.
- added id_type support to the prctl interface to support using
ftrace event ids as an alternative to syscall numbers. Behavior
is identical otherwise (as per discussion with mingo@elte.hu)
v3: - always block execve calls (as per torvalds@linux-foundation.org)
- add __NR_seccomp_execve(_32) to seccomp-supporting arches
- ensure compat tasks can't reach ftrace:syscalls
- dropped new defines for seccomp modes.
- two level array instead of hlists (sugg. by olofj@chromium.org)
- added generic Kconfig entry that is not connected.
- dropped internal seccomp.h
- move prctl helpers to seccomp_filter
- killed seccomp_t typedef (as per checkpatch)
v2: - changed to use the existing syscall number ABI.
- prctl changes to minimize parsing in the kernel:
prctl(PR_SET_SECCOMP, {0 | 1 | 2 }, { 0 | ON_EXEC });
prctl(PR_SET_SECCOMP_FILTER, __NR_read, "fd == 5");
prctl(PR_CLEAR_SECCOMP_FILTER, __NR_read);
prctl(PR_GET_SECCOMP_FILTER, __NR_read, buf, bufsize);
- defined PR_SECCOMP_MODE_STRICT and ..._FILTER
- added flags
- provide a default fail syscall_nr_to_meta in ftrace
- provides fallback for unhooked system calls
- use -ENOSYS and ERR_PTR(-ENOSYS) for stubbed functionality
- added kernel/seccomp.h to share seccomp.c/seccomp_filter.c
- moved to a hlist and 4 bit hash of linked lists
- added support to operate without CONFIG_FTRACE_SYSCALLS
- moved Kconfig support next to SECCOMP
- made Kconfig entries dependent on EXPERIMENTAL
- added macros to avoid ifdefs from kernel/fork.c
- added compat task/filter matching
- drop seccomp.h inclusion in sched.h and drop seccomp_t
- added Filtering to "show" output
- added on_exec state dup'ing when enabling after a fast-path accept.
Signed-off-by: Will Drewry <wad@chromium.org>
BUG=chromium-os:14496
TEST=built in x86-alex. Out of tree commandline helper test confirms functionality works. Will check in a test into the minijail repo which can be used from autotest.
Change-Id: I901595e3399914783739d113a058d83550ddf8e2
Reviewed-on: http://gerrit.chromium.org/gerrit/4814
Reviewed-by: Sonny Rao <sonnyrao@chromium.org>
Tested-by: Will Drewry <wad@chromium.org>
Signed-off-by: Kees Cook <kees.cook@canonical.com>
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/init_task.h | 12 | ||||
| -rw-r--r-- | include/linux/prctl.h | 7 | ||||
| -rw-r--r-- | include/linux/sched.h | 2 | ||||
| -rw-r--r-- | include/linux/seccomp.h | 119 |
4 files changed, 136 insertions, 4 deletions
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 580f70c0239..56deaf25371 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
| @@ -126,6 +126,17 @@ extern struct cred init_cred; | |||
| 126 | # define INIT_PERF_EVENTS(tsk) | 126 | # define INIT_PERF_EVENTS(tsk) |
| 127 | #endif | 127 | #endif |
| 128 | 128 | ||
| 129 | #ifdef CONFIG_SECCOMP_FILTER | ||
| 130 | # define INIT_SECCOMP_FILTER(tsk) \ | ||
| 131 | .seccomp = { \ | ||
| 132 | .filters_guard = \ | ||
| 133 | __MUTEX_INITIALIZER(tsk.seccomp.filters_guard), \ | ||
| 134 | }, | ||
| 135 | #else | ||
| 136 | # define INIT_SECCOMP_FILTER(tsk) | ||
| 137 | #endif | ||
| 138 | |||
| 139 | |||
| 129 | /* | 140 | /* |
| 130 | * INIT_TASK is used to set up the first task table, touch at | 141 | * INIT_TASK is used to set up the first task table, touch at |
| 131 | * your own risk!. Base=0, limit=0x1fffff (=2MB) | 142 | * your own risk!. Base=0, limit=0x1fffff (=2MB) |
| @@ -188,6 +199,7 @@ extern struct cred init_cred; | |||
| 188 | .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ | 199 | .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ |
| 189 | INIT_IDS \ | 200 | INIT_IDS \ |
| 190 | INIT_PERF_EVENTS(tsk) \ | 201 | INIT_PERF_EVENTS(tsk) \ |
| 202 | INIT_SECCOMP_FILTER(tsk) \ | ||
| 191 | INIT_TRACE_IRQFLAGS \ | 203 | INIT_TRACE_IRQFLAGS \ |
| 192 | INIT_LOCKDEP \ | 204 | INIT_LOCKDEP \ |
| 193 | INIT_FTRACE_GRAPH \ | 205 | INIT_FTRACE_GRAPH \ |
diff --git a/include/linux/prctl.h b/include/linux/prctl.h index da7837bbd2c..74deeb717c6 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h | |||
| @@ -64,6 +64,13 @@ | |||
| 64 | #define PR_GET_SECCOMP 21 | 64 | #define PR_GET_SECCOMP 21 |
| 65 | #define PR_SET_SECCOMP 22 | 65 | #define PR_SET_SECCOMP 22 |
| 66 | 66 | ||
| 67 | /* Get/set process seccomp filters */ | ||
| 68 | #define PR_GET_SECCOMP_FILTER 35 | ||
| 69 | #define PR_SET_SECCOMP_FILTER 36 | ||
| 70 | #define PR_CLEAR_SECCOMP_FILTER 37 | ||
| 71 | # define PR_SECCOMP_FILTER_SYSCALL 0 | ||
| 72 | # define PR_SECCOMP_FILTER_EVENT 1 | ||
| 73 | |||
| 67 | /* Get/set the capability bounding set (as per security/commoncap.c) */ | 74 | /* Get/set the capability bounding set (as per security/commoncap.c) */ |
| 68 | #define PR_CAPBSET_READ 23 | 75 | #define PR_CAPBSET_READ 23 |
| 69 | #define PR_CAPBSET_DROP 24 | 76 | #define PR_CAPBSET_DROP 24 |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 7badc5d9f07..9cdfbf4a97a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
| @@ -1411,7 +1411,7 @@ struct task_struct { | |||
| 1411 | uid_t loginuid; | 1411 | uid_t loginuid; |
| 1412 | unsigned int sessionid; | 1412 | unsigned int sessionid; |
| 1413 | #endif | 1413 | #endif |
| 1414 | seccomp_t seccomp; | 1414 | struct seccomp_struct seccomp; |
| 1415 | 1415 | ||
| 1416 | /* Thread group tracking */ | 1416 | /* Thread group tracking */ |
| 1417 | u32 parent_exec_id; | 1417 | u32 parent_exec_id; |
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 167c33361d9..f81a9827334 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h | |||
| @@ -1,13 +1,35 @@ | |||
| 1 | #ifndef _LINUX_SECCOMP_H | 1 | #ifndef _LINUX_SECCOMP_H |
| 2 | #define _LINUX_SECCOMP_H | 2 | #define _LINUX_SECCOMP_H |
| 3 | 3 | ||
| 4 | struct seq_file; | ||
| 4 | 5 | ||
| 5 | #ifdef CONFIG_SECCOMP | 6 | #ifdef CONFIG_SECCOMP |
| 6 | 7 | ||
| 8 | #include <linux/errno.h> | ||
| 7 | #include <linux/thread_info.h> | 9 | #include <linux/thread_info.h> |
| 10 | #include <linux/types.h> | ||
| 11 | #include <linux/mutex.h> | ||
| 8 | #include <asm/seccomp.h> | 12 | #include <asm/seccomp.h> |
| 9 | 13 | ||
| 10 | typedef struct { int mode; } seccomp_t; | 14 | struct seccomp_filters; |
| 15 | /** | ||
| 16 | * struct seccomp_struct - the state of a seccomp'ed process | ||
| 17 | * | ||
| 18 | * @mode: | ||
| 19 | * if this is 1, the process is under standard seccomp rules | ||
| 20 | * is 13, the process is only allowed to make system calls where | ||
| 21 | * associated filters evaluate successfully. | ||
| 22 | * @filters: Metadata for filters if using CONFIG_SECCOMP_FILTER. | ||
| 23 | * @filters assignment and use should always be guarded by | ||
| 24 | * @filters_guard. | ||
| 25 | */ | ||
| 26 | struct seccomp_struct { | ||
| 27 | int mode; | ||
| 28 | #ifdef CONFIG_SECCOMP_FILTER | ||
| 29 | struct mutex filters_guard; | ||
| 30 | struct seccomp_filters *filters; | ||
| 31 | #endif | ||
| 32 | }; | ||
| 11 | 33 | ||
| 12 | extern void __secure_computing(int); | 34 | extern void __secure_computing(int); |
| 13 | static inline void secure_computing(int this_syscall) | 35 | static inline void secure_computing(int this_syscall) |
| @@ -23,8 +45,7 @@ extern long prctl_set_seccomp(unsigned long); | |||
| 23 | 45 | ||
| 24 | #include <linux/errno.h> | 46 | #include <linux/errno.h> |
| 25 | 47 | ||
| 26 | typedef struct { } seccomp_t; | 48 | struct seccomp_struct { }; |
| 27 | |||
| 28 | #define secure_computing(x) do { } while (0) | 49 | #define secure_computing(x) do { } while (0) |
| 29 | 50 | ||
| 30 | static inline long prctl_get_seccomp(void) | 51 | static inline long prctl_get_seccomp(void) |
| @@ -39,4 +60,96 @@ static inline long prctl_set_seccomp(unsigned long arg2) | |||
| 39 | 60 | ||
| 40 | #endif /* CONFIG_SECCOMP */ | 61 | #endif /* CONFIG_SECCOMP */ |
| 41 | 62 | ||
| 63 | #ifdef CONFIG_SECCOMP_FILTER | ||
| 64 | |||
| 65 | #define seccomp_filter_init_task(_tsk) do { \ | ||
| 66 | mutex_init(&(_tsk)->seccomp.filters_guard); \ | ||
| 67 | (_tsk)->seccomp.filters = NULL; \ | ||
| 68 | } while (0); | ||
| 69 | |||
| 70 | /* Do nothing unless seccomp filtering is active. If not, the execve boundary | ||
| 71 | * can not be cleanly enforced and preset filters may leak across execve calls. | ||
| 72 | */ | ||
| 73 | #define seccomp_filter_fork(_tsk, _orig) do { \ | ||
| 74 | if ((_tsk)->seccomp.mode) { \ | ||
| 75 | (_tsk)->seccomp.mode = (_orig)->seccomp.mode; \ | ||
| 76 | mutex_lock(&(_orig)->seccomp.filters_guard); \ | ||
| 77 | (_tsk)->seccomp.filters = \ | ||
| 78 | get_seccomp_filters((_orig)->seccomp.filters); \ | ||
| 79 | mutex_unlock(&(_orig)->seccomp.filters_guard); \ | ||
| 80 | } \ | ||
| 81 | } while (0); | ||
| 82 | |||
| 83 | /* No locking is needed here because the task_struct will | ||
| 84 | * have no parallel consumers. | ||
| 85 | */ | ||
| 86 | #define seccomp_filter_free_task(_tsk) do { \ | ||
| 87 | put_seccomp_filters((_tsk)->seccomp.filters); \ | ||
| 88 | } while (0); | ||
| 89 | |||
| 90 | extern int seccomp_show_filters(struct seccomp_filters *filters, | ||
| 91 | struct seq_file *); | ||
| 92 | extern long seccomp_set_filter(int, char *); | ||
| 93 | extern long seccomp_clear_filter(int); | ||
| 94 | extern long seccomp_get_filter(int, char *, unsigned long); | ||
| 95 | |||
| 96 | extern long prctl_set_seccomp_filter(unsigned long, unsigned long, | ||
| 97 | char __user *); | ||
| 98 | extern long prctl_get_seccomp_filter(unsigned long, unsigned long, | ||
| 99 | char __user *, unsigned long); | ||
| 100 | extern long prctl_clear_seccomp_filter(unsigned long, unsigned long); | ||
| 101 | |||
| 102 | extern struct seccomp_filters *get_seccomp_filters(struct seccomp_filters *); | ||
| 103 | extern void put_seccomp_filters(struct seccomp_filters *); | ||
| 104 | |||
| 105 | extern int seccomp_test_filters(int); | ||
| 106 | extern void seccomp_filter_log_failure(int); | ||
| 107 | |||
| 108 | #else /* CONFIG_SECCOMP_FILTER */ | ||
| 109 | |||
| 110 | struct seccomp_filters { }; | ||
| 111 | #define seccomp_filter_init_task(_tsk) do { } while (0); | ||
| 112 | #define seccomp_filter_fork(_tsk, _orig) do { } while (0); | ||
| 113 | #define seccomp_filter_free_task(_tsk) do { } while (0); | ||
| 114 | |||
| 115 | static inline int seccomp_show_filters(struct seccomp_filters *filters, | ||
| 116 | struct seq_file *m) | ||
| 117 | { | ||
| 118 | return -ENOSYS; | ||
| 119 | } | ||
| 120 | |||
| 121 | static inline long seccomp_set_filter(int syscall_nr, char *filter) | ||
| 122 | { | ||
| 123 | return -ENOSYS; | ||
| 124 | } | ||
| 125 | |||
| 126 | static inline long seccomp_clear_filter(int syscall_nr) | ||
| 127 | { | ||
| 128 | return -ENOSYS; | ||
| 129 | } | ||
| 130 | |||
| 131 | static inline long seccomp_get_filter(int syscall_nr, | ||
| 132 | char *buf, unsigned long available) | ||
| 133 | { | ||
| 134 | return -ENOSYS; | ||
| 135 | } | ||
| 136 | |||
| 137 | static inline long prctl_set_seccomp_filter(unsigned long a2, unsigned long a3, | ||
| 138 | char __user *a4) | ||
| 139 | { | ||
| 140 | return -ENOSYS; | ||
| 141 | } | ||
| 142 | |||
| 143 | static inline long prctl_clear_seccomp_filter(unsigned long a2, | ||
| 144 | unsigned long a3) | ||
| 145 | { | ||
| 146 | return -ENOSYS; | ||
| 147 | } | ||
| 148 | |||
| 149 | static inline long prctl_get_seccomp_filter(unsigned long a2, unsigned long a3, | ||
| 150 | char __user *a4, unsigned long a5) | ||
| 151 | { | ||
| 152 | return -ENOSYS; | ||
| 153 | } | ||
| 154 | #endif /* CONFIG_SECCOMP_FILTER */ | ||
| 42 | #endif /* _LINUX_SECCOMP_H */ | 155 | #endif /* _LINUX_SECCOMP_H */ |
