diff options
-rw-r--r-- | arch/Kconfig | 17 | ||||
-rw-r--r-- | include/linux/Kbuild | 1 | ||||
-rw-r--r-- | include/linux/seccomp.h | 76 | ||||
-rw-r--r-- | kernel/fork.c | 3 | ||||
-rw-r--r-- | kernel/seccomp.c | 396 | ||||
-rw-r--r-- | kernel/sys.c | 2 |
6 files changed, 472 insertions, 23 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 684eb5af439d..91c2c730fc1a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
@@ -216,4 +216,21 @@ config HAVE_CMPXCHG_DOUBLE | |||
216 | config ARCH_WANT_OLD_COMPAT_IPC | 216 | config ARCH_WANT_OLD_COMPAT_IPC |
217 | bool | 217 | bool |
218 | 218 | ||
219 | config HAVE_ARCH_SECCOMP_FILTER | ||
220 | bool | ||
221 | help | ||
222 | This symbol should be selected by an architecture if it provides | ||
223 | asm/syscall.h, specifically syscall_get_arguments() and | ||
224 | syscall_get_arch(). | ||
225 | |||
226 | config SECCOMP_FILTER | ||
227 | def_bool y | ||
228 | depends on HAVE_ARCH_SECCOMP_FILTER && SECCOMP && NET | ||
229 | help | ||
230 | Enable tasks to build secure computing environments defined | ||
231 | in terms of Berkeley Packet Filter programs which implement | ||
232 | task-defined system call filtering policies. | ||
233 | |||
234 | See Documentation/prctl/seccomp_filter.txt for details. | ||
235 | |||
219 | source "kernel/gcov/Kconfig" | 236 | source "kernel/gcov/Kconfig" |
diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 3c9b616c834a..5c93d6c5d591 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild | |||
@@ -332,6 +332,7 @@ header-y += scc.h | |||
332 | header-y += sched.h | 332 | header-y += sched.h |
333 | header-y += screen_info.h | 333 | header-y += screen_info.h |
334 | header-y += sdla.h | 334 | header-y += sdla.h |
335 | header-y += seccomp.h | ||
335 | header-y += securebits.h | 336 | header-y += securebits.h |
336 | header-y += selinux_netlink.h | 337 | header-y += selinux_netlink.h |
337 | header-y += sem.h | 338 | header-y += sem.h |
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index d61f27fcaa97..86bb68fc7683 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h | |||
@@ -1,14 +1,67 @@ | |||
1 | #ifndef _LINUX_SECCOMP_H | 1 | #ifndef _LINUX_SECCOMP_H |
2 | #define _LINUX_SECCOMP_H | 2 | #define _LINUX_SECCOMP_H |
3 | 3 | ||
4 | #include <linux/compiler.h> | ||
5 | #include <linux/types.h> | ||
6 | |||
7 | |||
8 | /* Valid values for seccomp.mode and prctl(PR_SET_SECCOMP, <mode>) */ | ||
9 | #define SECCOMP_MODE_DISABLED 0 /* seccomp is not in use. */ | ||
10 | #define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */ | ||
11 | #define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ | ||
12 | |||
13 | /* | ||
14 | * All BPF programs must return a 32-bit value. | ||
15 | * The bottom 16-bits are reserved for future use. | ||
16 | * The upper 16-bits are ordered from least permissive values to most. | ||
17 | * | ||
18 | * The ordering ensures that a min_t() over composed return values always | ||
19 | * selects the least permissive choice. | ||
20 | */ | ||
21 | #define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */ | ||
22 | #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ | ||
23 | |||
24 | /* Masks for the return value sections. */ | ||
25 | #define SECCOMP_RET_ACTION 0x7fff0000U | ||
26 | #define SECCOMP_RET_DATA 0x0000ffffU | ||
27 | |||
28 | /** | ||
29 | * struct seccomp_data - the format the BPF program executes over. | ||
30 | * @nr: the system call number | ||
31 | * @arch: indicates system call convention as an AUDIT_ARCH_* value | ||
32 | * as defined in <linux/audit.h>. | ||
33 | * @instruction_pointer: at the time of the system call. | ||
34 | * @args: up to 6 system call arguments always stored as 64-bit values | ||
35 | * regardless of the architecture. | ||
36 | */ | ||
37 | struct seccomp_data { | ||
38 | int nr; | ||
39 | __u32 arch; | ||
40 | __u64 instruction_pointer; | ||
41 | __u64 args[6]; | ||
42 | }; | ||
4 | 43 | ||
44 | #ifdef __KERNEL__ | ||
5 | #ifdef CONFIG_SECCOMP | 45 | #ifdef CONFIG_SECCOMP |
6 | 46 | ||
7 | #include <linux/thread_info.h> | 47 | #include <linux/thread_info.h> |
8 | #include <asm/seccomp.h> | 48 | #include <asm/seccomp.h> |
9 | 49 | ||
50 | struct seccomp_filter; | ||
51 | /** | ||
52 | * struct seccomp - the state of a seccomp'ed process | ||
53 | * | ||
54 | * @mode: indicates one of the valid values above for controlled | ||
55 | * system calls available to a process. | ||
56 | * @filter: The metadata and ruleset for determining what system calls | ||
57 | * are allowed for a task. | ||
58 | * | ||
59 | * @filter must only be accessed from the context of current as there | ||
60 | * is no locking. | ||
61 | */ | ||
10 | struct seccomp { | 62 | struct seccomp { |
11 | int mode; | 63 | int mode; |
64 | struct seccomp_filter *filter; | ||
12 | }; | 65 | }; |
13 | 66 | ||
14 | extern void __secure_computing(int); | 67 | extern void __secure_computing(int); |
@@ -19,7 +72,7 @@ static inline void secure_computing(int this_syscall) | |||
19 | } | 72 | } |
20 | 73 | ||
21 | extern long prctl_get_seccomp(void); | 74 | extern long prctl_get_seccomp(void); |
22 | extern long prctl_set_seccomp(unsigned long); | 75 | extern long prctl_set_seccomp(unsigned long, char __user *); |
23 | 76 | ||
24 | static inline int seccomp_mode(struct seccomp *s) | 77 | static inline int seccomp_mode(struct seccomp *s) |
25 | { | 78 | { |
@@ -31,15 +84,16 @@ static inline int seccomp_mode(struct seccomp *s) | |||
31 | #include <linux/errno.h> | 84 | #include <linux/errno.h> |
32 | 85 | ||
33 | struct seccomp { }; | 86 | struct seccomp { }; |
87 | struct seccomp_filter { }; | ||
34 | 88 | ||
35 | #define secure_computing(x) do { } while (0) | 89 | #define secure_computing(x) 0 |
36 | 90 | ||
37 | static inline long prctl_get_seccomp(void) | 91 | static inline long prctl_get_seccomp(void) |
38 | { | 92 | { |
39 | return -EINVAL; | 93 | return -EINVAL; |
40 | } | 94 | } |
41 | 95 | ||
42 | static inline long prctl_set_seccomp(unsigned long arg2) | 96 | static inline long prctl_set_seccomp(unsigned long arg2, char __user *arg3) |
43 | { | 97 | { |
44 | return -EINVAL; | 98 | return -EINVAL; |
45 | } | 99 | } |
@@ -48,7 +102,21 @@ static inline int seccomp_mode(struct seccomp *s) | |||
48 | { | 102 | { |
49 | return 0; | 103 | return 0; |
50 | } | 104 | } |
51 | |||
52 | #endif /* CONFIG_SECCOMP */ | 105 | #endif /* CONFIG_SECCOMP */ |
53 | 106 | ||
107 | #ifdef CONFIG_SECCOMP_FILTER | ||
108 | extern void put_seccomp_filter(struct task_struct *tsk); | ||
109 | extern void get_seccomp_filter(struct task_struct *tsk); | ||
110 | extern u32 seccomp_bpf_load(int off); | ||
111 | #else /* CONFIG_SECCOMP_FILTER */ | ||
112 | static inline void put_seccomp_filter(struct task_struct *tsk) | ||
113 | { | ||
114 | return; | ||
115 | } | ||
116 | static inline void get_seccomp_filter(struct task_struct *tsk) | ||
117 | { | ||
118 | return; | ||
119 | } | ||
120 | #endif /* CONFIG_SECCOMP_FILTER */ | ||
121 | #endif /* __KERNEL__ */ | ||
54 | #endif /* _LINUX_SECCOMP_H */ | 122 | #endif /* _LINUX_SECCOMP_H */ |
diff --git a/kernel/fork.c b/kernel/fork.c index b9372a0bff18..f7cf6fb107ec 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/cgroup.h> | 34 | #include <linux/cgroup.h> |
35 | #include <linux/security.h> | 35 | #include <linux/security.h> |
36 | #include <linux/hugetlb.h> | 36 | #include <linux/hugetlb.h> |
37 | #include <linux/seccomp.h> | ||
37 | #include <linux/swap.h> | 38 | #include <linux/swap.h> |
38 | #include <linux/syscalls.h> | 39 | #include <linux/syscalls.h> |
39 | #include <linux/jiffies.h> | 40 | #include <linux/jiffies.h> |
@@ -170,6 +171,7 @@ void free_task(struct task_struct *tsk) | |||
170 | free_thread_info(tsk->stack); | 171 | free_thread_info(tsk->stack); |
171 | rt_mutex_debug_task_free(tsk); | 172 | rt_mutex_debug_task_free(tsk); |
172 | ftrace_graph_exit_task(tsk); | 173 | ftrace_graph_exit_task(tsk); |
174 | put_seccomp_filter(tsk); | ||
173 | free_task_struct(tsk); | 175 | free_task_struct(tsk); |
174 | } | 176 | } |
175 | EXPORT_SYMBOL(free_task); | 177 | EXPORT_SYMBOL(free_task); |
@@ -1162,6 +1164,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1162 | goto fork_out; | 1164 | goto fork_out; |
1163 | 1165 | ||
1164 | ftrace_graph_init_task(p); | 1166 | ftrace_graph_init_task(p); |
1167 | get_seccomp_filter(p); | ||
1165 | 1168 | ||
1166 | rt_mutex_init_task(p); | 1169 | rt_mutex_init_task(p); |
1167 | 1170 | ||
diff --git a/kernel/seccomp.c b/kernel/seccomp.c index e8d76c5895ea..0aeec1960f91 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c | |||
@@ -3,16 +3,343 @@ | |||
3 | * | 3 | * |
4 | * Copyright 2004-2005 Andrea Arcangeli <andrea@cpushare.com> | 4 | * Copyright 2004-2005 Andrea Arcangeli <andrea@cpushare.com> |
5 | * | 5 | * |
6 | * This defines a simple but solid secure-computing mode. | 6 | * Copyright (C) 2012 Google, Inc. |
7 | * Will Drewry <wad@chromium.org> | ||
8 | * | ||
9 | * This defines a simple but solid secure-computing facility. | ||
10 | * | ||
11 | * Mode 1 uses a fixed list of allowed system calls. | ||
12 | * Mode 2 allows user-defined system call filters in the form | ||
13 | * of Berkeley Packet Filters/Linux Socket Filters. | ||
7 | */ | 14 | */ |
8 | 15 | ||
16 | #include <linux/atomic.h> | ||
9 | #include <linux/audit.h> | 17 | #include <linux/audit.h> |
10 | #include <linux/seccomp.h> | ||
11 | #include <linux/sched.h> | ||
12 | #include <linux/compat.h> | 18 | #include <linux/compat.h> |
19 | #include <linux/sched.h> | ||
20 | #include <linux/seccomp.h> | ||
13 | 21 | ||
14 | /* #define SECCOMP_DEBUG 1 */ | 22 | /* #define SECCOMP_DEBUG 1 */ |
15 | #define NR_SECCOMP_MODES 1 | 23 | |
24 | #ifdef CONFIG_SECCOMP_FILTER | ||
25 | #include <asm/syscall.h> | ||
26 | #include <linux/filter.h> | ||
27 | #include <linux/security.h> | ||
28 | #include <linux/slab.h> | ||
29 | #include <linux/tracehook.h> | ||
30 | #include <linux/uaccess.h> | ||
31 | |||
32 | /** | ||
33 | * struct seccomp_filter - container for seccomp BPF programs | ||
34 | * | ||
35 | * @usage: reference count to manage the object lifetime. | ||
36 | * get/put helpers should be used when accessing an instance | ||
37 | * outside of a lifetime-guarded section. In general, this | ||
38 | * is only needed for handling filters shared across tasks. | ||
39 | * @prev: points to a previously installed, or inherited, filter | ||
40 | * @len: the number of instructions in the program | ||
41 | * @insns: the BPF program instructions to evaluate | ||
42 | * | ||
43 | * seccomp_filter objects are organized in a tree linked via the @prev | ||
44 | * pointer. For any task, it appears to be a singly-linked list starting | ||
45 | * with current->seccomp.filter, the most recently attached or inherited filter. | ||
46 | * However, multiple filters may share a @prev node, by way of fork(), which | ||
47 | * results in a unidirectional tree existing in memory. This is similar to | ||
48 | * how namespaces work. | ||
49 | * | ||
50 | * seccomp_filter objects should never be modified after being attached | ||
51 | * to a task_struct (other than @usage). | ||
52 | */ | ||
53 | struct seccomp_filter { | ||
54 | atomic_t usage; | ||
55 | struct seccomp_filter *prev; | ||
56 | unsigned short len; /* Instruction count */ | ||
57 | struct sock_filter insns[]; | ||
58 | }; | ||
59 | |||
60 | /* Limit any path through the tree to 256KB worth of instructions. */ | ||
61 | #define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter)) | ||
62 | |||
63 | static void seccomp_filter_log_failure(int syscall) | ||
64 | { | ||
65 | int compat = 0; | ||
66 | #ifdef CONFIG_COMPAT | ||
67 | compat = is_compat_task(); | ||
68 | #endif | ||
69 | pr_info("%s[%d]: %ssystem call %d blocked at 0x%lx\n", | ||
70 | current->comm, task_pid_nr(current), | ||
71 | (compat ? "compat " : ""), | ||
72 | syscall, KSTK_EIP(current)); | ||
73 | } | ||
74 | |||
75 | /** | ||
76 | * get_u32 - returns a u32 offset into data | ||
77 | * @data: an unsigned 64-bit value | ||
78 | * @index: 0 or 1 to return the first or second 32-bits | ||
79 | * | ||
80 | * This inline exists to hide the length of unsigned long. If a 32-bit | ||
81 | * unsigned long is passed in, it will be extended and the top 32-bits will be | ||
82 | * 0. If it is a 64-bit unsigned long, then whatever data is resident will be | ||
83 | * properly returned. | ||
84 | * | ||
85 | * Endianness is explicitly ignored and left for BPF program authors to manage | ||
86 | * as per the specific architecture. | ||
87 | */ | ||
88 | static inline u32 get_u32(u64 data, int index) | ||
89 | { | ||
90 | return ((u32 *)&data)[index]; | ||
91 | } | ||
92 | |||
93 | /* Helper for seccomp_bpf_load() below. */ | ||
94 | #define BPF_DATA(_name) offsetof(struct seccomp_data, _name) | ||
95 | /** | ||
96 | * seccomp_bpf_load - returns the 32-bit value at the requested offset | ||
97 | * @off: offset into struct seccomp_data to load from | ||
98 | * | ||
99 | * Returns the requested 32-bits of data. | ||
100 | * seccomp_check_filter() must ensure that @off is 32-bit aligned | ||
101 | * and not out of bounds. Failure to do so is a BUG. | ||
102 | */ | ||
103 | u32 seccomp_bpf_load(int off) | ||
104 | { | ||
105 | struct pt_regs *regs = task_pt_regs(current); | ||
106 | if (off == BPF_DATA(nr)) | ||
107 | return syscall_get_nr(current, regs); | ||
108 | if (off == BPF_DATA(arch)) | ||
109 | return syscall_get_arch(current, regs); | ||
110 | if (off >= BPF_DATA(args[0]) && off < BPF_DATA(args[6])) { | ||
111 | unsigned long value; | ||
112 | int arg = (off - BPF_DATA(args[0])) / sizeof(u64); | ||
113 | int index = !!(off % sizeof(u64)); | ||
114 | syscall_get_arguments(current, regs, arg, 1, &value); | ||
115 | return get_u32(value, index); | ||
116 | } | ||
117 | if (off == BPF_DATA(instruction_pointer)) | ||
118 | return get_u32(KSTK_EIP(current), 0); | ||
119 | if (off == BPF_DATA(instruction_pointer) + sizeof(u32)) | ||
120 | return get_u32(KSTK_EIP(current), 1); | ||
121 | /* seccomp_check_filter should make this impossible. */ | ||
122 | BUG(); | ||
123 | } | ||
124 | |||
125 | /** | ||
126 | * seccomp_check_filter - verify seccomp filter code | ||
127 | * @filter: filter to verify | ||
128 | * @flen: length of filter | ||
129 | * | ||
130 | * Takes a previously checked filter (by sk_chk_filter) and | ||
131 | * redirects all filter code that loads struct sk_buff data | ||
132 | * and related data through seccomp_bpf_load. It also | ||
133 | * enforces length and alignment checking of those loads. | ||
134 | * | ||
135 | * Returns 0 if the rule set is legal or -EINVAL if not. | ||
136 | */ | ||
137 | static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) | ||
138 | { | ||
139 | int pc; | ||
140 | for (pc = 0; pc < flen; pc++) { | ||
141 | struct sock_filter *ftest = &filter[pc]; | ||
142 | u16 code = ftest->code; | ||
143 | u32 k = ftest->k; | ||
144 | |||
145 | switch (code) { | ||
146 | case BPF_S_LD_W_ABS: | ||
147 | ftest->code = BPF_S_ANC_SECCOMP_LD_W; | ||
148 | /* 32-bit aligned and not out of bounds. */ | ||
149 | if (k >= sizeof(struct seccomp_data) || k & 3) | ||
150 | return -EINVAL; | ||
151 | continue; | ||
152 | case BPF_S_LD_W_LEN: | ||
153 | ftest->code = BPF_S_LD_IMM; | ||
154 | ftest->k = sizeof(struct seccomp_data); | ||
155 | continue; | ||
156 | case BPF_S_LDX_W_LEN: | ||
157 | ftest->code = BPF_S_LDX_IMM; | ||
158 | ftest->k = sizeof(struct seccomp_data); | ||
159 | continue; | ||
160 | /* Explicitly include allowed calls. */ | ||
161 | case BPF_S_RET_K: | ||
162 | case BPF_S_RET_A: | ||
163 | case BPF_S_ALU_ADD_K: | ||
164 | case BPF_S_ALU_ADD_X: | ||
165 | case BPF_S_ALU_SUB_K: | ||
166 | case BPF_S_ALU_SUB_X: | ||
167 | case BPF_S_ALU_MUL_K: | ||
168 | case BPF_S_ALU_MUL_X: | ||
169 | case BPF_S_ALU_DIV_X: | ||
170 | case BPF_S_ALU_AND_K: | ||
171 | case BPF_S_ALU_AND_X: | ||
172 | case BPF_S_ALU_OR_K: | ||
173 | case BPF_S_ALU_OR_X: | ||
174 | case BPF_S_ALU_LSH_K: | ||
175 | case BPF_S_ALU_LSH_X: | ||
176 | case BPF_S_ALU_RSH_K: | ||
177 | case BPF_S_ALU_RSH_X: | ||
178 | case BPF_S_ALU_NEG: | ||
179 | case BPF_S_LD_IMM: | ||
180 | case BPF_S_LDX_IMM: | ||
181 | case BPF_S_MISC_TAX: | ||
182 | case BPF_S_MISC_TXA: | ||
183 | case BPF_S_ALU_DIV_K: | ||
184 | case BPF_S_LD_MEM: | ||
185 | case BPF_S_LDX_MEM: | ||
186 | case BPF_S_ST: | ||
187 | case BPF_S_STX: | ||
188 | case BPF_S_JMP_JA: | ||
189 | case BPF_S_JMP_JEQ_K: | ||
190 | case BPF_S_JMP_JEQ_X: | ||
191 | case BPF_S_JMP_JGE_K: | ||
192 | case BPF_S_JMP_JGE_X: | ||
193 | case BPF_S_JMP_JGT_K: | ||
194 | case BPF_S_JMP_JGT_X: | ||
195 | case BPF_S_JMP_JSET_K: | ||
196 | case BPF_S_JMP_JSET_X: | ||
197 | continue; | ||
198 | default: | ||
199 | return -EINVAL; | ||
200 | } | ||
201 | } | ||
202 | return 0; | ||
203 | } | ||
204 | |||
205 | /** | ||
206 | * seccomp_run_filters - evaluates all seccomp filters against @syscall | ||
207 | * @syscall: number of the current system call | ||
208 | * | ||
209 | * Returns valid seccomp BPF response codes. | ||
210 | */ | ||
211 | static u32 seccomp_run_filters(int syscall) | ||
212 | { | ||
213 | struct seccomp_filter *f; | ||
214 | u32 ret = SECCOMP_RET_KILL; | ||
215 | /* | ||
216 | * Filters are walked from the most recently attached one backwards; | ||
217 | * the first result other than SECCOMP_RET_ALLOW ends the walk. | ||
218 | */ | ||
219 | for (f = current->seccomp.filter; f; f = f->prev) { | ||
220 | ret = sk_run_filter(NULL, f->insns); | ||
221 | if (ret != SECCOMP_RET_ALLOW) | ||
222 | break; | ||
223 | } | ||
224 | return ret; | ||
225 | } | ||
226 | |||
227 | /** | ||
228 | * seccomp_attach_filter: Attaches a seccomp filter to current. | ||
229 | * @fprog: BPF program to install | ||
230 | * | ||
231 | * Returns 0 on success or an errno on failure. | ||
232 | */ | ||
233 | static long seccomp_attach_filter(struct sock_fprog *fprog) | ||
234 | { | ||
235 | struct seccomp_filter *filter; | ||
236 | unsigned long fp_size = fprog->len * sizeof(struct sock_filter); | ||
237 | unsigned long total_insns = fprog->len; | ||
238 | long ret; | ||
239 | |||
240 | if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) | ||
241 | return -EINVAL; | ||
242 | |||
243 | for (filter = current->seccomp.filter; filter; filter = filter->prev) | ||
244 | total_insns += filter->len + 4; /* include a 4 instr penalty */ | ||
245 | if (total_insns > MAX_INSNS_PER_PATH) | ||
246 | return -ENOMEM; | ||
247 | |||
248 | /* | ||
249 | * Installing a seccomp filter requires that the task have | ||
250 | * CAP_SYS_ADMIN in its namespace or be running with no_new_privs. | ||
251 | * This avoids scenarios where unprivileged tasks can affect the | ||
252 | * behavior of privileged children. | ||
253 | */ | ||
254 | if (!current->no_new_privs && | ||
255 | security_capable_noaudit(current_cred(), current_user_ns(), | ||
256 | CAP_SYS_ADMIN) != 0) | ||
257 | return -EACCES; | ||
258 | |||
259 | /* Allocate a new seccomp_filter */ | ||
260 | filter = kzalloc(sizeof(struct seccomp_filter) + fp_size, | ||
261 | GFP_KERNEL|__GFP_NOWARN); | ||
262 | if (!filter) | ||
263 | return -ENOMEM; | ||
264 | atomic_set(&filter->usage, 1); | ||
265 | filter->len = fprog->len; | ||
266 | |||
267 | /* Copy the instructions from fprog. */ | ||
268 | ret = -EFAULT; | ||
269 | if (copy_from_user(filter->insns, fprog->filter, fp_size)) | ||
270 | goto fail; | ||
271 | |||
272 | /* Check and rewrite the fprog via the skb checker */ | ||
273 | ret = sk_chk_filter(filter->insns, filter->len); | ||
274 | if (ret) | ||
275 | goto fail; | ||
276 | |||
277 | /* Check and rewrite the fprog for seccomp use */ | ||
278 | ret = seccomp_check_filter(filter->insns, filter->len); | ||
279 | if (ret) | ||
280 | goto fail; | ||
281 | |||
282 | /* | ||
283 | * If there is an existing filter, make it the prev and don't drop its | ||
284 | * task reference. | ||
285 | */ | ||
286 | filter->prev = current->seccomp.filter; | ||
287 | current->seccomp.filter = filter; | ||
288 | return 0; | ||
289 | fail: | ||
290 | kfree(filter); | ||
291 | return ret; | ||
292 | } | ||
293 | |||
294 | /** | ||
295 | * seccomp_attach_user_filter - attaches a user-supplied sock_fprog | ||
296 | * @user_filter: pointer to the user data containing a sock_fprog. | ||
297 | * | ||
298 | * Returns 0 on success and non-zero otherwise. | ||
299 | */ | ||
300 | long seccomp_attach_user_filter(char __user *user_filter) | ||
301 | { | ||
302 | struct sock_fprog fprog; | ||
303 | long ret = -EFAULT; | ||
304 | |||
305 | #ifdef CONFIG_COMPAT | ||
306 | if (is_compat_task()) { | ||
307 | struct compat_sock_fprog fprog32; | ||
308 | if (copy_from_user(&fprog32, user_filter, sizeof(fprog32))) | ||
309 | goto out; | ||
310 | fprog.len = fprog32.len; | ||
311 | fprog.filter = compat_ptr(fprog32.filter); | ||
312 | } else /* falls through to the if below. */ | ||
313 | #endif | ||
314 | if (copy_from_user(&fprog, user_filter, sizeof(fprog))) | ||
315 | goto out; | ||
316 | ret = seccomp_attach_filter(&fprog); | ||
317 | out: | ||
318 | return ret; | ||
319 | } | ||
320 | |||
321 | /* get_seccomp_filter - increments the reference count of the filter on @tsk */ | ||
322 | void get_seccomp_filter(struct task_struct *tsk) | ||
323 | { | ||
324 | struct seccomp_filter *orig = tsk->seccomp.filter; | ||
325 | if (!orig) | ||
326 | return; | ||
327 | /* Reference count is bounded by the number of total processes. */ | ||
328 | atomic_inc(&orig->usage); | ||
329 | } | ||
330 | |||
331 | /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ | ||
332 | void put_seccomp_filter(struct task_struct *tsk) | ||
333 | { | ||
334 | struct seccomp_filter *orig = tsk->seccomp.filter; | ||
335 | /* Clean up single-reference branches iteratively. */ | ||
336 | while (orig && atomic_dec_and_test(&orig->usage)) { | ||
337 | struct seccomp_filter *freeme = orig; | ||
338 | orig = orig->prev; | ||
339 | kfree(freeme); | ||
340 | } | ||
341 | } | ||
342 | #endif /* CONFIG_SECCOMP_FILTER */ | ||
16 | 343 | ||
17 | /* | 344 | /* |
18 | * Secure computing mode 1 allows only read/write/exit/sigreturn. | 345 | * Secure computing mode 1 allows only read/write/exit/sigreturn. |
@@ -34,10 +361,11 @@ static int mode1_syscalls_32[] = { | |||
34 | void __secure_computing(int this_syscall) | 361 | void __secure_computing(int this_syscall) |
35 | { | 362 | { |
36 | int mode = current->seccomp.mode; | 363 | int mode = current->seccomp.mode; |
37 | int * syscall; | 364 | int exit_sig = 0; |
365 | int *syscall; | ||
38 | 366 | ||
39 | switch (mode) { | 367 | switch (mode) { |
40 | case 1: | 368 | case SECCOMP_MODE_STRICT: |
41 | syscall = mode1_syscalls; | 369 | syscall = mode1_syscalls; |
42 | #ifdef CONFIG_COMPAT | 370 | #ifdef CONFIG_COMPAT |
43 | if (is_compat_task()) | 371 | if (is_compat_task()) |
@@ -47,7 +375,16 @@ void __secure_computing(int this_syscall) | |||
47 | if (*syscall == this_syscall) | 375 | if (*syscall == this_syscall) |
48 | return; | 376 | return; |
49 | } while (*++syscall); | 377 | } while (*++syscall); |
378 | exit_sig = SIGKILL; | ||
50 | break; | 379 | break; |
380 | #ifdef CONFIG_SECCOMP_FILTER | ||
381 | case SECCOMP_MODE_FILTER: | ||
382 | if (seccomp_run_filters(this_syscall) == SECCOMP_RET_ALLOW) | ||
383 | return; | ||
384 | seccomp_filter_log_failure(this_syscall); | ||
385 | exit_sig = SIGSYS; | ||
386 | break; | ||
387 | #endif | ||
51 | default: | 388 | default: |
52 | BUG(); | 389 | BUG(); |
53 | } | 390 | } |
@@ -56,7 +393,7 @@ void __secure_computing(int this_syscall) | |||
56 | dump_stack(); | 393 | dump_stack(); |
57 | #endif | 394 | #endif |
58 | audit_seccomp(this_syscall); | 395 | audit_seccomp(this_syscall); |
59 | do_exit(SIGKILL); | 396 | do_exit(exit_sig); |
60 | } | 397 | } |
61 | 398 | ||
62 | long prctl_get_seccomp(void) | 399 | long prctl_get_seccomp(void) |
@@ -64,25 +401,48 @@ long prctl_get_seccomp(void) | |||
64 | return current->seccomp.mode; | 401 | return current->seccomp.mode; |
65 | } | 402 | } |
66 | 403 | ||
67 | long prctl_set_seccomp(unsigned long seccomp_mode) | 404 | /** |
405 | * prctl_set_seccomp: configures current->seccomp.mode | ||
406 | * @seccomp_mode: requested mode to use | ||
407 | * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER | ||
408 | * | ||
409 | * This function may be called repeatedly with a @seccomp_mode of | ||
410 | * SECCOMP_MODE_FILTER to install additional filters. Every filter | ||
411 | * successfully installed will be evaluated (in reverse order) for each system | ||
412 | * call the task makes. | ||
413 | * | ||
414 | * Once current->seccomp.mode is non-zero, it may not be changed. | ||
415 | * | ||
417 | * Returns 0 on success or a negative errno (such as -EINVAL, -EFAULT, -ENOMEM or -EACCES) on failure. | ||
417 | */ | ||
418 | long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) | ||
68 | { | 419 | { |
69 | long ret; | 420 | long ret = -EINVAL; |
70 | 421 | ||
71 | /* can set it only once to be even more secure */ | 422 | if (current->seccomp.mode && |
72 | ret = -EPERM; | 423 | current->seccomp.mode != seccomp_mode) |
73 | if (unlikely(current->seccomp.mode)) | ||
74 | goto out; | 424 | goto out; |
75 | 425 | ||
76 | ret = -EINVAL; | 426 | switch (seccomp_mode) { |
77 | if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { | 427 | case SECCOMP_MODE_STRICT: |
78 | current->seccomp.mode = seccomp_mode; | 428 | ret = 0; |
79 | set_thread_flag(TIF_SECCOMP); | ||
80 | #ifdef TIF_NOTSC | 429 | #ifdef TIF_NOTSC |
81 | disable_TSC(); | 430 | disable_TSC(); |
82 | #endif | 431 | #endif |
83 | ret = 0; | 432 | break; |
433 | #ifdef CONFIG_SECCOMP_FILTER | ||
434 | case SECCOMP_MODE_FILTER: | ||
435 | ret = seccomp_attach_user_filter(filter); | ||
436 | if (ret) | ||
437 | goto out; | ||
438 | break; | ||
439 | #endif | ||
440 | default: | ||
441 | goto out; | ||
84 | } | 442 | } |
85 | 443 | ||
86 | out: | 444 | current->seccomp.mode = seccomp_mode; |
445 | set_thread_flag(TIF_SECCOMP); | ||
446 | out: | ||
87 | return ret; | 447 | return ret; |
88 | } | 448 | } |
diff --git a/kernel/sys.c b/kernel/sys.c index b82568b7d201..ba0ae8eea6fb 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1908,7 +1908,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
1908 | error = prctl_get_seccomp(); | 1908 | error = prctl_get_seccomp(); |
1909 | break; | 1909 | break; |
1910 | case PR_SET_SECCOMP: | 1910 | case PR_SET_SECCOMP: |
1911 | error = prctl_set_seccomp(arg2); | 1911 | error = prctl_set_seccomp(arg2, (char __user *)arg3); |
1912 | break; | 1912 | break; |
1913 | case PR_GET_TSC: | 1913 | case PR_GET_TSC: |
1914 | error = GET_TSC_CTL(arg2); | 1914 | error = GET_TSC_CTL(arg2); |