diff options
author | Andrew G. Morgan <morgan@kernel.org> | 2008-07-24 00:28:25 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-24 13:47:22 -0400 |
commit | ab763c7112ce0e2559c73f921617c81dc7287ca6 (patch) | |
tree | 110f60462a54e869402346b5ae9cfaed012cf8f4 | |
parent | 5459c164f0591ee75ed0203bb8f3817f25948e2f (diff) |
security: filesystem capabilities refactor kernel code
To date, we've tried hard to confine filesystem support for capabilities
to the security modules. This has left a lot of the code in
kernel/capability.c in a state where it looks like it supports something
that filesystem support for capabilities actually suppresses when the LSM
security/commoncap.c code runs. What is left is a lot of code that uses
sub-optimal locking in the main kernel.
With this change we refactor the main kernel code and make it explicit
which locks are needed and that the only remaining kernel races in this
area are associated with non-filesystem capability code.
Signed-off-by: Andrew G. Morgan <morgan@kernel.org>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | kernel/capability.c | 338 |
1 files changed, 221 insertions, 117 deletions
diff --git a/kernel/capability.c b/kernel/capability.c index 901e0fdc3fff..0101e847603e 100644 --- a/kernel/capability.c +++ b/kernel/capability.c | |||
@@ -115,11 +115,208 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy) | |||
115 | return 0; | 115 | return 0; |
116 | } | 116 | } |
117 | 117 | ||
118 | #ifndef CONFIG_SECURITY_FILE_CAPABILITIES | ||
119 | |||
120 | /* | ||
121 | * Without filesystem capability support, we nominally support one process | ||
122 | * setting the capabilities of another | ||
123 | */ | ||
124 | static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, | ||
125 | kernel_cap_t *pIp, kernel_cap_t *pPp) | ||
126 | { | ||
127 | struct task_struct *target; | ||
128 | int ret; | ||
129 | |||
130 | spin_lock(&task_capability_lock); | ||
131 | read_lock(&tasklist_lock); | ||
132 | |||
133 | if (pid && pid != task_pid_vnr(current)) { | ||
134 | target = find_task_by_vpid(pid); | ||
135 | if (!target) { | ||
136 | ret = -ESRCH; | ||
137 | goto out; | ||
138 | } | ||
139 | } else | ||
140 | target = current; | ||
141 | |||
142 | ret = security_capget(target, pEp, pIp, pPp); | ||
143 | |||
144 | out: | ||
145 | read_unlock(&tasklist_lock); | ||
146 | spin_unlock(&task_capability_lock); | ||
147 | |||
148 | return ret; | ||
149 | } | ||
150 | |||
151 | /* | ||
152 | * cap_set_pg - set capabilities for all processes in a given process | ||
153 | * group. We call this holding task_capability_lock and tasklist_lock. | ||
154 | */ | ||
155 | static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective, | ||
156 | kernel_cap_t *inheritable, | ||
157 | kernel_cap_t *permitted) | ||
158 | { | ||
159 | struct task_struct *g, *target; | ||
160 | int ret = -EPERM; | ||
161 | int found = 0; | ||
162 | struct pid *pgrp; | ||
163 | |||
164 | spin_lock(&task_capability_lock); | ||
165 | read_lock(&tasklist_lock); | ||
166 | |||
167 | pgrp = find_vpid(pgrp_nr); | ||
168 | do_each_pid_task(pgrp, PIDTYPE_PGID, g) { | ||
169 | target = g; | ||
170 | while_each_thread(g, target) { | ||
171 | if (!security_capset_check(target, effective, | ||
172 | inheritable, permitted)) { | ||
173 | security_capset_set(target, effective, | ||
174 | inheritable, permitted); | ||
175 | ret = 0; | ||
176 | } | ||
177 | found = 1; | ||
178 | } | ||
179 | } while_each_pid_task(pgrp, PIDTYPE_PGID, g); | ||
180 | |||
181 | read_unlock(&tasklist_lock); | ||
182 | spin_unlock(&task_capability_lock); | ||
183 | |||
184 | if (!found) | ||
185 | ret = 0; | ||
186 | return ret; | ||
187 | } | ||
188 | |||
118 | /* | 189 | /* |
119 | * For sys_getproccap() and sys_setproccap(), any of the three | 190 | * cap_set_all - set capabilities for all processes other than init |
120 | * capability set pointers may be NULL -- indicating that that set is | 191 | * and self. We call this holding task_capability_lock and tasklist_lock. |
121 | * uninteresting and/or not to be changed. | ||
122 | */ | 192 | */ |
193 | static inline int cap_set_all(kernel_cap_t *effective, | ||
194 | kernel_cap_t *inheritable, | ||
195 | kernel_cap_t *permitted) | ||
196 | { | ||
197 | struct task_struct *g, *target; | ||
198 | int ret = -EPERM; | ||
199 | int found = 0; | ||
200 | |||
201 | spin_lock(&task_capability_lock); | ||
202 | read_lock(&tasklist_lock); | ||
203 | |||
204 | do_each_thread(g, target) { | ||
205 | if (target == current | ||
206 | || is_container_init(target->group_leader)) | ||
207 | continue; | ||
208 | found = 1; | ||
209 | if (security_capset_check(target, effective, inheritable, | ||
210 | permitted)) | ||
211 | continue; | ||
212 | ret = 0; | ||
213 | security_capset_set(target, effective, inheritable, permitted); | ||
214 | } while_each_thread(g, target); | ||
215 | |||
216 | read_unlock(&tasklist_lock); | ||
217 | spin_unlock(&task_capability_lock); | ||
218 | |||
219 | if (!found) | ||
220 | ret = 0; | ||
221 | |||
222 | return ret; | ||
223 | } | ||
224 | |||
225 | /* | ||
226 | * Given the target pid does not refer to the current process we | ||
227 | * need more elaborate support... (This support is not present when | ||
228 | * filesystem capabilities are configured.) | ||
229 | */ | ||
230 | static inline int do_sys_capset_other_tasks(pid_t pid, kernel_cap_t *effective, | ||
231 | kernel_cap_t *inheritable, | ||
232 | kernel_cap_t *permitted) | ||
233 | { | ||
234 | struct task_struct *target; | ||
235 | int ret; | ||
236 | |||
237 | if (!capable(CAP_SETPCAP)) | ||
238 | return -EPERM; | ||
239 | |||
240 | if (pid == -1) /* all procs other than current and init */ | ||
241 | return cap_set_all(effective, inheritable, permitted); | ||
242 | |||
243 | else if (pid < 0) /* all procs in process group */ | ||
244 | return cap_set_pg(-pid, effective, inheritable, permitted); | ||
245 | |||
246 | /* target != current */ | ||
247 | spin_lock(&task_capability_lock); | ||
248 | read_lock(&tasklist_lock); | ||
249 | |||
250 | target = find_task_by_vpid(pid); | ||
251 | if (!target) | ||
252 | ret = -ESRCH; | ||
253 | else { | ||
254 | ret = security_capset_check(target, effective, inheritable, | ||
255 | permitted); | ||
256 | |||
257 | /* having verified that the proposed changes are legal, | ||
258 | we now put them into effect. */ | ||
259 | if (!ret) | ||
260 | security_capset_set(target, effective, inheritable, | ||
261 | permitted); | ||
262 | } | ||
263 | |||
264 | read_unlock(&tasklist_lock); | ||
265 | spin_unlock(&task_capability_lock); | ||
266 | |||
267 | return ret; | ||
268 | } | ||
269 | |||
270 | #else /* ie., def CONFIG_SECURITY_FILE_CAPABILITIES */ | ||
271 | |||
272 | /* | ||
273 | * If we have configured with filesystem capability support, then the | ||
274 | * only thing that can change the capabilities of the current process | ||
275 | * is the current process. As such, we can't be in this code at the | ||
276 | * same time as we are in the process of setting capabilities in this | ||
277 | * process. The net result is that we can limit our use of locks to | ||
278 | * when we are reading the caps of another process. | ||
279 | */ | ||
280 | static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, | ||
281 | kernel_cap_t *pIp, kernel_cap_t *pPp) | ||
282 | { | ||
283 | int ret; | ||
284 | |||
285 | if (pid && (pid != task_pid_vnr(current))) { | ||
286 | struct task_struct *target; | ||
287 | |||
288 | spin_lock(&task_capability_lock); | ||
289 | read_lock(&tasklist_lock); | ||
290 | |||
291 | target = find_task_by_vpid(pid); | ||
292 | if (!target) | ||
293 | ret = -ESRCH; | ||
294 | else | ||
295 | ret = security_capget(target, pEp, pIp, pPp); | ||
296 | |||
297 | read_unlock(&tasklist_lock); | ||
298 | spin_unlock(&task_capability_lock); | ||
299 | } else | ||
300 | ret = security_capget(current, pEp, pIp, pPp); | ||
301 | |||
302 | return ret; | ||
303 | } | ||
304 | |||
305 | /* | ||
306 | * With filesystem capability support configured, the kernel does not | ||
307 | * permit the changing of capabilities in one process by another | ||
308 | * process. (CAP_SETPCAP has much less broad semantics when configured | ||
309 | * this way.) | ||
310 | */ | ||
311 | static inline int do_sys_capset_other_tasks(pid_t pid, | ||
312 | kernel_cap_t *effective, | ||
313 | kernel_cap_t *inheritable, | ||
314 | kernel_cap_t *permitted) | ||
315 | { | ||
316 | return -EPERM; | ||
317 | } | ||
318 | |||
319 | #endif /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */ | ||
123 | 320 | ||
124 | /* | 321 | /* |
125 | * Atomically modify the effective capabilities returning the original | 322 | * Atomically modify the effective capabilities returning the original |
@@ -155,7 +352,6 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) | |||
155 | { | 352 | { |
156 | int ret = 0; | 353 | int ret = 0; |
157 | pid_t pid; | 354 | pid_t pid; |
158 | struct task_struct *target; | ||
159 | unsigned tocopy; | 355 | unsigned tocopy; |
160 | kernel_cap_t pE, pI, pP; | 356 | kernel_cap_t pE, pI, pP; |
161 | 357 | ||
@@ -169,23 +365,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) | |||
169 | if (pid < 0) | 365 | if (pid < 0) |
170 | return -EINVAL; | 366 | return -EINVAL; |
171 | 367 | ||
172 | spin_lock(&task_capability_lock); | 368 | ret = cap_get_target_pid(pid, &pE, &pI, &pP); |
173 | read_lock(&tasklist_lock); | ||
174 | |||
175 | if (pid && pid != task_pid_vnr(current)) { | ||
176 | target = find_task_by_vpid(pid); | ||
177 | if (!target) { | ||
178 | ret = -ESRCH; | ||
179 | goto out; | ||
180 | } | ||
181 | } else | ||
182 | target = current; | ||
183 | |||
184 | ret = security_capget(target, &pE, &pI, &pP); | ||
185 | |||
186 | out: | ||
187 | read_unlock(&tasklist_lock); | ||
188 | spin_unlock(&task_capability_lock); | ||
189 | 369 | ||
190 | if (!ret) { | 370 | if (!ret) { |
191 | struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; | 371 | struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; |
@@ -216,7 +396,6 @@ out: | |||
216 | * before modification is attempted and the application | 396 | * before modification is attempted and the application |
217 | * fails. | 397 | * fails. |
218 | */ | 398 | */ |
219 | |||
220 | if (copy_to_user(dataptr, kdata, tocopy | 399 | if (copy_to_user(dataptr, kdata, tocopy |
221 | * sizeof(struct __user_cap_data_struct))) { | 400 | * sizeof(struct __user_cap_data_struct))) { |
222 | return -EFAULT; | 401 | return -EFAULT; |
@@ -226,70 +405,8 @@ out: | |||
226 | return ret; | 405 | return ret; |
227 | } | 406 | } |
228 | 407 | ||
229 | /* | ||
230 | * cap_set_pg - set capabilities for all processes in a given process | ||
231 | * group. We call this holding task_capability_lock and tasklist_lock. | ||
232 | */ | ||
233 | static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective, | ||
234 | kernel_cap_t *inheritable, | ||
235 | kernel_cap_t *permitted) | ||
236 | { | ||
237 | struct task_struct *g, *target; | ||
238 | int ret = -EPERM; | ||
239 | int found = 0; | ||
240 | struct pid *pgrp; | ||
241 | |||
242 | pgrp = find_vpid(pgrp_nr); | ||
243 | do_each_pid_task(pgrp, PIDTYPE_PGID, g) { | ||
244 | target = g; | ||
245 | while_each_thread(g, target) { | ||
246 | if (!security_capset_check(target, effective, | ||
247 | inheritable, | ||
248 | permitted)) { | ||
249 | security_capset_set(target, effective, | ||
250 | inheritable, | ||
251 | permitted); | ||
252 | ret = 0; | ||
253 | } | ||
254 | found = 1; | ||
255 | } | ||
256 | } while_each_pid_task(pgrp, PIDTYPE_PGID, g); | ||
257 | |||
258 | if (!found) | ||
259 | ret = 0; | ||
260 | return ret; | ||
261 | } | ||
262 | |||
263 | /* | ||
264 | * cap_set_all - set capabilities for all processes other than init | ||
265 | * and self. We call this holding task_capability_lock and tasklist_lock. | ||
266 | */ | ||
267 | static inline int cap_set_all(kernel_cap_t *effective, | ||
268 | kernel_cap_t *inheritable, | ||
269 | kernel_cap_t *permitted) | ||
270 | { | ||
271 | struct task_struct *g, *target; | ||
272 | int ret = -EPERM; | ||
273 | int found = 0; | ||
274 | |||
275 | do_each_thread(g, target) { | ||
276 | if (target == current || is_container_init(target->group_leader)) | ||
277 | continue; | ||
278 | found = 1; | ||
279 | if (security_capset_check(target, effective, inheritable, | ||
280 | permitted)) | ||
281 | continue; | ||
282 | ret = 0; | ||
283 | security_capset_set(target, effective, inheritable, permitted); | ||
284 | } while_each_thread(g, target); | ||
285 | |||
286 | if (!found) | ||
287 | ret = 0; | ||
288 | return ret; | ||
289 | } | ||
290 | |||
291 | /** | 408 | /** |
292 | * sys_capset - set capabilities for a process or a group of processes | 409 | * sys_capset - set capabilities for a process or (*) a group of processes |
293 | * @header: pointer to struct that contains capability version and | 410 | * @header: pointer to struct that contains capability version and |
294 | * target pid data | 411 | * target pid data |
295 | * @data: pointer to struct that contains the effective, permitted, | 412 | * @data: pointer to struct that contains the effective, permitted, |
@@ -313,7 +430,6 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) | |||
313 | struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; | 430 | struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; |
314 | unsigned i, tocopy; | 431 | unsigned i, tocopy; |
315 | kernel_cap_t inheritable, permitted, effective; | 432 | kernel_cap_t inheritable, permitted, effective; |
316 | struct task_struct *target; | ||
317 | int ret; | 433 | int ret; |
318 | pid_t pid; | 434 | pid_t pid; |
319 | 435 | ||
@@ -324,9 +440,6 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) | |||
324 | if (get_user(pid, &header->pid)) | 440 | if (get_user(pid, &header->pid)) |
325 | return -EFAULT; | 441 | return -EFAULT; |
326 | 442 | ||
327 | if (pid && pid != task_pid_vnr(current) && !capable(CAP_SETPCAP)) | ||
328 | return -EPERM; | ||
329 | |||
330 | if (copy_from_user(&kdata, data, tocopy | 443 | if (copy_from_user(&kdata, data, tocopy |
331 | * sizeof(struct __user_cap_data_struct))) { | 444 | * sizeof(struct __user_cap_data_struct))) { |
332 | return -EFAULT; | 445 | return -EFAULT; |
@@ -344,40 +457,31 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) | |||
344 | i++; | 457 | i++; |
345 | } | 458 | } |
346 | 459 | ||
347 | spin_lock(&task_capability_lock); | 460 | if (pid && (pid != task_pid_vnr(current))) |
348 | read_lock(&tasklist_lock); | 461 | ret = do_sys_capset_other_tasks(pid, &effective, &inheritable, |
349 | 462 | &permitted); | |
350 | if (pid > 0 && pid != task_pid_vnr(current)) { | 463 | else { |
351 | target = find_task_by_vpid(pid); | 464 | /* |
352 | if (!target) { | 465 | * This lock is required even when filesystem |
353 | ret = -ESRCH; | 466 | * capability support is configured - it protects the |
354 | goto out; | 467 | * sys_capget() call from returning incorrect data in |
355 | } | 468 | * the case that the targeted process is not the |
356 | } else | 469 | * current one. |
357 | target = current; | 470 | */ |
358 | 471 | spin_lock(&task_capability_lock); | |
359 | ret = 0; | ||
360 | |||
361 | /* having verified that the proposed changes are legal, | ||
362 | we now put them into effect. */ | ||
363 | if (pid < 0) { | ||
364 | if (pid == -1) /* all procs other than current and init */ | ||
365 | ret = cap_set_all(&effective, &inheritable, &permitted); | ||
366 | 472 | ||
367 | else /* all procs in process group */ | 473 | ret = security_capset_check(current, &effective, &inheritable, |
368 | ret = cap_set_pg(-pid, &effective, &inheritable, | ||
369 | &permitted); | ||
370 | } else { | ||
371 | ret = security_capset_check(target, &effective, &inheritable, | ||
372 | &permitted); | 474 | &permitted); |
475 | /* | ||
476 | * Having verified that the proposed changes are | ||
477 | * legal, we now put them into effect. | ||
478 | */ | ||
373 | if (!ret) | 479 | if (!ret) |
374 | security_capset_set(target, &effective, &inheritable, | 480 | security_capset_set(current, &effective, &inheritable, |
375 | &permitted); | 481 | &permitted); |
482 | spin_unlock(&task_capability_lock); | ||
376 | } | 483 | } |
377 | 484 | ||
378 | out: | ||
379 | read_unlock(&tasklist_lock); | ||
380 | spin_unlock(&task_capability_lock); | ||
381 | 485 | ||
382 | return ret; | 486 | return ret; |
383 | } | 487 | } |