diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-05-12 22:22:57 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-05-12 22:22:57 -0400 |
commit | 26a41cd1eeac299d0d7c505f8d38976a553c8fc4 (patch) | |
tree | 374461f6033b5c115274b3e3b1a1c23bf76755f5 | |
parent | 619b5891903936f3e493bf8cda18a8b6664fcdd7 (diff) | |
parent | 36c38fb7144aa941dc072ba8f58b2dbe509c0345 (diff) |
Merge branch 'for-3.15-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:
"During recent restructuring, device_cgroup unified config input check
and enforcement logic; unfortunately, it turned out to share too much.
Aristeu's patches fix the breakage and marked for -stable backport.
The other two patches are fallouts from kernfs conversion. The blkcg
change is temporary and will go away once kernfs internal locking gets
simplified (patches pending)"
* 'for-3.15-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
blkcg: use trylock on blkcg_pol_mutex in blkcg_reset_stats()
device_cgroup: check if exception removal is allowed
device_cgroup: fix the comment format for recently added functions
device_cgroup: rework device access check and exception checking
cgroup: fix the retry path of cgroup_mount()
-rw-r--r-- | block/blk-cgroup.c | 15 | ||||
-rw-r--r-- | kernel/cgroup.c | 8 | ||||
-rw-r--r-- | security/device_cgroup.c | 202 |
3 files changed, 177 insertions, 48 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index e4a4145926f6..1039fb9ff5f5 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c | |||
@@ -451,7 +451,20 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css, | |||
451 | struct blkcg_gq *blkg; | 451 | struct blkcg_gq *blkg; |
452 | int i; | 452 | int i; |
453 | 453 | ||
454 | mutex_lock(&blkcg_pol_mutex); | 454 | /* |
455 | * XXX: We invoke cgroup_add/rm_cftypes() under blkcg_pol_mutex | ||
456 | * which ends up putting cgroup's internal cgroup_tree_mutex under | ||
457 | * it; however, cgroup_tree_mutex is nested above cgroup file | ||
458 | * active protection and grabbing blkcg_pol_mutex from a cgroup | ||
459 | * file operation creates a possible circular dependency. cgroup | ||
460 | * internal locking is planned to go through further simplification | ||
461 | * and this issue should go away soon. For now, let's trylock | ||
462 | * blkcg_pol_mutex and restart the write on failure. | ||
463 | * | ||
464 | * http://lkml.kernel.org/g/5363C04B.4010400@oracle.com | ||
465 | */ | ||
466 | if (!mutex_trylock(&blkcg_pol_mutex)) | ||
467 | return restart_syscall(); | ||
455 | spin_lock_irq(&blkcg->lock); | 468 | spin_lock_irq(&blkcg->lock); |
456 | 469 | ||
457 | /* | 470 | /* |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9fcdaa705b6c..11a03d67635a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -1495,7 +1495,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1495 | */ | 1495 | */ |
1496 | if (!use_task_css_set_links) | 1496 | if (!use_task_css_set_links) |
1497 | cgroup_enable_task_cg_lists(); | 1497 | cgroup_enable_task_cg_lists(); |
1498 | retry: | 1498 | |
1499 | mutex_lock(&cgroup_tree_mutex); | 1499 | mutex_lock(&cgroup_tree_mutex); |
1500 | mutex_lock(&cgroup_mutex); | 1500 | mutex_lock(&cgroup_mutex); |
1501 | 1501 | ||
@@ -1503,7 +1503,7 @@ retry: | |||
1503 | ret = parse_cgroupfs_options(data, &opts); | 1503 | ret = parse_cgroupfs_options(data, &opts); |
1504 | if (ret) | 1504 | if (ret) |
1505 | goto out_unlock; | 1505 | goto out_unlock; |
1506 | 1506 | retry: | |
1507 | /* look for a matching existing root */ | 1507 | /* look for a matching existing root */ |
1508 | if (!opts.subsys_mask && !opts.none && !opts.name) { | 1508 | if (!opts.subsys_mask && !opts.none && !opts.name) { |
1509 | cgrp_dfl_root_visible = true; | 1509 | cgrp_dfl_root_visible = true; |
@@ -1562,9 +1562,9 @@ retry: | |||
1562 | if (!atomic_inc_not_zero(&root->cgrp.refcnt)) { | 1562 | if (!atomic_inc_not_zero(&root->cgrp.refcnt)) { |
1563 | mutex_unlock(&cgroup_mutex); | 1563 | mutex_unlock(&cgroup_mutex); |
1564 | mutex_unlock(&cgroup_tree_mutex); | 1564 | mutex_unlock(&cgroup_tree_mutex); |
1565 | kfree(opts.release_agent); | ||
1566 | kfree(opts.name); | ||
1567 | msleep(10); | 1565 | msleep(10); |
1566 | mutex_lock(&cgroup_tree_mutex); | ||
1567 | mutex_lock(&cgroup_mutex); | ||
1568 | goto retry; | 1568 | goto retry; |
1569 | } | 1569 | } |
1570 | 1570 | ||
diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 8365909f5f8c..9134dbf70d3e 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c | |||
@@ -306,57 +306,138 @@ static int devcgroup_seq_show(struct seq_file *m, void *v) | |||
306 | } | 306 | } |
307 | 307 | ||
308 | /** | 308 | /** |
309 | * may_access - verifies if a new exception is part of what is allowed | 309 | * match_exception - iterates the exception list trying to find a complete match |
310 | * by a dev cgroup based on the default policy + | 310 | * @exceptions: list of exceptions |
311 | * exceptions. This is used to make sure a child cgroup | 311 | * @type: device type (DEV_BLOCK or DEV_CHAR) |
312 | * won't have more privileges than its parent or to | 312 | * @major: device file major number, ~0 to match all |
313 | * verify if a certain access is allowed. | 313 | * @minor: device file minor number, ~0 to match all |
314 | * @dev_cgroup: dev cgroup to be tested against | 314 | * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD) |
315 | * @refex: new exception | 315 | * |
316 | * @behavior: behavior of the exception | 316 | * It is considered a complete match if an exception is found that will |
317 | * contain the entire range of provided parameters. | ||
318 | * | ||
319 | * Return: true in case it matches an exception completely | ||
317 | */ | 320 | */ |
318 | static bool may_access(struct dev_cgroup *dev_cgroup, | 321 | static bool match_exception(struct list_head *exceptions, short type, |
319 | struct dev_exception_item *refex, | 322 | u32 major, u32 minor, short access) |
320 | enum devcg_behavior behavior) | ||
321 | { | 323 | { |
322 | struct dev_exception_item *ex; | 324 | struct dev_exception_item *ex; |
323 | bool match = false; | ||
324 | 325 | ||
325 | rcu_lockdep_assert(rcu_read_lock_held() || | 326 | list_for_each_entry_rcu(ex, exceptions, list) { |
326 | lockdep_is_held(&devcgroup_mutex), | 327 | if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK)) |
327 | "device_cgroup::may_access() called without proper synchronization"); | 328 | continue; |
329 | if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR)) | ||
330 | continue; | ||
331 | if (ex->major != ~0 && ex->major != major) | ||
332 | continue; | ||
333 | if (ex->minor != ~0 && ex->minor != minor) | ||
334 | continue; | ||
335 | /* provided access cannot have more than the exception rule */ | ||
336 | if (access & (~ex->access)) | ||
337 | continue; | ||
338 | return true; | ||
339 | } | ||
340 | return false; | ||
341 | } | ||
342 | |||
343 | /** | ||
344 | * match_exception_partial - iterates the exception list trying to find a partial match | ||
345 | * @exceptions: list of exceptions | ||
346 | * @type: device type (DEV_BLOCK or DEV_CHAR) | ||
347 | * @major: device file major number, ~0 to match all | ||
348 | * @minor: device file minor number, ~0 to match all | ||
349 | * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD) | ||
350 | * | ||
351 | * It is considered a partial match if an exception's range is found to | ||
352 | * contain *any* of the devices specified by provided parameters. This is | ||
353 | * used to make sure no extra access is being granted that is forbidden by | ||
354 | * any of the exception list. | ||
355 | * | ||
356 | * Return: true in case the provided range mat matches an exception completely | ||
357 | */ | ||
358 | static bool match_exception_partial(struct list_head *exceptions, short type, | ||
359 | u32 major, u32 minor, short access) | ||
360 | { | ||
361 | struct dev_exception_item *ex; | ||
328 | 362 | ||
329 | list_for_each_entry_rcu(ex, &dev_cgroup->exceptions, list) { | 363 | list_for_each_entry_rcu(ex, exceptions, list) { |
330 | if ((refex->type & DEV_BLOCK) && !(ex->type & DEV_BLOCK)) | 364 | if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK)) |
331 | continue; | 365 | continue; |
332 | if ((refex->type & DEV_CHAR) && !(ex->type & DEV_CHAR)) | 366 | if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR)) |
333 | continue; | 367 | continue; |
334 | if (ex->major != ~0 && ex->major != refex->major) | 368 | /* |
369 | * We must be sure that both the exception and the provided | ||
370 | * range aren't masking all devices | ||
371 | */ | ||
372 | if (ex->major != ~0 && major != ~0 && ex->major != major) | ||
335 | continue; | 373 | continue; |
336 | if (ex->minor != ~0 && ex->minor != refex->minor) | 374 | if (ex->minor != ~0 && minor != ~0 && ex->minor != minor) |
337 | continue; | 375 | continue; |
338 | if (refex->access & (~ex->access)) | 376 | /* |
377 | * In order to make sure the provided range isn't matching | ||
378 | * an exception, all its access bits shouldn't match the | ||
379 | * exception's access bits | ||
380 | */ | ||
381 | if (!(access & ex->access)) | ||
339 | continue; | 382 | continue; |
340 | match = true; | 383 | return true; |
341 | break; | ||
342 | } | 384 | } |
385 | return false; | ||
386 | } | ||
387 | |||
388 | /** | ||
389 | * verify_new_ex - verifies if a new exception is allowed by parent cgroup's permissions | ||
390 | * @dev_cgroup: dev cgroup to be tested against | ||
391 | * @refex: new exception | ||
392 | * @behavior: behavior of the exception's dev_cgroup | ||
393 | * | ||
394 | * This is used to make sure a child cgroup won't have more privileges | ||
395 | * than its parent | ||
396 | */ | ||
397 | static bool verify_new_ex(struct dev_cgroup *dev_cgroup, | ||
398 | struct dev_exception_item *refex, | ||
399 | enum devcg_behavior behavior) | ||
400 | { | ||
401 | bool match = false; | ||
402 | |||
403 | rcu_lockdep_assert(rcu_read_lock_held() || | ||
404 | lockdep_is_held(&devcgroup_mutex), | ||
405 | "device_cgroup:verify_new_ex called without proper synchronization"); | ||
343 | 406 | ||
344 | if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) { | 407 | if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) { |
345 | if (behavior == DEVCG_DEFAULT_ALLOW) { | 408 | if (behavior == DEVCG_DEFAULT_ALLOW) { |
346 | /* the exception will deny access to certain devices */ | 409 | /* |
410 | * new exception in the child doesn't matter, only | ||
411 | * adding extra restrictions | ||
412 | */ | ||
347 | return true; | 413 | return true; |
348 | } else { | 414 | } else { |
349 | /* the exception will allow access to certain devices */ | 415 | /* |
416 | * new exception in the child will add more devices | ||
417 | * that can be acessed, so it can't match any of | ||
418 | * parent's exceptions, even slightly | ||
419 | */ | ||
420 | match = match_exception_partial(&dev_cgroup->exceptions, | ||
421 | refex->type, | ||
422 | refex->major, | ||
423 | refex->minor, | ||
424 | refex->access); | ||
425 | |||
350 | if (match) | 426 | if (match) |
351 | /* | ||
352 | * a new exception allowing access shouldn't | ||
353 | * match an parent's exception | ||
354 | */ | ||
355 | return false; | 427 | return false; |
356 | return true; | 428 | return true; |
357 | } | 429 | } |
358 | } else { | 430 | } else { |
359 | /* only behavior == DEVCG_DEFAULT_DENY allowed here */ | 431 | /* |
432 | * Only behavior == DEVCG_DEFAULT_DENY allowed here, therefore | ||
433 | * the new exception will add access to more devices and must | ||
434 | * be contained completely in an parent's exception to be | ||
435 | * allowed | ||
436 | */ | ||
437 | match = match_exception(&dev_cgroup->exceptions, refex->type, | ||
438 | refex->major, refex->minor, | ||
439 | refex->access); | ||
440 | |||
360 | if (match) | 441 | if (match) |
361 | /* parent has an exception that matches the proposed */ | 442 | /* parent has an exception that matches the proposed */ |
362 | return true; | 443 | return true; |
@@ -378,7 +459,38 @@ static int parent_has_perm(struct dev_cgroup *childcg, | |||
378 | 459 | ||
379 | if (!parent) | 460 | if (!parent) |
380 | return 1; | 461 | return 1; |
381 | return may_access(parent, ex, childcg->behavior); | 462 | return verify_new_ex(parent, ex, childcg->behavior); |
463 | } | ||
464 | |||
465 | /** | ||
466 | * parent_allows_removal - verify if it's ok to remove an exception | ||
467 | * @childcg: child cgroup from where the exception will be removed | ||
468 | * @ex: exception being removed | ||
469 | * | ||
470 | * When removing an exception in cgroups with default ALLOW policy, it must | ||
471 | * be checked if removing it will give the child cgroup more access than the | ||
472 | * parent. | ||
473 | * | ||
474 | * Return: true if it's ok to remove exception, false otherwise | ||
475 | */ | ||
476 | static bool parent_allows_removal(struct dev_cgroup *childcg, | ||
477 | struct dev_exception_item *ex) | ||
478 | { | ||
479 | struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css)); | ||
480 | |||
481 | if (!parent) | ||
482 | return true; | ||
483 | |||
484 | /* It's always allowed to remove access to devices */ | ||
485 | if (childcg->behavior == DEVCG_DEFAULT_DENY) | ||
486 | return true; | ||
487 | |||
488 | /* | ||
489 | * Make sure you're not removing part or a whole exception existing in | ||
490 | * the parent cgroup | ||
491 | */ | ||
492 | return !match_exception_partial(&parent->exceptions, ex->type, | ||
493 | ex->major, ex->minor, ex->access); | ||
382 | } | 494 | } |
383 | 495 | ||
384 | /** | 496 | /** |
@@ -616,17 +728,21 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, | |||
616 | 728 | ||
617 | switch (filetype) { | 729 | switch (filetype) { |
618 | case DEVCG_ALLOW: | 730 | case DEVCG_ALLOW: |
619 | if (!parent_has_perm(devcgroup, &ex)) | ||
620 | return -EPERM; | ||
621 | /* | 731 | /* |
622 | * If the default policy is to allow by default, try to remove | 732 | * If the default policy is to allow by default, try to remove |
623 | * an matching exception instead. And be silent about it: we | 733 | * an matching exception instead. And be silent about it: we |
624 | * don't want to break compatibility | 734 | * don't want to break compatibility |
625 | */ | 735 | */ |
626 | if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) { | 736 | if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) { |
737 | /* Check if the parent allows removing it first */ | ||
738 | if (!parent_allows_removal(devcgroup, &ex)) | ||
739 | return -EPERM; | ||
627 | dev_exception_rm(devcgroup, &ex); | 740 | dev_exception_rm(devcgroup, &ex); |
628 | return 0; | 741 | break; |
629 | } | 742 | } |
743 | |||
744 | if (!parent_has_perm(devcgroup, &ex)) | ||
745 | return -EPERM; | ||
630 | rc = dev_exception_add(devcgroup, &ex); | 746 | rc = dev_exception_add(devcgroup, &ex); |
631 | break; | 747 | break; |
632 | case DEVCG_DENY: | 748 | case DEVCG_DENY: |
@@ -704,18 +820,18 @@ static int __devcgroup_check_permission(short type, u32 major, u32 minor, | |||
704 | short access) | 820 | short access) |
705 | { | 821 | { |
706 | struct dev_cgroup *dev_cgroup; | 822 | struct dev_cgroup *dev_cgroup; |
707 | struct dev_exception_item ex; | 823 | bool rc; |
708 | int rc; | ||
709 | |||
710 | memset(&ex, 0, sizeof(ex)); | ||
711 | ex.type = type; | ||
712 | ex.major = major; | ||
713 | ex.minor = minor; | ||
714 | ex.access = access; | ||
715 | 824 | ||
716 | rcu_read_lock(); | 825 | rcu_read_lock(); |
717 | dev_cgroup = task_devcgroup(current); | 826 | dev_cgroup = task_devcgroup(current); |
718 | rc = may_access(dev_cgroup, &ex, dev_cgroup->behavior); | 827 | if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) |
828 | /* Can't match any of the exceptions, even partially */ | ||
829 | rc = !match_exception_partial(&dev_cgroup->exceptions, | ||
830 | type, major, minor, access); | ||
831 | else | ||
832 | /* Need to match completely one exception to be allowed */ | ||
833 | rc = match_exception(&dev_cgroup->exceptions, type, major, | ||
834 | minor, access); | ||
719 | rcu_read_unlock(); | 835 | rcu_read_unlock(); |
720 | 836 | ||
721 | if (!rc) | 837 | if (!rc) |