aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-05-12 22:22:57 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-05-12 22:22:57 -0400
commit26a41cd1eeac299d0d7c505f8d38976a553c8fc4 (patch)
tree374461f6033b5c115274b3e3b1a1c23bf76755f5
parent619b5891903936f3e493bf8cda18a8b6664fcdd7 (diff)
parent36c38fb7144aa941dc072ba8f58b2dbe509c0345 (diff)
Merge branch 'for-3.15-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo: "During recent restructuring, device_cgroup unified config input check and enforcement logic; unfortunately, it turned out to share too much. Aristeu's patches fix the breakage and marked for -stable backport. The other two patches are fallouts from kernfs conversion. The blkcg change is temporary and will go away once kernfs internal locking gets simplified (patches pending)" * 'for-3.15-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: blkcg: use trylock on blkcg_pol_mutex in blkcg_reset_stats() device_cgroup: check if exception removal is allowed device_cgroup: fix the comment format for recently added functions device_cgroup: rework device access check and exception checking cgroup: fix the retry path of cgroup_mount()
-rw-r--r--block/blk-cgroup.c15
-rw-r--r--kernel/cgroup.c8
-rw-r--r--security/device_cgroup.c202
3 files changed, 177 insertions, 48 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e4a4145926f6..1039fb9ff5f5 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -451,7 +451,20 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
451 struct blkcg_gq *blkg; 451 struct blkcg_gq *blkg;
452 int i; 452 int i;
453 453
454 mutex_lock(&blkcg_pol_mutex); 454 /*
455 * XXX: We invoke cgroup_add/rm_cftypes() under blkcg_pol_mutex
456 * which ends up putting cgroup's internal cgroup_tree_mutex under
457 * it; however, cgroup_tree_mutex is nested above cgroup file
458 * active protection and grabbing blkcg_pol_mutex from a cgroup
459 * file operation creates a possible circular dependency. cgroup
460 * internal locking is planned to go through further simplification
461 * and this issue should go away soon. For now, let's trylock
462 * blkcg_pol_mutex and restart the write on failure.
463 *
464 * http://lkml.kernel.org/g/5363C04B.4010400@oracle.com
465 */
466 if (!mutex_trylock(&blkcg_pol_mutex))
467 return restart_syscall();
455 spin_lock_irq(&blkcg->lock); 468 spin_lock_irq(&blkcg->lock);
456 469
457 /* 470 /*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9fcdaa705b6c..11a03d67635a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1495,7 +1495,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1495 */ 1495 */
1496 if (!use_task_css_set_links) 1496 if (!use_task_css_set_links)
1497 cgroup_enable_task_cg_lists(); 1497 cgroup_enable_task_cg_lists();
1498retry: 1498
1499 mutex_lock(&cgroup_tree_mutex); 1499 mutex_lock(&cgroup_tree_mutex);
1500 mutex_lock(&cgroup_mutex); 1500 mutex_lock(&cgroup_mutex);
1501 1501
@@ -1503,7 +1503,7 @@ retry:
1503 ret = parse_cgroupfs_options(data, &opts); 1503 ret = parse_cgroupfs_options(data, &opts);
1504 if (ret) 1504 if (ret)
1505 goto out_unlock; 1505 goto out_unlock;
1506 1506retry:
1507 /* look for a matching existing root */ 1507 /* look for a matching existing root */
1508 if (!opts.subsys_mask && !opts.none && !opts.name) { 1508 if (!opts.subsys_mask && !opts.none && !opts.name) {
1509 cgrp_dfl_root_visible = true; 1509 cgrp_dfl_root_visible = true;
@@ -1562,9 +1562,9 @@ retry:
1562 if (!atomic_inc_not_zero(&root->cgrp.refcnt)) { 1562 if (!atomic_inc_not_zero(&root->cgrp.refcnt)) {
1563 mutex_unlock(&cgroup_mutex); 1563 mutex_unlock(&cgroup_mutex);
1564 mutex_unlock(&cgroup_tree_mutex); 1564 mutex_unlock(&cgroup_tree_mutex);
1565 kfree(opts.release_agent);
1566 kfree(opts.name);
1567 msleep(10); 1565 msleep(10);
1566 mutex_lock(&cgroup_tree_mutex);
1567 mutex_lock(&cgroup_mutex);
1568 goto retry; 1568 goto retry;
1569 } 1569 }
1570 1570
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 8365909f5f8c..9134dbf70d3e 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -306,57 +306,138 @@ static int devcgroup_seq_show(struct seq_file *m, void *v)
306} 306}
307 307
308/** 308/**
309 * may_access - verifies if a new exception is part of what is allowed 309 * match_exception - iterates the exception list trying to find a complete match
310 * by a dev cgroup based on the default policy + 310 * @exceptions: list of exceptions
311 * exceptions. This is used to make sure a child cgroup 311 * @type: device type (DEV_BLOCK or DEV_CHAR)
312 * won't have more privileges than its parent or to 312 * @major: device file major number, ~0 to match all
313 * verify if a certain access is allowed. 313 * @minor: device file minor number, ~0 to match all
314 * @dev_cgroup: dev cgroup to be tested against 314 * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD)
315 * @refex: new exception 315 *
316 * @behavior: behavior of the exception 316 * It is considered a complete match if an exception is found that will
317 * contain the entire range of provided parameters.
318 *
319 * Return: true in case it matches an exception completely
317 */ 320 */
318static bool may_access(struct dev_cgroup *dev_cgroup, 321static bool match_exception(struct list_head *exceptions, short type,
319 struct dev_exception_item *refex, 322 u32 major, u32 minor, short access)
320 enum devcg_behavior behavior)
321{ 323{
322 struct dev_exception_item *ex; 324 struct dev_exception_item *ex;
323 bool match = false;
324 325
325 rcu_lockdep_assert(rcu_read_lock_held() || 326 list_for_each_entry_rcu(ex, exceptions, list) {
326 lockdep_is_held(&devcgroup_mutex), 327 if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
327 "device_cgroup::may_access() called without proper synchronization"); 328 continue;
329 if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR))
330 continue;
331 if (ex->major != ~0 && ex->major != major)
332 continue;
333 if (ex->minor != ~0 && ex->minor != minor)
334 continue;
335 /* provided access cannot have more than the exception rule */
336 if (access & (~ex->access))
337 continue;
338 return true;
339 }
340 return false;
341}
342
343/**
344 * match_exception_partial - iterates the exception list trying to find a partial match
345 * @exceptions: list of exceptions
346 * @type: device type (DEV_BLOCK or DEV_CHAR)
347 * @major: device file major number, ~0 to match all
348 * @minor: device file minor number, ~0 to match all
349 * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD)
350 *
351 * It is considered a partial match if an exception's range is found to
352 * contain *any* of the devices specified by provided parameters. This is
353 * used to make sure no extra access is being granted that is forbidden by
354 * any of the exception list.
355 *
356 * Return: true in case the provided range mat matches an exception completely
357 */
358static bool match_exception_partial(struct list_head *exceptions, short type,
359 u32 major, u32 minor, short access)
360{
361 struct dev_exception_item *ex;
328 362
329 list_for_each_entry_rcu(ex, &dev_cgroup->exceptions, list) { 363 list_for_each_entry_rcu(ex, exceptions, list) {
330 if ((refex->type & DEV_BLOCK) && !(ex->type & DEV_BLOCK)) 364 if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
331 continue; 365 continue;
332 if ((refex->type & DEV_CHAR) && !(ex->type & DEV_CHAR)) 366 if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR))
333 continue; 367 continue;
334 if (ex->major != ~0 && ex->major != refex->major) 368 /*
369 * We must be sure that both the exception and the provided
370 * range aren't masking all devices
371 */
372 if (ex->major != ~0 && major != ~0 && ex->major != major)
335 continue; 373 continue;
336 if (ex->minor != ~0 && ex->minor != refex->minor) 374 if (ex->minor != ~0 && minor != ~0 && ex->minor != minor)
337 continue; 375 continue;
338 if (refex->access & (~ex->access)) 376 /*
377 * In order to make sure the provided range isn't matching
378 * an exception, all its access bits shouldn't match the
379 * exception's access bits
380 */
381 if (!(access & ex->access))
339 continue; 382 continue;
340 match = true; 383 return true;
341 break;
342 } 384 }
385 return false;
386}
387
388/**
389 * verify_new_ex - verifies if a new exception is allowed by parent cgroup's permissions
390 * @dev_cgroup: dev cgroup to be tested against
391 * @refex: new exception
392 * @behavior: behavior of the exception's dev_cgroup
393 *
394 * This is used to make sure a child cgroup won't have more privileges
395 * than its parent
396 */
397static bool verify_new_ex(struct dev_cgroup *dev_cgroup,
398 struct dev_exception_item *refex,
399 enum devcg_behavior behavior)
400{
401 bool match = false;
402
403 rcu_lockdep_assert(rcu_read_lock_held() ||
404 lockdep_is_held(&devcgroup_mutex),
405 "device_cgroup:verify_new_ex called without proper synchronization");
343 406
344 if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) { 407 if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) {
345 if (behavior == DEVCG_DEFAULT_ALLOW) { 408 if (behavior == DEVCG_DEFAULT_ALLOW) {
346 /* the exception will deny access to certain devices */ 409 /*
410 * new exception in the child doesn't matter, only
411 * adding extra restrictions
412 */
347 return true; 413 return true;
348 } else { 414 } else {
349 /* the exception will allow access to certain devices */ 415 /*
416 * new exception in the child will add more devices
417 * that can be acessed, so it can't match any of
418 * parent's exceptions, even slightly
419 */
420 match = match_exception_partial(&dev_cgroup->exceptions,
421 refex->type,
422 refex->major,
423 refex->minor,
424 refex->access);
425
350 if (match) 426 if (match)
351 /*
352 * a new exception allowing access shouldn't
353 * match an parent's exception
354 */
355 return false; 427 return false;
356 return true; 428 return true;
357 } 429 }
358 } else { 430 } else {
359 /* only behavior == DEVCG_DEFAULT_DENY allowed here */ 431 /*
432 * Only behavior == DEVCG_DEFAULT_DENY allowed here, therefore
433 * the new exception will add access to more devices and must
434 * be contained completely in an parent's exception to be
435 * allowed
436 */
437 match = match_exception(&dev_cgroup->exceptions, refex->type,
438 refex->major, refex->minor,
439 refex->access);
440
360 if (match) 441 if (match)
361 /* parent has an exception that matches the proposed */ 442 /* parent has an exception that matches the proposed */
362 return true; 443 return true;
@@ -378,7 +459,38 @@ static int parent_has_perm(struct dev_cgroup *childcg,
378 459
379 if (!parent) 460 if (!parent)
380 return 1; 461 return 1;
381 return may_access(parent, ex, childcg->behavior); 462 return verify_new_ex(parent, ex, childcg->behavior);
463}
464
465/**
466 * parent_allows_removal - verify if it's ok to remove an exception
467 * @childcg: child cgroup from where the exception will be removed
468 * @ex: exception being removed
469 *
470 * When removing an exception in cgroups with default ALLOW policy, it must
471 * be checked if removing it will give the child cgroup more access than the
472 * parent.
473 *
474 * Return: true if it's ok to remove exception, false otherwise
475 */
476static bool parent_allows_removal(struct dev_cgroup *childcg,
477 struct dev_exception_item *ex)
478{
479 struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css));
480
481 if (!parent)
482 return true;
483
484 /* It's always allowed to remove access to devices */
485 if (childcg->behavior == DEVCG_DEFAULT_DENY)
486 return true;
487
488 /*
489 * Make sure you're not removing part or a whole exception existing in
490 * the parent cgroup
491 */
492 return !match_exception_partial(&parent->exceptions, ex->type,
493 ex->major, ex->minor, ex->access);
382} 494}
383 495
384/** 496/**
@@ -616,17 +728,21 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
616 728
617 switch (filetype) { 729 switch (filetype) {
618 case DEVCG_ALLOW: 730 case DEVCG_ALLOW:
619 if (!parent_has_perm(devcgroup, &ex))
620 return -EPERM;
621 /* 731 /*
622 * If the default policy is to allow by default, try to remove 732 * If the default policy is to allow by default, try to remove
623 * an matching exception instead. And be silent about it: we 733 * an matching exception instead. And be silent about it: we
624 * don't want to break compatibility 734 * don't want to break compatibility
625 */ 735 */
626 if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) { 736 if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
737 /* Check if the parent allows removing it first */
738 if (!parent_allows_removal(devcgroup, &ex))
739 return -EPERM;
627 dev_exception_rm(devcgroup, &ex); 740 dev_exception_rm(devcgroup, &ex);
628 return 0; 741 break;
629 } 742 }
743
744 if (!parent_has_perm(devcgroup, &ex))
745 return -EPERM;
630 rc = dev_exception_add(devcgroup, &ex); 746 rc = dev_exception_add(devcgroup, &ex);
631 break; 747 break;
632 case DEVCG_DENY: 748 case DEVCG_DENY:
@@ -704,18 +820,18 @@ static int __devcgroup_check_permission(short type, u32 major, u32 minor,
704 short access) 820 short access)
705{ 821{
706 struct dev_cgroup *dev_cgroup; 822 struct dev_cgroup *dev_cgroup;
707 struct dev_exception_item ex; 823 bool rc;
708 int rc;
709
710 memset(&ex, 0, sizeof(ex));
711 ex.type = type;
712 ex.major = major;
713 ex.minor = minor;
714 ex.access = access;
715 824
716 rcu_read_lock(); 825 rcu_read_lock();
717 dev_cgroup = task_devcgroup(current); 826 dev_cgroup = task_devcgroup(current);
718 rc = may_access(dev_cgroup, &ex, dev_cgroup->behavior); 827 if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW)
828 /* Can't match any of the exceptions, even partially */
829 rc = !match_exception_partial(&dev_cgroup->exceptions,
830 type, major, minor, access);
831 else
832 /* Need to match completely one exception to be allowed */
833 rc = match_exception(&dev_cgroup->exceptions, type, major,
834 minor, access);
719 rcu_read_unlock(); 835 rcu_read_unlock();
720 836
721 if (!rc) 837 if (!rc)