aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2013-06-28 20:07:30 -0400
committer: Tejun Heo <tj@kernel.org> 2013-07-12 15:34:02 -0400
commit: 3126121fb30941552b1a806c7c2e686bde57e270 (patch)
tree: ea545b574e3658e0dc5282fb6f1ba88b73b26144
parent: b420ba7db15659253d4f286a0ba479d336371999 (diff)
cgroup: make rebind_subsystems() handle file additions and removals with proper error handling

Currently, creating and removing cgroup files in the root directory
are handled separately from the actual subsystem binding and unbinding
which happens in rebind_subsystems().  Also, rebind_subsystems() users
aren't handling file creation errors properly.  Let's integrate
top_cgroup file handling into rebind_subsystems() so that it's simpler
to use and everyone handles file creation errors correctly.

* On a successful return, rebind_subsystems() is guaranteed to have
  created all files of the new subsystems and deleted the ones
  belonging to the removed subsystems.  After a failure, no file is
  created or removed.

* cgroup_remount() no longer needs to make explicit populate/clear
  calls as it's all handled by rebind_subsystems(), and it gets proper
  error handling automatically.

* cgroup_mount() has been updated such that the root dentry and cgroup
  are linked before rebind_subsystems().  Also, the init_cred dancing
  and base file handling are moved right above rebind_subsystems()
  call and proper error handling for the base files is added.  While
  at it, add a comment explaining what's going on with the cred thing.

* cgroup_kill_sb() calls rebind_subsystems() to unbind all subsystems
  which now implies removing all subsystem files which requires the
  directory's i_mutex.  Grab it.  This means that files on the root
  cgroup are removed earlier - they used to be deleted from generic
  super_block cleanup from vfs.  This doesn't lead to any functional
  difference and it's cleaner to do the clean up explicitly for all
  files.

Combined with the previous changes, this makes all cgroup file
creation errors handled correctly.

v2: Added comment on init_cred.

v3: Li spotted that cgroup_mount() wasn't freeing tmp_links after base
    file addition failure.  Fix it by adding free_tmp_links error
    handling label.

v4: v3 introduced build bugs which got noticed by Fengguang's awesome
    kbuild test robot.  Fixed, and shame on me.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
-rw-r--r-- kernel/cgroup.c 73
1 files changed, 41 insertions, 32 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8f70dc0c0c79..4ec8d2da94d1 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1003,7 +1003,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
1003{ 1003{
1004 struct cgroup *cgrp = &root->top_cgroup; 1004 struct cgroup *cgrp = &root->top_cgroup;
1005 struct cgroup_subsys *ss; 1005 struct cgroup_subsys *ss;
1006 int i; 1006 int i, ret;
1007 1007
1008 BUG_ON(!mutex_is_locked(&cgroup_mutex)); 1008 BUG_ON(!mutex_is_locked(&cgroup_mutex));
1009 BUG_ON(!mutex_is_locked(&cgroup_root_mutex)); 1009 BUG_ON(!mutex_is_locked(&cgroup_root_mutex));
@@ -1028,7 +1028,16 @@ static int rebind_subsystems(struct cgroupfs_root *root,
1028 if (root->number_of_cgroups > 1) 1028 if (root->number_of_cgroups > 1)
1029 return -EBUSY; 1029 return -EBUSY;
1030 1030
1031 /* Process each subsystem */ 1031 ret = cgroup_populate_dir(cgrp, added_mask);
1032 if (ret)
1033 return ret;
1034
1035 /*
1036 * Nothing can fail from this point on. Remove files for the
1037 * removed subsystems and rebind each subsystem.
1038 */
1039 cgroup_clear_dir(cgrp, removed_mask);
1040
1032 for_each_subsys(ss, i) { 1041 for_each_subsys(ss, i) {
1033 unsigned long bit = 1UL << i; 1042 unsigned long bit = 1UL << i;
1034 1043
@@ -1364,22 +1373,9 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1364 goto out_unlock; 1373 goto out_unlock;
1365 } 1374 }
1366 1375
1367 /*
1368 * Clear out the files of subsystems that should be removed, do
1369 * this before rebind_subsystems, since rebind_subsystems may
1370 * change this hierarchy's subsys_list.
1371 */
1372 cgroup_clear_dir(cgrp, removed_mask);
1373
1374 ret = rebind_subsystems(root, added_mask, removed_mask); 1376 ret = rebind_subsystems(root, added_mask, removed_mask);
1375 if (ret) { 1377 if (ret)
1376 /* rebind_subsystems failed, re-populate the removed files */
1377 cgroup_populate_dir(cgrp, removed_mask);
1378 goto out_unlock; 1378 goto out_unlock;
1379 }
1380
1381 /* re-populate subsystem files */
1382 cgroup_populate_dir(cgrp, added_mask);
1383 1379
1384 if (opts.release_agent) 1380 if (opts.release_agent)
1385 strcpy(root->release_agent_path, opts.release_agent); 1381 strcpy(root->release_agent_path, opts.release_agent);
@@ -1578,7 +1574,9 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1578 int ret = 0; 1574 int ret = 0;
1579 struct super_block *sb; 1575 struct super_block *sb;
1580 struct cgroupfs_root *new_root; 1576 struct cgroupfs_root *new_root;
1577 struct list_head tmp_links;
1581 struct inode *inode; 1578 struct inode *inode;
1579 const struct cred *cred;
1582 1580
1583 /* First find the desired set of subsystems */ 1581 /* First find the desired set of subsystems */
1584 mutex_lock(&cgroup_mutex); 1582 mutex_lock(&cgroup_mutex);
@@ -1610,10 +1608,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1610 BUG_ON(!root); 1608 BUG_ON(!root);
1611 if (root == opts.new_root) { 1609 if (root == opts.new_root) {
1612 /* We used the new root structure, so this is a new hierarchy */ 1610 /* We used the new root structure, so this is a new hierarchy */
1613 struct list_head tmp_links;
1614 struct cgroup *root_cgrp = &root->top_cgroup; 1611 struct cgroup *root_cgrp = &root->top_cgroup;
1615 struct cgroupfs_root *existing_root; 1612 struct cgroupfs_root *existing_root;
1616 const struct cred *cred;
1617 int i; 1613 int i;
1618 struct css_set *cset; 1614 struct css_set *cset;
1619 1615
@@ -1651,26 +1647,37 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1651 if (ret) 1647 if (ret)
1652 goto unlock_drop; 1648 goto unlock_drop;
1653 1649
1650 sb->s_root->d_fsdata = root_cgrp;
1651 root_cgrp->dentry = sb->s_root;
1652
1653 /*
1654 * We're inside get_sb() and will call lookup_one_len() to
1655 * create the root files, which doesn't work if SELinux is
1656 * in use. The following cred dancing somehow works around
1657 * it. See 2ce9738ba ("cgroupfs: use init_cred when
1658 * populating new cgroupfs mount") for more details.
1659 */
1660 cred = override_creds(&init_cred);
1661
1662 ret = cgroup_addrm_files(root_cgrp, NULL, cgroup_base_files, true);
1663 if (ret)
1664 goto rm_base_files;
1665
1654 ret = rebind_subsystems(root, root->subsys_mask, 0); 1666 ret = rebind_subsystems(root, root->subsys_mask, 0);
1655 if (ret == -EBUSY) { 1667 if (ret)
1656 free_cgrp_cset_links(&tmp_links); 1668 goto rm_base_files;
1657 goto unlock_drop; 1669
1658 } 1670 revert_creds(cred);
1671
1659 /* 1672 /*
1660 * There must be no failure case after here, since rebinding 1673 * There must be no failure case after here, since rebinding
1661 * takes care of subsystems' refcounts, which are explicitly 1674 * takes care of subsystems' refcounts, which are explicitly
1662 * dropped in the failure exit path. 1675 * dropped in the failure exit path.
1663 */ 1676 */
1664 1677
1665 /* EBUSY should be the only error here */
1666 BUG_ON(ret);
1667
1668 list_add(&root->root_list, &cgroup_roots); 1678 list_add(&root->root_list, &cgroup_roots);
1669 cgroup_root_count++; 1679 cgroup_root_count++;
1670 1680
1671 sb->s_root->d_fsdata = root_cgrp;
1672 root->top_cgroup.dentry = sb->s_root;
1673
1674 /* Link the top cgroup in this hierarchy into all 1681 /* Link the top cgroup in this hierarchy into all
1675 * the css_set objects */ 1682 * the css_set objects */
1676 write_lock(&css_set_lock); 1683 write_lock(&css_set_lock);
@@ -1683,10 +1690,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1683 BUG_ON(!list_empty(&root_cgrp->children)); 1690 BUG_ON(!list_empty(&root_cgrp->children));
1684 BUG_ON(root->number_of_cgroups != 1); 1691 BUG_ON(root->number_of_cgroups != 1);
1685 1692
1686 cred = override_creds(&init_cred);
1687 cgroup_addrm_files(root_cgrp, NULL, cgroup_base_files, true);
1688 cgroup_populate_dir(root_cgrp, root->subsys_mask);
1689 revert_creds(cred);
1690 mutex_unlock(&cgroup_root_mutex); 1693 mutex_unlock(&cgroup_root_mutex);
1691 mutex_unlock(&cgroup_mutex); 1694 mutex_unlock(&cgroup_mutex);
1692 mutex_unlock(&inode->i_mutex); 1695 mutex_unlock(&inode->i_mutex);
@@ -1715,6 +1718,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1715 kfree(opts.name); 1718 kfree(opts.name);
1716 return dget(sb->s_root); 1719 return dget(sb->s_root);
1717 1720
1721 rm_base_files:
1722 free_cgrp_cset_links(&tmp_links);
1723 cgroup_addrm_files(&root->top_cgroup, NULL, cgroup_base_files, false);
1724 revert_creds(cred);
1718 unlock_drop: 1725 unlock_drop:
1719 cgroup_exit_root_id(root); 1726 cgroup_exit_root_id(root);
1720 mutex_unlock(&cgroup_root_mutex); 1727 mutex_unlock(&cgroup_root_mutex);
@@ -1741,6 +1748,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
1741 BUG_ON(root->number_of_cgroups != 1); 1748 BUG_ON(root->number_of_cgroups != 1);
1742 BUG_ON(!list_empty(&cgrp->children)); 1749 BUG_ON(!list_empty(&cgrp->children));
1743 1750
1751 mutex_lock(&cgrp->dentry->d_inode->i_mutex);
1744 mutex_lock(&cgroup_mutex); 1752 mutex_lock(&cgroup_mutex);
1745 mutex_lock(&cgroup_root_mutex); 1753 mutex_lock(&cgroup_root_mutex);
1746 1754
@@ -1773,6 +1781,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
1773 1781
1774 mutex_unlock(&cgroup_root_mutex); 1782 mutex_unlock(&cgroup_root_mutex);
1775 mutex_unlock(&cgroup_mutex); 1783 mutex_unlock(&cgroup_mutex);
1784 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
1776 1785
1777 simple_xattrs_free(&cgrp->xattrs); 1786 simple_xattrs_free(&cgrp->xattrs);
1778 1787