aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Menage <menage@google.com>2007-10-19 02:39:34 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-19 14:53:36 -0400
commit697f41610863c9264a7ae26dac9a387c9dda8c84 (patch)
tree9d4321f93abe36920a8e9d5239d120803f6cabbc
parentb4f48b6363c81ca743ef46943ef23fd72e60f679 (diff)
Task Control Groups: add cgroup_clone() interface
Add support for cgroup_clone(), a way to create new cgroups intended to be used for systems such as namespace unsharing. A new subsystem callback, post_clone(), is added to allow subsystems to automatically configure cloned cgroups. Signed-off-by: Paul Menage <menage@google.com> Cc: Serge E. Hallyn <serue@us.ibm.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Dave Hansen <haveblue@us.ibm.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Paul Jackson <pj@sgi.com> Cc: Kirill Korotaev <dev@openvz.org> Cc: Herbert Poetzl <herbert@13thfloor.at> Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com> Cc: Cedric Le Goater <clg@fr.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/cgroups.txt7
-rw-r--r--include/linux/cgroup.h3
-rw-r--r--kernel/cgroup.c135
3 files changed, 145 insertions, 0 deletions
diff --git a/Documentation/cgroups.txt b/Documentation/cgroups.txt
index 4717887fd75d..553727cc9944 100644
--- a/Documentation/cgroups.txt
+++ b/Documentation/cgroups.txt
@@ -504,6 +504,13 @@ include/linux/cgroup.h for details). Note that although this
504method can return an error code, the error code is currently not 504method can return an error code, the error code is currently not
505always handled well. 505always handled well.
506 506
507void post_clone(struct cgroup_subsys *ss, struct cgroup *cont)
508
509Called at the end of cgroup_clone() to do any parameter
510initialization which might be required before a task could attach. For
511example in cpusets, no task may attach before 'cpus' and 'mems' are set
512up.
513
507void bind(struct cgroup_subsys *ss, struct cgroup *root) 514void bind(struct cgroup_subsys *ss, struct cgroup *root)
508LL=callback_mutex 515LL=callback_mutex
509 516
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 792ad74be170..b21cf093ac62 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -182,6 +182,7 @@ struct cgroup_subsys {
182 void (*exit)(struct cgroup_subsys *ss, struct task_struct *task); 182 void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
183 int (*populate)(struct cgroup_subsys *ss, 183 int (*populate)(struct cgroup_subsys *ss,
184 struct cgroup *cont); 184 struct cgroup *cont);
185 void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cont);
185 void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); 186 void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);
186 int subsys_id; 187 int subsys_id;
187 int active; 188 int active;
@@ -221,6 +222,8 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,
221 222
222int cgroup_path(const struct cgroup *cont, char *buf, int buflen); 223int cgroup_path(const struct cgroup *cont, char *buf, int buflen);
223 224
225int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss);
226
224#else /* !CONFIG_CGROUPS */ 227#else /* !CONFIG_CGROUPS */
225 228
226static inline int cgroup_init_early(void) { return 0; } 229static inline int cgroup_init_early(void) { return 0; }
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 7bb520aaf0a3..cc68fe68a60e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1708,3 +1708,138 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
1708 tsk->cgroups = init_task.cgroups; 1708 tsk->cgroups = init_task.cgroups;
1709 task_unlock(tsk); 1709 task_unlock(tsk);
1710} 1710}
1711
1712/**
1713 * cgroup_clone - duplicate the current cgroup in the hierarchy
1714 * that the given subsystem is attached to, and move this task into
1715 * the new child
1716 */
1717int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
1718{
1719 struct dentry *dentry;
1720 int ret = 0;
1721 char nodename[MAX_CGROUP_TYPE_NAMELEN];
1722 struct cgroup *parent, *child;
1723 struct inode *inode;
1724 struct css_set *cg;
1725 struct cgroupfs_root *root;
1726 struct cgroup_subsys *ss;
1727
1728 /* We shouldn't be called by an unregistered subsystem */
1729 BUG_ON(!subsys->active);
1730
1731 /* First figure out what hierarchy and cgroup we're dealing
1732 * with, and pin them so we can drop cgroup_mutex */
1733 mutex_lock(&cgroup_mutex);
1734 again:
1735 root = subsys->root;
1736 if (root == &rootnode) {
1737 printk(KERN_INFO
1738 "Not cloning cgroup for unused subsystem %s\n",
1739 subsys->name);
1740 mutex_unlock(&cgroup_mutex);
1741 return 0;
1742 }
1743 cg = &tsk->cgroups;
1744 parent = task_cgroup(tsk, subsys->subsys_id);
1745
1746 snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "node_%d", tsk->pid);
1747
1748 /* Pin the hierarchy */
1749 atomic_inc(&parent->root->sb->s_active);
1750
1751 mutex_unlock(&cgroup_mutex);
1752
1753 /* Now do the VFS work to create a cgroup */
1754 inode = parent->dentry->d_inode;
1755
1756 /* Hold the parent directory mutex across this operation to
1757 * stop anyone else deleting the new cgroup */
1758 mutex_lock(&inode->i_mutex);
1759 dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
1760 if (IS_ERR(dentry)) {
1761 printk(KERN_INFO
1762 "Couldn't allocate dentry for %s: %ld\n", nodename,
1763 PTR_ERR(dentry));
1764 ret = PTR_ERR(dentry);
1765 goto out_release;
1766 }
1767
1768 /* Create the cgroup directory, which also creates the cgroup */
1769 ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755);
1770 child = __d_cont(dentry);
1771 dput(dentry);
1772 if (ret) {
1773 printk(KERN_INFO
1774 "Failed to create cgroup %s: %d\n", nodename,
1775 ret);
1776 goto out_release;
1777 }
1778
1779 if (!child) {
1780 printk(KERN_INFO
1781 "Couldn't find new cgroup %s\n", nodename);
1782 ret = -ENOMEM;
1783 goto out_release;
1784 }
1785
1786 /* The cgroup now exists. Retake cgroup_mutex and check
1787 * that we're still in the same state that we thought we
1788 * were. */
1789 mutex_lock(&cgroup_mutex);
1790 if ((root != subsys->root) ||
1791 (parent != task_cgroup(tsk, subsys->subsys_id))) {
1792 /* Aargh, we raced ... */
1793 mutex_unlock(&inode->i_mutex);
1794
1795 deactivate_super(parent->root->sb);
1796 /* The cgroup is still accessible in the VFS, but
1797 * we're not going to try to rmdir() it at this
1798 * point. */
1799 printk(KERN_INFO
1800 "Race in cgroup_clone() - leaking cgroup %s\n",
1801 nodename);
1802 goto again;
1803 }
1804
1805 /* do any required auto-setup */
1806 for_each_subsys(root, ss) {
1807 if (ss->post_clone)
1808 ss->post_clone(ss, child);
1809 }
1810
1811 /* All seems fine. Finish by moving the task into the new cgroup */
1812 ret = attach_task(child, tsk);
1813 mutex_unlock(&cgroup_mutex);
1814
1815 out_release:
1816 mutex_unlock(&inode->i_mutex);
1817 deactivate_super(parent->root->sb);
1818 return ret;
1819}
1820
1821/*
1822 * See if "cont" is a descendant of the current task's cgroup in
1823 * the appropriate hierarchy
1824 *
1825 * If we are sending in dummytop, then presumably we are creating
1826 * the top cgroup in the subsystem.
1827 *
1828 * Called only by the ns (nsproxy) cgroup.
1829 */
1830int cgroup_is_descendant(const struct cgroup *cont)
1831{
1832 int ret;
1833 struct cgroup *target;
1834 int subsys_id;
1835
1836 if (cont == dummytop)
1837 return 1;
1838
1839 get_first_subsys(cont, NULL, &subsys_id);
1840 target = task_cgroup(current, subsys_id);
1841 while (cont != target && cont!= cont->top_cgroup)
1842 cont = cont->parent;
1843 ret = (cont == target);
1844 return ret;
1845}