diff options
author | Paul Menage <menage@google.com> | 2007-10-19 02:39:34 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-19 14:53:36 -0400 |
commit | 697f41610863c9264a7ae26dac9a387c9dda8c84 (patch) | |
tree | 9d4321f93abe36920a8e9d5239d120803f6cabbc | |
parent | b4f48b6363c81ca743ef46943ef23fd72e60f679 (diff) |
Task Control Groups: add cgroup_clone() interface
Add support for cgroup_clone(), a way to create new cgroups intended to
be used for systems such as namespace unsharing. A new subsystem callback,
post_clone(), is added to allow subsystems to automatically configure cloned
cgroups.
Signed-off-by: Paul Menage <menage@google.com>
Cc: Serge E. Hallyn <serue@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/cgroups.txt | 7 | ||||
-rw-r--r-- | include/linux/cgroup.h | 3 | ||||
-rw-r--r-- | kernel/cgroup.c | 135 |
3 files changed, 145 insertions, 0 deletions
diff --git a/Documentation/cgroups.txt b/Documentation/cgroups.txt index 4717887fd75d..553727cc9944 100644 --- a/Documentation/cgroups.txt +++ b/Documentation/cgroups.txt | |||
@@ -504,6 +504,13 @@ include/linux/cgroup.h for details). Note that although this | |||
504 | method can return an error code, the error code is currently not | 504 | method can return an error code, the error code is currently not |
505 | always handled well. | 505 | always handled well. |
506 | 506 | ||
507 | void post_clone(struct cgroup_subsys *ss, struct cgroup *cont) | ||
508 | |||
509 | Called at the end of cgroup_clone() to do any parameter | |
510 | initialization which might be required before a task could attach. For | ||
511 | example in cpusets, no task may attach before 'cpus' and 'mems' are set | ||
512 | up. | ||
513 | |||
507 | void bind(struct cgroup_subsys *ss, struct cgroup *root) | 514 | void bind(struct cgroup_subsys *ss, struct cgroup *root) |
508 | LL=callback_mutex | 515 | LL=callback_mutex |
509 | 516 | ||
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 792ad74be170..b21cf093ac62 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -182,6 +182,7 @@ struct cgroup_subsys { | |||
182 | void (*exit)(struct cgroup_subsys *ss, struct task_struct *task); | 182 | void (*exit)(struct cgroup_subsys *ss, struct task_struct *task); |
183 | int (*populate)(struct cgroup_subsys *ss, | 183 | int (*populate)(struct cgroup_subsys *ss, |
184 | struct cgroup *cont); | 184 | struct cgroup *cont); |
185 | void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cont); | ||
185 | void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); | 186 | void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); |
186 | int subsys_id; | 187 | int subsys_id; |
187 | int active; | 188 | int active; |
@@ -221,6 +222,8 @@ static inline struct cgroup* task_cgroup(struct task_struct *task, | |||
221 | 222 | ||
222 | int cgroup_path(const struct cgroup *cont, char *buf, int buflen); | 223 | int cgroup_path(const struct cgroup *cont, char *buf, int buflen); |
223 | 224 | ||
225 | int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss); | ||
226 | |||
224 | #else /* !CONFIG_CGROUPS */ | 227 | #else /* !CONFIG_CGROUPS */ |
225 | 228 | ||
226 | static inline int cgroup_init_early(void) { return 0; } | 229 | static inline int cgroup_init_early(void) { return 0; } |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7bb520aaf0a3..cc68fe68a60e 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -1708,3 +1708,138 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
1708 | tsk->cgroups = init_task.cgroups; | 1708 | tsk->cgroups = init_task.cgroups; |
1709 | task_unlock(tsk); | 1709 | task_unlock(tsk); |
1710 | } | 1710 | } |
1711 | |||
1712 | /** | |
1713 | * cgroup_clone - create a child of tsk's current cgroup, named | |
1714 | * "node_<pid>", in the hierarchy that subsys is attached to, and attach | |
1715 | * tsk to it. Returns 0 on success or if subsys is unused, else an error | |
1716 | */ | |
1717 | int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) | |
1718 | { | |
1719 | struct dentry *dentry; | |
1720 | int ret = 0; | |
1721 | char nodename[MAX_CGROUP_TYPE_NAMELEN]; | |
1722 | struct cgroup *parent, *child; | |
1723 | struct inode *inode; | |
1724 | struct css_set *cg; | |
1725 | struct cgroupfs_root *root; | |
1726 | struct cgroup_subsys *ss; | |
1727 | |||
1728 | /* We shouldn't be called by an unregistered subsystem */ | |
1729 | BUG_ON(!subsys->active); | |
1730 | |||
1731 | /* First figure out what hierarchy and cgroup we're dealing | |
1732 | * with, and pin them so we can drop cgroup_mutex */ | |
1733 | mutex_lock(&cgroup_mutex); | |
1734 | again: | |
1735 | root = subsys->root; | |
1736 | if (root == &rootnode) { | |
1737 | printk(KERN_INFO | |
1738 | "Not cloning cgroup for unused subsystem %s\n", | |
1739 | subsys->name); | |
1740 | mutex_unlock(&cgroup_mutex); | |
1741 | return 0; | |
1742 | } | |
1743 | cg = &tsk->cgroups; | |
1744 | parent = task_cgroup(tsk, subsys->subsys_id); | |
1745 | |||
1746 | snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "node_%d", tsk->pid); | |
1747 | |||
1748 | /* Pin the hierarchy */ | |
1749 | atomic_inc(&parent->root->sb->s_active); | |
1750 | |||
1751 | mutex_unlock(&cgroup_mutex); | |
1752 | |||
1753 | /* Now do the VFS work to create a cgroup */ | |
1754 | inode = parent->dentry->d_inode; | |
1755 | |||
1756 | /* Hold the parent directory mutex across this operation to | |
1757 | * stop anyone else deleting the new cgroup */ | |
1758 | mutex_lock(&inode->i_mutex); | |
1759 | dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename)); | |
1760 | if (IS_ERR(dentry)) { | |
1761 | printk(KERN_INFO | |
1762 | "Couldn't allocate dentry for %s: %ld\n", nodename, | |
1763 | PTR_ERR(dentry)); | |
1764 | ret = PTR_ERR(dentry); | |
1765 | goto out_release; | |
1766 | } | |
1767 | |||
1768 | /* Create the cgroup directory, which also creates the cgroup */ | |
1769 | ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755); | |
1770 | child = __d_cont(dentry); | |
1771 | dput(dentry); | |
1772 | if (ret) { | |
1773 | printk(KERN_INFO | |
1774 | "Failed to create cgroup %s: %d\n", nodename, | |
1775 | ret); | |
1776 | goto out_release; | |
1777 | } | |
1778 | |||
1779 | if (!child) { | |
1780 | printk(KERN_INFO | |
1781 | "Couldn't find new cgroup %s\n", nodename); | |
1782 | ret = -ENOMEM; | |
1783 | goto out_release; | |
1784 | } | |
1785 | |||
1786 | /* The cgroup now exists. Retake cgroup_mutex and check | |
1787 | * that we're still in the same state that we thought we | |
1788 | * were. */ | |
1789 | mutex_lock(&cgroup_mutex); | |
1790 | if ((root != subsys->root) || | |
1791 | (parent != task_cgroup(tsk, subsys->subsys_id))) { | |
1792 | /* Aargh, we raced ... */ | |
1793 | mutex_unlock(&inode->i_mutex); | |
1794 | |||
1795 | deactivate_super(parent->root->sb); | |
1796 | /* The cgroup is still accessible in the VFS, but | |
1797 | * we're not going to try to rmdir() it at this | |
1798 | * point. */ | |
1799 | printk(KERN_INFO | |
1800 | "Race in cgroup_clone() - leaking cgroup %s\n", | |
1801 | nodename); | |
1802 | goto again; | |
1803 | } | |
1804 | |||
1805 | /* Let each subsystem in this hierarchy do any required auto-setup */ | |
1806 | for_each_subsys(root, ss) { | |
1807 | if (ss->post_clone) | |
1808 | ss->post_clone(ss, child); | |
1809 | } | |
1810 | |||
1811 | /* All seems fine. Finish by moving the task into the new cgroup */ | |
1812 | ret = attach_task(child, tsk); | |
1813 | mutex_unlock(&cgroup_mutex); | |
1814 | |||
1815 | out_release: | |
1816 | mutex_unlock(&inode->i_mutex); | |
1817 | deactivate_super(parent->root->sb); | |
1818 | return ret; | |
1819 | } | |
1820 | |||
1821 | /* | |
1822 | * Return 1 if "cont" is the current task's cgroup or a descendant of it | |
1823 | * in the appropriate hierarchy, otherwise 0. | |
1824 | * | |
1825 | * If we are sending in dummytop, then presumably we are creating | |
1826 | * the top cgroup in the subsystem, so that case always returns 1. | |
1827 | * | |
1828 | * Called only by the ns (nsproxy) cgroup. | |
1829 | */ | |
1830 | int cgroup_is_descendant(const struct cgroup *cont) | |
1831 | { | |
1832 | int ret; | |
1833 | struct cgroup *target; | |
1834 | int subsys_id; | |
1835 | |||
1836 | if (cont == dummytop) | |
1837 | return 1; | |
1838 | |||
1839 | get_first_subsys(cont, NULL, &subsys_id); | |
1840 | target = task_cgroup(current, subsys_id); | |
1841 | while (cont != target && cont!= cont->top_cgroup) | |
1842 | cont = cont->parent; | |
1843 | ret = (cont == target); | |
1844 | return ret; | |
1845 | } | |