Diffstat (limited to 'kernel/cgroup.c')
 kernel/cgroup.c | 614 +++++++++++++++++++++++++++++++++++++++---------------
 1 files changed, 425 insertions(+), 189 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 25c7eb52de1a..2731d115d725 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -57,6 +57,7 @@
 #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
 #include <linux/eventfd.h>
 #include <linux/poll.h>
+#include <linux/flex_array.h> /* used in cgroup_attach_proc */
 
 #include <asm/atomic.h>
 
@@ -326,12 +327,6 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
 	return &css_set_table[index];
 }
 
-static void free_css_set_rcu(struct rcu_head *obj)
-{
-	struct css_set *cg = container_of(obj, struct css_set, rcu_head);
-	kfree(cg);
-}
-
 /* We don't maintain the lists running through each css_set to its
  * task until after the first call to cgroup_iter_start(). This
  * reduces the fork()/exit() overhead for people who have cgroups
@@ -375,7 +370,7 @@ static void __put_css_set(struct css_set *cg, int taskexit)
 	}
 
 	write_unlock(&css_set_lock);
-	call_rcu(&cg->rcu_head, free_css_set_rcu);
+	kfree_rcu(cg, rcu_head);
 }
 
 /*
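This is the first of three identical conversions in this patch (css_set here; struct cgroup and css_id further down): when an RCU callback does nothing but kfree() the enclosing object, kfree_rcu() replaces the open-coded callback, needing only the name of the embedded rcu_head field. A minimal sketch of the pattern, with a hypothetical struct foo standing in for css_set:

	struct foo {
		int data;
		struct rcu_head rcu_head;	/* must be embedded in the object being freed */
	};

	/* before: a dedicated callback whose only job is to free */
	static void free_foo_rcu(struct rcu_head *head)
	{
		kfree(container_of(head, struct foo, rcu_head));
	}
		...
		call_rcu(&f->rcu_head, free_foo_rcu);

	/* after: the helper derives the same container_of() offset itself */
		kfree_rcu(f, rcu_head);
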
@@ -812,13 +807,6 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp)
 	return ret;
 }
 
-static void free_cgroup_rcu(struct rcu_head *obj)
-{
-	struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);
-
-	kfree(cgrp);
-}
-
 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 {
 	/* is dentry a directory ? if so, kfree() associated cgroup */
@@ -856,7 +844,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 		 */
 		BUG_ON(!list_empty(&cgrp->pidlists));
 
-		call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
+		kfree_rcu(cgrp, rcu_head);
 	}
 	iput(inode);
 }
@@ -1748,6 +1736,76 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 }
 EXPORT_SYMBOL_GPL(cgroup_path);
 
+/*
+ * cgroup_task_migrate - move a task from one cgroup to another.
+ *
+ * 'guarantee' is set if the caller promises that a new css_set for the task
+ * will already exist. If not set, this function might sleep, and can fail with
+ * -ENOMEM. Otherwise, it can only fail with -ESRCH.
+ */
+static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
+			       struct task_struct *tsk, bool guarantee)
+{
+	struct css_set *oldcg;
+	struct css_set *newcg;
+
+	/*
+	 * get old css_set. we need to take task_lock and refcount it, because
+	 * an exiting task can change its css_set to init_css_set and drop its
+	 * old one without taking cgroup_mutex.
+	 */
+	task_lock(tsk);
+	oldcg = tsk->cgroups;
+	get_css_set(oldcg);
+	task_unlock(tsk);
+
+	/* locate or allocate a new css_set for this task. */
+	if (guarantee) {
+		/* we know the css_set we want already exists. */
+		struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
+		read_lock(&css_set_lock);
+		newcg = find_existing_css_set(oldcg, cgrp, template);
+		BUG_ON(!newcg);
+		get_css_set(newcg);
+		read_unlock(&css_set_lock);
+	} else {
+		might_sleep();
+		/* find_css_set will give us newcg already referenced. */
+		newcg = find_css_set(oldcg, cgrp);
+		if (!newcg) {
+			put_css_set(oldcg);
+			return -ENOMEM;
+		}
+	}
+	put_css_set(oldcg);
+
+	/* if PF_EXITING is set, the tsk->cgroups pointer is no longer safe. */
+	task_lock(tsk);
+	if (tsk->flags & PF_EXITING) {
+		task_unlock(tsk);
+		put_css_set(newcg);
+		return -ESRCH;
+	}
+	rcu_assign_pointer(tsk->cgroups, newcg);
+	task_unlock(tsk);
+
+	/* Update the css_set linked lists if we're using them */
+	write_lock(&css_set_lock);
+	if (!list_empty(&tsk->cg_list))
+		list_move(&tsk->cg_list, &newcg->tasks);
+	write_unlock(&css_set_lock);
+
+	/*
+	 * We just gained a reference on oldcg by taking it from the task. As
+	 * trading it for newcg is protected by cgroup_mutex, we're safe to drop
+	 * it here; it will be freed under RCU.
+	 */
+	put_css_set(oldcg);
+
+	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
+	return 0;
+}
+
 /**
  * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
  * @cgrp: the cgroup the task is attaching to
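cgroup_task_migrate() is the common core shared by both attach paths below; the 'guarantee' flag is what allows the whole-threadgroup path to split migration into a may-fail reservation phase and a cannot-fail commit phase. A condensed sketch of the two caller contracts, exactly as they appear later in this patch:

	/* single-task path: find_css_set() may allocate, so -ENOMEM is possible */
	retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
	if (retval)
		goto out;

	/*
	 * threadgroup path: the css_set was prefetched, so only an exiting
	 * task can fail the move, and that is deliberately not an error.
	 */
	retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
	BUG_ON(retval != 0 && retval != -ESRCH);
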
@@ -1758,11 +1816,9 @@ EXPORT_SYMBOL_GPL(cgroup_path);
  */
 int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
-	int retval = 0;
+	int retval;
 	struct cgroup_subsys *ss, *failed_ss = NULL;
 	struct cgroup *oldcgrp;
-	struct css_set *cg;
-	struct css_set *newcg;
 	struct cgroupfs_root *root = cgrp->root;
 
 	/* Nothing to do if the task is already in that cgroup */
@@ -1772,7 +1828,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 
 	for_each_subsys(root, ss) {
 		if (ss->can_attach) {
-			retval = ss->can_attach(ss, cgrp, tsk, false);
+			retval = ss->can_attach(ss, cgrp, tsk);
 			if (retval) {
 				/*
 				 * Remember on which subsystem the can_attach()
@@ -1784,46 +1840,29 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 				goto out;
 			}
 		}
+		if (ss->can_attach_task) {
+			retval = ss->can_attach_task(cgrp, tsk);
+			if (retval) {
+				failed_ss = ss;
+				goto out;
+			}
+		}
 	}
 
-	task_lock(tsk);
-	cg = tsk->cgroups;
-	get_css_set(cg);
-	task_unlock(tsk);
-	/*
-	 * Locate or allocate a new css_set for this task,
-	 * based on its final set of cgroups
-	 */
-	newcg = find_css_set(cg, cgrp);
-	put_css_set(cg);
-	if (!newcg) {
-		retval = -ENOMEM;
+	retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
+	if (retval)
 		goto out;
-	}
-
-	task_lock(tsk);
-	if (tsk->flags & PF_EXITING) {
-		task_unlock(tsk);
-		put_css_set(newcg);
-		retval = -ESRCH;
-		goto out;
-	}
-	rcu_assign_pointer(tsk->cgroups, newcg);
-	task_unlock(tsk);
-
-	/* Update the css_set linked lists if we're using them */
-	write_lock(&css_set_lock);
-	if (!list_empty(&tsk->cg_list))
-		list_move(&tsk->cg_list, &newcg->tasks);
-	write_unlock(&css_set_lock);
 
 	for_each_subsys(root, ss) {
+		if (ss->pre_attach)
+			ss->pre_attach(cgrp);
+		if (ss->attach_task)
+			ss->attach_task(cgrp, tsk);
 		if (ss->attach)
-			ss->attach(ss, cgrp, oldcgrp, tsk, false);
+			ss->attach(ss, cgrp, oldcgrp, tsk);
 	}
-	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
+
 	synchronize_rcu();
-	put_css_set(cg);
 
 	/*
 	 * wake up rmdir() waiter. the rmdir should fail since the cgroup
@@ -1842,7 +1881,7 @@ out:
 			 */
 			break;
 		if (ss->cancel_attach)
-			ss->cancel_attach(ss, cgrp, tsk, false);
+			ss->cancel_attach(ss, cgrp, tsk);
 		}
 	}
 	return retval;
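Note how the subsystem API changes shape across these hunks: the old 'bool threadgroup' argument to can_attach/attach/cancel_attach is gone, and per-thread work moves into the new optional callbacks this series adds to struct cgroup_subsys. A sketch of how a subsystem would fill them in (the foo_* handlers are hypothetical):

	static struct cgroup_subsys foo_subsys = {
		.name		 = "foo",
		.can_attach	 = foo_can_attach,	/* once per operation; may veto */
		.can_attach_task = foo_can_attach_task,	/* once per thread; may veto */
		.pre_attach	 = foo_pre_attach,	/* once, right before the move */
		.attach_task	 = foo_attach_task,	/* once per thread; must not fail */
		.attach		 = foo_attach,		/* once per operation, after the move */
		.cancel_attach	 = foo_cancel_attach,	/* undo can_attach on failure */
	};
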
@@ -1873,49 +1912,370 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
 EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
 
 /*
- * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
- * held. May take task_lock of task
+ * cgroup_attach_proc works in two stages, the first of which prefetches all
+ * new css_sets needed (to make sure we have enough memory before committing
+ * to the move) and stores them in a list of entries of the following type.
+ * TODO: possible optimization: use css_set->rcu_head for chaining instead
+ */
+struct cg_list_entry {
+	struct css_set *cg;
+	struct list_head links;
+};
+
+static bool css_set_check_fetched(struct cgroup *cgrp,
+				  struct task_struct *tsk, struct css_set *cg,
+				  struct list_head *newcg_list)
+{
+	struct css_set *newcg;
+	struct cg_list_entry *cg_entry;
+	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
+
+	read_lock(&css_set_lock);
+	newcg = find_existing_css_set(cg, cgrp, template);
+	if (newcg)
+		get_css_set(newcg);
+	read_unlock(&css_set_lock);
+
+	/* doesn't exist at all? */
+	if (!newcg)
+		return false;
+	/* see if it's already in the list */
+	list_for_each_entry(cg_entry, newcg_list, links) {
+		if (cg_entry->cg == newcg) {
+			put_css_set(newcg);
+			return true;
+		}
+	}
+
+	/* not found */
+	put_css_set(newcg);
+	return false;
+}
+
+/*
+ * Find the new css_set and store it in the list in preparation for moving the
+ * given task to the given cgroup. Returns 0 or -ENOMEM.
+ */
+static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg,
+			    struct list_head *newcg_list)
+{
+	struct css_set *newcg;
+	struct cg_list_entry *cg_entry;
+
+	/* ensure a new css_set will exist for this thread */
+	newcg = find_css_set(cg, cgrp);
+	if (!newcg)
+		return -ENOMEM;
+	/* add it to the list */
+	cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL);
+	if (!cg_entry) {
+		put_css_set(newcg);
+		return -ENOMEM;
+	}
+	cg_entry->cg = newcg;
+	list_add(&cg_entry->links, newcg_list);
+	return 0;
+}
+
+/**
+ * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup
+ * @cgrp: the cgroup to attach to
+ * @leader: the threadgroup leader task_struct of the group to be attached
+ *
+ * Call holding cgroup_mutex and the threadgroup_fork_lock of the leader. Will
+ * take task_lock of each thread in leader's threadgroup individually in turn.
+ */
+int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
+{
+	int retval, i, group_size;
+	struct cgroup_subsys *ss, *failed_ss = NULL;
+	bool cancel_failed_ss = false;
+	/* guaranteed to be initialized later, but the compiler needs this */
+	struct cgroup *oldcgrp = NULL;
+	struct css_set *oldcg;
+	struct cgroupfs_root *root = cgrp->root;
+	/* threadgroup list cursor and array */
+	struct task_struct *tsk;
+	struct flex_array *group;
+	/*
+	 * we need to make sure we have css_sets for all the tasks we're
+	 * going to move -before- we actually start moving them, so that in
+	 * case we get an ENOMEM we can bail out before making any changes.
+	 */
+	struct list_head newcg_list;
+	struct cg_list_entry *cg_entry, *temp_nobe;
+
+	/*
+	 * step 0: in order to do expensive, possibly blocking operations for
+	 * every thread, we cannot iterate the thread group list, since it needs
+	 * rcu or tasklist locked. instead, build an array of all threads in the
+	 * group - threadgroup_fork_lock prevents new threads from appearing,
+	 * and if threads exit, this will just be an over-estimate.
+	 */
+	group_size = get_nr_threads(leader);
+	/* flex_array supports very large thread-groups better than kmalloc. */
+	group = flex_array_alloc(sizeof(struct task_struct *), group_size,
+				 GFP_KERNEL);
+	if (!group)
+		return -ENOMEM;
+	/* pre-allocate to guarantee space while iterating in rcu read-side. */
+	retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL);
+	if (retval)
+		goto out_free_group_list;
+
+	/* prevent changes to the threadgroup list while we take a snapshot. */
+	rcu_read_lock();
+	if (!thread_group_leader(leader)) {
+		/*
+		 * a race with de_thread from another thread's exec() may strip
+		 * us of our leadership, making while_each_thread unsafe to use
+		 * on this task. if this happens, there is no choice but to
+		 * throw this task away and try again (from cgroup_procs_write);
+		 * this is "double-double-toil-and-trouble-check locking".
+		 */
+		rcu_read_unlock();
+		retval = -EAGAIN;
+		goto out_free_group_list;
+	}
+	/* take a reference on each task in the group to go in the array. */
+	tsk = leader;
+	i = 0;
+	do {
+		/* as per above, nr_threads may decrease, but not increase. */
+		BUG_ON(i >= group_size);
+		get_task_struct(tsk);
+		/*
+		 * saying GFP_ATOMIC has no effect here because we did prealloc
+		 * earlier, but it's good form to communicate our expectations.
+		 */
+		retval = flex_array_put_ptr(group, i, tsk, GFP_ATOMIC);
+		BUG_ON(retval != 0);
+		i++;
+	} while_each_thread(leader, tsk);
+	/* remember the number of threads in the array for later. */
+	group_size = i;
+	rcu_read_unlock();
+
+	/*
+	 * step 1: check that we can legitimately attach to the cgroup.
+	 */
+	for_each_subsys(root, ss) {
+		if (ss->can_attach) {
+			retval = ss->can_attach(ss, cgrp, leader);
+			if (retval) {
+				failed_ss = ss;
+				goto out_cancel_attach;
+			}
+		}
+		/* a callback to be run on every thread in the threadgroup. */
+		if (ss->can_attach_task) {
+			/* run on each task in the threadgroup. */
+			for (i = 0; i < group_size; i++) {
+				tsk = flex_array_get_ptr(group, i);
+				retval = ss->can_attach_task(cgrp, tsk);
+				if (retval) {
+					failed_ss = ss;
+					cancel_failed_ss = true;
+					goto out_cancel_attach;
+				}
+			}
+		}
+	}
+
+	/*
+	 * step 2: make sure css_sets exist for all threads to be migrated.
+	 * we use find_css_set, which allocates a new one if necessary.
+	 */
+	INIT_LIST_HEAD(&newcg_list);
+	for (i = 0; i < group_size; i++) {
+		tsk = flex_array_get_ptr(group, i);
+		/* nothing to do if this task is already in the cgroup */
+		oldcgrp = task_cgroup_from_root(tsk, root);
+		if (cgrp == oldcgrp)
+			continue;
+		/* get old css_set pointer */
+		task_lock(tsk);
+		if (tsk->flags & PF_EXITING) {
+			/* ignore this task if it's going away */
+			task_unlock(tsk);
+			continue;
+		}
+		oldcg = tsk->cgroups;
+		get_css_set(oldcg);
+		task_unlock(tsk);
+		/* see if the new one for us is already in the list? */
+		if (css_set_check_fetched(cgrp, tsk, oldcg, &newcg_list)) {
+			/* was already there, nothing to do. */
+			put_css_set(oldcg);
+		} else {
+			/* we don't already have it. get new one. */
+			retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
+			put_css_set(oldcg);
+			if (retval)
+				goto out_list_teardown;
+		}
+	}
+
+	/*
+	 * step 3: now that we're guaranteed success wrt the css_sets, proceed
+	 * to move all tasks to the new cgroup, calling ss->attach_task for each
+	 * one along the way. there are no failure cases after here, so this is
+	 * the commit point.
+	 */
+	for_each_subsys(root, ss) {
+		if (ss->pre_attach)
+			ss->pre_attach(cgrp);
+	}
+	for (i = 0; i < group_size; i++) {
+		tsk = flex_array_get_ptr(group, i);
+		/* leave current thread as it is if it's already there */
+		oldcgrp = task_cgroup_from_root(tsk, root);
+		if (cgrp == oldcgrp)
+			continue;
+		/* attach each task to each subsystem */
+		for_each_subsys(root, ss) {
+			if (ss->attach_task)
+				ss->attach_task(cgrp, tsk);
+		}
+		/* if the thread is PF_EXITING, it can just get skipped. */
+		retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
+		BUG_ON(retval != 0 && retval != -ESRCH);
+	}
+	/* nothing is sensitive to fork() after this point. */
+
+	/*
+	 * step 4: do expensive, non-thread-specific subsystem callbacks.
+	 * TODO: if ever a subsystem needs to know the oldcgrp for each task
+	 * being moved, this call will need to be reworked to communicate that.
+	 */
+	for_each_subsys(root, ss) {
+		if (ss->attach)
+			ss->attach(ss, cgrp, oldcgrp, leader);
+	}
+
+	/*
+	 * step 5: success! and cleanup
+	 */
+	synchronize_rcu();
+	cgroup_wakeup_rmdir_waiter(cgrp);
+	retval = 0;
+out_list_teardown:
+	/* clean up the list of prefetched css_sets. */
+	list_for_each_entry_safe(cg_entry, temp_nobe, &newcg_list, links) {
+		list_del(&cg_entry->links);
+		put_css_set(cg_entry->cg);
+		kfree(cg_entry);
+	}
+out_cancel_attach:
+	/* same deal as in cgroup_attach_task */
+	if (retval) {
+		for_each_subsys(root, ss) {
+			if (ss == failed_ss) {
+				if (cancel_failed_ss && ss->cancel_attach)
+					ss->cancel_attach(ss, cgrp, leader);
+				break;
+			}
+			if (ss->cancel_attach)
+				ss->cancel_attach(ss, cgrp, leader);
+		}
+	}
+	/* clean up the array of referenced threads in the group. */
+	for (i = 0; i < group_size; i++) {
+		tsk = flex_array_get_ptr(group, i);
+		put_task_struct(tsk);
+	}
+out_free_group_list:
+	flex_array_free(group);
+	return retval;
+}
+
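A note on the flex_array introduced in step 0: it stores elements in individually allocated parts rather than one contiguous buffer, so snapshotting a huge threadgroup avoids the high-order allocation that a kmalloc() of group_size pointers would need. The slice of the API used above, in the calling convention of this era (flex_array_prealloc() takes an inclusive end index here):

	struct flex_array *group;

	group = flex_array_alloc(sizeof(struct task_struct *), group_size,
				 GFP_KERNEL);
	if (!group)
		return -ENOMEM;
	/* reserve every element page up front, so later puts cannot fail... */
	retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL);
	/* ...which is what makes GFP_ATOMIC puts under rcu_read_lock() safe */
	retval = flex_array_put_ptr(group, i, tsk, GFP_ATOMIC);
	tsk = flex_array_get_ptr(group, i);
	flex_array_free(group);
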
+/*
+ * Find the task_struct of the task to attach by vpid and pass it along to the
+ * function to attach either it or all tasks in its threadgroup. Will take
+ * cgroup_mutex; may take task_lock of task.
  */
-static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
+static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
 {
 	struct task_struct *tsk;
 	const struct cred *cred = current_cred(), *tcred;
 	int ret;
 
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+
 	if (pid) {
 		rcu_read_lock();
 		tsk = find_task_by_vpid(pid);
-		if (!tsk || tsk->flags & PF_EXITING) {
+		if (!tsk) {
 			rcu_read_unlock();
+			cgroup_unlock();
+			return -ESRCH;
+		}
+		if (threadgroup) {
+			/*
+			 * RCU protects this access, since tsk was found in the
+			 * tid map. a race with de_thread may cause group_leader
+			 * to stop being the leader, but cgroup_attach_proc will
+			 * detect it later.
+			 */
+			tsk = tsk->group_leader;
+		} else if (tsk->flags & PF_EXITING) {
+			/* optimization for the single-task-only case */
+			rcu_read_unlock();
+			cgroup_unlock();
 			return -ESRCH;
 		}
 
+		/*
+		 * even if we're attaching all tasks in the thread group, we
+		 * only need to check permissions on one of them.
+		 */
 		tcred = __task_cred(tsk);
 		if (cred->euid &&
 		    cred->euid != tcred->uid &&
 		    cred->euid != tcred->suid) {
 			rcu_read_unlock();
+			cgroup_unlock();
 			return -EACCES;
 		}
 		get_task_struct(tsk);
 		rcu_read_unlock();
 	} else {
-		tsk = current;
+		if (threadgroup)
+			tsk = current->group_leader;
+		else
+			tsk = current;
 		get_task_struct(tsk);
 	}
 
-	ret = cgroup_attach_task(cgrp, tsk);
+	if (threadgroup) {
+		threadgroup_fork_write_lock(tsk);
+		ret = cgroup_attach_proc(cgrp, tsk);
+		threadgroup_fork_write_unlock(tsk);
+	} else {
+		ret = cgroup_attach_task(cgrp, tsk);
+	}
 	put_task_struct(tsk);
+	cgroup_unlock();
 	return ret;
 }
 
 static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
 {
+	return attach_task_by_pid(cgrp, pid, false);
+}
+
+static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
+{
 	int ret;
-	if (!cgroup_lock_live_group(cgrp))
-		return -ENODEV;
-	ret = attach_task_by_pid(cgrp, pid);
-	cgroup_unlock();
+	do {
+		/*
+		 * attach_proc fails with -EAGAIN if threadgroup leadership
+		 * changes in the middle of the operation, in which case we need
+		 * to find the task_struct for the new leader and start over.
+		 */
+		ret = attach_task_by_pid(cgrp, tgid, true);
+	} while (ret == -EAGAIN);
 	return ret;
 }
 
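The do/while in cgroup_procs_write() is the second half of the leadership check inside cgroup_attach_proc(): if another thread's exec() calls de_thread() and takes over leadership mid-operation, cgroup_attach_proc() returns -EAGAIN rather than run while_each_thread() on a task that is no longer a leader. Because every retry goes back through find_task_by_vpid() and tsk->group_leader, the loop re-resolves to whichever task leads the group now; leadership changes require an exec() in the target group, so in practice the retry converges almost immediately.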
@@ -3272,9 +3632,9 @@ static struct cftype files[] = {
 	{
 		.name = CGROUP_FILE_GENERIC_PREFIX "procs",
 		.open = cgroup_procs_open,
-		/* .write_u64 = cgroup_procs_write, TODO */
+		.write_u64 = cgroup_procs_write,
 		.release = cgroup_pidlist_release,
-		.mode = S_IRUGO,
+		.mode = S_IRUGO | S_IWUSR,
 	},
 	{
 		.name = "notify_on_release",
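With the write handler wired up and the file mode widened from S_IRUGO to S_IRUGO | S_IWUSR, cgroup.procs becomes the threadgroup-granularity counterpart of the tasks file: writing a tgid to it (for example, echo $$ > cgroup.procs from a shell inside a cgroup directory) moves every thread of that group through cgroup_procs_write() above, whereas the tasks file continues to move one thread at a time.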
@@ -4270,122 +4630,6 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 }
 
 /**
- * cgroup_clone - clone the cgroup the given subsystem is attached to
- * @tsk: the task to be moved
- * @subsys: the given subsystem
- * @nodename: the name for the new cgroup
- *
- * Duplicate the current cgroup in the hierarchy that the given
- * subsystem is attached to, and move this task into the new
- * child.
- */
-int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
-		 char *nodename)
-{
-	struct dentry *dentry;
-	int ret = 0;
-	struct cgroup *parent, *child;
-	struct inode *inode;
-	struct css_set *cg;
-	struct cgroupfs_root *root;
-	struct cgroup_subsys *ss;
-
-	/* We shouldn't be called by an unregistered subsystem */
-	BUG_ON(!subsys->active);
-
-	/* First figure out what hierarchy and cgroup we're dealing
-	 * with, and pin them so we can drop cgroup_mutex */
-	mutex_lock(&cgroup_mutex);
- again:
-	root = subsys->root;
-	if (root == &rootnode) {
-		mutex_unlock(&cgroup_mutex);
-		return 0;
-	}
-
-	/* Pin the hierarchy */
-	if (!atomic_inc_not_zero(&root->sb->s_active)) {
-		/* We race with the final deactivate_super() */
-		mutex_unlock(&cgroup_mutex);
-		return 0;
-	}
-
-	/* Keep the cgroup alive */
-	task_lock(tsk);
-	parent = task_cgroup(tsk, subsys->subsys_id);
-	cg = tsk->cgroups;
-	get_css_set(cg);
-	task_unlock(tsk);
-
-	mutex_unlock(&cgroup_mutex);
-
-	/* Now do the VFS work to create a cgroup */
-	inode = parent->dentry->d_inode;
-
-	/* Hold the parent directory mutex across this operation to
-	 * stop anyone else deleting the new cgroup */
-	mutex_lock(&inode->i_mutex);
-	dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
-	if (IS_ERR(dentry)) {
-		printk(KERN_INFO
-		       "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename,
-		       PTR_ERR(dentry));
-		ret = PTR_ERR(dentry);
-		goto out_release;
-	}
-
-	/* Create the cgroup directory, which also creates the cgroup */
-	ret = vfs_mkdir(inode, dentry, 0755);
-	child = __d_cgrp(dentry);
-	dput(dentry);
-	if (ret) {
-		printk(KERN_INFO
-		       "Failed to create cgroup %s: %d\n", nodename,
-		       ret);
-		goto out_release;
-	}
-
-	/* The cgroup now exists. Retake cgroup_mutex and check
-	 * that we're still in the same state that we thought we
-	 * were. */
-	mutex_lock(&cgroup_mutex);
-	if ((root != subsys->root) ||
-	    (parent != task_cgroup(tsk, subsys->subsys_id))) {
-		/* Aargh, we raced ... */
-		mutex_unlock(&inode->i_mutex);
-		put_css_set(cg);
-
-		deactivate_super(root->sb);
-		/* The cgroup is still accessible in the VFS, but
-		 * we're not going to try to rmdir() it at this
-		 * point. */
-		printk(KERN_INFO
-		       "Race in cgroup_clone() - leaking cgroup %s\n",
-		       nodename);
-		goto again;
-	}
-
-	/* do any required auto-setup */
-	for_each_subsys(root, ss) {
-		if (ss->post_clone)
-			ss->post_clone(ss, child);
-	}
-
-	/* All seems fine. Finish by moving the task into the new cgroup */
-	ret = cgroup_attach_task(child, tsk);
-	mutex_unlock(&cgroup_mutex);
-
- out_release:
-	mutex_unlock(&inode->i_mutex);
-
-	mutex_lock(&cgroup_mutex);
-	put_css_set(cg);
-	mutex_unlock(&cgroup_mutex);
-	deactivate_super(root->sb);
-	return ret;
-}
-
-/**
  * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp
  * @cgrp: the cgroup in question
  * @task: the task in question
@@ -4623,14 +4867,6 @@ bool css_is_ancestor(struct cgroup_subsys_state *child,
 	return ret;
 }
 
-static void __free_css_id_cb(struct rcu_head *head)
-{
-	struct css_id *id;
-
-	id = container_of(head, struct css_id, rcu_head);
-	kfree(id);
-}
-
 void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
 {
 	struct css_id *id = css->id;
@@ -4645,7 +4881,7 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
 	spin_lock(&ss->id_lock);
 	idr_remove(&ss->idr, id->id);
 	spin_unlock(&ss->id_lock);
-	call_rcu(&id->rcu_head, __free_css_id_cb);
+	kfree_rcu(id, rcu_head);
 }
 EXPORT_SYMBOL_GPL(free_css_id);
 