path: root/kernel/cgroup.c
Diffstat (limited to 'kernel/cgroup.c')
 kernel/cgroup.c | 587
 1 file changed, 422 insertions(+), 165 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 909a35510af5..2731d115d725 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -57,6 +57,7 @@
 #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
 #include <linux/eventfd.h>
 #include <linux/poll.h>
+#include <linux/flex_array.h> /* used in cgroup_attach_proc */
 
 #include <asm/atomic.h>
 
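[Annotation: the new include pulls in the flex_array API that cgroup_attach_proc below relies on. A minimal sketch of just those calls, mirroring the alloc/prealloc/put/get/free usage in this patch; the function demo_snapshot is hypothetical:

static int demo_snapshot(struct task_struct *tsk, int nr)
{
	struct flex_array *fa;
	int ret;

	fa = flex_array_alloc(sizeof(struct task_struct *), nr, GFP_KERNEL);
	if (!fa)
		return -ENOMEM;
	/* reserve backing pages for slots 0..nr-1 so later puts cannot fail */
	ret = flex_array_prealloc(fa, 0, nr - 1, GFP_KERNEL);
	if (!ret) {
		/* after prealloc, GFP_ATOMIC puts are guaranteed to succeed */
		flex_array_put_ptr(fa, 0, tsk, GFP_ATOMIC);
		WARN_ON(flex_array_get_ptr(fa, 0) != tsk);
	}
	flex_array_free(fa);
	return ret;
}
]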
@@ -1735,6 +1736,76 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 }
 EXPORT_SYMBOL_GPL(cgroup_path);
 
+/*
+ * cgroup_task_migrate - move a task from one cgroup to another.
+ *
+ * 'guarantee' is set if the caller promises that a new css_set for the task
+ * will already exist. If not set, this function might sleep, and can fail with
+ * -ENOMEM. Otherwise, it can only fail with -ESRCH.
+ */
+static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
+			       struct task_struct *tsk, bool guarantee)
+{
+	struct css_set *oldcg;
+	struct css_set *newcg;
+
+	/*
+	 * get old css_set. we need to take task_lock and refcount it, because
+	 * an exiting task can change its css_set to init_css_set and drop its
+	 * old one without taking cgroup_mutex.
+	 */
+	task_lock(tsk);
+	oldcg = tsk->cgroups;
+	get_css_set(oldcg);
+	task_unlock(tsk);
+
+	/* locate or allocate a new css_set for this task. */
+	if (guarantee) {
+		/* we know the css_set we want already exists. */
+		struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
+		read_lock(&css_set_lock);
+		newcg = find_existing_css_set(oldcg, cgrp, template);
+		BUG_ON(!newcg);
+		get_css_set(newcg);
+		read_unlock(&css_set_lock);
+	} else {
+		might_sleep();
+		/* find_css_set will give us newcg already referenced. */
+		newcg = find_css_set(oldcg, cgrp);
+		if (!newcg) {
+			put_css_set(oldcg);
+			return -ENOMEM;
+		}
+	}
+	put_css_set(oldcg);
+
+	/* if PF_EXITING is set, the tsk->cgroups pointer is no longer safe. */
+	task_lock(tsk);
+	if (tsk->flags & PF_EXITING) {
+		task_unlock(tsk);
+		put_css_set(newcg);
+		return -ESRCH;
+	}
+	rcu_assign_pointer(tsk->cgroups, newcg);
+	task_unlock(tsk);
+
+	/* Update the css_set linked lists if we're using them */
+	write_lock(&css_set_lock);
+	if (!list_empty(&tsk->cg_list))
+		list_move(&tsk->cg_list, &newcg->tasks);
+	write_unlock(&css_set_lock);
+
+	/*
+	 * We just gained a reference on oldcg by taking it from the task. As
+	 * trading it for newcg is protected by cgroup_mutex, we're safe to drop
+	 * it here; it will be freed under RCU.
+	 */
+	put_css_set(oldcg);
+
+	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
+	return 0;
+}
+
 /**
  * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
  * @cgrp: the cgroup the task is attaching to
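[Annotation: the new helper has two modes keyed by 'guarantee': the single-task attach path below passes false and tolerates -ENOMEM, while the threadgroup path prefetches css_sets first and passes true. A minimal caller sketch, assuming cgroup_mutex is already held as the real callers do; the wrapper name move_one_task is hypothetical:

static int move_one_task(struct cgroup *cgrp, struct task_struct *tsk)
{
	struct cgroup *oldcgrp = task_cgroup_from_root(tsk, cgrp->root);
	int ret;

	if (cgrp == oldcgrp)
		return 0;	/* nothing to do: already in that cgroup */
	/* guarantee=false: may sleep in find_css_set() and return -ENOMEM */
	ret = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
	/* -ESRCH means tsk is exiting: fatal for one task, skippable in bulk */
	return ret;
}
]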
@@ -1745,11 +1816,9 @@ EXPORT_SYMBOL_GPL(cgroup_path);
  */
 int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
-	int retval = 0;
+	int retval;
 	struct cgroup_subsys *ss, *failed_ss = NULL;
 	struct cgroup *oldcgrp;
-	struct css_set *cg;
-	struct css_set *newcg;
 	struct cgroupfs_root *root = cgrp->root;
 
 	/* Nothing to do if the task is already in that cgroup */
@@ -1759,7 +1828,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 
 	for_each_subsys(root, ss) {
 		if (ss->can_attach) {
-			retval = ss->can_attach(ss, cgrp, tsk, false);
+			retval = ss->can_attach(ss, cgrp, tsk);
 			if (retval) {
 				/*
 				 * Remember on which subsystem the can_attach()
@@ -1771,46 +1840,29 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 				goto out;
 			}
 		}
+		if (ss->can_attach_task) {
+			retval = ss->can_attach_task(cgrp, tsk);
+			if (retval) {
+				failed_ss = ss;
+				goto out;
+			}
+		}
 	}
 
-	task_lock(tsk);
-	cg = tsk->cgroups;
-	get_css_set(cg);
-	task_unlock(tsk);
-	/*
-	 * Locate or allocate a new css_set for this task,
-	 * based on its final set of cgroups
-	 */
-	newcg = find_css_set(cg, cgrp);
-	put_css_set(cg);
-	if (!newcg) {
-		retval = -ENOMEM;
-		goto out;
-	}
-
-	task_lock(tsk);
-	if (tsk->flags & PF_EXITING) {
-		task_unlock(tsk);
-		put_css_set(newcg);
-		retval = -ESRCH;
+	retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
+	if (retval)
 		goto out;
-	}
-	rcu_assign_pointer(tsk->cgroups, newcg);
-	task_unlock(tsk);
-
-	/* Update the css_set linked lists if we're using them */
-	write_lock(&css_set_lock);
-	if (!list_empty(&tsk->cg_list))
-		list_move(&tsk->cg_list, &newcg->tasks);
-	write_unlock(&css_set_lock);
 
 	for_each_subsys(root, ss) {
+		if (ss->pre_attach)
+			ss->pre_attach(cgrp);
+		if (ss->attach_task)
+			ss->attach_task(cgrp, tsk);
 		if (ss->attach)
-			ss->attach(ss, cgrp, oldcgrp, tsk, false);
+			ss->attach(ss, cgrp, oldcgrp, tsk);
 	}
-	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
+
 	synchronize_rcu();
-	put_css_set(cg);
 
 	/*
 	 * wake up rmdir() waiter. the rmdir should fail since the cgroup
@@ -1829,7 +1881,7 @@ out:
 			 */
 				break;
 			if (ss->cancel_attach)
-				ss->cancel_attach(ss, cgrp, tsk, false);
+				ss->cancel_attach(ss, cgrp, tsk);
 		}
 	}
 	return retval;
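[Annotation: these hunks also establish the new callback order: per-process can_attach() runs first, then the per-thread can_attach_task(), and on the commit side pre_attach()/attach_task() run before attach(). A hedged stub of a subsystem wired to the new hooks; the subsystem "example_subsys" and its functions are hypothetical, but the signatures follow the calls visible in this diff:

static int example_can_attach_task(struct cgroup *cgrp,
				   struct task_struct *tsk)
{
	/* per-thread admission check; called once per thread being moved */
	return 0;
}

static void example_pre_attach(struct cgroup *cgrp)
{
	/* one-time setup before any thread is moved */
}

static void example_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
	/* per-thread commit work; runs after migration can no longer fail */
}

struct cgroup_subsys example_subsys = {
	.name		 = "example",
	.can_attach_task = example_can_attach_task,
	.pre_attach	 = example_pre_attach,
	.attach_task	 = example_attach_task,
};
]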
@@ -1860,49 +1912,370 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
 EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
 
 /*
- * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
- * held. May take task_lock of task
+ * cgroup_attach_proc works in two stages, the first of which prefetches all
+ * new css_sets needed (to make sure we have enough memory before committing
+ * to the move) and stores them in a list of entries of the following type.
+ * TODO: possible optimization: use css_set->rcu_head for chaining instead
+ */
+struct cg_list_entry {
+	struct css_set *cg;
+	struct list_head links;
+};
+
+static bool css_set_check_fetched(struct cgroup *cgrp,
+				  struct task_struct *tsk, struct css_set *cg,
+				  struct list_head *newcg_list)
+{
+	struct css_set *newcg;
+	struct cg_list_entry *cg_entry;
+	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
+
+	read_lock(&css_set_lock);
+	newcg = find_existing_css_set(cg, cgrp, template);
+	if (newcg)
+		get_css_set(newcg);
+	read_unlock(&css_set_lock);
+
+	/* doesn't exist at all? */
+	if (!newcg)
+		return false;
+	/* see if it's already in the list */
+	list_for_each_entry(cg_entry, newcg_list, links) {
+		if (cg_entry->cg == newcg) {
+			put_css_set(newcg);
+			return true;
+		}
+	}
+
+	/* not found */
+	put_css_set(newcg);
+	return false;
+}
+
+/*
+ * Find the new css_set and store it in the list in preparation for moving the
+ * given task to the given cgroup. Returns 0 or -ENOMEM.
+ */
+static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg,
+			    struct list_head *newcg_list)
+{
+	struct css_set *newcg;
+	struct cg_list_entry *cg_entry;
+
+	/* ensure a new css_set will exist for this thread */
+	newcg = find_css_set(cg, cgrp);
+	if (!newcg)
+		return -ENOMEM;
+	/* add it to the list */
+	cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL);
+	if (!cg_entry) {
+		put_css_set(newcg);
+		return -ENOMEM;
+	}
+	cg_entry->cg = newcg;
+	list_add(&cg_entry->links, newcg_list);
+	return 0;
+}
+
+/**
+ * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup
+ * @cgrp: the cgroup to attach to
+ * @leader: the threadgroup leader task_struct of the group to be attached
+ *
+ * Call holding cgroup_mutex and the threadgroup_fork_lock of the leader. Will
+ * take task_lock of each thread in leader's threadgroup individually in turn.
+ */
+int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
+{
+	int retval, i, group_size;
+	struct cgroup_subsys *ss, *failed_ss = NULL;
+	bool cancel_failed_ss = false;
+	/* guaranteed to be initialized later, but the compiler needs this */
+	struct cgroup *oldcgrp = NULL;
+	struct css_set *oldcg;
+	struct cgroupfs_root *root = cgrp->root;
+	/* threadgroup list cursor and array */
+	struct task_struct *tsk;
+	struct flex_array *group;
+	/*
+	 * we need to make sure we have css_sets for all the tasks we're
+	 * going to move -before- we actually start moving them, so that in
+	 * case we get an ENOMEM we can bail out before making any changes.
+	 */
+	struct list_head newcg_list;
+	struct cg_list_entry *cg_entry, *temp_nobe;
+
+	/*
+	 * step 0: in order to do expensive, possibly blocking operations for
+	 * every thread, we cannot iterate the thread group list, since it needs
+	 * rcu or tasklist locked. instead, build an array of all threads in the
+	 * group - threadgroup_fork_lock prevents new threads from appearing,
+	 * and if threads exit, this will just be an over-estimate.
+	 */
+	group_size = get_nr_threads(leader);
+	/* flex_array supports very large thread-groups better than kmalloc. */
+	group = flex_array_alloc(sizeof(struct task_struct *), group_size,
+				 GFP_KERNEL);
+	if (!group)
+		return -ENOMEM;
+	/* pre-allocate to guarantee space while iterating in rcu read-side. */
+	retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL);
+	if (retval)
+		goto out_free_group_list;
+
+	/* prevent changes to the threadgroup list while we take a snapshot. */
+	rcu_read_lock();
+	if (!thread_group_leader(leader)) {
+		/*
+		 * a race with de_thread from another thread's exec() may strip
+		 * us of our leadership, making while_each_thread unsafe to use
+		 * on this task. if this happens, there is no choice but to
+		 * throw this task away and try again (from cgroup_procs_write);
+		 * this is "double-double-toil-and-trouble-check locking".
+		 */
+		rcu_read_unlock();
+		retval = -EAGAIN;
+		goto out_free_group_list;
+	}
+	/* take a reference on each task in the group to go in the array. */
+	tsk = leader;
+	i = 0;
+	do {
+		/* as per above, nr_threads may decrease, but not increase. */
+		BUG_ON(i >= group_size);
+		get_task_struct(tsk);
+		/*
+		 * saying GFP_ATOMIC has no effect here because we did prealloc
+		 * earlier, but it's good form to communicate our expectations.
+		 */
+		retval = flex_array_put_ptr(group, i, tsk, GFP_ATOMIC);
+		BUG_ON(retval != 0);
+		i++;
+	} while_each_thread(leader, tsk);
+	/* remember the number of threads in the array for later. */
+	group_size = i;
+	rcu_read_unlock();
+
+	/*
+	 * step 1: check that we can legitimately attach to the cgroup.
+	 */
+	for_each_subsys(root, ss) {
+		if (ss->can_attach) {
+			retval = ss->can_attach(ss, cgrp, leader);
+			if (retval) {
+				failed_ss = ss;
+				goto out_cancel_attach;
+			}
+		}
+		/* a callback to be run on every thread in the threadgroup. */
+		if (ss->can_attach_task) {
+			/* run on each task in the threadgroup. */
+			for (i = 0; i < group_size; i++) {
+				tsk = flex_array_get_ptr(group, i);
+				retval = ss->can_attach_task(cgrp, tsk);
+				if (retval) {
+					failed_ss = ss;
+					cancel_failed_ss = true;
+					goto out_cancel_attach;
+				}
+			}
+		}
+	}
+
+	/*
+	 * step 2: make sure css_sets exist for all threads to be migrated.
+	 * we use find_css_set, which allocates a new one if necessary.
+	 */
+	INIT_LIST_HEAD(&newcg_list);
+	for (i = 0; i < group_size; i++) {
+		tsk = flex_array_get_ptr(group, i);
+		/* nothing to do if this task is already in the cgroup */
+		oldcgrp = task_cgroup_from_root(tsk, root);
+		if (cgrp == oldcgrp)
+			continue;
+		/* get old css_set pointer */
+		task_lock(tsk);
+		if (tsk->flags & PF_EXITING) {
+			/* ignore this task if it's going away */
+			task_unlock(tsk);
+			continue;
+		}
+		oldcg = tsk->cgroups;
+		get_css_set(oldcg);
+		task_unlock(tsk);
+		/* see if the new one for us is already in the list? */
+		if (css_set_check_fetched(cgrp, tsk, oldcg, &newcg_list)) {
+			/* was already there, nothing to do. */
+			put_css_set(oldcg);
+		} else {
+			/* we don't already have it. get new one. */
+			retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
+			put_css_set(oldcg);
+			if (retval)
+				goto out_list_teardown;
+		}
+	}
+
+	/*
+	 * step 3: now that we're guaranteed success wrt the css_sets, proceed
+	 * to move all tasks to the new cgroup, calling ss->attach_task for each
+	 * one along the way. there are no failure cases after here, so this is
+	 * the commit point.
+	 */
+	for_each_subsys(root, ss) {
+		if (ss->pre_attach)
+			ss->pre_attach(cgrp);
+	}
+	for (i = 0; i < group_size; i++) {
+		tsk = flex_array_get_ptr(group, i);
+		/* leave current thread as it is if it's already there */
+		oldcgrp = task_cgroup_from_root(tsk, root);
+		if (cgrp == oldcgrp)
+			continue;
+		/* attach each task to each subsystem */
+		for_each_subsys(root, ss) {
+			if (ss->attach_task)
+				ss->attach_task(cgrp, tsk);
+		}
+		/* if the thread is PF_EXITING, it can just get skipped. */
+		retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
+		BUG_ON(retval != 0 && retval != -ESRCH);
+	}
+	/* nothing is sensitive to fork() after this point. */
+
+	/*
+	 * step 4: do expensive, non-thread-specific subsystem callbacks.
+	 * TODO: if ever a subsystem needs to know the oldcgrp for each task
+	 * being moved, this call will need to be reworked to communicate that.
+	 */
+	for_each_subsys(root, ss) {
+		if (ss->attach)
+			ss->attach(ss, cgrp, oldcgrp, leader);
+	}
+
+	/*
+	 * step 5: success! and cleanup
+	 */
+	synchronize_rcu();
+	cgroup_wakeup_rmdir_waiter(cgrp);
+	retval = 0;
+out_list_teardown:
+	/* clean up the list of prefetched css_sets. */
+	list_for_each_entry_safe(cg_entry, temp_nobe, &newcg_list, links) {
+		list_del(&cg_entry->links);
+		put_css_set(cg_entry->cg);
+		kfree(cg_entry);
+	}
+out_cancel_attach:
+	/* same deal as in cgroup_attach_task */
+	if (retval) {
+		for_each_subsys(root, ss) {
+			if (ss == failed_ss) {
+				if (cancel_failed_ss && ss->cancel_attach)
+					ss->cancel_attach(ss, cgrp, leader);
+				break;
+			}
+			if (ss->cancel_attach)
+				ss->cancel_attach(ss, cgrp, leader);
+		}
+	}
+	/* clean up the array of referenced threads in the group. */
+	for (i = 0; i < group_size; i++) {
+		tsk = flex_array_get_ptr(group, i);
+		put_task_struct(tsk);
+	}
+out_free_group_list:
+	flex_array_free(group);
+	return retval;
+}
+
+/*
+ * Find the task_struct of the task to attach by vpid and pass it along to the
+ * function to attach either it or all tasks in its threadgroup. Will take
+ * cgroup_mutex; may take task_lock of task.
  */
-static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
+static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
 {
 	struct task_struct *tsk;
 	const struct cred *cred = current_cred(), *tcred;
 	int ret;
 
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+
 	if (pid) {
 		rcu_read_lock();
 		tsk = find_task_by_vpid(pid);
-		if (!tsk || tsk->flags & PF_EXITING) {
+		if (!tsk) {
 			rcu_read_unlock();
+			cgroup_unlock();
+			return -ESRCH;
+		}
+		if (threadgroup) {
+			/*
+			 * RCU protects this access, since tsk was found in the
+			 * tid map. a race with de_thread may cause group_leader
+			 * to stop being the leader, but cgroup_attach_proc will
+			 * detect it later.
+			 */
+			tsk = tsk->group_leader;
+		} else if (tsk->flags & PF_EXITING) {
+			/* optimization for the single-task-only case */
+			rcu_read_unlock();
+			cgroup_unlock();
 			return -ESRCH;
 		}
 
+		/*
+		 * even if we're attaching all tasks in the thread group, we
+		 * only need to check permissions on one of them.
+		 */
 		tcred = __task_cred(tsk);
 		if (cred->euid &&
 		    cred->euid != tcred->uid &&
 		    cred->euid != tcred->suid) {
 			rcu_read_unlock();
+			cgroup_unlock();
 			return -EACCES;
 		}
 		get_task_struct(tsk);
 		rcu_read_unlock();
 	} else {
-		tsk = current;
+		if (threadgroup)
+			tsk = current->group_leader;
+		else
+			tsk = current;
 		get_task_struct(tsk);
 	}
 
-	ret = cgroup_attach_task(cgrp, tsk);
+	if (threadgroup) {
+		threadgroup_fork_write_lock(tsk);
+		ret = cgroup_attach_proc(cgrp, tsk);
+		threadgroup_fork_write_unlock(tsk);
+	} else {
+		ret = cgroup_attach_task(cgrp, tsk);
+	}
 	put_task_struct(tsk);
+	cgroup_unlock();
 	return ret;
 }
 
 static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
 {
+	return attach_task_by_pid(cgrp, pid, false);
+}
+
+static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
+{
 	int ret;
-	if (!cgroup_lock_live_group(cgrp))
-		return -ENODEV;
-	ret = attach_task_by_pid(cgrp, pid);
-	cgroup_unlock();
+	do {
+		/*
+		 * attach_proc fails with -EAGAIN if threadgroup leadership
+		 * changes in the middle of the operation, in which case we need
+		 * to find the task_struct for the new leader and start over.
+		 */
+		ret = attach_task_by_pid(cgrp, tgid, true);
+	} while (ret == -EAGAIN);
 	return ret;
 }
 
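[Annotation: the core of this hunk is a reserve-then-commit pattern: step 2 prefetches every css_set the move could need, so the step-3 commit loop cannot hit -ENOMEM after tasks have started moving. A distilled sketch of that shape using the helpers added above; "tasks"/"nr" stand in for the flex_array snapshot, move_group_sketch is hypothetical, and the real code also skips PF_EXITING threads while prefetching:

static int move_group_sketch(struct cgroup *cgrp, struct cgroupfs_root *root,
			     struct task_struct **tasks, int nr)
{
	LIST_HEAD(newcg_list);
	struct cg_list_entry *ent, *tmp;
	struct css_set *oldcg;
	struct cgroup *oldcgrp;
	int i, retval = 0;

	/* phase 1: reserve every css_set we could need; may fail harmlessly */
	for (i = 0; i < nr; i++) {
		if (task_cgroup_from_root(tasks[i], root) == cgrp)
			continue;	/* already there */
		task_lock(tasks[i]);
		oldcg = tasks[i]->cgroups;
		get_css_set(oldcg);
		task_unlock(tasks[i]);
		if (!css_set_check_fetched(cgrp, tasks[i], oldcg, &newcg_list))
			retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
		put_css_set(oldcg);
		if (retval)
			goto out;	/* -ENOMEM, and nothing has moved yet */
	}
	/* phase 2: commit; guarantee=true means no allocation can fail now */
	for (i = 0; i < nr; i++) {
		oldcgrp = task_cgroup_from_root(tasks[i], root);
		if (oldcgrp == cgrp)
			continue;
		/* -ESRCH (task exiting) is the only possible failure: skip */
		cgroup_task_migrate(cgrp, oldcgrp, tasks[i], true);
	}
out:
	/* drop the prefetched references whether we committed or bailed */
	list_for_each_entry_safe(ent, tmp, &newcg_list, links) {
		list_del(&ent->links);
		put_css_set(ent->cg);
		kfree(ent);
	}
	return retval;
}
]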
@@ -3259,9 +3632,9 @@ static struct cftype files[] = {
 	{
 		.name = CGROUP_FILE_GENERIC_PREFIX "procs",
 		.open = cgroup_procs_open,
-		/* .write_u64 = cgroup_procs_write, TODO */
+		.write_u64 = cgroup_procs_write,
 		.release = cgroup_pidlist_release,
-		.mode = S_IRUGO,
+		.mode = S_IRUGO | S_IWUSR,
 	},
 	{
 		.name = "notify_on_release",
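[Annotation: with the write handler wired up and S_IWUSR added to the mode, the owner can now move an entire threadgroup by writing its TGID to the procs file, e.g. "echo $TGID > /cgroups/foo/cgroup.procs" (mount point assumed for illustration); the -EAGAIN loop in cgroup_procs_write makes this robust against a concurrent exec() changing the group leader mid-attach.]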
@@ -4257,122 +4630,6 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 }
 
 /**
- * cgroup_clone - clone the cgroup the given subsystem is attached to
- * @tsk: the task to be moved
- * @subsys: the given subsystem
- * @nodename: the name for the new cgroup
- *
- * Duplicate the current cgroup in the hierarchy that the given
- * subsystem is attached to, and move this task into the new
- * child.
- */
-int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
-		 char *nodename)
-{
-	struct dentry *dentry;
-	int ret = 0;
-	struct cgroup *parent, *child;
-	struct inode *inode;
-	struct css_set *cg;
-	struct cgroupfs_root *root;
-	struct cgroup_subsys *ss;
-
-	/* We shouldn't be called by an unregistered subsystem */
-	BUG_ON(!subsys->active);
-
-	/* First figure out what hierarchy and cgroup we're dealing
-	 * with, and pin them so we can drop cgroup_mutex */
-	mutex_lock(&cgroup_mutex);
- again:
-	root = subsys->root;
-	if (root == &rootnode) {
-		mutex_unlock(&cgroup_mutex);
-		return 0;
-	}
-
-	/* Pin the hierarchy */
-	if (!atomic_inc_not_zero(&root->sb->s_active)) {
-		/* We race with the final deactivate_super() */
-		mutex_unlock(&cgroup_mutex);
-		return 0;
-	}
-
-	/* Keep the cgroup alive */
-	task_lock(tsk);
-	parent = task_cgroup(tsk, subsys->subsys_id);
-	cg = tsk->cgroups;
-	get_css_set(cg);
-	task_unlock(tsk);
-
-	mutex_unlock(&cgroup_mutex);
-
-	/* Now do the VFS work to create a cgroup */
-	inode = parent->dentry->d_inode;
-
-	/* Hold the parent directory mutex across this operation to
-	 * stop anyone else deleting the new cgroup */
-	mutex_lock(&inode->i_mutex);
-	dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
-	if (IS_ERR(dentry)) {
-		printk(KERN_INFO
-		       "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename,
-		       PTR_ERR(dentry));
-		ret = PTR_ERR(dentry);
-		goto out_release;
-	}
-
-	/* Create the cgroup directory, which also creates the cgroup */
-	ret = vfs_mkdir(inode, dentry, 0755);
-	child = __d_cgrp(dentry);
-	dput(dentry);
-	if (ret) {
-		printk(KERN_INFO
-		       "Failed to create cgroup %s: %d\n", nodename,
-		       ret);
-		goto out_release;
-	}
-
-	/* The cgroup now exists. Retake cgroup_mutex and check
-	 * that we're still in the same state that we thought we
-	 * were. */
-	mutex_lock(&cgroup_mutex);
-	if ((root != subsys->root) ||
-	    (parent != task_cgroup(tsk, subsys->subsys_id))) {
-		/* Aargh, we raced ... */
-		mutex_unlock(&inode->i_mutex);
-		put_css_set(cg);
-
-		deactivate_super(root->sb);
-		/* The cgroup is still accessible in the VFS, but
-		 * we're not going to try to rmdir() it at this
-		 * point. */
-		printk(KERN_INFO
-		       "Race in cgroup_clone() - leaking cgroup %s\n",
-		       nodename);
-		goto again;
-	}
-
-	/* do any required auto-setup */
-	for_each_subsys(root, ss) {
-		if (ss->post_clone)
-			ss->post_clone(ss, child);
-	}
-
-	/* All seems fine. Finish by moving the task into the new cgroup */
-	ret = cgroup_attach_task(child, tsk);
-	mutex_unlock(&cgroup_mutex);
-
- out_release:
-	mutex_unlock(&inode->i_mutex);
-
-	mutex_lock(&cgroup_mutex);
-	put_css_set(cg);
-	mutex_unlock(&cgroup_mutex);
-	deactivate_super(root->sb);
-	return ret;
-}
-
-/**
  * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp
  * @cgrp: the cgroup in question
  * @task: the task in question