 Documentation/cgroups/cgroups.txt |   9
 kernel/cgroup.c                   | 439
 2 files changed, 401 insertions, 47 deletions
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index b3bd3bdbe202..8c4f3466c894 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -236,7 +236,8 @@ containing the following files describing that cgroup:
  - cgroup.procs: list of tgids in the cgroup. This list is not
    guaranteed to be sorted or free of duplicate tgids, and userspace
    should sort/uniquify the list if this property is required.
-   This is a read-only file, for now.
+   Writing a thread group id into this file moves all threads in that
+   group into this cgroup.
  - notify_on_release flag: run the release agent on exit?
  - release_agent: the path to use for release notifications (this file
    exists in the top cgroup only)
@@ -430,6 +431,12 @@ You can attach the current shell task by echoing 0:
 
 # echo 0 > tasks
 
+You can use the cgroup.procs file instead of the tasks file to move all
+threads in a threadgroup at once. Echoing the pid of any task in a
+threadgroup to cgroup.procs causes all tasks in that threadgroup to be
+attached to the cgroup. Writing 0 to cgroup.procs moves all tasks
+in the writing task's threadgroup.
+
 Note: Since every task is always a member of exactly one cgroup in each
 mounted hierarchy, to remove a task from its current cgroup you must
 move it into a new cgroup (possibly the root cgroup) by writing to the
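
The documentation hunk above describes the whole userspace interface: write a tgid into cgroup.procs and every thread of that process moves. A minimal userspace sketch of that usage follows; the mount point /dev/cgroup and the group name "foo" are illustrative assumptions, not anything this patch establishes.

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

/*
 * Move every thread of the given thread group into a cgroup by writing
 * the tgid to its cgroup.procs file. The path below is an assumed
 * example mount point. Writing 0 instead would move the calling task's
 * own threadgroup.
 */
static int move_threadgroup(pid_t tgid)
{
	FILE *f = fopen("/dev/cgroup/foo/cgroup.procs", "w");

	if (!f)
		return -1;
	if (fprintf(f, "%d\n", (int)tgid) < 0) {
		fclose(f);
		return -1;
	}
	return fclose(f);
}

int main(int argc, char **argv)
{
	pid_t tgid = argc > 1 ? (pid_t)atoi(argv[1]) : 0;

	return move_threadgroup(tgid) ? EXIT_FAILURE : EXIT_SUCCESS;
}
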
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 38fb0ad1cb46..5e6a9745f0e7 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1735,6 +1735,76 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 }
 EXPORT_SYMBOL_GPL(cgroup_path);
 
+/*
+ * cgroup_task_migrate - move a task from one cgroup to another.
+ *
+ * 'guarantee' is set if the caller promises that a new css_set for the task
+ * will already exist. If not set, this function might sleep, and can fail with
+ * -ENOMEM. Otherwise, it can only fail with -ESRCH.
+ */
+static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
+			       struct task_struct *tsk, bool guarantee)
+{
+	struct css_set *oldcg;
+	struct css_set *newcg;
+
+	/*
+	 * get old css_set. we need to take task_lock and refcount it, because
+	 * an exiting task can change its css_set to init_css_set and drop its
+	 * old one without taking cgroup_mutex.
+	 */
+	task_lock(tsk);
+	oldcg = tsk->cgroups;
+	get_css_set(oldcg);
+	task_unlock(tsk);
+
+	/* locate or allocate a new css_set for this task. */
+	if (guarantee) {
+		/* we know the css_set we want already exists. */
+		struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
+		read_lock(&css_set_lock);
+		newcg = find_existing_css_set(oldcg, cgrp, template);
+		BUG_ON(!newcg);
+		get_css_set(newcg);
+		read_unlock(&css_set_lock);
+	} else {
+		might_sleep();
+		/* find_css_set will give us newcg already referenced. */
+		newcg = find_css_set(oldcg, cgrp);
+		if (!newcg) {
+			put_css_set(oldcg);
+			return -ENOMEM;
+		}
+	}
+	put_css_set(oldcg);
+
+	/* if PF_EXITING is set, the tsk->cgroups pointer is no longer safe. */
+	task_lock(tsk);
+	if (tsk->flags & PF_EXITING) {
+		task_unlock(tsk);
+		put_css_set(newcg);
+		return -ESRCH;
+	}
+	rcu_assign_pointer(tsk->cgroups, newcg);
+	task_unlock(tsk);
+
+	/* Update the css_set linked lists if we're using them */
+	write_lock(&css_set_lock);
+	if (!list_empty(&tsk->cg_list))
+		list_move(&tsk->cg_list, &newcg->tasks);
+	write_unlock(&css_set_lock);
+
+	/*
+	 * We just gained a reference on oldcg by taking it from the task. As
+	 * trading it for newcg is protected by cgroup_mutex, we're safe to drop
+	 * it here; it will be freed under RCU.
+	 */
+	put_css_set(oldcg);
+
+	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
+	return 0;
+}
+
 /**
  * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
  * @cgrp: the cgroup the task is attaching to
@@ -1745,11 +1815,9 @@ EXPORT_SYMBOL_GPL(cgroup_path);
  */
 int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
-	int retval = 0;
+	int retval;
 	struct cgroup_subsys *ss, *failed_ss = NULL;
 	struct cgroup *oldcgrp;
-	struct css_set *cg;
-	struct css_set *newcg;
 	struct cgroupfs_root *root = cgrp->root;
 
 	/* Nothing to do if the task is already in that cgroup */
@@ -1780,36 +1848,9 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 		}
 	}
 
-	task_lock(tsk);
-	cg = tsk->cgroups;
-	get_css_set(cg);
-	task_unlock(tsk);
-	/*
-	 * Locate or allocate a new css_set for this task,
-	 * based on its final set of cgroups
-	 */
-	newcg = find_css_set(cg, cgrp);
-	put_css_set(cg);
-	if (!newcg) {
-		retval = -ENOMEM;
-		goto out;
-	}
-
-	task_lock(tsk);
-	if (tsk->flags & PF_EXITING) {
-		task_unlock(tsk);
-		put_css_set(newcg);
-		retval = -ESRCH;
+	retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
+	if (retval)
 		goto out;
-	}
-	rcu_assign_pointer(tsk->cgroups, newcg);
-	task_unlock(tsk);
-
-	/* Update the css_set linked lists if we're using them */
-	write_lock(&css_set_lock);
-	if (!list_empty(&tsk->cg_list))
-		list_move(&tsk->cg_list, &newcg->tasks);
-	write_unlock(&css_set_lock);
 
 	for_each_subsys(root, ss) {
 		if (ss->pre_attach)
@@ -1819,9 +1860,8 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 		if (ss->attach)
 			ss->attach(ss, cgrp, oldcgrp, tsk);
 	}
-	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
+
 	synchronize_rcu();
-	put_css_set(cg);
 
 	/*
 	 * wake up rmdir() waiter. the rmdir should fail since the cgroup
@@ -1871,49 +1911,356 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
 EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
 
 /*
- * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
- * held. May take task_lock of task
+ * cgroup_attach_proc works in two stages, the first of which prefetches all
+ * new css_sets needed (to make sure we have enough memory before committing
+ * to the move) and stores them in a list of entries of the following type.
+ * TODO: possible optimization: use css_set->rcu_head for chaining instead
+ */
+struct cg_list_entry {
+	struct css_set *cg;
+	struct list_head links;
+};
+
+static bool css_set_check_fetched(struct cgroup *cgrp,
+				  struct task_struct *tsk, struct css_set *cg,
+				  struct list_head *newcg_list)
+{
+	struct css_set *newcg;
+	struct cg_list_entry *cg_entry;
+	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
+
+	read_lock(&css_set_lock);
+	newcg = find_existing_css_set(cg, cgrp, template);
+	if (newcg)
+		get_css_set(newcg);
+	read_unlock(&css_set_lock);
+
+	/* doesn't exist at all? */
+	if (!newcg)
+		return false;
+	/* see if it's already in the list */
+	list_for_each_entry(cg_entry, newcg_list, links) {
+		if (cg_entry->cg == newcg) {
+			put_css_set(newcg);
+			return true;
+		}
+	}
+
+	/* not found */
+	put_css_set(newcg);
+	return false;
+}
+
+/*
+ * Find the new css_set and store it in the list in preparation for moving the
+ * given task to the given cgroup. Returns 0 or -ENOMEM.
+ */
+static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg,
+			    struct list_head *newcg_list)
+{
+	struct css_set *newcg;
+	struct cg_list_entry *cg_entry;
+
+	/* ensure a new css_set will exist for this thread */
+	newcg = find_css_set(cg, cgrp);
+	if (!newcg)
+		return -ENOMEM;
+	/* add it to the list */
+	cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL);
+	if (!cg_entry) {
+		put_css_set(newcg);
+		return -ENOMEM;
+	}
+	cg_entry->cg = newcg;
+	list_add(&cg_entry->links, newcg_list);
+	return 0;
+}
+
+/**
+ * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup
+ * @cgrp: the cgroup to attach to
+ * @leader: the threadgroup leader task_struct of the group to be attached
+ *
+ * Call holding cgroup_mutex and the threadgroup_fork_lock of the leader. Will
+ * take task_lock of each thread in leader's threadgroup individually in turn.
+ */
+int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
+{
+	int retval, i, group_size;
+	struct cgroup_subsys *ss, *failed_ss = NULL;
+	bool cancel_failed_ss = false;
+	/* guaranteed to be initialized later, but the compiler needs this */
+	struct cgroup *oldcgrp = NULL;
+	struct css_set *oldcg;
+	struct cgroupfs_root *root = cgrp->root;
+	/* threadgroup list cursor and array */
+	struct task_struct *tsk;
+	struct task_struct **group;
+	/*
+	 * we need to make sure we have css_sets for all the tasks we're
+	 * going to move -before- we actually start moving them, so that in
+	 * case we get an ENOMEM we can bail out before making any changes.
+	 */
+	struct list_head newcg_list;
+	struct cg_list_entry *cg_entry, *temp_nobe;
+
+	/*
+	 * step 0: in order to do expensive, possibly blocking operations for
+	 * every thread, we cannot iterate the thread group list, since it needs
+	 * rcu or tasklist locked. instead, build an array of all threads in the
+	 * group - threadgroup_fork_lock prevents new threads from appearing,
+	 * and if threads exit, this will just be an over-estimate.
+	 */
+	group_size = get_nr_threads(leader);
+	group = kmalloc(group_size * sizeof(*group), GFP_KERNEL);
+	if (!group)
+		return -ENOMEM;
+
+	/* prevent changes to the threadgroup list while we take a snapshot. */
+	rcu_read_lock();
+	if (!thread_group_leader(leader)) {
+		/*
+		 * a race with de_thread from another thread's exec() may strip
+		 * us of our leadership, making while_each_thread unsafe to use
+		 * on this task. if this happens, there is no choice but to
+		 * throw this task away and try again (from cgroup_procs_write);
+		 * this is "double-double-toil-and-trouble-check locking".
+		 */
+		rcu_read_unlock();
+		retval = -EAGAIN;
+		goto out_free_group_list;
+	}
+	/* take a reference on each task in the group to go in the array. */
+	tsk = leader;
+	i = 0;
+	do {
+		/* as per above, nr_threads may decrease, but not increase. */
+		BUG_ON(i >= group_size);
+		get_task_struct(tsk);
+		group[i] = tsk;
+		i++;
+	} while_each_thread(leader, tsk);
+	/* remember the number of threads in the array for later. */
+	group_size = i;
+	rcu_read_unlock();
+
+	/*
+	 * step 1: check that we can legitimately attach to the cgroup.
+	 */
+	for_each_subsys(root, ss) {
+		if (ss->can_attach) {
+			retval = ss->can_attach(ss, cgrp, leader);
+			if (retval) {
+				failed_ss = ss;
+				goto out_cancel_attach;
+			}
+		}
+		/* a callback to be run on every thread in the threadgroup. */
+		if (ss->can_attach_task) {
+			/* run on each task in the threadgroup. */
+			for (i = 0; i < group_size; i++) {
+				retval = ss->can_attach_task(cgrp, group[i]);
+				if (retval) {
+					failed_ss = ss;
+					cancel_failed_ss = true;
+					goto out_cancel_attach;
+				}
+			}
+		}
+	}
+
+	/*
+	 * step 2: make sure css_sets exist for all threads to be migrated.
+	 * we use find_css_set, which allocates a new one if necessary.
+	 */
+	INIT_LIST_HEAD(&newcg_list);
+	for (i = 0; i < group_size; i++) {
+		tsk = group[i];
+		/* nothing to do if this task is already in the cgroup */
+		oldcgrp = task_cgroup_from_root(tsk, root);
+		if (cgrp == oldcgrp)
+			continue;
+		/* get old css_set pointer */
+		task_lock(tsk);
+		if (tsk->flags & PF_EXITING) {
+			/* ignore this task if it's going away */
+			task_unlock(tsk);
+			continue;
+		}
+		oldcg = tsk->cgroups;
+		get_css_set(oldcg);
+		task_unlock(tsk);
+		/* see if the new one for us is already in the list? */
+		if (css_set_check_fetched(cgrp, tsk, oldcg, &newcg_list)) {
+			/* was already there, nothing to do. */
+			put_css_set(oldcg);
+		} else {
+			/* we don't already have it. get new one. */
+			retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
+			put_css_set(oldcg);
+			if (retval)
+				goto out_list_teardown;
+		}
+	}
+
+	/*
+	 * step 3: now that we're guaranteed success wrt the css_sets, proceed
+	 * to move all tasks to the new cgroup, calling ss->attach_task for each
+	 * one along the way. there are no failure cases after here, so this is
+	 * the commit point.
+	 */
+	for_each_subsys(root, ss) {
+		if (ss->pre_attach)
+			ss->pre_attach(cgrp);
+	}
+	for (i = 0; i < group_size; i++) {
+		tsk = group[i];
+		/* leave current thread as it is if it's already there */
+		oldcgrp = task_cgroup_from_root(tsk, root);
+		if (cgrp == oldcgrp)
+			continue;
+		/* attach each task to each subsystem */
+		for_each_subsys(root, ss) {
+			if (ss->attach_task)
+				ss->attach_task(cgrp, tsk);
+		}
+		/* if the thread is PF_EXITING, it can just get skipped. */
+		retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
+		BUG_ON(retval != 0 && retval != -ESRCH);
+	}
+	/* nothing is sensitive to fork() after this point. */
+
+	/*
+	 * step 4: do expensive, non-thread-specific subsystem callbacks.
+	 * TODO: if ever a subsystem needs to know the oldcgrp for each task
+	 * being moved, this call will need to be reworked to communicate that.
+	 */
+	for_each_subsys(root, ss) {
+		if (ss->attach)
+			ss->attach(ss, cgrp, oldcgrp, leader);
+	}
+
+	/*
+	 * step 5: success! and cleanup
+	 */
+	synchronize_rcu();
+	cgroup_wakeup_rmdir_waiter(cgrp);
+	retval = 0;
+out_list_teardown:
+	/* clean up the list of prefetched css_sets. */
+	list_for_each_entry_safe(cg_entry, temp_nobe, &newcg_list, links) {
+		list_del(&cg_entry->links);
+		put_css_set(cg_entry->cg);
+		kfree(cg_entry);
+	}
+out_cancel_attach:
+	/* same deal as in cgroup_attach_task */
+	if (retval) {
+		for_each_subsys(root, ss) {
+			if (ss == failed_ss) {
+				if (cancel_failed_ss && ss->cancel_attach)
+					ss->cancel_attach(ss, cgrp, leader);
+				break;
+			}
+			if (ss->cancel_attach)
+				ss->cancel_attach(ss, cgrp, leader);
+		}
+	}
+	/* clean up the array of referenced threads in the group. */
+	for (i = 0; i < group_size; i++)
+		put_task_struct(group[i]);
+out_free_group_list:
+	kfree(group);
+	return retval;
+}
+
+/*
+ * Find the task_struct of the task to attach by vpid and pass it along to the
+ * function to attach either it or all tasks in its threadgroup. Will take
+ * cgroup_mutex; may take task_lock of task.
  */
-static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
+static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
 {
 	struct task_struct *tsk;
 	const struct cred *cred = current_cred(), *tcred;
 	int ret;
 
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+
 	if (pid) {
 		rcu_read_lock();
 		tsk = find_task_by_vpid(pid);
-		if (!tsk || tsk->flags & PF_EXITING) {
+		if (!tsk) {
 			rcu_read_unlock();
+			cgroup_unlock();
+			return -ESRCH;
+		}
+		if (threadgroup) {
+			/*
+			 * RCU protects this access, since tsk was found in the
+			 * tid map. a race with de_thread may cause group_leader
+			 * to stop being the leader, but cgroup_attach_proc will
+			 * detect it later.
+			 */
+			tsk = tsk->group_leader;
+		} else if (tsk->flags & PF_EXITING) {
+			/* optimization for the single-task-only case */
+			rcu_read_unlock();
+			cgroup_unlock();
 			return -ESRCH;
 		}
 
+		/*
+		 * even if we're attaching all tasks in the thread group, we
+		 * only need to check permissions on one of them.
+		 */
 		tcred = __task_cred(tsk);
 		if (cred->euid &&
 		    cred->euid != tcred->uid &&
 		    cred->euid != tcred->suid) {
 			rcu_read_unlock();
+			cgroup_unlock();
 			return -EACCES;
 		}
 		get_task_struct(tsk);
 		rcu_read_unlock();
 	} else {
-		tsk = current;
+		if (threadgroup)
+			tsk = current->group_leader;
+		else
+			tsk = current;
 		get_task_struct(tsk);
 	}
 
-	ret = cgroup_attach_task(cgrp, tsk);
+	if (threadgroup) {
+		threadgroup_fork_write_lock(tsk);
+		ret = cgroup_attach_proc(cgrp, tsk);
+		threadgroup_fork_write_unlock(tsk);
+	} else {
+		ret = cgroup_attach_task(cgrp, tsk);
+	}
 	put_task_struct(tsk);
+	cgroup_unlock();
 	return ret;
 }
 
 static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
 {
+	return attach_task_by_pid(cgrp, pid, false);
+}
+
+static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
+{
 	int ret;
-	if (!cgroup_lock_live_group(cgrp))
-		return -ENODEV;
-	ret = attach_task_by_pid(cgrp, pid);
-	cgroup_unlock();
+	do {
+		/*
+		 * attach_proc fails with -EAGAIN if threadgroup leadership
+		 * changes in the middle of the operation, in which case we need
+		 * to find the task_struct for the new leader and start over.
+		 */
+		ret = attach_task_by_pid(cgrp, tgid, true);
+	} while (ret == -EAGAIN);
 	return ret;
 }
 
@@ -3270,9 +3617,9 @@ static struct cftype files[] = {
 	{
 		.name = CGROUP_FILE_GENERIC_PREFIX "procs",
 		.open = cgroup_procs_open,
-		/* .write_u64 = cgroup_procs_write, TODO */
+		.write_u64 = cgroup_procs_write,
 		.release = cgroup_pidlist_release,
-		.mode = S_IRUGO,
+		.mode = S_IRUGO | S_IWUSR,
 	},
 	{
 		.name = "notify_on_release",
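
The core structural idea in cgroup_attach_proc above is that everything which can fail with -ENOMEM happens in the prefetch pass (step 2), so the commit pass (step 3) only has the benign -ESRCH case left for racing exits. A compressed, userspace-only sketch of that prefetch-then-commit shape, with stand-in names rather than kernel types, may help when reading the function:

#include <errno.h>
#include <stdlib.h>

/* Stand-in for a per-thread resource (a css_set in the real code). */
struct prefetched {
	int placeholder;
};

/* Stage 1: allocate everything that can fail before changing any state. */
static int prefetch_all(struct prefetched **slots, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		slots[i] = malloc(sizeof(*slots[i]));
		if (!slots[i]) {
			while (i--)
				free(slots[i]);
			return -ENOMEM;
		}
	}
	return 0;
}

/* Stage 2: the commit point; nothing past here is allowed to fail. */
static void commit_all(struct prefetched **slots, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		/* the real code would migrate thread i here */
		free(slots[i]);
	}
}

int migrate_group(int nthreads)
{
	struct prefetched **slots = calloc(nthreads, sizeof(*slots));
	int ret;

	if (!slots)
		return -ENOMEM;
	ret = prefetch_all(slots, nthreads);
	if (!ret)
		commit_all(slots, nthreads);
	free(slots);
	return ret;
}
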