author		Ben Blum <bblum@andrew.cmu.edu>		2011-05-26 19:25:20 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-05-26 20:12:34 -0400
commit		74a1166dfe1135dcc168d35fa5261aa7e087011b (patch)
tree		a7add70f0344e2352b8d0d6beb10aef85c6585f7
parent		f780bdb7c1c73009cb57adcf99ef50027d80bf3c (diff)
cgroups: make procs file writable
Make procs file writable to move all threads by tgid at once.
Add functionality that enables users to move all threads in a threadgroup
at once to a cgroup by writing the tgid to the 'cgroup.procs' file. The
current implementation uses a per-threadgroup rwsem that is taken
for reading in the fork() path to prevent newly forking threads within the
threadgroup from "escaping" while the move is in progress.
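For reference, the locking described above pairs a read-side in the fork() path
against a write-side taken around the migration. A minimal sketch of the assumed
helper shapes follows; the rwsem and the read-side helpers come from the earlier
patches in this series (this patch itself only calls
threadgroup_fork_write_lock/unlock), so their exact definitions here are an
assumption, not part of this diff:

    /*
     * Sketch (assumed shape, per the earlier patches in this series):
     * fork() holds the per-threadgroup rwsem for reading while a new
     * thread is being added, so cgroup_procs_write, which takes it for
     * writing, sees a stable thread list during the migration.
     */
    static inline void threadgroup_fork_read_lock(struct task_struct *tsk)
    {
            down_read(&tsk->signal->threadgroup_fork_lock);
    }

    static inline void threadgroup_fork_read_unlock(struct task_struct *tsk)
    {
            up_read(&tsk->signal->threadgroup_fork_lock);
    }

    static inline void threadgroup_fork_write_lock(struct task_struct *tsk)
    {
            down_write(&tsk->signal->threadgroup_fork_lock);
    }

    static inline void threadgroup_fork_write_unlock(struct task_struct *tsk)
    {
            up_write(&tsk->signal->threadgroup_fork_lock);
    }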
Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Reviewed-by: Paul Menage <menage@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	Documentation/cgroups/cgroups.txt	  9
-rw-r--r--	kernel/cgroup.c				439
2 files changed, 401 insertions(+), 47 deletions(-)
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index b3bd3bdbe202..8c4f3466c894 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -236,7 +236,8 @@ containing the following files describing that cgroup:
  - cgroup.procs: list of tgids in the cgroup. This list is not
    guaranteed to be sorted or free of duplicate tgids, and userspace
    should sort/uniquify the list if this property is required.
-   This is a read-only file, for now.
+   Writing a thread group id into this file moves all threads in that
+   group into this cgroup.
  - notify_on_release flag: run the release agent on exit?
  - release_agent: the path to use for release notifications (this file
    exists in the top cgroup only)
@@ -430,6 +431,12 @@ You can attach the current shell task by echoing 0:
 
 # echo 0 > tasks
 
+You can use the cgroup.procs file instead of the tasks file to move all
+threads in a threadgroup at once. Echoing the pid of any task in a
+threadgroup to cgroup.procs causes all tasks in that threadgroup to be
+attached to the cgroup. Writing 0 to cgroup.procs moves all tasks
+in the writing task's threadgroup.
+
 Note: Since every task is always a member of exactly one cgroup in each
 mounted hierarchy, to remove a task from its current cgroup you must
 move it into a new cgroup (possibly the root cgroup) by writing to the
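For concreteness, a minimal userspace sketch of the interface documented above;
the cgroup mount point /dev/cgroup/foo is a hypothetical example path, and only
the cgroup.procs write semantics come from this patch:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Move every thread of threadgroup 'tgid' into the cgroup directory
     * 'cgrp_dir'. A tgid of 0 moves the calling task's own threadgroup,
     * mirroring the behaviour of the per-thread 'tasks' file. */
    static int move_threadgroup(const char *cgrp_dir, pid_t tgid)
    {
            char path[256], buf[32];
            int fd, ret = 0;

            snprintf(path, sizeof(path), "%s/cgroup.procs", cgrp_dir);
            fd = open(path, O_WRONLY);
            if (fd < 0)
                    return -1;
            snprintf(buf, sizeof(buf), "%d", (int)tgid);
            if (write(fd, buf, strlen(buf)) < 0)
                    ret = -1;
            close(fd);
            return ret;
    }

    int main(void)
    {
            /* move this process (all of its threads) at once */
            return move_threadgroup("/dev/cgroup/foo", 0) ? 1 : 0;
    }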
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 38fb0ad1cb46..5e6a9745f0e7 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1735,6 +1735,76 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 }
 EXPORT_SYMBOL_GPL(cgroup_path);
 
+/*
+ * cgroup_task_migrate - move a task from one cgroup to another.
+ *
+ * 'guarantee' is set if the caller promises that a new css_set for the task
+ * will already exist. If not set, this function might sleep, and can fail with
+ * -ENOMEM. Otherwise, it can only fail with -ESRCH.
+ */
+static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
+			       struct task_struct *tsk, bool guarantee)
+{
+	struct css_set *oldcg;
+	struct css_set *newcg;
+
+	/*
+	 * get old css_set. we need to take task_lock and refcount it, because
+	 * an exiting task can change its css_set to init_css_set and drop its
+	 * old one without taking cgroup_mutex.
+	 */
+	task_lock(tsk);
+	oldcg = tsk->cgroups;
+	get_css_set(oldcg);
+	task_unlock(tsk);
+
+	/* locate or allocate a new css_set for this task. */
+	if (guarantee) {
+		/* we know the css_set we want already exists. */
+		struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
+		read_lock(&css_set_lock);
+		newcg = find_existing_css_set(oldcg, cgrp, template);
+		BUG_ON(!newcg);
+		get_css_set(newcg);
+		read_unlock(&css_set_lock);
+	} else {
+		might_sleep();
+		/* find_css_set will give us newcg already referenced. */
+		newcg = find_css_set(oldcg, cgrp);
+		if (!newcg) {
+			put_css_set(oldcg);
+			return -ENOMEM;
+		}
+	}
+	put_css_set(oldcg);
+
+	/* if PF_EXITING is set, the tsk->cgroups pointer is no longer safe. */
+	task_lock(tsk);
+	if (tsk->flags & PF_EXITING) {
+		task_unlock(tsk);
+		put_css_set(newcg);
+		return -ESRCH;
+	}
+	rcu_assign_pointer(tsk->cgroups, newcg);
+	task_unlock(tsk);
+
+	/* Update the css_set linked lists if we're using them */
+	write_lock(&css_set_lock);
+	if (!list_empty(&tsk->cg_list))
+		list_move(&tsk->cg_list, &newcg->tasks);
+	write_unlock(&css_set_lock);
+
+	/*
+	 * We just gained a reference on oldcg by taking it from the task. As
+	 * trading it for newcg is protected by cgroup_mutex, we're safe to drop
+	 * it here; it will be freed under RCU.
+	 */
+	put_css_set(oldcg);
+
+	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
+	return 0;
+}
+
 /**
  * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
  * @cgrp: the cgroup the task is attaching to
@@ -1745,11 +1815,9 @@ EXPORT_SYMBOL_GPL(cgroup_path);
  */
 int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
-	int retval = 0;
+	int retval;
 	struct cgroup_subsys *ss, *failed_ss = NULL;
 	struct cgroup *oldcgrp;
-	struct css_set *cg;
-	struct css_set *newcg;
 	struct cgroupfs_root *root = cgrp->root;
 
 	/* Nothing to do if the task is already in that cgroup */
@@ -1780,36 +1848,9 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 		}
 	}
 
-	task_lock(tsk);
-	cg = tsk->cgroups;
-	get_css_set(cg);
-	task_unlock(tsk);
-	/*
-	 * Locate or allocate a new css_set for this task,
-	 * based on its final set of cgroups
-	 */
-	newcg = find_css_set(cg, cgrp);
-	put_css_set(cg);
-	if (!newcg) {
-		retval = -ENOMEM;
-		goto out;
-	}
-
-	task_lock(tsk);
-	if (tsk->flags & PF_EXITING) {
-		task_unlock(tsk);
-		put_css_set(newcg);
-		retval = -ESRCH;
+	retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
+	if (retval)
 		goto out;
-	}
-	rcu_assign_pointer(tsk->cgroups, newcg);
-	task_unlock(tsk);
-
-	/* Update the css_set linked lists if we're using them */
-	write_lock(&css_set_lock);
-	if (!list_empty(&tsk->cg_list))
-		list_move(&tsk->cg_list, &newcg->tasks);
-	write_unlock(&css_set_lock);
 
 	for_each_subsys(root, ss) {
 		if (ss->pre_attach)
@@ -1819,9 +1860,8 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 		if (ss->attach)
 			ss->attach(ss, cgrp, oldcgrp, tsk);
 	}
-	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
+
 	synchronize_rcu();
-	put_css_set(cg);
 
 	/*
 	 * wake up rmdir() waiter. the rmdir should fail since the cgroup
@@ -1871,49 +1911,356 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
 EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
 
 /*
- * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
- * held. May take task_lock of task
+ * cgroup_attach_proc works in two stages, the first of which prefetches all
+ * new css_sets needed (to make sure we have enough memory before committing
+ * to the move) and stores them in a list of entries of the following type.
+ * TODO: possible optimization: use css_set->rcu_head for chaining instead
+ */
+struct cg_list_entry {
+	struct css_set *cg;
+	struct list_head links;
+};
+
+static bool css_set_check_fetched(struct cgroup *cgrp,
+				  struct task_struct *tsk, struct css_set *cg,
+				  struct list_head *newcg_list)
+{
+	struct css_set *newcg;
+	struct cg_list_entry *cg_entry;
+	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
+
+	read_lock(&css_set_lock);
+	newcg = find_existing_css_set(cg, cgrp, template);
+	if (newcg)
+		get_css_set(newcg);
+	read_unlock(&css_set_lock);
+
+	/* doesn't exist at all? */
+	if (!newcg)
+		return false;
+	/* see if it's already in the list */
+	list_for_each_entry(cg_entry, newcg_list, links) {
+		if (cg_entry->cg == newcg) {
+			put_css_set(newcg);
+			return true;
+		}
+	}
+
+	/* not found */
+	put_css_set(newcg);
+	return false;
+}
+
+/*
+ * Find the new css_set and store it in the list in preparation for moving the
+ * given task to the given cgroup. Returns 0 or -ENOMEM.
+ */
+static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg,
+			    struct list_head *newcg_list)
+{
+	struct css_set *newcg;
+	struct cg_list_entry *cg_entry;
+
+	/* ensure a new css_set will exist for this thread */
+	newcg = find_css_set(cg, cgrp);
+	if (!newcg)
+		return -ENOMEM;
+	/* add it to the list */
+	cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL);
+	if (!cg_entry) {
+		put_css_set(newcg);
+		return -ENOMEM;
+	}
+	cg_entry->cg = newcg;
+	list_add(&cg_entry->links, newcg_list);
+	return 0;
+}
+
+/**
+ * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup
+ * @cgrp: the cgroup to attach to
+ * @leader: the threadgroup leader task_struct of the group to be attached
+ *
+ * Call holding cgroup_mutex and the threadgroup_fork_lock of the leader. Will
+ * take task_lock of each thread in leader's threadgroup individually in turn.
+ */
+int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
+{
+	int retval, i, group_size;
+	struct cgroup_subsys *ss, *failed_ss = NULL;
+	bool cancel_failed_ss = false;
+	/* guaranteed to be initialized later, but the compiler needs this */
+	struct cgroup *oldcgrp = NULL;
+	struct css_set *oldcg;
+	struct cgroupfs_root *root = cgrp->root;
+	/* threadgroup list cursor and array */
+	struct task_struct *tsk;
+	struct task_struct **group;
+	/*
+	 * we need to make sure we have css_sets for all the tasks we're
+	 * going to move -before- we actually start moving them, so that in
+	 * case we get an ENOMEM we can bail out before making any changes.
+	 */
+	struct list_head newcg_list;
+	struct cg_list_entry *cg_entry, *temp_nobe;
+
+	/*
+	 * step 0: in order to do expensive, possibly blocking operations for
+	 * every thread, we cannot iterate the thread group list, since it needs
+	 * rcu or tasklist locked. instead, build an array of all threads in the
+	 * group - threadgroup_fork_lock prevents new threads from appearing,
+	 * and if threads exit, this will just be an over-estimate.
+	 */
+	group_size = get_nr_threads(leader);
+	group = kmalloc(group_size * sizeof(*group), GFP_KERNEL);
+	if (!group)
+		return -ENOMEM;
+
+	/* prevent changes to the threadgroup list while we take a snapshot. */
+	rcu_read_lock();
+	if (!thread_group_leader(leader)) {
+		/*
+		 * a race with de_thread from another thread's exec() may strip
+		 * us of our leadership, making while_each_thread unsafe to use
+		 * on this task. if this happens, there is no choice but to
+		 * throw this task away and try again (from cgroup_procs_write);
+		 * this is "double-double-toil-and-trouble-check locking".
+		 */
+		rcu_read_unlock();
+		retval = -EAGAIN;
+		goto out_free_group_list;
+	}
+	/* take a reference on each task in the group to go in the array. */
+	tsk = leader;
+	i = 0;
+	do {
+		/* as per above, nr_threads may decrease, but not increase. */
+		BUG_ON(i >= group_size);
+		get_task_struct(tsk);
+		group[i] = tsk;
+		i++;
+	} while_each_thread(leader, tsk);
+	/* remember the number of threads in the array for later. */
+	group_size = i;
+	rcu_read_unlock();
+
+	/*
+	 * step 1: check that we can legitimately attach to the cgroup.
+	 */
+	for_each_subsys(root, ss) {
+		if (ss->can_attach) {
+			retval = ss->can_attach(ss, cgrp, leader);
+			if (retval) {
+				failed_ss = ss;
+				goto out_cancel_attach;
+			}
+		}
+		/* a callback to be run on every thread in the threadgroup. */
+		if (ss->can_attach_task) {
+			/* run on each task in the threadgroup. */
+			for (i = 0; i < group_size; i++) {
+				retval = ss->can_attach_task(cgrp, group[i]);
+				if (retval) {
+					failed_ss = ss;
+					cancel_failed_ss = true;
+					goto out_cancel_attach;
+				}
+			}
+		}
+	}
+
+	/*
+	 * step 2: make sure css_sets exist for all threads to be migrated.
+	 * we use find_css_set, which allocates a new one if necessary.
+	 */
+	INIT_LIST_HEAD(&newcg_list);
+	for (i = 0; i < group_size; i++) {
+		tsk = group[i];
+		/* nothing to do if this task is already in the cgroup */
+		oldcgrp = task_cgroup_from_root(tsk, root);
+		if (cgrp == oldcgrp)
+			continue;
+		/* get old css_set pointer */
+		task_lock(tsk);
+		if (tsk->flags & PF_EXITING) {
+			/* ignore this task if it's going away */
+			task_unlock(tsk);
+			continue;
+		}
+		oldcg = tsk->cgroups;
+		get_css_set(oldcg);
+		task_unlock(tsk);
+		/* see if the new one for us is already in the list? */
+		if (css_set_check_fetched(cgrp, tsk, oldcg, &newcg_list)) {
+			/* was already there, nothing to do. */
+			put_css_set(oldcg);
+		} else {
+			/* we don't already have it. get new one. */
+			retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
+			put_css_set(oldcg);
+			if (retval)
+				goto out_list_teardown;
+		}
+	}
+
+	/*
+	 * step 3: now that we're guaranteed success wrt the css_sets, proceed
+	 * to move all tasks to the new cgroup, calling ss->attach_task for each
+	 * one along the way. there are no failure cases after here, so this is
+	 * the commit point.
+	 */
+	for_each_subsys(root, ss) {
+		if (ss->pre_attach)
+			ss->pre_attach(cgrp);
+	}
+	for (i = 0; i < group_size; i++) {
+		tsk = group[i];
+		/* leave current thread as it is if it's already there */
+		oldcgrp = task_cgroup_from_root(tsk, root);
+		if (cgrp == oldcgrp)
+			continue;
+		/* attach each task to each subsystem */
+		for_each_subsys(root, ss) {
+			if (ss->attach_task)
+				ss->attach_task(cgrp, tsk);
+		}
+		/* if the thread is PF_EXITING, it can just get skipped. */
+		retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
+		BUG_ON(retval != 0 && retval != -ESRCH);
+	}
+	/* nothing is sensitive to fork() after this point. */
+
+	/*
+	 * step 4: do expensive, non-thread-specific subsystem callbacks.
+	 * TODO: if ever a subsystem needs to know the oldcgrp for each task
+	 * being moved, this call will need to be reworked to communicate that.
+	 */
+	for_each_subsys(root, ss) {
+		if (ss->attach)
+			ss->attach(ss, cgrp, oldcgrp, leader);
+	}
+
+	/*
+	 * step 5: success! and cleanup
+	 */
+	synchronize_rcu();
+	cgroup_wakeup_rmdir_waiter(cgrp);
+	retval = 0;
+out_list_teardown:
+	/* clean up the list of prefetched css_sets. */
+	list_for_each_entry_safe(cg_entry, temp_nobe, &newcg_list, links) {
+		list_del(&cg_entry->links);
+		put_css_set(cg_entry->cg);
+		kfree(cg_entry);
+	}
+out_cancel_attach:
+	/* same deal as in cgroup_attach_task */
+	if (retval) {
+		for_each_subsys(root, ss) {
+			if (ss == failed_ss) {
+				if (cancel_failed_ss && ss->cancel_attach)
+					ss->cancel_attach(ss, cgrp, leader);
+				break;
+			}
+			if (ss->cancel_attach)
+				ss->cancel_attach(ss, cgrp, leader);
+		}
+	}
+	/* clean up the array of referenced threads in the group. */
+	for (i = 0; i < group_size; i++)
+		put_task_struct(group[i]);
+out_free_group_list:
+	kfree(group);
+	return retval;
+}
+
+/*
+ * Find the task_struct of the task to attach by vpid and pass it along to the
+ * function to attach either it or all tasks in its threadgroup. Will take
+ * cgroup_mutex; may take task_lock of task.
  */
-static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
+static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
 {
 	struct task_struct *tsk;
 	const struct cred *cred = current_cred(), *tcred;
 	int ret;
 
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+
 	if (pid) {
 		rcu_read_lock();
 		tsk = find_task_by_vpid(pid);
-		if (!tsk || tsk->flags & PF_EXITING) {
+		if (!tsk) {
 			rcu_read_unlock();
+			cgroup_unlock();
+			return -ESRCH;
+		}
+		if (threadgroup) {
+			/*
+			 * RCU protects this access, since tsk was found in the
+			 * tid map. a race with de_thread may cause group_leader
+			 * to stop being the leader, but cgroup_attach_proc will
+			 * detect it later.
+			 */
+			tsk = tsk->group_leader;
+		} else if (tsk->flags & PF_EXITING) {
+			/* optimization for the single-task-only case */
+			rcu_read_unlock();
+			cgroup_unlock();
 			return -ESRCH;
 		}
 
+		/*
+		 * even if we're attaching all tasks in the thread group, we
+		 * only need to check permissions on one of them.
+		 */
 		tcred = __task_cred(tsk);
 		if (cred->euid &&
 		    cred->euid != tcred->uid &&
 		    cred->euid != tcred->suid) {
 			rcu_read_unlock();
+			cgroup_unlock();
 			return -EACCES;
 		}
 		get_task_struct(tsk);
 		rcu_read_unlock();
 	} else {
-		tsk = current;
+		if (threadgroup)
+			tsk = current->group_leader;
+		else
+			tsk = current;
 		get_task_struct(tsk);
 	}
 
-	ret = cgroup_attach_task(cgrp, tsk);
+	if (threadgroup) {
+		threadgroup_fork_write_lock(tsk);
+		ret = cgroup_attach_proc(cgrp, tsk);
+		threadgroup_fork_write_unlock(tsk);
+	} else {
+		ret = cgroup_attach_task(cgrp, tsk);
+	}
 	put_task_struct(tsk);
+	cgroup_unlock();
 	return ret;
 }
 
 static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
 {
+	return attach_task_by_pid(cgrp, pid, false);
+}
+
+static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
+{
 	int ret;
-	if (!cgroup_lock_live_group(cgrp))
-		return -ENODEV;
-	ret = attach_task_by_pid(cgrp, pid);
-	cgroup_unlock();
+	do {
+		/*
+		 * attach_proc fails with -EAGAIN if threadgroup leadership
+		 * changes in the middle of the operation, in which case we need
+		 * to find the task_struct for the new leader and start over.
+		 */
+		ret = attach_task_by_pid(cgrp, tgid, true);
+	} while (ret == -EAGAIN);
 	return ret;
 }
 
@@ -3270,9 +3617,9 @@ static struct cftype files[] = {
 	{
 		.name = CGROUP_FILE_GENERIC_PREFIX "procs",
 		.open = cgroup_procs_open,
-		/* .write_u64 = cgroup_procs_write, TODO */
+		.write_u64 = cgroup_procs_write,
 		.release = cgroup_pidlist_release,
-		.mode = S_IRUGO,
+		.mode = S_IRUGO | S_IWUSR,
 	},
 	{
 		.name = "notify_on_release",
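Finally, the documentation hunk above notes that reads of cgroup.procs are not
guaranteed to be sorted or free of duplicate tgids. A small consumer sketch
(same hypothetical mount point as before) that sorts and uniquifies the list,
as the documentation advises:

    #include <stdio.h>
    #include <stdlib.h>

    static int cmp_int(const void *a, const void *b)
    {
            return *(const int *)a - *(const int *)b;
    }

    int main(void)
    {
            /* the path is a hypothetical example mount point */
            FILE *f = fopen("/dev/cgroup/foo/cgroup.procs", "r");
            int pids[4096], n = 0, i;

            if (!f)
                    return 1;
            while (n < 4096 && fscanf(f, "%d", &pids[n]) == 1)
                    n++;
            fclose(f);

            /* the kernel guarantees neither order nor uniqueness */
            qsort(pids, n, sizeof(pids[0]), cmp_int);
            for (i = 0; i < n; i++)
                    if (i == 0 || pids[i] != pids[i - 1])
                            printf("%d\n", pids[i]);
            return 0;
    }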