Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- | kernel/cgroup.c | 614 |
1 files changed, 425 insertions, 189 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 25c7eb52de1a..2731d115d725 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -57,6 +57,7 @@ | |||
57 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ | 57 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ |
58 | #include <linux/eventfd.h> | 58 | #include <linux/eventfd.h> |
59 | #include <linux/poll.h> | 59 | #include <linux/poll.h> |
60 | #include <linux/flex_array.h> /* used in cgroup_attach_proc */ | ||
60 | 61 | ||
61 | #include <asm/atomic.h> | 62 | #include <asm/atomic.h> |
62 | 63 | ||
@@ -326,12 +327,6 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) | |||
326 | return &css_set_table[index]; | 327 | return &css_set_table[index]; |
327 | } | 328 | } |
328 | 329 | ||
329 | static void free_css_set_rcu(struct rcu_head *obj) | ||
330 | { | ||
331 | struct css_set *cg = container_of(obj, struct css_set, rcu_head); | ||
332 | kfree(cg); | ||
333 | } | ||
334 | |||
335 | /* We don't maintain the lists running through each css_set to its | 330 | /* We don't maintain the lists running through each css_set to its |
336 | * task until after the first call to cgroup_iter_start(). This | 331 | * task until after the first call to cgroup_iter_start(). This |
337 | * reduces the fork()/exit() overhead for people who have cgroups | 332 | * reduces the fork()/exit() overhead for people who have cgroups |
@@ -375,7 +370,7 @@ static void __put_css_set(struct css_set *cg, int taskexit) | |||
375 | } | 370 | } |
376 | 371 | ||
377 | write_unlock(&css_set_lock); | 372 | write_unlock(&css_set_lock); |
378 | call_rcu(&cg->rcu_head, free_css_set_rcu); | 373 | kfree_rcu(cg, rcu_head); |
379 | } | 374 | } |
380 | 375 | ||
381 | /* | 376 | /* |
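The conversion above, and the matching ones for struct cgroup and struct css_id later in this diff, follow one pattern: an RCU callback whose only job is to kfree() the enclosing object is deleted, and the call_rcu() site becomes kfree_rcu(), which takes the object pointer and the name of its struct rcu_head member and frees the object after a grace period without a separate callback. A condensed sketch of the equivalence, abridged from the hunks in this diff rather than quoted verbatim:

    /* before: open-coded callback whose only job is to free the object */
    static void free_css_set_rcu(struct rcu_head *obj)
    {
            struct css_set *cg = container_of(obj, struct css_set, rcu_head);
            kfree(cg);
    }
    /* ... */
    call_rcu(&cg->rcu_head, free_css_set_rcu);

    /* after: no helper needed; kfree_rcu() frees the object once the
     * grace period has elapsed, given the rcu_head member's name */
    kfree_rcu(cg, rcu_head);
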
@@ -812,13 +807,6 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp) | |||
812 | return ret; | 807 | return ret; |
813 | } | 808 | } |
814 | 809 | ||
815 | static void free_cgroup_rcu(struct rcu_head *obj) | ||
816 | { | ||
817 | struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head); | ||
818 | |||
819 | kfree(cgrp); | ||
820 | } | ||
821 | |||
822 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) | 810 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) |
823 | { | 811 | { |
824 | /* is dentry a directory ? if so, kfree() associated cgroup */ | 812 | /* is dentry a directory ? if so, kfree() associated cgroup */ |
@@ -856,7 +844,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) | |||
856 | */ | 844 | */ |
857 | BUG_ON(!list_empty(&cgrp->pidlists)); | 845 | BUG_ON(!list_empty(&cgrp->pidlists)); |
858 | 846 | ||
859 | call_rcu(&cgrp->rcu_head, free_cgroup_rcu); | 847 | kfree_rcu(cgrp, rcu_head); |
860 | } | 848 | } |
861 | iput(inode); | 849 | iput(inode); |
862 | } | 850 | } |
@@ -1748,6 +1736,76 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | |||
1748 | } | 1736 | } |
1749 | EXPORT_SYMBOL_GPL(cgroup_path); | 1737 | EXPORT_SYMBOL_GPL(cgroup_path); |
1750 | 1738 | ||
1739 | /* | ||
1740 | * cgroup_task_migrate - move a task from one cgroup to another. | ||
1741 | * | ||
1742 | * 'guarantee' is set if the caller promises that a new css_set for the task | ||
1743 | * will already exist. If not set, this function might sleep, and can fail with | ||
1744 | * -ENOMEM. Otherwise, it can only fail with -ESRCH. | ||
1745 | */ | ||
1746 | static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, | ||
1747 | struct task_struct *tsk, bool guarantee) | ||
1748 | { | ||
1749 | struct css_set *oldcg; | ||
1750 | struct css_set *newcg; | ||
1751 | |||
1752 | /* | ||
1753 | * get old css_set. we need to take task_lock and refcount it, because | ||
1754 | * an exiting task can change its css_set to init_css_set and drop its | ||
1755 | * old one without taking cgroup_mutex. | ||
1756 | */ | ||
1757 | task_lock(tsk); | ||
1758 | oldcg = tsk->cgroups; | ||
1759 | get_css_set(oldcg); | ||
1760 | task_unlock(tsk); | ||
1761 | |||
1762 | /* locate or allocate a new css_set for this task. */ | ||
1763 | if (guarantee) { | ||
1764 | /* we know the css_set we want already exists. */ | ||
1765 | struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; | ||
1766 | read_lock(&css_set_lock); | ||
1767 | newcg = find_existing_css_set(oldcg, cgrp, template); | ||
1768 | BUG_ON(!newcg); | ||
1769 | get_css_set(newcg); | ||
1770 | read_unlock(&css_set_lock); | ||
1771 | } else { | ||
1772 | might_sleep(); | ||
1773 | /* find_css_set will give us newcg already referenced. */ | ||
1774 | newcg = find_css_set(oldcg, cgrp); | ||
1775 | if (!newcg) { | ||
1776 | put_css_set(oldcg); | ||
1777 | return -ENOMEM; | ||
1778 | } | ||
1779 | } | ||
1780 | put_css_set(oldcg); | ||
1781 | |||
1782 | /* if PF_EXITING is set, the tsk->cgroups pointer is no longer safe. */ | ||
1783 | task_lock(tsk); | ||
1784 | if (tsk->flags & PF_EXITING) { | ||
1785 | task_unlock(tsk); | ||
1786 | put_css_set(newcg); | ||
1787 | return -ESRCH; | ||
1788 | } | ||
1789 | rcu_assign_pointer(tsk->cgroups, newcg); | ||
1790 | task_unlock(tsk); | ||
1791 | |||
1792 | /* Update the css_set linked lists if we're using them */ | ||
1793 | write_lock(&css_set_lock); | ||
1794 | if (!list_empty(&tsk->cg_list)) | ||
1795 | list_move(&tsk->cg_list, &newcg->tasks); | ||
1796 | write_unlock(&css_set_lock); | ||
1797 | |||
1798 | /* | ||
1799 | * We just gained a reference on oldcg by taking it from the task. As | ||
1800 | * trading it for newcg is protected by cgroup_mutex, we're safe to drop | ||
1801 | * it here; it will be freed under RCU. | ||
1802 | */ | ||
1803 | put_css_set(oldcg); | ||
1804 | |||
1805 | set_bit(CGRP_RELEASABLE, &oldcgrp->flags); | ||
1806 | return 0; | ||
1807 | } | ||
1808 | |||
1751 | /** | 1809 | /** |
1752 | * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' | 1810 | * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' |
1753 | * @cgrp: the cgroup the task is attaching to | 1811 | * @cgrp: the cgroup the task is attaching to |
@@ -1758,11 +1816,9 @@ EXPORT_SYMBOL_GPL(cgroup_path); | |||
1758 | */ | 1816 | */ |
1759 | int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | 1817 | int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) |
1760 | { | 1818 | { |
1761 | int retval = 0; | 1819 | int retval; |
1762 | struct cgroup_subsys *ss, *failed_ss = NULL; | 1820 | struct cgroup_subsys *ss, *failed_ss = NULL; |
1763 | struct cgroup *oldcgrp; | 1821 | struct cgroup *oldcgrp; |
1764 | struct css_set *cg; | ||
1765 | struct css_set *newcg; | ||
1766 | struct cgroupfs_root *root = cgrp->root; | 1822 | struct cgroupfs_root *root = cgrp->root; |
1767 | 1823 | ||
1768 | /* Nothing to do if the task is already in that cgroup */ | 1824 | /* Nothing to do if the task is already in that cgroup */ |
@@ -1772,7 +1828,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1772 | 1828 | ||
1773 | for_each_subsys(root, ss) { | 1829 | for_each_subsys(root, ss) { |
1774 | if (ss->can_attach) { | 1830 | if (ss->can_attach) { |
1775 | retval = ss->can_attach(ss, cgrp, tsk, false); | 1831 | retval = ss->can_attach(ss, cgrp, tsk); |
1776 | if (retval) { | 1832 | if (retval) { |
1777 | /* | 1833 | /* |
1778 | * Remember on which subsystem the can_attach() | 1834 | * Remember on which subsystem the can_attach() |
@@ -1784,46 +1840,29 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1784 | goto out; | 1840 | goto out; |
1785 | } | 1841 | } |
1786 | } | 1842 | } |
1843 | if (ss->can_attach_task) { | ||
1844 | retval = ss->can_attach_task(cgrp, tsk); | ||
1845 | if (retval) { | ||
1846 | failed_ss = ss; | ||
1847 | goto out; | ||
1848 | } | ||
1849 | } | ||
1787 | } | 1850 | } |
1788 | 1851 | ||
1789 | task_lock(tsk); | 1852 | retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false); |
1790 | cg = tsk->cgroups; | 1853 | if (retval) |
1791 | get_css_set(cg); | ||
1792 | task_unlock(tsk); | ||
1793 | /* | ||
1794 | * Locate or allocate a new css_set for this task, | ||
1795 | * based on its final set of cgroups | ||
1796 | */ | ||
1797 | newcg = find_css_set(cg, cgrp); | ||
1798 | put_css_set(cg); | ||
1799 | if (!newcg) { | ||
1800 | retval = -ENOMEM; | ||
1801 | goto out; | 1854 | goto out; |
1802 | } | ||
1803 | |||
1804 | task_lock(tsk); | ||
1805 | if (tsk->flags & PF_EXITING) { | ||
1806 | task_unlock(tsk); | ||
1807 | put_css_set(newcg); | ||
1808 | retval = -ESRCH; | ||
1809 | goto out; | ||
1810 | } | ||
1811 | rcu_assign_pointer(tsk->cgroups, newcg); | ||
1812 | task_unlock(tsk); | ||
1813 | |||
1814 | /* Update the css_set linked lists if we're using them */ | ||
1815 | write_lock(&css_set_lock); | ||
1816 | if (!list_empty(&tsk->cg_list)) | ||
1817 | list_move(&tsk->cg_list, &newcg->tasks); | ||
1818 | write_unlock(&css_set_lock); | ||
1819 | 1855 | ||
1820 | for_each_subsys(root, ss) { | 1856 | for_each_subsys(root, ss) { |
1857 | if (ss->pre_attach) | ||
1858 | ss->pre_attach(cgrp); | ||
1859 | if (ss->attach_task) | ||
1860 | ss->attach_task(cgrp, tsk); | ||
1821 | if (ss->attach) | 1861 | if (ss->attach) |
1822 | ss->attach(ss, cgrp, oldcgrp, tsk, false); | 1862 | ss->attach(ss, cgrp, oldcgrp, tsk); |
1823 | } | 1863 | } |
1824 | set_bit(CGRP_RELEASABLE, &oldcgrp->flags); | 1864 | |
1825 | synchronize_rcu(); | 1865 | synchronize_rcu(); |
1826 | put_css_set(cg); | ||
1827 | 1866 | ||
1828 | /* | 1867 | /* |
1829 | * wake up rmdir() waiter. the rmdir should fail since the cgroup | 1868 | * wake up rmdir() waiter. the rmdir should fail since the cgroup |
@@ -1842,7 +1881,7 @@ out: | |||
1842 | */ | 1881 | */ |
1843 | break; | 1882 | break; |
1844 | if (ss->cancel_attach) | 1883 | if (ss->cancel_attach) |
1845 | ss->cancel_attach(ss, cgrp, tsk, false); | 1884 | ss->cancel_attach(ss, cgrp, tsk); |
1846 | } | 1885 | } |
1847 | } | 1886 | } |
1848 | return retval; | 1887 | return retval; |
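The attach path now distinguishes whole-group callbacks (can_attach, attach, cancel_attach, whose calls drop their trailing 'false' argument) from per-thread callbacks (can_attach_task, pre_attach, attach_task). As a rough illustration only, a subsystem that needs per-thread admission checks and per-thread bookkeeping would wire up the new hooks along these lines; the subsystem name and empty bodies are invented, not part of this patch:

    /* hypothetical subsystem using the new per-thread hooks */
    static int example_can_attach_task(struct cgroup *cgrp,
                                       struct task_struct *tsk)
    {
            /* per-thread admission check; a nonzero return aborts the attach */
            return 0;
    }

    static void example_attach_task(struct cgroup *cgrp,
                                    struct task_struct *tsk)
    {
            /* per-thread state update, called once for every thread moved */
    }

    struct cgroup_subsys example_subsys = {
            .name            = "example",
            .can_attach_task = example_can_attach_task,
            .attach_task     = example_attach_task,
            /* .can_attach / .pre_attach / .attach / .cancel_attach as needed */
    };
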
@@ -1873,49 +1912,370 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) | |||
1873 | EXPORT_SYMBOL_GPL(cgroup_attach_task_all); | 1912 | EXPORT_SYMBOL_GPL(cgroup_attach_task_all); |
1874 | 1913 | ||
1875 | /* | 1914 | /* |
1876 | * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex | 1915 | * cgroup_attach_proc works in two stages, the first of which prefetches all |
1877 | * held. May take task_lock of task | 1916 | * new css_sets needed (to make sure we have enough memory before committing |
1917 | * to the move) and stores them in a list of entries of the following type. | ||
1918 | * TODO: possible optimization: use css_set->rcu_head for chaining instead | ||
1919 | */ | ||
1920 | struct cg_list_entry { | ||
1921 | struct css_set *cg; | ||
1922 | struct list_head links; | ||
1923 | }; | ||
1924 | |||
1925 | static bool css_set_check_fetched(struct cgroup *cgrp, | ||
1926 | struct task_struct *tsk, struct css_set *cg, | ||
1927 | struct list_head *newcg_list) | ||
1928 | { | ||
1929 | struct css_set *newcg; | ||
1930 | struct cg_list_entry *cg_entry; | ||
1931 | struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; | ||
1932 | |||
1933 | read_lock(&css_set_lock); | ||
1934 | newcg = find_existing_css_set(cg, cgrp, template); | ||
1935 | if (newcg) | ||
1936 | get_css_set(newcg); | ||
1937 | read_unlock(&css_set_lock); | ||
1938 | |||
1939 | /* doesn't exist at all? */ | ||
1940 | if (!newcg) | ||
1941 | return false; | ||
1942 | /* see if it's already in the list */ | ||
1943 | list_for_each_entry(cg_entry, newcg_list, links) { | ||
1944 | if (cg_entry->cg == newcg) { | ||
1945 | put_css_set(newcg); | ||
1946 | return true; | ||
1947 | } | ||
1948 | } | ||
1949 | |||
1950 | /* not found */ | ||
1951 | put_css_set(newcg); | ||
1952 | return false; | ||
1953 | } | ||
1954 | |||
1955 | /* | ||
1956 | * Find the new css_set and store it in the list in preparation for moving the | ||
1957 | * given task to the given cgroup. Returns 0 or -ENOMEM. | ||
1958 | */ | ||
1959 | static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg, | ||
1960 | struct list_head *newcg_list) | ||
1961 | { | ||
1962 | struct css_set *newcg; | ||
1963 | struct cg_list_entry *cg_entry; | ||
1964 | |||
1965 | /* ensure a new css_set will exist for this thread */ | ||
1966 | newcg = find_css_set(cg, cgrp); | ||
1967 | if (!newcg) | ||
1968 | return -ENOMEM; | ||
1969 | /* add it to the list */ | ||
1970 | cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL); | ||
1971 | if (!cg_entry) { | ||
1972 | put_css_set(newcg); | ||
1973 | return -ENOMEM; | ||
1974 | } | ||
1975 | cg_entry->cg = newcg; | ||
1976 | list_add(&cg_entry->links, newcg_list); | ||
1977 | return 0; | ||
1978 | } | ||
1979 | |||
1980 | /** | ||
1981 | * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup | ||
1982 | * @cgrp: the cgroup to attach to | ||
1983 | * @leader: the threadgroup leader task_struct of the group to be attached | ||
1984 | * | ||
1985 | * Call holding cgroup_mutex and the threadgroup_fork_lock of the leader. Will | ||
1986 | * take task_lock of each thread in leader's threadgroup individually in turn. | ||
1987 | */ | ||
1988 | int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | ||
1989 | { | ||
1990 | int retval, i, group_size; | ||
1991 | struct cgroup_subsys *ss, *failed_ss = NULL; | ||
1992 | bool cancel_failed_ss = false; | ||
1993 | /* guaranteed to be initialized later, but the compiler needs this */ | ||
1994 | struct cgroup *oldcgrp = NULL; | ||
1995 | struct css_set *oldcg; | ||
1996 | struct cgroupfs_root *root = cgrp->root; | ||
1997 | /* threadgroup list cursor and array */ | ||
1998 | struct task_struct *tsk; | ||
1999 | struct flex_array *group; | ||
2000 | /* | ||
2001 | * we need to make sure we have css_sets for all the tasks we're | ||
2002 | * going to move -before- we actually start moving them, so that in | ||
2003 | * case we get an ENOMEM we can bail out before making any changes. | ||
2004 | */ | ||
2005 | struct list_head newcg_list; | ||
2006 | struct cg_list_entry *cg_entry, *temp_nobe; | ||
2007 | |||
2008 | /* | ||
2009 | * step 0: in order to do expensive, possibly blocking operations for | ||
2010 | * every thread, we cannot iterate the thread group list, since it needs | ||
2011 | * rcu or tasklist locked. instead, build an array of all threads in the | ||
2012 | * group - threadgroup_fork_lock prevents new threads from appearing, | ||
2013 | * and if threads exit, this will just be an over-estimate. | ||
2014 | */ | ||
2015 | group_size = get_nr_threads(leader); | ||
2016 | /* flex_array supports very large thread-groups better than kmalloc. */ | ||
2017 | group = flex_array_alloc(sizeof(struct task_struct *), group_size, | ||
2018 | GFP_KERNEL); | ||
2019 | if (!group) | ||
2020 | return -ENOMEM; | ||
2021 | /* pre-allocate to guarantee space while iterating in rcu read-side. */ | ||
2022 | retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL); | ||
2023 | if (retval) | ||
2024 | goto out_free_group_list; | ||
2025 | |||
2026 | /* prevent changes to the threadgroup list while we take a snapshot. */ | ||
2027 | rcu_read_lock(); | ||
2028 | if (!thread_group_leader(leader)) { | ||
2029 | /* | ||
2030 | * a race with de_thread from another thread's exec() may strip | ||
2031 | * us of our leadership, making while_each_thread unsafe to use | ||
2032 | * on this task. if this happens, there is no choice but to | ||
2033 | * throw this task away and try again (from cgroup_procs_write); | ||
2034 | * this is "double-double-toil-and-trouble-check locking". | ||
2035 | */ | ||
2036 | rcu_read_unlock(); | ||
2037 | retval = -EAGAIN; | ||
2038 | goto out_free_group_list; | ||
2039 | } | ||
2040 | /* take a reference on each task in the group to go in the array. */ | ||
2041 | tsk = leader; | ||
2042 | i = 0; | ||
2043 | do { | ||
2044 | /* as per above, nr_threads may decrease, but not increase. */ | ||
2045 | BUG_ON(i >= group_size); | ||
2046 | get_task_struct(tsk); | ||
2047 | /* | ||
2048 | * saying GFP_ATOMIC has no effect here because we did prealloc | ||
2049 | * earlier, but it's good form to communicate our expectations. | ||
2050 | */ | ||
2051 | retval = flex_array_put_ptr(group, i, tsk, GFP_ATOMIC); | ||
2052 | BUG_ON(retval != 0); | ||
2053 | i++; | ||
2054 | } while_each_thread(leader, tsk); | ||
2055 | /* remember the number of threads in the array for later. */ | ||
2056 | group_size = i; | ||
2057 | rcu_read_unlock(); | ||
2058 | |||
2059 | /* | ||
2060 | * step 1: check that we can legitimately attach to the cgroup. | ||
2061 | */ | ||
2062 | for_each_subsys(root, ss) { | ||
2063 | if (ss->can_attach) { | ||
2064 | retval = ss->can_attach(ss, cgrp, leader); | ||
2065 | if (retval) { | ||
2066 | failed_ss = ss; | ||
2067 | goto out_cancel_attach; | ||
2068 | } | ||
2069 | } | ||
2070 | /* a callback to be run on every thread in the threadgroup. */ | ||
2071 | if (ss->can_attach_task) { | ||
2072 | /* run on each task in the threadgroup. */ | ||
2073 | for (i = 0; i < group_size; i++) { | ||
2074 | tsk = flex_array_get_ptr(group, i); | ||
2075 | retval = ss->can_attach_task(cgrp, tsk); | ||
2076 | if (retval) { | ||
2077 | failed_ss = ss; | ||
2078 | cancel_failed_ss = true; | ||
2079 | goto out_cancel_attach; | ||
2080 | } | ||
2081 | } | ||
2082 | } | ||
2083 | } | ||
2084 | |||
2085 | /* | ||
2086 | * step 2: make sure css_sets exist for all threads to be migrated. | ||
2087 | * we use find_css_set, which allocates a new one if necessary. | ||
2088 | */ | ||
2089 | INIT_LIST_HEAD(&newcg_list); | ||
2090 | for (i = 0; i < group_size; i++) { | ||
2091 | tsk = flex_array_get_ptr(group, i); | ||
2092 | /* nothing to do if this task is already in the cgroup */ | ||
2093 | oldcgrp = task_cgroup_from_root(tsk, root); | ||
2094 | if (cgrp == oldcgrp) | ||
2095 | continue; | ||
2096 | /* get old css_set pointer */ | ||
2097 | task_lock(tsk); | ||
2098 | if (tsk->flags & PF_EXITING) { | ||
2099 | /* ignore this task if it's going away */ | ||
2100 | task_unlock(tsk); | ||
2101 | continue; | ||
2102 | } | ||
2103 | oldcg = tsk->cgroups; | ||
2104 | get_css_set(oldcg); | ||
2105 | task_unlock(tsk); | ||
2106 | /* see if the new one for us is already in the list? */ | ||
2107 | if (css_set_check_fetched(cgrp, tsk, oldcg, &newcg_list)) { | ||
2108 | /* was already there, nothing to do. */ | ||
2109 | put_css_set(oldcg); | ||
2110 | } else { | ||
2111 | /* we don't already have it. get new one. */ | ||
2112 | retval = css_set_prefetch(cgrp, oldcg, &newcg_list); | ||
2113 | put_css_set(oldcg); | ||
2114 | if (retval) | ||
2115 | goto out_list_teardown; | ||
2116 | } | ||
2117 | } | ||
2118 | |||
2119 | /* | ||
2120 | * step 3: now that we're guaranteed success wrt the css_sets, proceed | ||
2121 | * to move all tasks to the new cgroup, calling ss->attach_task for each | ||
2122 | * one along the way. there are no failure cases after here, so this is | ||
2123 | * the commit point. | ||
2124 | */ | ||
2125 | for_each_subsys(root, ss) { | ||
2126 | if (ss->pre_attach) | ||
2127 | ss->pre_attach(cgrp); | ||
2128 | } | ||
2129 | for (i = 0; i < group_size; i++) { | ||
2130 | tsk = flex_array_get_ptr(group, i); | ||
2131 | /* leave current thread as it is if it's already there */ | ||
2132 | oldcgrp = task_cgroup_from_root(tsk, root); | ||
2133 | if (cgrp == oldcgrp) | ||
2134 | continue; | ||
2135 | /* attach each task to each subsystem */ | ||
2136 | for_each_subsys(root, ss) { | ||
2137 | if (ss->attach_task) | ||
2138 | ss->attach_task(cgrp, tsk); | ||
2139 | } | ||
2140 | /* if the thread is PF_EXITING, it can just get skipped. */ | ||
2141 | retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true); | ||
2142 | BUG_ON(retval != 0 && retval != -ESRCH); | ||
2143 | } | ||
2144 | /* nothing is sensitive to fork() after this point. */ | ||
2145 | |||
2146 | /* | ||
2147 | * step 4: do expensive, non-thread-specific subsystem callbacks. | ||
2148 | * TODO: if ever a subsystem needs to know the oldcgrp for each task | ||
2149 | * being moved, this call will need to be reworked to communicate that. | ||
2150 | */ | ||
2151 | for_each_subsys(root, ss) { | ||
2152 | if (ss->attach) | ||
2153 | ss->attach(ss, cgrp, oldcgrp, leader); | ||
2154 | } | ||
2155 | |||
2156 | /* | ||
2157 | * step 5: success! and cleanup | ||
2158 | */ | ||
2159 | synchronize_rcu(); | ||
2160 | cgroup_wakeup_rmdir_waiter(cgrp); | ||
2161 | retval = 0; | ||
2162 | out_list_teardown: | ||
2163 | /* clean up the list of prefetched css_sets. */ | ||
2164 | list_for_each_entry_safe(cg_entry, temp_nobe, &newcg_list, links) { | ||
2165 | list_del(&cg_entry->links); | ||
2166 | put_css_set(cg_entry->cg); | ||
2167 | kfree(cg_entry); | ||
2168 | } | ||
2169 | out_cancel_attach: | ||
2170 | /* same deal as in cgroup_attach_task */ | ||
2171 | if (retval) { | ||
2172 | for_each_subsys(root, ss) { | ||
2173 | if (ss == failed_ss) { | ||
2174 | if (cancel_failed_ss && ss->cancel_attach) | ||
2175 | ss->cancel_attach(ss, cgrp, leader); | ||
2176 | break; | ||
2177 | } | ||
2178 | if (ss->cancel_attach) | ||
2179 | ss->cancel_attach(ss, cgrp, leader); | ||
2180 | } | ||
2181 | } | ||
2182 | /* clean up the array of referenced threads in the group. */ | ||
2183 | for (i = 0; i < group_size; i++) { | ||
2184 | tsk = flex_array_get_ptr(group, i); | ||
2185 | put_task_struct(tsk); | ||
2186 | } | ||
2187 | out_free_group_list: | ||
2188 | flex_array_free(group); | ||
2189 | return retval; | ||
2190 | } | ||
2191 | |||
2192 | /* | ||
2193 | * Find the task_struct of the task to attach by vpid and pass it along to the | ||
2194 | * function to attach either it or all tasks in its threadgroup. Will take | ||
2195 | * cgroup_mutex; may take task_lock of task. | ||
1878 | */ | 2196 | */ |
1879 | static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) | 2197 | static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) |
1880 | { | 2198 | { |
1881 | struct task_struct *tsk; | 2199 | struct task_struct *tsk; |
1882 | const struct cred *cred = current_cred(), *tcred; | 2200 | const struct cred *cred = current_cred(), *tcred; |
1883 | int ret; | 2201 | int ret; |
1884 | 2202 | ||
2203 | if (!cgroup_lock_live_group(cgrp)) | ||
2204 | return -ENODEV; | ||
2205 | |||
1885 | if (pid) { | 2206 | if (pid) { |
1886 | rcu_read_lock(); | 2207 | rcu_read_lock(); |
1887 | tsk = find_task_by_vpid(pid); | 2208 | tsk = find_task_by_vpid(pid); |
1888 | if (!tsk || tsk->flags & PF_EXITING) { | 2209 | if (!tsk) { |
1889 | rcu_read_unlock(); | 2210 | rcu_read_unlock(); |
2211 | cgroup_unlock(); | ||
2212 | return -ESRCH; | ||
2213 | } | ||
2214 | if (threadgroup) { | ||
2215 | /* | ||
2216 | * RCU protects this access, since tsk was found in the | ||
2217 | * tid map. a race with de_thread may cause group_leader | ||
2218 | * to stop being the leader, but cgroup_attach_proc will | ||
2219 | * detect it later. | ||
2220 | */ | ||
2221 | tsk = tsk->group_leader; | ||
2222 | } else if (tsk->flags & PF_EXITING) { | ||
2223 | /* optimization for the single-task-only case */ | ||
2224 | rcu_read_unlock(); | ||
2225 | cgroup_unlock(); | ||
1890 | return -ESRCH; | 2226 | return -ESRCH; |
1891 | } | 2227 | } |
1892 | 2228 | ||
2229 | /* | ||
2230 | * even if we're attaching all tasks in the thread group, we | ||
2231 | * only need to check permissions on one of them. | ||
2232 | */ | ||
1893 | tcred = __task_cred(tsk); | 2233 | tcred = __task_cred(tsk); |
1894 | if (cred->euid && | 2234 | if (cred->euid && |
1895 | cred->euid != tcred->uid && | 2235 | cred->euid != tcred->uid && |
1896 | cred->euid != tcred->suid) { | 2236 | cred->euid != tcred->suid) { |
1897 | rcu_read_unlock(); | 2237 | rcu_read_unlock(); |
2238 | cgroup_unlock(); | ||
1898 | return -EACCES; | 2239 | return -EACCES; |
1899 | } | 2240 | } |
1900 | get_task_struct(tsk); | 2241 | get_task_struct(tsk); |
1901 | rcu_read_unlock(); | 2242 | rcu_read_unlock(); |
1902 | } else { | 2243 | } else { |
1903 | tsk = current; | 2244 | if (threadgroup) |
2245 | tsk = current->group_leader; | ||
2246 | else | ||
2247 | tsk = current; | ||
1904 | get_task_struct(tsk); | 2248 | get_task_struct(tsk); |
1905 | } | 2249 | } |
1906 | 2250 | ||
1907 | ret = cgroup_attach_task(cgrp, tsk); | 2251 | if (threadgroup) { |
2252 | threadgroup_fork_write_lock(tsk); | ||
2253 | ret = cgroup_attach_proc(cgrp, tsk); | ||
2254 | threadgroup_fork_write_unlock(tsk); | ||
2255 | } else { | ||
2256 | ret = cgroup_attach_task(cgrp, tsk); | ||
2257 | } | ||
1908 | put_task_struct(tsk); | 2258 | put_task_struct(tsk); |
2259 | cgroup_unlock(); | ||
1909 | return ret; | 2260 | return ret; |
1910 | } | 2261 | } |
1911 | 2262 | ||
1912 | static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid) | 2263 | static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid) |
1913 | { | 2264 | { |
2265 | return attach_task_by_pid(cgrp, pid, false); | ||
2266 | } | ||
2267 | |||
2268 | static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid) | ||
2269 | { | ||
1914 | int ret; | 2270 | int ret; |
1915 | if (!cgroup_lock_live_group(cgrp)) | 2271 | do { |
1916 | return -ENODEV; | 2272 | /* |
1917 | ret = attach_task_by_pid(cgrp, pid); | 2273 | * attach_proc fails with -EAGAIN if threadgroup leadership |
1918 | cgroup_unlock(); | 2274 | * changes in the middle of the operation, in which case we need |
2275 | * to find the task_struct for the new leader and start over. | ||
2276 | */ | ||
2277 | ret = attach_task_by_pid(cgrp, tgid, true); | ||
2278 | } while (ret == -EAGAIN); | ||
1919 | return ret; | 2279 | return ret; |
1920 | } | 2280 | } |
1921 | 2281 | ||
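cgroup_attach_proc() snapshots the threadgroup into a flex_array (hence the new linux/flex_array.h include at the top of the file) so that the later per-thread work can sleep without holding RCU or the tasklist lock. The calls used are flex_array_alloc(), flex_array_prealloc(), flex_array_put_ptr(), flex_array_get_ptr() and flex_array_free(). A condensed sketch of the allocate-then-prealloc step, stripped of the surrounding cgroup locking; the helper name is illustrative and assumes n >= 1, as get_nr_threads() guarantees:

    #include <linux/flex_array.h>

    /* sketch: reserve room for n task_struct pointers up front so that
     * flex_array_put_ptr() cannot fail later inside rcu_read_lock() */
    static struct flex_array *snapshot_alloc(int n)
    {
            struct flex_array *fa;

            fa = flex_array_alloc(sizeof(struct task_struct *), n, GFP_KERNEL);
            if (!fa)
                    return NULL;
            if (flex_array_prealloc(fa, 0, n - 1, GFP_KERNEL)) {
                    flex_array_free(fa);
                    return NULL;
            }
            return fa;
    }
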
@@ -3272,9 +3632,9 @@ static struct cftype files[] = { | |||
3272 | { | 3632 | { |
3273 | .name = CGROUP_FILE_GENERIC_PREFIX "procs", | 3633 | .name = CGROUP_FILE_GENERIC_PREFIX "procs", |
3274 | .open = cgroup_procs_open, | 3634 | .open = cgroup_procs_open, |
3275 | /* .write_u64 = cgroup_procs_write, TODO */ | 3635 | .write_u64 = cgroup_procs_write, |
3276 | .release = cgroup_pidlist_release, | 3636 | .release = cgroup_pidlist_release, |
3277 | .mode = S_IRUGO, | 3637 | .mode = S_IRUGO | S_IWUSR, |
3278 | }, | 3638 | }, |
3279 | { | 3639 | { |
3280 | .name = "notify_on_release", | 3640 | .name = "notify_on_release", |
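With cgroup_procs_write hooked up and the file made writable (S_IRUGO | S_IWUSR), a whole thread group can now be moved by writing its TGID to cgroup.procs, just as individual TIDs are written to tasks. A small user-space illustration; the mount point, cgroup name and TGID are placeholders, not values taken from this patch:

    #include <stdio.h>

    int main(void)
    {
            /* /cgroup/mygroup stands in for wherever the hierarchy
             * containing the target cgroup is mounted */
            FILE *f = fopen("/cgroup/mygroup/cgroup.procs", "w");

            if (!f)
                    return 1;
            /* writing the TGID moves every thread of that process */
            fprintf(f, "%d\n", 1234);
            return fclose(f) != 0;
    }
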
@@ -4270,122 +4630,6 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
4270 | } | 4630 | } |
4271 | 4631 | ||
4272 | /** | 4632 | /** |
4273 | * cgroup_clone - clone the cgroup the given subsystem is attached to | ||
4274 | * @tsk: the task to be moved | ||
4275 | * @subsys: the given subsystem | ||
4276 | * @nodename: the name for the new cgroup | ||
4277 | * | ||
4278 | * Duplicate the current cgroup in the hierarchy that the given | ||
4279 | * subsystem is attached to, and move this task into the new | ||
4280 | * child. | ||
4281 | */ | ||
4282 | int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, | ||
4283 | char *nodename) | ||
4284 | { | ||
4285 | struct dentry *dentry; | ||
4286 | int ret = 0; | ||
4287 | struct cgroup *parent, *child; | ||
4288 | struct inode *inode; | ||
4289 | struct css_set *cg; | ||
4290 | struct cgroupfs_root *root; | ||
4291 | struct cgroup_subsys *ss; | ||
4292 | |||
4293 | /* We shouldn't be called by an unregistered subsystem */ | ||
4294 | BUG_ON(!subsys->active); | ||
4295 | |||
4296 | /* First figure out what hierarchy and cgroup we're dealing | ||
4297 | * with, and pin them so we can drop cgroup_mutex */ | ||
4298 | mutex_lock(&cgroup_mutex); | ||
4299 | again: | ||
4300 | root = subsys->root; | ||
4301 | if (root == &rootnode) { | ||
4302 | mutex_unlock(&cgroup_mutex); | ||
4303 | return 0; | ||
4304 | } | ||
4305 | |||
4306 | /* Pin the hierarchy */ | ||
4307 | if (!atomic_inc_not_zero(&root->sb->s_active)) { | ||
4308 | /* We race with the final deactivate_super() */ | ||
4309 | mutex_unlock(&cgroup_mutex); | ||
4310 | return 0; | ||
4311 | } | ||
4312 | |||
4313 | /* Keep the cgroup alive */ | ||
4314 | task_lock(tsk); | ||
4315 | parent = task_cgroup(tsk, subsys->subsys_id); | ||
4316 | cg = tsk->cgroups; | ||
4317 | get_css_set(cg); | ||
4318 | task_unlock(tsk); | ||
4319 | |||
4320 | mutex_unlock(&cgroup_mutex); | ||
4321 | |||
4322 | /* Now do the VFS work to create a cgroup */ | ||
4323 | inode = parent->dentry->d_inode; | ||
4324 | |||
4325 | /* Hold the parent directory mutex across this operation to | ||
4326 | * stop anyone else deleting the new cgroup */ | ||
4327 | mutex_lock(&inode->i_mutex); | ||
4328 | dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename)); | ||
4329 | if (IS_ERR(dentry)) { | ||
4330 | printk(KERN_INFO | ||
4331 | "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename, | ||
4332 | PTR_ERR(dentry)); | ||
4333 | ret = PTR_ERR(dentry); | ||
4334 | goto out_release; | ||
4335 | } | ||
4336 | |||
4337 | /* Create the cgroup directory, which also creates the cgroup */ | ||
4338 | ret = vfs_mkdir(inode, dentry, 0755); | ||
4339 | child = __d_cgrp(dentry); | ||
4340 | dput(dentry); | ||
4341 | if (ret) { | ||
4342 | printk(KERN_INFO | ||
4343 | "Failed to create cgroup %s: %d\n", nodename, | ||
4344 | ret); | ||
4345 | goto out_release; | ||
4346 | } | ||
4347 | |||
4348 | /* The cgroup now exists. Retake cgroup_mutex and check | ||
4349 | * that we're still in the same state that we thought we | ||
4350 | * were. */ | ||
4351 | mutex_lock(&cgroup_mutex); | ||
4352 | if ((root != subsys->root) || | ||
4353 | (parent != task_cgroup(tsk, subsys->subsys_id))) { | ||
4354 | /* Aargh, we raced ... */ | ||
4355 | mutex_unlock(&inode->i_mutex); | ||
4356 | put_css_set(cg); | ||
4357 | |||
4358 | deactivate_super(root->sb); | ||
4359 | /* The cgroup is still accessible in the VFS, but | ||
4360 | * we're not going to try to rmdir() it at this | ||
4361 | * point. */ | ||
4362 | printk(KERN_INFO | ||
4363 | "Race in cgroup_clone() - leaking cgroup %s\n", | ||
4364 | nodename); | ||
4365 | goto again; | ||
4366 | } | ||
4367 | |||
4368 | /* do any required auto-setup */ | ||
4369 | for_each_subsys(root, ss) { | ||
4370 | if (ss->post_clone) | ||
4371 | ss->post_clone(ss, child); | ||
4372 | } | ||
4373 | |||
4374 | /* All seems fine. Finish by moving the task into the new cgroup */ | ||
4375 | ret = cgroup_attach_task(child, tsk); | ||
4376 | mutex_unlock(&cgroup_mutex); | ||
4377 | |||
4378 | out_release: | ||
4379 | mutex_unlock(&inode->i_mutex); | ||
4380 | |||
4381 | mutex_lock(&cgroup_mutex); | ||
4382 | put_css_set(cg); | ||
4383 | mutex_unlock(&cgroup_mutex); | ||
4384 | deactivate_super(root->sb); | ||
4385 | return ret; | ||
4386 | } | ||
4387 | |||
4388 | /** | ||
4389 | * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp | 4633 | * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp |
4390 | * @cgrp: the cgroup in question | 4634 | * @cgrp: the cgroup in question |
4391 | * @task: the task in question | 4635 | * @task: the task in question |
@@ -4623,14 +4867,6 @@ bool css_is_ancestor(struct cgroup_subsys_state *child, | |||
4623 | return ret; | 4867 | return ret; |
4624 | } | 4868 | } |
4625 | 4869 | ||
4626 | static void __free_css_id_cb(struct rcu_head *head) | ||
4627 | { | ||
4628 | struct css_id *id; | ||
4629 | |||
4630 | id = container_of(head, struct css_id, rcu_head); | ||
4631 | kfree(id); | ||
4632 | } | ||
4633 | |||
4634 | void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) | 4870 | void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) |
4635 | { | 4871 | { |
4636 | struct css_id *id = css->id; | 4872 | struct css_id *id = css->id; |
@@ -4645,7 +4881,7 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) | |||
4645 | spin_lock(&ss->id_lock); | 4881 | spin_lock(&ss->id_lock); |
4646 | idr_remove(&ss->idr, id->id); | 4882 | idr_remove(&ss->idr, id->id); |
4647 | spin_unlock(&ss->id_lock); | 4883 | spin_unlock(&ss->id_lock); |
4648 | call_rcu(&id->rcu_head, __free_css_id_cb); | 4884 | kfree_rcu(id, rcu_head); |
4649 | } | 4885 | } |
4650 | EXPORT_SYMBOL_GPL(free_css_id); | 4886 | EXPORT_SYMBOL_GPL(free_css_id); |
4651 | 4887 | ||