author     Linus Torvalds <torvalds@linux-foundation.org>  2012-01-09 15:59:24 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-01-09 15:59:24 -0500
commit     db0c2bf69aa095d4a6de7b1145f29fe9a7c0f6a3 (patch)
tree       8f38957c01b18edddd44d49ecc3beeac08a20b4e /kernel
parent     ac69e0928054ff29a5049902fb477f9c7605c773 (diff)
parent     0d19ea866562e46989412a0676412fa0983c9ce7 (diff)
Merge branch 'for-3.3' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
* 'for-3.3' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (21 commits)
  cgroup: fix to allow mounting a hierarchy by name
  cgroup: move assignement out of condition in cgroup_attach_proc()
  cgroup: Remove task_lock() from cgroup_post_fork()
  cgroup: add sparse annotation to cgroup_iter_start() and cgroup_iter_end()
  cgroup: mark cgroup_rmdir_waitq and cgroup_attach_proc() as static
  cgroup: only need to check oldcgrp==newgrp once
  cgroup: remove redundant get/put of task struct
  cgroup: remove redundant get/put of old css_set from migrate
  cgroup: Remove unnecessary task_lock before fetching css_set on migration
  cgroup: Drop task_lock(parent) on cgroup_fork()
  cgroups: remove redundant get/put of css_set from css_set_check_fetched()
  resource cgroups: remove bogus cast
  cgroup: kill subsys->can_attach_task(), pre_attach() and attach_task()
  cgroup, cpuset: don't use ss->pre_attach()
  cgroup: don't use subsys->can_attach_task() or ->attach_task()
  cgroup: introduce cgroup_taskset and use it in subsys->can_attach(), cancel_attach() and attach()
  cgroup: improve old cgroup handling in cgroup_attach_proc()
  cgroup: always lock threadgroup during migration
  threadgroup: extend threadgroup_lock() to cover exit and exec
  threadgroup: rename signal->threadgroup_fork_lock to ->group_rwsem
  ...

Fix up conflict in kernel/cgroup.c due to commit e0197aae59e5: "cgroups: fix a
css_set not found bug in cgroup_attach_proc" that already mentioned that the
bug is fixed (differently) in Tejun's cgroup patchset.  This one, in other
words.
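The heart of the series is the shift from per-task attach hooks to taskset-based
hooks. Reconstructed from the call sites converted in the diff below (the
struct cgroup_subsys header change itself lies outside this kernel/-limited
view, so this is a sketch rather than the verbatim hunk), the affected methods
change shape roughly like this:

    /* before this merge: one callback invocation per thread */
    int  (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
                       struct task_struct *tsk);
    int  (*can_attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
    void (*pre_attach)(struct cgroup *cgrp);
    void (*attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
    void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
                          struct task_struct *tsk);
    void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
                   struct cgroup *old_cgrp, struct task_struct *tsk);

    /* after: one invocation per migration, iterating a cgroup_taskset */
    int  (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
                       struct cgroup_taskset *tset);
    void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
                          struct cgroup_taskset *tset);
    void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
                   struct cgroup_taskset *tset);
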
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup.c          | 401
-rw-r--r--  kernel/cgroup_freezer.c  |  16
-rw-r--r--  kernel/cpuset.c          | 105
-rw-r--r--  kernel/events/core.c     |  13
-rw-r--r--  kernel/fork.c            |   8
-rw-r--r--  kernel/res_counter.c     |   3
-rw-r--r--  kernel/sched/core.c      |  31
-rw-r--r--  kernel/signal.c          |  10
8 files changed, 326 insertions(+), 261 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 7cab65f83f1d..a5d3b5325f77 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -63,7 +63,24 @@
63 63
64#include <linux/atomic.h> 64#include <linux/atomic.h>
65 65
66/*
67 * cgroup_mutex is the master lock. Any modification to cgroup or its
68 * hierarchy must be performed while holding it.
69 *
70 * cgroup_root_mutex nests inside cgroup_mutex and should be held to modify
71 * cgroupfs_root of any cgroup hierarchy - subsys list, flags,
72 * release_agent_path and so on. Modifying requires both cgroup_mutex and
73 * cgroup_root_mutex. Readers can acquire either of the two. This is to
74 * break the following locking order cycle.
75 *
76 * A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem
77 * B. namespace_sem -> cgroup_mutex
78 *
79 * B happens only through cgroup_show_options() and using cgroup_root_mutex
80 * breaks it.
81 */
66static DEFINE_MUTEX(cgroup_mutex); 82static DEFINE_MUTEX(cgroup_mutex);
83static DEFINE_MUTEX(cgroup_root_mutex);
67 84
68/* 85/*
69 * Generate an array of cgroup subsystem pointers. At boot time, this is 86 * Generate an array of cgroup subsystem pointers. At boot time, this is
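The locking comment added above boils down to a simple discipline: writers of
cgroupfs_root state take both mutexes, nested in the documented order, while a
reader that is reached under namespace_sem (ordering B, i.e.
cgroup_show_options()) takes only cgroup_root_mutex and therefore never closes
the A/B cycle. A minimal illustration of the two paths (illustrative only, not
code from this patch):

    /* writer: modifying subsys list, flags or release_agent_path */
    mutex_lock(&cgroup_mutex);
    mutex_lock(&cgroup_root_mutex);
    /* ... modify cgroupfs_root state ... */
    mutex_unlock(&cgroup_root_mutex);
    mutex_unlock(&cgroup_mutex);

    /* reader reached via namespace_sem (mount option display):
     * either mutex alone suffices, so take only the inner one */
    mutex_lock(&cgroup_root_mutex);
    /* ... read cgroupfs_root state ... */
    mutex_unlock(&cgroup_root_mutex);
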
@@ -921,7 +938,7 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
921 * 938 *
922 * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex; 939 * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
923 */ 940 */
924DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); 941static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
925 942
926static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp) 943static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
927{ 944{
@@ -953,6 +970,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
953 int i; 970 int i;
954 971
955 BUG_ON(!mutex_is_locked(&cgroup_mutex)); 972 BUG_ON(!mutex_is_locked(&cgroup_mutex));
973 BUG_ON(!mutex_is_locked(&cgroup_root_mutex));
956 974
957 removed_bits = root->actual_subsys_bits & ~final_bits; 975 removed_bits = root->actual_subsys_bits & ~final_bits;
958 added_bits = final_bits & ~root->actual_subsys_bits; 976 added_bits = final_bits & ~root->actual_subsys_bits;
@@ -1043,7 +1061,7 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
1043 struct cgroupfs_root *root = dentry->d_sb->s_fs_info; 1061 struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
1044 struct cgroup_subsys *ss; 1062 struct cgroup_subsys *ss;
1045 1063
1046 mutex_lock(&cgroup_mutex); 1064 mutex_lock(&cgroup_root_mutex);
1047 for_each_subsys(root, ss) 1065 for_each_subsys(root, ss)
1048 seq_printf(seq, ",%s", ss->name); 1066 seq_printf(seq, ",%s", ss->name);
1049 if (test_bit(ROOT_NOPREFIX, &root->flags)) 1067 if (test_bit(ROOT_NOPREFIX, &root->flags))
@@ -1054,7 +1072,7 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
1054 seq_puts(seq, ",clone_children"); 1072 seq_puts(seq, ",clone_children");
1055 if (strlen(root->name)) 1073 if (strlen(root->name))
1056 seq_printf(seq, ",name=%s", root->name); 1074 seq_printf(seq, ",name=%s", root->name);
1057 mutex_unlock(&cgroup_mutex); 1075 mutex_unlock(&cgroup_root_mutex);
1058 return 0; 1076 return 0;
1059} 1077}
1060 1078
@@ -1175,10 +1193,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1175 1193
1176 /* 1194 /*
1177 * If the 'all' option was specified select all the subsystems, 1195 * If the 'all' option was specified select all the subsystems,
1178 * otherwise 'all, 'none' and a subsystem name options were not 1196 * otherwise if 'none', 'name=' and a subsystem name options
1179 * specified, let's default to 'all' 1197 * were not specified, let's default to 'all'
1180 */ 1198 */
1181 if (all_ss || (!all_ss && !one_ss && !opts->none)) { 1199 if (all_ss || (!one_ss && !opts->none && !opts->name)) {
1182 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 1200 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1183 struct cgroup_subsys *ss = subsys[i]; 1201 struct cgroup_subsys *ss = subsys[i];
1184 if (ss == NULL) 1202 if (ss == NULL)
@@ -1269,6 +1287,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1269 1287
1270 mutex_lock(&cgrp->dentry->d_inode->i_mutex); 1288 mutex_lock(&cgrp->dentry->d_inode->i_mutex);
1271 mutex_lock(&cgroup_mutex); 1289 mutex_lock(&cgroup_mutex);
1290 mutex_lock(&cgroup_root_mutex);
1272 1291
1273 /* See what subsystems are wanted */ 1292 /* See what subsystems are wanted */
1274 ret = parse_cgroupfs_options(data, &opts); 1293 ret = parse_cgroupfs_options(data, &opts);
@@ -1297,6 +1316,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1297 out_unlock: 1316 out_unlock:
1298 kfree(opts.release_agent); 1317 kfree(opts.release_agent);
1299 kfree(opts.name); 1318 kfree(opts.name);
1319 mutex_unlock(&cgroup_root_mutex);
1300 mutex_unlock(&cgroup_mutex); 1320 mutex_unlock(&cgroup_mutex);
1301 mutex_unlock(&cgrp->dentry->d_inode->i_mutex); 1321 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
1302 return ret; 1322 return ret;
@@ -1481,6 +1501,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1481 int ret = 0; 1501 int ret = 0;
1482 struct super_block *sb; 1502 struct super_block *sb;
1483 struct cgroupfs_root *new_root; 1503 struct cgroupfs_root *new_root;
1504 struct inode *inode;
1484 1505
1485 /* First find the desired set of subsystems */ 1506 /* First find the desired set of subsystems */
1486 mutex_lock(&cgroup_mutex); 1507 mutex_lock(&cgroup_mutex);
@@ -1514,7 +1535,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1514 /* We used the new root structure, so this is a new hierarchy */ 1535 /* We used the new root structure, so this is a new hierarchy */
1515 struct list_head tmp_cg_links; 1536 struct list_head tmp_cg_links;
1516 struct cgroup *root_cgrp = &root->top_cgroup; 1537 struct cgroup *root_cgrp = &root->top_cgroup;
1517 struct inode *inode;
1518 struct cgroupfs_root *existing_root; 1538 struct cgroupfs_root *existing_root;
1519 const struct cred *cred; 1539 const struct cred *cred;
1520 int i; 1540 int i;
@@ -1528,18 +1548,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1528 1548
1529 mutex_lock(&inode->i_mutex); 1549 mutex_lock(&inode->i_mutex);
1530 mutex_lock(&cgroup_mutex); 1550 mutex_lock(&cgroup_mutex);
1551 mutex_lock(&cgroup_root_mutex);
1531 1552
1532 if (strlen(root->name)) { 1553 /* Check for name clashes with existing mounts */
1533 /* Check for name clashes with existing mounts */ 1554 ret = -EBUSY;
1534 for_each_active_root(existing_root) { 1555 if (strlen(root->name))
1535 if (!strcmp(existing_root->name, root->name)) { 1556 for_each_active_root(existing_root)
1536 ret = -EBUSY; 1557 if (!strcmp(existing_root->name, root->name))
1537 mutex_unlock(&cgroup_mutex); 1558 goto unlock_drop;
1538 mutex_unlock(&inode->i_mutex);
1539 goto drop_new_super;
1540 }
1541 }
1542 }
1543 1559
1544 /* 1560 /*
1545 * We're accessing css_set_count without locking 1561 * We're accessing css_set_count without locking
@@ -1549,18 +1565,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1549 * have some link structures left over 1565 * have some link structures left over
1550 */ 1566 */
1551 ret = allocate_cg_links(css_set_count, &tmp_cg_links); 1567 ret = allocate_cg_links(css_set_count, &tmp_cg_links);
1552 if (ret) { 1568 if (ret)
1553 mutex_unlock(&cgroup_mutex); 1569 goto unlock_drop;
1554 mutex_unlock(&inode->i_mutex);
1555 goto drop_new_super;
1556 }
1557 1570
1558 ret = rebind_subsystems(root, root->subsys_bits); 1571 ret = rebind_subsystems(root, root->subsys_bits);
1559 if (ret == -EBUSY) { 1572 if (ret == -EBUSY) {
1560 mutex_unlock(&cgroup_mutex);
1561 mutex_unlock(&inode->i_mutex);
1562 free_cg_links(&tmp_cg_links); 1573 free_cg_links(&tmp_cg_links);
1563 goto drop_new_super; 1574 goto unlock_drop;
1564 } 1575 }
1565 /* 1576 /*
1566 * There must be no failure case after here, since rebinding 1577 * There must be no failure case after here, since rebinding
@@ -1599,6 +1610,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1599 cred = override_creds(&init_cred); 1610 cred = override_creds(&init_cred);
1600 cgroup_populate_dir(root_cgrp); 1611 cgroup_populate_dir(root_cgrp);
1601 revert_creds(cred); 1612 revert_creds(cred);
1613 mutex_unlock(&cgroup_root_mutex);
1602 mutex_unlock(&cgroup_mutex); 1614 mutex_unlock(&cgroup_mutex);
1603 mutex_unlock(&inode->i_mutex); 1615 mutex_unlock(&inode->i_mutex);
1604 } else { 1616 } else {
@@ -1615,6 +1627,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1615 kfree(opts.name); 1627 kfree(opts.name);
1616 return dget(sb->s_root); 1628 return dget(sb->s_root);
1617 1629
1630 unlock_drop:
1631 mutex_unlock(&cgroup_root_mutex);
1632 mutex_unlock(&cgroup_mutex);
1633 mutex_unlock(&inode->i_mutex);
1618 drop_new_super: 1634 drop_new_super:
1619 deactivate_locked_super(sb); 1635 deactivate_locked_super(sb);
1620 drop_modules: 1636 drop_modules:
@@ -1639,6 +1655,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
1639 BUG_ON(!list_empty(&cgrp->sibling)); 1655 BUG_ON(!list_empty(&cgrp->sibling));
1640 1656
1641 mutex_lock(&cgroup_mutex); 1657 mutex_lock(&cgroup_mutex);
1658 mutex_lock(&cgroup_root_mutex);
1642 1659
1643 /* Rebind all subsystems back to the default hierarchy */ 1660 /* Rebind all subsystems back to the default hierarchy */
1644 ret = rebind_subsystems(root, 0); 1661 ret = rebind_subsystems(root, 0);
@@ -1664,6 +1681,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
1664 root_count--; 1681 root_count--;
1665 } 1682 }
1666 1683
1684 mutex_unlock(&cgroup_root_mutex);
1667 mutex_unlock(&cgroup_mutex); 1685 mutex_unlock(&cgroup_mutex);
1668 1686
1669 kill_litter_super(sb); 1687 kill_litter_super(sb);
@@ -1740,11 +1758,90 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1740EXPORT_SYMBOL_GPL(cgroup_path); 1758EXPORT_SYMBOL_GPL(cgroup_path);
1741 1759
1742/* 1760/*
1761 * Control Group taskset
1762 */
1763struct task_and_cgroup {
1764 struct task_struct *task;
1765 struct cgroup *cgrp;
1766};
1767
1768struct cgroup_taskset {
1769 struct task_and_cgroup single;
1770 struct flex_array *tc_array;
1771 int tc_array_len;
1772 int idx;
1773 struct cgroup *cur_cgrp;
1774};
1775
1776/**
1777 * cgroup_taskset_first - reset taskset and return the first task
1778 * @tset: taskset of interest
1779 *
1780 * @tset iteration is initialized and the first task is returned.
1781 */
1782struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
1783{
1784 if (tset->tc_array) {
1785 tset->idx = 0;
1786 return cgroup_taskset_next(tset);
1787 } else {
1788 tset->cur_cgrp = tset->single.cgrp;
1789 return tset->single.task;
1790 }
1791}
1792EXPORT_SYMBOL_GPL(cgroup_taskset_first);
1793
1794/**
1795 * cgroup_taskset_next - iterate to the next task in taskset
1796 * @tset: taskset of interest
1797 *
1798 * Return the next task in @tset. Iteration must have been initialized
1799 * with cgroup_taskset_first().
1800 */
1801struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
1802{
1803 struct task_and_cgroup *tc;
1804
1805 if (!tset->tc_array || tset->idx >= tset->tc_array_len)
1806 return NULL;
1807
1808 tc = flex_array_get(tset->tc_array, tset->idx++);
1809 tset->cur_cgrp = tc->cgrp;
1810 return tc->task;
1811}
1812EXPORT_SYMBOL_GPL(cgroup_taskset_next);
1813
1814/**
1815 * cgroup_taskset_cur_cgroup - return the matching cgroup for the current task
1816 * @tset: taskset of interest
1817 *
1818 * Return the cgroup for the current (last returned) task of @tset. This
1819 * function must be preceded by either cgroup_taskset_first() or
1820 * cgroup_taskset_next().
1821 */
1822struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset)
1823{
1824 return tset->cur_cgrp;
1825}
1826EXPORT_SYMBOL_GPL(cgroup_taskset_cur_cgroup);
1827
1828/**
1829 * cgroup_taskset_size - return the number of tasks in taskset
1830 * @tset: taskset of interest
1831 */
1832int cgroup_taskset_size(struct cgroup_taskset *tset)
1833{
1834 return tset->tc_array ? tset->tc_array_len : 1;
1835}
1836EXPORT_SYMBOL_GPL(cgroup_taskset_size);
1837
1838
1839/*
1743 * cgroup_task_migrate - move a task from one cgroup to another. 1840 * cgroup_task_migrate - move a task from one cgroup to another.
1744 * 1841 *
1745 * 'guarantee' is set if the caller promises that a new css_set for the task 1842 * 'guarantee' is set if the caller promises that a new css_set for the task
1746 * will already exist. If not set, this function might sleep, and can fail with 1843 * will already exist. If not set, this function might sleep, and can fail with
1747 * -ENOMEM. Otherwise, it can only fail with -ESRCH. 1844 * -ENOMEM. Must be called with cgroup_mutex and threadgroup locked.
1748 */ 1845 */
1749static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, 1846static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
1750 struct task_struct *tsk, bool guarantee) 1847 struct task_struct *tsk, bool guarantee)
@@ -1753,14 +1850,12 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
1753 struct css_set *newcg; 1850 struct css_set *newcg;
1754 1851
1755 /* 1852 /*
1756 * get old css_set. we need to take task_lock and refcount it, because 1853 * We are synchronized through threadgroup_lock() against PF_EXITING
1757 * an exiting task can change its css_set to init_css_set and drop its 1854 * setting such that we can't race against cgroup_exit() changing the
1758 * old one without taking cgroup_mutex. 1855 * css_set to init_css_set and dropping the old one.
1759 */ 1856 */
1760 task_lock(tsk); 1857 WARN_ON_ONCE(tsk->flags & PF_EXITING);
1761 oldcg = tsk->cgroups; 1858 oldcg = tsk->cgroups;
1762 get_css_set(oldcg);
1763 task_unlock(tsk);
1764 1859
1765 /* locate or allocate a new css_set for this task. */ 1860 /* locate or allocate a new css_set for this task. */
1766 if (guarantee) { 1861 if (guarantee) {
@@ -1775,20 +1870,11 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
1775 might_sleep(); 1870 might_sleep();
1776 /* find_css_set will give us newcg already referenced. */ 1871 /* find_css_set will give us newcg already referenced. */
1777 newcg = find_css_set(oldcg, cgrp); 1872 newcg = find_css_set(oldcg, cgrp);
1778 if (!newcg) { 1873 if (!newcg)
1779 put_css_set(oldcg);
1780 return -ENOMEM; 1874 return -ENOMEM;
1781 }
1782 } 1875 }
1783 put_css_set(oldcg);
1784 1876
1785 /* if PF_EXITING is set, the tsk->cgroups pointer is no longer safe. */
1786 task_lock(tsk); 1877 task_lock(tsk);
1787 if (tsk->flags & PF_EXITING) {
1788 task_unlock(tsk);
1789 put_css_set(newcg);
1790 return -ESRCH;
1791 }
1792 rcu_assign_pointer(tsk->cgroups, newcg); 1878 rcu_assign_pointer(tsk->cgroups, newcg);
1793 task_unlock(tsk); 1879 task_unlock(tsk);
1794 1880
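The helpers above give subsystem callbacks a single iteration idiom over the
tasks in a migration, whether it came from cgroup_attach_task() (tc_array ==
NULL, the single entry) or from cgroup_attach_proc() (a flex_array of
task_and_cgroup). A minimal open-coded consumer might look like the sketch
below; example_can_attach() and example_allowed() are hypothetical names, not
part of the patch:

    static int example_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
                                  struct cgroup_taskset *tset)
    {
            struct task_struct *task;

            for (task = cgroup_taskset_first(tset); task;
                 task = cgroup_taskset_next(tset)) {
                    /* cgroup this particular task is migrating away from */
                    struct cgroup *old_cgrp = cgroup_taskset_cur_cgroup(tset);

                    if (!example_allowed(task, old_cgrp, cgrp))
                            return -EPERM;
            }
            return 0;
    }
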
@@ -1814,8 +1900,8 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
1814 * @cgrp: the cgroup the task is attaching to 1900 * @cgrp: the cgroup the task is attaching to
1815 * @tsk: the task to be attached 1901 * @tsk: the task to be attached
1816 * 1902 *
1817 * Call holding cgroup_mutex. May take task_lock of 1903 * Call with cgroup_mutex and threadgroup locked. May take task_lock of
1818 * the task 'tsk' during call. 1904 * @tsk during call.
1819 */ 1905 */
1820int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) 1906int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1821{ 1907{
@@ -1823,15 +1909,23 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1823 struct cgroup_subsys *ss, *failed_ss = NULL; 1909 struct cgroup_subsys *ss, *failed_ss = NULL;
1824 struct cgroup *oldcgrp; 1910 struct cgroup *oldcgrp;
1825 struct cgroupfs_root *root = cgrp->root; 1911 struct cgroupfs_root *root = cgrp->root;
1912 struct cgroup_taskset tset = { };
1913
1914 /* @tsk either already exited or can't exit until the end */
1915 if (tsk->flags & PF_EXITING)
1916 return -ESRCH;
1826 1917
1827 /* Nothing to do if the task is already in that cgroup */ 1918 /* Nothing to do if the task is already in that cgroup */
1828 oldcgrp = task_cgroup_from_root(tsk, root); 1919 oldcgrp = task_cgroup_from_root(tsk, root);
1829 if (cgrp == oldcgrp) 1920 if (cgrp == oldcgrp)
1830 return 0; 1921 return 0;
1831 1922
1923 tset.single.task = tsk;
1924 tset.single.cgrp = oldcgrp;
1925
1832 for_each_subsys(root, ss) { 1926 for_each_subsys(root, ss) {
1833 if (ss->can_attach) { 1927 if (ss->can_attach) {
1834 retval = ss->can_attach(ss, cgrp, tsk); 1928 retval = ss->can_attach(ss, cgrp, &tset);
1835 if (retval) { 1929 if (retval) {
1836 /* 1930 /*
1837 * Remember on which subsystem the can_attach() 1931 * Remember on which subsystem the can_attach()
@@ -1843,13 +1937,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1843 goto out; 1937 goto out;
1844 } 1938 }
1845 } 1939 }
1846 if (ss->can_attach_task) {
1847 retval = ss->can_attach_task(cgrp, tsk);
1848 if (retval) {
1849 failed_ss = ss;
1850 goto out;
1851 }
1852 }
1853 } 1940 }
1854 1941
1855 retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false); 1942 retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
@@ -1857,12 +1944,8 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1857 goto out; 1944 goto out;
1858 1945
1859 for_each_subsys(root, ss) { 1946 for_each_subsys(root, ss) {
1860 if (ss->pre_attach)
1861 ss->pre_attach(cgrp);
1862 if (ss->attach_task)
1863 ss->attach_task(cgrp, tsk);
1864 if (ss->attach) 1947 if (ss->attach)
1865 ss->attach(ss, cgrp, oldcgrp, tsk); 1948 ss->attach(ss, cgrp, &tset);
1866 } 1949 }
1867 1950
1868 synchronize_rcu(); 1951 synchronize_rcu();
@@ -1884,7 +1967,7 @@ out:
1884 */ 1967 */
1885 break; 1968 break;
1886 if (ss->cancel_attach) 1969 if (ss->cancel_attach)
1887 ss->cancel_attach(ss, cgrp, tsk); 1970 ss->cancel_attach(ss, cgrp, &tset);
1888 } 1971 }
1889 } 1972 }
1890 return retval; 1973 return retval;
@@ -1935,23 +2018,17 @@ static bool css_set_check_fetched(struct cgroup *cgrp,
1935 2018
1936 read_lock(&css_set_lock); 2019 read_lock(&css_set_lock);
1937 newcg = find_existing_css_set(cg, cgrp, template); 2020 newcg = find_existing_css_set(cg, cgrp, template);
1938 if (newcg)
1939 get_css_set(newcg);
1940 read_unlock(&css_set_lock); 2021 read_unlock(&css_set_lock);
1941 2022
1942 /* doesn't exist at all? */ 2023 /* doesn't exist at all? */
1943 if (!newcg) 2024 if (!newcg)
1944 return false; 2025 return false;
1945 /* see if it's already in the list */ 2026 /* see if it's already in the list */
1946 list_for_each_entry(cg_entry, newcg_list, links) { 2027 list_for_each_entry(cg_entry, newcg_list, links)
1947 if (cg_entry->cg == newcg) { 2028 if (cg_entry->cg == newcg)
1948 put_css_set(newcg);
1949 return true; 2029 return true;
1950 }
1951 }
1952 2030
1953 /* not found */ 2031 /* not found */
1954 put_css_set(newcg);
1955 return false; 2032 return false;
1956} 2033}
1957 2034
@@ -1985,21 +2062,21 @@ static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg,
1985 * @cgrp: the cgroup to attach to 2062 * @cgrp: the cgroup to attach to
1986 * @leader: the threadgroup leader task_struct of the group to be attached 2063 * @leader: the threadgroup leader task_struct of the group to be attached
1987 * 2064 *
1988 * Call holding cgroup_mutex and the threadgroup_fork_lock of the leader. Will 2065 * Call holding cgroup_mutex and the group_rwsem of the leader. Will take
1989 * take task_lock of each thread in leader's threadgroup individually in turn. 2066 * task_lock of each thread in leader's threadgroup individually in turn.
1990 */ 2067 */
1991int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) 2068static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
1992{ 2069{
1993 int retval, i, group_size; 2070 int retval, i, group_size;
1994 struct cgroup_subsys *ss, *failed_ss = NULL; 2071 struct cgroup_subsys *ss, *failed_ss = NULL;
1995 bool cancel_failed_ss = false;
1996 /* guaranteed to be initialized later, but the compiler needs this */ 2072 /* guaranteed to be initialized later, but the compiler needs this */
1997 struct cgroup *oldcgrp = NULL;
1998 struct css_set *oldcg; 2073 struct css_set *oldcg;
1999 struct cgroupfs_root *root = cgrp->root; 2074 struct cgroupfs_root *root = cgrp->root;
2000 /* threadgroup list cursor and array */ 2075 /* threadgroup list cursor and array */
2001 struct task_struct *tsk; 2076 struct task_struct *tsk;
2077 struct task_and_cgroup *tc;
2002 struct flex_array *group; 2078 struct flex_array *group;
2079 struct cgroup_taskset tset = { };
2003 /* 2080 /*
2004 * we need to make sure we have css_sets for all the tasks we're 2081 * we need to make sure we have css_sets for all the tasks we're
2005 * going to move -before- we actually start moving them, so that in 2082 * going to move -before- we actually start moving them, so that in
@@ -2012,13 +2089,12 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2012 * step 0: in order to do expensive, possibly blocking operations for 2089 * step 0: in order to do expensive, possibly blocking operations for
2013 * every thread, we cannot iterate the thread group list, since it needs 2090 * every thread, we cannot iterate the thread group list, since it needs
2014 * rcu or tasklist locked. instead, build an array of all threads in the 2091 * rcu or tasklist locked. instead, build an array of all threads in the
2015 * group - threadgroup_fork_lock prevents new threads from appearing, 2092 * group - group_rwsem prevents new threads from appearing, and if
2016 * and if threads exit, this will just be an over-estimate. 2093 * threads exit, this will just be an over-estimate.
2017 */ 2094 */
2018 group_size = get_nr_threads(leader); 2095 group_size = get_nr_threads(leader);
2019 /* flex_array supports very large thread-groups better than kmalloc. */ 2096 /* flex_array supports very large thread-groups better than kmalloc. */
2020 group = flex_array_alloc(sizeof(struct task_struct *), group_size, 2097 group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL);
2021 GFP_KERNEL);
2022 if (!group) 2098 if (!group)
2023 return -ENOMEM; 2099 return -ENOMEM;
2024 /* pre-allocate to guarantee space while iterating in rcu read-side. */ 2100 /* pre-allocate to guarantee space while iterating in rcu read-side. */
@@ -2040,49 +2116,53 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2040 retval = -EAGAIN; 2116 retval = -EAGAIN;
2041 goto out_free_group_list; 2117 goto out_free_group_list;
2042 } 2118 }
2043 /* take a reference on each task in the group to go in the array. */ 2119
2044 tsk = leader; 2120 tsk = leader;
2045 i = 0; 2121 i = 0;
2046 do { 2122 do {
2123 struct task_and_cgroup ent;
2124
2125 /* @tsk either already exited or can't exit until the end */
2126 if (tsk->flags & PF_EXITING)
2127 continue;
2128
2047 /* as per above, nr_threads may decrease, but not increase. */ 2129 /* as per above, nr_threads may decrease, but not increase. */
2048 BUG_ON(i >= group_size); 2130 BUG_ON(i >= group_size);
2049 get_task_struct(tsk);
2050 /* 2131 /*
2051 * saying GFP_ATOMIC has no effect here because we did prealloc 2132 * saying GFP_ATOMIC has no effect here because we did prealloc
2052 * earlier, but it's good form to communicate our expectations. 2133 * earlier, but it's good form to communicate our expectations.
2053 */ 2134 */
2054 retval = flex_array_put_ptr(group, i, tsk, GFP_ATOMIC); 2135 ent.task = tsk;
2136 ent.cgrp = task_cgroup_from_root(tsk, root);
2137 /* nothing to do if this task is already in the cgroup */
2138 if (ent.cgrp == cgrp)
2139 continue;
2140 retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
2055 BUG_ON(retval != 0); 2141 BUG_ON(retval != 0);
2056 i++; 2142 i++;
2057 } while_each_thread(leader, tsk); 2143 } while_each_thread(leader, tsk);
2058 /* remember the number of threads in the array for later. */ 2144 /* remember the number of threads in the array for later. */
2059 group_size = i; 2145 group_size = i;
2146 tset.tc_array = group;
2147 tset.tc_array_len = group_size;
2060 read_unlock(&tasklist_lock); 2148 read_unlock(&tasklist_lock);
2061 2149
2150 /* methods shouldn't be called if no task is actually migrating */
2151 retval = 0;
2152 if (!group_size)
2153 goto out_free_group_list;
2154
2062 /* 2155 /*
2063 * step 1: check that we can legitimately attach to the cgroup. 2156 * step 1: check that we can legitimately attach to the cgroup.
2064 */ 2157 */
2065 for_each_subsys(root, ss) { 2158 for_each_subsys(root, ss) {
2066 if (ss->can_attach) { 2159 if (ss->can_attach) {
2067 retval = ss->can_attach(ss, cgrp, leader); 2160 retval = ss->can_attach(ss, cgrp, &tset);
2068 if (retval) { 2161 if (retval) {
2069 failed_ss = ss; 2162 failed_ss = ss;
2070 goto out_cancel_attach; 2163 goto out_cancel_attach;
2071 } 2164 }
2072 } 2165 }
2073 /* a callback to be run on every thread in the threadgroup. */
2074 if (ss->can_attach_task) {
2075 /* run on each task in the threadgroup. */
2076 for (i = 0; i < group_size; i++) {
2077 tsk = flex_array_get_ptr(group, i);
2078 retval = ss->can_attach_task(cgrp, tsk);
2079 if (retval) {
2080 failed_ss = ss;
2081 cancel_failed_ss = true;
2082 goto out_cancel_attach;
2083 }
2084 }
2085 }
2086 } 2166 }
2087 2167
2088 /* 2168 /*
@@ -2091,67 +2171,36 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2091 */ 2171 */
2092 INIT_LIST_HEAD(&newcg_list); 2172 INIT_LIST_HEAD(&newcg_list);
2093 for (i = 0; i < group_size; i++) { 2173 for (i = 0; i < group_size; i++) {
2094 tsk = flex_array_get_ptr(group, i); 2174 tc = flex_array_get(group, i);
2095 /* nothing to do if this task is already in the cgroup */ 2175 oldcg = tc->task->cgroups;
2096 oldcgrp = task_cgroup_from_root(tsk, root); 2176
2097 if (cgrp == oldcgrp) 2177 /* if we don't already have it in the list get a new one */
2098 continue; 2178 if (!css_set_check_fetched(cgrp, tc->task, oldcg,
2099 /* get old css_set pointer */ 2179 &newcg_list)) {
2100 task_lock(tsk);
2101 oldcg = tsk->cgroups;
2102 get_css_set(oldcg);
2103 task_unlock(tsk);
2104 /* see if the new one for us is already in the list? */
2105 if (css_set_check_fetched(cgrp, tsk, oldcg, &newcg_list)) {
2106 /* was already there, nothing to do. */
2107 put_css_set(oldcg);
2108 } else {
2109 /* we don't already have it. get new one. */
2110 retval = css_set_prefetch(cgrp, oldcg, &newcg_list); 2180 retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
2111 put_css_set(oldcg);
2112 if (retval) 2181 if (retval)
2113 goto out_list_teardown; 2182 goto out_list_teardown;
2114 } 2183 }
2115 } 2184 }
2116 2185
2117 /* 2186 /*
2118 * step 3: now that we're guaranteed success wrt the css_sets, proceed 2187 * step 3: now that we're guaranteed success wrt the css_sets,
2119 * to move all tasks to the new cgroup, calling ss->attach_task for each 2188 * proceed to move all tasks to the new cgroup. There are no
2120 * one along the way. there are no failure cases after here, so this is 2189 * failure cases after here, so this is the commit point.
2121 * the commit point.
2122 */ 2190 */
2123 for_each_subsys(root, ss) {
2124 if (ss->pre_attach)
2125 ss->pre_attach(cgrp);
2126 }
2127 for (i = 0; i < group_size; i++) { 2191 for (i = 0; i < group_size; i++) {
2128 tsk = flex_array_get_ptr(group, i); 2192 tc = flex_array_get(group, i);
2129 /* leave current thread as it is if it's already there */ 2193 retval = cgroup_task_migrate(cgrp, tc->cgrp, tc->task, true);
2130 oldcgrp = task_cgroup_from_root(tsk, root); 2194 BUG_ON(retval);
2131 if (cgrp == oldcgrp)
2132 continue;
2133 /* if the thread is PF_EXITING, it can just get skipped. */
2134 retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
2135 if (retval == 0) {
2136 /* attach each task to each subsystem */
2137 for_each_subsys(root, ss) {
2138 if (ss->attach_task)
2139 ss->attach_task(cgrp, tsk);
2140 }
2141 } else {
2142 BUG_ON(retval != -ESRCH);
2143 }
2144 } 2195 }
2145 /* nothing is sensitive to fork() after this point. */ 2196 /* nothing is sensitive to fork() after this point. */
2146 2197
2147 /* 2198 /*
2148 * step 4: do expensive, non-thread-specific subsystem callbacks. 2199 * step 4: do subsystem attach callbacks.
2149 * TODO: if ever a subsystem needs to know the oldcgrp for each task
2150 * being moved, this call will need to be reworked to communicate that.
2151 */ 2200 */
2152 for_each_subsys(root, ss) { 2201 for_each_subsys(root, ss) {
2153 if (ss->attach) 2202 if (ss->attach)
2154 ss->attach(ss, cgrp, oldcgrp, leader); 2203 ss->attach(ss, cgrp, &tset);
2155 } 2204 }
2156 2205
2157 /* 2206 /*
@@ -2171,20 +2220,12 @@ out_cancel_attach:
2171 /* same deal as in cgroup_attach_task */ 2220 /* same deal as in cgroup_attach_task */
2172 if (retval) { 2221 if (retval) {
2173 for_each_subsys(root, ss) { 2222 for_each_subsys(root, ss) {
2174 if (ss == failed_ss) { 2223 if (ss == failed_ss)
2175 if (cancel_failed_ss && ss->cancel_attach)
2176 ss->cancel_attach(ss, cgrp, leader);
2177 break; 2224 break;
2178 }
2179 if (ss->cancel_attach) 2225 if (ss->cancel_attach)
2180 ss->cancel_attach(ss, cgrp, leader); 2226 ss->cancel_attach(ss, cgrp, &tset);
2181 } 2227 }
2182 } 2228 }
2183 /* clean up the array of referenced threads in the group. */
2184 for (i = 0; i < group_size; i++) {
2185 tsk = flex_array_get_ptr(group, i);
2186 put_task_struct(tsk);
2187 }
2188out_free_group_list: 2229out_free_group_list:
2189 flex_array_free(group); 2230 flex_array_free(group);
2190 return retval; 2231 return retval;
@@ -2192,8 +2233,8 @@ out_free_group_list:
2192 2233
2193/* 2234/*
2194 * Find the task_struct of the task to attach by vpid and pass it along to the 2235 * Find the task_struct of the task to attach by vpid and pass it along to the
2195 * function to attach either it or all tasks in its threadgroup. Will take 2236 * function to attach either it or all tasks in its threadgroup. Will lock
2196 * cgroup_mutex; may take task_lock of task. 2237 * cgroup_mutex and threadgroup; may take task_lock of task.
2197 */ 2238 */
2198static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) 2239static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
2199{ 2240{
@@ -2220,13 +2261,7 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
2220 * detect it later. 2261 * detect it later.
2221 */ 2262 */
2222 tsk = tsk->group_leader; 2263 tsk = tsk->group_leader;
2223 } else if (tsk->flags & PF_EXITING) {
2224 /* optimization for the single-task-only case */
2225 rcu_read_unlock();
2226 cgroup_unlock();
2227 return -ESRCH;
2228 } 2264 }
2229
2230 /* 2265 /*
2231 * even if we're attaching all tasks in the thread group, we 2266 * even if we're attaching all tasks in the thread group, we
2232 * only need to check permissions on one of them. 2267 * only need to check permissions on one of them.
@@ -2249,13 +2284,15 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
2249 get_task_struct(tsk); 2284 get_task_struct(tsk);
2250 } 2285 }
2251 2286
2252 if (threadgroup) { 2287 threadgroup_lock(tsk);
2253 threadgroup_fork_write_lock(tsk); 2288
2289 if (threadgroup)
2254 ret = cgroup_attach_proc(cgrp, tsk); 2290 ret = cgroup_attach_proc(cgrp, tsk);
2255 threadgroup_fork_write_unlock(tsk); 2291 else
2256 } else {
2257 ret = cgroup_attach_task(cgrp, tsk); 2292 ret = cgroup_attach_task(cgrp, tsk);
2258 } 2293
2294 threadgroup_unlock(tsk);
2295
2259 put_task_struct(tsk); 2296 put_task_struct(tsk);
2260 cgroup_unlock(); 2297 cgroup_unlock();
2261 return ret; 2298 return ret;
@@ -2306,7 +2343,9 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
2306 return -EINVAL; 2343 return -EINVAL;
2307 if (!cgroup_lock_live_group(cgrp)) 2344 if (!cgroup_lock_live_group(cgrp))
2308 return -ENODEV; 2345 return -ENODEV;
2346 mutex_lock(&cgroup_root_mutex);
2309 strcpy(cgrp->root->release_agent_path, buffer); 2347 strcpy(cgrp->root->release_agent_path, buffer);
2348 mutex_unlock(&cgroup_root_mutex);
2310 cgroup_unlock(); 2349 cgroup_unlock();
2311 return 0; 2350 return 0;
2312} 2351}
@@ -2789,6 +2828,7 @@ static void cgroup_enable_task_cg_lists(void)
2789} 2828}
2790 2829
2791void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) 2830void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
2831 __acquires(css_set_lock)
2792{ 2832{
2793 /* 2833 /*
2794 * The first time anyone tries to iterate across a cgroup, 2834 * The first time anyone tries to iterate across a cgroup,
@@ -2828,6 +2868,7 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
2828} 2868}
2829 2869
2830void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it) 2870void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
2871 __releases(css_set_lock)
2831{ 2872{
2832 read_unlock(&css_set_lock); 2873 read_unlock(&css_set_lock);
2833} 2874}
@@ -4491,20 +4532,31 @@ static const struct file_operations proc_cgroupstats_operations = {
4491 * 4532 *
4492 * A pointer to the shared css_set was automatically copied in 4533 * A pointer to the shared css_set was automatically copied in
4493 * fork.c by dup_task_struct(). However, we ignore that copy, since 4534 * fork.c by dup_task_struct(). However, we ignore that copy, since
4494 * it was not made under the protection of RCU or cgroup_mutex, so 4535 * it was not made under the protection of RCU, cgroup_mutex or
4495 * might no longer be a valid cgroup pointer. cgroup_attach_task() might 4536 * threadgroup_change_begin(), so it might no longer be a valid
4496 * have already changed current->cgroups, allowing the previously 4537 * cgroup pointer. cgroup_attach_task() might have already changed
4497 * referenced cgroup group to be removed and freed. 4538 * current->cgroups, allowing the previously referenced cgroup
4539 * group to be removed and freed.
4540 *
4541 * Outside the pointer validity we also need to process the css_set
4542 * inheritance between threadgoup_change_begin() and
4543 * threadgoup_change_end(), this way there is no leak in any process
4544 * wide migration performed by cgroup_attach_proc() that could otherwise
4545 * miss a thread because it is too early or too late in the fork stage.
4498 * 4546 *
4499 * At the point that cgroup_fork() is called, 'current' is the parent 4547 * At the point that cgroup_fork() is called, 'current' is the parent
4500 * task, and the passed argument 'child' points to the child task. 4548 * task, and the passed argument 'child' points to the child task.
4501 */ 4549 */
4502void cgroup_fork(struct task_struct *child) 4550void cgroup_fork(struct task_struct *child)
4503{ 4551{
4504 task_lock(current); 4552 /*
4553 * We don't need to task_lock() current because current->cgroups
4554 * can't be changed concurrently here. The parent obviously hasn't
4555 * exited and called cgroup_exit(), and we are synchronized against
4556 * cgroup migration through threadgroup_change_begin().
4557 */
4505 child->cgroups = current->cgroups; 4558 child->cgroups = current->cgroups;
4506 get_css_set(child->cgroups); 4559 get_css_set(child->cgroups);
4507 task_unlock(current);
4508 INIT_LIST_HEAD(&child->cg_list); 4560 INIT_LIST_HEAD(&child->cg_list);
4509} 4561}
4510 4562
@@ -4546,10 +4598,19 @@ void cgroup_post_fork(struct task_struct *child)
4546{ 4598{
4547 if (use_task_css_set_links) { 4599 if (use_task_css_set_links) {
4548 write_lock(&css_set_lock); 4600 write_lock(&css_set_lock);
4549 task_lock(child); 4601 if (list_empty(&child->cg_list)) {
4550 if (list_empty(&child->cg_list)) 4602 /*
4603 * It's safe to use child->cgroups without task_lock()
4604 * here because we are protected through
4605 * threadgroup_change_begin() against concurrent
4606 * css_set change in cgroup_task_migrate(). Also
4607 * the task can't exit at that point until
4608 * wake_up_new_task() is called, so we are protected
4609 * against cgroup_exit() setting child->cgroup to
4610 * init_css_set.
4611 */
4551 list_add(&child->cg_list, &child->cgroups->tasks); 4612 list_add(&child->cg_list, &child->cgroups->tasks);
4552 task_unlock(child); 4613 }
4553 write_unlock(&css_set_lock); 4614 write_unlock(&css_set_lock);
4554 } 4615 }
4555} 4616}
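The per-subsystem conversions that follow (freezer, cpuset, perf, cpu) all use
a cgroup_taskset_for_each() iterator. Its definition lives in
include/linux/cgroup.h and is therefore not part of this kernel/-limited diff;
a plausible shape, consistent with the helpers added above and with the way it
is invoked below (a sketch, not the verbatim header hunk), is:

    /* iterate @tset, optionally skipping tasks whose current cgroup
     * is @skip_cgrp (pass NULL to visit every task) */
    #define cgroup_taskset_for_each(task, skip_cgrp, tset)                  \
            for ((task) = cgroup_taskset_first(tset); (task);               \
                 (task) = cgroup_taskset_next(tset))                        \
                    if (!(skip_cgrp) ||                                     \
                        cgroup_taskset_cur_cgroup(tset) != (skip_cgrp))
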
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index fcb93fca782d..fc0646b78a64 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -166,13 +166,17 @@ static bool is_task_frozen_enough(struct task_struct *task)
166 */ 166 */
167static int freezer_can_attach(struct cgroup_subsys *ss, 167static int freezer_can_attach(struct cgroup_subsys *ss,
168 struct cgroup *new_cgroup, 168 struct cgroup *new_cgroup,
169 struct task_struct *task) 169 struct cgroup_taskset *tset)
170{ 170{
171 struct freezer *freezer; 171 struct freezer *freezer;
172 struct task_struct *task;
172 173
173 /* 174 /*
174 * Anything frozen can't move or be moved to/from. 175 * Anything frozen can't move or be moved to/from.
175 */ 176 */
177 cgroup_taskset_for_each(task, new_cgroup, tset)
178 if (cgroup_freezing(task))
179 return -EBUSY;
176 180
177 freezer = cgroup_freezer(new_cgroup); 181 freezer = cgroup_freezer(new_cgroup);
178 if (freezer->state != CGROUP_THAWED) 182 if (freezer->state != CGROUP_THAWED)
@@ -181,11 +185,6 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
181 return 0; 185 return 0;
182} 186}
183 187
184static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
185{
186 return cgroup_freezing(tsk) ? -EBUSY : 0;
187}
188
189static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) 188static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
190{ 189{
191 struct freezer *freezer; 190 struct freezer *freezer;
@@ -381,10 +380,5 @@ struct cgroup_subsys freezer_subsys = {
381 .populate = freezer_populate, 380 .populate = freezer_populate,
382 .subsys_id = freezer_subsys_id, 381 .subsys_id = freezer_subsys_id,
383 .can_attach = freezer_can_attach, 382 .can_attach = freezer_can_attach,
384 .can_attach_task = freezer_can_attach_task,
385 .pre_attach = NULL,
386 .attach_task = NULL,
387 .attach = NULL,
388 .fork = freezer_fork, 383 .fork = freezer_fork,
389 .exit = NULL,
390}; 384};
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 0b1712dba587..a09ac2b9a661 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1389,79 +1389,73 @@ static int fmeter_getrate(struct fmeter *fmp)
1389 return val; 1389 return val;
1390} 1390}
1391 1391
1392/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
1393static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
1394 struct task_struct *tsk)
1395{
1396 struct cpuset *cs = cgroup_cs(cont);
1397
1398 if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
1399 return -ENOSPC;
1400
1401 /*
1402 * Kthreads bound to specific cpus cannot be moved to a new cpuset; we
1403 * cannot change their cpu affinity and isolating such threads by their
1404 * set of allowed nodes is unnecessary. Thus, cpusets are not
1405 * applicable for such threads. This prevents checking for success of
1406 * set_cpus_allowed_ptr() on all attached tasks before cpus_allowed may
1407 * be changed.
1408 */
1409 if (tsk->flags & PF_THREAD_BOUND)
1410 return -EINVAL;
1411
1412 return 0;
1413}
1414
1415static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task)
1416{
1417 return security_task_setscheduler(task);
1418}
1419
1420/* 1392/*
1421 * Protected by cgroup_lock. The nodemasks must be stored globally because 1393 * Protected by cgroup_lock. The nodemasks must be stored globally because
1422 * dynamically allocating them is not allowed in pre_attach, and they must 1394 * dynamically allocating them is not allowed in can_attach, and they must
1423 * persist among pre_attach, attach_task, and attach. 1395 * persist until attach.
1424 */ 1396 */
1425static cpumask_var_t cpus_attach; 1397static cpumask_var_t cpus_attach;
1426static nodemask_t cpuset_attach_nodemask_from; 1398static nodemask_t cpuset_attach_nodemask_from;
1427static nodemask_t cpuset_attach_nodemask_to; 1399static nodemask_t cpuset_attach_nodemask_to;
1428 1400
1429/* Set-up work for before attaching each task. */ 1401/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
1430static void cpuset_pre_attach(struct cgroup *cont) 1402static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
1403 struct cgroup_taskset *tset)
1431{ 1404{
1432 struct cpuset *cs = cgroup_cs(cont); 1405 struct cpuset *cs = cgroup_cs(cgrp);
1406 struct task_struct *task;
1407 int ret;
1408
1409 if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
1410 return -ENOSPC;
1411
1412 cgroup_taskset_for_each(task, cgrp, tset) {
1413 /*
1414 * Kthreads bound to specific cpus cannot be moved to a new
1415 * cpuset; we cannot change their cpu affinity and
1416 * isolating such threads by their set of allowed nodes is
1417 * unnecessary. Thus, cpusets are not applicable for such
1418 * threads. This prevents checking for success of
1419 * set_cpus_allowed_ptr() on all attached tasks before
1420 * cpus_allowed may be changed.
1421 */
1422 if (task->flags & PF_THREAD_BOUND)
1423 return -EINVAL;
1424 if ((ret = security_task_setscheduler(task)))
1425 return ret;
1426 }
1433 1427
1428 /* prepare for attach */
1434 if (cs == &top_cpuset) 1429 if (cs == &top_cpuset)
1435 cpumask_copy(cpus_attach, cpu_possible_mask); 1430 cpumask_copy(cpus_attach, cpu_possible_mask);
1436 else 1431 else
1437 guarantee_online_cpus(cs, cpus_attach); 1432 guarantee_online_cpus(cs, cpus_attach);
1438 1433
1439 guarantee_online_mems(cs, &cpuset_attach_nodemask_to); 1434 guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
1440}
1441
1442/* Per-thread attachment work. */
1443static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk)
1444{
1445 int err;
1446 struct cpuset *cs = cgroup_cs(cont);
1447 1435
1448 /* 1436 return 0;
1449 * can_attach beforehand should guarantee that this doesn't fail.
1450 * TODO: have a better way to handle failure here
1451 */
1452 err = set_cpus_allowed_ptr(tsk, cpus_attach);
1453 WARN_ON_ONCE(err);
1454
1455 cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to);
1456 cpuset_update_task_spread_flag(cs, tsk);
1457} 1437}
1458 1438
1459static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, 1439static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
1460 struct cgroup *oldcont, struct task_struct *tsk) 1440 struct cgroup_taskset *tset)
1461{ 1441{
1462 struct mm_struct *mm; 1442 struct mm_struct *mm;
1463 struct cpuset *cs = cgroup_cs(cont); 1443 struct task_struct *task;
1464 struct cpuset *oldcs = cgroup_cs(oldcont); 1444 struct task_struct *leader = cgroup_taskset_first(tset);
1445 struct cgroup *oldcgrp = cgroup_taskset_cur_cgroup(tset);
1446 struct cpuset *cs = cgroup_cs(cgrp);
1447 struct cpuset *oldcs = cgroup_cs(oldcgrp);
1448
1449 cgroup_taskset_for_each(task, cgrp, tset) {
1450 /*
1451 * can_attach beforehand should guarantee that this doesn't
1452 * fail. TODO: have a better way to handle failure here
1453 */
1454 WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
1455
1456 cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
1457 cpuset_update_task_spread_flag(cs, task);
1458 }
1465 1459
1466 /* 1460 /*
1467 * Change mm, possibly for multiple threads in a threadgroup. This is 1461 * Change mm, possibly for multiple threads in a threadgroup. This is
@@ -1469,7 +1463,7 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
1469 */ 1463 */
1470 cpuset_attach_nodemask_from = oldcs->mems_allowed; 1464 cpuset_attach_nodemask_from = oldcs->mems_allowed;
1471 cpuset_attach_nodemask_to = cs->mems_allowed; 1465 cpuset_attach_nodemask_to = cs->mems_allowed;
1472 mm = get_task_mm(tsk); 1466 mm = get_task_mm(leader);
1473 if (mm) { 1467 if (mm) {
1474 mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); 1468 mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
1475 if (is_memory_migrate(cs)) 1469 if (is_memory_migrate(cs))
@@ -1925,9 +1919,6 @@ struct cgroup_subsys cpuset_subsys = {
1925 .create = cpuset_create, 1919 .create = cpuset_create,
1926 .destroy = cpuset_destroy, 1920 .destroy = cpuset_destroy,
1927 .can_attach = cpuset_can_attach, 1921 .can_attach = cpuset_can_attach,
1928 .can_attach_task = cpuset_can_attach_task,
1929 .pre_attach = cpuset_pre_attach,
1930 .attach_task = cpuset_attach_task,
1931 .attach = cpuset_attach, 1922 .attach = cpuset_attach,
1932 .populate = cpuset_populate, 1923 .populate = cpuset_populate,
1933 .post_clone = cpuset_post_clone, 1924 .post_clone = cpuset_post_clone,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3afc68c08433..a8f4ac001a00 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6941,10 +6941,13 @@ static int __perf_cgroup_move(void *info)
6941 return 0; 6941 return 0;
6942} 6942}
6943 6943
6944static void 6944static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
6945perf_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *task) 6945 struct cgroup_taskset *tset)
6946{ 6946{
6947 task_function_call(task, __perf_cgroup_move, task); 6947 struct task_struct *task;
6948
6949 cgroup_taskset_for_each(task, cgrp, tset)
6950 task_function_call(task, __perf_cgroup_move, task);
6948} 6951}
6949 6952
6950static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, 6953static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
@@ -6958,7 +6961,7 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
6958 if (!(task->flags & PF_EXITING)) 6961 if (!(task->flags & PF_EXITING))
6959 return; 6962 return;
6960 6963
6961 perf_cgroup_attach_task(cgrp, task); 6964 task_function_call(task, __perf_cgroup_move, task);
6962} 6965}
6963 6966
6964struct cgroup_subsys perf_subsys = { 6967struct cgroup_subsys perf_subsys = {
@@ -6967,6 +6970,6 @@ struct cgroup_subsys perf_subsys = {
6967 .create = perf_cgroup_create, 6970 .create = perf_cgroup_create,
6968 .destroy = perf_cgroup_destroy, 6971 .destroy = perf_cgroup_destroy,
6969 .exit = perf_cgroup_exit, 6972 .exit = perf_cgroup_exit,
6970 .attach_task = perf_cgroup_attach_task, 6973 .attach = perf_cgroup_attach,
6971}; 6974};
6972#endif /* CONFIG_CGROUP_PERF */ 6975#endif /* CONFIG_CGROUP_PERF */
diff --git a/kernel/fork.c b/kernel/fork.c
index f34f894c4b98..b00711ce7c13 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -972,7 +972,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
972 sched_autogroup_fork(sig); 972 sched_autogroup_fork(sig);
973 973
974#ifdef CONFIG_CGROUPS 974#ifdef CONFIG_CGROUPS
975 init_rwsem(&sig->threadgroup_fork_lock); 975 init_rwsem(&sig->group_rwsem);
976#endif 976#endif
977 977
978 sig->oom_adj = current->signal->oom_adj; 978 sig->oom_adj = current->signal->oom_adj;
@@ -1153,7 +1153,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1153 p->io_context = NULL; 1153 p->io_context = NULL;
1154 p->audit_context = NULL; 1154 p->audit_context = NULL;
1155 if (clone_flags & CLONE_THREAD) 1155 if (clone_flags & CLONE_THREAD)
1156 threadgroup_fork_read_lock(current); 1156 threadgroup_change_begin(current);
1157 cgroup_fork(p); 1157 cgroup_fork(p);
1158#ifdef CONFIG_NUMA 1158#ifdef CONFIG_NUMA
1159 p->mempolicy = mpol_dup(p->mempolicy); 1159 p->mempolicy = mpol_dup(p->mempolicy);
@@ -1368,7 +1368,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1368 proc_fork_connector(p); 1368 proc_fork_connector(p);
1369 cgroup_post_fork(p); 1369 cgroup_post_fork(p);
1370 if (clone_flags & CLONE_THREAD) 1370 if (clone_flags & CLONE_THREAD)
1371 threadgroup_fork_read_unlock(current); 1371 threadgroup_change_end(current);
1372 perf_event_fork(p); 1372 perf_event_fork(p);
1373 return p; 1373 return p;
1374 1374
@@ -1403,7 +1403,7 @@ bad_fork_cleanup_policy:
1403bad_fork_cleanup_cgroup: 1403bad_fork_cleanup_cgroup:
1404#endif 1404#endif
1405 if (clone_flags & CLONE_THREAD) 1405 if (clone_flags & CLONE_THREAD)
1406 threadgroup_fork_read_unlock(current); 1406 threadgroup_change_end(current);
1407 cgroup_exit(p, cgroup_callbacks_done); 1407 cgroup_exit(p, cgroup_callbacks_done);
1408 delayacct_tsk_free(p); 1408 delayacct_tsk_free(p);
1409 module_put(task_thread_info(p)->exec_domain->module); 1409 module_put(task_thread_info(p)->exec_domain->module);
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 34683efa2cce..6d269cce7aa1 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -159,8 +159,7 @@ int res_counter_memparse_write_strategy(const char *buf,
159 return 0; 159 return 0;
160 } 160 }
161 161
162 /* FIXME - make memparse() take const char* args */ 162 *res = memparse(buf, &end);
163 *res = memparse((char *)buf, &end);
164 if (*end != '\0') 163 if (*end != '\0')
165 return -EINVAL; 164 return -EINVAL;
166 165
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0ac0f811d623..cecbb64be05f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7563,24 +7563,31 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
7563 sched_destroy_group(tg); 7563 sched_destroy_group(tg);
7564} 7564}
7565 7565
7566static int 7566static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
7567cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) 7567 struct cgroup_taskset *tset)
7568{ 7568{
7569 struct task_struct *task;
7570
7571 cgroup_taskset_for_each(task, cgrp, tset) {
7569#ifdef CONFIG_RT_GROUP_SCHED 7572#ifdef CONFIG_RT_GROUP_SCHED
7570 if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk)) 7573 if (!sched_rt_can_attach(cgroup_tg(cgrp), task))
7571 return -EINVAL; 7574 return -EINVAL;
7572#else 7575#else
7573 /* We don't support RT-tasks being in separate groups */ 7576 /* We don't support RT-tasks being in separate groups */
7574 if (tsk->sched_class != &fair_sched_class) 7577 if (task->sched_class != &fair_sched_class)
7575 return -EINVAL; 7578 return -EINVAL;
7576#endif 7579#endif
7580 }
7577 return 0; 7581 return 0;
7578} 7582}
7579 7583
7580static void 7584static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
7581cpu_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) 7585 struct cgroup_taskset *tset)
7582{ 7586{
7583 sched_move_task(tsk); 7587 struct task_struct *task;
7588
7589 cgroup_taskset_for_each(task, cgrp, tset)
7590 sched_move_task(task);
7584} 7591}
7585 7592
7586static void 7593static void
@@ -7915,8 +7922,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
7915 .name = "cpu", 7922 .name = "cpu",
7916 .create = cpu_cgroup_create, 7923 .create = cpu_cgroup_create,
7917 .destroy = cpu_cgroup_destroy, 7924 .destroy = cpu_cgroup_destroy,
7918 .can_attach_task = cpu_cgroup_can_attach_task, 7925 .can_attach = cpu_cgroup_can_attach,
7919 .attach_task = cpu_cgroup_attach_task, 7926 .attach = cpu_cgroup_attach,
7920 .exit = cpu_cgroup_exit, 7927 .exit = cpu_cgroup_exit,
7921 .populate = cpu_cgroup_populate, 7928 .populate = cpu_cgroup_populate,
7922 .subsys_id = cpu_cgroup_subsys_id, 7929 .subsys_id = cpu_cgroup_subsys_id,
diff --git a/kernel/signal.c b/kernel/signal.c
index 56ce3a618b28..bb0efa5705ed 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2355,8 +2355,15 @@ void exit_signals(struct task_struct *tsk)
2355 int group_stop = 0; 2355 int group_stop = 0;
2356 sigset_t unblocked; 2356 sigset_t unblocked;
2357 2357
2358 /*
2359 * @tsk is about to have PF_EXITING set - lock out users which
2360 * expect stable threadgroup.
2361 */
2362 threadgroup_change_begin(tsk);
2363
2358 if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { 2364 if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) {
2359 tsk->flags |= PF_EXITING; 2365 tsk->flags |= PF_EXITING;
2366 threadgroup_change_end(tsk);
2360 return; 2367 return;
2361 } 2368 }
2362 2369
@@ -2366,6 +2373,9 @@ void exit_signals(struct task_struct *tsk)
2366 * see wants_signal(), do_signal_stop(). 2373 * see wants_signal(), do_signal_stop().
2367 */ 2374 */
2368 tsk->flags |= PF_EXITING; 2375 tsk->flags |= PF_EXITING;
2376
2377 threadgroup_change_end(tsk);
2378
2369 if (!signal_pending(tsk)) 2379 if (!signal_pending(tsk))
2370 goto out; 2380 goto out;
2371 2381
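
The threadgroup_change_begin()/threadgroup_change_end() and
threadgroup_lock()/threadgroup_unlock() helpers used above in fork.c, signal.c
and cgroup.c are defined in include/linux/sched.h and so fall outside this
kernel/-limited diff. Conceptually they bracket thread-group membership changes
with the renamed signal->group_rwsem, roughly as sketched here; note that per
the "extend threadgroup_lock() to cover exit and exec" commit in the list
above, the real threadgroup_lock() also interlocks with exec, a detail this
sketch omits:

    /* sketch only: fork and exit paths take the rwsem for reading ... */
    static inline void threadgroup_change_begin(struct task_struct *tsk)
    {
            down_read(&tsk->signal->group_rwsem);
    }

    static inline void threadgroup_change_end(struct task_struct *tsk)
    {
            up_read(&tsk->signal->group_rwsem);
    }

    /* ... while cgroup migration takes it for writing, so the thread
     * group can neither gain nor lose members while being moved */
    static inline void threadgroup_lock(struct task_struct *tsk)
    {
            down_write(&tsk->signal->group_rwsem);
    }

    static inline void threadgroup_unlock(struct task_struct *tsk)
    {
            up_write(&tsk->signal->group_rwsem);
    }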