-rw-r--r--  Documentation/cgroups/cgroups.txt |  51
-rw-r--r--  block/blk-cgroup.c                |  45
-rw-r--r--  include/linux/cgroup.h            |  31
-rw-r--r--  include/linux/init_task.h         |   9
-rw-r--r--  include/linux/sched.h             |  73
-rw-r--r--  kernel/cgroup.c                   | 401
-rw-r--r--  kernel/cgroup_freezer.c           |  16
-rw-r--r--  kernel/cpuset.c                   | 105
-rw-r--r--  kernel/events/core.c              |  13
-rw-r--r--  kernel/fork.c                     |   8
-rw-r--r--  kernel/res_counter.c              |   3
-rw-r--r--  kernel/sched/core.c               |  31
-rw-r--r--  kernel/signal.c                   |  10
-rw-r--r--  mm/memcontrol.c                   |  16
-rw-r--r--  security/device_cgroup.c          |   7
15 files changed, 470 insertions, 349 deletions
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 9c452ef2328c..a7c96ae5557c 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -594,53 +594,44 @@ rmdir() will fail with it. From this behavior, pre_destroy() can be
 called multiple times against a cgroup.
 
 int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-	       struct task_struct *task)
+	       struct cgroup_taskset *tset)
 (cgroup_mutex held by caller)
 
-Called prior to moving a task into a cgroup; if the subsystem
-returns an error, this will abort the attach operation. If a NULL
-task is passed, then a successful result indicates that *any*
-unspecified task can be moved into the cgroup. Note that this isn't
-called on a fork. If this method returns 0 (success) then this should
-remain valid while the caller holds cgroup_mutex and it is ensured that either
+Called prior to moving one or more tasks into a cgroup; if the
+subsystem returns an error, this will abort the attach operation.
+@tset contains the tasks to be attached and is guaranteed to have at
+least one task in it.
+
+If there are multiple tasks in the taskset, then:
+  - it's guaranteed that all are from the same thread group
+  - @tset contains all tasks from the thread group whether or not
+    they're switching cgroups
+  - the first task is the leader
+
+Each @tset entry also contains the task's old cgroup and tasks which
+aren't switching cgroup can be skipped easily using the
+cgroup_taskset_for_each() iterator. Note that this isn't called on a
+fork. If this method returns 0 (success) then this should remain valid
+while the caller holds cgroup_mutex and it is ensured that either
 attach() or cancel_attach() will be called in future.
 
-int can_attach_task(struct cgroup *cgrp, struct task_struct *tsk);
-(cgroup_mutex held by caller)
-
-As can_attach, but for operations that must be run once per task to be
-attached (possibly many when using cgroup_attach_proc). Called after
-can_attach.
-
 void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-		   struct task_struct *task, bool threadgroup)
+		   struct cgroup_taskset *tset)
 (cgroup_mutex held by caller)
 
 Called when a task attach operation has failed after can_attach() has succeeded.
 A subsystem whose can_attach() has some side-effects should provide this
 function, so that the subsystem can implement a rollback. If not, not necessary.
 This will be called only about subsystems whose can_attach() operation have
-succeeded.
-
-void pre_attach(struct cgroup *cgrp);
-(cgroup_mutex held by caller)
-
-For any non-per-thread attachment work that needs to happen before
-attach_task. Needed by cpuset.
+succeeded. The parameters are identical to can_attach().
 
 void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-	    struct cgroup *old_cgrp, struct task_struct *task)
+	    struct cgroup_taskset *tset)
 (cgroup_mutex held by caller)
 
 Called after the task has been attached to the cgroup, to allow any
 post-attachment activity that requires memory allocations or blocking.
-
-void attach_task(struct cgroup *cgrp, struct task_struct *tsk);
-(cgroup_mutex held by caller)
-
-As attach, but for operations that must be run once per task to be attached,
-like can_attach_task. Called before attach. Currently does not support any
-subsystem that might need the old_cgrp for every thread in the group.
+The parameters are identical to can_attach().
 
 void fork(struct cgroup_subsy *ss, struct task_struct *task)
 
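
To make the documented interface change concrete, the following is a minimal sketch of a subsystem written against the new taskset-based callbacks. It is not taken from this patch: the "foo" subsystem, its PF_KTHREAD policy and the pr_debug() call are purely illustrative; only the callback signatures and the cgroup_taskset_for_each() iterator come from the patch (the blkio conversion in the next hunk follows the same pattern).

	/* Illustrative sketch only -- not part of this patch. */
	#include <linux/cgroup.h>
	#include <linux/printk.h>
	#include <linux/sched.h>

	static int foo_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
				  struct cgroup_taskset *tset)
	{
		struct task_struct *task;

		/* Veto the whole attach if any migrating task fails the check. */
		cgroup_taskset_for_each(task, cgrp, tset) {
			if (task->flags & PF_KTHREAD)	/* made-up policy */
				return -EINVAL;
		}
		return 0;
	}

	static void foo_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
			       struct cgroup_taskset *tset)
	{
		struct task_struct *task;

		/* Passing @cgrp as @skip_cgrp skips tasks that aren't actually moving. */
		cgroup_taskset_for_each(task, cgrp, tset)
			pr_debug("foo: attached pid %d\n", task_pid_nr(task));
	}

	struct cgroup_subsys foo_subsys = {
		.name		= "foo",
		.can_attach	= foo_can_attach,
		.attach		= foo_attach,
		/* .create, .destroy, .subsys_id and friends omitted from this sketch */
	};
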
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 8f630cec906e..b8c143d68ee0 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -30,8 +30,10 @@ EXPORT_SYMBOL_GPL(blkio_root_cgroup);
30 30
31static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *, 31static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
32 struct cgroup *); 32 struct cgroup *);
33static int blkiocg_can_attach_task(struct cgroup *, struct task_struct *); 33static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
34static void blkiocg_attach_task(struct cgroup *, struct task_struct *); 34 struct cgroup_taskset *);
35static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
36 struct cgroup_taskset *);
35static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *); 37static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
36static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *); 38static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
37 39
@@ -44,8 +46,8 @@ static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
44struct cgroup_subsys blkio_subsys = { 46struct cgroup_subsys blkio_subsys = {
45 .name = "blkio", 47 .name = "blkio",
46 .create = blkiocg_create, 48 .create = blkiocg_create,
47 .can_attach_task = blkiocg_can_attach_task, 49 .can_attach = blkiocg_can_attach,
48 .attach_task = blkiocg_attach_task, 50 .attach = blkiocg_attach,
49 .destroy = blkiocg_destroy, 51 .destroy = blkiocg_destroy,
50 .populate = blkiocg_populate, 52 .populate = blkiocg_populate,
51#ifdef CONFIG_BLK_CGROUP 53#ifdef CONFIG_BLK_CGROUP
@@ -1626,30 +1628,39 @@ done:
1626 * of the main cic data structures. For now we allow a task to change 1628 * of the main cic data structures. For now we allow a task to change
1627 * its cgroup only if it's the only owner of its ioc. 1629 * its cgroup only if it's the only owner of its ioc.
1628 */ 1630 */
1629static int blkiocg_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) 1631static int blkiocg_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
1632 struct cgroup_taskset *tset)
1630{ 1633{
1634 struct task_struct *task;
1631 struct io_context *ioc; 1635 struct io_context *ioc;
1632 int ret = 0; 1636 int ret = 0;
1633 1637
1634 /* task_lock() is needed to avoid races with exit_io_context() */ 1638 /* task_lock() is needed to avoid races with exit_io_context() */
1635 task_lock(tsk); 1639 cgroup_taskset_for_each(task, cgrp, tset) {
1636 ioc = tsk->io_context; 1640 task_lock(task);
1637 if (ioc && atomic_read(&ioc->nr_tasks) > 1) 1641 ioc = task->io_context;
1638 ret = -EINVAL; 1642 if (ioc && atomic_read(&ioc->nr_tasks) > 1)
1639 task_unlock(tsk); 1643 ret = -EINVAL;
1640 1644 task_unlock(task);
1645 if (ret)
1646 break;
1647 }
1641 return ret; 1648 return ret;
1642} 1649}
1643 1650
1644static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk) 1651static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
1652 struct cgroup_taskset *tset)
1645{ 1653{
1654 struct task_struct *task;
1646 struct io_context *ioc; 1655 struct io_context *ioc;
1647 1656
1648 task_lock(tsk); 1657 cgroup_taskset_for_each(task, cgrp, tset) {
1649 ioc = tsk->io_context; 1658 task_lock(task);
1650 if (ioc) 1659 ioc = task->io_context;
1651 ioc->cgroup_changed = 1; 1660 if (ioc)
1652 task_unlock(tsk); 1661 ioc->cgroup_changed = 1;
1662 task_unlock(task);
1663 }
1653} 1664}
1654 1665
1655void blkio_policy_register(struct blkio_policy_type *blkiop) 1666void blkio_policy_register(struct blkio_policy_type *blkiop)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index a17becc36ca1..e9b602151caf 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -457,6 +457,28 @@ void cgroup_exclude_rmdir(struct cgroup_subsys_state *css);
457void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css); 457void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css);
458 458
459/* 459/*
460 * Control Group taskset, used to pass around set of tasks to cgroup_subsys
461 * methods.
462 */
463struct cgroup_taskset;
464struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
465struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
466struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset);
467int cgroup_taskset_size(struct cgroup_taskset *tset);
468
469/**
470 * cgroup_taskset_for_each - iterate cgroup_taskset
471 * @task: the loop cursor
472 * @skip_cgrp: skip if task's cgroup matches this, %NULL to iterate through all
473 * @tset: taskset to iterate
474 */
475#define cgroup_taskset_for_each(task, skip_cgrp, tset) \
476 for ((task) = cgroup_taskset_first((tset)); (task); \
477 (task) = cgroup_taskset_next((tset))) \
478 if (!(skip_cgrp) || \
479 cgroup_taskset_cur_cgroup((tset)) != (skip_cgrp))
480
481/*
460 * Control Group subsystem type. 482 * Control Group subsystem type.
461 * See Documentation/cgroups/cgroups.txt for details 483 * See Documentation/cgroups/cgroups.txt for details
462 */ 484 */
@@ -467,14 +489,11 @@ struct cgroup_subsys {
467 int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); 489 int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
468 void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); 490 void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
469 int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, 491 int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
470 struct task_struct *tsk); 492 struct cgroup_taskset *tset);
471 int (*can_attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
472 void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, 493 void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
473 struct task_struct *tsk); 494 struct cgroup_taskset *tset);
474 void (*pre_attach)(struct cgroup *cgrp);
475 void (*attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
476 void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, 495 void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
477 struct cgroup *old_cgrp, struct task_struct *tsk); 496 struct cgroup_taskset *tset);
478 void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); 497 void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
479 void (*exit)(struct cgroup_subsys *ss, struct cgroup *cgrp, 498 void (*exit)(struct cgroup_subsys *ss, struct cgroup *cgrp,
480 struct cgroup *old_cgrp, struct task_struct *task); 499 struct cgroup *old_cgrp, struct task_struct *task);
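
As a usage note on the header additions above: cgroup_taskset_for_each() is only convenience sugar over the accessor functions, and a subsystem can open-code the same walk when it wants the per-entry cgroup explicitly. The helper below is hypothetical, written only to illustrate the accessors declared in this hunk.

	/* Hypothetical helper -- not part of this patch. */
	#include <linux/cgroup.h>
	#include <linux/kernel.h>
	#include <linux/sched.h>

	static int count_switching_tasks(struct cgroup *new_cgrp,
					 struct cgroup_taskset *tset)
	{
		struct task_struct *task;
		int nr = 0;

		for (task = cgroup_taskset_first(tset); task;
		     task = cgroup_taskset_next(tset)) {
			/* cur_cgroup refers to the task just returned */
			if (cgroup_taskset_cur_cgroup(tset) != new_cgrp)
				nr++;
		}

		WARN_ON(nr > cgroup_taskset_size(tset));
		return nr;
	}
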
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 32574eef9394..9c66b1ada9d7 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -23,11 +23,10 @@ extern struct files_struct init_files;
23extern struct fs_struct init_fs; 23extern struct fs_struct init_fs;
24 24
25#ifdef CONFIG_CGROUPS 25#ifdef CONFIG_CGROUPS
26#define INIT_THREADGROUP_FORK_LOCK(sig) \ 26#define INIT_GROUP_RWSEM(sig) \
27 .threadgroup_fork_lock = \ 27 .group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem),
28 __RWSEM_INITIALIZER(sig.threadgroup_fork_lock),
29#else 28#else
30#define INIT_THREADGROUP_FORK_LOCK(sig) 29#define INIT_GROUP_RWSEM(sig)
31#endif 30#endif
32 31
33#define INIT_SIGNALS(sig) { \ 32#define INIT_SIGNALS(sig) { \
@@ -46,7 +45,7 @@ extern struct fs_struct init_fs;
46 }, \ 45 }, \
47 .cred_guard_mutex = \ 46 .cred_guard_mutex = \
48 __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ 47 __MUTEX_INITIALIZER(sig.cred_guard_mutex), \
49 INIT_THREADGROUP_FORK_LOCK(sig) \ 48 INIT_GROUP_RWSEM(sig) \
50} 49}
51 50
52extern struct nsproxy init_nsproxy; 51extern struct nsproxy init_nsproxy;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ad93e1ec8c65..f044f66018f2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -637,13 +637,15 @@ struct signal_struct {
637#endif 637#endif
638#ifdef CONFIG_CGROUPS 638#ifdef CONFIG_CGROUPS
639 /* 639 /*
640 * The threadgroup_fork_lock prevents threads from forking with 640 * group_rwsem prevents new tasks from entering the threadgroup and
641 * CLONE_THREAD while held for writing. Use this for fork-sensitive 641 * member tasks from exiting,a more specifically, setting of
642 * threadgroup-wide operations. It's taken for reading in fork.c in 642 * PF_EXITING. fork and exit paths are protected with this rwsem
643 * copy_process(). 643 * using threadgroup_change_begin/end(). Users which require
644 * Currently only needed write-side by cgroups. 644 * threadgroup to remain stable should use threadgroup_[un]lock()
645 * which also takes care of exec path. Currently, cgroup is the
646 * only user.
645 */ 647 */
646 struct rw_semaphore threadgroup_fork_lock; 648 struct rw_semaphore group_rwsem;
647#endif 649#endif
648 650
649 int oom_adj; /* OOM kill score adjustment (bit shift) */ 651 int oom_adj; /* OOM kill score adjustment (bit shift) */
@@ -2394,29 +2396,62 @@ static inline void unlock_task_sighand(struct task_struct *tsk,
2394 spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); 2396 spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
2395} 2397}
2396 2398
2397/* See the declaration of threadgroup_fork_lock in signal_struct. */
2398#ifdef CONFIG_CGROUPS 2399#ifdef CONFIG_CGROUPS
2399static inline void threadgroup_fork_read_lock(struct task_struct *tsk) 2400static inline void threadgroup_change_begin(struct task_struct *tsk)
2400{ 2401{
2401 down_read(&tsk->signal->threadgroup_fork_lock); 2402 down_read(&tsk->signal->group_rwsem);
2402} 2403}
2403static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) 2404static inline void threadgroup_change_end(struct task_struct *tsk)
2404{ 2405{
2405 up_read(&tsk->signal->threadgroup_fork_lock); 2406 up_read(&tsk->signal->group_rwsem);
2406} 2407}
2407static inline void threadgroup_fork_write_lock(struct task_struct *tsk) 2408
2409/**
2410 * threadgroup_lock - lock threadgroup
2411 * @tsk: member task of the threadgroup to lock
2412 *
2413 * Lock the threadgroup @tsk belongs to. No new task is allowed to enter
2414 * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or
2415 * perform exec. This is useful for cases where the threadgroup needs to
2416 * stay stable across blockable operations.
2417 *
2418 * fork and exit paths explicitly call threadgroup_change_{begin|end}() for
2419 * synchronization. While held, no new task will be added to threadgroup
2420 * and no existing live task will have its PF_EXITING set.
2421 *
2422 * During exec, a task goes and puts its thread group through unusual
2423 * changes. After de-threading, exclusive access is assumed to resources
2424 * which are usually shared by tasks in the same group - e.g. sighand may
2425 * be replaced with a new one. Also, the exec'ing task takes over group
2426 * leader role including its pid. Exclude these changes while locked by
2427 * grabbing cred_guard_mutex which is used to synchronize exec path.
2428 */
2429static inline void threadgroup_lock(struct task_struct *tsk)
2408{ 2430{
2409 down_write(&tsk->signal->threadgroup_fork_lock); 2431 /*
2432 * exec uses exit for de-threading nesting group_rwsem inside
2433 * cred_guard_mutex. Grab cred_guard_mutex first.
2434 */
2435 mutex_lock(&tsk->signal->cred_guard_mutex);
2436 down_write(&tsk->signal->group_rwsem);
2410} 2437}
2411static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) 2438
2439/**
2440 * threadgroup_unlock - unlock threadgroup
2441 * @tsk: member task of the threadgroup to unlock
2442 *
2443 * Reverse threadgroup_lock().
2444 */
2445static inline void threadgroup_unlock(struct task_struct *tsk)
2412{ 2446{
2413 up_write(&tsk->signal->threadgroup_fork_lock); 2447 up_write(&tsk->signal->group_rwsem);
2448 mutex_unlock(&tsk->signal->cred_guard_mutex);
2414} 2449}
2415#else 2450#else
2416static inline void threadgroup_fork_read_lock(struct task_struct *tsk) {} 2451static inline void threadgroup_change_begin(struct task_struct *tsk) {}
2417static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) {} 2452static inline void threadgroup_change_end(struct task_struct *tsk) {}
2418static inline void threadgroup_fork_write_lock(struct task_struct *tsk) {} 2453static inline void threadgroup_lock(struct task_struct *tsk) {}
2419static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) {} 2454static inline void threadgroup_unlock(struct task_struct *tsk) {}
2420#endif 2455#endif
2421 2456
2422#ifndef __HAVE_THREAD_FUNCTIONS 2457#ifndef __HAVE_THREAD_FUNCTIONS
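
The kernel-doc comments above spell out the intended split between the fork/exit side and the threadgroup-migration side. The two functions below are illustrative stand-ins, not the real copy_process() or cgroup attach paths; they show only the calling convention implied by those comments.

	/* Illustrative stand-ins only -- not part of this patch. */
	#include <linux/sched.h>

	static void fork_side(struct task_struct *parent)
	{
		threadgroup_change_begin(parent);	/* read side: fork/exit path */
		/*
		 * copy_process()-style work that must not race with a
		 * threadgroup-wide cgroup migration would go here.
		 */
		threadgroup_change_end(parent);
	}

	static void migration_side(struct task_struct *leader)
	{
		/*
		 * Write side: while held, no thread can join the group, set
		 * PF_EXITING or exec. threadgroup_lock() itself grabs
		 * cred_guard_mutex before group_rwsem, so callers need not
		 * worry about that ordering.
		 */
		threadgroup_lock(leader);
		/* walk and migrate every thread in leader's group here */
		threadgroup_unlock(leader);
	}
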
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 7cab65f83f1d..a5d3b5325f77 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -63,7 +63,24 @@
63 63
64#include <linux/atomic.h> 64#include <linux/atomic.h>
65 65
66/*
67 * cgroup_mutex is the master lock. Any modification to cgroup or its
68 * hierarchy must be performed while holding it.
69 *
70 * cgroup_root_mutex nests inside cgroup_mutex and should be held to modify
71 * cgroupfs_root of any cgroup hierarchy - subsys list, flags,
72 * release_agent_path and so on. Modifying requires both cgroup_mutex and
73 * cgroup_root_mutex. Readers can acquire either of the two. This is to
74 * break the following locking order cycle.
75 *
76 * A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem
77 * B. namespace_sem -> cgroup_mutex
78 *
79 * B happens only through cgroup_show_options() and using cgroup_root_mutex
80 * breaks it.
81 */
66static DEFINE_MUTEX(cgroup_mutex); 82static DEFINE_MUTEX(cgroup_mutex);
83static DEFINE_MUTEX(cgroup_root_mutex);
67 84
68/* 85/*
69 * Generate an array of cgroup subsystem pointers. At boot time, this is 86 * Generate an array of cgroup subsystem pointers. At boot time, this is
@@ -921,7 +938,7 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
921 * 938 *
922 * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex; 939 * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
923 */ 940 */
924DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); 941static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
925 942
926static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp) 943static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
927{ 944{
@@ -953,6 +970,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
953 int i; 970 int i;
954 971
955 BUG_ON(!mutex_is_locked(&cgroup_mutex)); 972 BUG_ON(!mutex_is_locked(&cgroup_mutex));
973 BUG_ON(!mutex_is_locked(&cgroup_root_mutex));
956 974
957 removed_bits = root->actual_subsys_bits & ~final_bits; 975 removed_bits = root->actual_subsys_bits & ~final_bits;
958 added_bits = final_bits & ~root->actual_subsys_bits; 976 added_bits = final_bits & ~root->actual_subsys_bits;
@@ -1043,7 +1061,7 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
1043 struct cgroupfs_root *root = dentry->d_sb->s_fs_info; 1061 struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
1044 struct cgroup_subsys *ss; 1062 struct cgroup_subsys *ss;
1045 1063
1046 mutex_lock(&cgroup_mutex); 1064 mutex_lock(&cgroup_root_mutex);
1047 for_each_subsys(root, ss) 1065 for_each_subsys(root, ss)
1048 seq_printf(seq, ",%s", ss->name); 1066 seq_printf(seq, ",%s", ss->name);
1049 if (test_bit(ROOT_NOPREFIX, &root->flags)) 1067 if (test_bit(ROOT_NOPREFIX, &root->flags))
@@ -1054,7 +1072,7 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
1054 seq_puts(seq, ",clone_children"); 1072 seq_puts(seq, ",clone_children");
1055 if (strlen(root->name)) 1073 if (strlen(root->name))
1056 seq_printf(seq, ",name=%s", root->name); 1074 seq_printf(seq, ",name=%s", root->name);
1057 mutex_unlock(&cgroup_mutex); 1075 mutex_unlock(&cgroup_root_mutex);
1058 return 0; 1076 return 0;
1059} 1077}
1060 1078
@@ -1175,10 +1193,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1175 1193
1176 /* 1194 /*
1177 * If the 'all' option was specified select all the subsystems, 1195 * If the 'all' option was specified select all the subsystems,
1178 * otherwise 'all, 'none' and a subsystem name options were not 1196 * otherwise if 'none', 'name=' and a subsystem name options
1179 * specified, let's default to 'all' 1197 * were not specified, let's default to 'all'
1180 */ 1198 */
1181 if (all_ss || (!all_ss && !one_ss && !opts->none)) { 1199 if (all_ss || (!one_ss && !opts->none && !opts->name)) {
1182 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 1200 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1183 struct cgroup_subsys *ss = subsys[i]; 1201 struct cgroup_subsys *ss = subsys[i];
1184 if (ss == NULL) 1202 if (ss == NULL)
@@ -1269,6 +1287,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1269 1287
1270 mutex_lock(&cgrp->dentry->d_inode->i_mutex); 1288 mutex_lock(&cgrp->dentry->d_inode->i_mutex);
1271 mutex_lock(&cgroup_mutex); 1289 mutex_lock(&cgroup_mutex);
1290 mutex_lock(&cgroup_root_mutex);
1272 1291
1273 /* See what subsystems are wanted */ 1292 /* See what subsystems are wanted */
1274 ret = parse_cgroupfs_options(data, &opts); 1293 ret = parse_cgroupfs_options(data, &opts);
@@ -1297,6 +1316,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1297 out_unlock: 1316 out_unlock:
1298 kfree(opts.release_agent); 1317 kfree(opts.release_agent);
1299 kfree(opts.name); 1318 kfree(opts.name);
1319 mutex_unlock(&cgroup_root_mutex);
1300 mutex_unlock(&cgroup_mutex); 1320 mutex_unlock(&cgroup_mutex);
1301 mutex_unlock(&cgrp->dentry->d_inode->i_mutex); 1321 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
1302 return ret; 1322 return ret;
@@ -1481,6 +1501,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1481 int ret = 0; 1501 int ret = 0;
1482 struct super_block *sb; 1502 struct super_block *sb;
1483 struct cgroupfs_root *new_root; 1503 struct cgroupfs_root *new_root;
1504 struct inode *inode;
1484 1505
1485 /* First find the desired set of subsystems */ 1506 /* First find the desired set of subsystems */
1486 mutex_lock(&cgroup_mutex); 1507 mutex_lock(&cgroup_mutex);
@@ -1514,7 +1535,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1514 /* We used the new root structure, so this is a new hierarchy */ 1535 /* We used the new root structure, so this is a new hierarchy */
1515 struct list_head tmp_cg_links; 1536 struct list_head tmp_cg_links;
1516 struct cgroup *root_cgrp = &root->top_cgroup; 1537 struct cgroup *root_cgrp = &root->top_cgroup;
1517 struct inode *inode;
1518 struct cgroupfs_root *existing_root; 1538 struct cgroupfs_root *existing_root;
1519 const struct cred *cred; 1539 const struct cred *cred;
1520 int i; 1540 int i;
@@ -1528,18 +1548,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1528 1548
1529 mutex_lock(&inode->i_mutex); 1549 mutex_lock(&inode->i_mutex);
1530 mutex_lock(&cgroup_mutex); 1550 mutex_lock(&cgroup_mutex);
1551 mutex_lock(&cgroup_root_mutex);
1531 1552
1532 if (strlen(root->name)) { 1553 /* Check for name clashes with existing mounts */
1533 /* Check for name clashes with existing mounts */ 1554 ret = -EBUSY;
1534 for_each_active_root(existing_root) { 1555 if (strlen(root->name))
1535 if (!strcmp(existing_root->name, root->name)) { 1556 for_each_active_root(existing_root)
1536 ret = -EBUSY; 1557 if (!strcmp(existing_root->name, root->name))
1537 mutex_unlock(&cgroup_mutex); 1558 goto unlock_drop;
1538 mutex_unlock(&inode->i_mutex);
1539 goto drop_new_super;
1540 }
1541 }
1542 }
1543 1559
1544 /* 1560 /*
1545 * We're accessing css_set_count without locking 1561 * We're accessing css_set_count without locking
@@ -1549,18 +1565,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1549 * have some link structures left over 1565 * have some link structures left over
1550 */ 1566 */
1551 ret = allocate_cg_links(css_set_count, &tmp_cg_links); 1567 ret = allocate_cg_links(css_set_count, &tmp_cg_links);
1552 if (ret) { 1568 if (ret)
1553 mutex_unlock(&cgroup_mutex); 1569 goto unlock_drop;
1554 mutex_unlock(&inode->i_mutex);
1555 goto drop_new_super;
1556 }
1557 1570
1558 ret = rebind_subsystems(root, root->subsys_bits); 1571 ret = rebind_subsystems(root, root->subsys_bits);
1559 if (ret == -EBUSY) { 1572 if (ret == -EBUSY) {
1560 mutex_unlock(&cgroup_mutex);
1561 mutex_unlock(&inode->i_mutex);
1562 free_cg_links(&tmp_cg_links); 1573 free_cg_links(&tmp_cg_links);
1563 goto drop_new_super; 1574 goto unlock_drop;
1564 } 1575 }
1565 /* 1576 /*
1566 * There must be no failure case after here, since rebinding 1577 * There must be no failure case after here, since rebinding
@@ -1599,6 +1610,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1599 cred = override_creds(&init_cred); 1610 cred = override_creds(&init_cred);
1600 cgroup_populate_dir(root_cgrp); 1611 cgroup_populate_dir(root_cgrp);
1601 revert_creds(cred); 1612 revert_creds(cred);
1613 mutex_unlock(&cgroup_root_mutex);
1602 mutex_unlock(&cgroup_mutex); 1614 mutex_unlock(&cgroup_mutex);
1603 mutex_unlock(&inode->i_mutex); 1615 mutex_unlock(&inode->i_mutex);
1604 } else { 1616 } else {
@@ -1615,6 +1627,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1615 kfree(opts.name); 1627 kfree(opts.name);
1616 return dget(sb->s_root); 1628 return dget(sb->s_root);
1617 1629
1630 unlock_drop:
1631 mutex_unlock(&cgroup_root_mutex);
1632 mutex_unlock(&cgroup_mutex);
1633 mutex_unlock(&inode->i_mutex);
1618 drop_new_super: 1634 drop_new_super:
1619 deactivate_locked_super(sb); 1635 deactivate_locked_super(sb);
1620 drop_modules: 1636 drop_modules:
@@ -1639,6 +1655,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
1639 BUG_ON(!list_empty(&cgrp->sibling)); 1655 BUG_ON(!list_empty(&cgrp->sibling));
1640 1656
1641 mutex_lock(&cgroup_mutex); 1657 mutex_lock(&cgroup_mutex);
1658 mutex_lock(&cgroup_root_mutex);
1642 1659
1643 /* Rebind all subsystems back to the default hierarchy */ 1660 /* Rebind all subsystems back to the default hierarchy */
1644 ret = rebind_subsystems(root, 0); 1661 ret = rebind_subsystems(root, 0);
@@ -1664,6 +1681,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
1664 root_count--; 1681 root_count--;
1665 } 1682 }
1666 1683
1684 mutex_unlock(&cgroup_root_mutex);
1667 mutex_unlock(&cgroup_mutex); 1685 mutex_unlock(&cgroup_mutex);
1668 1686
1669 kill_litter_super(sb); 1687 kill_litter_super(sb);
@@ -1740,11 +1758,90 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1740EXPORT_SYMBOL_GPL(cgroup_path); 1758EXPORT_SYMBOL_GPL(cgroup_path);
1741 1759
1742/* 1760/*
1761 * Control Group taskset
1762 */
1763struct task_and_cgroup {
1764 struct task_struct *task;
1765 struct cgroup *cgrp;
1766};
1767
1768struct cgroup_taskset {
1769 struct task_and_cgroup single;
1770 struct flex_array *tc_array;
1771 int tc_array_len;
1772 int idx;
1773 struct cgroup *cur_cgrp;
1774};
1775
1776/**
1777 * cgroup_taskset_first - reset taskset and return the first task
1778 * @tset: taskset of interest
1779 *
1780 * @tset iteration is initialized and the first task is returned.
1781 */
1782struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
1783{
1784 if (tset->tc_array) {
1785 tset->idx = 0;
1786 return cgroup_taskset_next(tset);
1787 } else {
1788 tset->cur_cgrp = tset->single.cgrp;
1789 return tset->single.task;
1790 }
1791}
1792EXPORT_SYMBOL_GPL(cgroup_taskset_first);
1793
1794/**
1795 * cgroup_taskset_next - iterate to the next task in taskset
1796 * @tset: taskset of interest
1797 *
1798 * Return the next task in @tset. Iteration must have been initialized
1799 * with cgroup_taskset_first().
1800 */
1801struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
1802{
1803 struct task_and_cgroup *tc;
1804
1805 if (!tset->tc_array || tset->idx >= tset->tc_array_len)
1806 return NULL;
1807
1808 tc = flex_array_get(tset->tc_array, tset->idx++);
1809 tset->cur_cgrp = tc->cgrp;
1810 return tc->task;
1811}
1812EXPORT_SYMBOL_GPL(cgroup_taskset_next);
1813
1814/**
1815 * cgroup_taskset_cur_cgroup - return the matching cgroup for the current task
1816 * @tset: taskset of interest
1817 *
1818 * Return the cgroup for the current (last returned) task of @tset. This
1819 * function must be preceded by either cgroup_taskset_first() or
1820 * cgroup_taskset_next().
1821 */
1822struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset)
1823{
1824 return tset->cur_cgrp;
1825}
1826EXPORT_SYMBOL_GPL(cgroup_taskset_cur_cgroup);
1827
1828/**
1829 * cgroup_taskset_size - return the number of tasks in taskset
1830 * @tset: taskset of interest
1831 */
1832int cgroup_taskset_size(struct cgroup_taskset *tset)
1833{
1834 return tset->tc_array ? tset->tc_array_len : 1;
1835}
1836EXPORT_SYMBOL_GPL(cgroup_taskset_size);
1837
1838
1839/*
1743 * cgroup_task_migrate - move a task from one cgroup to another. 1840 * cgroup_task_migrate - move a task from one cgroup to another.
1744 * 1841 *
1745 * 'guarantee' is set if the caller promises that a new css_set for the task 1842 * 'guarantee' is set if the caller promises that a new css_set for the task
1746 * will already exist. If not set, this function might sleep, and can fail with 1843 * will already exist. If not set, this function might sleep, and can fail with
1747 * -ENOMEM. Otherwise, it can only fail with -ESRCH. 1844 * -ENOMEM. Must be called with cgroup_mutex and threadgroup locked.
1748 */ 1845 */
1749static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, 1846static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
1750 struct task_struct *tsk, bool guarantee) 1847 struct task_struct *tsk, bool guarantee)
@@ -1753,14 +1850,12 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
1753 struct css_set *newcg; 1850 struct css_set *newcg;
1754 1851
1755 /* 1852 /*
1756 * get old css_set. we need to take task_lock and refcount it, because 1853 * We are synchronized through threadgroup_lock() against PF_EXITING
1757 * an exiting task can change its css_set to init_css_set and drop its 1854 * setting such that we can't race against cgroup_exit() changing the
1758 * old one without taking cgroup_mutex. 1855 * css_set to init_css_set and dropping the old one.
1759 */ 1856 */
1760 task_lock(tsk); 1857 WARN_ON_ONCE(tsk->flags & PF_EXITING);
1761 oldcg = tsk->cgroups; 1858 oldcg = tsk->cgroups;
1762 get_css_set(oldcg);
1763 task_unlock(tsk);
1764 1859
1765 /* locate or allocate a new css_set for this task. */ 1860 /* locate or allocate a new css_set for this task. */
1766 if (guarantee) { 1861 if (guarantee) {
@@ -1775,20 +1870,11 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
1775 might_sleep(); 1870 might_sleep();
1776 /* find_css_set will give us newcg already referenced. */ 1871 /* find_css_set will give us newcg already referenced. */
1777 newcg = find_css_set(oldcg, cgrp); 1872 newcg = find_css_set(oldcg, cgrp);
1778 if (!newcg) { 1873 if (!newcg)
1779 put_css_set(oldcg);
1780 return -ENOMEM; 1874 return -ENOMEM;
1781 }
1782 } 1875 }
1783 put_css_set(oldcg);
1784 1876
1785 /* if PF_EXITING is set, the tsk->cgroups pointer is no longer safe. */
1786 task_lock(tsk); 1877 task_lock(tsk);
1787 if (tsk->flags & PF_EXITING) {
1788 task_unlock(tsk);
1789 put_css_set(newcg);
1790 return -ESRCH;
1791 }
1792 rcu_assign_pointer(tsk->cgroups, newcg); 1878 rcu_assign_pointer(tsk->cgroups, newcg);
1793 task_unlock(tsk); 1879 task_unlock(tsk);
1794 1880
@@ -1814,8 +1900,8 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
1814 * @cgrp: the cgroup the task is attaching to 1900 * @cgrp: the cgroup the task is attaching to
1815 * @tsk: the task to be attached 1901 * @tsk: the task to be attached
1816 * 1902 *
1817 * Call holding cgroup_mutex. May take task_lock of 1903 * Call with cgroup_mutex and threadgroup locked. May take task_lock of
1818 * the task 'tsk' during call. 1904 * @tsk during call.
1819 */ 1905 */
1820int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) 1906int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1821{ 1907{
@@ -1823,15 +1909,23 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1823 struct cgroup_subsys *ss, *failed_ss = NULL; 1909 struct cgroup_subsys *ss, *failed_ss = NULL;
1824 struct cgroup *oldcgrp; 1910 struct cgroup *oldcgrp;
1825 struct cgroupfs_root *root = cgrp->root; 1911 struct cgroupfs_root *root = cgrp->root;
1912 struct cgroup_taskset tset = { };
1913
1914 /* @tsk either already exited or can't exit until the end */
1915 if (tsk->flags & PF_EXITING)
1916 return -ESRCH;
1826 1917
1827 /* Nothing to do if the task is already in that cgroup */ 1918 /* Nothing to do if the task is already in that cgroup */
1828 oldcgrp = task_cgroup_from_root(tsk, root); 1919 oldcgrp = task_cgroup_from_root(tsk, root);
1829 if (cgrp == oldcgrp) 1920 if (cgrp == oldcgrp)
1830 return 0; 1921 return 0;
1831 1922
1923 tset.single.task = tsk;
1924 tset.single.cgrp = oldcgrp;
1925
1832 for_each_subsys(root, ss) { 1926 for_each_subsys(root, ss) {
1833 if (ss->can_attach) { 1927 if (ss->can_attach) {
1834 retval = ss->can_attach(ss, cgrp, tsk); 1928 retval = ss->can_attach(ss, cgrp, &tset);
1835 if (retval) { 1929 if (retval) {
1836 /* 1930 /*
1837 * Remember on which subsystem the can_attach() 1931 * Remember on which subsystem the can_attach()
@@ -1843,13 +1937,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1843 goto out; 1937 goto out;
1844 } 1938 }
1845 } 1939 }
1846 if (ss->can_attach_task) {
1847 retval = ss->can_attach_task(cgrp, tsk);
1848 if (retval) {
1849 failed_ss = ss;
1850 goto out;
1851 }
1852 }
1853 } 1940 }
1854 1941
1855 retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false); 1942 retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
@@ -1857,12 +1944,8 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1857 goto out; 1944 goto out;
1858 1945
1859 for_each_subsys(root, ss) { 1946 for_each_subsys(root, ss) {
1860 if (ss->pre_attach)
1861 ss->pre_attach(cgrp);
1862 if (ss->attach_task)
1863 ss->attach_task(cgrp, tsk);
1864 if (ss->attach) 1947 if (ss->attach)
1865 ss->attach(ss, cgrp, oldcgrp, tsk); 1948 ss->attach(ss, cgrp, &tset);
1866 } 1949 }
1867 1950
1868 synchronize_rcu(); 1951 synchronize_rcu();
@@ -1884,7 +1967,7 @@ out:
1884 */ 1967 */
1885 break; 1968 break;
1886 if (ss->cancel_attach) 1969 if (ss->cancel_attach)
1887 ss->cancel_attach(ss, cgrp, tsk); 1970 ss->cancel_attach(ss, cgrp, &tset);
1888 } 1971 }
1889 } 1972 }
1890 return retval; 1973 return retval;
@@ -1935,23 +2018,17 @@ static bool css_set_check_fetched(struct cgroup *cgrp,
1935 2018
1936 read_lock(&css_set_lock); 2019 read_lock(&css_set_lock);
1937 newcg = find_existing_css_set(cg, cgrp, template); 2020 newcg = find_existing_css_set(cg, cgrp, template);
1938 if (newcg)
1939 get_css_set(newcg);
1940 read_unlock(&css_set_lock); 2021 read_unlock(&css_set_lock);
1941 2022
1942 /* doesn't exist at all? */ 2023 /* doesn't exist at all? */
1943 if (!newcg) 2024 if (!newcg)
1944 return false; 2025 return false;
1945 /* see if it's already in the list */ 2026 /* see if it's already in the list */
1946 list_for_each_entry(cg_entry, newcg_list, links) { 2027 list_for_each_entry(cg_entry, newcg_list, links)
1947 if (cg_entry->cg == newcg) { 2028 if (cg_entry->cg == newcg)
1948 put_css_set(newcg);
1949 return true; 2029 return true;
1950 }
1951 }
1952 2030
1953 /* not found */ 2031 /* not found */
1954 put_css_set(newcg);
1955 return false; 2032 return false;
1956} 2033}
1957 2034
@@ -1985,21 +2062,21 @@ static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg,
1985 * @cgrp: the cgroup to attach to 2062 * @cgrp: the cgroup to attach to
1986 * @leader: the threadgroup leader task_struct of the group to be attached 2063 * @leader: the threadgroup leader task_struct of the group to be attached
1987 * 2064 *
1988 * Call holding cgroup_mutex and the threadgroup_fork_lock of the leader. Will 2065 * Call holding cgroup_mutex and the group_rwsem of the leader. Will take
1989 * take task_lock of each thread in leader's threadgroup individually in turn. 2066 * task_lock of each thread in leader's threadgroup individually in turn.
1990 */ 2067 */
1991int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) 2068static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
1992{ 2069{
1993 int retval, i, group_size; 2070 int retval, i, group_size;
1994 struct cgroup_subsys *ss, *failed_ss = NULL; 2071 struct cgroup_subsys *ss, *failed_ss = NULL;
1995 bool cancel_failed_ss = false;
1996 /* guaranteed to be initialized later, but the compiler needs this */ 2072 /* guaranteed to be initialized later, but the compiler needs this */
1997 struct cgroup *oldcgrp = NULL;
1998 struct css_set *oldcg; 2073 struct css_set *oldcg;
1999 struct cgroupfs_root *root = cgrp->root; 2074 struct cgroupfs_root *root = cgrp->root;
2000 /* threadgroup list cursor and array */ 2075 /* threadgroup list cursor and array */
2001 struct task_struct *tsk; 2076 struct task_struct *tsk;
2077 struct task_and_cgroup *tc;
2002 struct flex_array *group; 2078 struct flex_array *group;
2079 struct cgroup_taskset tset = { };
2003 /* 2080 /*
2004 * we need to make sure we have css_sets for all the tasks we're 2081 * we need to make sure we have css_sets for all the tasks we're
2005 * going to move -before- we actually start moving them, so that in 2082 * going to move -before- we actually start moving them, so that in
@@ -2012,13 +2089,12 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2012 * step 0: in order to do expensive, possibly blocking operations for 2089 * step 0: in order to do expensive, possibly blocking operations for
2013 * every thread, we cannot iterate the thread group list, since it needs 2090 * every thread, we cannot iterate the thread group list, since it needs
2014 * rcu or tasklist locked. instead, build an array of all threads in the 2091 * rcu or tasklist locked. instead, build an array of all threads in the
2015 * group - threadgroup_fork_lock prevents new threads from appearing, 2092 * group - group_rwsem prevents new threads from appearing, and if
2016 * and if threads exit, this will just be an over-estimate. 2093 * threads exit, this will just be an over-estimate.
2017 */ 2094 */
2018 group_size = get_nr_threads(leader); 2095 group_size = get_nr_threads(leader);
2019 /* flex_array supports very large thread-groups better than kmalloc. */ 2096 /* flex_array supports very large thread-groups better than kmalloc. */
2020 group = flex_array_alloc(sizeof(struct task_struct *), group_size, 2097 group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL);
2021 GFP_KERNEL);
2022 if (!group) 2098 if (!group)
2023 return -ENOMEM; 2099 return -ENOMEM;
2024 /* pre-allocate to guarantee space while iterating in rcu read-side. */ 2100 /* pre-allocate to guarantee space while iterating in rcu read-side. */
@@ -2040,49 +2116,53 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2040 retval = -EAGAIN; 2116 retval = -EAGAIN;
2041 goto out_free_group_list; 2117 goto out_free_group_list;
2042 } 2118 }
2043 /* take a reference on each task in the group to go in the array. */ 2119
2044 tsk = leader; 2120 tsk = leader;
2045 i = 0; 2121 i = 0;
2046 do { 2122 do {
2123 struct task_and_cgroup ent;
2124
2125 /* @tsk either already exited or can't exit until the end */
2126 if (tsk->flags & PF_EXITING)
2127 continue;
2128
2047 /* as per above, nr_threads may decrease, but not increase. */ 2129 /* as per above, nr_threads may decrease, but not increase. */
2048 BUG_ON(i >= group_size); 2130 BUG_ON(i >= group_size);
2049 get_task_struct(tsk);
2050 /* 2131 /*
2051 * saying GFP_ATOMIC has no effect here because we did prealloc 2132 * saying GFP_ATOMIC has no effect here because we did prealloc
2052 * earlier, but it's good form to communicate our expectations. 2133 * earlier, but it's good form to communicate our expectations.
2053 */ 2134 */
2054 retval = flex_array_put_ptr(group, i, tsk, GFP_ATOMIC); 2135 ent.task = tsk;
2136 ent.cgrp = task_cgroup_from_root(tsk, root);
2137 /* nothing to do if this task is already in the cgroup */
2138 if (ent.cgrp == cgrp)
2139 continue;
2140 retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
2055 BUG_ON(retval != 0); 2141 BUG_ON(retval != 0);
2056 i++; 2142 i++;
2057 } while_each_thread(leader, tsk); 2143 } while_each_thread(leader, tsk);
2058 /* remember the number of threads in the array for later. */ 2144 /* remember the number of threads in the array for later. */
2059 group_size = i; 2145 group_size = i;
2146 tset.tc_array = group;
2147 tset.tc_array_len = group_size;
2060 read_unlock(&tasklist_lock); 2148 read_unlock(&tasklist_lock);
2061 2149
2150 /* methods shouldn't be called if no task is actually migrating */
2151 retval = 0;
2152 if (!group_size)
2153 goto out_free_group_list;
2154
2062 /* 2155 /*
2063 * step 1: check that we can legitimately attach to the cgroup. 2156 * step 1: check that we can legitimately attach to the cgroup.
2064 */ 2157 */
2065 for_each_subsys(root, ss) { 2158 for_each_subsys(root, ss) {
2066 if (ss->can_attach) { 2159 if (ss->can_attach) {
2067 retval = ss->can_attach(ss, cgrp, leader); 2160 retval = ss->can_attach(ss, cgrp, &tset);
2068 if (retval) { 2161 if (retval) {
2069 failed_ss = ss; 2162 failed_ss = ss;
2070 goto out_cancel_attach; 2163 goto out_cancel_attach;
2071 } 2164 }
2072 } 2165 }
2073 /* a callback to be run on every thread in the threadgroup. */
2074 if (ss->can_attach_task) {
2075 /* run on each task in the threadgroup. */
2076 for (i = 0; i < group_size; i++) {
2077 tsk = flex_array_get_ptr(group, i);
2078 retval = ss->can_attach_task(cgrp, tsk);
2079 if (retval) {
2080 failed_ss = ss;
2081 cancel_failed_ss = true;
2082 goto out_cancel_attach;
2083 }
2084 }
2085 }
2086 } 2166 }
2087 2167
2088 /* 2168 /*
@@ -2091,67 +2171,36 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2091 */ 2171 */
2092 INIT_LIST_HEAD(&newcg_list); 2172 INIT_LIST_HEAD(&newcg_list);
2093 for (i = 0; i < group_size; i++) { 2173 for (i = 0; i < group_size; i++) {
2094 tsk = flex_array_get_ptr(group, i); 2174 tc = flex_array_get(group, i);
2095 /* nothing to do if this task is already in the cgroup */ 2175 oldcg = tc->task->cgroups;
2096 oldcgrp = task_cgroup_from_root(tsk, root); 2176
2097 if (cgrp == oldcgrp) 2177 /* if we don't already have it in the list get a new one */
2098 continue; 2178 if (!css_set_check_fetched(cgrp, tc->task, oldcg,
2099 /* get old css_set pointer */ 2179 &newcg_list)) {
2100 task_lock(tsk);
2101 oldcg = tsk->cgroups;
2102 get_css_set(oldcg);
2103 task_unlock(tsk);
2104 /* see if the new one for us is already in the list? */
2105 if (css_set_check_fetched(cgrp, tsk, oldcg, &newcg_list)) {
2106 /* was already there, nothing to do. */
2107 put_css_set(oldcg);
2108 } else {
2109 /* we don't already have it. get new one. */
2110 retval = css_set_prefetch(cgrp, oldcg, &newcg_list); 2180 retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
2111 put_css_set(oldcg);
2112 if (retval) 2181 if (retval)
2113 goto out_list_teardown; 2182 goto out_list_teardown;
2114 } 2183 }
2115 } 2184 }
2116 2185
2117 /* 2186 /*
2118 * step 3: now that we're guaranteed success wrt the css_sets, proceed 2187 * step 3: now that we're guaranteed success wrt the css_sets,
2119 * to move all tasks to the new cgroup, calling ss->attach_task for each 2188 * proceed to move all tasks to the new cgroup. There are no
2120 * one along the way. there are no failure cases after here, so this is 2189 * failure cases after here, so this is the commit point.
2121 * the commit point.
2122 */ 2190 */
2123 for_each_subsys(root, ss) {
2124 if (ss->pre_attach)
2125 ss->pre_attach(cgrp);
2126 }
2127 for (i = 0; i < group_size; i++) { 2191 for (i = 0; i < group_size; i++) {
2128 tsk = flex_array_get_ptr(group, i); 2192 tc = flex_array_get(group, i);
2129 /* leave current thread as it is if it's already there */ 2193 retval = cgroup_task_migrate(cgrp, tc->cgrp, tc->task, true);
2130 oldcgrp = task_cgroup_from_root(tsk, root); 2194 BUG_ON(retval);
2131 if (cgrp == oldcgrp)
2132 continue;
2133 /* if the thread is PF_EXITING, it can just get skipped. */
2134 retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
2135 if (retval == 0) {
2136 /* attach each task to each subsystem */
2137 for_each_subsys(root, ss) {
2138 if (ss->attach_task)
2139 ss->attach_task(cgrp, tsk);
2140 }
2141 } else {
2142 BUG_ON(retval != -ESRCH);
2143 }
2144 } 2195 }
2145 /* nothing is sensitive to fork() after this point. */ 2196 /* nothing is sensitive to fork() after this point. */
2146 2197
2147 /* 2198 /*
2148 * step 4: do expensive, non-thread-specific subsystem callbacks. 2199 * step 4: do subsystem attach callbacks.
2149 * TODO: if ever a subsystem needs to know the oldcgrp for each task
2150 * being moved, this call will need to be reworked to communicate that.
2151 */ 2200 */
2152 for_each_subsys(root, ss) { 2201 for_each_subsys(root, ss) {
2153 if (ss->attach) 2202 if (ss->attach)
2154 ss->attach(ss, cgrp, oldcgrp, leader); 2203 ss->attach(ss, cgrp, &tset);
2155 } 2204 }
2156 2205
2157 /* 2206 /*
@@ -2171,20 +2220,12 @@ out_cancel_attach:
2171 /* same deal as in cgroup_attach_task */ 2220 /* same deal as in cgroup_attach_task */
2172 if (retval) { 2221 if (retval) {
2173 for_each_subsys(root, ss) { 2222 for_each_subsys(root, ss) {
2174 if (ss == failed_ss) { 2223 if (ss == failed_ss)
2175 if (cancel_failed_ss && ss->cancel_attach)
2176 ss->cancel_attach(ss, cgrp, leader);
2177 break; 2224 break;
2178 }
2179 if (ss->cancel_attach) 2225 if (ss->cancel_attach)
2180 ss->cancel_attach(ss, cgrp, leader); 2226 ss->cancel_attach(ss, cgrp, &tset);
2181 } 2227 }
2182 } 2228 }
2183 /* clean up the array of referenced threads in the group. */
2184 for (i = 0; i < group_size; i++) {
2185 tsk = flex_array_get_ptr(group, i);
2186 put_task_struct(tsk);
2187 }
2188out_free_group_list: 2229out_free_group_list:
2189 flex_array_free(group); 2230 flex_array_free(group);
2190 return retval; 2231 return retval;
@@ -2192,8 +2233,8 @@ out_free_group_list:
2192 2233
2193/* 2234/*
2194 * Find the task_struct of the task to attach by vpid and pass it along to the 2235 * Find the task_struct of the task to attach by vpid and pass it along to the
2195 * function to attach either it or all tasks in its threadgroup. Will take 2236 * function to attach either it or all tasks in its threadgroup. Will lock
2196 * cgroup_mutex; may take task_lock of task. 2237 * cgroup_mutex and threadgroup; may take task_lock of task.
2197 */ 2238 */
2198static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) 2239static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
2199{ 2240{
@@ -2220,13 +2261,7 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
2220 * detect it later. 2261 * detect it later.
2221 */ 2262 */
2222 tsk = tsk->group_leader; 2263 tsk = tsk->group_leader;
2223 } else if (tsk->flags & PF_EXITING) {
2224 /* optimization for the single-task-only case */
2225 rcu_read_unlock();
2226 cgroup_unlock();
2227 return -ESRCH;
2228 } 2264 }
2229
2230 /* 2265 /*
2231 * even if we're attaching all tasks in the thread group, we 2266 * even if we're attaching all tasks in the thread group, we
2232 * only need to check permissions on one of them. 2267 * only need to check permissions on one of them.
@@ -2249,13 +2284,15 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
2249 get_task_struct(tsk); 2284 get_task_struct(tsk);
2250 } 2285 }
2251 2286
2252 if (threadgroup) { 2287 threadgroup_lock(tsk);
2253 threadgroup_fork_write_lock(tsk); 2288
2289 if (threadgroup)
2254 ret = cgroup_attach_proc(cgrp, tsk); 2290 ret = cgroup_attach_proc(cgrp, tsk);
2255 threadgroup_fork_write_unlock(tsk); 2291 else
2256 } else {
2257 ret = cgroup_attach_task(cgrp, tsk); 2292 ret = cgroup_attach_task(cgrp, tsk);
2258 } 2293
2294 threadgroup_unlock(tsk);
2295
2259 put_task_struct(tsk); 2296 put_task_struct(tsk);
2260 cgroup_unlock(); 2297 cgroup_unlock();
2261 return ret; 2298 return ret;
@@ -2306,7 +2343,9 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
2306 return -EINVAL; 2343 return -EINVAL;
2307 if (!cgroup_lock_live_group(cgrp)) 2344 if (!cgroup_lock_live_group(cgrp))
2308 return -ENODEV; 2345 return -ENODEV;
2346 mutex_lock(&cgroup_root_mutex);
2309 strcpy(cgrp->root->release_agent_path, buffer); 2347 strcpy(cgrp->root->release_agent_path, buffer);
2348 mutex_unlock(&cgroup_root_mutex);
2310 cgroup_unlock(); 2349 cgroup_unlock();
2311 return 0; 2350 return 0;
2312} 2351}
@@ -2789,6 +2828,7 @@ static void cgroup_enable_task_cg_lists(void)
2789} 2828}
2790 2829
2791void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) 2830void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
2831 __acquires(css_set_lock)
2792{ 2832{
2793 /* 2833 /*
2794 * The first time anyone tries to iterate across a cgroup, 2834 * The first time anyone tries to iterate across a cgroup,
@@ -2828,6 +2868,7 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
2828} 2868}
2829 2869
2830void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it) 2870void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
2871 __releases(css_set_lock)
2831{ 2872{
2832 read_unlock(&css_set_lock); 2873 read_unlock(&css_set_lock);
2833} 2874}
@@ -4491,20 +4532,31 @@ static const struct file_operations proc_cgroupstats_operations = {
4491 * 4532 *
4492 * A pointer to the shared css_set was automatically copied in 4533 * A pointer to the shared css_set was automatically copied in
4493 * fork.c by dup_task_struct(). However, we ignore that copy, since 4534 * fork.c by dup_task_struct(). However, we ignore that copy, since
4494 * it was not made under the protection of RCU or cgroup_mutex, so 4535 * it was not made under the protection of RCU, cgroup_mutex or
4495 * might no longer be a valid cgroup pointer. cgroup_attach_task() might 4536 * threadgroup_change_begin(), so it might no longer be a valid
4496 * have already changed current->cgroups, allowing the previously 4537 * cgroup pointer. cgroup_attach_task() might have already changed
4497 * referenced cgroup group to be removed and freed. 4538 * current->cgroups, allowing the previously referenced cgroup
4539 * group to be removed and freed.
4540 *
4541 * Outside the pointer validity we also need to process the css_set
4542 * inheritance between threadgoup_change_begin() and
4543 * threadgoup_change_end(), this way there is no leak in any process
4544 * wide migration performed by cgroup_attach_proc() that could otherwise
4545 * miss a thread because it is too early or too late in the fork stage.
4498 * 4546 *
4499 * At the point that cgroup_fork() is called, 'current' is the parent 4547 * At the point that cgroup_fork() is called, 'current' is the parent
4500 * task, and the passed argument 'child' points to the child task. 4548 * task, and the passed argument 'child' points to the child task.
4501 */ 4549 */
4502void cgroup_fork(struct task_struct *child) 4550void cgroup_fork(struct task_struct *child)
4503{ 4551{
4504 task_lock(current); 4552 /*
4553 * We don't need to task_lock() current because current->cgroups
4554 * can't be changed concurrently here. The parent obviously hasn't
4555 * exited and called cgroup_exit(), and we are synchronized against
4556 * cgroup migration through threadgroup_change_begin().
4557 */
4505 child->cgroups = current->cgroups; 4558 child->cgroups = current->cgroups;
4506 get_css_set(child->cgroups); 4559 get_css_set(child->cgroups);
4507 task_unlock(current);
4508 INIT_LIST_HEAD(&child->cg_list); 4560 INIT_LIST_HEAD(&child->cg_list);
4509} 4561}
4510 4562
@@ -4546,10 +4598,19 @@ void cgroup_post_fork(struct task_struct *child)
4546{ 4598{
4547 if (use_task_css_set_links) { 4599 if (use_task_css_set_links) {
4548 write_lock(&css_set_lock); 4600 write_lock(&css_set_lock);
4549 task_lock(child); 4601 if (list_empty(&child->cg_list)) {
4550 if (list_empty(&child->cg_list)) 4602 /*
4603 * It's safe to use child->cgroups without task_lock()
4604 * here because we are protected through
4605 * threadgroup_change_begin() against concurrent
4606 * css_set change in cgroup_task_migrate(). Also
4607 * the task can't exit at that point until
4608 * wake_up_new_task() is called, so we are protected
4609 * against cgroup_exit() setting child->cgroup to
4610 * init_css_set.
4611 */
4551 list_add(&child->cg_list, &child->cgroups->tasks); 4612 list_add(&child->cg_list, &child->cgroups->tasks);
4552 task_unlock(child); 4613 }
4553 write_unlock(&css_set_lock); 4614 write_unlock(&css_set_lock);
4554 } 4615 }
4555} 4616}
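
Referring back to the locking comment added near the top of kernel/cgroup.c in this hunk: anything that modifies a cgroupfs_root must hold both cgroup_mutex and cgroup_root_mutex, in that order, while readers may hold either one. A hypothetical writer inside kernel/cgroup.c (both mutexes and struct cgroupfs_root are local to that file) would look roughly like this:

	/* Hypothetical example only -- not part of this patch. */
	static void cgroup_root_set_flag(struct cgroupfs_root *root, int bit)
	{
		mutex_lock(&cgroup_mutex);
		mutex_lock(&cgroup_root_mutex);	/* always nests inside cgroup_mutex */
		set_bit(bit, &root->flags);
		mutex_unlock(&cgroup_root_mutex);
		mutex_unlock(&cgroup_mutex);
	}
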
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index fcb93fca782d..fc0646b78a64 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -166,13 +166,17 @@ static bool is_task_frozen_enough(struct task_struct *task)
166 */ 166 */
167static int freezer_can_attach(struct cgroup_subsys *ss, 167static int freezer_can_attach(struct cgroup_subsys *ss,
168 struct cgroup *new_cgroup, 168 struct cgroup *new_cgroup,
169 struct task_struct *task) 169 struct cgroup_taskset *tset)
170{ 170{
171 struct freezer *freezer; 171 struct freezer *freezer;
172 struct task_struct *task;
172 173
173 /* 174 /*
174 * Anything frozen can't move or be moved to/from. 175 * Anything frozen can't move or be moved to/from.
175 */ 176 */
177 cgroup_taskset_for_each(task, new_cgroup, tset)
178 if (cgroup_freezing(task))
179 return -EBUSY;
176 180
177 freezer = cgroup_freezer(new_cgroup); 181 freezer = cgroup_freezer(new_cgroup);
178 if (freezer->state != CGROUP_THAWED) 182 if (freezer->state != CGROUP_THAWED)
@@ -181,11 +185,6 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
181 return 0; 185 return 0;
182} 186}
183 187
184static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
185{
186 return cgroup_freezing(tsk) ? -EBUSY : 0;
187}
188
189static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) 188static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
190{ 189{
191 struct freezer *freezer; 190 struct freezer *freezer;
@@ -381,10 +380,5 @@ struct cgroup_subsys freezer_subsys = {
381 .populate = freezer_populate, 380 .populate = freezer_populate,
382 .subsys_id = freezer_subsys_id, 381 .subsys_id = freezer_subsys_id,
383 .can_attach = freezer_can_attach, 382 .can_attach = freezer_can_attach,
384 .can_attach_task = freezer_can_attach_task,
385 .pre_attach = NULL,
386 .attach_task = NULL,
387 .attach = NULL,
388 .fork = freezer_fork, 383 .fork = freezer_fork,
389 .exit = NULL,
390}; 384};
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 0b1712dba587..a09ac2b9a661 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1389,79 +1389,73 @@ static int fmeter_getrate(struct fmeter *fmp)
1389 return val; 1389 return val;
1390} 1390}
1391 1391
1392/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
1393static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
1394 struct task_struct *tsk)
1395{
1396 struct cpuset *cs = cgroup_cs(cont);
1397
1398 if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
1399 return -ENOSPC;
1400
1401 /*
1402 * Kthreads bound to specific cpus cannot be moved to a new cpuset; we
1403 * cannot change their cpu affinity and isolating such threads by their
1404 * set of allowed nodes is unnecessary. Thus, cpusets are not
1405 * applicable for such threads. This prevents checking for success of
1406 * set_cpus_allowed_ptr() on all attached tasks before cpus_allowed may
1407 * be changed.
1408 */
1409 if (tsk->flags & PF_THREAD_BOUND)
1410 return -EINVAL;
1411
1412 return 0;
1413}
1414
1415static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task)
1416{
1417 return security_task_setscheduler(task);
1418}
1419
1420/* 1392/*
1421 * Protected by cgroup_lock. The nodemasks must be stored globally because 1393 * Protected by cgroup_lock. The nodemasks must be stored globally because
1422 * dynamically allocating them is not allowed in pre_attach, and they must 1394 * dynamically allocating them is not allowed in can_attach, and they must
1423 * persist among pre_attach, attach_task, and attach. 1395 * persist until attach.
1424 */ 1396 */
1425static cpumask_var_t cpus_attach; 1397static cpumask_var_t cpus_attach;
1426static nodemask_t cpuset_attach_nodemask_from; 1398static nodemask_t cpuset_attach_nodemask_from;
1427static nodemask_t cpuset_attach_nodemask_to; 1399static nodemask_t cpuset_attach_nodemask_to;
1428 1400
1429/* Set-up work for before attaching each task. */ 1401/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
1430static void cpuset_pre_attach(struct cgroup *cont) 1402static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
1403 struct cgroup_taskset *tset)
1431{ 1404{
1432 struct cpuset *cs = cgroup_cs(cont); 1405 struct cpuset *cs = cgroup_cs(cgrp);
1406 struct task_struct *task;
1407 int ret;
1408
1409 if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
1410 return -ENOSPC;
1411
1412 cgroup_taskset_for_each(task, cgrp, tset) {
1413 /*
1414 * Kthreads bound to specific cpus cannot be moved to a new
1415 * cpuset; we cannot change their cpu affinity and
1416 * isolating such threads by their set of allowed nodes is
1417 * unnecessary. Thus, cpusets are not applicable for such
1418 * threads. This prevents checking for success of
1419 * set_cpus_allowed_ptr() on all attached tasks before
1420 * cpus_allowed may be changed.
1421 */
1422 if (task->flags & PF_THREAD_BOUND)
1423 return -EINVAL;
1424 if ((ret = security_task_setscheduler(task)))
1425 return ret;
1426 }
1433 1427
1428 /* prepare for attach */
1434 if (cs == &top_cpuset) 1429 if (cs == &top_cpuset)
1435 cpumask_copy(cpus_attach, cpu_possible_mask); 1430 cpumask_copy(cpus_attach, cpu_possible_mask);
1436 else 1431 else
1437 guarantee_online_cpus(cs, cpus_attach); 1432 guarantee_online_cpus(cs, cpus_attach);
1438 1433
1439 guarantee_online_mems(cs, &cpuset_attach_nodemask_to); 1434 guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
1440}
1441
1442/* Per-thread attachment work. */
1443static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk)
1444{
1445 int err;
1446 struct cpuset *cs = cgroup_cs(cont);
1447 1435
1448 /* 1436 return 0;
1449 * can_attach beforehand should guarantee that this doesn't fail.
1450 * TODO: have a better way to handle failure here
1451 */
1452 err = set_cpus_allowed_ptr(tsk, cpus_attach);
1453 WARN_ON_ONCE(err);
1454
1455 cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to);
1456 cpuset_update_task_spread_flag(cs, tsk);
1457} 1437}
1458 1438
1459static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, 1439static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
1460 struct cgroup *oldcont, struct task_struct *tsk) 1440 struct cgroup_taskset *tset)
1461{ 1441{
1462 struct mm_struct *mm; 1442 struct mm_struct *mm;
1463 struct cpuset *cs = cgroup_cs(cont); 1443 struct task_struct *task;
1464 struct cpuset *oldcs = cgroup_cs(oldcont); 1444 struct task_struct *leader = cgroup_taskset_first(tset);
1445 struct cgroup *oldcgrp = cgroup_taskset_cur_cgroup(tset);
1446 struct cpuset *cs = cgroup_cs(cgrp);
1447 struct cpuset *oldcs = cgroup_cs(oldcgrp);
1448
1449 cgroup_taskset_for_each(task, cgrp, tset) {
1450 /*
1451 * can_attach beforehand should guarantee that this doesn't
1452 * fail. TODO: have a better way to handle failure here
1453 */
1454 WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
1455
1456 cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
1457 cpuset_update_task_spread_flag(cs, task);
1458 }
1465 1459
1466 /* 1460 /*
1467 * Change mm, possibly for multiple threads in a threadgroup. This is 1461 * Change mm, possibly for multiple threads in a threadgroup. This is
@@ -1469,7 +1463,7 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
1469 */ 1463 */
1470 cpuset_attach_nodemask_from = oldcs->mems_allowed; 1464 cpuset_attach_nodemask_from = oldcs->mems_allowed;
1471 cpuset_attach_nodemask_to = cs->mems_allowed; 1465 cpuset_attach_nodemask_to = cs->mems_allowed;
1472 mm = get_task_mm(tsk); 1466 mm = get_task_mm(leader);
1473 if (mm) { 1467 if (mm) {
1474 mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); 1468 mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
1475 if (is_memory_migrate(cs)) 1469 if (is_memory_migrate(cs))
@@ -1925,9 +1919,6 @@ struct cgroup_subsys cpuset_subsys = {
1925 .create = cpuset_create, 1919 .create = cpuset_create,
1926 .destroy = cpuset_destroy, 1920 .destroy = cpuset_destroy,
1927 .can_attach = cpuset_can_attach, 1921 .can_attach = cpuset_can_attach,
1928 .can_attach_task = cpuset_can_attach_task,
1929 .pre_attach = cpuset_pre_attach,
1930 .attach_task = cpuset_attach_task,
1931 .attach = cpuset_attach, 1922 .attach = cpuset_attach,
1932 .populate = cpuset_populate, 1923 .populate = cpuset_populate,
1933 .post_clone = cpuset_post_clone, 1924 .post_clone = cpuset_post_clone,
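
cpuset_attach() above also demonstrates the taskset query helpers: cgroup_taskset_first() picks the representative task (the "leader" variable in the cpuset code) and cgroup_taskset_cur_cgroup() recovers the cgroup it is migrating away from, while per-thread state is still updated inside the iterator. A condensed sketch of that split follows; mysubsys_move_one() and mysubsys_move_group() are hypothetical helpers standing in for the controller's own per-thread and group-wide work.

#include <linux/cgroup.h>
#include <linux/sched.h>

/* hypothetical helpers, not part of this patch */
static void mysubsys_move_one(struct task_struct *task, struct cgroup *cgrp) { }
static void mysubsys_move_group(struct task_struct *leader,
				struct cgroup *oldcgrp, struct cgroup *cgrp) { }

static void mysubsys_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
			    struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct task_struct *leader = cgroup_taskset_first(tset);
	struct cgroup *oldcgrp = cgroup_taskset_cur_cgroup(tset);

	/* per-thread work for each task actually moving into @cgrp */
	cgroup_taskset_for_each(task, cgrp, tset)
		mysubsys_move_one(task, cgrp);

	/* group-wide work done once, keyed off the leader's old cgroup */
	mysubsys_move_group(leader, oldcgrp, cgrp);
}
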
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3afc68c08433..a8f4ac001a00 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6941,10 +6941,13 @@ static int __perf_cgroup_move(void *info)
6941 return 0; 6941 return 0;
6942} 6942}
6943 6943
6944static void 6944static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
6945perf_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *task) 6945 struct cgroup_taskset *tset)
6946{ 6946{
6947 task_function_call(task, __perf_cgroup_move, task); 6947 struct task_struct *task;
6948
6949 cgroup_taskset_for_each(task, cgrp, tset)
6950 task_function_call(task, __perf_cgroup_move, task);
6948} 6951}
6949 6952
6950static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, 6953static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
@@ -6958,7 +6961,7 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
6958 if (!(task->flags & PF_EXITING)) 6961 if (!(task->flags & PF_EXITING))
6959 return; 6962 return;
6960 6963
6961 perf_cgroup_attach_task(cgrp, task); 6964 task_function_call(task, __perf_cgroup_move, task);
6962} 6965}
6963 6966
6964struct cgroup_subsys perf_subsys = { 6967struct cgroup_subsys perf_subsys = {
@@ -6967,6 +6970,6 @@ struct cgroup_subsys perf_subsys = {
6967 .create = perf_cgroup_create, 6970 .create = perf_cgroup_create,
6968 .destroy = perf_cgroup_destroy, 6971 .destroy = perf_cgroup_destroy,
6969 .exit = perf_cgroup_exit, 6972 .exit = perf_cgroup_exit,
6970 .attach_task = perf_cgroup_attach_task, 6973 .attach = perf_cgroup_attach,
6971}; 6974};
6972#endif /* CONFIG_CGROUP_PERF */ 6975#endif /* CONFIG_CGROUP_PERF */
diff --git a/kernel/fork.c b/kernel/fork.c
index f34f894c4b98..b00711ce7c13 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -972,7 +972,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
972 sched_autogroup_fork(sig); 972 sched_autogroup_fork(sig);
973 973
974#ifdef CONFIG_CGROUPS 974#ifdef CONFIG_CGROUPS
975 init_rwsem(&sig->threadgroup_fork_lock); 975 init_rwsem(&sig->group_rwsem);
976#endif 976#endif
977 977
978 sig->oom_adj = current->signal->oom_adj; 978 sig->oom_adj = current->signal->oom_adj;
@@ -1153,7 +1153,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1153 p->io_context = NULL; 1153 p->io_context = NULL;
1154 p->audit_context = NULL; 1154 p->audit_context = NULL;
1155 if (clone_flags & CLONE_THREAD) 1155 if (clone_flags & CLONE_THREAD)
1156 threadgroup_fork_read_lock(current); 1156 threadgroup_change_begin(current);
1157 cgroup_fork(p); 1157 cgroup_fork(p);
1158#ifdef CONFIG_NUMA 1158#ifdef CONFIG_NUMA
1159 p->mempolicy = mpol_dup(p->mempolicy); 1159 p->mempolicy = mpol_dup(p->mempolicy);
@@ -1368,7 +1368,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1368 proc_fork_connector(p); 1368 proc_fork_connector(p);
1369 cgroup_post_fork(p); 1369 cgroup_post_fork(p);
1370 if (clone_flags & CLONE_THREAD) 1370 if (clone_flags & CLONE_THREAD)
1371 threadgroup_fork_read_unlock(current); 1371 threadgroup_change_end(current);
1372 perf_event_fork(p); 1372 perf_event_fork(p);
1373 return p; 1373 return p;
1374 1374
@@ -1403,7 +1403,7 @@ bad_fork_cleanup_policy:
1403bad_fork_cleanup_cgroup: 1403bad_fork_cleanup_cgroup:
1404#endif 1404#endif
1405 if (clone_flags & CLONE_THREAD) 1405 if (clone_flags & CLONE_THREAD)
1406 threadgroup_fork_read_unlock(current); 1406 threadgroup_change_end(current);
1407 cgroup_exit(p, cgroup_callbacks_done); 1407 cgroup_exit(p, cgroup_callbacks_done);
1408 delayacct_tsk_free(p); 1408 delayacct_tsk_free(p);
1409 module_put(task_thread_info(p)->exec_domain->module); 1409 module_put(task_thread_info(p)->exec_domain->module);
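
Spread across three hunks, the copy_process() changes above amount to one bracket: for CLONE_THREAD forks the thread group is held stable from before cgroup_fork() until after cgroup_post_fork(), so the attach path, which now migrates whole thread groups, never races with a new thread being linked in. The ordering, condensed from the hunks above into one fragment (not standalone code; the surrounding copy_process() body is elided):

	if (clone_flags & CLONE_THREAD)
		threadgroup_change_begin(current);
	cgroup_fork(p);
	/* ... the rest of copy_process() ... */
	cgroup_post_fork(p);
	if (clone_flags & CLONE_THREAD)
		threadgroup_change_end(current);	/* success path */
	/* error paths reaching bad_fork_cleanup_cgroup run the same
	   threadgroup_change_end() before cgroup_exit() */
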
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 34683efa2cce..6d269cce7aa1 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -159,8 +159,7 @@ int res_counter_memparse_write_strategy(const char *buf,
159 return 0; 159 return 0;
160 } 160 }
161 161
162 /* FIXME - make memparse() take const char* args */ 162 *res = memparse(buf, &end);
163 *res = memparse((char *)buf, &end);
164 if (*end != '\0') 163 if (*end != '\0')
165 return -EINVAL; 164 return -EINVAL;
166 165
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0ac0f811d623..cecbb64be05f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7563,24 +7563,31 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
7563 sched_destroy_group(tg); 7563 sched_destroy_group(tg);
7564} 7564}
7565 7565
7566static int 7566static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
7567cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) 7567 struct cgroup_taskset *tset)
7568{ 7568{
7569 struct task_struct *task;
7570
7571 cgroup_taskset_for_each(task, cgrp, tset) {
7569#ifdef CONFIG_RT_GROUP_SCHED 7572#ifdef CONFIG_RT_GROUP_SCHED
7570 if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk)) 7573 if (!sched_rt_can_attach(cgroup_tg(cgrp), task))
7571 return -EINVAL; 7574 return -EINVAL;
7572#else 7575#else
7573 /* We don't support RT-tasks being in separate groups */ 7576 /* We don't support RT-tasks being in separate groups */
7574 if (tsk->sched_class != &fair_sched_class) 7577 if (task->sched_class != &fair_sched_class)
7575 return -EINVAL; 7578 return -EINVAL;
7576#endif 7579#endif
7580 }
7577 return 0; 7581 return 0;
7578} 7582}
7579 7583
7580static void 7584static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
7581cpu_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) 7585 struct cgroup_taskset *tset)
7582{ 7586{
7583 sched_move_task(tsk); 7587 struct task_struct *task;
7588
7589 cgroup_taskset_for_each(task, cgrp, tset)
7590 sched_move_task(task);
7584} 7591}
7585 7592
7586static void 7593static void
@@ -7915,8 +7922,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
7915 .name = "cpu", 7922 .name = "cpu",
7916 .create = cpu_cgroup_create, 7923 .create = cpu_cgroup_create,
7917 .destroy = cpu_cgroup_destroy, 7924 .destroy = cpu_cgroup_destroy,
7918 .can_attach_task = cpu_cgroup_can_attach_task, 7925 .can_attach = cpu_cgroup_can_attach,
7919 .attach_task = cpu_cgroup_attach_task, 7926 .attach = cpu_cgroup_attach,
7920 .exit = cpu_cgroup_exit, 7927 .exit = cpu_cgroup_exit,
7921 .populate = cpu_cgroup_populate, 7928 .populate = cpu_cgroup_populate,
7922 .subsys_id = cpu_cgroup_subsys_id, 7929 .subsys_id = cpu_cgroup_subsys_id,
diff --git a/kernel/signal.c b/kernel/signal.c
index 56ce3a618b28..bb0efa5705ed 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2355,8 +2355,15 @@ void exit_signals(struct task_struct *tsk)
2355 int group_stop = 0; 2355 int group_stop = 0;
2356 sigset_t unblocked; 2356 sigset_t unblocked;
2357 2357
2358 /*
2359 * @tsk is about to have PF_EXITING set - lock out users which
2360 * expect stable threadgroup.
2361 */
2362 threadgroup_change_begin(tsk);
2363
2358 if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { 2364 if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) {
2359 tsk->flags |= PF_EXITING; 2365 tsk->flags |= PF_EXITING;
2366 threadgroup_change_end(tsk);
2360 return; 2367 return;
2361 } 2368 }
2362 2369
@@ -2366,6 +2373,9 @@ void exit_signals(struct task_struct *tsk)
2366 * see wants_signal(), do_signal_stop(). 2373 * see wants_signal(), do_signal_stop().
2367 */ 2374 */
2368 tsk->flags |= PF_EXITING; 2375 tsk->flags |= PF_EXITING;
2376
2377 threadgroup_change_end(tsk);
2378
2369 if (!signal_pending(tsk)) 2379 if (!signal_pending(tsk))
2370 goto out; 2380 goto out;
2371 2381
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 94da8ee9e2c2..00d4fa27d3e6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5391,8 +5391,9 @@ static void mem_cgroup_clear_mc(void)
5391 5391
5392static int mem_cgroup_can_attach(struct cgroup_subsys *ss, 5392static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
5393 struct cgroup *cgroup, 5393 struct cgroup *cgroup,
5394 struct task_struct *p) 5394 struct cgroup_taskset *tset)
5395{ 5395{
5396 struct task_struct *p = cgroup_taskset_first(tset);
5396 int ret = 0; 5397 int ret = 0;
5397 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup); 5398 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup);
5398 5399
@@ -5430,7 +5431,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
5430 5431
5431static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss, 5432static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
5432 struct cgroup *cgroup, 5433 struct cgroup *cgroup,
5433 struct task_struct *p) 5434 struct cgroup_taskset *tset)
5434{ 5435{
5435 mem_cgroup_clear_mc(); 5436 mem_cgroup_clear_mc();
5436} 5437}
@@ -5547,9 +5548,9 @@ retry:
5547 5548
5548static void mem_cgroup_move_task(struct cgroup_subsys *ss, 5549static void mem_cgroup_move_task(struct cgroup_subsys *ss,
5549 struct cgroup *cont, 5550 struct cgroup *cont,
5550 struct cgroup *old_cont, 5551 struct cgroup_taskset *tset)
5551 struct task_struct *p)
5552{ 5552{
5553 struct task_struct *p = cgroup_taskset_first(tset);
5553 struct mm_struct *mm = get_task_mm(p); 5554 struct mm_struct *mm = get_task_mm(p);
5554 5555
5555 if (mm) { 5556 if (mm) {
@@ -5564,19 +5565,18 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
5564#else /* !CONFIG_MMU */ 5565#else /* !CONFIG_MMU */
5565static int mem_cgroup_can_attach(struct cgroup_subsys *ss, 5566static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
5566 struct cgroup *cgroup, 5567 struct cgroup *cgroup,
5567 struct task_struct *p) 5568 struct cgroup_taskset *tset)
5568{ 5569{
5569 return 0; 5570 return 0;
5570} 5571}
5571static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss, 5572static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
5572 struct cgroup *cgroup, 5573 struct cgroup *cgroup,
5573 struct task_struct *p) 5574 struct cgroup_taskset *tset)
5574{ 5575{
5575} 5576}
5576static void mem_cgroup_move_task(struct cgroup_subsys *ss, 5577static void mem_cgroup_move_task(struct cgroup_subsys *ss,
5577 struct cgroup *cont, 5578 struct cgroup *cont,
5578 struct cgroup *old_cont, 5579 struct cgroup_taskset *tset)
5579 struct task_struct *p)
5580{ 5580{
5581} 5581}
5582#endif 5582#endif
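
Unlike the iterating controllers, memcg only needs one representative task per migration: mem_cgroup_can_attach() and mem_cgroup_move_task() above take cgroup_taskset_first() and then work against that task's mm, much as the old single-task callbacks did with @p. A minimal sketch of that style of conversion for a hypothetical controller whose state hangs off the mm; mysubsys_prepare_mm() is an illustrative stand-in, not a real interface.

#include <linux/cgroup.h>
#include <linux/sched.h>

/* hypothetical mm-wide preparation step, not part of this patch */
static int mysubsys_prepare_mm(struct mm_struct *mm, struct cgroup *cgrp)
{
	return 0;
}

static int mysubsys_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
			       struct cgroup_taskset *tset)
{
	struct task_struct *p = cgroup_taskset_first(tset);
	struct mm_struct *mm = get_task_mm(p);
	int ret;

	if (!mm)
		return 0;	/* kernel thread: nothing to prepare */

	ret = mysubsys_prepare_mm(mm, cgrp);
	mmput(mm);
	return ret;
}
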
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 4450fbeec411..8b5b5d8612c6 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -62,11 +62,12 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
62struct cgroup_subsys devices_subsys; 62struct cgroup_subsys devices_subsys;
63 63
64static int devcgroup_can_attach(struct cgroup_subsys *ss, 64static int devcgroup_can_attach(struct cgroup_subsys *ss,
65 struct cgroup *new_cgroup, struct task_struct *task) 65 struct cgroup *new_cgrp, struct cgroup_taskset *set)
66{ 66{
67 if (current != task && !capable(CAP_SYS_ADMIN)) 67 struct task_struct *task = cgroup_taskset_first(set);
68 return -EPERM;
69 68
69 if (current != task && !capable(CAP_SYS_ADMIN))
70 return -EPERM;
70 return 0; 71 return 0;
71} 72}
72 73