-rw-r--r--  Documentation/cgroups/cgroups.txt |  51
-rw-r--r--  block/blk-cgroup.c                |  45
-rw-r--r--  include/linux/cgroup.h            |  31
-rw-r--r--  include/linux/init_task.h         |   9
-rw-r--r--  include/linux/sched.h             |  73
-rw-r--r--  kernel/cgroup.c                   | 401
-rw-r--r--  kernel/cgroup_freezer.c           |  16
-rw-r--r--  kernel/cpuset.c                   | 105
-rw-r--r--  kernel/events/core.c              |  13
-rw-r--r--  kernel/fork.c                     |   8
-rw-r--r--  kernel/res_counter.c              |   3
-rw-r--r--  kernel/sched/core.c               |  31
-rw-r--r--  kernel/signal.c                   |  10
-rw-r--r--  mm/memcontrol.c                   |  16
-rw-r--r--  security/device_cgroup.c          |   7
15 files changed, 470 insertions(+), 349 deletions(-)
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 9c452ef2328c..a7c96ae5557c 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -594,53 +594,44 @@ rmdir() will fail with it. From this behavior, pre_destroy() can be
 called multiple times against a cgroup.
 
 int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-	       struct task_struct *task)
+	       struct cgroup_taskset *tset)
 (cgroup_mutex held by caller)
 
-Called prior to moving a task into a cgroup; if the subsystem
-returns an error, this will abort the attach operation. If a NULL
-task is passed, then a successful result indicates that *any*
-unspecified task can be moved into the cgroup. Note that this isn't
-called on a fork. If this method returns 0 (success) then this should
-remain valid while the caller holds cgroup_mutex and it is ensured that either
+Called prior to moving one or more tasks into a cgroup; if the
+subsystem returns an error, this will abort the attach operation.
+@tset contains the tasks to be attached and is guaranteed to have at
+least one task in it.
+
+If there are multiple tasks in the taskset, then:
+  - it's guaranteed that all are from the same thread group
+  - @tset contains all tasks from the thread group whether or not
+    they're switching cgroups
+  - the first task is the leader
+
+Each @tset entry also contains the task's old cgroup and tasks which
+aren't switching cgroup can be skipped easily using the
+cgroup_taskset_for_each() iterator. Note that this isn't called on a
+fork. If this method returns 0 (success) then this should remain valid
+while the caller holds cgroup_mutex and it is ensured that either
 attach() or cancel_attach() will be called in future.
 
-int can_attach_task(struct cgroup *cgrp, struct task_struct *tsk);
-(cgroup_mutex held by caller)
-
-As can_attach, but for operations that must be run once per task to be
-attached (possibly many when using cgroup_attach_proc). Called after
-can_attach.
-
 void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-		   struct task_struct *task, bool threadgroup)
+		   struct cgroup_taskset *tset)
 (cgroup_mutex held by caller)
 
 Called when a task attach operation has failed after can_attach() has succeeded.
 A subsystem whose can_attach() has some side-effects should provide this
 function, so that the subsystem can implement a rollback. If not, not necessary.
 This will be called only about subsystems whose can_attach() operation have
-succeeded.
-
-void pre_attach(struct cgroup *cgrp);
-(cgroup_mutex held by caller)
-
-For any non-per-thread attachment work that needs to happen before
-attach_task. Needed by cpuset.
+succeeded. The parameters are identical to can_attach().
 
 void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-	    struct cgroup *old_cgrp, struct task_struct *task)
+	    struct cgroup_taskset *tset)
 (cgroup_mutex held by caller)
 
 Called after the task has been attached to the cgroup, to allow any
 post-attachment activity that requires memory allocations or blocking.
-
-void attach_task(struct cgroup *cgrp, struct task_struct *tsk);
-(cgroup_mutex held by caller)
-
-As attach, but for operations that must be run once per task to be attached,
-like can_attach_task. Called before attach. Currently does not support any
-subsystem that might need the old_cgrp for every thread in the group.
+The parameters are identical to can_attach().
 
 void fork(struct cgroup_subsy *ss, struct task_struct *task)
 
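To make the documented interface concrete, here is a minimal sketch (not part of this patch) of how a hypothetical controller could implement the new can_attach()/attach() pair. The example_* names and the kernel-thread check are invented for illustration; only the cgroup_taskset API introduced later in this patch is assumed, and registration details (subsys id, populate, etc.) are omitted.

#include <linux/cgroup.h>
#include <linux/sched.h>

/* Reject the whole migration if any task in the set is a kernel thread. */
static int example_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
                              struct cgroup_taskset *tset)
{
        struct task_struct *task;

        cgroup_taskset_for_each(task, NULL, tset) {
                if (task->flags & PF_KTHREAD)
                        return -EINVAL;
        }
        return 0;
}

/* Passing @cgrp as the skip cgroup skips tasks that are not switching. */
static void example_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
                           struct cgroup_taskset *tset)
{
        struct task_struct *task;

        cgroup_taskset_for_each(task, cgrp, tset) {
                /* per-task attach work would go here */
        }
}

struct cgroup_subsys example_subsys = {
        .name           = "example",
        .can_attach     = example_can_attach,
        .attach         = example_attach,
};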
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 8f630cec906e..b8c143d68ee0 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -30,8 +30,10 @@ EXPORT_SYMBOL_GPL(blkio_root_cgroup); | |||
30 | 30 | ||
31 | static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *, | 31 | static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *, |
32 | struct cgroup *); | 32 | struct cgroup *); |
33 | static int blkiocg_can_attach_task(struct cgroup *, struct task_struct *); | 33 | static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *, |
34 | static void blkiocg_attach_task(struct cgroup *, struct task_struct *); | 34 | struct cgroup_taskset *); |
35 | static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *, | ||
36 | struct cgroup_taskset *); | ||
35 | static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *); | 37 | static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *); |
36 | static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *); | 38 | static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *); |
37 | 39 | ||
@@ -44,8 +46,8 @@ static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *); | |||
44 | struct cgroup_subsys blkio_subsys = { | 46 | struct cgroup_subsys blkio_subsys = { |
45 | .name = "blkio", | 47 | .name = "blkio", |
46 | .create = blkiocg_create, | 48 | .create = blkiocg_create, |
47 | .can_attach_task = blkiocg_can_attach_task, | 49 | .can_attach = blkiocg_can_attach, |
48 | .attach_task = blkiocg_attach_task, | 50 | .attach = blkiocg_attach, |
49 | .destroy = blkiocg_destroy, | 51 | .destroy = blkiocg_destroy, |
50 | .populate = blkiocg_populate, | 52 | .populate = blkiocg_populate, |
51 | #ifdef CONFIG_BLK_CGROUP | 53 | #ifdef CONFIG_BLK_CGROUP |
@@ -1626,30 +1628,39 @@ done: | |||
1626 | * of the main cic data structures. For now we allow a task to change | 1628 | * of the main cic data structures. For now we allow a task to change |
1627 | * its cgroup only if it's the only owner of its ioc. | 1629 | * its cgroup only if it's the only owner of its ioc. |
1628 | */ | 1630 | */ |
1629 | static int blkiocg_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | 1631 | static int blkiocg_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
1632 | struct cgroup_taskset *tset) | ||
1630 | { | 1633 | { |
1634 | struct task_struct *task; | ||
1631 | struct io_context *ioc; | 1635 | struct io_context *ioc; |
1632 | int ret = 0; | 1636 | int ret = 0; |
1633 | 1637 | ||
1634 | /* task_lock() is needed to avoid races with exit_io_context() */ | 1638 | /* task_lock() is needed to avoid races with exit_io_context() */ |
1635 | task_lock(tsk); | 1639 | cgroup_taskset_for_each(task, cgrp, tset) { |
1636 | ioc = tsk->io_context; | 1640 | task_lock(task); |
1637 | if (ioc && atomic_read(&ioc->nr_tasks) > 1) | 1641 | ioc = task->io_context; |
1638 | ret = -EINVAL; | 1642 | if (ioc && atomic_read(&ioc->nr_tasks) > 1) |
1639 | task_unlock(tsk); | 1643 | ret = -EINVAL; |
1640 | 1644 | task_unlock(task); | |
1645 | if (ret) | ||
1646 | break; | ||
1647 | } | ||
1641 | return ret; | 1648 | return ret; |
1642 | } | 1649 | } |
1643 | 1650 | ||
1644 | static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | 1651 | static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
1652 | struct cgroup_taskset *tset) | ||
1645 | { | 1653 | { |
1654 | struct task_struct *task; | ||
1646 | struct io_context *ioc; | 1655 | struct io_context *ioc; |
1647 | 1656 | ||
1648 | task_lock(tsk); | 1657 | cgroup_taskset_for_each(task, cgrp, tset) { |
1649 | ioc = tsk->io_context; | 1658 | task_lock(task); |
1650 | if (ioc) | 1659 | ioc = task->io_context; |
1651 | ioc->cgroup_changed = 1; | 1660 | if (ioc) |
1652 | task_unlock(tsk); | 1661 | ioc->cgroup_changed = 1; |
1662 | task_unlock(task); | ||
1663 | } | ||
1653 | } | 1664 | } |
1654 | 1665 | ||
1655 | void blkio_policy_register(struct blkio_policy_type *blkiop) | 1666 | void blkio_policy_register(struct blkio_policy_type *blkiop) |
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index a17becc36ca1..e9b602151caf 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -457,6 +457,28 @@ void cgroup_exclude_rmdir(struct cgroup_subsys_state *css); | |||
457 | void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css); | 457 | void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css); |
458 | 458 | ||
459 | /* | 459 | /* |
460 | * Control Group taskset, used to pass around set of tasks to cgroup_subsys | ||
461 | * methods. | ||
462 | */ | ||
463 | struct cgroup_taskset; | ||
464 | struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset); | ||
465 | struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); | ||
466 | struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset); | ||
467 | int cgroup_taskset_size(struct cgroup_taskset *tset); | ||
468 | |||
469 | /** | ||
470 | * cgroup_taskset_for_each - iterate cgroup_taskset | ||
471 | * @task: the loop cursor | ||
472 | * @skip_cgrp: skip if task's cgroup matches this, %NULL to iterate through all | ||
473 | * @tset: taskset to iterate | ||
474 | */ | ||
475 | #define cgroup_taskset_for_each(task, skip_cgrp, tset) \ | ||
476 | for ((task) = cgroup_taskset_first((tset)); (task); \ | ||
477 | (task) = cgroup_taskset_next((tset))) \ | ||
478 | if (!(skip_cgrp) || \ | ||
479 | cgroup_taskset_cur_cgroup((tset)) != (skip_cgrp)) | ||
480 | |||
481 | /* | ||
460 | * Control Group subsystem type. | 482 | * Control Group subsystem type. |
461 | * See Documentation/cgroups/cgroups.txt for details | 483 | * See Documentation/cgroups/cgroups.txt for details |
462 | */ | 484 | */ |
@@ -467,14 +489,11 @@ struct cgroup_subsys { | |||
467 | int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); | 489 | int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); |
468 | void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); | 490 | void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); |
469 | int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, | 491 | int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, |
470 | struct task_struct *tsk); | 492 | struct cgroup_taskset *tset); |
471 | int (*can_attach_task)(struct cgroup *cgrp, struct task_struct *tsk); | ||
472 | void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, | 493 | void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, |
473 | struct task_struct *tsk); | 494 | struct cgroup_taskset *tset); |
474 | void (*pre_attach)(struct cgroup *cgrp); | ||
475 | void (*attach_task)(struct cgroup *cgrp, struct task_struct *tsk); | ||
476 | void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, | 495 | void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, |
477 | struct cgroup *old_cgrp, struct task_struct *tsk); | 496 | struct cgroup_taskset *tset); |
478 | void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); | 497 | void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); |
479 | void (*exit)(struct cgroup_subsys *ss, struct cgroup *cgrp, | 498 | void (*exit)(struct cgroup_subsys *ss, struct cgroup *cgrp, |
480 | struct cgroup *old_cgrp, struct task_struct *task); | 499 | struct cgroup *old_cgrp, struct task_struct *task); |
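The cgroup_taskset_for_each() macro added above is a thin wrapper around the accessor functions it declares; a controller that needs each task's old cgroup (for example, to move per-task accounting) could open-code the walk roughly as in the sketch below. The example_* name is hypothetical; only the declarations above are assumed.

static void example_transfer_attach(struct cgroup_subsys *ss,
                                    struct cgroup *cgrp,
                                    struct cgroup_taskset *tset)
{
        struct task_struct *task;

        for (task = cgroup_taskset_first(tset); task;
             task = cgroup_taskset_next(tset)) {
                /* the old cgroup of the task returned last */
                struct cgroup *old_cgrp = cgroup_taskset_cur_cgroup(tset);

                if (old_cgrp == cgrp)
                        continue;       /* not actually switching cgroups */
                /* transfer per-task accounting from old_cgrp to cgrp here */
        }
}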
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 32574eef9394..9c66b1ada9d7 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -23,11 +23,10 @@ extern struct files_struct init_files; | |||
23 | extern struct fs_struct init_fs; | 23 | extern struct fs_struct init_fs; |
24 | 24 | ||
25 | #ifdef CONFIG_CGROUPS | 25 | #ifdef CONFIG_CGROUPS |
26 | #define INIT_THREADGROUP_FORK_LOCK(sig) \ | 26 | #define INIT_GROUP_RWSEM(sig) \ |
27 | .threadgroup_fork_lock = \ | 27 | .group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem), |
28 | __RWSEM_INITIALIZER(sig.threadgroup_fork_lock), | ||
29 | #else | 28 | #else |
30 | #define INIT_THREADGROUP_FORK_LOCK(sig) | 29 | #define INIT_GROUP_RWSEM(sig) |
31 | #endif | 30 | #endif |
32 | 31 | ||
33 | #define INIT_SIGNALS(sig) { \ | 32 | #define INIT_SIGNALS(sig) { \ |
@@ -46,7 +45,7 @@ extern struct fs_struct init_fs; | |||
46 | }, \ | 45 | }, \ |
47 | .cred_guard_mutex = \ | 46 | .cred_guard_mutex = \ |
48 | __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ | 47 | __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ |
49 | INIT_THREADGROUP_FORK_LOCK(sig) \ | 48 | INIT_GROUP_RWSEM(sig) \ |
50 | } | 49 | } |
51 | 50 | ||
52 | extern struct nsproxy init_nsproxy; | 51 | extern struct nsproxy init_nsproxy; |
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ad93e1ec8c65..f044f66018f2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -637,13 +637,15 @@ struct signal_struct { | |||
637 | #endif | 637 | #endif |
638 | #ifdef CONFIG_CGROUPS | 638 | #ifdef CONFIG_CGROUPS |
639 | /* | 639 | /* |
640 | * The threadgroup_fork_lock prevents threads from forking with | 640 | * group_rwsem prevents new tasks from entering the threadgroup and |
641 | * CLONE_THREAD while held for writing. Use this for fork-sensitive | 641 | * member tasks from exiting, more specifically, setting of |
642 | * threadgroup-wide operations. It's taken for reading in fork.c in | 642 | * PF_EXITING. fork and exit paths are protected with this rwsem |
643 | * copy_process(). | 643 | * using threadgroup_change_begin/end(). Users which require |
644 | * Currently only needed write-side by cgroups. | 644 | * threadgroup to remain stable should use threadgroup_[un]lock() |
645 | * which also takes care of exec path. Currently, cgroup is the | ||
646 | * only user. | ||
645 | */ | 647 | */ |
646 | struct rw_semaphore threadgroup_fork_lock; | 648 | struct rw_semaphore group_rwsem; |
647 | #endif | 649 | #endif |
648 | 650 | ||
649 | int oom_adj; /* OOM kill score adjustment (bit shift) */ | 651 | int oom_adj; /* OOM kill score adjustment (bit shift) */ |
@@ -2394,29 +2396,62 @@ static inline void unlock_task_sighand(struct task_struct *tsk, | |||
2394 | spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); | 2396 | spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); |
2395 | } | 2397 | } |
2396 | 2398 | ||
2397 | /* See the declaration of threadgroup_fork_lock in signal_struct. */ | ||
2398 | #ifdef CONFIG_CGROUPS | 2399 | #ifdef CONFIG_CGROUPS |
2399 | static inline void threadgroup_fork_read_lock(struct task_struct *tsk) | 2400 | static inline void threadgroup_change_begin(struct task_struct *tsk) |
2400 | { | 2401 | { |
2401 | down_read(&tsk->signal->threadgroup_fork_lock); | 2402 | down_read(&tsk->signal->group_rwsem); |
2402 | } | 2403 | } |
2403 | static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) | 2404 | static inline void threadgroup_change_end(struct task_struct *tsk) |
2404 | { | 2405 | { |
2405 | up_read(&tsk->signal->threadgroup_fork_lock); | 2406 | up_read(&tsk->signal->group_rwsem); |
2406 | } | 2407 | } |
2407 | static inline void threadgroup_fork_write_lock(struct task_struct *tsk) | 2408 | |
2409 | /** | ||
2410 | * threadgroup_lock - lock threadgroup | ||
2411 | * @tsk: member task of the threadgroup to lock | ||
2412 | * | ||
2413 | * Lock the threadgroup @tsk belongs to. No new task is allowed to enter | ||
2414 | * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or | ||
2415 | * perform exec. This is useful for cases where the threadgroup needs to | ||
2416 | * stay stable across blockable operations. | ||
2417 | * | ||
2418 | * fork and exit paths explicitly call threadgroup_change_{begin|end}() for | ||
2419 | * synchronization. While held, no new task will be added to threadgroup | ||
2420 | * and no existing live task will have its PF_EXITING set. | ||
2421 | * | ||
2422 | * During exec, a task goes and puts its thread group through unusual | ||
2423 | * changes. After de-threading, exclusive access is assumed to resources | ||
2424 | * which are usually shared by tasks in the same group - e.g. sighand may | ||
2425 | * be replaced with a new one. Also, the exec'ing task takes over group | ||
2426 | * leader role including its pid. Exclude these changes while locked by | ||
2427 | * grabbing cred_guard_mutex which is used to synchronize exec path. | ||
2428 | */ | ||
2429 | static inline void threadgroup_lock(struct task_struct *tsk) | ||
2408 | { | 2430 | { |
2409 | down_write(&tsk->signal->threadgroup_fork_lock); | 2431 | /* |
2432 | * exec uses exit for de-threading nesting group_rwsem inside | ||
2433 | * cred_guard_mutex. Grab cred_guard_mutex first. | ||
2434 | */ | ||
2435 | mutex_lock(&tsk->signal->cred_guard_mutex); | ||
2436 | down_write(&tsk->signal->group_rwsem); | ||
2410 | } | 2437 | } |
2411 | static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) | 2438 | |
2439 | /** | ||
2440 | * threadgroup_unlock - unlock threadgroup | ||
2441 | * @tsk: member task of the threadgroup to unlock | ||
2442 | * | ||
2443 | * Reverse threadgroup_lock(). | ||
2444 | */ | ||
2445 | static inline void threadgroup_unlock(struct task_struct *tsk) | ||
2412 | { | 2446 | { |
2413 | up_write(&tsk->signal->threadgroup_fork_lock); | 2447 | up_write(&tsk->signal->group_rwsem); |
2448 | mutex_unlock(&tsk->signal->cred_guard_mutex); | ||
2414 | } | 2449 | } |
2415 | #else | 2450 | #else |
2416 | static inline void threadgroup_fork_read_lock(struct task_struct *tsk) {} | 2451 | static inline void threadgroup_change_begin(struct task_struct *tsk) {} |
2417 | static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) {} | 2452 | static inline void threadgroup_change_end(struct task_struct *tsk) {} |
2418 | static inline void threadgroup_fork_write_lock(struct task_struct *tsk) {} | 2453 | static inline void threadgroup_lock(struct task_struct *tsk) {} |
2419 | static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) {} | 2454 | static inline void threadgroup_unlock(struct task_struct *tsk) {} |
2420 | #endif | 2455 | #endif |
2421 | 2456 | ||
2422 | #ifndef __HAVE_THREAD_FUNCTIONS | 2457 | #ifndef __HAVE_THREAD_FUNCTIONS |
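A usage sketch of the locking pattern documented above, mirroring how attach_task_by_pid() is converted later in this patch; do_blocking_threadgroup_walk() is a placeholder, not a real kernel function:

static int example_with_stable_threadgroup(struct task_struct *tsk)
{
        int ret;

        threadgroup_lock(tsk);  /* no thread may fork, exit or exec now */

        /* safe to walk tsk's thread group and to sleep while doing so */
        ret = do_blocking_threadgroup_walk(tsk);        /* placeholder */

        threadgroup_unlock(tsk);
        return ret;
}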
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 7cab65f83f1d..a5d3b5325f77 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -63,7 +63,24 @@ | |||
63 | 63 | ||
64 | #include <linux/atomic.h> | 64 | #include <linux/atomic.h> |
65 | 65 | ||
66 | /* | ||
67 | * cgroup_mutex is the master lock. Any modification to cgroup or its | ||
68 | * hierarchy must be performed while holding it. | ||
69 | * | ||
70 | * cgroup_root_mutex nests inside cgroup_mutex and should be held to modify | ||
71 | * cgroupfs_root of any cgroup hierarchy - subsys list, flags, | ||
72 | * release_agent_path and so on. Modifying requires both cgroup_mutex and | ||
73 | * cgroup_root_mutex. Readers can acquire either of the two. This is to | ||
74 | * break the following locking order cycle. | ||
75 | * | ||
76 | * A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem | ||
77 | * B. namespace_sem -> cgroup_mutex | ||
78 | * | ||
79 | * B happens only through cgroup_show_options() and using cgroup_root_mutex | ||
80 | * breaks it. | ||
81 | */ | ||
66 | static DEFINE_MUTEX(cgroup_mutex); | 82 | static DEFINE_MUTEX(cgroup_mutex); |
83 | static DEFINE_MUTEX(cgroup_root_mutex); | ||
67 | 84 | ||
68 | /* | 85 | /* |
69 | * Generate an array of cgroup subsystem pointers. At boot time, this is | 86 | * Generate an array of cgroup subsystem pointers. At boot time, this is |
@@ -921,7 +938,7 @@ static void cgroup_d_remove_dir(struct dentry *dentry) | |||
921 | * | 938 | * |
922 | * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex; | 939 | * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex; |
923 | */ | 940 | */ |
924 | DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); | 941 | static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); |
925 | 942 | ||
926 | static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp) | 943 | static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp) |
927 | { | 944 | { |
@@ -953,6 +970,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
953 | int i; | 970 | int i; |
954 | 971 | ||
955 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); | 972 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); |
973 | BUG_ON(!mutex_is_locked(&cgroup_root_mutex)); | ||
956 | 974 | ||
957 | removed_bits = root->actual_subsys_bits & ~final_bits; | 975 | removed_bits = root->actual_subsys_bits & ~final_bits; |
958 | added_bits = final_bits & ~root->actual_subsys_bits; | 976 | added_bits = final_bits & ~root->actual_subsys_bits; |
@@ -1043,7 +1061,7 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) | |||
1043 | struct cgroupfs_root *root = dentry->d_sb->s_fs_info; | 1061 | struct cgroupfs_root *root = dentry->d_sb->s_fs_info; |
1044 | struct cgroup_subsys *ss; | 1062 | struct cgroup_subsys *ss; |
1045 | 1063 | ||
1046 | mutex_lock(&cgroup_mutex); | 1064 | mutex_lock(&cgroup_root_mutex); |
1047 | for_each_subsys(root, ss) | 1065 | for_each_subsys(root, ss) |
1048 | seq_printf(seq, ",%s", ss->name); | 1066 | seq_printf(seq, ",%s", ss->name); |
1049 | if (test_bit(ROOT_NOPREFIX, &root->flags)) | 1067 | if (test_bit(ROOT_NOPREFIX, &root->flags)) |
@@ -1054,7 +1072,7 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) | |||
1054 | seq_puts(seq, ",clone_children"); | 1072 | seq_puts(seq, ",clone_children"); |
1055 | if (strlen(root->name)) | 1073 | if (strlen(root->name)) |
1056 | seq_printf(seq, ",name=%s", root->name); | 1074 | seq_printf(seq, ",name=%s", root->name); |
1057 | mutex_unlock(&cgroup_mutex); | 1075 | mutex_unlock(&cgroup_root_mutex); |
1058 | return 0; | 1076 | return 0; |
1059 | } | 1077 | } |
1060 | 1078 | ||
@@ -1175,10 +1193,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
1175 | 1193 | ||
1176 | /* | 1194 | /* |
1177 | * If the 'all' option was specified select all the subsystems, | 1195 | * If the 'all' option was specified select all the subsystems, |
1178 | * otherwise 'all, 'none' and a subsystem name options were not | 1196 | * otherwise if 'none', 'name=' and a subsystem name options |
1179 | * specified, let's default to 'all' | 1197 | * were not specified, let's default to 'all' |
1180 | */ | 1198 | */ |
1181 | if (all_ss || (!all_ss && !one_ss && !opts->none)) { | 1199 | if (all_ss || (!one_ss && !opts->none && !opts->name)) { |
1182 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 1200 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
1183 | struct cgroup_subsys *ss = subsys[i]; | 1201 | struct cgroup_subsys *ss = subsys[i]; |
1184 | if (ss == NULL) | 1202 | if (ss == NULL) |
@@ -1269,6 +1287,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
1269 | 1287 | ||
1270 | mutex_lock(&cgrp->dentry->d_inode->i_mutex); | 1288 | mutex_lock(&cgrp->dentry->d_inode->i_mutex); |
1271 | mutex_lock(&cgroup_mutex); | 1289 | mutex_lock(&cgroup_mutex); |
1290 | mutex_lock(&cgroup_root_mutex); | ||
1272 | 1291 | ||
1273 | /* See what subsystems are wanted */ | 1292 | /* See what subsystems are wanted */ |
1274 | ret = parse_cgroupfs_options(data, &opts); | 1293 | ret = parse_cgroupfs_options(data, &opts); |
@@ -1297,6 +1316,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
1297 | out_unlock: | 1316 | out_unlock: |
1298 | kfree(opts.release_agent); | 1317 | kfree(opts.release_agent); |
1299 | kfree(opts.name); | 1318 | kfree(opts.name); |
1319 | mutex_unlock(&cgroup_root_mutex); | ||
1300 | mutex_unlock(&cgroup_mutex); | 1320 | mutex_unlock(&cgroup_mutex); |
1301 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); | 1321 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); |
1302 | return ret; | 1322 | return ret; |
@@ -1481,6 +1501,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1481 | int ret = 0; | 1501 | int ret = 0; |
1482 | struct super_block *sb; | 1502 | struct super_block *sb; |
1483 | struct cgroupfs_root *new_root; | 1503 | struct cgroupfs_root *new_root; |
1504 | struct inode *inode; | ||
1484 | 1505 | ||
1485 | /* First find the desired set of subsystems */ | 1506 | /* First find the desired set of subsystems */ |
1486 | mutex_lock(&cgroup_mutex); | 1507 | mutex_lock(&cgroup_mutex); |
@@ -1514,7 +1535,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1514 | /* We used the new root structure, so this is a new hierarchy */ | 1535 | /* We used the new root structure, so this is a new hierarchy */ |
1515 | struct list_head tmp_cg_links; | 1536 | struct list_head tmp_cg_links; |
1516 | struct cgroup *root_cgrp = &root->top_cgroup; | 1537 | struct cgroup *root_cgrp = &root->top_cgroup; |
1517 | struct inode *inode; | ||
1518 | struct cgroupfs_root *existing_root; | 1538 | struct cgroupfs_root *existing_root; |
1519 | const struct cred *cred; | 1539 | const struct cred *cred; |
1520 | int i; | 1540 | int i; |
@@ -1528,18 +1548,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1528 | 1548 | ||
1529 | mutex_lock(&inode->i_mutex); | 1549 | mutex_lock(&inode->i_mutex); |
1530 | mutex_lock(&cgroup_mutex); | 1550 | mutex_lock(&cgroup_mutex); |
1551 | mutex_lock(&cgroup_root_mutex); | ||
1531 | 1552 | ||
1532 | if (strlen(root->name)) { | 1553 | /* Check for name clashes with existing mounts */ |
1533 | /* Check for name clashes with existing mounts */ | 1554 | ret = -EBUSY; |
1534 | for_each_active_root(existing_root) { | 1555 | if (strlen(root->name)) |
1535 | if (!strcmp(existing_root->name, root->name)) { | 1556 | for_each_active_root(existing_root) |
1536 | ret = -EBUSY; | 1557 | if (!strcmp(existing_root->name, root->name)) |
1537 | mutex_unlock(&cgroup_mutex); | 1558 | goto unlock_drop; |
1538 | mutex_unlock(&inode->i_mutex); | ||
1539 | goto drop_new_super; | ||
1540 | } | ||
1541 | } | ||
1542 | } | ||
1543 | 1559 | ||
1544 | /* | 1560 | /* |
1545 | * We're accessing css_set_count without locking | 1561 | * We're accessing css_set_count without locking |
@@ -1549,18 +1565,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1549 | * have some link structures left over | 1565 | * have some link structures left over |
1550 | */ | 1566 | */ |
1551 | ret = allocate_cg_links(css_set_count, &tmp_cg_links); | 1567 | ret = allocate_cg_links(css_set_count, &tmp_cg_links); |
1552 | if (ret) { | 1568 | if (ret) |
1553 | mutex_unlock(&cgroup_mutex); | 1569 | goto unlock_drop; |
1554 | mutex_unlock(&inode->i_mutex); | ||
1555 | goto drop_new_super; | ||
1556 | } | ||
1557 | 1570 | ||
1558 | ret = rebind_subsystems(root, root->subsys_bits); | 1571 | ret = rebind_subsystems(root, root->subsys_bits); |
1559 | if (ret == -EBUSY) { | 1572 | if (ret == -EBUSY) { |
1560 | mutex_unlock(&cgroup_mutex); | ||
1561 | mutex_unlock(&inode->i_mutex); | ||
1562 | free_cg_links(&tmp_cg_links); | 1573 | free_cg_links(&tmp_cg_links); |
1563 | goto drop_new_super; | 1574 | goto unlock_drop; |
1564 | } | 1575 | } |
1565 | /* | 1576 | /* |
1566 | * There must be no failure case after here, since rebinding | 1577 | * There must be no failure case after here, since rebinding |
@@ -1599,6 +1610,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1599 | cred = override_creds(&init_cred); | 1610 | cred = override_creds(&init_cred); |
1600 | cgroup_populate_dir(root_cgrp); | 1611 | cgroup_populate_dir(root_cgrp); |
1601 | revert_creds(cred); | 1612 | revert_creds(cred); |
1613 | mutex_unlock(&cgroup_root_mutex); | ||
1602 | mutex_unlock(&cgroup_mutex); | 1614 | mutex_unlock(&cgroup_mutex); |
1603 | mutex_unlock(&inode->i_mutex); | 1615 | mutex_unlock(&inode->i_mutex); |
1604 | } else { | 1616 | } else { |
@@ -1615,6 +1627,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1615 | kfree(opts.name); | 1627 | kfree(opts.name); |
1616 | return dget(sb->s_root); | 1628 | return dget(sb->s_root); |
1617 | 1629 | ||
1630 | unlock_drop: | ||
1631 | mutex_unlock(&cgroup_root_mutex); | ||
1632 | mutex_unlock(&cgroup_mutex); | ||
1633 | mutex_unlock(&inode->i_mutex); | ||
1618 | drop_new_super: | 1634 | drop_new_super: |
1619 | deactivate_locked_super(sb); | 1635 | deactivate_locked_super(sb); |
1620 | drop_modules: | 1636 | drop_modules: |
@@ -1639,6 +1655,7 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
1639 | BUG_ON(!list_empty(&cgrp->sibling)); | 1655 | BUG_ON(!list_empty(&cgrp->sibling)); |
1640 | 1656 | ||
1641 | mutex_lock(&cgroup_mutex); | 1657 | mutex_lock(&cgroup_mutex); |
1658 | mutex_lock(&cgroup_root_mutex); | ||
1642 | 1659 | ||
1643 | /* Rebind all subsystems back to the default hierarchy */ | 1660 | /* Rebind all subsystems back to the default hierarchy */ |
1644 | ret = rebind_subsystems(root, 0); | 1661 | ret = rebind_subsystems(root, 0); |
@@ -1664,6 +1681,7 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
1664 | root_count--; | 1681 | root_count--; |
1665 | } | 1682 | } |
1666 | 1683 | ||
1684 | mutex_unlock(&cgroup_root_mutex); | ||
1667 | mutex_unlock(&cgroup_mutex); | 1685 | mutex_unlock(&cgroup_mutex); |
1668 | 1686 | ||
1669 | kill_litter_super(sb); | 1687 | kill_litter_super(sb); |
@@ -1740,11 +1758,90 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | |||
1740 | EXPORT_SYMBOL_GPL(cgroup_path); | 1758 | EXPORT_SYMBOL_GPL(cgroup_path); |
1741 | 1759 | ||
1742 | /* | 1760 | /* |
1761 | * Control Group taskset | ||
1762 | */ | ||
1763 | struct task_and_cgroup { | ||
1764 | struct task_struct *task; | ||
1765 | struct cgroup *cgrp; | ||
1766 | }; | ||
1767 | |||
1768 | struct cgroup_taskset { | ||
1769 | struct task_and_cgroup single; | ||
1770 | struct flex_array *tc_array; | ||
1771 | int tc_array_len; | ||
1772 | int idx; | ||
1773 | struct cgroup *cur_cgrp; | ||
1774 | }; | ||
1775 | |||
1776 | /** | ||
1777 | * cgroup_taskset_first - reset taskset and return the first task | ||
1778 | * @tset: taskset of interest | ||
1779 | * | ||
1780 | * @tset iteration is initialized and the first task is returned. | ||
1781 | */ | ||
1782 | struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset) | ||
1783 | { | ||
1784 | if (tset->tc_array) { | ||
1785 | tset->idx = 0; | ||
1786 | return cgroup_taskset_next(tset); | ||
1787 | } else { | ||
1788 | tset->cur_cgrp = tset->single.cgrp; | ||
1789 | return tset->single.task; | ||
1790 | } | ||
1791 | } | ||
1792 | EXPORT_SYMBOL_GPL(cgroup_taskset_first); | ||
1793 | |||
1794 | /** | ||
1795 | * cgroup_taskset_next - iterate to the next task in taskset | ||
1796 | * @tset: taskset of interest | ||
1797 | * | ||
1798 | * Return the next task in @tset. Iteration must have been initialized | ||
1799 | * with cgroup_taskset_first(). | ||
1800 | */ | ||
1801 | struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset) | ||
1802 | { | ||
1803 | struct task_and_cgroup *tc; | ||
1804 | |||
1805 | if (!tset->tc_array || tset->idx >= tset->tc_array_len) | ||
1806 | return NULL; | ||
1807 | |||
1808 | tc = flex_array_get(tset->tc_array, tset->idx++); | ||
1809 | tset->cur_cgrp = tc->cgrp; | ||
1810 | return tc->task; | ||
1811 | } | ||
1812 | EXPORT_SYMBOL_GPL(cgroup_taskset_next); | ||
1813 | |||
1814 | /** | ||
1815 | * cgroup_taskset_cur_cgroup - return the matching cgroup for the current task | ||
1816 | * @tset: taskset of interest | ||
1817 | * | ||
1818 | * Return the cgroup for the current (last returned) task of @tset. This | ||
1819 | * function must be preceded by either cgroup_taskset_first() or | ||
1820 | * cgroup_taskset_next(). | ||
1821 | */ | ||
1822 | struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset) | ||
1823 | { | ||
1824 | return tset->cur_cgrp; | ||
1825 | } | ||
1826 | EXPORT_SYMBOL_GPL(cgroup_taskset_cur_cgroup); | ||
1827 | |||
1828 | /** | ||
1829 | * cgroup_taskset_size - return the number of tasks in taskset | ||
1830 | * @tset: taskset of interest | ||
1831 | */ | ||
1832 | int cgroup_taskset_size(struct cgroup_taskset *tset) | ||
1833 | { | ||
1834 | return tset->tc_array ? tset->tc_array_len : 1; | ||
1835 | } | ||
1836 | EXPORT_SYMBOL_GPL(cgroup_taskset_size); | ||
1837 | |||
1838 | |||
1839 | /* | ||
1743 | * cgroup_task_migrate - move a task from one cgroup to another. | 1840 | * cgroup_task_migrate - move a task from one cgroup to another. |
1744 | * | 1841 | * |
1745 | * 'guarantee' is set if the caller promises that a new css_set for the task | 1842 | * 'guarantee' is set if the caller promises that a new css_set for the task |
1746 | * will already exist. If not set, this function might sleep, and can fail with | 1843 | * will already exist. If not set, this function might sleep, and can fail with |
1747 | * -ENOMEM. Otherwise, it can only fail with -ESRCH. | 1844 | * -ENOMEM. Must be called with cgroup_mutex and threadgroup locked. |
1748 | */ | 1845 | */ |
1749 | static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, | 1846 | static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, |
1750 | struct task_struct *tsk, bool guarantee) | 1847 | struct task_struct *tsk, bool guarantee) |
@@ -1753,14 +1850,12 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, | |||
1753 | struct css_set *newcg; | 1850 | struct css_set *newcg; |
1754 | 1851 | ||
1755 | /* | 1852 | /* |
1756 | * get old css_set. we need to take task_lock and refcount it, because | 1853 | * We are synchronized through threadgroup_lock() against PF_EXITING |
1757 | * an exiting task can change its css_set to init_css_set and drop its | 1854 | * setting such that we can't race against cgroup_exit() changing the |
1758 | * old one without taking cgroup_mutex. | 1855 | * css_set to init_css_set and dropping the old one. |
1759 | */ | 1856 | */ |
1760 | task_lock(tsk); | 1857 | WARN_ON_ONCE(tsk->flags & PF_EXITING); |
1761 | oldcg = tsk->cgroups; | 1858 | oldcg = tsk->cgroups; |
1762 | get_css_set(oldcg); | ||
1763 | task_unlock(tsk); | ||
1764 | 1859 | ||
1765 | /* locate or allocate a new css_set for this task. */ | 1860 | /* locate or allocate a new css_set for this task. */ |
1766 | if (guarantee) { | 1861 | if (guarantee) { |
@@ -1775,20 +1870,11 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, | |||
1775 | might_sleep(); | 1870 | might_sleep(); |
1776 | /* find_css_set will give us newcg already referenced. */ | 1871 | /* find_css_set will give us newcg already referenced. */ |
1777 | newcg = find_css_set(oldcg, cgrp); | 1872 | newcg = find_css_set(oldcg, cgrp); |
1778 | if (!newcg) { | 1873 | if (!newcg) |
1779 | put_css_set(oldcg); | ||
1780 | return -ENOMEM; | 1874 | return -ENOMEM; |
1781 | } | ||
1782 | } | 1875 | } |
1783 | put_css_set(oldcg); | ||
1784 | 1876 | ||
1785 | /* if PF_EXITING is set, the tsk->cgroups pointer is no longer safe. */ | ||
1786 | task_lock(tsk); | 1877 | task_lock(tsk); |
1787 | if (tsk->flags & PF_EXITING) { | ||
1788 | task_unlock(tsk); | ||
1789 | put_css_set(newcg); | ||
1790 | return -ESRCH; | ||
1791 | } | ||
1792 | rcu_assign_pointer(tsk->cgroups, newcg); | 1878 | rcu_assign_pointer(tsk->cgroups, newcg); |
1793 | task_unlock(tsk); | 1879 | task_unlock(tsk); |
1794 | 1880 | ||
@@ -1814,8 +1900,8 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, | |||
1814 | * @cgrp: the cgroup the task is attaching to | 1900 | * @cgrp: the cgroup the task is attaching to |
1815 | * @tsk: the task to be attached | 1901 | * @tsk: the task to be attached |
1816 | * | 1902 | * |
1817 | * Call holding cgroup_mutex. May take task_lock of | 1903 | * Call with cgroup_mutex and threadgroup locked. May take task_lock of |
1818 | * the task 'tsk' during call. | 1904 | * @tsk during call. |
1819 | */ | 1905 | */ |
1820 | int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | 1906 | int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) |
1821 | { | 1907 | { |
@@ -1823,15 +1909,23 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1823 | struct cgroup_subsys *ss, *failed_ss = NULL; | 1909 | struct cgroup_subsys *ss, *failed_ss = NULL; |
1824 | struct cgroup *oldcgrp; | 1910 | struct cgroup *oldcgrp; |
1825 | struct cgroupfs_root *root = cgrp->root; | 1911 | struct cgroupfs_root *root = cgrp->root; |
1912 | struct cgroup_taskset tset = { }; | ||
1913 | |||
1914 | /* @tsk either already exited or can't exit until the end */ | ||
1915 | if (tsk->flags & PF_EXITING) | ||
1916 | return -ESRCH; | ||
1826 | 1917 | ||
1827 | /* Nothing to do if the task is already in that cgroup */ | 1918 | /* Nothing to do if the task is already in that cgroup */ |
1828 | oldcgrp = task_cgroup_from_root(tsk, root); | 1919 | oldcgrp = task_cgroup_from_root(tsk, root); |
1829 | if (cgrp == oldcgrp) | 1920 | if (cgrp == oldcgrp) |
1830 | return 0; | 1921 | return 0; |
1831 | 1922 | ||
1923 | tset.single.task = tsk; | ||
1924 | tset.single.cgrp = oldcgrp; | ||
1925 | |||
1832 | for_each_subsys(root, ss) { | 1926 | for_each_subsys(root, ss) { |
1833 | if (ss->can_attach) { | 1927 | if (ss->can_attach) { |
1834 | retval = ss->can_attach(ss, cgrp, tsk); | 1928 | retval = ss->can_attach(ss, cgrp, &tset); |
1835 | if (retval) { | 1929 | if (retval) { |
1836 | /* | 1930 | /* |
1837 | * Remember on which subsystem the can_attach() | 1931 | * Remember on which subsystem the can_attach() |
@@ -1843,13 +1937,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1843 | goto out; | 1937 | goto out; |
1844 | } | 1938 | } |
1845 | } | 1939 | } |
1846 | if (ss->can_attach_task) { | ||
1847 | retval = ss->can_attach_task(cgrp, tsk); | ||
1848 | if (retval) { | ||
1849 | failed_ss = ss; | ||
1850 | goto out; | ||
1851 | } | ||
1852 | } | ||
1853 | } | 1940 | } |
1854 | 1941 | ||
1855 | retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false); | 1942 | retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false); |
@@ -1857,12 +1944,8 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1857 | goto out; | 1944 | goto out; |
1858 | 1945 | ||
1859 | for_each_subsys(root, ss) { | 1946 | for_each_subsys(root, ss) { |
1860 | if (ss->pre_attach) | ||
1861 | ss->pre_attach(cgrp); | ||
1862 | if (ss->attach_task) | ||
1863 | ss->attach_task(cgrp, tsk); | ||
1864 | if (ss->attach) | 1947 | if (ss->attach) |
1865 | ss->attach(ss, cgrp, oldcgrp, tsk); | 1948 | ss->attach(ss, cgrp, &tset); |
1866 | } | 1949 | } |
1867 | 1950 | ||
1868 | synchronize_rcu(); | 1951 | synchronize_rcu(); |
@@ -1884,7 +1967,7 @@ out: | |||
1884 | */ | 1967 | */ |
1885 | break; | 1968 | break; |
1886 | if (ss->cancel_attach) | 1969 | if (ss->cancel_attach) |
1887 | ss->cancel_attach(ss, cgrp, tsk); | 1970 | ss->cancel_attach(ss, cgrp, &tset); |
1888 | } | 1971 | } |
1889 | } | 1972 | } |
1890 | return retval; | 1973 | return retval; |
@@ -1935,23 +2018,17 @@ static bool css_set_check_fetched(struct cgroup *cgrp, | |||
1935 | 2018 | ||
1936 | read_lock(&css_set_lock); | 2019 | read_lock(&css_set_lock); |
1937 | newcg = find_existing_css_set(cg, cgrp, template); | 2020 | newcg = find_existing_css_set(cg, cgrp, template); |
1938 | if (newcg) | ||
1939 | get_css_set(newcg); | ||
1940 | read_unlock(&css_set_lock); | 2021 | read_unlock(&css_set_lock); |
1941 | 2022 | ||
1942 | /* doesn't exist at all? */ | 2023 | /* doesn't exist at all? */ |
1943 | if (!newcg) | 2024 | if (!newcg) |
1944 | return false; | 2025 | return false; |
1945 | /* see if it's already in the list */ | 2026 | /* see if it's already in the list */ |
1946 | list_for_each_entry(cg_entry, newcg_list, links) { | 2027 | list_for_each_entry(cg_entry, newcg_list, links) |
1947 | if (cg_entry->cg == newcg) { | 2028 | if (cg_entry->cg == newcg) |
1948 | put_css_set(newcg); | ||
1949 | return true; | 2029 | return true; |
1950 | } | ||
1951 | } | ||
1952 | 2030 | ||
1953 | /* not found */ | 2031 | /* not found */ |
1954 | put_css_set(newcg); | ||
1955 | return false; | 2032 | return false; |
1956 | } | 2033 | } |
1957 | 2034 | ||
@@ -1985,21 +2062,21 @@ static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg, | |||
1985 | * @cgrp: the cgroup to attach to | 2062 | * @cgrp: the cgroup to attach to |
1986 | * @leader: the threadgroup leader task_struct of the group to be attached | 2063 | * @leader: the threadgroup leader task_struct of the group to be attached |
1987 | * | 2064 | * |
1988 | * Call holding cgroup_mutex and the threadgroup_fork_lock of the leader. Will | 2065 | * Call holding cgroup_mutex and the group_rwsem of the leader. Will take |
1989 | * take task_lock of each thread in leader's threadgroup individually in turn. | 2066 | * task_lock of each thread in leader's threadgroup individually in turn. |
1990 | */ | 2067 | */ |
1991 | int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | 2068 | static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) |
1992 | { | 2069 | { |
1993 | int retval, i, group_size; | 2070 | int retval, i, group_size; |
1994 | struct cgroup_subsys *ss, *failed_ss = NULL; | 2071 | struct cgroup_subsys *ss, *failed_ss = NULL; |
1995 | bool cancel_failed_ss = false; | ||
1996 | /* guaranteed to be initialized later, but the compiler needs this */ | 2072 | /* guaranteed to be initialized later, but the compiler needs this */ |
1997 | struct cgroup *oldcgrp = NULL; | ||
1998 | struct css_set *oldcg; | 2073 | struct css_set *oldcg; |
1999 | struct cgroupfs_root *root = cgrp->root; | 2074 | struct cgroupfs_root *root = cgrp->root; |
2000 | /* threadgroup list cursor and array */ | 2075 | /* threadgroup list cursor and array */ |
2001 | struct task_struct *tsk; | 2076 | struct task_struct *tsk; |
2077 | struct task_and_cgroup *tc; | ||
2002 | struct flex_array *group; | 2078 | struct flex_array *group; |
2079 | struct cgroup_taskset tset = { }; | ||
2003 | /* | 2080 | /* |
2004 | * we need to make sure we have css_sets for all the tasks we're | 2081 | * we need to make sure we have css_sets for all the tasks we're |
2005 | * going to move -before- we actually start moving them, so that in | 2082 | * going to move -before- we actually start moving them, so that in |
@@ -2012,13 +2089,12 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
2012 | * step 0: in order to do expensive, possibly blocking operations for | 2089 | * step 0: in order to do expensive, possibly blocking operations for |
2013 | * every thread, we cannot iterate the thread group list, since it needs | 2090 | * every thread, we cannot iterate the thread group list, since it needs |
2014 | * rcu or tasklist locked. instead, build an array of all threads in the | 2091 | * rcu or tasklist locked. instead, build an array of all threads in the |
2015 | * group - threadgroup_fork_lock prevents new threads from appearing, | 2092 | * group - group_rwsem prevents new threads from appearing, and if |
2016 | * and if threads exit, this will just be an over-estimate. | 2093 | * threads exit, this will just be an over-estimate. |
2017 | */ | 2094 | */ |
2018 | group_size = get_nr_threads(leader); | 2095 | group_size = get_nr_threads(leader); |
2019 | /* flex_array supports very large thread-groups better than kmalloc. */ | 2096 | /* flex_array supports very large thread-groups better than kmalloc. */ |
2020 | group = flex_array_alloc(sizeof(struct task_struct *), group_size, | 2097 | group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL); |
2021 | GFP_KERNEL); | ||
2022 | if (!group) | 2098 | if (!group) |
2023 | return -ENOMEM; | 2099 | return -ENOMEM; |
2024 | /* pre-allocate to guarantee space while iterating in rcu read-side. */ | 2100 | /* pre-allocate to guarantee space while iterating in rcu read-side. */ |
@@ -2040,49 +2116,53 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
2040 | retval = -EAGAIN; | 2116 | retval = -EAGAIN; |
2041 | goto out_free_group_list; | 2117 | goto out_free_group_list; |
2042 | } | 2118 | } |
2043 | /* take a reference on each task in the group to go in the array. */ | 2119 | |
2044 | tsk = leader; | 2120 | tsk = leader; |
2045 | i = 0; | 2121 | i = 0; |
2046 | do { | 2122 | do { |
2123 | struct task_and_cgroup ent; | ||
2124 | |||
2125 | /* @tsk either already exited or can't exit until the end */ | ||
2126 | if (tsk->flags & PF_EXITING) | ||
2127 | continue; | ||
2128 | |||
2047 | /* as per above, nr_threads may decrease, but not increase. */ | 2129 | /* as per above, nr_threads may decrease, but not increase. */ |
2048 | BUG_ON(i >= group_size); | 2130 | BUG_ON(i >= group_size); |
2049 | get_task_struct(tsk); | ||
2050 | /* | 2131 | /* |
2051 | * saying GFP_ATOMIC has no effect here because we did prealloc | 2132 | * saying GFP_ATOMIC has no effect here because we did prealloc |
2052 | * earlier, but it's good form to communicate our expectations. | 2133 | * earlier, but it's good form to communicate our expectations. |
2053 | */ | 2134 | */ |
2054 | retval = flex_array_put_ptr(group, i, tsk, GFP_ATOMIC); | 2135 | ent.task = tsk; |
2136 | ent.cgrp = task_cgroup_from_root(tsk, root); | ||
2137 | /* nothing to do if this task is already in the cgroup */ | ||
2138 | if (ent.cgrp == cgrp) | ||
2139 | continue; | ||
2140 | retval = flex_array_put(group, i, &ent, GFP_ATOMIC); | ||
2055 | BUG_ON(retval != 0); | 2141 | BUG_ON(retval != 0); |
2056 | i++; | 2142 | i++; |
2057 | } while_each_thread(leader, tsk); | 2143 | } while_each_thread(leader, tsk); |
2058 | /* remember the number of threads in the array for later. */ | 2144 | /* remember the number of threads in the array for later. */ |
2059 | group_size = i; | 2145 | group_size = i; |
2146 | tset.tc_array = group; | ||
2147 | tset.tc_array_len = group_size; | ||
2060 | read_unlock(&tasklist_lock); | 2148 | read_unlock(&tasklist_lock); |
2061 | 2149 | ||
2150 | /* methods shouldn't be called if no task is actually migrating */ | ||
2151 | retval = 0; | ||
2152 | if (!group_size) | ||
2153 | goto out_free_group_list; | ||
2154 | |||
2062 | /* | 2155 | /* |
2063 | * step 1: check that we can legitimately attach to the cgroup. | 2156 | * step 1: check that we can legitimately attach to the cgroup. |
2064 | */ | 2157 | */ |
2065 | for_each_subsys(root, ss) { | 2158 | for_each_subsys(root, ss) { |
2066 | if (ss->can_attach) { | 2159 | if (ss->can_attach) { |
2067 | retval = ss->can_attach(ss, cgrp, leader); | 2160 | retval = ss->can_attach(ss, cgrp, &tset); |
2068 | if (retval) { | 2161 | if (retval) { |
2069 | failed_ss = ss; | 2162 | failed_ss = ss; |
2070 | goto out_cancel_attach; | 2163 | goto out_cancel_attach; |
2071 | } | 2164 | } |
2072 | } | 2165 | } |
2073 | /* a callback to be run on every thread in the threadgroup. */ | ||
2074 | if (ss->can_attach_task) { | ||
2075 | /* run on each task in the threadgroup. */ | ||
2076 | for (i = 0; i < group_size; i++) { | ||
2077 | tsk = flex_array_get_ptr(group, i); | ||
2078 | retval = ss->can_attach_task(cgrp, tsk); | ||
2079 | if (retval) { | ||
2080 | failed_ss = ss; | ||
2081 | cancel_failed_ss = true; | ||
2082 | goto out_cancel_attach; | ||
2083 | } | ||
2084 | } | ||
2085 | } | ||
2086 | } | 2166 | } |
2087 | 2167 | ||
2088 | /* | 2168 | /* |
@@ -2091,67 +2171,36 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
2091 | */ | 2171 | */ |
2092 | INIT_LIST_HEAD(&newcg_list); | 2172 | INIT_LIST_HEAD(&newcg_list); |
2093 | for (i = 0; i < group_size; i++) { | 2173 | for (i = 0; i < group_size; i++) { |
2094 | tsk = flex_array_get_ptr(group, i); | 2174 | tc = flex_array_get(group, i); |
2095 | /* nothing to do if this task is already in the cgroup */ | 2175 | oldcg = tc->task->cgroups; |
2096 | oldcgrp = task_cgroup_from_root(tsk, root); | 2176 | |
2097 | if (cgrp == oldcgrp) | 2177 | /* if we don't already have it in the list get a new one */ |
2098 | continue; | 2178 | if (!css_set_check_fetched(cgrp, tc->task, oldcg, |
2099 | /* get old css_set pointer */ | 2179 | &newcg_list)) { |
2100 | task_lock(tsk); | ||
2101 | oldcg = tsk->cgroups; | ||
2102 | get_css_set(oldcg); | ||
2103 | task_unlock(tsk); | ||
2104 | /* see if the new one for us is already in the list? */ | ||
2105 | if (css_set_check_fetched(cgrp, tsk, oldcg, &newcg_list)) { | ||
2106 | /* was already there, nothing to do. */ | ||
2107 | put_css_set(oldcg); | ||
2108 | } else { | ||
2109 | /* we don't already have it. get new one. */ | ||
2110 | retval = css_set_prefetch(cgrp, oldcg, &newcg_list); | 2180 | retval = css_set_prefetch(cgrp, oldcg, &newcg_list); |
2111 | put_css_set(oldcg); | ||
2112 | if (retval) | 2181 | if (retval) |
2113 | goto out_list_teardown; | 2182 | goto out_list_teardown; |
2114 | } | 2183 | } |
2115 | } | 2184 | } |
2116 | 2185 | ||
2117 | /* | 2186 | /* |
2118 | * step 3: now that we're guaranteed success wrt the css_sets, proceed | 2187 | * step 3: now that we're guaranteed success wrt the css_sets, |
2119 | * to move all tasks to the new cgroup, calling ss->attach_task for each | 2188 | * proceed to move all tasks to the new cgroup. There are no |
2120 | * one along the way. there are no failure cases after here, so this is | 2189 | * failure cases after here, so this is the commit point. |
2121 | * the commit point. | ||
2122 | */ | 2190 | */ |
2123 | for_each_subsys(root, ss) { | ||
2124 | if (ss->pre_attach) | ||
2125 | ss->pre_attach(cgrp); | ||
2126 | } | ||
2127 | for (i = 0; i < group_size; i++) { | 2191 | for (i = 0; i < group_size; i++) { |
2128 | tsk = flex_array_get_ptr(group, i); | 2192 | tc = flex_array_get(group, i); |
2129 | /* leave current thread as it is if it's already there */ | 2193 | retval = cgroup_task_migrate(cgrp, tc->cgrp, tc->task, true); |
2130 | oldcgrp = task_cgroup_from_root(tsk, root); | 2194 | BUG_ON(retval); |
2131 | if (cgrp == oldcgrp) | ||
2132 | continue; | ||
2133 | /* if the thread is PF_EXITING, it can just get skipped. */ | ||
2134 | retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true); | ||
2135 | if (retval == 0) { | ||
2136 | /* attach each task to each subsystem */ | ||
2137 | for_each_subsys(root, ss) { | ||
2138 | if (ss->attach_task) | ||
2139 | ss->attach_task(cgrp, tsk); | ||
2140 | } | ||
2141 | } else { | ||
2142 | BUG_ON(retval != -ESRCH); | ||
2143 | } | ||
2144 | } | 2195 | } |
2145 | /* nothing is sensitive to fork() after this point. */ | 2196 | /* nothing is sensitive to fork() after this point. */ |
2146 | 2197 | ||
2147 | /* | 2198 | /* |
2148 | * step 4: do expensive, non-thread-specific subsystem callbacks. | 2199 | * step 4: do subsystem attach callbacks. |
2149 | * TODO: if ever a subsystem needs to know the oldcgrp for each task | ||
2150 | * being moved, this call will need to be reworked to communicate that. | ||
2151 | */ | 2200 | */ |
2152 | for_each_subsys(root, ss) { | 2201 | for_each_subsys(root, ss) { |
2153 | if (ss->attach) | 2202 | if (ss->attach) |
2154 | ss->attach(ss, cgrp, oldcgrp, leader); | 2203 | ss->attach(ss, cgrp, &tset); |
2155 | } | 2204 | } |
2156 | 2205 | ||
2157 | /* | 2206 | /* |
@@ -2171,20 +2220,12 @@ out_cancel_attach: | |||
2171 | /* same deal as in cgroup_attach_task */ | 2220 | /* same deal as in cgroup_attach_task */ |
2172 | if (retval) { | 2221 | if (retval) { |
2173 | for_each_subsys(root, ss) { | 2222 | for_each_subsys(root, ss) { |
2174 | if (ss == failed_ss) { | 2223 | if (ss == failed_ss) |
2175 | if (cancel_failed_ss && ss->cancel_attach) | ||
2176 | ss->cancel_attach(ss, cgrp, leader); | ||
2177 | break; | 2224 | break; |
2178 | } | ||
2179 | if (ss->cancel_attach) | 2225 | if (ss->cancel_attach) |
2180 | ss->cancel_attach(ss, cgrp, leader); | 2226 | ss->cancel_attach(ss, cgrp, &tset); |
2181 | } | 2227 | } |
2182 | } | 2228 | } |
2183 | /* clean up the array of referenced threads in the group. */ | ||
2184 | for (i = 0; i < group_size; i++) { | ||
2185 | tsk = flex_array_get_ptr(group, i); | ||
2186 | put_task_struct(tsk); | ||
2187 | } | ||
2188 | out_free_group_list: | 2229 | out_free_group_list: |
2189 | flex_array_free(group); | 2230 | flex_array_free(group); |
2190 | return retval; | 2231 | return retval; |
@@ -2192,8 +2233,8 @@ out_free_group_list: | |||
2192 | 2233 | ||
2193 | /* | 2234 | /* |
2194 | * Find the task_struct of the task to attach by vpid and pass it along to the | 2235 | * Find the task_struct of the task to attach by vpid and pass it along to the |
2195 | * function to attach either it or all tasks in its threadgroup. Will take | 2236 | * function to attach either it or all tasks in its threadgroup. Will lock |
2196 | * cgroup_mutex; may take task_lock of task. | 2237 | * cgroup_mutex and threadgroup; may take task_lock of task. |
2197 | */ | 2238 | */ |
2198 | static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) | 2239 | static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) |
2199 | { | 2240 | { |
@@ -2220,13 +2261,7 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) | |||
2220 | * detect it later. | 2261 | * detect it later. |
2221 | */ | 2262 | */ |
2222 | tsk = tsk->group_leader; | 2263 | tsk = tsk->group_leader; |
2223 | } else if (tsk->flags & PF_EXITING) { | ||
2224 | /* optimization for the single-task-only case */ | ||
2225 | rcu_read_unlock(); | ||
2226 | cgroup_unlock(); | ||
2227 | return -ESRCH; | ||
2228 | } | 2264 | } |
2229 | |||
2230 | /* | 2265 | /* |
2231 | * even if we're attaching all tasks in the thread group, we | 2266 | * even if we're attaching all tasks in the thread group, we |
2232 | * only need to check permissions on one of them. | 2267 | * only need to check permissions on one of them. |
@@ -2249,13 +2284,15 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) | |||
2249 | get_task_struct(tsk); | 2284 | get_task_struct(tsk); |
2250 | } | 2285 | } |
2251 | 2286 | ||
2252 | if (threadgroup) { | 2287 | threadgroup_lock(tsk); |
2253 | threadgroup_fork_write_lock(tsk); | 2288 | |
2289 | if (threadgroup) | ||
2254 | ret = cgroup_attach_proc(cgrp, tsk); | 2290 | ret = cgroup_attach_proc(cgrp, tsk); |
2255 | threadgroup_fork_write_unlock(tsk); | 2291 | else |
2256 | } else { | ||
2257 | ret = cgroup_attach_task(cgrp, tsk); | 2292 | ret = cgroup_attach_task(cgrp, tsk); |
2258 | } | 2293 | |
2294 | threadgroup_unlock(tsk); | ||
2295 | |||
2259 | put_task_struct(tsk); | 2296 | put_task_struct(tsk); |
2260 | cgroup_unlock(); | 2297 | cgroup_unlock(); |
2261 | return ret; | 2298 | return ret; |
@@ -2306,7 +2343,9 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, | |||
2306 | return -EINVAL; | 2343 | return -EINVAL; |
2307 | if (!cgroup_lock_live_group(cgrp)) | 2344 | if (!cgroup_lock_live_group(cgrp)) |
2308 | return -ENODEV; | 2345 | return -ENODEV; |
2346 | mutex_lock(&cgroup_root_mutex); | ||
2309 | strcpy(cgrp->root->release_agent_path, buffer); | 2347 | strcpy(cgrp->root->release_agent_path, buffer); |
2348 | mutex_unlock(&cgroup_root_mutex); | ||
2310 | cgroup_unlock(); | 2349 | cgroup_unlock(); |
2311 | return 0; | 2350 | return 0; |
2312 | } | 2351 | } |
@@ -2789,6 +2828,7 @@ static void cgroup_enable_task_cg_lists(void) | |||
2789 | } | 2828 | } |
2790 | 2829 | ||
2791 | void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) | 2830 | void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) |
2831 | __acquires(css_set_lock) | ||
2792 | { | 2832 | { |
2793 | /* | 2833 | /* |
2794 | * The first time anyone tries to iterate across a cgroup, | 2834 | * The first time anyone tries to iterate across a cgroup, |
@@ -2828,6 +2868,7 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, | |||
2828 | } | 2868 | } |
2829 | 2869 | ||
2830 | void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it) | 2870 | void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it) |
2871 | __releases(css_set_lock) | ||
2831 | { | 2872 | { |
2832 | read_unlock(&css_set_lock); | 2873 | read_unlock(&css_set_lock); |
2833 | } | 2874 | } |
@@ -4491,20 +4532,31 @@ static const struct file_operations proc_cgroupstats_operations = { | |||
4491 | * | 4532 | * |
4492 | * A pointer to the shared css_set was automatically copied in | 4533 | * A pointer to the shared css_set was automatically copied in |
4493 | * fork.c by dup_task_struct(). However, we ignore that copy, since | 4534 | * fork.c by dup_task_struct(). However, we ignore that copy, since |
4494 | * it was not made under the protection of RCU or cgroup_mutex, so | 4535 | * it was not made under the protection of RCU, cgroup_mutex or |
4495 | * might no longer be a valid cgroup pointer. cgroup_attach_task() might | 4536 | * threadgroup_change_begin(), so it might no longer be a valid |
4496 | * have already changed current->cgroups, allowing the previously | 4537 | * cgroup pointer. cgroup_attach_task() might have already changed |
4497 | * referenced cgroup group to be removed and freed. | 4538 | * current->cgroups, allowing the previously referenced cgroup |
4539 | * group to be removed and freed. | ||
4540 | * | ||
4541 | * Outside of pointer validity, we also need to handle the css_set | ||
4542 | * inheritance between threadgroup_change_begin() and | ||
4543 | * threadgroup_change_end(), so that no process-wide migration | ||
4544 | * performed by cgroup_attach_proc() can miss a thread because it | ||
4545 | * is too early or too late in the fork stage. | ||
4498 | * | 4546 | * |
4499 | * At the point that cgroup_fork() is called, 'current' is the parent | 4547 | * At the point that cgroup_fork() is called, 'current' is the parent |
4500 | * task, and the passed argument 'child' points to the child task. | 4548 | * task, and the passed argument 'child' points to the child task. |
4501 | */ | 4549 | */ |
4502 | void cgroup_fork(struct task_struct *child) | 4550 | void cgroup_fork(struct task_struct *child) |
4503 | { | 4551 | { |
4504 | task_lock(current); | 4552 | /* |
4553 | * We don't need to task_lock() current because current->cgroups | ||
4554 | * can't be changed concurrently here. The parent obviously hasn't | ||
4555 | * exited and called cgroup_exit(), and we are synchronized against | ||
4556 | * cgroup migration through threadgroup_change_begin(). | ||
4557 | */ | ||
4505 | child->cgroups = current->cgroups; | 4558 | child->cgroups = current->cgroups; |
4506 | get_css_set(child->cgroups); | 4559 | get_css_set(child->cgroups); |
4507 | task_unlock(current); | ||
4508 | INIT_LIST_HEAD(&child->cg_list); | 4560 | INIT_LIST_HEAD(&child->cg_list); |
4509 | } | 4561 | } |
4510 | 4562 | ||
@@ -4546,10 +4598,19 @@ void cgroup_post_fork(struct task_struct *child) | |||
4546 | { | 4598 | { |
4547 | if (use_task_css_set_links) { | 4599 | if (use_task_css_set_links) { |
4548 | write_lock(&css_set_lock); | 4600 | write_lock(&css_set_lock); |
4549 | task_lock(child); | 4601 | if (list_empty(&child->cg_list)) { |
4550 | if (list_empty(&child->cg_list)) | 4602 | /* |
4603 | * It's safe to use child->cgroups without task_lock() | ||
4604 | * here because we are protected through | ||
4605 | * threadgroup_change_begin() against concurrent | ||
4606 | * css_set change in cgroup_task_migrate(). Also | ||
4607 | * the task can't exit at that point until | ||
4608 | * wake_up_new_task() is called, so we are protected | ||
4609 | * against cgroup_exit() setting child->cgroup to | ||
4610 | * init_css_set. | ||
4611 | */ | ||
4551 | list_add(&child->cg_list, &child->cgroups->tasks); | 4612 | list_add(&child->cg_list, &child->cgroups->tasks); |
4552 | task_unlock(child); | 4613 | } |
4553 | write_unlock(&css_set_lock); | 4614 | write_unlock(&css_set_lock); |
4554 | } | 4615 | } |
4555 | } | 4616 | } |
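
For reference, the subsystem-facing half of the taskset API that the converted controllers below rely on looks roughly as follows. The prototypes are inferred from the call sites in this patch; cgroup_taskset_next() and the iterator body are a sketch of the likely include/linux/cgroup.h additions rather than a quote from the tree, and the skip behaviour of the macro is an assumption.

    /* Sketch only: accessors for the opaque taskset handed to subsystems. */
    struct cgroup_taskset;

    struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
    struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
    struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset);

    /*
     * Walk every task in @tset, skipping tasks whose current cgroup is
     * already @skip_cgrp (i.e. tasks that are not actually switching).
     */
    #define cgroup_taskset_for_each(task, skip_cgrp, tset)                  \
            for ((task) = cgroup_taskset_first(tset); (task);               \
                 (task) = cgroup_taskset_next(tset))                        \
                    if (!(skip_cgrp) ||                                     \
                        cgroup_taskset_cur_cgroup(tset) != (skip_cgrp))
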
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index fcb93fca782d..fc0646b78a64 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c | |||
@@ -166,13 +166,17 @@ static bool is_task_frozen_enough(struct task_struct *task) | |||
166 | */ | 166 | */ |
167 | static int freezer_can_attach(struct cgroup_subsys *ss, | 167 | static int freezer_can_attach(struct cgroup_subsys *ss, |
168 | struct cgroup *new_cgroup, | 168 | struct cgroup *new_cgroup, |
169 | struct task_struct *task) | 169 | struct cgroup_taskset *tset) |
170 | { | 170 | { |
171 | struct freezer *freezer; | 171 | struct freezer *freezer; |
172 | struct task_struct *task; | ||
172 | 173 | ||
173 | /* | 174 | /* |
174 | * Anything frozen can't move or be moved to/from. | 175 | * Anything frozen can't move or be moved to/from. |
175 | */ | 176 | */ |
177 | cgroup_taskset_for_each(task, new_cgroup, tset) | ||
178 | if (cgroup_freezing(task)) | ||
179 | return -EBUSY; | ||
176 | 180 | ||
177 | freezer = cgroup_freezer(new_cgroup); | 181 | freezer = cgroup_freezer(new_cgroup); |
178 | if (freezer->state != CGROUP_THAWED) | 182 | if (freezer->state != CGROUP_THAWED) |
@@ -181,11 +185,6 @@ static int freezer_can_attach(struct cgroup_subsys *ss, | |||
181 | return 0; | 185 | return 0; |
182 | } | 186 | } |
183 | 187 | ||
184 | static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | ||
185 | { | ||
186 | return cgroup_freezing(tsk) ? -EBUSY : 0; | ||
187 | } | ||
188 | |||
189 | static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) | 188 | static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) |
190 | { | 189 | { |
191 | struct freezer *freezer; | 190 | struct freezer *freezer; |
@@ -381,10 +380,5 @@ struct cgroup_subsys freezer_subsys = { | |||
381 | .populate = freezer_populate, | 380 | .populate = freezer_populate, |
382 | .subsys_id = freezer_subsys_id, | 381 | .subsys_id = freezer_subsys_id, |
383 | .can_attach = freezer_can_attach, | 382 | .can_attach = freezer_can_attach, |
384 | .can_attach_task = freezer_can_attach_task, | ||
385 | .pre_attach = NULL, | ||
386 | .attach_task = NULL, | ||
387 | .attach = NULL, | ||
388 | .fork = freezer_fork, | 383 | .fork = freezer_fork, |
389 | .exit = NULL, | ||
390 | }; | 384 | }; |
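
The freezer conversion above is the simplest instance of the recipe: the old per-task can_attach_task() check is folded into a cgroup_taskset_for_each() loop inside can_attach(). For a hypothetical controller that had both a per-cgroup and a per-task check, the same conversion would look roughly like this (all foo_* names are made up for illustration):

    static int foo_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
                              struct cgroup_taskset *tset)
    {
            struct foo_state *foo = cgroup_foo(cgrp);       /* hypothetical */
            struct task_struct *task;

            /* former can_attach(): check the destination cgroup once */
            if (!foo_accepts_new_tasks(foo))                /* hypothetical */
                    return -EBUSY;

            /* former can_attach_task(): check each task being moved */
            cgroup_taskset_for_each(task, cgrp, tset) {
                    if (!foo_task_is_movable(task))         /* hypothetical */
                            return -EINVAL;
            }
            return 0;
    }
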
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 0b1712dba587..a09ac2b9a661 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -1389,79 +1389,73 @@ static int fmeter_getrate(struct fmeter *fmp) | |||
1389 | return val; | 1389 | return val; |
1390 | } | 1390 | } |
1391 | 1391 | ||
1392 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ | ||
1393 | static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, | ||
1394 | struct task_struct *tsk) | ||
1395 | { | ||
1396 | struct cpuset *cs = cgroup_cs(cont); | ||
1397 | |||
1398 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) | ||
1399 | return -ENOSPC; | ||
1400 | |||
1401 | /* | ||
1402 | * Kthreads bound to specific cpus cannot be moved to a new cpuset; we | ||
1403 | * cannot change their cpu affinity and isolating such threads by their | ||
1404 | * set of allowed nodes is unnecessary. Thus, cpusets are not | ||
1405 | * applicable for such threads. This prevents checking for success of | ||
1406 | * set_cpus_allowed_ptr() on all attached tasks before cpus_allowed may | ||
1407 | * be changed. | ||
1408 | */ | ||
1409 | if (tsk->flags & PF_THREAD_BOUND) | ||
1410 | return -EINVAL; | ||
1411 | |||
1412 | return 0; | ||
1413 | } | ||
1414 | |||
1415 | static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task) | ||
1416 | { | ||
1417 | return security_task_setscheduler(task); | ||
1418 | } | ||
1419 | |||
1420 | /* | 1392 | /* |
1421 | * Protected by cgroup_lock. The nodemasks must be stored globally because | 1393 | * Protected by cgroup_lock. The nodemasks must be stored globally because |
1422 | * dynamically allocating them is not allowed in pre_attach, and they must | 1394 | * dynamically allocating them is not allowed in can_attach, and they must |
1423 | * persist among pre_attach, attach_task, and attach. | 1395 | * persist until attach. |
1424 | */ | 1396 | */ |
1425 | static cpumask_var_t cpus_attach; | 1397 | static cpumask_var_t cpus_attach; |
1426 | static nodemask_t cpuset_attach_nodemask_from; | 1398 | static nodemask_t cpuset_attach_nodemask_from; |
1427 | static nodemask_t cpuset_attach_nodemask_to; | 1399 | static nodemask_t cpuset_attach_nodemask_to; |
1428 | 1400 | ||
1429 | /* Set-up work for before attaching each task. */ | 1401 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ |
1430 | static void cpuset_pre_attach(struct cgroup *cont) | 1402 | static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
1403 | struct cgroup_taskset *tset) | ||
1431 | { | 1404 | { |
1432 | struct cpuset *cs = cgroup_cs(cont); | 1405 | struct cpuset *cs = cgroup_cs(cgrp); |
1406 | struct task_struct *task; | ||
1407 | int ret; | ||
1408 | |||
1409 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) | ||
1410 | return -ENOSPC; | ||
1411 | |||
1412 | cgroup_taskset_for_each(task, cgrp, tset) { | ||
1413 | /* | ||
1414 | * Kthreads bound to specific cpus cannot be moved to a new | ||
1415 | * cpuset; we cannot change their cpu affinity and | ||
1416 | * isolating such threads by their set of allowed nodes is | ||
1417 | * unnecessary. Thus, cpusets are not applicable for such | ||
1418 | * threads. This prevents checking for success of | ||
1419 | * set_cpus_allowed_ptr() on all attached tasks before | ||
1420 | * cpus_allowed may be changed. | ||
1421 | */ | ||
1422 | if (task->flags & PF_THREAD_BOUND) | ||
1423 | return -EINVAL; | ||
1424 | if ((ret = security_task_setscheduler(task))) | ||
1425 | return ret; | ||
1426 | } | ||
1433 | 1427 | ||
1428 | /* prepare for attach */ | ||
1434 | if (cs == &top_cpuset) | 1429 | if (cs == &top_cpuset) |
1435 | cpumask_copy(cpus_attach, cpu_possible_mask); | 1430 | cpumask_copy(cpus_attach, cpu_possible_mask); |
1436 | else | 1431 | else |
1437 | guarantee_online_cpus(cs, cpus_attach); | 1432 | guarantee_online_cpus(cs, cpus_attach); |
1438 | 1433 | ||
1439 | guarantee_online_mems(cs, &cpuset_attach_nodemask_to); | 1434 | guarantee_online_mems(cs, &cpuset_attach_nodemask_to); |
1440 | } | ||
1441 | |||
1442 | /* Per-thread attachment work. */ | ||
1443 | static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk) | ||
1444 | { | ||
1445 | int err; | ||
1446 | struct cpuset *cs = cgroup_cs(cont); | ||
1447 | 1435 | ||
1448 | /* | 1436 | return 0; |
1449 | * can_attach beforehand should guarantee that this doesn't fail. | ||
1450 | * TODO: have a better way to handle failure here | ||
1451 | */ | ||
1452 | err = set_cpus_allowed_ptr(tsk, cpus_attach); | ||
1453 | WARN_ON_ONCE(err); | ||
1454 | |||
1455 | cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to); | ||
1456 | cpuset_update_task_spread_flag(cs, tsk); | ||
1457 | } | 1437 | } |
1458 | 1438 | ||
1459 | static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, | 1439 | static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
1460 | struct cgroup *oldcont, struct task_struct *tsk) | 1440 | struct cgroup_taskset *tset) |
1461 | { | 1441 | { |
1462 | struct mm_struct *mm; | 1442 | struct mm_struct *mm; |
1463 | struct cpuset *cs = cgroup_cs(cont); | 1443 | struct task_struct *task; |
1464 | struct cpuset *oldcs = cgroup_cs(oldcont); | 1444 | struct task_struct *leader = cgroup_taskset_first(tset); |
1445 | struct cgroup *oldcgrp = cgroup_taskset_cur_cgroup(tset); | ||
1446 | struct cpuset *cs = cgroup_cs(cgrp); | ||
1447 | struct cpuset *oldcs = cgroup_cs(oldcgrp); | ||
1448 | |||
1449 | cgroup_taskset_for_each(task, cgrp, tset) { | ||
1450 | /* | ||
1451 | * can_attach beforehand should guarantee that this doesn't | ||
1452 | * fail. TODO: have a better way to handle failure here | ||
1453 | */ | ||
1454 | WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach)); | ||
1455 | |||
1456 | cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); | ||
1457 | cpuset_update_task_spread_flag(cs, task); | ||
1458 | } | ||
1465 | 1459 | ||
1466 | /* | 1460 | /* |
1467 | * Change mm, possibly for multiple threads in a threadgroup. This is | 1461 | * Change mm, possibly for multiple threads in a threadgroup. This is |
@@ -1469,7 +1463,7 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, | |||
1469 | */ | 1463 | */ |
1470 | cpuset_attach_nodemask_from = oldcs->mems_allowed; | 1464 | cpuset_attach_nodemask_from = oldcs->mems_allowed; |
1471 | cpuset_attach_nodemask_to = cs->mems_allowed; | 1465 | cpuset_attach_nodemask_to = cs->mems_allowed; |
1472 | mm = get_task_mm(tsk); | 1466 | mm = get_task_mm(leader); |
1473 | if (mm) { | 1467 | if (mm) { |
1474 | mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); | 1468 | mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); |
1475 | if (is_memory_migrate(cs)) | 1469 | if (is_memory_migrate(cs)) |
@@ -1925,9 +1919,6 @@ struct cgroup_subsys cpuset_subsys = { | |||
1925 | .create = cpuset_create, | 1919 | .create = cpuset_create, |
1926 | .destroy = cpuset_destroy, | 1920 | .destroy = cpuset_destroy, |
1927 | .can_attach = cpuset_can_attach, | 1921 | .can_attach = cpuset_can_attach, |
1928 | .can_attach_task = cpuset_can_attach_task, | ||
1929 | .pre_attach = cpuset_pre_attach, | ||
1930 | .attach_task = cpuset_attach_task, | ||
1931 | .attach = cpuset_attach, | 1922 | .attach = cpuset_attach, |
1932 | .populate = cpuset_populate, | 1923 | .populate = cpuset_populate, |
1933 | .post_clone = cpuset_post_clone, | 1924 | .post_clone = cpuset_post_clone, |
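
cpuset_attach() above also shows the attach-side half of the pattern: per-task work goes inside the iterator, while work done once per migration is keyed off cgroup_taskset_first() and cgroup_taskset_cur_cgroup(). A skeletal version with invented bar_* helpers:

    static void bar_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
                           struct cgroup_taskset *tset)
    {
            struct task_struct *leader = cgroup_taskset_first(tset);
            struct cgroup *oldcgrp = cgroup_taskset_cur_cgroup(tset);
            struct task_struct *task;

            /* per-task work, skipping tasks that are already in @cgrp */
            cgroup_taskset_for_each(task, cgrp, tset)
                    bar_apply_settings(task, cgroup_bar(cgrp));   /* hypothetical */

            /* once-per-migration work, e.g. state tied to the group's mm */
            bar_move_group_state(leader, cgroup_bar(oldcgrp),     /* hypothetical */
                                 cgroup_bar(cgrp));
    }
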
diff --git a/kernel/events/core.c b/kernel/events/core.c index 3afc68c08433..a8f4ac001a00 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -6941,10 +6941,13 @@ static int __perf_cgroup_move(void *info) | |||
6941 | return 0; | 6941 | return 0; |
6942 | } | 6942 | } |
6943 | 6943 | ||
6944 | static void | 6944 | static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
6945 | perf_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *task) | 6945 | struct cgroup_taskset *tset) |
6946 | { | 6946 | { |
6947 | task_function_call(task, __perf_cgroup_move, task); | 6947 | struct task_struct *task; |
6948 | |||
6949 | cgroup_taskset_for_each(task, cgrp, tset) | ||
6950 | task_function_call(task, __perf_cgroup_move, task); | ||
6948 | } | 6951 | } |
6949 | 6952 | ||
6950 | static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, | 6953 | static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, |
@@ -6958,7 +6961,7 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
6958 | if (!(task->flags & PF_EXITING)) | 6961 | if (!(task->flags & PF_EXITING)) |
6959 | return; | 6962 | return; |
6960 | 6963 | ||
6961 | perf_cgroup_attach_task(cgrp, task); | 6964 | task_function_call(task, __perf_cgroup_move, task); |
6962 | } | 6965 | } |
6963 | 6966 | ||
6964 | struct cgroup_subsys perf_subsys = { | 6967 | struct cgroup_subsys perf_subsys = { |
@@ -6967,6 +6970,6 @@ struct cgroup_subsys perf_subsys = { | |||
6967 | .create = perf_cgroup_create, | 6970 | .create = perf_cgroup_create, |
6968 | .destroy = perf_cgroup_destroy, | 6971 | .destroy = perf_cgroup_destroy, |
6969 | .exit = perf_cgroup_exit, | 6972 | .exit = perf_cgroup_exit, |
6970 | .attach_task = perf_cgroup_attach_task, | 6973 | .attach = perf_cgroup_attach, |
6971 | }; | 6974 | }; |
6972 | #endif /* CONFIG_CGROUP_PERF */ | 6975 | #endif /* CONFIG_CGROUP_PERF */ |
diff --git a/kernel/fork.c b/kernel/fork.c index f34f894c4b98..b00711ce7c13 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -972,7 +972,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
972 | sched_autogroup_fork(sig); | 972 | sched_autogroup_fork(sig); |
973 | 973 | ||
974 | #ifdef CONFIG_CGROUPS | 974 | #ifdef CONFIG_CGROUPS |
975 | init_rwsem(&sig->threadgroup_fork_lock); | 975 | init_rwsem(&sig->group_rwsem); |
976 | #endif | 976 | #endif |
977 | 977 | ||
978 | sig->oom_adj = current->signal->oom_adj; | 978 | sig->oom_adj = current->signal->oom_adj; |
@@ -1153,7 +1153,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1153 | p->io_context = NULL; | 1153 | p->io_context = NULL; |
1154 | p->audit_context = NULL; | 1154 | p->audit_context = NULL; |
1155 | if (clone_flags & CLONE_THREAD) | 1155 | if (clone_flags & CLONE_THREAD) |
1156 | threadgroup_fork_read_lock(current); | 1156 | threadgroup_change_begin(current); |
1157 | cgroup_fork(p); | 1157 | cgroup_fork(p); |
1158 | #ifdef CONFIG_NUMA | 1158 | #ifdef CONFIG_NUMA |
1159 | p->mempolicy = mpol_dup(p->mempolicy); | 1159 | p->mempolicy = mpol_dup(p->mempolicy); |
@@ -1368,7 +1368,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1368 | proc_fork_connector(p); | 1368 | proc_fork_connector(p); |
1369 | cgroup_post_fork(p); | 1369 | cgroup_post_fork(p); |
1370 | if (clone_flags & CLONE_THREAD) | 1370 | if (clone_flags & CLONE_THREAD) |
1371 | threadgroup_fork_read_unlock(current); | 1371 | threadgroup_change_end(current); |
1372 | perf_event_fork(p); | 1372 | perf_event_fork(p); |
1373 | return p; | 1373 | return p; |
1374 | 1374 | ||
@@ -1403,7 +1403,7 @@ bad_fork_cleanup_policy: | |||
1403 | bad_fork_cleanup_cgroup: | 1403 | bad_fork_cleanup_cgroup: |
1404 | #endif | 1404 | #endif |
1405 | if (clone_flags & CLONE_THREAD) | 1405 | if (clone_flags & CLONE_THREAD) |
1406 | threadgroup_fork_read_unlock(current); | 1406 | threadgroup_change_end(current); |
1407 | cgroup_exit(p, cgroup_callbacks_done); | 1407 | cgroup_exit(p, cgroup_callbacks_done); |
1408 | delayacct_tsk_free(p); | 1408 | delayacct_tsk_free(p); |
1409 | module_put(task_thread_info(p)->exec_domain->module); | 1409 | module_put(task_thread_info(p)->exec_domain->module); |
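
The renamed helpers themselves are not part of the hunks shown here. Judging from init_rwsem(&sig->group_rwsem) above and the read/write pairing of the call sites, they presumably reduce to thin wrappers around that rwsem, roughly as sketched below; the in-tree versions may well do more (e.g. additional locking around exec).

    /* Sketch only -- assumed to live in include/linux/sched.h. */
    static inline void threadgroup_change_begin(struct task_struct *tsk)
    {
            down_read(&tsk->signal->group_rwsem);   /* fork/exit: shared */
    }

    static inline void threadgroup_change_end(struct task_struct *tsk)
    {
            up_read(&tsk->signal->group_rwsem);
    }

    static inline void threadgroup_lock(struct task_struct *tsk)
    {
            down_write(&tsk->signal->group_rwsem);  /* attach: exclusive */
    }

    static inline void threadgroup_unlock(struct task_struct *tsk)
    {
            up_write(&tsk->signal->group_rwsem);
    }
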
diff --git a/kernel/res_counter.c b/kernel/res_counter.c index 34683efa2cce..6d269cce7aa1 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c | |||
@@ -159,8 +159,7 @@ int res_counter_memparse_write_strategy(const char *buf, | |||
159 | return 0; | 159 | return 0; |
160 | } | 160 | } |
161 | 161 | ||
162 | /* FIXME - make memparse() take const char* args */ | 162 | *res = memparse(buf, &end); |
163 | *res = memparse((char *)buf, &end); | ||
164 | if (*end != '\0') | 163 | if (*end != '\0') |
165 | return -EINVAL; | 164 | return -EINVAL; |
166 | 165 | ||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0ac0f811d623..cecbb64be05f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -7563,24 +7563,31 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
7563 | sched_destroy_group(tg); | 7563 | sched_destroy_group(tg); |
7564 | } | 7564 | } |
7565 | 7565 | ||
7566 | static int | 7566 | static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
7567 | cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | 7567 | struct cgroup_taskset *tset) |
7568 | { | 7568 | { |
7569 | struct task_struct *task; | ||
7570 | |||
7571 | cgroup_taskset_for_each(task, cgrp, tset) { | ||
7569 | #ifdef CONFIG_RT_GROUP_SCHED | 7572 | #ifdef CONFIG_RT_GROUP_SCHED |
7570 | if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk)) | 7573 | if (!sched_rt_can_attach(cgroup_tg(cgrp), task)) |
7571 | return -EINVAL; | 7574 | return -EINVAL; |
7572 | #else | 7575 | #else |
7573 | /* We don't support RT-tasks being in separate groups */ | 7576 | /* We don't support RT-tasks being in separate groups */ |
7574 | if (tsk->sched_class != &fair_sched_class) | 7577 | if (task->sched_class != &fair_sched_class) |
7575 | return -EINVAL; | 7578 | return -EINVAL; |
7576 | #endif | 7579 | #endif |
7580 | } | ||
7577 | return 0; | 7581 | return 0; |
7578 | } | 7582 | } |
7579 | 7583 | ||
7580 | static void | 7584 | static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
7581 | cpu_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | 7585 | struct cgroup_taskset *tset) |
7582 | { | 7586 | { |
7583 | sched_move_task(tsk); | 7587 | struct task_struct *task; |
7588 | |||
7589 | cgroup_taskset_for_each(task, cgrp, tset) | ||
7590 | sched_move_task(task); | ||
7584 | } | 7591 | } |
7585 | 7592 | ||
7586 | static void | 7593 | static void |
@@ -7915,8 +7922,8 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
7915 | .name = "cpu", | 7922 | .name = "cpu", |
7916 | .create = cpu_cgroup_create, | 7923 | .create = cpu_cgroup_create, |
7917 | .destroy = cpu_cgroup_destroy, | 7924 | .destroy = cpu_cgroup_destroy, |
7918 | .can_attach_task = cpu_cgroup_can_attach_task, | 7925 | .can_attach = cpu_cgroup_can_attach, |
7919 | .attach_task = cpu_cgroup_attach_task, | 7926 | .attach = cpu_cgroup_attach, |
7920 | .exit = cpu_cgroup_exit, | 7927 | .exit = cpu_cgroup_exit, |
7921 | .populate = cpu_cgroup_populate, | 7928 | .populate = cpu_cgroup_populate, |
7922 | .subsys_id = cpu_cgroup_subsys_id, | 7929 | .subsys_id = cpu_cgroup_subsys_id, |
diff --git a/kernel/signal.c b/kernel/signal.c index 56ce3a618b28..bb0efa5705ed 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -2355,8 +2355,15 @@ void exit_signals(struct task_struct *tsk) | |||
2355 | int group_stop = 0; | 2355 | int group_stop = 0; |
2356 | sigset_t unblocked; | 2356 | sigset_t unblocked; |
2357 | 2357 | ||
2358 | /* | ||
2359 | * @tsk is about to have PF_EXITING set - lock out users which | ||
2360 | * expect stable threadgroup. | ||
2361 | */ | ||
2362 | threadgroup_change_begin(tsk); | ||
2363 | |||
2358 | if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { | 2364 | if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { |
2359 | tsk->flags |= PF_EXITING; | 2365 | tsk->flags |= PF_EXITING; |
2366 | threadgroup_change_end(tsk); | ||
2360 | return; | 2367 | return; |
2361 | } | 2368 | } |
2362 | 2369 | ||
@@ -2366,6 +2373,9 @@ void exit_signals(struct task_struct *tsk) | |||
2366 | * see wants_signal(), do_signal_stop(). | 2373 | * see wants_signal(), do_signal_stop(). |
2367 | */ | 2374 | */ |
2368 | tsk->flags |= PF_EXITING; | 2375 | tsk->flags |= PF_EXITING; |
2376 | |||
2377 | threadgroup_change_end(tsk); | ||
2378 | |||
2369 | if (!signal_pending(tsk)) | 2379 | if (!signal_pending(tsk)) |
2370 | goto out; | 2380 | goto out; |
2371 | 2381 | ||
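
Together with the attach_task_by_pid() change earlier, the exit_signals() hunk is what gives cgroup_attach_proc() a stable view of the thread group: the attach path holds the exclusive side across the whole migration while fork and the PF_EXITING transition hold the shared side. Schematically (hypothetical wrapper functions, assuming the rwsem sketch above):

    /* attach path: exclusive across the whole migration */
    static void attach_side(struct task_struct *leader)
    {
            threadgroup_lock(leader);
            /* snapshot threads, ->can_attach(), migrate css_sets, ->attach() */
            threadgroup_unlock(leader);
    }

    /* exiting thread: shared around the PF_EXITING transition */
    static void exit_side(struct task_struct *tsk)
    {
            threadgroup_change_begin(tsk);
            tsk->flags |= PF_EXITING;
            threadgroup_change_end(tsk);
    }
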
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 94da8ee9e2c2..00d4fa27d3e6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -5391,8 +5391,9 @@ static void mem_cgroup_clear_mc(void) | |||
5391 | 5391 | ||
5392 | static int mem_cgroup_can_attach(struct cgroup_subsys *ss, | 5392 | static int mem_cgroup_can_attach(struct cgroup_subsys *ss, |
5393 | struct cgroup *cgroup, | 5393 | struct cgroup *cgroup, |
5394 | struct task_struct *p) | 5394 | struct cgroup_taskset *tset) |
5395 | { | 5395 | { |
5396 | struct task_struct *p = cgroup_taskset_first(tset); | ||
5396 | int ret = 0; | 5397 | int ret = 0; |
5397 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup); | 5398 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup); |
5398 | 5399 | ||
@@ -5430,7 +5431,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, | |||
5430 | 5431 | ||
5431 | static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss, | 5432 | static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss, |
5432 | struct cgroup *cgroup, | 5433 | struct cgroup *cgroup, |
5433 | struct task_struct *p) | 5434 | struct cgroup_taskset *tset) |
5434 | { | 5435 | { |
5435 | mem_cgroup_clear_mc(); | 5436 | mem_cgroup_clear_mc(); |
5436 | } | 5437 | } |
@@ -5547,9 +5548,9 @@ retry: | |||
5547 | 5548 | ||
5548 | static void mem_cgroup_move_task(struct cgroup_subsys *ss, | 5549 | static void mem_cgroup_move_task(struct cgroup_subsys *ss, |
5549 | struct cgroup *cont, | 5550 | struct cgroup *cont, |
5550 | struct cgroup *old_cont, | 5551 | struct cgroup_taskset *tset) |
5551 | struct task_struct *p) | ||
5552 | { | 5552 | { |
5553 | struct task_struct *p = cgroup_taskset_first(tset); | ||
5553 | struct mm_struct *mm = get_task_mm(p); | 5554 | struct mm_struct *mm = get_task_mm(p); |
5554 | 5555 | ||
5555 | if (mm) { | 5556 | if (mm) { |
@@ -5564,19 +5565,18 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, | |||
5564 | #else /* !CONFIG_MMU */ | 5565 | #else /* !CONFIG_MMU */ |
5565 | static int mem_cgroup_can_attach(struct cgroup_subsys *ss, | 5566 | static int mem_cgroup_can_attach(struct cgroup_subsys *ss, |
5566 | struct cgroup *cgroup, | 5567 | struct cgroup *cgroup, |
5567 | struct task_struct *p) | 5568 | struct cgroup_taskset *tset) |
5568 | { | 5569 | { |
5569 | return 0; | 5570 | return 0; |
5570 | } | 5571 | } |
5571 | static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss, | 5572 | static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss, |
5572 | struct cgroup *cgroup, | 5573 | struct cgroup *cgroup, |
5573 | struct task_struct *p) | 5574 | struct cgroup_taskset *tset) |
5574 | { | 5575 | { |
5575 | } | 5576 | } |
5576 | static void mem_cgroup_move_task(struct cgroup_subsys *ss, | 5577 | static void mem_cgroup_move_task(struct cgroup_subsys *ss, |
5577 | struct cgroup *cont, | 5578 | struct cgroup *cont, |
5578 | struct cgroup *old_cont, | 5579 | struct cgroup_taskset *tset) |
5579 | struct task_struct *p) | ||
5580 | { | 5580 | { |
5581 | } | 5581 | } |
5582 | #endif | 5582 | #endif |
diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 4450fbeec411..8b5b5d8612c6 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c | |||
@@ -62,11 +62,12 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task) | |||
62 | struct cgroup_subsys devices_subsys; | 62 | struct cgroup_subsys devices_subsys; |
63 | 63 | ||
64 | static int devcgroup_can_attach(struct cgroup_subsys *ss, | 64 | static int devcgroup_can_attach(struct cgroup_subsys *ss, |
65 | struct cgroup *new_cgroup, struct task_struct *task) | 65 | struct cgroup *new_cgrp, struct cgroup_taskset *set) |
66 | { | 66 | { |
67 | if (current != task && !capable(CAP_SYS_ADMIN)) | 67 | struct task_struct *task = cgroup_taskset_first(set); |
68 | return -EPERM; | ||
69 | 68 | ||
69 | if (current != task && !capable(CAP_SYS_ADMIN)) | ||
70 | return -EPERM; | ||
70 | return 0; | 71 | return 0; |
71 | } | 72 | } |
72 | 73 | ||
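
After this series, the attach-related callbacks populated by the files above all share the taskset-based shape. A sketch of the relevant struct cgroup_subsys members, matching the signatures used by the converted controllers (other members omitted):

    struct cgroup_subsys {
            /* ... other callbacks and fields omitted ... */
            int  (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
                               struct cgroup_taskset *tset);
            void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
                                  struct cgroup_taskset *tset);
            void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
                           struct cgroup_taskset *tset);
            /* ... */
    };
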