diff options
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- | kernel/cgroup.c | 110 |
1 file changed, 84 insertions, 26 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2cf0f79f1fc9..2c9eae6ad970 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c | |||
@@ -46,7 +46,6 @@ | |||
46 | #include <linux/slab.h> | 46 | #include <linux/slab.h> |
47 | #include <linux/spinlock.h> | 47 | #include <linux/spinlock.h> |
48 | #include <linux/rwsem.h> | 48 | #include <linux/rwsem.h> |
49 | #include <linux/percpu-rwsem.h> | ||
50 | #include <linux/string.h> | 49 | #include <linux/string.h> |
51 | #include <linux/sort.h> | 50 | #include <linux/sort.h> |
52 | #include <linux/kmod.h> | 51 | #include <linux/kmod.h> |
@@ -104,8 +103,6 @@ static DEFINE_SPINLOCK(cgroup_idr_lock); | |||
104 | */ | 103 | */ |
105 | static DEFINE_SPINLOCK(release_agent_path_lock); | 104 | static DEFINE_SPINLOCK(release_agent_path_lock); |
106 | 105 | ||
107 | struct percpu_rw_semaphore cgroup_threadgroup_rwsem; | ||
108 | |||
109 | #define cgroup_assert_mutex_or_rcu_locked() \ | 106 | #define cgroup_assert_mutex_or_rcu_locked() \ |
110 | RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | 107 | RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ |
111 | !lockdep_is_held(&cgroup_mutex), \ | 108 | !lockdep_is_held(&cgroup_mutex), \ |
@@ -874,6 +871,48 @@ static struct css_set *find_css_set(struct css_set *old_cset, | |||
874 | return cset; | 871 | return cset; |
875 | } | 872 | } |
876 | 873 | ||
874 | void cgroup_threadgroup_change_begin(struct task_struct *tsk) | ||
875 | { | ||
876 | down_read(&tsk->signal->group_rwsem); | ||
877 | } | ||
878 | |||
879 | void cgroup_threadgroup_change_end(struct task_struct *tsk) | ||
880 | { | ||
881 | up_read(&tsk->signal->group_rwsem); | ||
882 | } | ||
883 | |||
884 | /** | ||
885 | * threadgroup_lock - lock threadgroup | ||
886 | * @tsk: member task of the threadgroup to lock | ||
887 | * | ||
888 | * Lock the threadgroup @tsk belongs to. No new task is allowed to enter | ||
889 | * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or | ||
890 | * change ->group_leader/pid. This is useful for cases where the threadgroup | ||
891 | * needs to stay stable across blockable operations. | ||
892 | * | ||
893 | * fork and exit explicitly call threadgroup_change_{begin|end}() for | ||
894 | * synchronization. While held, no new task will be added to threadgroup | ||
895 | * and no existing live task will have its PF_EXITING set. | ||
896 | * | ||
897 | * de_thread() does threadgroup_change_{begin|end}() when a non-leader | ||
898 | * sub-thread becomes a new leader. | ||
899 | */ | ||
900 | static void threadgroup_lock(struct task_struct *tsk) | ||
901 | { | ||
902 | down_write(&tsk->signal->group_rwsem); | ||
903 | } | ||
904 | |||
905 | /** | ||
906 | * threadgroup_unlock - unlock threadgroup | ||
907 | * @tsk: member task of the threadgroup to unlock | ||
908 | * | ||
909 | * Reverse threadgroup_lock(). | ||
910 | */ | ||
911 | static inline void threadgroup_unlock(struct task_struct *tsk) | ||
912 | { | ||
913 | up_write(&tsk->signal->group_rwsem); | ||
914 | } | ||
915 | |||
877 | static struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root) | 916 | static struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root) |
878 | { | 917 | { |
879 | struct cgroup *root_cgrp = kf_root->kn->priv; | 918 | struct cgroup *root_cgrp = kf_root->kn->priv; |
@@ -2074,9 +2113,9 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp, | |||
2074 | lockdep_assert_held(&css_set_rwsem); | 2113 | lockdep_assert_held(&css_set_rwsem); |
2075 | 2114 | ||
2076 | /* | 2115 | /* |
2077 | * We are synchronized through cgroup_threadgroup_rwsem against | 2116 | * We are synchronized through threadgroup_lock() against PF_EXITING |
2078 | * PF_EXITING setting such that we can't race against cgroup_exit() | 2117 | * setting such that we can't race against cgroup_exit() changing the |
2079 | * changing the css_set to init_css_set and dropping the old one. | 2118 | * css_set to init_css_set and dropping the old one. |
2080 | */ | 2119 | */ |
2081 | WARN_ON_ONCE(tsk->flags & PF_EXITING); | 2120 | WARN_ON_ONCE(tsk->flags & PF_EXITING); |
2082 | old_cset = task_css_set(tsk); | 2121 | old_cset = task_css_set(tsk); |
@@ -2133,11 +2172,10 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets) | |||
2133 | * @src_cset and add it to @preloaded_csets, which should later be cleaned | 2172 | * @src_cset and add it to @preloaded_csets, which should later be cleaned |
2134 | * up by cgroup_migrate_finish(). | 2173 | * up by cgroup_migrate_finish(). |
2135 | * | 2174 | * |
2136 | * This function may be called without holding cgroup_threadgroup_rwsem | 2175 | * This function may be called without holding threadgroup_lock even if the |
2137 | * even if the target is a process. Threads may be created and destroyed | 2176 | * target is a process. Threads may be created and destroyed but as long |
2138 | * but as long as cgroup_mutex is not dropped, no new css_set can be put | 2177 | * as cgroup_mutex is not dropped, no new css_set can be put into play and |
2139 | * into play and the preloaded css_sets are guaranteed to cover all | 2178 | * the preloaded css_sets are guaranteed to cover all migrations. |
2140 | * migrations. | ||
2141 | */ | 2179 | */ |
2142 | static void cgroup_migrate_add_src(struct css_set *src_cset, | 2180 | static void cgroup_migrate_add_src(struct css_set *src_cset, |
2143 | struct cgroup *dst_cgrp, | 2181 | struct cgroup *dst_cgrp, |
@@ -2240,7 +2278,7 @@ err: | |||
2240 | * @threadgroup: whether @leader points to the whole process or a single task | 2278 | * @threadgroup: whether @leader points to the whole process or a single task |
2241 | * | 2279 | * |
2242 | * Migrate a process or task denoted by @leader to @cgrp. If migrating a | 2280 | * Migrate a process or task denoted by @leader to @cgrp. If migrating a |
2243 | * process, the caller must be holding cgroup_threadgroup_rwsem. The | 2281 | * process, the caller must be holding threadgroup_lock of @leader. The |
2244 | * caller is also responsible for invoking cgroup_migrate_add_src() and | 2282 | * caller is also responsible for invoking cgroup_migrate_add_src() and |
2245 | * cgroup_migrate_prepare_dst() on the targets before invoking this | 2283 | * cgroup_migrate_prepare_dst() on the targets before invoking this |
2246 | * function and following up with cgroup_migrate_finish(). | 2284 | * function and following up with cgroup_migrate_finish(). |
@@ -2368,7 +2406,7 @@ out_release_tset: | |||
2368 | * @leader: the task or the leader of the threadgroup to be attached | 2406 | * @leader: the task or the leader of the threadgroup to be attached |
2369 | * @threadgroup: attach the whole threadgroup? | 2407 | * @threadgroup: attach the whole threadgroup? |
2370 | * | 2408 | * |
2371 | * Call holding cgroup_mutex and cgroup_threadgroup_rwsem. | 2409 | * Call holding cgroup_mutex and threadgroup_lock of @leader. |
2372 | */ | 2410 | */ |
2373 | static int cgroup_attach_task(struct cgroup *dst_cgrp, | 2411 | static int cgroup_attach_task(struct cgroup *dst_cgrp, |
2374 | struct task_struct *leader, bool threadgroup) | 2412 | struct task_struct *leader, bool threadgroup) |
@@ -2460,13 +2498,14 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, | |||
2460 | if (!cgrp) | 2498 | if (!cgrp) |
2461 | return -ENODEV; | 2499 | return -ENODEV; |
2462 | 2500 | ||
2463 | percpu_down_write(&cgroup_threadgroup_rwsem); | 2501 | retry_find_task: |
2464 | rcu_read_lock(); | 2502 | rcu_read_lock(); |
2465 | if (pid) { | 2503 | if (pid) { |
2466 | tsk = find_task_by_vpid(pid); | 2504 | tsk = find_task_by_vpid(pid); |
2467 | if (!tsk) { | 2505 | if (!tsk) { |
2506 | rcu_read_unlock(); | ||
2468 | ret = -ESRCH; | 2507 | ret = -ESRCH; |
2469 | goto out_unlock_rcu; | 2508 | goto out_unlock_cgroup; |
2470 | } | 2509 | } |
2471 | } else { | 2510 | } else { |
2472 | tsk = current; | 2511 | tsk = current; |
@@ -2482,23 +2521,37 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, | |||
2482 | */ | 2521 | */ |
2483 | if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { | 2522 | if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { |
2484 | ret = -EINVAL; | 2523 | ret = -EINVAL; |
2485 | goto out_unlock_rcu; | 2524 | rcu_read_unlock(); |
2525 | goto out_unlock_cgroup; | ||
2486 | } | 2526 | } |
2487 | 2527 | ||
2488 | get_task_struct(tsk); | 2528 | get_task_struct(tsk); |
2489 | rcu_read_unlock(); | 2529 | rcu_read_unlock(); |
2490 | 2530 | ||
2531 | threadgroup_lock(tsk); | ||
2532 | if (threadgroup) { | ||
2533 | if (!thread_group_leader(tsk)) { | ||
2534 | /* | ||
2535 | * A race with de_thread() from another thread's exec() | ||
2536 | * may strip us of our leadership. If this happens, | ||
2537 | * there is no choice but to throw this task away and | ||
2538 | * try again; this is | ||
2539 | * "double-double-toil-and-trouble-check locking". | ||
2540 | */ | ||
2541 | threadgroup_unlock(tsk); | ||
2542 | put_task_struct(tsk); | ||
2543 | goto retry_find_task; | ||
2544 | } | ||
2545 | } | ||
2546 | |||
2491 | ret = cgroup_procs_write_permission(tsk, cgrp, of); | 2547 | ret = cgroup_procs_write_permission(tsk, cgrp, of); |
2492 | if (!ret) | 2548 | if (!ret) |
2493 | ret = cgroup_attach_task(cgrp, tsk, threadgroup); | 2549 | ret = cgroup_attach_task(cgrp, tsk, threadgroup); |
2494 | 2550 | ||
2495 | put_task_struct(tsk); | 2551 | threadgroup_unlock(tsk); |
2496 | goto out_unlock_threadgroup; | ||
2497 | 2552 | ||
2498 | out_unlock_rcu: | 2553 | put_task_struct(tsk); |
2499 | rcu_read_unlock(); | 2554 | out_unlock_cgroup: |
2500 | out_unlock_threadgroup: | ||
2501 | percpu_up_write(&cgroup_threadgroup_rwsem); | ||
2502 | cgroup_kn_unlock(of->kn); | 2555 | cgroup_kn_unlock(of->kn); |
2503 | return ret ?: nbytes; | 2556 | return ret ?: nbytes; |
2504 | } | 2557 | } |
@@ -2643,8 +2696,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) | |||
2643 | 2696 | ||
2644 | lockdep_assert_held(&cgroup_mutex); | 2697 | lockdep_assert_held(&cgroup_mutex); |
2645 | 2698 | ||
2646 | percpu_down_write(&cgroup_threadgroup_rwsem); | ||
2647 | |||
2648 | /* look up all csses currently attached to @cgrp's subtree */ | 2699 | /* look up all csses currently attached to @cgrp's subtree */ |
2649 | down_read(&css_set_rwsem); | 2700 | down_read(&css_set_rwsem); |
2650 | css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) { | 2701 | css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) { |
@@ -2700,8 +2751,17 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) | |||
2700 | goto out_finish; | 2751 | goto out_finish; |
2701 | last_task = task; | 2752 | last_task = task; |
2702 | 2753 | ||
2754 | threadgroup_lock(task); | ||
2755 | /* raced against de_thread() from another thread? */ | ||
2756 | if (!thread_group_leader(task)) { | ||
2757 | threadgroup_unlock(task); | ||
2758 | put_task_struct(task); | ||
2759 | continue; | ||
2760 | } | ||
2761 | |||
2703 | ret = cgroup_migrate(src_cset->dfl_cgrp, task, true); | 2762 | ret = cgroup_migrate(src_cset->dfl_cgrp, task, true); |
2704 | 2763 | ||
2764 | threadgroup_unlock(task); | ||
2705 | put_task_struct(task); | 2765 | put_task_struct(task); |
2706 | 2766 | ||
2707 | if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret)) | 2767 | if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret)) |
@@ -2711,7 +2771,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) | |||
2711 | 2771 | ||
2712 | out_finish: | 2772 | out_finish: |
2713 | cgroup_migrate_finish(&preloaded_csets); | 2773 | cgroup_migrate_finish(&preloaded_csets); |
2714 | percpu_up_write(&cgroup_threadgroup_rwsem); | ||
2715 | return ret; | 2774 | return ret; |
2716 | } | 2775 | } |
2717 | 2776 | ||
@@ -5024,7 +5083,6 @@ int __init cgroup_init(void) | |||
5024 | unsigned long key; | 5083 | unsigned long key; |
5025 | int ssid, err; | 5084 | int ssid, err; |
5026 | 5085 | ||
5027 | BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem)); | ||
5028 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); | 5086 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); |
5029 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); | 5087 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); |
5030 | 5088 | ||