diff options
| -rw-r--r-- | include/linux/cgroup-defs.h | 3 | ||||
| -rw-r--r-- | include/linux/cgroup.h | 4 | ||||
| -rw-r--r-- | kernel/cgroup.c | 87 |
3 files changed, 80 insertions, 14 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 17444505c870..62413c3e2f4b 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
| @@ -211,6 +211,9 @@ struct css_set { | |||
| 211 | */ | 211 | */ |
| 212 | struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; | 212 | struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; |
| 213 | 213 | ||
| 214 | /* all css_task_iters currently walking this cset */ | ||
| 215 | struct list_head task_iters; | ||
| 216 | |||
| 214 | /* For RCU-protected deletion */ | 217 | /* For RCU-protected deletion */ |
| 215 | struct rcu_head rcu_head; | 218 | struct rcu_head rcu_head; |
| 216 | }; | 219 | }; |
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index bdfdb3a1a83c..a9dcf0e76865 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
| @@ -42,6 +42,10 @@ struct css_task_iter { | |||
| 42 | struct list_head *task_pos; | 42 | struct list_head *task_pos; |
| 43 | struct list_head *tasks_head; | 43 | struct list_head *tasks_head; |
| 44 | struct list_head *mg_tasks_head; | 44 | struct list_head *mg_tasks_head; |
| 45 | |||
| 46 | struct css_set *cur_cset; | ||
| 47 | struct task_struct *cur_task; | ||
| 48 | struct list_head iters_node; /* css_set->task_iters */ | ||
| 45 | }; | 49 | }; |
| 46 | 50 | ||
| 47 | extern struct cgroup_root cgrp_dfl_root; | 51 | extern struct cgroup_root cgrp_dfl_root; |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 56e2b772b143..0c5b0e605c47 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
| @@ -216,6 +216,7 @@ static struct cftype cgroup_legacy_base_files[]; | |||
| 216 | 216 | ||
| 217 | static int rebind_subsystems(struct cgroup_root *dst_root, | 217 | static int rebind_subsystems(struct cgroup_root *dst_root, |
| 218 | unsigned long ss_mask); | 218 | unsigned long ss_mask); |
| 219 | static void css_task_iter_advance(struct css_task_iter *it); | ||
| 219 | static int cgroup_destroy_locked(struct cgroup *cgrp); | 220 | static int cgroup_destroy_locked(struct cgroup *cgrp); |
| 220 | static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, | 221 | static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, |
| 221 | bool visible); | 222 | bool visible); |
| @@ -593,6 +594,7 @@ struct css_set init_css_set = { | |||
| 593 | .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks), | 594 | .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks), |
| 594 | .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node), | 595 | .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node), |
| 595 | .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), | 596 | .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), |
| 597 | .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), | ||
| 596 | }; | 598 | }; |
| 597 | 599 | ||
| 598 | static int css_set_count = 1; /* 1 for init_css_set */ | 600 | static int css_set_count = 1; /* 1 for init_css_set */ |
| @@ -675,8 +677,9 @@ static void css_set_update_populated(struct css_set *cset, bool populated) | |||
| 675 | * css_set, @from_cset can be NULL. If @task is being disassociated | 677 | * css_set, @from_cset can be NULL. If @task is being disassociated |
| 676 | * instead of moved, @to_cset can be NULL. | 678 | * instead of moved, @to_cset can be NULL. |
| 677 | * | 679 | * |
| 678 | * This function automatically handles populated_cnt updates but the caller | 680 | * This function automatically handles populated_cnt updates and |
| 679 | * is responsible for managing @from_cset and @to_cset's reference counts. | 681 | * css_task_iter adjustments but the caller is responsible for managing |
| 682 | * @from_cset and @to_cset's reference counts. | ||
| 680 | */ | 683 | */ |
| 681 | static void css_set_move_task(struct task_struct *task, | 684 | static void css_set_move_task(struct task_struct *task, |
| 682 | struct css_set *from_cset, struct css_set *to_cset, | 685 | struct css_set *from_cset, struct css_set *to_cset, |
| @@ -685,7 +688,22 @@ static void css_set_move_task(struct task_struct *task, | |||
| 685 | lockdep_assert_held(&css_set_rwsem); | 688 | lockdep_assert_held(&css_set_rwsem); |
| 686 | 689 | ||
| 687 | if (from_cset) { | 690 | if (from_cset) { |
| 691 | struct css_task_iter *it, *pos; | ||
| 692 | |||
| 688 | WARN_ON_ONCE(list_empty(&task->cg_list)); | 693 | WARN_ON_ONCE(list_empty(&task->cg_list)); |
| 694 | |||
| 695 | /* | ||
| 696 | * @task is leaving, advance task iterators which are | ||
| 697 | * pointing to it so that they can resume at the next | ||
| 698 | * position. Advancing an iterator might remove it from | ||
| 699 | * the list, use safe walk. See css_task_iter_advance*() | ||
| 700 | * for details. | ||
| 701 | */ | ||
| 702 | list_for_each_entry_safe(it, pos, &from_cset->task_iters, | ||
| 703 | iters_node) | ||
| 704 | if (it->task_pos == &task->cg_list) | ||
| 705 | css_task_iter_advance(it); | ||
| 706 | |||
| 689 | list_del_init(&task->cg_list); | 707 | list_del_init(&task->cg_list); |
| 690 | if (!css_set_populated(from_cset)) | 708 | if (!css_set_populated(from_cset)) |
| 691 | css_set_update_populated(from_cset, false); | 709 | css_set_update_populated(from_cset, false); |
| @@ -1019,6 +1037,7 @@ static struct css_set *find_css_set(struct css_set *old_cset, | |||
| 1019 | INIT_LIST_HEAD(&cset->mg_tasks); | 1037 | INIT_LIST_HEAD(&cset->mg_tasks); |
| 1020 | INIT_LIST_HEAD(&cset->mg_preload_node); | 1038 | INIT_LIST_HEAD(&cset->mg_preload_node); |
| 1021 | INIT_LIST_HEAD(&cset->mg_node); | 1039 | INIT_LIST_HEAD(&cset->mg_node); |
| 1040 | INIT_LIST_HEAD(&cset->task_iters); | ||
| 1022 | INIT_HLIST_NODE(&cset->hlist); | 1041 | INIT_HLIST_NODE(&cset->hlist); |
| 1023 | 1042 | ||
| 1024 | /* Copy the set of subsystem state objects generated in | 1043 | /* Copy the set of subsystem state objects generated in |
| @@ -3804,6 +3823,8 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) | |||
| 3804 | struct cgrp_cset_link *link; | 3823 | struct cgrp_cset_link *link; |
| 3805 | struct css_set *cset; | 3824 | struct css_set *cset; |
| 3806 | 3825 | ||
| 3826 | lockdep_assert_held(&css_set_rwsem); | ||
| 3827 | |||
| 3807 | /* Advance to the next non-empty css_set */ | 3828 | /* Advance to the next non-empty css_set */ |
| 3808 | do { | 3829 | do { |
| 3809 | l = l->next; | 3830 | l = l->next; |
| @@ -3831,12 +3852,36 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) | |||
| 3831 | 3852 | ||
| 3832 | it->tasks_head = &cset->tasks; | 3853 | it->tasks_head = &cset->tasks; |
| 3833 | it->mg_tasks_head = &cset->mg_tasks; | 3854 | it->mg_tasks_head = &cset->mg_tasks; |
| 3855 | |||
| 3856 | /* | ||
| 3857 | * We don't keep css_sets locked across iteration steps and thus | ||
| 3858 | * need to take steps to ensure that iteration can be resumed after | ||
| 3859 | * the lock is re-acquired. Iteration is performed at two levels - | ||
| 3860 | * css_sets and tasks in them. | ||
| 3861 | * | ||
| 3862 | * Once created, a css_set never leaves its cgroup lists, so a | ||
| 3863 | * pinned css_set is guaranteed to stay put and we can resume | ||
| 3864 | * iteration afterwards. | ||
| 3865 | * | ||
| 3866 | * Tasks may leave @cset across iteration steps. This is resolved | ||
| 3867 | * by registering each iterator with the css_set currently being | ||
| 3868 | * walked and making css_set_move_task() advance iterators whose | ||
| 3869 | * next task is leaving. | ||
| 3870 | */ | ||
| 3871 | if (it->cur_cset) { | ||
| 3872 | list_del(&it->iters_node); | ||
| 3873 | put_css_set_locked(it->cur_cset); | ||
| 3874 | } | ||
| 3875 | get_css_set(cset); | ||
| 3876 | it->cur_cset = cset; | ||
| 3877 | list_add(&it->iters_node, &cset->task_iters); | ||
| 3834 | } | 3878 | } |
| 3835 | 3879 | ||
| 3836 | static void css_task_iter_advance(struct css_task_iter *it) | 3880 | static void css_task_iter_advance(struct css_task_iter *it) |
| 3837 | { | 3881 | { |
| 3838 | struct list_head *l = it->task_pos; | 3882 | struct list_head *l = it->task_pos; |
| 3839 | 3883 | ||
| 3884 | lockdep_assert_held(&css_set_rwsem); | ||
| 3840 | WARN_ON_ONCE(!l); | 3885 | WARN_ON_ONCE(!l); |
| 3841 | 3886 | ||
| 3842 | /* | 3887 | /* |
| @@ -3864,19 +3909,16 @@ static void css_task_iter_advance(struct css_task_iter *it) | |||
| 3864 | * css_task_iter_next() to walk through the tasks until the function | 3909 | * css_task_iter_next() to walk through the tasks until the function |
| 3865 | * returns NULL. On completion of iteration, css_task_iter_end() must be | 3910 | * returns NULL. On completion of iteration, css_task_iter_end() must be |
| 3866 | * called. | 3911 | * called. |
| 3867 | * | ||
| 3868 | * Note that this function acquires a lock which is released when the | ||
| 3869 | * iteration finishes. The caller can't sleep while iteration is in | ||
| 3870 | * progress. | ||
| 3871 | */ | 3912 | */ |
| 3872 | void css_task_iter_start(struct cgroup_subsys_state *css, | 3913 | void css_task_iter_start(struct cgroup_subsys_state *css, |
| 3873 | struct css_task_iter *it) | 3914 | struct css_task_iter *it) |
| 3874 | __acquires(css_set_rwsem) | ||
| 3875 | { | 3915 | { |
| 3876 | /* no one should try to iterate before mounting cgroups */ | 3916 | /* no one should try to iterate before mounting cgroups */ |
| 3877 | WARN_ON_ONCE(!use_task_css_set_links); | 3917 | WARN_ON_ONCE(!use_task_css_set_links); |
| 3878 | 3918 | ||
| 3879 | down_read(&css_set_rwsem); | 3919 | memset(it, 0, sizeof(*it)); |
| 3920 | |||
| 3921 | down_write(&css_set_rwsem); | ||
| 3880 | 3922 | ||
| 3881 | it->ss = css->ss; | 3923 | it->ss = css->ss; |
| 3882 | 3924 | ||
| @@ -3888,6 +3930,8 @@ void css_task_iter_start(struct cgroup_subsys_state *css, | |||
| 3888 | it->cset_head = it->cset_pos; | 3930 | it->cset_head = it->cset_pos; |
| 3889 | 3931 | ||
| 3890 | css_task_iter_advance_css_set(it); | 3932 | css_task_iter_advance_css_set(it); |
| 3933 | |||
| 3934 | up_write(&css_set_rwsem); | ||
| 3891 | } | 3935 | } |
| 3892 | 3936 | ||
| 3893 | /** | 3937 | /** |
| @@ -3900,14 +3944,22 @@ void css_task_iter_start(struct cgroup_subsys_state *css, | |||
| 3900 | */ | 3944 | */ |
| 3901 | struct task_struct *css_task_iter_next(struct css_task_iter *it) | 3945 | struct task_struct *css_task_iter_next(struct css_task_iter *it) |
| 3902 | { | 3946 | { |
| 3903 | struct task_struct *res; | ||
| 3904 | |||
| 3905 | if (!it->cset_pos) | 3947 | if (!it->cset_pos) |
| 3906 | return NULL; | 3948 | return NULL; |
| 3907 | 3949 | ||
| 3908 | res = list_entry(it->task_pos, struct task_struct, cg_list); | 3950 | if (it->cur_task) |
| 3951 | put_task_struct(it->cur_task); | ||
| 3952 | |||
| 3953 | down_write(&css_set_rwsem); | ||
| 3954 | |||
| 3955 | it->cur_task = list_entry(it->task_pos, struct task_struct, cg_list); | ||
| 3956 | get_task_struct(it->cur_task); | ||
| 3957 | |||
| 3909 | css_task_iter_advance(it); | 3958 | css_task_iter_advance(it); |
| 3910 | return res; | 3959 | |
| 3960 | up_write(&css_set_rwsem); | ||
| 3961 | |||
| 3962 | return it->cur_task; | ||
| 3911 | } | 3963 | } |
| 3912 | 3964 | ||
| 3913 | /** | 3965 | /** |
| @@ -3917,9 +3969,16 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it) | |||
| 3917 | * Finish task iteration started by css_task_iter_start(). | 3969 | * Finish task iteration started by css_task_iter_start(). |
| 3918 | */ | 3970 | */ |
| 3919 | void css_task_iter_end(struct css_task_iter *it) | 3971 | void css_task_iter_end(struct css_task_iter *it) |
| 3920 | __releases(css_set_rwsem) | ||
| 3921 | { | 3972 | { |
| 3922 | up_read(&css_set_rwsem); | 3973 | if (it->cur_cset) { |
| 3974 | down_write(&css_set_rwsem); | ||
| 3975 | list_del(&it->iters_node); | ||
| 3976 | put_css_set_locked(it->cur_cset); | ||
| 3977 | up_write(&css_set_rwsem); | ||
| 3978 | } | ||
| 3979 | |||
| 3980 | if (it->cur_task) | ||
| 3981 | put_task_struct(it->cur_task); | ||
| 3923 | } | 3982 | } |
| 3924 | 3983 | ||
| 3925 | /** | 3984 | /** |
