about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- include/linux/cgroup-defs.h | 3
-rw-r--r-- include/linux/cgroup.h | 4
-rw-r--r-- kernel/cgroup.c | 87
3 files changed, 80 insertions, 14 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 17444505c870..62413c3e2f4b 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -211,6 +211,9 @@ struct css_set {
211 */ 211 */
212 struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; 212 struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];
213 213
214 /* all css_task_iters currently walking this cset */
215 struct list_head task_iters;
216
214 /* For RCU-protected deletion */ 217 /* For RCU-protected deletion */
215 struct rcu_head rcu_head; 218 struct rcu_head rcu_head;
216}; 219};
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index bdfdb3a1a83c..a9dcf0e76865 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -42,6 +42,10 @@ struct css_task_iter {
42 struct list_head *task_pos; 42 struct list_head *task_pos;
43 struct list_head *tasks_head; 43 struct list_head *tasks_head;
44 struct list_head *mg_tasks_head; 44 struct list_head *mg_tasks_head;
45
46 struct css_set *cur_cset;
47 struct task_struct *cur_task;
48 struct list_head iters_node; /* css_set->task_iters */
45}; 49};
46 50
47extern struct cgroup_root cgrp_dfl_root; 51extern struct cgroup_root cgrp_dfl_root;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 56e2b772b143..0c5b0e605c47 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -216,6 +216,7 @@ static struct cftype cgroup_legacy_base_files[];
216 216
217static int rebind_subsystems(struct cgroup_root *dst_root, 217static int rebind_subsystems(struct cgroup_root *dst_root,
218 unsigned long ss_mask); 218 unsigned long ss_mask);
219static void css_task_iter_advance(struct css_task_iter *it);
219static int cgroup_destroy_locked(struct cgroup *cgrp); 220static int cgroup_destroy_locked(struct cgroup *cgrp);
220static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, 221static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss,
221 bool visible); 222 bool visible);
@@ -593,6 +594,7 @@ struct css_set init_css_set = {
593 .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks), 594 .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks),
594 .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node), 595 .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node),
595 .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), 596 .mg_node = LIST_HEAD_INIT(init_css_set.mg_node),
597 .task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
596}; 598};
597 599
598static int css_set_count = 1; /* 1 for init_css_set */ 600static int css_set_count = 1; /* 1 for init_css_set */
@@ -675,8 +677,9 @@ static void css_set_update_populated(struct css_set *cset, bool populated)
675 * css_set, @from_cset can be NULL. If @task is being disassociated 677 * css_set, @from_cset can be NULL. If @task is being disassociated
676 * instead of moved, @to_cset can be NULL. 678 * instead of moved, @to_cset can be NULL.
677 * 679 *
678 * This function automatically handles populated_cnt updates but the caller 680 * This function automatically handles populated_cnt updates and
679 * is responsible for managing @from_cset and @to_cset's reference counts. 681 * css_task_iter adjustments but the caller is responsible for managing
682 * @from_cset and @to_cset's reference counts.
680 */ 683 */
681static void css_set_move_task(struct task_struct *task, 684static void css_set_move_task(struct task_struct *task,
682 struct css_set *from_cset, struct css_set *to_cset, 685 struct css_set *from_cset, struct css_set *to_cset,
@@ -685,7 +688,22 @@ static void css_set_move_task(struct task_struct *task,
685 lockdep_assert_held(&css_set_rwsem); 688 lockdep_assert_held(&css_set_rwsem);
686 689
687 if (from_cset) { 690 if (from_cset) {
691 struct css_task_iter *it, *pos;
692
688 WARN_ON_ONCE(list_empty(&task->cg_list)); 693 WARN_ON_ONCE(list_empty(&task->cg_list));
694
695 /*
696 * @task is leaving, advance task iterators which are
697 * pointing to it so that they can resume at the next
698 * position. Advancing an iterator might remove it from
699 * the list, use safe walk. See css_task_iter_advance*()
700 * for details.
701 */
702 list_for_each_entry_safe(it, pos, &from_cset->task_iters,
703 iters_node)
704 if (it->task_pos == &task->cg_list)
705 css_task_iter_advance(it);
706
689 list_del_init(&task->cg_list); 707 list_del_init(&task->cg_list);
690 if (!css_set_populated(from_cset)) 708 if (!css_set_populated(from_cset))
691 css_set_update_populated(from_cset, false); 709 css_set_update_populated(from_cset, false);
@@ -1019,6 +1037,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
1019 INIT_LIST_HEAD(&cset->mg_tasks); 1037 INIT_LIST_HEAD(&cset->mg_tasks);
1020 INIT_LIST_HEAD(&cset->mg_preload_node); 1038 INIT_LIST_HEAD(&cset->mg_preload_node);
1021 INIT_LIST_HEAD(&cset->mg_node); 1039 INIT_LIST_HEAD(&cset->mg_node);
1040 INIT_LIST_HEAD(&cset->task_iters);
1022 INIT_HLIST_NODE(&cset->hlist); 1041 INIT_HLIST_NODE(&cset->hlist);
1023 1042
1024 /* Copy the set of subsystem state objects generated in 1043 /* Copy the set of subsystem state objects generated in
@@ -3804,6 +3823,8 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
3804 struct cgrp_cset_link *link; 3823 struct cgrp_cset_link *link;
3805 struct css_set *cset; 3824 struct css_set *cset;
3806 3825
3826 lockdep_assert_held(&css_set_rwsem);
3827
3807 /* Advance to the next non-empty css_set */ 3828 /* Advance to the next non-empty css_set */
3808 do { 3829 do {
3809 l = l->next; 3830 l = l->next;
@@ -3831,12 +3852,36 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
3831 3852
3832 it->tasks_head = &cset->tasks; 3853 it->tasks_head = &cset->tasks;
3833 it->mg_tasks_head = &cset->mg_tasks; 3854 it->mg_tasks_head = &cset->mg_tasks;
3855
3856 /*
3857 * We don't keep css_sets locked across iteration steps and thus
3858 * need to take steps to ensure that iteration can be resumed after
3859 * the lock is re-acquired. Iteration is performed at two levels -
3860 * css_sets and tasks in them.
3861 *
3862 * Once created, a css_set never leaves its cgroup lists, so a
3863 * pinned css_set is guaranteed to stay put and we can resume
3864 * iteration afterwards.
3865 *
3866 * Tasks may leave @cset across iteration steps. This is resolved
3867 * by registering each iterator with the css_set currently being
3868 * walked and making css_set_move_task() advance iterators whose
3869 * next task is leaving.
3870 */
3871 if (it->cur_cset) {
3872 list_del(&it->iters_node);
3873 put_css_set_locked(it->cur_cset);
3874 }
3875 get_css_set(cset);
3876 it->cur_cset = cset;
3877 list_add(&it->iters_node, &cset->task_iters);
3834} 3878}
3835 3879
3836static void css_task_iter_advance(struct css_task_iter *it) 3880static void css_task_iter_advance(struct css_task_iter *it)
3837{ 3881{
3838 struct list_head *l = it->task_pos; 3882 struct list_head *l = it->task_pos;
3839 3883
3884 lockdep_assert_held(&css_set_rwsem);
3840 WARN_ON_ONCE(!l); 3885 WARN_ON_ONCE(!l);
3841 3886
3842 /* 3887 /*
@@ -3864,19 +3909,16 @@ static void css_task_iter_advance(struct css_task_iter *it)
3864 * css_task_iter_next() to walk through the tasks until the function 3909 * css_task_iter_next() to walk through the tasks until the function
3865 * returns NULL. On completion of iteration, css_task_iter_end() must be 3910 * returns NULL. On completion of iteration, css_task_iter_end() must be
3866 * called. 3911 * called.
3867 *
3868 * Note that this function acquires a lock which is released when the
3869 * iteration finishes. The caller can't sleep while iteration is in
3870 * progress.
3871 */ 3912 */
3872void css_task_iter_start(struct cgroup_subsys_state *css, 3913void css_task_iter_start(struct cgroup_subsys_state *css,
3873 struct css_task_iter *it) 3914 struct css_task_iter *it)
3874 __acquires(css_set_rwsem)
3875{ 3915{
3876 /* no one should try to iterate before mounting cgroups */ 3916 /* no one should try to iterate before mounting cgroups */
3877 WARN_ON_ONCE(!use_task_css_set_links); 3917 WARN_ON_ONCE(!use_task_css_set_links);
3878 3918
3879 down_read(&css_set_rwsem); 3919 memset(it, 0, sizeof(*it));
3920
3921 down_write(&css_set_rwsem);
3880 3922
3881 it->ss = css->ss; 3923 it->ss = css->ss;
3882 3924
@@ -3888,6 +3930,8 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
3888 it->cset_head = it->cset_pos; 3930 it->cset_head = it->cset_pos;
3889 3931
3890 css_task_iter_advance_css_set(it); 3932 css_task_iter_advance_css_set(it);
3933
3934 up_write(&css_set_rwsem);
3891} 3935}
3892 3936
3893/** 3937/**
@@ -3900,14 +3944,22 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
3900 */ 3944 */
3901struct task_struct *css_task_iter_next(struct css_task_iter *it) 3945struct task_struct *css_task_iter_next(struct css_task_iter *it)
3902{ 3946{
3903 struct task_struct *res;
3904
3905 if (!it->cset_pos) 3947 if (!it->cset_pos)
3906 return NULL; 3948 return NULL;
3907 3949
3908 res = list_entry(it->task_pos, struct task_struct, cg_list); 3950 if (it->cur_task)
3951 put_task_struct(it->cur_task);
3952
3953 down_write(&css_set_rwsem);
3954
3955 it->cur_task = list_entry(it->task_pos, struct task_struct, cg_list);
3956 get_task_struct(it->cur_task);
3957
3909 css_task_iter_advance(it); 3958 css_task_iter_advance(it);
3910 return res; 3959
3960 up_write(&css_set_rwsem);
3961
3962 return it->cur_task;
3911} 3963}
3912 3964
3913/** 3965/**
@@ -3917,9 +3969,16 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
3917 * Finish task iteration started by css_task_iter_start(). 3969 * Finish task iteration started by css_task_iter_start().
3918 */ 3970 */
3919void css_task_iter_end(struct css_task_iter *it) 3971void css_task_iter_end(struct css_task_iter *it)
3920 __releases(css_set_rwsem)
3921{ 3972{
3922 up_read(&css_set_rwsem); 3973 if (it->cur_cset) {
3974 down_write(&css_set_rwsem);
3975 list_del(&it->iters_node);
3976 put_css_set_locked(it->cur_cset);
3977 up_write(&css_set_rwsem);
3978 }
3979
3980 if (it->cur_task)
3981 put_task_struct(it->cur_task);
3923} 3982}
3924 3983
3925/** 3984/**