about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- include/linux/cgroup.h 44
-rw-r--r-- kernel/cgroup.c 62
2 files changed, 63 insertions, 43 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 5375582ea5f6..f2ff578fc03a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -764,14 +764,14 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
764 * @pos: the css * to use as the loop cursor 764 * @pos: the css * to use as the loop cursor
765 * @parent: css whose children to walk 765 * @parent: css whose children to walk
766 * 766 *
767 * Walk @parent's children. Must be called under rcu_read_lock(). A child 767 * Walk @parent's children. Must be called under rcu_read_lock().
768 * css which hasn't finished ->css_online() or already has finished
769 * ->css_offline() may show up during traversal and it's each subsystem's
770 * responsibility to verify that each @pos is alive.
771 * 768 *
772 * If a subsystem synchronizes against the parent in its ->css_online() and 769 * If a subsystem synchronizes ->css_online() and the start of iteration, a
773 * before starting iterating, a css which finished ->css_online() is 770 * css which finished ->css_online() is guaranteed to be visible in the
774 * guaranteed to be visible in the future iterations. 771 * future iterations and will stay visible until the last reference is put.
772 * A css which hasn't finished ->css_online() or already finished
773 * ->css_offline() may show up during traversal. It's each subsystem's
774 * responsibility to synchronize against on/offlining.
775 * 775 *
776 * It is allowed to temporarily drop RCU read lock during iteration. The 776 * It is allowed to temporarily drop RCU read lock during iteration. The
777 * caller is responsible for ensuring that @pos remains accessible until 777 * caller is responsible for ensuring that @pos remains accessible until
@@ -794,17 +794,16 @@ css_rightmost_descendant(struct cgroup_subsys_state *pos);
794 * @root: css whose descendants to walk 794 * @root: css whose descendants to walk
795 * 795 *
796 * Walk @root's descendants. @root is included in the iteration and the 796 * Walk @root's descendants. @root is included in the iteration and the
797 * first node to be visited. Must be called under rcu_read_lock(). A 797 * first node to be visited. Must be called under rcu_read_lock().
798 * descendant css which hasn't finished ->css_online() or already has
799 * finished ->css_offline() may show up during traversal and it's each
800 * subsystem's responsibility to verify that each @pos is alive.
801 * 798 *
802 * If a subsystem synchronizes against the parent in its ->css_online() and 799 * If a subsystem synchronizes ->css_online() and the start of iteration, a
803 * before starting iterating, and synchronizes against @pos on each 800 * css which finished ->css_online() is guaranteed to be visible in the
804 * iteration, any descendant css which finished ->css_online() is 801 * future iterations and will stay visible until the last reference is put.
805 * guaranteed to be visible in the future iterations. 802 * A css which hasn't finished ->css_online() or already finished
803 * ->css_offline() may show up during traversal. It's each subsystem's
804 * responsibility to synchronize against on/offlining.
806 * 805 *
807 * In other words, the following guarantees that a descendant can't escape 806 * For example, the following guarantees that a descendant can't escape
808 * state updates of its ancestors. 807 * state updates of its ancestors.
809 * 808 *
810 * my_online(@css) 809 * my_online(@css)
@@ -860,8 +859,17 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
860 * 859 *
861 * Similar to css_for_each_descendant_pre() but performs post-order 860 * Similar to css_for_each_descendant_pre() but performs post-order
862 * traversal instead. @root is included in the iteration and the last 861 * traversal instead. @root is included in the iteration and the last
863 * node to be visited. Note that the walk visibility guarantee described 862 * node to be visited.
864 * in pre-order walk doesn't apply the same to post-order walks. 863 *
864 * If a subsystem synchronizes ->css_online() and the start of iteration, a
865 * css which finished ->css_online() is guaranteed to be visible in the
866 * future iterations and will stay visible until the last reference is put.
867 * A css which hasn't finished ->css_online() or already finished
868 * ->css_offline() may show up during traversal. It's each subsystem's
869 * responsibility to synchronize against on/offlining.
870 *
871 * Note that the walk visibility guarantee example described in pre-order
872 * walk doesn't apply the same to post-order walks.
865 */ 873 */
866#define css_for_each_descendant_post(pos, css) \ 874#define css_for_each_descendant_post(pos, css) \
867 for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ 875 for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5544e685f2da..097a1fc1e1e8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3089,21 +3089,25 @@ static int cgroup_task_count(const struct cgroup *cgrp)
3089 3089
3090/** 3090/**
3091 * css_next_child - find the next child of a given css 3091 * css_next_child - find the next child of a given css
3092 * @pos_css: the current position (%NULL to initiate traversal) 3092 * @pos: the current position (%NULL to initiate traversal)
3093 * @parent_css: css whose children to walk 3093 * @parent: css whose children to walk
3094 * 3094 *
3095 * This function returns the next child of @parent_css and should be called 3095 * This function returns the next child of @parent and should be called
3096 * under either cgroup_mutex or RCU read lock. The only requirement is 3096 * under either cgroup_mutex or RCU read lock. The only requirement is
3097 * that @parent_css and @pos_css are accessible. The next sibling is 3097 * that @parent and @pos are accessible. The next sibling is guaranteed to
3098 * guaranteed to be returned regardless of their states. 3098 * be returned regardless of their states.
3099 *
3100 * If a subsystem synchronizes ->css_online() and the start of iteration, a
3101 * css which finished ->css_online() is guaranteed to be visible in the
3102 * future iterations and will stay visible until the last reference is put.
3103 * A css which hasn't finished ->css_online() or already finished
3104 * ->css_offline() may show up during traversal. It's each subsystem's
3105 * responsibility to synchronize against on/offlining.
3099 */ 3106 */
3100struct cgroup_subsys_state * 3107struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
3101css_next_child(struct cgroup_subsys_state *pos_css, 3108 struct cgroup_subsys_state *parent)
3102 struct cgroup_subsys_state *parent_css)
3103{ 3109{
3104 struct cgroup *pos = pos_css ? pos_css->cgroup : NULL; 3110 struct cgroup_subsys_state *next;
3105 struct cgroup *cgrp = parent_css->cgroup;
3106 struct cgroup *next;
3107 3111
3108 cgroup_assert_mutex_or_rcu_locked(); 3112 cgroup_assert_mutex_or_rcu_locked();
3109 3113
@@ -3128,27 +3132,21 @@ css_next_child(struct cgroup_subsys_state *pos_css,
3128 * races against release and the race window is very small. 3132 * races against release and the race window is very small.
3129 */ 3133 */
3130 if (!pos) { 3134 if (!pos) {
3131 next = list_entry_rcu(cgrp->self.children.next, struct cgroup, self.sibling); 3135 next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
3132 } else if (likely(!(pos->self.flags & CSS_RELEASED))) { 3136 } else if (likely(!(pos->flags & CSS_RELEASED))) {
3133 next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling); 3137 next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
3134 } else { 3138 } else {
3135 list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling) 3139 list_for_each_entry_rcu(next, &parent->children, sibling)
3136 if (next->self.serial_nr > pos->self.serial_nr) 3140 if (next->serial_nr > pos->serial_nr)
3137 break; 3141 break;
3138 } 3142 }
3139 3143
3140 /* 3144 /*
3141 * @next, if not pointing to the head, can be dereferenced and is 3145 * @next, if not pointing to the head, can be dereferenced and is
3142 * the next sibling; however, it might have @ss disabled. If so, 3146 * the next sibling.
3143 * fast-forward to the next enabled one.
3144 */ 3147 */
3145 while (&next->self.sibling != &cgrp->self.children) { 3148 if (&next->sibling != &parent->children)
3146 struct cgroup_subsys_state *next_css = cgroup_css(next, parent_css->ss); 3149 return next;
3147
3148 if (next_css)
3149 return next_css;
3150 next = list_entry_rcu(next->self.sibling.next, struct cgroup, self.sibling);
3151 }
3152 return NULL; 3150 return NULL;
3153} 3151}
3154 3152
@@ -3165,6 +3163,13 @@ css_next_child(struct cgroup_subsys_state *pos_css,
3165 * doesn't require the whole traversal to be contained in a single critical 3163 * doesn't require the whole traversal to be contained in a single critical
3166 * section. This function will return the correct next descendant as long 3164 * section. This function will return the correct next descendant as long
3167 * as both @pos and @root are accessible and @pos is a descendant of @root. 3165 * as both @pos and @root are accessible and @pos is a descendant of @root.
3166 *
3167 * If a subsystem synchronizes ->css_online() and the start of iteration, a
3168 * css which finished ->css_online() is guaranteed to be visible in the
3169 * future iterations and will stay visible until the last reference is put.
3170 * A css which hasn't finished ->css_online() or already finished
3171 * ->css_offline() may show up during traversal. It's each subsystem's
3172 * responsibility to synchronize against on/offlining.
3168 */ 3173 */
3169struct cgroup_subsys_state * 3174struct cgroup_subsys_state *
3170css_next_descendant_pre(struct cgroup_subsys_state *pos, 3175css_next_descendant_pre(struct cgroup_subsys_state *pos,
@@ -3252,6 +3257,13 @@ css_leftmost_descendant(struct cgroup_subsys_state *pos)
3252 * section. This function will return the correct next descendant as long 3257 * section. This function will return the correct next descendant as long
3253 * as both @pos and @cgroup are accessible and @pos is a descendant of 3258 * as both @pos and @cgroup are accessible and @pos is a descendant of
3254 * @cgroup. 3259 * @cgroup.
3260 *
3261 * If a subsystem synchronizes ->css_online() and the start of iteration, a
3262 * css which finished ->css_online() is guaranteed to be visible in the
3263 * future iterations and will stay visible until the last reference is put.
3264 * A css which hasn't finished ->css_online() or already finished
3265 * ->css_offline() may show up during traversal. It's each subsystem's
3266 * responsibility to synchronize against on/offlining.
3255 */ 3267 */
3256struct cgroup_subsys_state * 3268struct cgroup_subsys_state *
3257css_next_descendant_post(struct cgroup_subsys_state *pos, 3269css_next_descendant_post(struct cgroup_subsys_state *pos,