diff options
-rw-r--r-- | include/linux/cgroup.h | 44 | ||||
-rw-r--r-- | kernel/cgroup.c | 62 |
2 files changed, 63 insertions, 43 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 5375582ea5f6..f2ff578fc03a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -764,14 +764,14 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); | |||
764 | * @pos: the css * to use as the loop cursor | 764 | * @pos: the css * to use as the loop cursor |
765 | * @parent: css whose children to walk | 765 | * @parent: css whose children to walk |
766 | * | 766 | * |
767 | * Walk @parent's children. Must be called under rcu_read_lock(). A child | 767 | * Walk @parent's children. Must be called under rcu_read_lock(). |
768 | * css which hasn't finished ->css_online() or already has finished | ||
769 | * ->css_offline() may show up during traversal and it's each subsystem's | ||
770 | * responsibility to verify that each @pos is alive. | ||
771 | * | 768 | * |
772 | * If a subsystem synchronizes against the parent in its ->css_online() and | 769 | * If a subsystem synchronizes ->css_online() and the start of iteration, a |
773 | * before starting iterating, a css which finished ->css_online() is | 770 | * css which finished ->css_online() is guaranteed to be visible in the |
774 | * guaranteed to be visible in the future iterations. | 771 | * future iterations and will stay visible until the last reference is put. |
772 | * A css which hasn't finished ->css_online() or already finished | ||
773 | * ->css_offline() may show up during traversal. It's each subsystem's | ||
774 | * responsibility to synchronize against on/offlining. | ||
775 | * | 775 | * |
776 | * It is allowed to temporarily drop RCU read lock during iteration. The | 776 | * It is allowed to temporarily drop RCU read lock during iteration. The |
777 | * caller is responsible for ensuring that @pos remains accessible until | 777 | * caller is responsible for ensuring that @pos remains accessible until |
@@ -794,17 +794,16 @@ css_rightmost_descendant(struct cgroup_subsys_state *pos); | |||
794 | * @root: css whose descendants to walk | 794 | * @root: css whose descendants to walk |
795 | * | 795 | * |
796 | * Walk @root's descendants. @root is included in the iteration and the | 796 | * Walk @root's descendants. @root is included in the iteration and the |
797 | * first node to be visited. Must be called under rcu_read_lock(). A | 797 | * first node to be visited. Must be called under rcu_read_lock(). |
798 | * descendant css which hasn't finished ->css_online() or already has | ||
799 | * finished ->css_offline() may show up during traversal and it's each | ||
800 | * subsystem's responsibility to verify that each @pos is alive. | ||
801 | * | 798 | * |
802 | * If a subsystem synchronizes against the parent in its ->css_online() and | 799 | * If a subsystem synchronizes ->css_online() and the start of iteration, a |
803 | * before starting iterating, and synchronizes against @pos on each | 800 | * css which finished ->css_online() is guaranteed to be visible in the |
804 | * iteration, any descendant css which finished ->css_online() is | 801 | * future iterations and will stay visible until the last reference is put. |
805 | * guaranteed to be visible in the future iterations. | 802 | * A css which hasn't finished ->css_online() or already finished |
803 | * ->css_offline() may show up during traversal. It's each subsystem's | ||
804 | * responsibility to synchronize against on/offlining. | ||
806 | * | 805 | * |
807 | * In other words, the following guarantees that a descendant can't escape | 806 | * For example, the following guarantees that a descendant can't escape |
808 | * state updates of its ancestors. | 807 | * state updates of its ancestors. |
809 | * | 808 | * |
810 | * my_online(@css) | 809 | * my_online(@css) |
@@ -860,8 +859,17 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, | |||
860 | * | 859 | * |
861 | * Similar to css_for_each_descendant_pre() but performs post-order | 860 | * Similar to css_for_each_descendant_pre() but performs post-order |
862 | * traversal instead. @root is included in the iteration and the last | 861 | * traversal instead. @root is included in the iteration and the last |
863 | * node to be visited. Note that the walk visibility guarantee described | 862 | * node to be visited. |
864 | * in pre-order walk doesn't apply the same to post-order walks. | 863 | * |
864 | * If a subsystem synchronizes ->css_online() and the start of iteration, a | ||
865 | * css which finished ->css_online() is guaranteed to be visible in the | ||
866 | * future iterations and will stay visible until the last reference is put. | ||
867 | * A css which hasn't finished ->css_online() or already finished | ||
868 | * ->css_offline() may show up during traversal. It's each subsystem's | ||
869 | * responsibility to synchronize against on/offlining. | ||
870 | * | ||
871 | * Note that the walk visibility guarantee example described in pre-order | ||
872 | * walk doesn't apply the same to post-order walks. | ||
865 | */ | 873 | */ |
866 | #define css_for_each_descendant_post(pos, css) \ | 874 | #define css_for_each_descendant_post(pos, css) \ |
867 | for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ | 875 | for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5544e685f2da..097a1fc1e1e8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3089,21 +3089,25 @@ static int cgroup_task_count(const struct cgroup *cgrp) | |||
3089 | 3089 | ||
3090 | /** | 3090 | /** |
3091 | * css_next_child - find the next child of a given css | 3091 | * css_next_child - find the next child of a given css |
3092 | * @pos_css: the current position (%NULL to initiate traversal) | 3092 | * @pos: the current position (%NULL to initiate traversal) |
3093 | * @parent_css: css whose children to walk | 3093 | * @parent: css whose children to walk |
3094 | * | 3094 | * |
3095 | * This function returns the next child of @parent_css and should be called | 3095 | * This function returns the next child of @parent and should be called |
3096 | * under either cgroup_mutex or RCU read lock. The only requirement is | 3096 | * under either cgroup_mutex or RCU read lock. The only requirement is |
3097 | * that @parent_css and @pos_css are accessible. The next sibling is | 3097 | * that @parent and @pos are accessible. The next sibling is guaranteed to |
3098 | * guaranteed to be returned regardless of their states. | 3098 | * be returned regardless of their states. |
3099 | * | ||
3100 | * If a subsystem synchronizes ->css_online() and the start of iteration, a | ||
3101 | * css which finished ->css_online() is guaranteed to be visible in the | ||
3102 | * future iterations and will stay visible until the last reference is put. | ||
3103 | * A css which hasn't finished ->css_online() or already finished | ||
3104 | * ->css_offline() may show up during traversal. It's each subsystem's | ||
3105 | * responsibility to synchronize against on/offlining. | ||
3099 | */ | 3106 | */ |
3100 | struct cgroup_subsys_state * | 3107 | struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, |
3101 | css_next_child(struct cgroup_subsys_state *pos_css, | 3108 | struct cgroup_subsys_state *parent) |
3102 | struct cgroup_subsys_state *parent_css) | ||
3103 | { | 3109 | { |
3104 | struct cgroup *pos = pos_css ? pos_css->cgroup : NULL; | 3110 | struct cgroup_subsys_state *next; |
3105 | struct cgroup *cgrp = parent_css->cgroup; | ||
3106 | struct cgroup *next; | ||
3107 | 3111 | ||
3108 | cgroup_assert_mutex_or_rcu_locked(); | 3112 | cgroup_assert_mutex_or_rcu_locked(); |
3109 | 3113 | ||
@@ -3128,27 +3132,21 @@ css_next_child(struct cgroup_subsys_state *pos_css, | |||
3128 | * races against release and the race window is very small. | 3132 | * races against release and the race window is very small. |
3129 | */ | 3133 | */ |
3130 | if (!pos) { | 3134 | if (!pos) { |
3131 | next = list_entry_rcu(cgrp->self.children.next, struct cgroup, self.sibling); | 3135 | next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling); |
3132 | } else if (likely(!(pos->self.flags & CSS_RELEASED))) { | 3136 | } else if (likely(!(pos->flags & CSS_RELEASED))) { |
3133 | next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling); | 3137 | next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling); |
3134 | } else { | 3138 | } else { |
3135 | list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling) | 3139 | list_for_each_entry_rcu(next, &parent->children, sibling) |
3136 | if (next->self.serial_nr > pos->self.serial_nr) | 3140 | if (next->serial_nr > pos->serial_nr) |
3137 | break; | 3141 | break; |
3138 | } | 3142 | } |
3139 | 3143 | ||
3140 | /* | 3144 | /* |
3141 | * @next, if not pointing to the head, can be dereferenced and is | 3145 | * @next, if not pointing to the head, can be dereferenced and is |
3142 | * the next sibling; however, it might have @ss disabled. If so, | 3146 | * the next sibling. |
3143 | * fast-forward to the next enabled one. | ||
3144 | */ | 3147 | */ |
3145 | while (&next->self.sibling != &cgrp->self.children) { | 3148 | if (&next->sibling != &parent->children) |
3146 | struct cgroup_subsys_state *next_css = cgroup_css(next, parent_css->ss); | 3149 | return next; |
3147 | |||
3148 | if (next_css) | ||
3149 | return next_css; | ||
3150 | next = list_entry_rcu(next->self.sibling.next, struct cgroup, self.sibling); | ||
3151 | } | ||
3152 | return NULL; | 3150 | return NULL; |
3153 | } | 3151 | } |
3154 | 3152 | ||
@@ -3165,6 +3163,13 @@ css_next_child(struct cgroup_subsys_state *pos_css, | |||
3165 | * doesn't require the whole traversal to be contained in a single critical | 3163 | * doesn't require the whole traversal to be contained in a single critical |
3166 | * section. This function will return the correct next descendant as long | 3164 | * section. This function will return the correct next descendant as long |
3167 | * as both @pos and @root are accessible and @pos is a descendant of @root. | 3165 | * as both @pos and @root are accessible and @pos is a descendant of @root. |
3166 | * | ||
3167 | * If a subsystem synchronizes ->css_online() and the start of iteration, a | ||
3168 | * css which finished ->css_online() is guaranteed to be visible in the | ||
3169 | * future iterations and will stay visible until the last reference is put. | ||
3170 | * A css which hasn't finished ->css_online() or already finished | ||
3171 | * ->css_offline() may show up during traversal. It's each subsystem's | ||
3172 | * responsibility to synchronize against on/offlining. | ||
3168 | */ | 3173 | */ |
3169 | struct cgroup_subsys_state * | 3174 | struct cgroup_subsys_state * |
3170 | css_next_descendant_pre(struct cgroup_subsys_state *pos, | 3175 | css_next_descendant_pre(struct cgroup_subsys_state *pos, |
@@ -3252,6 +3257,13 @@ css_leftmost_descendant(struct cgroup_subsys_state *pos) | |||
3252 | * section. This function will return the correct next descendant as long | 3257 | * section. This function will return the correct next descendant as long |
3253 | * as both @pos and @cgroup are accessible and @pos is a descendant of | 3258 | * as both @pos and @cgroup are accessible and @pos is a descendant of |
3254 | * @cgroup. | 3259 | * @cgroup. |
3260 | * | ||
3261 | * If a subsystem synchronizes ->css_online() and the start of iteration, a | ||
3262 | * css which finished ->css_online() is guaranteed to be visible in the | ||
3263 | * future iterations and will stay visible until the last reference is put. | ||
3264 | * A css which hasn't finished ->css_online() or already finished | ||
3265 | * ->css_offline() may show up during traversal. It's each subsystem's | ||
3266 | * responsibility to synchronize against on/offlining. | ||
3255 | */ | 3267 | */ |
3256 | struct cgroup_subsys_state * | 3268 | struct cgroup_subsys_state * |
3257 | css_next_descendant_post(struct cgroup_subsys_state *pos, | 3269 | css_next_descendant_post(struct cgroup_subsys_state *pos, |