diff options
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- | kernel/cgroup.c | 41 |
1 files changed, 20 insertions, 21 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d5af128ec1ec..5544e685f2da 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -3108,27 +3108,28 @@ css_next_child(struct cgroup_subsys_state *pos_css, | |||
3108 | cgroup_assert_mutex_or_rcu_locked(); | 3108 | cgroup_assert_mutex_or_rcu_locked(); |
3109 | 3109 | ||
3110 | /* | 3110 | /* |
3111 | * @pos could already have been removed. Once a cgroup is removed, | 3111 | * @pos could already have been unlinked from the sibling list. |
3112 | * its ->sibling.next is no longer updated when its next sibling | 3112 | * Once a cgroup is removed, its ->sibling.next is no longer |
3113 | * changes. As CGRP_DEAD assertion is serialized and happens | 3113 | * updated when its next sibling changes. CSS_RELEASED is set when |
3114 | * before the cgroup is taken off the ->sibling list, if we see it | 3114 | * @pos is taken off list, at which time its next pointer is valid, |
3115 | * unasserted, it's guaranteed that the next sibling hasn't | 3115 | * and, as releases are serialized, the one pointed to by the next |
3116 | * finished its grace period even if it's already removed, and thus | 3116 | * pointer is guaranteed to not have started release yet. This |
3117 | * safe to dereference from this RCU critical section. If | 3117 | * implies that if we observe !CSS_RELEASED on @pos in this RCU |
3118 | * ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed | 3118 | * critical section, the one pointed to by its next pointer is |
3119 | * to be visible as %true here. | 3119 | * guaranteed to not have finished its RCU grace period even if we |
3120 | * have dropped rcu_read_lock() in between iterations. | ||
3120 | * | 3121 | * |
3121 | * If @pos is dead, its next pointer can't be dereferenced; | 3122 | * If @pos has CSS_RELEASED set, its next pointer can't be |
3122 | * however, as each cgroup is given a monotonically increasing | 3123 | * dereferenced; however, as each css is given a monotonically |
3123 | * unique serial number and always appended to the sibling list, | 3124 | * increasing unique serial number and always appended to the |
3124 | * the next one can be found by walking the parent's children until | 3125 | * sibling list, the next one can be found by walking the parent's |
3125 | * we see a cgroup with higher serial number than @pos's. While | 3126 | * children until the first css with higher serial number than |
3126 | * this path can be slower, it's taken only when either the current | 3127 | * @pos's. While this path can be slower, it happens iff iteration |
3127 | * cgroup is removed or iteration and removal race. | 3128 | * races against release and the race window is very small. |
3128 | */ | 3129 | */ |
3129 | if (!pos) { | 3130 | if (!pos) { |
3130 | next = list_entry_rcu(cgrp->self.children.next, struct cgroup, self.sibling); | 3131 | next = list_entry_rcu(cgrp->self.children.next, struct cgroup, self.sibling); |
3131 | } else if (likely(!cgroup_is_dead(pos))) { | 3132 | } else if (likely(!(pos->self.flags & CSS_RELEASED))) { |
3132 | next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling); | 3133 | next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling); |
3133 | } else { | 3134 | } else { |
3134 | list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling) | 3135 | list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling) |
@@ -4139,6 +4140,7 @@ static void css_release_work_fn(struct work_struct *work) | |||
4139 | 4140 | ||
4140 | mutex_lock(&cgroup_mutex); | 4141 | mutex_lock(&cgroup_mutex); |
4141 | 4142 | ||
4143 | css->flags |= CSS_RELEASED; | ||
4142 | list_del_rcu(&css->sibling); | 4144 | list_del_rcu(&css->sibling); |
4143 | 4145 | ||
4144 | if (ss) { | 4146 | if (ss) { |
@@ -4525,10 +4527,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
4525 | 4527 | ||
4526 | /* | 4528 | /* |
4527 | * Mark @cgrp dead. This prevents further task migration and child | 4529 | * Mark @cgrp dead. This prevents further task migration and child |
4528 | * creation by disabling cgroup_lock_live_group(). Note that | 4530 | * creation by disabling cgroup_lock_live_group(). |
4529 | * CGRP_DEAD assertion is depended upon by css_next_child() to | ||
4530 | * resume iteration after dropping RCU read lock. See | ||
4531 | * css_next_child() for details. | ||
4532 | */ | 4531 | */ |
4533 | set_bit(CGRP_DEAD, &cgrp->flags); | 4532 | set_bit(CGRP_DEAD, &cgrp->flags); |
4534 | 4533 | ||