aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/cgroup.h
diff options
context:
space:
mode:
author Tejun Heo <tj@kernel.org> 2013-05-23 21:55:38 -0400
committer Tejun Heo <tj@kernel.org> 2013-05-23 21:55:38 -0400
commit 53fa5261747a90746531e8a1c81eeb78fedc2f71 (patch)
tree b615f4ac453b9f40d412b5ba22498b827902f438 /include/linux/cgroup.h
parent bdc7119f1bdd0632d42f435941dc290216a436e7 (diff)
cgroup: add cgroup->serial_nr and implement cgroup_next_sibling()
Currently, there's no easy way to find out the next sibling cgroup unless it's known that the current cgroup is accessed from the parent's children list in a single RCU critical section. This in turn forces all iterators to require the whole iteration to be enclosed in a single RCU critical section, which sometimes is too restrictive. This patch implements cgroup_next_sibling() which can reliably determine the next sibling regardless of the state of the current cgroup as long as it's accessible. It currently is impossible to determine the next sibling after dropping RCU read lock because the cgroup being iterated could be removed anytime and if RCU read lock is dropped, nothing guarantees its ->sibling.next pointer is accessible. A removed cgroup would continue to point to its next sibling for RCU accesses but stop receiving updates from the sibling. IOW, the next sibling could be removed and then complete its grace period while RCU read lock is dropped, making it unsafe to dereference ->sibling.next after dropping and re-acquiring RCU read lock. This can be solved by adding a way to traverse to the next sibling without dereferencing ->sibling.next. This patch adds a monotonically increasing cgroup serial number, cgroup->serial_nr, which guarantees that all cgroup->children lists are kept in increasing serial_nr order. A new function, cgroup_next_sibling(), is implemented, which, if CGRP_REMOVED is not set on the current cgroup, follows ->sibling.next; otherwise, traverses the parent's ->children list until it sees a sibling with higher ->serial_nr. This allows the function to always return the next sibling regardless of the state of the current cgroup without adding overhead in the fast path. Further patches will update the iterators to use cgroup_next_sibling() so that they allow dropping RCU read lock and blocking while iteration is in progress which in turn will be used to simplify controllers. v2: Typo fix as per Serge.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Diffstat (limited to 'include/linux/cgroup.h')
-rw-r--r-- include/linux/cgroup.h 10
1 files changed, 10 insertions, 0 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 8d9f3c911fca..ee041a01a67e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -189,6 +189,14 @@ struct cgroup {
189 struct dentry *dentry; /* cgroup fs entry, RCU protected */ 189 struct dentry *dentry; /* cgroup fs entry, RCU protected */
190 190
191 /* 191 /*
192 * Monotonically increasing unique serial number which defines a
193 * uniform order among all cgroups. It's guaranteed that all
194 * ->children lists are in the ascending order of ->serial_nr.
195 * It's used to allow interrupting and resuming iterations.
196 */
197 u64 serial_nr;
198
199 /*
192 * This is a copy of dentry->d_name, and it's needed because 200 * This is a copy of dentry->d_name, and it's needed because
193 * we can't use dentry->d_name in cgroup_path(). 201 * we can't use dentry->d_name in cgroup_path().
194 * 202 *
@@ -675,6 +683,8 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,
675 return task_subsys_state(task, subsys_id)->cgroup; 683 return task_subsys_state(task, subsys_id)->cgroup;
676} 684}
677 685
686struct cgroup *cgroup_next_sibling(struct cgroup *pos);
687
678/** 688/**
679 * cgroup_for_each_child - iterate through children of a cgroup 689 * cgroup_for_each_child - iterate through children of a cgroup
680 * @pos: the cgroup * to use as the loop cursor 690 * @pos: the cgroup * to use as the loop cursor