author     Paul Menage <menage@google.com>  2007-10-19 02:39:33 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-10-19 14:53:36 -0400
commit     b4f48b6363c81ca743ef46943ef23fd72e60f679 (patch)
tree       40437b78e2d7a7d9d71e7bd63bc96e1ad02daa94 /kernel/cgroup.c
parent     355e0c48b757b7fcc79ccb98fda8105ed37a1598 (diff)
Task Control Groups: add fork()/exit() hooks
This adds the necessary hooks to the fork() and exit() paths to ensure
that new children inherit their parent's cgroup assignments, and that
exiting processes release reference counts on their cgroups.

Signed-off-by: Paul Menage <menage@google.com>
Cc: Serge E. Hallyn <serue@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--  kernel/cgroup.c | 121 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 121 insertions(+), 0 deletions(-)
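
The diffstat is limited to kernel/cgroup.c, so the companion call sites are not part of this patch. As a rough sketch of how the hooks are meant to be wired in (the exact placement inside copy_process() and do_exit() is an assumption here, not something this diff shows):

/* kernel/fork.c, inside copy_process() -- sketch, placement assumed: */
	cgroup_fork(p);			/* inherit the parent's css_set early */
	/* ... */
	cgroup_fork_callbacks(p);	/* just before adding p to the tasklist */

/* kernel/exit.c, inside do_exit() -- sketch, placement assumed: */
	cgroup_exit(tsk, 1);		/* run subsystem ->exit callbacks and
					 * drop the task's css_set references */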
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f4c4dce9558f..7bb520aaf0a3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -132,6 +132,33 @@ list_for_each_entry(_ss, &_root->subsys_list, sibling)
 #define for_each_root(_root) \
 list_for_each_entry(_root, &roots, root_list)
 
+/* Each task_struct has an embedded css_set, so the get/put
+ * operation simply takes a reference count on all the cgroups
+ * referenced by subsystems in this css_set. This can end up
+ * multiple-counting some cgroups, but that's OK - the ref-count is
+ * just a busy/not-busy indicator; ensuring that we only count each
+ * cgroup once would require taking a global lock to ensure that no
+ * subsystems moved between hierarchies while we were doing so.
+ *
+ * Possible TODO: decide at boot time based on the number of
+ * registered subsystems and the number of CPUs or NUMA nodes whether
+ * it's better for performance to ref-count every subsystem, or to
+ * take a global lock and only add one ref count to each hierarchy.
+ */
+static void get_css_set(struct css_set *cg)
+{
+	int i;
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
+		atomic_inc(&cg->subsys[i]->cgroup->count);
+}
+
+static void put_css_set(struct css_set *cg)
+{
+	int i;
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
+		atomic_dec(&cg->subsys[i]->cgroup->count);
+}
+
 /*
  * There is one global cgroup mutex. We also require taking
  * task_lock() when dereferencing a task's cgroup subsys pointers.
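
The multiple-counting caveat in the comment above is easier to see with the data layout in view. Before the second hunk, here is a trimmed sketch of the structures the get/put loop walks (field names follow this series' cgroup headers; everything the loop does not touch is elided):

/* Trimmed sketch; see include/linux/cgroup.h in this series for the
 * full definitions. */
struct cgroup {
	atomic_t count;			/* the busy/not-busy indicator */
};

struct cgroup_subsys_state {
	struct cgroup *cgroup;		/* cgroup owning this state */
};

struct css_set {
	/* one state pointer per subsystem; two subsystems bound to the
	 * same hierarchy point at the same cgroup, which is why the
	 * loop can count a single cgroup more than once */
	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
};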
@@ -1587,3 +1614,97 @@ int __init cgroup_init(void)
 out:
 	return err;
 }
+
+/**
+ * cgroup_fork - attach newly forked task to its parent's cgroup.
+ * @child: pointer to task_struct of the forking child process.
+ *
+ * Description: A task inherits its parent's cgroup at fork().
+ *
+ * A pointer to the shared css_set was automatically copied in
+ * fork.c by dup_task_struct(). However, we ignore that copy, since
+ * it was not made under the protection of RCU or cgroup_mutex, so
+ * might no longer be a valid cgroup pointer. attach_task() might
+ * have already changed current->cgroup, allowing the previously
+ * referenced cgroup to be removed and freed.
+ *
+ * At the point that cgroup_fork() is called, 'current' is the parent
+ * task, and the passed argument 'child' points to the child task.
+ */
+void cgroup_fork(struct task_struct *child)
+{
+	rcu_read_lock();
+	child->cgroups = rcu_dereference(current->cgroups);
+	get_css_set(&child->cgroups);
+	rcu_read_unlock();
+}
+
+/**
+ * cgroup_fork_callbacks - called on a new task very soon before
+ * adding it to the tasklist. No need to take any locks since no one
+ * can be operating on this task yet.
+ */
+void cgroup_fork_callbacks(struct task_struct *child)
+{
+	if (need_forkexit_callback) {
+		int i;
+		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+			struct cgroup_subsys *ss = subsys[i];
+			if (ss->fork)
+				ss->fork(ss, child);
+		}
+	}
+}
+
+/**
+ * cgroup_exit - detach cgroup from exiting task
+ * @tsk: pointer to task_struct of exiting process
+ *
+ * Description: Detach cgroup from @tsk and release it.
+ *
+ * Note that cgroups marked notify_on_release force every task in
+ * them to take the global cgroup_mutex when exiting.
+ * This could impact scaling on very large systems. Be reluctant to
+ * use notify_on_release cgroups where very high task exit scaling
+ * is required on large systems.
+ *
+ * the_top_cgroup_hack:
+ *
+ * Set the exiting task's cgroup to the root cgroup (top_cgroup).
+ *
+ * We call cgroup_exit() while the task is still competent to
+ * handle notify_on_release(), then leave the task attached to the
+ * root cgroup in each hierarchy for the remainder of its exit.
+ *
+ * To do this properly, we would increment the reference count on
+ * top_cgroup, and near the very end of the kernel/exit.c do_exit()
+ * code we would add a second cgroup function call, to drop that
+ * reference. This would just create an unnecessary hot spot on
+ * the top_cgroup reference count, to no avail.
+ *
+ * Normally, holding a reference to a cgroup without bumping its
+ * count is unsafe. The cgroup could go away, or someone could
+ * attach us to a different cgroup, decrementing the count on
+ * the first cgroup that we never incremented. But in this case,
+ * top_cgroup isn't going away, and either the task has PF_EXITING
+ * set, which wards off any attach_task() attempts, or the task is
+ * a failed fork, never visible to attach_task().
+ *
+ */
+void cgroup_exit(struct task_struct *tsk, int run_callbacks)
+{
+	int i;
+
+	if (run_callbacks && need_forkexit_callback) {
+		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+			struct cgroup_subsys *ss = subsys[i];
+			if (ss->exit)
+				ss->exit(ss, tsk);
+		}
+	}
+	/* Reassign the task to the init_css_set. */
+	task_lock(tsk);
+	put_css_set(&tsk->cgroups);
+	tsk->cgroups = init_task.cgroups;
+	task_unlock(tsk);
+}
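
For context on what cgroup_fork_callbacks() and cgroup_exit() actually invoke: a subsystem opts in by filling the fork and exit members of its struct cgroup_subsys. A minimal hypothetical subsystem is sketched below; the name and empty bodies are illustrative only, and a real subsystem also supplies create/destroy and the rest of the ops.

static void example_fork(struct cgroup_subsys *ss, struct task_struct *task)
{
	/* reached from cgroup_fork_callbacks(), just before the new
	 * task joins the tasklist; nothing else can operate on the
	 * task yet, so no locking is needed */
}

static void example_exit(struct cgroup_subsys *ss, struct task_struct *task)
{
	/* reached from cgroup_exit() while the task is still able to
	 * handle any notify_on_release() work */
}

struct cgroup_subsys example_subsys = {
	.name	= "example",	/* hypothetical subsystem name */
	.fork	= example_fork,
	.exit	= example_exit,
};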