aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Daniel Lezcano <daniel.lezcano@free.fr> 2011-05-26 19:25:23 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-05-26 20:12:34 -0400
commit: a77aea92010acf54ad785047234418d5d68772e2 (patch)
tree: c7cb57b62fd02bee2baceb79251923f7caec6139 /kernel
parent: d846687d7f84e45f23ecf3846dbb43312a1206dd (diff)
cgroup: remove the ns_cgroup
The ns_cgroup is an annoying cgroup at the namespace / cgroup frontier and leads to some problems:

  * cgroup creation is out-of-control
  * cgroup name can conflict when pids are looping
  * it is not possible to have a single process handling a lot of namespaces without falling into an exponential creation time
  * we may want to create a namespace without creating a cgroup

The ns_cgroup was replaced by a compatibility flag 'clone_children', where a newly created cgroup will copy the parent cgroup values. The userspace has to manually create a cgroup and add a task to the 'tasks' file.

This patch removes the ns_cgroup as suggested in the following thread:

https://lists.linux-foundation.org/pipermail/containers/2009-June/018616.html

The 'cgroup_clone' function is removed because it is no longer used.

This is a userspace-visible change. Commit 45531757b45c ("cgroup: notify ns_cgroup deprecated") (merged into 2.6.27) caused the kernel to emit a printk warning users that the feature is planned for removal. Since that time we have heard from XXX users who were affected by this.

Signed-off-by: Daniel Lezcano <daniel.lezcano@free.fr>
Signed-off-by: Serge E. Hallyn <serge.hallyn@canonical.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jamal Hadi Salim <hadi@cyberus.ca>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Paul Menage <menage@google.com>
Acked-by: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/Makefile 1
-rw-r--r-- kernel/cgroup.c 116
-rw-r--r-- kernel/cpuset.c 7
-rw-r--r-- kernel/fork.c 6
-rw-r--r-- kernel/ns_cgroup.c 118
-rw-r--r-- kernel/nsproxy.c 4
6 files changed, 3 insertions, 249 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index e9cf19155b46..2d64cfcc8b42 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -61,7 +61,6 @@ obj-$(CONFIG_COMPAT) += compat.o
61obj-$(CONFIG_CGROUPS) += cgroup.o 61obj-$(CONFIG_CGROUPS) += cgroup.o
62obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o 62obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
63obj-$(CONFIG_CPUSETS) += cpuset.o 63obj-$(CONFIG_CPUSETS) += cpuset.o
64obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
65obj-$(CONFIG_UTS_NS) += utsname.o 64obj-$(CONFIG_UTS_NS) += utsname.o
66obj-$(CONFIG_USER_NS) += user_namespace.o 65obj-$(CONFIG_USER_NS) += user_namespace.o
67obj-$(CONFIG_PID_NS) += pid_namespace.o 66obj-$(CONFIG_PID_NS) += pid_namespace.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 00a884342d3d..2731d115d725 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4630,122 +4630,6 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4630} 4630}
4631 4631
4632/** 4632/**
4633 * cgroup_clone - clone the cgroup the given subsystem is attached to
4634 * @tsk: the task to be moved
4635 * @subsys: the given subsystem
4636 * @nodename: the name for the new cgroup
4637 *
4638 * Duplicate the current cgroup in the hierarchy that the given
4639 * subsystem is attached to, and move this task into the new
4640 * child.
4641 */
4642int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
4643 char *nodename)
4644{
4645 struct dentry *dentry;
4646 int ret = 0;
4647 struct cgroup *parent, *child;
4648 struct inode *inode;
4649 struct css_set *cg;
4650 struct cgroupfs_root *root;
4651 struct cgroup_subsys *ss;
4652
4653 /* We shouldn't be called by an unregistered subsystem */
4654 BUG_ON(!subsys->active);
4655
4656 /* First figure out what hierarchy and cgroup we're dealing
4657 * with, and pin them so we can drop cgroup_mutex */
4658 mutex_lock(&cgroup_mutex);
4659 again:
4660 root = subsys->root;
4661 if (root == &rootnode) {
4662 mutex_unlock(&cgroup_mutex);
4663 return 0;
4664 }
4665
4666 /* Pin the hierarchy */
4667 if (!atomic_inc_not_zero(&root->sb->s_active)) {
4668 /* We race with the final deactivate_super() */
4669 mutex_unlock(&cgroup_mutex);
4670 return 0;
4671 }
4672
4673 /* Keep the cgroup alive */
4674 task_lock(tsk);
4675 parent = task_cgroup(tsk, subsys->subsys_id);
4676 cg = tsk->cgroups;
4677 get_css_set(cg);
4678 task_unlock(tsk);
4679
4680 mutex_unlock(&cgroup_mutex);
4681
4682 /* Now do the VFS work to create a cgroup */
4683 inode = parent->dentry->d_inode;
4684
4685 /* Hold the parent directory mutex across this operation to
4686 * stop anyone else deleting the new cgroup */
4687 mutex_lock(&inode->i_mutex);
4688 dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
4689 if (IS_ERR(dentry)) {
4690 printk(KERN_INFO
4691 "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename,
4692 PTR_ERR(dentry));
4693 ret = PTR_ERR(dentry);
4694 goto out_release;
4695 }
4696
4697 /* Create the cgroup directory, which also creates the cgroup */
4698 ret = vfs_mkdir(inode, dentry, 0755);
4699 child = __d_cgrp(dentry);
4700 dput(dentry);
4701 if (ret) {
4702 printk(KERN_INFO
4703 "Failed to create cgroup %s: %d\n", nodename,
4704 ret);
4705 goto out_release;
4706 }
4707
4708 /* The cgroup now exists. Retake cgroup_mutex and check
4709 * that we're still in the same state that we thought we
4710 * were. */
4711 mutex_lock(&cgroup_mutex);
4712 if ((root != subsys->root) ||
4713 (parent != task_cgroup(tsk, subsys->subsys_id))) {
4714 /* Aargh, we raced ... */
4715 mutex_unlock(&inode->i_mutex);
4716 put_css_set(cg);
4717
4718 deactivate_super(root->sb);
4719 /* The cgroup is still accessible in the VFS, but
4720 * we're not going to try to rmdir() it at this
4721 * point. */
4722 printk(KERN_INFO
4723 "Race in cgroup_clone() - leaking cgroup %s\n",
4724 nodename);
4725 goto again;
4726 }
4727
4728 /* do any required auto-setup */
4729 for_each_subsys(root, ss) {
4730 if (ss->post_clone)
4731 ss->post_clone(ss, child);
4732 }
4733
4734 /* All seems fine. Finish by moving the task into the new cgroup */
4735 ret = cgroup_attach_task(child, tsk);
4736 mutex_unlock(&cgroup_mutex);
4737
4738 out_release:
4739 mutex_unlock(&inode->i_mutex);
4740
4741 mutex_lock(&cgroup_mutex);
4742 put_css_set(cg);
4743 mutex_unlock(&cgroup_mutex);
4744 deactivate_super(root->sb);
4745 return ret;
4746}
4747
4748/**
4749 * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp 4633 * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp
4750 * @cgrp: the cgroup in question 4634 * @cgrp: the cgroup in question
4751 * @task: the task in question 4635 * @task: the task in question
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 55b297d78adc..1ceeb049c827 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1802,10 +1802,9 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
1802} 1802}
1803 1803
1804/* 1804/*
1805 * post_clone() is called at the end of cgroup_clone(). 1805 * post_clone() is called during cgroup_create() when the
1806 * 'cgroup' was just created automatically as a result of 1806 * clone_children mount argument was specified. The cgroup
1807 * a cgroup_clone(), and the current task is about to 1807 * can not yet have any tasks.
1808 * be moved into 'cgroup'.
1809 * 1808 *
1810 * Currently we refuse to set up the cgroup - thereby 1809 * Currently we refuse to set up the cgroup - thereby
1811 * refusing the task to be entered, and as a result refusing 1810 * refusing the task to be entered, and as a result refusing
diff --git a/kernel/fork.c b/kernel/fork.c
index 1fa9d940e301..1f84099ecce6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1229,12 +1229,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1229 if (clone_flags & CLONE_THREAD) 1229 if (clone_flags & CLONE_THREAD)
1230 p->tgid = current->tgid; 1230 p->tgid = current->tgid;
1231 1231
1232 if (current->nsproxy != p->nsproxy) {
1233 retval = ns_cgroup_clone(p, pid);
1234 if (retval)
1235 goto bad_fork_free_pid;
1236 }
1237
1238 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 1232 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
1239 /* 1233 /*
1240 * Clear TID on mm_release()? 1234 * Clear TID on mm_release()?
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
deleted file mode 100644
index 2c98ad94ba0e..000000000000
--- a/kernel/ns_cgroup.c
+++ /dev/null
@@ -1,118 +0,0 @@
1/*
2 * ns_cgroup.c - namespace cgroup subsystem
3 *
4 * Copyright 2006, 2007 IBM Corp
5 */
6
7#include <linux/module.h>
8#include <linux/cgroup.h>
9#include <linux/fs.h>
10#include <linux/proc_fs.h>
11#include <linux/slab.h>
12#include <linux/nsproxy.h>
13
14struct ns_cgroup {
15 struct cgroup_subsys_state css;
16};
17
18struct cgroup_subsys ns_subsys;
19
20static inline struct ns_cgroup *cgroup_to_ns(
21 struct cgroup *cgroup)
22{
23 return container_of(cgroup_subsys_state(cgroup, ns_subsys_id),
24 struct ns_cgroup, css);
25}
26
27int ns_cgroup_clone(struct task_struct *task, struct pid *pid)
28{
29 char name[PROC_NUMBUF];
30
31 snprintf(name, PROC_NUMBUF, "%d", pid_vnr(pid));
32 return cgroup_clone(task, &ns_subsys, name);
33}
34
35/*
36 * Rules:
37 * 1. you can only enter a cgroup which is a descendant of your current
38 * cgroup
39 * 2. you can only place another process into a cgroup if
40 * a. you have CAP_SYS_ADMIN
41 * b. your cgroup is an ancestor of task's destination cgroup
42 * (hence either you are in the same cgroup as task, or in an
43 * ancestor cgroup thereof)
44 */
45static int ns_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup,
46 struct task_struct *task, bool threadgroup)
47{
48 if (current != task) {
49 if (!capable(CAP_SYS_ADMIN))
50 return -EPERM;
51
52 if (!cgroup_is_descendant(new_cgroup, current))
53 return -EPERM;
54 }
55
56 if (!cgroup_is_descendant(new_cgroup, task))
57 return -EPERM;
58
59 if (threadgroup) {
60 struct task_struct *c;
61 rcu_read_lock();
62 list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
63 if (!cgroup_is_descendant(new_cgroup, c)) {
64 rcu_read_unlock();
65 return -EPERM;
66 }
67 }
68 rcu_read_unlock();
69 }
70
71 return 0;
72}
73
74/*
75 * Rules: you can only create a cgroup if
76 * 1. you are capable(CAP_SYS_ADMIN)
77 * 2. the target cgroup is a descendant of your own cgroup
78 */
79static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss,
80 struct cgroup *cgroup)
81{
82 struct ns_cgroup *ns_cgroup;
83
84 if (!capable(CAP_SYS_ADMIN))
85 return ERR_PTR(-EPERM);
86 if (!cgroup_is_descendant(cgroup, current))
87 return ERR_PTR(-EPERM);
88 if (test_bit(CGRP_CLONE_CHILDREN, &cgroup->flags)) {
89 printk("ns_cgroup can't be created with parent "
90 "'clone_children' set.\n");
91 return ERR_PTR(-EINVAL);
92 }
93
94 printk_once("ns_cgroup deprecated: consider using the "
95 "'clone_children' flag without the ns_cgroup.\n");
96
97 ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL);
98 if (!ns_cgroup)
99 return ERR_PTR(-ENOMEM);
100 return &ns_cgroup->css;
101}
102
103static void ns_destroy(struct cgroup_subsys *ss,
104 struct cgroup *cgroup)
105{
106 struct ns_cgroup *ns_cgroup;
107
108 ns_cgroup = cgroup_to_ns(cgroup);
109 kfree(ns_cgroup);
110}
111
112struct cgroup_subsys ns_subsys = {
113 .name = "ns",
114 .can_attach = ns_can_attach,
115 .create = ns_create,
116 .destroy = ns_destroy,
117 .subsys_id = ns_subsys_id,
118};
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 5424e37673ed..d6a00f3de15d 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -201,10 +201,6 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
201 goto out; 201 goto out;
202 } 202 }
203 203
204 err = ns_cgroup_clone(current, task_pid(current));
205 if (err)
206 put_nsproxy(*new_nsp);
207
208out: 204out:
209 return err; 205 return err;
210} 206}