aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Serge E. Hallyn <serue@us.ibm.com> 2007-10-19 02:39:45 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-10-19 14:53:37 -0400
commit 858d72ead4864da0fb0b89b919524125ce998e27 (patch)
tree 19ea321ca3b505efecb2053a829daf89a6a22529
parent 846c7bb055747989891f5cd2bb6e8d56243ba1e7 (diff)
cgroups: implement namespace tracking subsystem
When a task enters a new namespace via a clone() or unshare(), a new cgroup
is created and the task moves into it.

This version names cgroups which are automatically created using
cgroup_clone() as "node_<pid>" where pid is the pid of the unsharing or
cloned process.  (Thanks Pavel for the idea.)  This is safe because if the
process unshares again, it will create

    /cgroups/(...)/node_<pid>/node_<pid>

The only possibilities (AFAICT) for a -EEXIST on unshare are

    1. pid wraparound
    2. a process fails an unshare, then tries again.

Case 1 is unlikely enough that I ignore it (at least for now).  In case 2,
the node_<pid> will be empty and can be rmdir'ed to make the subsequent
unshare() succeed.

Changelog:
    Name cloned cgroups as "node_<pid>".

[clg@fr.ibm.com: fix order of cgroup subsystems in init/Kconfig]
Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/cgroup_subsys.h 6
-rw-r--r-- include/linux/nsproxy.h 7
-rw-r--r-- init/Kconfig 23
-rw-r--r-- kernel/Makefile 1
-rw-r--r-- kernel/ns_cgroup.c 100
-rw-r--r-- kernel/nsproxy.c 17
6 files changed, 146 insertions, 8 deletions
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index cbadc3b5dbc0..651ff0869b2d 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -24,3 +24,9 @@ SUBSYS(debug)
24#endif 24#endif
25 25
26/* */ 26/* */
27
28#ifdef CONFIG_CGROUP_NS
29SUBSYS(ns)
30#endif
31
32/* */
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 033a648709b6..f1eca68751a9 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -55,4 +55,11 @@ static inline void exit_task_namespaces(struct task_struct *p)
55 put_nsproxy(ns); 55 put_nsproxy(ns);
56 } 56 }
57} 57}
58
59#ifdef CONFIG_CGROUP_NS
60int ns_cgroup_clone(struct task_struct *tsk);
61#else
62static inline int ns_cgroup_clone(struct task_struct *tsk) { return 0; }
63#endif
64
58#endif 65#endif
diff --git a/init/Kconfig b/init/Kconfig
index 6687f805b38d..0007d1b5e867 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -288,6 +288,22 @@ config CGROUP_DEBUG
288 288
289 Say N if unsure 289 Say N if unsure
290 290
291config CGROUP_NS
292 bool "Namespace cgroup subsystem"
293 depends on CGROUPS
294 help
295 Provides a simple namespace cgroup subsystem to
296 provide hierarchical naming of sets of namespaces,
297 for instance virtual servers and checkpoint/restart
298 jobs.
299
300config CGROUP_CPUACCT
301 bool "Simple CPU accounting cgroup subsystem"
302 depends on CGROUPS
303 help
304 Provides a simple Resource Controller for monitoring the
305 total CPU consumed by the tasks in a cgroup
306
291config CPUSETS 307config CPUSETS
292 bool "Cpuset support" 308 bool "Cpuset support"
293 depends on SMP && CGROUPS 309 depends on SMP && CGROUPS
@@ -345,13 +361,6 @@ config PROC_PID_CPUSET
345 depends on CPUSETS 361 depends on CPUSETS
346 default y 362 default y
347 363
348config CGROUP_CPUACCT
349 bool "Simple CPU accounting cgroup subsystem"
350 depends on CGROUPS
351 help
352 Provides a simple Resource Controller for monitoring the
353 total CPU consumed by the tasks in a cgroup
354
355config RELAY 364config RELAY
356 bool "Kernel->user space relay support (formerly relayfs)" 365 bool "Kernel->user space relay support (formerly relayfs)"
357 help 366 help
diff --git a/kernel/Makefile b/kernel/Makefile
index a50a6debe5fc..32b2d8bdc9f5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_CGROUPS) += cgroup.o
40obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o 40obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
41obj-$(CONFIG_CPUSETS) += cpuset.o 41obj-$(CONFIG_CPUSETS) += cpuset.o
42obj-$(CONFIG_CGROUP_CPUACCT) += cpu_acct.o 42obj-$(CONFIG_CGROUP_CPUACCT) += cpu_acct.o
43obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
43obj-$(CONFIG_IKCONFIG) += configs.o 44obj-$(CONFIG_IKCONFIG) += configs.o
44obj-$(CONFIG_STOP_MACHINE) += stop_machine.o 45obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
45obj-$(CONFIG_AUDIT) += audit.o auditfilter.o 46obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
new file mode 100644
index 000000000000..aead4d69f62b
--- /dev/null
+++ b/kernel/ns_cgroup.c
@@ -0,0 +1,100 @@
1/*
2 * ns_cgroup.c - namespace cgroup subsystem
3 *
4 * Copyright 2006, 2007 IBM Corp
5 */
6
7#include <linux/module.h>
8#include <linux/cgroup.h>
9#include <linux/fs.h>
10
/*
 * Per-cgroup state for the "ns" subsystem: the generic subsystem state
 * plus a spinlock.
 */
11struct ns_cgroup {
12 struct cgroup_subsys_state css; /* embedded; cgroup_to_ns() recovers the container from it */
13 spinlock_t lock; /* initialised in ns_create(); not otherwise used in this file */
14};
15
16struct cgroup_subsys ns_subsys; /* declared early so ns_cgroup_clone() can take its address; initialised at the end of the file */
17
/*
 * Map a cgroup to its ns_cgroup: fetch the cgroup's state for ns_subsys_id
 * and step back from the embedded css member to the enclosing structure.
 */
18static inline struct ns_cgroup *cgroup_to_ns(
19 struct cgroup *cgroup)
20{
21 return container_of(cgroup_subsys_state(cgroup, ns_subsys_id),
22 struct ns_cgroup, css);
23}
24
/*
 * Thin wrapper: hands @task and the ns subsystem to cgroup_clone() and
 * returns its result unchanged.  Called from copy_namespaces() and
 * unshare_nsproxy_namespaces(), both of which treat a non-zero return as
 * an error and drop the newly created nsproxy.
 *
 * NOTE(review): per the commit description, cgroup_clone() creates a
 * "node_<pid>" child cgroup and moves the task into it; cgroup_clone()
 * itself is not shown here -- confirm against kernel/cgroup.c.
 */
25int ns_cgroup_clone(struct task_struct *task)
26{
27 return cgroup_clone(task, &ns_subsys);
28}
29
30/*
31 * Rules:
32 * 1. you can only enter a cgroup which is a child of your current
33 * cgroup
34 * 2. you can only place another process into a cgroup if
35 * a. you have CAP_SYS_ADMIN
36 * b. your cgroup is an ancestor of task's destination cgroup
37 * (hence either you are in the same cgroup as task, or in an
38 * ancestor cgroup thereof)
39 */
/* Returns 0 when the attach is permitted and -EPERM otherwise; @ss is unused. */
40static int ns_can_attach(struct cgroup_subsys *ss,
41 struct cgroup *new_cgroup, struct task_struct *task)
42{
43 struct cgroup *orig;
44
45 if (current != task) { /* rule 2: the caller is moving some other task */
46 if (!capable(CAP_SYS_ADMIN)) /* rule 2a */
47 return -EPERM;
48
49 if (!cgroup_is_descendant(new_cgroup)) /* rule 2b; helper is defined outside this file */
50 return -EPERM;
51 }
52
53 if (atomic_read(&new_cgroup->count) != 0) /* refuse when the destination's count is non-zero */
54 return -EPERM;
55
56 orig = task_cgroup(task, ns_subsys_id);
57 if (orig && orig != new_cgroup->parent) /* rule 1: destination must be a direct child of task's current cgroup */
58 return -EPERM;
59
60 return 0;
61}
62
63/*
64 * Rules: you can only create a cgroup if
65 * 1. you are capable(CAP_SYS_ADMIN)
66 * 2. the target cgroup is a descendant of your own cgroup
67 */
/*
 * Returns the css embedded in a freshly allocated ns_cgroup on success,
 * ERR_PTR(-EPERM) when either permission rule fails, or ERR_PTR(-ENOMEM)
 * when the allocation fails.  @ss is unused.
 */
68static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss,
69 struct cgroup *cgroup)
70{
71 struct ns_cgroup *ns_cgroup;
72
73 if (!capable(CAP_SYS_ADMIN)) /* rule 1 */
74 return ERR_PTR(-EPERM);
75 if (!cgroup_is_descendant(cgroup)) /* rule 2; helper is defined outside this file */
76 return ERR_PTR(-EPERM);
77
78 ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL); /* zero-filled; released in ns_destroy() */
79 if (!ns_cgroup)
80 return ERR_PTR(-ENOMEM);
81 spin_lock_init(&ns_cgroup->lock);
82 return &ns_cgroup->css; /* cgroup_to_ns() maps this back to the ns_cgroup */
83}
84
/*
 * Free the ns_cgroup associated with @cgroup (the structure allocated in
 * ns_create()).  @ss is unused.
 */
85static void ns_destroy(struct cgroup_subsys *ss,
86 struct cgroup *cgroup)
87{
88 struct ns_cgroup *ns_cgroup;
89
90 ns_cgroup = cgroup_to_ns(cgroup); /* look up the container of this cgroup's ns css */
91 kfree(ns_cgroup);
92}
93
/*
 * Descriptor for the "ns" subsystem: its name, id and the attach-check,
 * create and destroy callbacks defined in this file.
 */
94struct cgroup_subsys ns_subsys = {
95 .name = "ns",
96 .can_attach = ns_can_attach, /* permission check before a task is attached */
97 .create = ns_create, /* allocates the per-cgroup ns_cgroup */
98 .destroy = ns_destroy, /* frees it again */
99 .subsys_id = ns_subsys_id,
100};
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 049e7c0ac566..ac99837e7a04 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -156,7 +156,14 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
156 goto out; 156 goto out;
157 } 157 }
158 158
159 err = ns_cgroup_clone(tsk);
160 if (err) {
161 put_nsproxy(new_ns);
162 goto out;
163 }
164
159 tsk->nsproxy = new_ns; 165 tsk->nsproxy = new_ns;
166
160out: 167out:
161 put_nsproxy(old_ns); 168 put_nsproxy(old_ns);
162 return err; 169 return err;
@@ -196,8 +203,16 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
196 203
197 *new_nsp = create_new_namespaces(unshare_flags, current, 204 *new_nsp = create_new_namespaces(unshare_flags, current,
198 new_fs ? new_fs : current->fs); 205 new_fs ? new_fs : current->fs);
199 if (IS_ERR(*new_nsp)) 206 if (IS_ERR(*new_nsp)) {
200 err = PTR_ERR(*new_nsp); 207 err = PTR_ERR(*new_nsp);
208 goto out;
209 }
210
211 err = ns_cgroup_clone(current);
212 if (err)
213 put_nsproxy(*new_nsp);
214
215out:
201 return err; 216 return err;
202} 217}
203 218