aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/pid.h2
-rw-r--r--include/linux/pid_namespace.h6
-rw-r--r--init/Kconfig24
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/pid.c184
-rw-r--r--kernel/pid_namespace.c197
6 files changed, 220 insertions, 194 deletions
diff --git a/include/linux/pid.h b/include/linux/pid.h
index e29a900a8499..061abb6c0796 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -118,10 +118,10 @@ extern struct pid *find_pid(int nr);
118 */ 118 */
119extern struct pid *find_get_pid(int nr); 119extern struct pid *find_get_pid(int nr);
120extern struct pid *find_ge_pid(int nr, struct pid_namespace *); 120extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
121int next_pidmap(struct pid_namespace *pid_ns, int last);
121 122
122extern struct pid *alloc_pid(struct pid_namespace *ns); 123extern struct pid *alloc_pid(struct pid_namespace *ns);
123extern void FASTCALL(free_pid(struct pid *pid)); 124extern void FASTCALL(free_pid(struct pid *pid));
124extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
125 125
126/* 126/*
127 * the helpers to get the pid's id seen from different namespaces 127 * the helpers to get the pid's id seen from different namespaces
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 1689e28483e4..fcd61fa2c833 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -39,6 +39,7 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
39 39
40extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); 40extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
41extern void free_pid_ns(struct kref *kref); 41extern void free_pid_ns(struct kref *kref);
42extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
42 43
43static inline void put_pid_ns(struct pid_namespace *ns) 44static inline void put_pid_ns(struct pid_namespace *ns)
44{ 45{
@@ -66,6 +67,11 @@ static inline void put_pid_ns(struct pid_namespace *ns)
66{ 67{
67} 68}
68 69
70
71static inline void zap_pid_ns_processes(struct pid_namespace *ns)
72{
73 BUG();
74}
69#endif /* CONFIG_PID_NS */ 75#endif /* CONFIG_PID_NS */
70 76
71static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) 77static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
diff --git a/init/Kconfig b/init/Kconfig
index 2a6499d6f283..455170e1c1e3 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -214,18 +214,6 @@ config TASK_IO_ACCOUNTING
214 214
215 Say N if unsure. 215 Say N if unsure.
216 216
217config PID_NS
218 bool "PID Namespaces (EXPERIMENTAL)"
219 default n
220 depends on EXPERIMENTAL
221 help
222 Suport process id namespaces. This allows having multiple
223 process with the same pid as long as they are in different
224 pid namespaces. This is a building block of containers.
225
226 Unless you want to work with an experimental feature
227 say N here.
228
229config AUDIT 217config AUDIT
230 bool "Auditing support" 218 bool "Auditing support"
231 depends on NET 219 depends on NET
@@ -442,6 +430,18 @@ config USER_NS
442 to provide different user info for different servers. 430 to provide different user info for different servers.
443 If unsure, say N. 431 If unsure, say N.
444 432
433config PID_NS
434 bool "PID Namespaces (EXPERIMENTAL)"
435 default n
436 depends on NAMESPACES && EXPERIMENTAL
437 help
438 Suport process id namespaces. This allows having multiple
439 process with the same pid as long as they are in different
440 pid namespaces. This is a building block of containers.
441
442 Unless you want to work with an experimental feature
443 say N here.
444
445config BLK_DEV_INITRD 445config BLK_DEV_INITRD
446 bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support" 446 bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support"
447 depends on BROKEN || !FRV 447 depends on BROKEN || !FRV
diff --git a/kernel/Makefile b/kernel/Makefile
index 30a957a35c91..60cd39c84e6d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -44,6 +44,7 @@ obj-$(CONFIG_CPUSETS) += cpuset.o
44obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o 44obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
45obj-$(CONFIG_UTS_NS) += utsname.o 45obj-$(CONFIG_UTS_NS) += utsname.o
46obj-$(CONFIG_USER_NS) += user_namespace.o 46obj-$(CONFIG_USER_NS) += user_namespace.o
47obj-$(CONFIG_PID_NS) += pid_namespace.o
47obj-$(CONFIG_IKCONFIG) += configs.o 48obj-$(CONFIG_IKCONFIG) += configs.o
48obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o 49obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
49obj-$(CONFIG_STOP_MACHINE) += stop_machine.o 50obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
diff --git a/kernel/pid.c b/kernel/pid.c
index 3b30bccdfcdc..939746fb4ce7 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -41,7 +41,6 @@
41static struct hlist_head *pid_hash; 41static struct hlist_head *pid_hash;
42static int pidhash_shift; 42static int pidhash_shift;
43struct pid init_struct_pid = INIT_STRUCT_PID; 43struct pid init_struct_pid = INIT_STRUCT_PID;
44static struct kmem_cache *pid_ns_cachep;
45 44
46int pid_max = PID_MAX_DEFAULT; 45int pid_max = PID_MAX_DEFAULT;
47 46
@@ -181,7 +180,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
181 return -1; 180 return -1;
182} 181}
183 182
184static int next_pidmap(struct pid_namespace *pid_ns, int last) 183int next_pidmap(struct pid_namespace *pid_ns, int last)
185{ 184{
186 int offset; 185 int offset;
187 struct pidmap *map, *end; 186 struct pidmap *map, *end;
@@ -488,180 +487,6 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
488} 487}
489EXPORT_SYMBOL_GPL(find_get_pid); 488EXPORT_SYMBOL_GPL(find_get_pid);
490 489
491struct pid_cache {
492 int nr_ids;
493 char name[16];
494 struct kmem_cache *cachep;
495 struct list_head list;
496};
497
498static LIST_HEAD(pid_caches_lh);
499static DEFINE_MUTEX(pid_caches_mutex);
500
501/*
502 * creates the kmem cache to allocate pids from.
503 * @nr_ids: the number of numerical ids this pid will have to carry
504 */
505
506static struct kmem_cache *create_pid_cachep(int nr_ids)
507{
508 struct pid_cache *pcache;
509 struct kmem_cache *cachep;
510
511 mutex_lock(&pid_caches_mutex);
512 list_for_each_entry (pcache, &pid_caches_lh, list)
513 if (pcache->nr_ids == nr_ids)
514 goto out;
515
516 pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL);
517 if (pcache == NULL)
518 goto err_alloc;
519
520 snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
521 cachep = kmem_cache_create(pcache->name,
522 sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
523 0, SLAB_HWCACHE_ALIGN, NULL);
524 if (cachep == NULL)
525 goto err_cachep;
526
527 pcache->nr_ids = nr_ids;
528 pcache->cachep = cachep;
529 list_add(&pcache->list, &pid_caches_lh);
530out:
531 mutex_unlock(&pid_caches_mutex);
532 return pcache->cachep;
533
534err_cachep:
535 kfree(pcache);
536err_alloc:
537 mutex_unlock(&pid_caches_mutex);
538 return NULL;
539}
540
541#ifdef CONFIG_PID_NS
542static struct pid_namespace *create_pid_namespace(int level)
543{
544 struct pid_namespace *ns;
545 int i;
546
547 ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL);
548 if (ns == NULL)
549 goto out;
550
551 ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
552 if (!ns->pidmap[0].page)
553 goto out_free;
554
555 ns->pid_cachep = create_pid_cachep(level + 1);
556 if (ns->pid_cachep == NULL)
557 goto out_free_map;
558
559 kref_init(&ns->kref);
560 ns->last_pid = 0;
561 ns->child_reaper = NULL;
562 ns->level = level;
563
564 set_bit(0, ns->pidmap[0].page);
565 atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
566
567 for (i = 1; i < PIDMAP_ENTRIES; i++) {
568 ns->pidmap[i].page = 0;
569 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
570 }
571
572 return ns;
573
574out_free_map:
575 kfree(ns->pidmap[0].page);
576out_free:
577 kmem_cache_free(pid_ns_cachep, ns);
578out:
579 return ERR_PTR(-ENOMEM);
580}
581
582static void destroy_pid_namespace(struct pid_namespace *ns)
583{
584 int i;
585
586 for (i = 0; i < PIDMAP_ENTRIES; i++)
587 kfree(ns->pidmap[i].page);
588 kmem_cache_free(pid_ns_cachep, ns);
589}
590
591struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
592{
593 struct pid_namespace *new_ns;
594
595 BUG_ON(!old_ns);
596 new_ns = get_pid_ns(old_ns);
597 if (!(flags & CLONE_NEWPID))
598 goto out;
599
600 new_ns = ERR_PTR(-EINVAL);
601 if (flags & CLONE_THREAD)
602 goto out_put;
603
604 new_ns = create_pid_namespace(old_ns->level + 1);
605 if (!IS_ERR(new_ns))
606 new_ns->parent = get_pid_ns(old_ns);
607
608out_put:
609 put_pid_ns(old_ns);
610out:
611 return new_ns;
612}
613
614void free_pid_ns(struct kref *kref)
615{
616 struct pid_namespace *ns, *parent;
617
618 ns = container_of(kref, struct pid_namespace, kref);
619
620 parent = ns->parent;
621 destroy_pid_namespace(ns);
622
623 if (parent != NULL)
624 put_pid_ns(parent);
625}
626#endif /* CONFIG_PID_NS */
627
628void zap_pid_ns_processes(struct pid_namespace *pid_ns)
629{
630 int nr;
631 int rc;
632
633 /*
634 * The last thread in the cgroup-init thread group is terminating.
635 * Find remaining pid_ts in the namespace, signal and wait for them
636 * to exit.
637 *
638 * Note: This signals each threads in the namespace - even those that
639 * belong to the same thread group, To avoid this, we would have
640 * to walk the entire tasklist looking a processes in this
641 * namespace, but that could be unnecessarily expensive if the
642 * pid namespace has just a few processes. Or we need to
643 * maintain a tasklist for each pid namespace.
644 *
645 */
646 read_lock(&tasklist_lock);
647 nr = next_pidmap(pid_ns, 1);
648 while (nr > 0) {
649 kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr);
650 nr = next_pidmap(pid_ns, nr);
651 }
652 read_unlock(&tasklist_lock);
653
654 do {
655 clear_thread_flag(TIF_SIGPENDING);
656 rc = sys_wait4(-1, NULL, __WALL, NULL);
657 } while (rc != -ECHILD);
658
659
660 /* Child reaper for the pid namespace is going away */
661 pid_ns->child_reaper = NULL;
662 return;
663}
664
665/* 490/*
666 * The pid hash table is scaled according to the amount of memory in the 491 * The pid hash table is scaled according to the amount of memory in the
667 * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or 492 * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
@@ -694,9 +519,6 @@ void __init pidmap_init(void)
694 set_bit(0, init_pid_ns.pidmap[0].page); 519 set_bit(0, init_pid_ns.pidmap[0].page);
695 atomic_dec(&init_pid_ns.pidmap[0].nr_free); 520 atomic_dec(&init_pid_ns.pidmap[0].nr_free);
696 521
697 init_pid_ns.pid_cachep = create_pid_cachep(1); 522 init_pid_ns.pid_cachep = KMEM_CACHE(pid,
698 if (init_pid_ns.pid_cachep == NULL) 523 SLAB_HWCACHE_ALIGN | SLAB_PANIC);
699 panic("Can't create pid_1 cachep\n");
700
701 pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
702} 524}
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
new file mode 100644
index 000000000000..6d792b66d854
--- /dev/null
+++ b/kernel/pid_namespace.c
@@ -0,0 +1,197 @@
1/*
2 * Pid namespaces
3 *
4 * Authors:
5 * (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
6 * (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
7 * Many thanks to Oleg Nesterov for comments and help
8 *
9 */
10
11#include <linux/pid.h>
12#include <linux/pid_namespace.h>
13#include <linux/syscalls.h>
14#include <linux/err.h>
15
16#define BITS_PER_PAGE (PAGE_SIZE*8)
17
18struct pid_cache {
19 int nr_ids;
20 char name[16];
21 struct kmem_cache *cachep;
22 struct list_head list;
23};
24
25static LIST_HEAD(pid_caches_lh);
26static DEFINE_MUTEX(pid_caches_mutex);
27static struct kmem_cache *pid_ns_cachep;
28
29/*
30 * creates the kmem cache to allocate pids from.
31 * @nr_ids: the number of numerical ids this pid will have to carry
32 */
33
34static struct kmem_cache *create_pid_cachep(int nr_ids)
35{
36 struct pid_cache *pcache;
37 struct kmem_cache *cachep;
38
39 mutex_lock(&pid_caches_mutex);
40 list_for_each_entry(pcache, &pid_caches_lh, list)
41 if (pcache->nr_ids == nr_ids)
42 goto out;
43
44 pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL);
45 if (pcache == NULL)
46 goto err_alloc;
47
48 snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
49 cachep = kmem_cache_create(pcache->name,
50 sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
51 0, SLAB_HWCACHE_ALIGN, NULL);
52 if (cachep == NULL)
53 goto err_cachep;
54
55 pcache->nr_ids = nr_ids;
56 pcache->cachep = cachep;
57 list_add(&pcache->list, &pid_caches_lh);
58out:
59 mutex_unlock(&pid_caches_mutex);
60 return pcache->cachep;
61
62err_cachep:
63 kfree(pcache);
64err_alloc:
65 mutex_unlock(&pid_caches_mutex);
66 return NULL;
67}
68
69static struct pid_namespace *create_pid_namespace(int level)
70{
71 struct pid_namespace *ns;
72 int i;
73
74 ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL);
75 if (ns == NULL)
76 goto out;
77
78 ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
79 if (!ns->pidmap[0].page)
80 goto out_free;
81
82 ns->pid_cachep = create_pid_cachep(level + 1);
83 if (ns->pid_cachep == NULL)
84 goto out_free_map;
85
86 kref_init(&ns->kref);
87 ns->last_pid = 0;
88 ns->child_reaper = NULL;
89 ns->level = level;
90
91 set_bit(0, ns->pidmap[0].page);
92 atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
93
94 for (i = 1; i < PIDMAP_ENTRIES; i++) {
95 ns->pidmap[i].page = 0;
96 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
97 }
98
99 return ns;
100
101out_free_map:
102 kfree(ns->pidmap[0].page);
103out_free:
104 kmem_cache_free(pid_ns_cachep, ns);
105out:
106 return ERR_PTR(-ENOMEM);
107}
108
109static void destroy_pid_namespace(struct pid_namespace *ns)
110{
111 int i;
112
113 for (i = 0; i < PIDMAP_ENTRIES; i++)
114 kfree(ns->pidmap[i].page);
115 kmem_cache_free(pid_ns_cachep, ns);
116}
117
118struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
119{
120 struct pid_namespace *new_ns;
121
122 BUG_ON(!old_ns);
123 new_ns = get_pid_ns(old_ns);
124 if (!(flags & CLONE_NEWPID))
125 goto out;
126
127 new_ns = ERR_PTR(-EINVAL);
128 if (flags & CLONE_THREAD)
129 goto out_put;
130
131 new_ns = create_pid_namespace(old_ns->level + 1);
132 if (!IS_ERR(new_ns))
133 new_ns->parent = get_pid_ns(old_ns);
134
135out_put:
136 put_pid_ns(old_ns);
137out:
138 return new_ns;
139}
140
141void free_pid_ns(struct kref *kref)
142{
143 struct pid_namespace *ns, *parent;
144
145 ns = container_of(kref, struct pid_namespace, kref);
146
147 parent = ns->parent;
148 destroy_pid_namespace(ns);
149
150 if (parent != NULL)
151 put_pid_ns(parent);
152}
153
154void zap_pid_ns_processes(struct pid_namespace *pid_ns)
155{
156 int nr;
157 int rc;
158
159 /*
160 * The last thread in the cgroup-init thread group is terminating.
161 * Find remaining pid_ts in the namespace, signal and wait for them
162 * to exit.
163 *
164 * Note: This signals each threads in the namespace - even those that
165 * belong to the same thread group, To avoid this, we would have
166 * to walk the entire tasklist looking a processes in this
167 * namespace, but that could be unnecessarily expensive if the
168 * pid namespace has just a few processes. Or we need to
169 * maintain a tasklist for each pid namespace.
170 *
171 */
172 read_lock(&tasklist_lock);
173 nr = next_pidmap(pid_ns, 1);
174 while (nr > 0) {
175 kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr);
176 nr = next_pidmap(pid_ns, nr);
177 }
178 read_unlock(&tasklist_lock);
179
180 do {
181 clear_thread_flag(TIF_SIGPENDING);
182 rc = sys_wait4(-1, NULL, __WALL, NULL);
183 } while (rc != -ECHILD);
184
185
186 /* Child reaper for the pid namespace is going away */
187 pid_ns->child_reaper = NULL;
188 return;
189}
190
191static __init int pid_namespaces_init(void)
192{
193 pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
194 return 0;
195}
196
197__initcall(pid_namespaces_init);