aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/pid.c184
-rw-r--r--kernel/pid_namespace.c197
3 files changed, 201 insertions, 181 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 30a957a35c91..60cd39c84e6d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -44,6 +44,7 @@ obj-$(CONFIG_CPUSETS) += cpuset.o
44obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o 44obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
45obj-$(CONFIG_UTS_NS) += utsname.o 45obj-$(CONFIG_UTS_NS) += utsname.o
46obj-$(CONFIG_USER_NS) += user_namespace.o 46obj-$(CONFIG_USER_NS) += user_namespace.o
47obj-$(CONFIG_PID_NS) += pid_namespace.o
47obj-$(CONFIG_IKCONFIG) += configs.o 48obj-$(CONFIG_IKCONFIG) += configs.o
48obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o 49obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
49obj-$(CONFIG_STOP_MACHINE) += stop_machine.o 50obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
diff --git a/kernel/pid.c b/kernel/pid.c
index 3b30bccdfcdc..939746fb4ce7 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -41,7 +41,6 @@
41static struct hlist_head *pid_hash; 41static struct hlist_head *pid_hash;
42static int pidhash_shift; 42static int pidhash_shift;
43struct pid init_struct_pid = INIT_STRUCT_PID; 43struct pid init_struct_pid = INIT_STRUCT_PID;
44static struct kmem_cache *pid_ns_cachep;
45 44
46int pid_max = PID_MAX_DEFAULT; 45int pid_max = PID_MAX_DEFAULT;
47 46
@@ -181,7 +180,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
181 return -1; 180 return -1;
182} 181}
183 182
184static int next_pidmap(struct pid_namespace *pid_ns, int last) 183int next_pidmap(struct pid_namespace *pid_ns, int last)
185{ 184{
186 int offset; 185 int offset;
187 struct pidmap *map, *end; 186 struct pidmap *map, *end;
@@ -488,180 +487,6 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
488} 487}
489EXPORT_SYMBOL_GPL(find_get_pid); 488EXPORT_SYMBOL_GPL(find_get_pid);
490 489
491struct pid_cache {
492 int nr_ids;
493 char name[16];
494 struct kmem_cache *cachep;
495 struct list_head list;
496};
497
498static LIST_HEAD(pid_caches_lh);
499static DEFINE_MUTEX(pid_caches_mutex);
500
501/*
502 * creates the kmem cache to allocate pids from.
503 * @nr_ids: the number of numerical ids this pid will have to carry
504 */
505
506static struct kmem_cache *create_pid_cachep(int nr_ids)
507{
508 struct pid_cache *pcache;
509 struct kmem_cache *cachep;
510
511 mutex_lock(&pid_caches_mutex);
512 list_for_each_entry (pcache, &pid_caches_lh, list)
513 if (pcache->nr_ids == nr_ids)
514 goto out;
515
516 pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL);
517 if (pcache == NULL)
518 goto err_alloc;
519
520 snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
521 cachep = kmem_cache_create(pcache->name,
522 sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
523 0, SLAB_HWCACHE_ALIGN, NULL);
524 if (cachep == NULL)
525 goto err_cachep;
526
527 pcache->nr_ids = nr_ids;
528 pcache->cachep = cachep;
529 list_add(&pcache->list, &pid_caches_lh);
530out:
531 mutex_unlock(&pid_caches_mutex);
532 return pcache->cachep;
533
534err_cachep:
535 kfree(pcache);
536err_alloc:
537 mutex_unlock(&pid_caches_mutex);
538 return NULL;
539}
540
541#ifdef CONFIG_PID_NS
542static struct pid_namespace *create_pid_namespace(int level)
543{
544 struct pid_namespace *ns;
545 int i;
546
547 ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL);
548 if (ns == NULL)
549 goto out;
550
551 ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
552 if (!ns->pidmap[0].page)
553 goto out_free;
554
555 ns->pid_cachep = create_pid_cachep(level + 1);
556 if (ns->pid_cachep == NULL)
557 goto out_free_map;
558
559 kref_init(&ns->kref);
560 ns->last_pid = 0;
561 ns->child_reaper = NULL;
562 ns->level = level;
563
564 set_bit(0, ns->pidmap[0].page);
565 atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
566
567 for (i = 1; i < PIDMAP_ENTRIES; i++) {
568 ns->pidmap[i].page = 0;
569 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
570 }
571
572 return ns;
573
574out_free_map:
575 kfree(ns->pidmap[0].page);
576out_free:
577 kmem_cache_free(pid_ns_cachep, ns);
578out:
579 return ERR_PTR(-ENOMEM);
580}
581
582static void destroy_pid_namespace(struct pid_namespace *ns)
583{
584 int i;
585
586 for (i = 0; i < PIDMAP_ENTRIES; i++)
587 kfree(ns->pidmap[i].page);
588 kmem_cache_free(pid_ns_cachep, ns);
589}
590
591struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
592{
593 struct pid_namespace *new_ns;
594
595 BUG_ON(!old_ns);
596 new_ns = get_pid_ns(old_ns);
597 if (!(flags & CLONE_NEWPID))
598 goto out;
599
600 new_ns = ERR_PTR(-EINVAL);
601 if (flags & CLONE_THREAD)
602 goto out_put;
603
604 new_ns = create_pid_namespace(old_ns->level + 1);
605 if (!IS_ERR(new_ns))
606 new_ns->parent = get_pid_ns(old_ns);
607
608out_put:
609 put_pid_ns(old_ns);
610out:
611 return new_ns;
612}
613
614void free_pid_ns(struct kref *kref)
615{
616 struct pid_namespace *ns, *parent;
617
618 ns = container_of(kref, struct pid_namespace, kref);
619
620 parent = ns->parent;
621 destroy_pid_namespace(ns);
622
623 if (parent != NULL)
624 put_pid_ns(parent);
625}
626#endif /* CONFIG_PID_NS */
627
628void zap_pid_ns_processes(struct pid_namespace *pid_ns)
629{
630 int nr;
631 int rc;
632
633 /*
634 * The last thread in the cgroup-init thread group is terminating.
635 * Find remaining pid_ts in the namespace, signal and wait for them
636 * to exit.
637 *
638 * Note: This signals each threads in the namespace - even those that
639 * belong to the same thread group, To avoid this, we would have
640 * to walk the entire tasklist looking a processes in this
641 * namespace, but that could be unnecessarily expensive if the
642 * pid namespace has just a few processes. Or we need to
643 * maintain a tasklist for each pid namespace.
644 *
645 */
646 read_lock(&tasklist_lock);
647 nr = next_pidmap(pid_ns, 1);
648 while (nr > 0) {
649 kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr);
650 nr = next_pidmap(pid_ns, nr);
651 }
652 read_unlock(&tasklist_lock);
653
654 do {
655 clear_thread_flag(TIF_SIGPENDING);
656 rc = sys_wait4(-1, NULL, __WALL, NULL);
657 } while (rc != -ECHILD);
658
659
660 /* Child reaper for the pid namespace is going away */
661 pid_ns->child_reaper = NULL;
662 return;
663}
664
665/* 490/*
666 * The pid hash table is scaled according to the amount of memory in the 491 * The pid hash table is scaled according to the amount of memory in the
667 * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or 492 * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
@@ -694,9 +519,6 @@ void __init pidmap_init(void)
694 set_bit(0, init_pid_ns.pidmap[0].page); 519 set_bit(0, init_pid_ns.pidmap[0].page);
695 atomic_dec(&init_pid_ns.pidmap[0].nr_free); 520 atomic_dec(&init_pid_ns.pidmap[0].nr_free);
696 521
697 init_pid_ns.pid_cachep = create_pid_cachep(1); 522 init_pid_ns.pid_cachep = KMEM_CACHE(pid,
698 if (init_pid_ns.pid_cachep == NULL) 523 SLAB_HWCACHE_ALIGN | SLAB_PANIC);
699 panic("Can't create pid_1 cachep\n");
700
701 pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
702} 524}
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
new file mode 100644
index 000000000000..6d792b66d854
--- /dev/null
+++ b/kernel/pid_namespace.c
@@ -0,0 +1,197 @@
1/*
2 * Pid namespaces
3 *
4 * Authors:
5 * (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
6 * (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
7 * Many thanks to Oleg Nesterov for comments and help
8 *
9 */
10
11#include <linux/pid.h>
12#include <linux/pid_namespace.h>
13#include <linux/syscalls.h>
14#include <linux/err.h>
15
16#define BITS_PER_PAGE (PAGE_SIZE*8)
17
18struct pid_cache {
19 int nr_ids;
20 char name[16];
21 struct kmem_cache *cachep;
22 struct list_head list;
23};
24
25static LIST_HEAD(pid_caches_lh);
26static DEFINE_MUTEX(pid_caches_mutex);
27static struct kmem_cache *pid_ns_cachep;
28
29/*
30 * creates the kmem cache to allocate pids from.
31 * @nr_ids: the number of numerical ids this pid will have to carry
32 */
33
34static struct kmem_cache *create_pid_cachep(int nr_ids)
35{
36 struct pid_cache *pcache;
37 struct kmem_cache *cachep;
38
39 mutex_lock(&pid_caches_mutex);
40 list_for_each_entry(pcache, &pid_caches_lh, list)
41 if (pcache->nr_ids == nr_ids)
42 goto out;
43
44 pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL);
45 if (pcache == NULL)
46 goto err_alloc;
47
48 snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
49 cachep = kmem_cache_create(pcache->name,
50 sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
51 0, SLAB_HWCACHE_ALIGN, NULL);
52 if (cachep == NULL)
53 goto err_cachep;
54
55 pcache->nr_ids = nr_ids;
56 pcache->cachep = cachep;
57 list_add(&pcache->list, &pid_caches_lh);
58out:
59 mutex_unlock(&pid_caches_mutex);
60 return pcache->cachep;
61
62err_cachep:
63 kfree(pcache);
64err_alloc:
65 mutex_unlock(&pid_caches_mutex);
66 return NULL;
67}
68
69static struct pid_namespace *create_pid_namespace(int level)
70{
71 struct pid_namespace *ns;
72 int i;
73
74 ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL);
75 if (ns == NULL)
76 goto out;
77
78 ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
79 if (!ns->pidmap[0].page)
80 goto out_free;
81
82 ns->pid_cachep = create_pid_cachep(level + 1);
83 if (ns->pid_cachep == NULL)
84 goto out_free_map;
85
86 kref_init(&ns->kref);
87 ns->last_pid = 0;
88 ns->child_reaper = NULL;
89 ns->level = level;
90
91 set_bit(0, ns->pidmap[0].page);
92 atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
93
94 for (i = 1; i < PIDMAP_ENTRIES; i++) {
95 ns->pidmap[i].page = 0;
96 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
97 }
98
99 return ns;
100
101out_free_map:
102 kfree(ns->pidmap[0].page);
103out_free:
104 kmem_cache_free(pid_ns_cachep, ns);
105out:
106 return ERR_PTR(-ENOMEM);
107}
108
109static void destroy_pid_namespace(struct pid_namespace *ns)
110{
111 int i;
112
113 for (i = 0; i < PIDMAP_ENTRIES; i++)
114 kfree(ns->pidmap[i].page);
115 kmem_cache_free(pid_ns_cachep, ns);
116}
117
118struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
119{
120 struct pid_namespace *new_ns;
121
122 BUG_ON(!old_ns);
123 new_ns = get_pid_ns(old_ns);
124 if (!(flags & CLONE_NEWPID))
125 goto out;
126
127 new_ns = ERR_PTR(-EINVAL);
128 if (flags & CLONE_THREAD)
129 goto out_put;
130
131 new_ns = create_pid_namespace(old_ns->level + 1);
132 if (!IS_ERR(new_ns))
133 new_ns->parent = get_pid_ns(old_ns);
134
135out_put:
136 put_pid_ns(old_ns);
137out:
138 return new_ns;
139}
140
141void free_pid_ns(struct kref *kref)
142{
143 struct pid_namespace *ns, *parent;
144
145 ns = container_of(kref, struct pid_namespace, kref);
146
147 parent = ns->parent;
148 destroy_pid_namespace(ns);
149
150 if (parent != NULL)
151 put_pid_ns(parent);
152}
153
154void zap_pid_ns_processes(struct pid_namespace *pid_ns)
155{
156 int nr;
157 int rc;
158
159 /*
160 * The last thread in the cgroup-init thread group is terminating.
161 * Find remaining pid_ts in the namespace, signal and wait for them
162 * to exit.
163 *
164 * Note: This signals each threads in the namespace - even those that
165 * belong to the same thread group, To avoid this, we would have
166 * to walk the entire tasklist looking a processes in this
167 * namespace, but that could be unnecessarily expensive if the
168 * pid namespace has just a few processes. Or we need to
169 * maintain a tasklist for each pid namespace.
170 *
171 */
172 read_lock(&tasklist_lock);
173 nr = next_pidmap(pid_ns, 1);
174 while (nr > 0) {
175 kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr);
176 nr = next_pidmap(pid_ns, nr);
177 }
178 read_unlock(&tasklist_lock);
179
180 do {
181 clear_thread_flag(TIF_SIGPENDING);
182 rc = sys_wait4(-1, NULL, __WALL, NULL);
183 } while (rc != -ECHILD);
184
185
186 /* Child reaper for the pid namespace is going away */
187 pid_ns->child_reaper = NULL;
188 return;
189}
190
191static __init int pid_namespaces_init(void)
192{
193 pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
194 return 0;
195}
196
197__initcall(pid_namespaces_init);