diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 1 | ||||
-rw-r--r-- | kernel/pid.c | 184 | ||||
-rw-r--r-- | kernel/pid_namespace.c | 197 |
3 files changed, 201 insertions, 181 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 30a957a35c91..60cd39c84e6d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -44,6 +44,7 @@ obj-$(CONFIG_CPUSETS) += cpuset.o | |||
44 | obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o | 44 | obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o |
45 | obj-$(CONFIG_UTS_NS) += utsname.o | 45 | obj-$(CONFIG_UTS_NS) += utsname.o |
46 | obj-$(CONFIG_USER_NS) += user_namespace.o | 46 | obj-$(CONFIG_USER_NS) += user_namespace.o |
47 | obj-$(CONFIG_PID_NS) += pid_namespace.o | ||
47 | obj-$(CONFIG_IKCONFIG) += configs.o | 48 | obj-$(CONFIG_IKCONFIG) += configs.o |
48 | obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o | 49 | obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o |
49 | obj-$(CONFIG_STOP_MACHINE) += stop_machine.o | 50 | obj-$(CONFIG_STOP_MACHINE) += stop_machine.o |
diff --git a/kernel/pid.c b/kernel/pid.c index 3b30bccdfcdc..939746fb4ce7 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -41,7 +41,6 @@ | |||
41 | static struct hlist_head *pid_hash; | 41 | static struct hlist_head *pid_hash; |
42 | static int pidhash_shift; | 42 | static int pidhash_shift; |
43 | struct pid init_struct_pid = INIT_STRUCT_PID; | 43 | struct pid init_struct_pid = INIT_STRUCT_PID; |
44 | static struct kmem_cache *pid_ns_cachep; | ||
45 | 44 | ||
46 | int pid_max = PID_MAX_DEFAULT; | 45 | int pid_max = PID_MAX_DEFAULT; |
47 | 46 | ||
@@ -181,7 +180,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) | |||
181 | return -1; | 180 | return -1; |
182 | } | 181 | } |
183 | 182 | ||
184 | static int next_pidmap(struct pid_namespace *pid_ns, int last) | 183 | int next_pidmap(struct pid_namespace *pid_ns, int last) |
185 | { | 184 | { |
186 | int offset; | 185 | int offset; |
187 | struct pidmap *map, *end; | 186 | struct pidmap *map, *end; |
@@ -488,180 +487,6 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) | |||
488 | } | 487 | } |
489 | EXPORT_SYMBOL_GPL(find_get_pid); | 488 | EXPORT_SYMBOL_GPL(find_get_pid); |
490 | 489 | ||
491 | struct pid_cache { | ||
492 | int nr_ids; | ||
493 | char name[16]; | ||
494 | struct kmem_cache *cachep; | ||
495 | struct list_head list; | ||
496 | }; | ||
497 | |||
498 | static LIST_HEAD(pid_caches_lh); | ||
499 | static DEFINE_MUTEX(pid_caches_mutex); | ||
500 | |||
501 | /* | ||
502 | * creates the kmem cache to allocate pids from. | ||
503 | * @nr_ids: the number of numerical ids this pid will have to carry | ||
504 | */ | ||
505 | |||
506 | static struct kmem_cache *create_pid_cachep(int nr_ids) | ||
507 | { | ||
508 | struct pid_cache *pcache; | ||
509 | struct kmem_cache *cachep; | ||
510 | |||
511 | mutex_lock(&pid_caches_mutex); | ||
512 | list_for_each_entry (pcache, &pid_caches_lh, list) | ||
513 | if (pcache->nr_ids == nr_ids) | ||
514 | goto out; | ||
515 | |||
516 | pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL); | ||
517 | if (pcache == NULL) | ||
518 | goto err_alloc; | ||
519 | |||
520 | snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids); | ||
521 | cachep = kmem_cache_create(pcache->name, | ||
522 | sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid), | ||
523 | 0, SLAB_HWCACHE_ALIGN, NULL); | ||
524 | if (cachep == NULL) | ||
525 | goto err_cachep; | ||
526 | |||
527 | pcache->nr_ids = nr_ids; | ||
528 | pcache->cachep = cachep; | ||
529 | list_add(&pcache->list, &pid_caches_lh); | ||
530 | out: | ||
531 | mutex_unlock(&pid_caches_mutex); | ||
532 | return pcache->cachep; | ||
533 | |||
534 | err_cachep: | ||
535 | kfree(pcache); | ||
536 | err_alloc: | ||
537 | mutex_unlock(&pid_caches_mutex); | ||
538 | return NULL; | ||
539 | } | ||
540 | |||
541 | #ifdef CONFIG_PID_NS | ||
542 | static struct pid_namespace *create_pid_namespace(int level) | ||
543 | { | ||
544 | struct pid_namespace *ns; | ||
545 | int i; | ||
546 | |||
547 | ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL); | ||
548 | if (ns == NULL) | ||
549 | goto out; | ||
550 | |||
551 | ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); | ||
552 | if (!ns->pidmap[0].page) | ||
553 | goto out_free; | ||
554 | |||
555 | ns->pid_cachep = create_pid_cachep(level + 1); | ||
556 | if (ns->pid_cachep == NULL) | ||
557 | goto out_free_map; | ||
558 | |||
559 | kref_init(&ns->kref); | ||
560 | ns->last_pid = 0; | ||
561 | ns->child_reaper = NULL; | ||
562 | ns->level = level; | ||
563 | |||
564 | set_bit(0, ns->pidmap[0].page); | ||
565 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); | ||
566 | |||
567 | for (i = 1; i < PIDMAP_ENTRIES; i++) { | ||
568 | ns->pidmap[i].page = 0; | ||
569 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); | ||
570 | } | ||
571 | |||
572 | return ns; | ||
573 | |||
574 | out_free_map: | ||
575 | kfree(ns->pidmap[0].page); | ||
576 | out_free: | ||
577 | kmem_cache_free(pid_ns_cachep, ns); | ||
578 | out: | ||
579 | return ERR_PTR(-ENOMEM); | ||
580 | } | ||
581 | |||
582 | static void destroy_pid_namespace(struct pid_namespace *ns) | ||
583 | { | ||
584 | int i; | ||
585 | |||
586 | for (i = 0; i < PIDMAP_ENTRIES; i++) | ||
587 | kfree(ns->pidmap[i].page); | ||
588 | kmem_cache_free(pid_ns_cachep, ns); | ||
589 | } | ||
590 | |||
591 | struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) | ||
592 | { | ||
593 | struct pid_namespace *new_ns; | ||
594 | |||
595 | BUG_ON(!old_ns); | ||
596 | new_ns = get_pid_ns(old_ns); | ||
597 | if (!(flags & CLONE_NEWPID)) | ||
598 | goto out; | ||
599 | |||
600 | new_ns = ERR_PTR(-EINVAL); | ||
601 | if (flags & CLONE_THREAD) | ||
602 | goto out_put; | ||
603 | |||
604 | new_ns = create_pid_namespace(old_ns->level + 1); | ||
605 | if (!IS_ERR(new_ns)) | ||
606 | new_ns->parent = get_pid_ns(old_ns); | ||
607 | |||
608 | out_put: | ||
609 | put_pid_ns(old_ns); | ||
610 | out: | ||
611 | return new_ns; | ||
612 | } | ||
613 | |||
614 | void free_pid_ns(struct kref *kref) | ||
615 | { | ||
616 | struct pid_namespace *ns, *parent; | ||
617 | |||
618 | ns = container_of(kref, struct pid_namespace, kref); | ||
619 | |||
620 | parent = ns->parent; | ||
621 | destroy_pid_namespace(ns); | ||
622 | |||
623 | if (parent != NULL) | ||
624 | put_pid_ns(parent); | ||
625 | } | ||
626 | #endif /* CONFIG_PID_NS */ | ||
627 | |||
628 | void zap_pid_ns_processes(struct pid_namespace *pid_ns) | ||
629 | { | ||
630 | int nr; | ||
631 | int rc; | ||
632 | |||
633 | /* | ||
634 | * The last thread in the cgroup-init thread group is terminating. | ||
635 | * Find remaining pid_ts in the namespace, signal and wait for them | ||
636 | * to exit. | ||
637 | * | ||
638 | * Note: This signals each threads in the namespace - even those that | ||
639 | * belong to the same thread group, To avoid this, we would have | ||
640 | * to walk the entire tasklist looking a processes in this | ||
641 | * namespace, but that could be unnecessarily expensive if the | ||
642 | * pid namespace has just a few processes. Or we need to | ||
643 | * maintain a tasklist for each pid namespace. | ||
644 | * | ||
645 | */ | ||
646 | read_lock(&tasklist_lock); | ||
647 | nr = next_pidmap(pid_ns, 1); | ||
648 | while (nr > 0) { | ||
649 | kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr); | ||
650 | nr = next_pidmap(pid_ns, nr); | ||
651 | } | ||
652 | read_unlock(&tasklist_lock); | ||
653 | |||
654 | do { | ||
655 | clear_thread_flag(TIF_SIGPENDING); | ||
656 | rc = sys_wait4(-1, NULL, __WALL, NULL); | ||
657 | } while (rc != -ECHILD); | ||
658 | |||
659 | |||
660 | /* Child reaper for the pid namespace is going away */ | ||
661 | pid_ns->child_reaper = NULL; | ||
662 | return; | ||
663 | } | ||
664 | |||
665 | /* | 490 | /* |
666 | * The pid hash table is scaled according to the amount of memory in the | 491 | * The pid hash table is scaled according to the amount of memory in the |
667 | * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or | 492 | * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or |
@@ -694,9 +519,6 @@ void __init pidmap_init(void) | |||
694 | set_bit(0, init_pid_ns.pidmap[0].page); | 519 | set_bit(0, init_pid_ns.pidmap[0].page); |
695 | atomic_dec(&init_pid_ns.pidmap[0].nr_free); | 520 | atomic_dec(&init_pid_ns.pidmap[0].nr_free); |
696 | 521 | ||
697 | init_pid_ns.pid_cachep = create_pid_cachep(1); | 522 | init_pid_ns.pid_cachep = KMEM_CACHE(pid, |
698 | if (init_pid_ns.pid_cachep == NULL) | 523 | SLAB_HWCACHE_ALIGN | SLAB_PANIC); |
699 | panic("Can't create pid_1 cachep\n"); | ||
700 | |||
701 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); | ||
702 | } | 524 | } |
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c new file mode 100644 index 000000000000..6d792b66d854 --- /dev/null +++ b/kernel/pid_namespace.c | |||
@@ -0,0 +1,197 @@ | |||
1 | /* | ||
2 | * Pid namespaces | ||
3 | * | ||
4 | * Authors: | ||
5 | * (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc. | ||
6 | * (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM | ||
7 | * Many thanks to Oleg Nesterov for comments and help | ||
8 | * | ||
9 | */ | ||
10 | |||
11 | #include <linux/pid.h> | ||
12 | #include <linux/pid_namespace.h> | ||
13 | #include <linux/syscalls.h> | ||
14 | #include <linux/err.h> | ||
15 | |||
16 | #define BITS_PER_PAGE (PAGE_SIZE*8) | ||
17 | |||
18 | struct pid_cache { | ||
19 | int nr_ids; | ||
20 | char name[16]; | ||
21 | struct kmem_cache *cachep; | ||
22 | struct list_head list; | ||
23 | }; | ||
24 | |||
25 | static LIST_HEAD(pid_caches_lh); | ||
26 | static DEFINE_MUTEX(pid_caches_mutex); | ||
27 | static struct kmem_cache *pid_ns_cachep; | ||
28 | |||
29 | /* | ||
30 | * creates the kmem cache to allocate pids from. | ||
31 | * @nr_ids: the number of numerical ids this pid will have to carry | ||
32 | */ | ||
33 | |||
34 | static struct kmem_cache *create_pid_cachep(int nr_ids) | ||
35 | { | ||
36 | struct pid_cache *pcache; | ||
37 | struct kmem_cache *cachep; | ||
38 | |||
39 | mutex_lock(&pid_caches_mutex); | ||
40 | list_for_each_entry(pcache, &pid_caches_lh, list) | ||
41 | if (pcache->nr_ids == nr_ids) | ||
42 | goto out; | ||
43 | |||
44 | pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL); | ||
45 | if (pcache == NULL) | ||
46 | goto err_alloc; | ||
47 | |||
48 | snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids); | ||
49 | cachep = kmem_cache_create(pcache->name, | ||
50 | sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid), | ||
51 | 0, SLAB_HWCACHE_ALIGN, NULL); | ||
52 | if (cachep == NULL) | ||
53 | goto err_cachep; | ||
54 | |||
55 | pcache->nr_ids = nr_ids; | ||
56 | pcache->cachep = cachep; | ||
57 | list_add(&pcache->list, &pid_caches_lh); | ||
58 | out: | ||
59 | mutex_unlock(&pid_caches_mutex); | ||
60 | return pcache->cachep; | ||
61 | |||
62 | err_cachep: | ||
63 | kfree(pcache); | ||
64 | err_alloc: | ||
65 | mutex_unlock(&pid_caches_mutex); | ||
66 | return NULL; | ||
67 | } | ||
68 | |||
69 | static struct pid_namespace *create_pid_namespace(int level) | ||
70 | { | ||
71 | struct pid_namespace *ns; | ||
72 | int i; | ||
73 | |||
74 | ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL); | ||
75 | if (ns == NULL) | ||
76 | goto out; | ||
77 | |||
78 | ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); | ||
79 | if (!ns->pidmap[0].page) | ||
80 | goto out_free; | ||
81 | |||
82 | ns->pid_cachep = create_pid_cachep(level + 1); | ||
83 | if (ns->pid_cachep == NULL) | ||
84 | goto out_free_map; | ||
85 | |||
86 | kref_init(&ns->kref); | ||
87 | ns->last_pid = 0; | ||
88 | ns->child_reaper = NULL; | ||
89 | ns->level = level; | ||
90 | |||
91 | set_bit(0, ns->pidmap[0].page); | ||
92 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); | ||
93 | |||
94 | for (i = 1; i < PIDMAP_ENTRIES; i++) { | ||
95 | ns->pidmap[i].page = 0; | ||
96 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); | ||
97 | } | ||
98 | |||
99 | return ns; | ||
100 | |||
101 | out_free_map: | ||
102 | kfree(ns->pidmap[0].page); | ||
103 | out_free: | ||
104 | kmem_cache_free(pid_ns_cachep, ns); | ||
105 | out: | ||
106 | return ERR_PTR(-ENOMEM); | ||
107 | } | ||
108 | |||
109 | static void destroy_pid_namespace(struct pid_namespace *ns) | ||
110 | { | ||
111 | int i; | ||
112 | |||
113 | for (i = 0; i < PIDMAP_ENTRIES; i++) | ||
114 | kfree(ns->pidmap[i].page); | ||
115 | kmem_cache_free(pid_ns_cachep, ns); | ||
116 | } | ||
117 | |||
118 | struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) | ||
119 | { | ||
120 | struct pid_namespace *new_ns; | ||
121 | |||
122 | BUG_ON(!old_ns); | ||
123 | new_ns = get_pid_ns(old_ns); | ||
124 | if (!(flags & CLONE_NEWPID)) | ||
125 | goto out; | ||
126 | |||
127 | new_ns = ERR_PTR(-EINVAL); | ||
128 | if (flags & CLONE_THREAD) | ||
129 | goto out_put; | ||
130 | |||
131 | new_ns = create_pid_namespace(old_ns->level + 1); | ||
132 | if (!IS_ERR(new_ns)) | ||
133 | new_ns->parent = get_pid_ns(old_ns); | ||
134 | |||
135 | out_put: | ||
136 | put_pid_ns(old_ns); | ||
137 | out: | ||
138 | return new_ns; | ||
139 | } | ||
140 | |||
141 | void free_pid_ns(struct kref *kref) | ||
142 | { | ||
143 | struct pid_namespace *ns, *parent; | ||
144 | |||
145 | ns = container_of(kref, struct pid_namespace, kref); | ||
146 | |||
147 | parent = ns->parent; | ||
148 | destroy_pid_namespace(ns); | ||
149 | |||
150 | if (parent != NULL) | ||
151 | put_pid_ns(parent); | ||
152 | } | ||
153 | |||
154 | void zap_pid_ns_processes(struct pid_namespace *pid_ns) | ||
155 | { | ||
156 | int nr; | ||
157 | int rc; | ||
158 | |||
159 | /* | ||
160 | * The last thread in the cgroup-init thread group is terminating. | ||
161 | * Find remaining pid_ts in the namespace, signal and wait for them | ||
162 | * to exit. | ||
163 | * | ||
164 | * Note: This signals each thread in the namespace - even those that | ||
165 | * belong to the same thread group. To avoid this, we would have | ||
166 | * to walk the entire tasklist looking for processes in this | ||
167 | * namespace, but that could be unnecessarily expensive if the | ||
168 | * pid namespace has just a few processes. Or we need to | ||
169 | * maintain a tasklist for each pid namespace. | ||
170 | * | ||
171 | */ | ||
172 | read_lock(&tasklist_lock); | ||
173 | nr = next_pidmap(pid_ns, 1); | ||
174 | while (nr > 0) { | ||
175 | kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr); | ||
176 | nr = next_pidmap(pid_ns, nr); | ||
177 | } | ||
178 | read_unlock(&tasklist_lock); | ||
179 | |||
180 | do { | ||
181 | clear_thread_flag(TIF_SIGPENDING); | ||
182 | rc = sys_wait4(-1, NULL, __WALL, NULL); | ||
183 | } while (rc != -ECHILD); | ||
184 | |||
185 | |||
186 | /* Child reaper for the pid namespace is going away */ | ||
187 | pid_ns->child_reaper = NULL; | ||
188 | return; | ||
189 | } | ||
190 | |||
191 | static __init int pid_namespaces_init(void) | ||
192 | { | ||
193 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); | ||
194 | return 0; | ||
195 | } | ||
196 | |||
197 | __initcall(pid_namespaces_init); | ||