diff options
Diffstat (limited to 'kernel/pid_namespace.c')
-rw-r--r-- | kernel/pid_namespace.c | 113 |
1 files changed, 90 insertions, 23 deletions
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 7b07cc0dfb75..fdbd0cdf271a 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
@@ -10,6 +10,7 @@ | |||
10 | 10 | ||
11 | #include <linux/pid.h> | 11 | #include <linux/pid.h> |
12 | #include <linux/pid_namespace.h> | 12 | #include <linux/pid_namespace.h> |
13 | #include <linux/user_namespace.h> | ||
13 | #include <linux/syscalls.h> | 14 | #include <linux/syscalls.h> |
14 | #include <linux/err.h> | 15 | #include <linux/err.h> |
15 | #include <linux/acct.h> | 16 | #include <linux/acct.h> |
@@ -71,10 +72,17 @@ err_alloc: | |||
71 | return NULL; | 72 | return NULL; |
72 | } | 73 | } |
73 | 74 | ||
75 | static void proc_cleanup_work(struct work_struct *work) | ||
76 | { | ||
77 | struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work); | ||
78 | pid_ns_release_proc(ns); | ||
79 | } | ||
80 | |||
74 | /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ | 81 | /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ |
75 | #define MAX_PID_NS_LEVEL 32 | 82 | #define MAX_PID_NS_LEVEL 32 |
76 | 83 | ||
77 | static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns) | 84 | static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns, |
85 | struct pid_namespace *parent_pid_ns) | ||
78 | { | 86 | { |
79 | struct pid_namespace *ns; | 87 | struct pid_namespace *ns; |
80 | unsigned int level = parent_pid_ns->level + 1; | 88 | unsigned int level = parent_pid_ns->level + 1; |
@@ -99,9 +107,15 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p | |||
99 | if (ns->pid_cachep == NULL) | 107 | if (ns->pid_cachep == NULL) |
100 | goto out_free_map; | 108 | goto out_free_map; |
101 | 109 | ||
110 | err = proc_alloc_inum(&ns->proc_inum); | ||
111 | if (err) | ||
112 | goto out_free_map; | ||
113 | |||
102 | kref_init(&ns->kref); | 114 | kref_init(&ns->kref); |
103 | ns->level = level; | 115 | ns->level = level; |
104 | ns->parent = get_pid_ns(parent_pid_ns); | 116 | ns->parent = get_pid_ns(parent_pid_ns); |
117 | ns->user_ns = get_user_ns(user_ns); | ||
118 | INIT_WORK(&ns->proc_work, proc_cleanup_work); | ||
105 | 119 | ||
106 | set_bit(0, ns->pidmap[0].page); | 120 | set_bit(0, ns->pidmap[0].page); |
107 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); | 121 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); |
@@ -109,14 +123,8 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p | |||
109 | for (i = 1; i < PIDMAP_ENTRIES; i++) | 123 | for (i = 1; i < PIDMAP_ENTRIES; i++) |
110 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); | 124 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); |
111 | 125 | ||
112 | err = pid_ns_prepare_proc(ns); | ||
113 | if (err) | ||
114 | goto out_put_parent_pid_ns; | ||
115 | |||
116 | return ns; | 126 | return ns; |
117 | 127 | ||
118 | out_put_parent_pid_ns: | ||
119 | put_pid_ns(parent_pid_ns); | ||
120 | out_free_map: | 128 | out_free_map: |
121 | kfree(ns->pidmap[0].page); | 129 | kfree(ns->pidmap[0].page); |
122 | out_free: | 130 | out_free: |
@@ -129,18 +137,21 @@ static void destroy_pid_namespace(struct pid_namespace *ns) | |||
129 | { | 137 | { |
130 | int i; | 138 | int i; |
131 | 139 | ||
140 | proc_free_inum(ns->proc_inum); | ||
132 | for (i = 0; i < PIDMAP_ENTRIES; i++) | 141 | for (i = 0; i < PIDMAP_ENTRIES; i++) |
133 | kfree(ns->pidmap[i].page); | 142 | kfree(ns->pidmap[i].page); |
143 | put_user_ns(ns->user_ns); | ||
134 | kmem_cache_free(pid_ns_cachep, ns); | 144 | kmem_cache_free(pid_ns_cachep, ns); |
135 | } | 145 | } |
136 | 146 | ||
137 | struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) | 147 | struct pid_namespace *copy_pid_ns(unsigned long flags, |
148 | struct user_namespace *user_ns, struct pid_namespace *old_ns) | ||
138 | { | 149 | { |
139 | if (!(flags & CLONE_NEWPID)) | 150 | if (!(flags & CLONE_NEWPID)) |
140 | return get_pid_ns(old_ns); | 151 | return get_pid_ns(old_ns); |
141 | if (flags & (CLONE_THREAD|CLONE_PARENT)) | 152 | if (task_active_pid_ns(current) != old_ns) |
142 | return ERR_PTR(-EINVAL); | 153 | return ERR_PTR(-EINVAL); |
143 | return create_pid_namespace(old_ns); | 154 | return create_pid_namespace(user_ns, old_ns); |
144 | } | 155 | } |
145 | 156 | ||
146 | static void free_pid_ns(struct kref *kref) | 157 | static void free_pid_ns(struct kref *kref) |
@@ -211,22 +222,15 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
211 | 222 | ||
212 | /* | 223 | /* |
213 | * sys_wait4() above can't reap the TASK_DEAD children. | 224 | * sys_wait4() above can't reap the TASK_DEAD children. |
214 | * Make sure they all go away, see __unhash_process(). | 225 | * Make sure they all go away, see free_pid(). |
215 | */ | 226 | */ |
216 | for (;;) { | 227 | for (;;) { |
217 | bool need_wait = false; | 228 | set_current_state(TASK_UNINTERRUPTIBLE); |
218 | 229 | if (pid_ns->nr_hashed == 1) | |
219 | read_lock(&tasklist_lock); | ||
220 | if (!list_empty(&current->children)) { | ||
221 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
222 | need_wait = true; | ||
223 | } | ||
224 | read_unlock(&tasklist_lock); | ||
225 | |||
226 | if (!need_wait) | ||
227 | break; | 230 | break; |
228 | schedule(); | 231 | schedule(); |
229 | } | 232 | } |
233 | __set_current_state(TASK_RUNNING); | ||
230 | 234 | ||
231 | if (pid_ns->reboot) | 235 | if (pid_ns->reboot) |
232 | current->signal->group_exit_code = pid_ns->reboot; | 236 | current->signal->group_exit_code = pid_ns->reboot; |
@@ -239,9 +243,10 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
239 | static int pid_ns_ctl_handler(struct ctl_table *table, int write, | 243 | static int pid_ns_ctl_handler(struct ctl_table *table, int write, |
240 | void __user *buffer, size_t *lenp, loff_t *ppos) | 244 | void __user *buffer, size_t *lenp, loff_t *ppos) |
241 | { | 245 | { |
246 | struct pid_namespace *pid_ns = task_active_pid_ns(current); | ||
242 | struct ctl_table tmp = *table; | 247 | struct ctl_table tmp = *table; |
243 | 248 | ||
244 | if (write && !capable(CAP_SYS_ADMIN)) | 249 | if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN)) |
245 | return -EPERM; | 250 | return -EPERM; |
246 | 251 | ||
247 | /* | 252 | /* |
@@ -250,7 +255,7 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write, | |||
250 | * it should synchronize its usage with external means. | 255 | * it should synchronize its usage with external means. |
251 | */ | 256 | */ |
252 | 257 | ||
253 | tmp.data = &current->nsproxy->pid_ns->last_pid; | 258 | tmp.data = &pid_ns->last_pid; |
254 | return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); | 259 | return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); |
255 | } | 260 | } |
256 | 261 | ||
@@ -299,6 +304,68 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) | |||
299 | return 0; | 304 | return 0; |
300 | } | 305 | } |
301 | 306 | ||
307 | static void *pidns_get(struct task_struct *task) | ||
308 | { | ||
309 | struct pid_namespace *ns; | ||
310 | |||
311 | rcu_read_lock(); | ||
312 | ns = get_pid_ns(task_active_pid_ns(task)); | ||
313 | rcu_read_unlock(); | ||
314 | |||
315 | return ns; | ||
316 | } | ||
317 | |||
318 | static void pidns_put(void *ns) | ||
319 | { | ||
320 | put_pid_ns(ns); | ||
321 | } | ||
322 | |||
323 | static int pidns_install(struct nsproxy *nsproxy, void *ns) | ||
324 | { | ||
325 | struct pid_namespace *active = task_active_pid_ns(current); | ||
326 | struct pid_namespace *ancestor, *new = ns; | ||
327 | |||
328 | if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) || | ||
329 | !nsown_capable(CAP_SYS_ADMIN)) | ||
330 | return -EPERM; | ||
331 | |||
332 | /* | ||
333 | * Only allow entering the current active pid namespace | ||
334 | * or a child of the current active pid namespace. | ||
335 | * | ||
336 | * This is required for fork to return a usable pid value and | ||
337 | * this maintains the property that processes and their | ||
338 | * children can not escape their current pid namespace. | ||
339 | */ | ||
340 | if (new->level < active->level) | ||
341 | return -EINVAL; | ||
342 | |||
343 | ancestor = new; | ||
344 | while (ancestor->level > active->level) | ||
345 | ancestor = ancestor->parent; | ||
346 | if (ancestor != active) | ||
347 | return -EINVAL; | ||
348 | |||
349 | put_pid_ns(nsproxy->pid_ns); | ||
350 | nsproxy->pid_ns = get_pid_ns(new); | ||
351 | return 0; | ||
352 | } | ||
353 | |||
354 | static unsigned int pidns_inum(void *ns) | ||
355 | { | ||
356 | struct pid_namespace *pid_ns = ns; | ||
357 | return pid_ns->proc_inum; | ||
358 | } | ||
359 | |||
360 | const struct proc_ns_operations pidns_operations = { | ||
361 | .name = "pid", | ||
362 | .type = CLONE_NEWPID, | ||
363 | .get = pidns_get, | ||
364 | .put = pidns_put, | ||
365 | .install = pidns_install, | ||
366 | .inum = pidns_inum, | ||
367 | }; | ||
368 | |||
302 | static __init int pid_namespaces_init(void) | 369 | static __init int pid_namespaces_init(void) |
303 | { | 370 | { |
304 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); | 371 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); |