diff options
Diffstat (limited to 'kernel/pid_namespace.c')
-rw-r--r-- | kernel/pid_namespace.c | 117 |
1 files changed, 94 insertions, 23 deletions
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 7b07cc0dfb75..c1c3dc1c6023 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
@@ -10,6 +10,7 @@ | |||
10 | 10 | ||
11 | #include <linux/pid.h> | 11 | #include <linux/pid.h> |
12 | #include <linux/pid_namespace.h> | 12 | #include <linux/pid_namespace.h> |
13 | #include <linux/user_namespace.h> | ||
13 | #include <linux/syscalls.h> | 14 | #include <linux/syscalls.h> |
14 | #include <linux/err.h> | 15 | #include <linux/err.h> |
15 | #include <linux/acct.h> | 16 | #include <linux/acct.h> |
@@ -71,10 +72,17 @@ err_alloc: | |||
71 | return NULL; | 72 | return NULL; |
72 | } | 73 | } |
73 | 74 | ||
75 | static void proc_cleanup_work(struct work_struct *work) | ||
76 | { | ||
77 | struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work); | ||
78 | pid_ns_release_proc(ns); | ||
79 | } | ||
80 | |||
74 | /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ | 81 | /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ |
75 | #define MAX_PID_NS_LEVEL 32 | 82 | #define MAX_PID_NS_LEVEL 32 |
76 | 83 | ||
77 | static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns) | 84 | static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns, |
85 | struct pid_namespace *parent_pid_ns) | ||
78 | { | 86 | { |
79 | struct pid_namespace *ns; | 87 | struct pid_namespace *ns; |
80 | unsigned int level = parent_pid_ns->level + 1; | 88 | unsigned int level = parent_pid_ns->level + 1; |
@@ -99,9 +107,16 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p | |||
99 | if (ns->pid_cachep == NULL) | 107 | if (ns->pid_cachep == NULL) |
100 | goto out_free_map; | 108 | goto out_free_map; |
101 | 109 | ||
110 | err = proc_alloc_inum(&ns->proc_inum); | ||
111 | if (err) | ||
112 | goto out_free_map; | ||
113 | |||
102 | kref_init(&ns->kref); | 114 | kref_init(&ns->kref); |
103 | ns->level = level; | 115 | ns->level = level; |
104 | ns->parent = get_pid_ns(parent_pid_ns); | 116 | ns->parent = get_pid_ns(parent_pid_ns); |
117 | ns->user_ns = get_user_ns(user_ns); | ||
118 | ns->nr_hashed = PIDNS_HASH_ADDING; | ||
119 | INIT_WORK(&ns->proc_work, proc_cleanup_work); | ||
105 | 120 | ||
106 | set_bit(0, ns->pidmap[0].page); | 121 | set_bit(0, ns->pidmap[0].page); |
107 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); | 122 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); |
@@ -109,14 +124,8 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p | |||
109 | for (i = 1; i < PIDMAP_ENTRIES; i++) | 124 | for (i = 1; i < PIDMAP_ENTRIES; i++) |
110 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); | 125 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); |
111 | 126 | ||
112 | err = pid_ns_prepare_proc(ns); | ||
113 | if (err) | ||
114 | goto out_put_parent_pid_ns; | ||
115 | |||
116 | return ns; | 127 | return ns; |
117 | 128 | ||
118 | out_put_parent_pid_ns: | ||
119 | put_pid_ns(parent_pid_ns); | ||
120 | out_free_map: | 129 | out_free_map: |
121 | kfree(ns->pidmap[0].page); | 130 | kfree(ns->pidmap[0].page); |
122 | out_free: | 131 | out_free: |
@@ -129,18 +138,21 @@ static void destroy_pid_namespace(struct pid_namespace *ns) | |||
129 | { | 138 | { |
130 | int i; | 139 | int i; |
131 | 140 | ||
141 | proc_free_inum(ns->proc_inum); | ||
132 | for (i = 0; i < PIDMAP_ENTRIES; i++) | 142 | for (i = 0; i < PIDMAP_ENTRIES; i++) |
133 | kfree(ns->pidmap[i].page); | 143 | kfree(ns->pidmap[i].page); |
144 | put_user_ns(ns->user_ns); | ||
134 | kmem_cache_free(pid_ns_cachep, ns); | 145 | kmem_cache_free(pid_ns_cachep, ns); |
135 | } | 146 | } |
136 | 147 | ||
137 | struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) | 148 | struct pid_namespace *copy_pid_ns(unsigned long flags, |
149 | struct user_namespace *user_ns, struct pid_namespace *old_ns) | ||
138 | { | 150 | { |
139 | if (!(flags & CLONE_NEWPID)) | 151 | if (!(flags & CLONE_NEWPID)) |
140 | return get_pid_ns(old_ns); | 152 | return get_pid_ns(old_ns); |
141 | if (flags & (CLONE_THREAD|CLONE_PARENT)) | 153 | if (task_active_pid_ns(current) != old_ns) |
142 | return ERR_PTR(-EINVAL); | 154 | return ERR_PTR(-EINVAL); |
143 | return create_pid_namespace(old_ns); | 155 | return create_pid_namespace(user_ns, old_ns); |
144 | } | 156 | } |
145 | 157 | ||
146 | static void free_pid_ns(struct kref *kref) | 158 | static void free_pid_ns(struct kref *kref) |
@@ -170,6 +182,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
170 | int rc; | 182 | int rc; |
171 | struct task_struct *task, *me = current; | 183 | struct task_struct *task, *me = current; |
172 | 184 | ||
185 | /* Don't allow any more processes into the pid namespace */ | ||
186 | disable_pid_allocation(pid_ns); | ||
187 | |||
173 | /* Ignore SIGCHLD causing any terminated children to autoreap */ | 188 | /* Ignore SIGCHLD causing any terminated children to autoreap */ |
174 | spin_lock_irq(&me->sighand->siglock); | 189 | spin_lock_irq(&me->sighand->siglock); |
175 | me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN; | 190 | me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN; |
@@ -211,22 +226,15 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
211 | 226 | ||
212 | /* | 227 | /* |
213 | * sys_wait4() above can't reap the TASK_DEAD children. | 228 | * sys_wait4() above can't reap the TASK_DEAD children. |
214 | * Make sure they all go away, see __unhash_process(). | 229 | * Make sure they all go away, see free_pid(). |
215 | */ | 230 | */ |
216 | for (;;) { | 231 | for (;;) { |
217 | bool need_wait = false; | 232 | set_current_state(TASK_UNINTERRUPTIBLE); |
218 | 233 | if (pid_ns->nr_hashed == 1) | |
219 | read_lock(&tasklist_lock); | ||
220 | if (!list_empty(&current->children)) { | ||
221 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
222 | need_wait = true; | ||
223 | } | ||
224 | read_unlock(&tasklist_lock); | ||
225 | |||
226 | if (!need_wait) | ||
227 | break; | 234 | break; |
228 | schedule(); | 235 | schedule(); |
229 | } | 236 | } |
237 | __set_current_state(TASK_RUNNING); | ||
230 | 238 | ||
231 | if (pid_ns->reboot) | 239 | if (pid_ns->reboot) |
232 | current->signal->group_exit_code = pid_ns->reboot; | 240 | current->signal->group_exit_code = pid_ns->reboot; |
@@ -239,9 +247,10 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
239 | static int pid_ns_ctl_handler(struct ctl_table *table, int write, | 247 | static int pid_ns_ctl_handler(struct ctl_table *table, int write, |
240 | void __user *buffer, size_t *lenp, loff_t *ppos) | 248 | void __user *buffer, size_t *lenp, loff_t *ppos) |
241 | { | 249 | { |
250 | struct pid_namespace *pid_ns = task_active_pid_ns(current); | ||
242 | struct ctl_table tmp = *table; | 251 | struct ctl_table tmp = *table; |
243 | 252 | ||
244 | if (write && !capable(CAP_SYS_ADMIN)) | 253 | if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN)) |
245 | return -EPERM; | 254 | return -EPERM; |
246 | 255 | ||
247 | /* | 256 | /* |
@@ -250,7 +259,7 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write, | |||
250 | * it should synchronize its usage with external means. | 259 | * it should synchronize its usage with external means. |
251 | */ | 260 | */ |
252 | 261 | ||
253 | tmp.data = &current->nsproxy->pid_ns->last_pid; | 262 | tmp.data = &pid_ns->last_pid; |
254 | return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); | 263 | return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); |
255 | } | 264 | } |
256 | 265 | ||
@@ -299,6 +308,68 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) | |||
299 | return 0; | 308 | return 0; |
300 | } | 309 | } |
301 | 310 | ||
311 | static void *pidns_get(struct task_struct *task) | ||
312 | { | ||
313 | struct pid_namespace *ns; | ||
314 | |||
315 | rcu_read_lock(); | ||
316 | ns = get_pid_ns(task_active_pid_ns(task)); | ||
317 | rcu_read_unlock(); | ||
318 | |||
319 | return ns; | ||
320 | } | ||
321 | |||
322 | static void pidns_put(void *ns) | ||
323 | { | ||
324 | put_pid_ns(ns); | ||
325 | } | ||
326 | |||
327 | static int pidns_install(struct nsproxy *nsproxy, void *ns) | ||
328 | { | ||
329 | struct pid_namespace *active = task_active_pid_ns(current); | ||
330 | struct pid_namespace *ancestor, *new = ns; | ||
331 | |||
332 | if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) || | ||
333 | !nsown_capable(CAP_SYS_ADMIN)) | ||
334 | return -EPERM; | ||
335 | |||
336 | /* | ||
337 | * Only allow entering the current active pid namespace | ||
338 | * or a child of the current active pid namespace. | ||
339 | * | ||
340 | * This is required for fork to return a usable pid value and | ||
341 | * this maintains the property that processes and their | ||
342 | * children can not escape their current pid namespace. | ||
343 | */ | ||
344 | if (new->level < active->level) | ||
345 | return -EINVAL; | ||
346 | |||
347 | ancestor = new; | ||
348 | while (ancestor->level > active->level) | ||
349 | ancestor = ancestor->parent; | ||
350 | if (ancestor != active) | ||
351 | return -EINVAL; | ||
352 | |||
353 | put_pid_ns(nsproxy->pid_ns); | ||
354 | nsproxy->pid_ns = get_pid_ns(new); | ||
355 | return 0; | ||
356 | } | ||
357 | |||
358 | static unsigned int pidns_inum(void *ns) | ||
359 | { | ||
360 | struct pid_namespace *pid_ns = ns; | ||
361 | return pid_ns->proc_inum; | ||
362 | } | ||
363 | |||
364 | const struct proc_ns_operations pidns_operations = { | ||
365 | .name = "pid", | ||
366 | .type = CLONE_NEWPID, | ||
367 | .get = pidns_get, | ||
368 | .put = pidns_put, | ||
369 | .install = pidns_install, | ||
370 | .inum = pidns_inum, | ||
371 | }; | ||
372 | |||
302 | static __init int pid_namespaces_init(void) | 373 | static __init int pid_namespaces_init(void) |
303 | { | 374 | { |
304 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); | 375 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); |