about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- include/linux/sched.h 1
-rw-r--r-- kernel/fork.c 43
-rw-r--r-- kernel/nsproxy.c 3
-rw-r--r-- kernel/pid.c 88
4 files changed, 113 insertions, 22 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b0bf326143a..1301c087537 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -25,6 +25,7 @@
25#define CLONE_NEWUTS 0x04000000 /* New utsname group? */ 25#define CLONE_NEWUTS 0x04000000 /* New utsname group? */
26#define CLONE_NEWIPC 0x08000000 /* New ipcs */ 26#define CLONE_NEWIPC 0x08000000 /* New ipcs */
27#define CLONE_NEWUSER 0x10000000 /* New user namespace */ 27#define CLONE_NEWUSER 0x10000000 /* New user namespace */
28#define CLONE_NEWPID 0x20000000 /* New pid namespace */
28#define CLONE_NEWNET 0x40000000 /* New network namespace */ 29#define CLONE_NEWNET 0x40000000 /* New network namespace */
29 30
30/* 31/*
diff --git a/kernel/fork.c b/kernel/fork.c
index bab34192799..f252784f933 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -973,7 +973,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
973 unsigned long stack_start, 973 unsigned long stack_start,
974 struct pt_regs *regs, 974 struct pt_regs *regs,
975 unsigned long stack_size, 975 unsigned long stack_size,
976 int __user *parent_tidptr,
977 int __user *child_tidptr, 976 int __user *child_tidptr,
978 struct pid *pid) 977 struct pid *pid)
979{ 978{
@@ -1043,11 +1042,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1043 p->did_exec = 0; 1042 p->did_exec = 0;
1044 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ 1043 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
1045 copy_flags(clone_flags, p); 1044 copy_flags(clone_flags, p);
1046 retval = -EFAULT;
1047 if (clone_flags & CLONE_PARENT_SETTID)
1048 if (put_user(p->pid, parent_tidptr))
1049 goto bad_fork_cleanup_delays_binfmt;
1050
1051 INIT_LIST_HEAD(&p->children); 1045 INIT_LIST_HEAD(&p->children);
1052 INIT_LIST_HEAD(&p->sibling); 1046 INIT_LIST_HEAD(&p->sibling);
1053 p->vfork_done = NULL; 1047 p->vfork_done = NULL;
@@ -1289,11 +1283,22 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1289 __ptrace_link(p, current->parent); 1283 __ptrace_link(p, current->parent);
1290 1284
1291 if (thread_group_leader(p)) { 1285 if (thread_group_leader(p)) {
1292 p->signal->tty = current->signal->tty; 1286 if (clone_flags & CLONE_NEWPID) {
1293 p->signal->pgrp = task_pgrp_nr(current); 1287 p->nsproxy->pid_ns->child_reaper = p;
1294 set_task_session(p, task_session_nr(current)); 1288 p->signal->tty = NULL;
1295 attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); 1289 p->signal->pgrp = p->pid;
1296 attach_pid(p, PIDTYPE_SID, task_session(current)); 1290 set_task_session(p, p->pid);
1291 attach_pid(p, PIDTYPE_PGID, pid);
1292 attach_pid(p, PIDTYPE_SID, pid);
1293 } else {
1294 p->signal->tty = current->signal->tty;
1295 p->signal->pgrp = task_pgrp_nr(current);
1296 set_task_session(p, task_session_nr(current));
1297 attach_pid(p, PIDTYPE_PGID,
1298 task_pgrp(current));
1299 attach_pid(p, PIDTYPE_SID,
1300 task_session(current));
1301 }
1297 1302
1298 list_add_tail_rcu(&p->tasks, &init_task.tasks); 1303 list_add_tail_rcu(&p->tasks, &init_task.tasks);
1299 __get_cpu_var(process_counts)++; 1304 __get_cpu_var(process_counts)++;
@@ -1339,7 +1344,6 @@ bad_fork_cleanup_policy:
1339bad_fork_cleanup_cgroup: 1344bad_fork_cleanup_cgroup:
1340#endif 1345#endif
1341 cgroup_exit(p, cgroup_callbacks_done); 1346 cgroup_exit(p, cgroup_callbacks_done);
1342bad_fork_cleanup_delays_binfmt:
1343 delayacct_tsk_free(p); 1347 delayacct_tsk_free(p);
1344 if (p->binfmt) 1348 if (p->binfmt)
1345 module_put(p->binfmt->module); 1349 module_put(p->binfmt->module);
@@ -1366,7 +1370,7 @@ struct task_struct * __cpuinit fork_idle(int cpu)
1366 struct task_struct *task; 1370 struct task_struct *task;
1367 struct pt_regs regs; 1371 struct pt_regs regs;
1368 1372
1369 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 1373 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
1370 &init_struct_pid); 1374 &init_struct_pid);
1371 if (!IS_ERR(task)) 1375 if (!IS_ERR(task))
1372 init_idle(task, cpu); 1376 init_idle(task, cpu);
@@ -1414,7 +1418,7 @@ long do_fork(unsigned long clone_flags,
1414 } 1418 }
1415 1419
1416 p = copy_process(clone_flags, stack_start, regs, stack_size, 1420 p = copy_process(clone_flags, stack_start, regs, stack_size,
1417 parent_tidptr, child_tidptr, NULL); 1421 child_tidptr, NULL);
1418 /* 1422 /*
1419 * Do this prior waking up the new thread - the thread pointer 1423 * Do this prior waking up the new thread - the thread pointer
1420 * might get invalid after that point, if the thread exits quickly. 1424 * might get invalid after that point, if the thread exits quickly.
@@ -1422,7 +1426,16 @@ long do_fork(unsigned long clone_flags,
1422 if (!IS_ERR(p)) { 1426 if (!IS_ERR(p)) {
1423 struct completion vfork; 1427 struct completion vfork;
1424 1428
1425 nr = pid_nr(task_pid(p)); 1429 /*
1430 * this is enough to call pid_nr_ns here, but this if
1431 * improves optimisation of regular fork()
1432 */
1433 nr = (clone_flags & CLONE_NEWPID) ?
1434 task_pid_nr_ns(p, current->nsproxy->pid_ns) :
1435 task_pid_vnr(p);
1436
1437 if (clone_flags & CLONE_PARENT_SETTID)
1438 put_user(nr, parent_tidptr);
1426 1439
1427 if (clone_flags & CLONE_VFORK) { 1440 if (clone_flags & CLONE_VFORK) {
1428 p->vfork_done = &vfork; 1441 p->vfork_done = &vfork;
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index c8ef7c2992e..79f871bc0ef 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -129,7 +129,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
129 129
130 get_nsproxy(old_ns); 130 get_nsproxy(old_ns);
131 131
132 if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWNET))) 132 if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
133 CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET)))
133 return 0; 134 return 0;
134 135
135 if (!capable(CAP_SYS_ADMIN)) { 136 if (!capable(CAP_SYS_ADMIN)) {
diff --git a/kernel/pid.c b/kernel/pid.c
index 4b17acdb862..f76097c6047 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -18,6 +18,12 @@
18 * allocation scenario when all but one out of 1 million PIDs possible are 18 * allocation scenario when all but one out of 1 million PIDs possible are
19 * allocated already: the scanning of 32 list entries and at most PAGE_SIZE 19 * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
20 * bytes. The typical fastpath is a single successful setbit. Freeing is O(1). 20 * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
21 *
22 * Pid namespaces:
23 * (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
24 * (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
25 * Many thanks to Oleg Nesterov for comments and help
26 *
21 */ 27 */
22 28
23#include <linux/mm.h> 29#include <linux/mm.h>
@@ -456,8 +462,8 @@ static struct kmem_cache *create_pid_cachep(int nr_ids)
456 462
457 snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids); 463 snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
458 cachep = kmem_cache_create(pcache->name, 464 cachep = kmem_cache_create(pcache->name,
459 /* FIXME add numerical ids here */ 465 sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
460 sizeof(struct pid), 0, SLAB_HWCACHE_ALIGN, NULL); 466 0, SLAB_HWCACHE_ALIGN, NULL);
461 if (cachep == NULL) 467 if (cachep == NULL)
462 goto err_cachep; 468 goto err_cachep;
463 469
@@ -475,19 +481,89 @@ err_alloc:
475 return NULL; 481 return NULL;
476} 482}
477 483
484static struct pid_namespace *create_pid_namespace(int level)
485{
486 struct pid_namespace *ns;
487 int i;
488
489 ns = kmalloc(sizeof(struct pid_namespace), GFP_KERNEL);
490 if (ns == NULL)
491 goto out;
492
493 ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
494 if (!ns->pidmap[0].page)
495 goto out_free;
496
497 ns->pid_cachep = create_pid_cachep(level + 1);
498 if (ns->pid_cachep == NULL)
499 goto out_free_map;
500
501 kref_init(&ns->kref);
502 ns->last_pid = 0;
503 ns->child_reaper = NULL;
504 ns->level = level;
505
506 set_bit(0, ns->pidmap[0].page);
507 atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
508
509 for (i = 1; i < PIDMAP_ENTRIES; i++) {
510 ns->pidmap[i].page = 0;
511 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
512 }
513
514 return ns;
515
516out_free_map:
517 kfree(ns->pidmap[0].page);
518out_free:
519 kfree(ns);
520out:
521 return ERR_PTR(-ENOMEM);
522}
523
524static void destroy_pid_namespace(struct pid_namespace *ns)
525{
526 int i;
527
528 for (i = 0; i < PIDMAP_ENTRIES; i++)
529 kfree(ns->pidmap[i].page);
530 kfree(ns);
531}
532
478struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) 533struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
479{ 534{
535 struct pid_namespace *new_ns;
536
480 BUG_ON(!old_ns); 537 BUG_ON(!old_ns);
481 get_pid_ns(old_ns); 538 new_ns = get_pid_ns(old_ns);
482 return old_ns; 539 if (!(flags & CLONE_NEWPID))
540 goto out;
541
542 new_ns = ERR_PTR(-EINVAL);
543 if (flags & CLONE_THREAD)
544 goto out_put;
545
546 new_ns = create_pid_namespace(old_ns->level + 1);
547 if (!IS_ERR(new_ns))
548 new_ns->parent = get_pid_ns(old_ns);
549
550out_put:
551 put_pid_ns(old_ns);
552out:
553 return new_ns;
483} 554}
484 555
485void free_pid_ns(struct kref *kref) 556void free_pid_ns(struct kref *kref)
486{ 557{
487 struct pid_namespace *ns; 558 struct pid_namespace *ns, *parent;
488 559
489 ns = container_of(kref, struct pid_namespace, kref); 560 ns = container_of(kref, struct pid_namespace, kref);
490 kfree(ns); 561
562 parent = ns->parent;
563 destroy_pid_namespace(ns);
564
565 if (parent != NULL)
566 put_pid_ns(parent);
491} 567}
492 568
493/* 569/*