aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/pid.c
diff options
context:
space:
mode:
author Pavel Emelyanov <xemul@openvz.org> 2007-10-19 02:40:10 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-10-19 14:53:39 -0400
commit 30e49c263e36341b60b735cbef5ca37912549264 (patch)
tree 103e74c41db97476ae38cdd4ffc18e4da03f28e8 /kernel/pid.c
parent b461cc03828c743aed6b3855b9ab0d39a9d54ec5 (diff)
pid namespaces: allow cloning of new namespace
When clone() is invoked with CLONE_NEWPID, create a new pid namespace and then create a new struct pid for the new process. Allocate pid_t's for the new process in the new pid namespace and all ancestor pid namespaces. Make the newly cloned process the session and process group leader. Since the active pid namespace is special and expected to be the first entry in pid->upid_list, preserve the order of pid namespaces. The size of 'struct pid' is dependent on the number of pid namespaces the process exists in, so we use multiple 'pid-caches'. Only one pid cache is created during system startup and this is used by processes that exist only in init_pid_ns. When a process clones its pid namespace, we create additional pid caches as necessary and use the pid cache to allocate 'struct pids' for that depth. Note that with this patch the newly created namespace won't work, since the rest of the kernel still uses global pids, but this is to be fixed soon. Init pid namespace still works. [oleg@tv-sign.ru: merge fix] Signed-off-by: Pavel Emelyanov <xemul@openvz.org> Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com> Cc: Paul Menage <menage@google.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Oleg Nesterov <oleg@tv-sign.ru> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/pid.c')
-rw-r--r-- kernel/pid.c 88
1 files changed, 82 insertions, 6 deletions
diff --git a/kernel/pid.c b/kernel/pid.c
index 4b17acdb862f..f76097c60475 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -18,6 +18,12 @@
18 * allocation scenario when all but one out of 1 million PIDs possible are 18 * allocation scenario when all but one out of 1 million PIDs possible are
19 * allocated already: the scanning of 32 list entries and at most PAGE_SIZE 19 * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
20 * bytes. The typical fastpath is a single successful setbit. Freeing is O(1). 20 * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
21 *
22 * Pid namespaces:
23 * (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
24 * (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
25 * Many thanks to Oleg Nesterov for comments and help
26 *
21 */ 27 */
22 28
23#include <linux/mm.h> 29#include <linux/mm.h>
@@ -456,8 +462,8 @@ static struct kmem_cache *create_pid_cachep(int nr_ids)
456 462
457 snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids); 463 snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
458 cachep = kmem_cache_create(pcache->name, 464 cachep = kmem_cache_create(pcache->name,
459 /* FIXME add numerical ids here */ 465 sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
460 sizeof(struct pid), 0, SLAB_HWCACHE_ALIGN, NULL); 466 0, SLAB_HWCACHE_ALIGN, NULL);
461 if (cachep == NULL) 467 if (cachep == NULL)
462 goto err_cachep; 468 goto err_cachep;
463 469
@@ -475,19 +481,89 @@ err_alloc:
475 return NULL; 481 return NULL;
476} 482}
477 483
484static struct pid_namespace *create_pid_namespace(int level)
485{
486 struct pid_namespace *ns;
487 int i;
488
489 ns = kmalloc(sizeof(struct pid_namespace), GFP_KERNEL);
490 if (ns == NULL)
491 goto out;
492
493 ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
494 if (!ns->pidmap[0].page)
495 goto out_free;
496
497 ns->pid_cachep = create_pid_cachep(level + 1);
498 if (ns->pid_cachep == NULL)
499 goto out_free_map;
500
501 kref_init(&ns->kref);
502 ns->last_pid = 0;
503 ns->child_reaper = NULL;
504 ns->level = level;
505
506 set_bit(0, ns->pidmap[0].page);
507 atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
508
509 for (i = 1; i < PIDMAP_ENTRIES; i++) {
510 ns->pidmap[i].page = 0;
511 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
512 }
513
514 return ns;
515
516out_free_map:
517 kfree(ns->pidmap[0].page);
518out_free:
519 kfree(ns);
520out:
521 return ERR_PTR(-ENOMEM);
522}
523
524static void destroy_pid_namespace(struct pid_namespace *ns)
525{
526 int i;
527
528 for (i = 0; i < PIDMAP_ENTRIES; i++)
529 kfree(ns->pidmap[i].page);
530 kfree(ns);
531}
532
478struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) 533struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
479{ 534{
535 struct pid_namespace *new_ns;
536
480 BUG_ON(!old_ns); 537 BUG_ON(!old_ns);
481 get_pid_ns(old_ns); 538 new_ns = get_pid_ns(old_ns);
482 return old_ns; 539 if (!(flags & CLONE_NEWPID))
540 goto out;
541
542 new_ns = ERR_PTR(-EINVAL);
543 if (flags & CLONE_THREAD)
544 goto out_put;
545
546 new_ns = create_pid_namespace(old_ns->level + 1);
547 if (!IS_ERR(new_ns))
548 new_ns->parent = get_pid_ns(old_ns);
549
550out_put:
551 put_pid_ns(old_ns);
552out:
553 return new_ns;
483} 554}
484 555
485void free_pid_ns(struct kref *kref) 556void free_pid_ns(struct kref *kref)
486{ 557{
487 struct pid_namespace *ns; 558 struct pid_namespace *ns, *parent;
488 559
489 ns = container_of(kref, struct pid_namespace, kref); 560 ns = container_of(kref, struct pid_namespace, kref);
490 kfree(ns); 561
562 parent = ns->parent;
563 destroy_pid_namespace(ns);
564
565 if (parent != NULL)
566 put_pid_ns(parent);
491} 567}
492 568
493/* 569/*