diff options
-rw-r--r-- | include/linux/sched.h | 1 | ||||
-rw-r--r-- | kernel/fork.c | 43 | ||||
-rw-r--r-- | kernel/nsproxy.c | 3 | ||||
-rw-r--r-- | kernel/pid.c | 88 |
4 files changed, 113 insertions, 22 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index b0bf326143a9..1301c0875370 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -25,6 +25,7 @@ | |||
25 | #define CLONE_NEWUTS 0x04000000 /* New utsname group? */ | 25 | #define CLONE_NEWUTS 0x04000000 /* New utsname group? */ |
26 | #define CLONE_NEWIPC 0x08000000 /* New ipcs */ | 26 | #define CLONE_NEWIPC 0x08000000 /* New ipcs */ |
27 | #define CLONE_NEWUSER 0x10000000 /* New user namespace */ | 27 | #define CLONE_NEWUSER 0x10000000 /* New user namespace */ |
28 | #define CLONE_NEWPID 0x20000000 /* New pid namespace */ | ||
28 | #define CLONE_NEWNET 0x40000000 /* New network namespace */ | 29 | #define CLONE_NEWNET 0x40000000 /* New network namespace */ |
29 | 30 | ||
30 | /* | 31 | /* |
diff --git a/kernel/fork.c b/kernel/fork.c index bab34192799b..f252784f9330 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -973,7 +973,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
973 | unsigned long stack_start, | 973 | unsigned long stack_start, |
974 | struct pt_regs *regs, | 974 | struct pt_regs *regs, |
975 | unsigned long stack_size, | 975 | unsigned long stack_size, |
976 | int __user *parent_tidptr, | ||
977 | int __user *child_tidptr, | 976 | int __user *child_tidptr, |
978 | struct pid *pid) | 977 | struct pid *pid) |
979 | { | 978 | { |
@@ -1043,11 +1042,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1043 | p->did_exec = 0; | 1042 | p->did_exec = 0; |
1044 | delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ | 1043 | delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ |
1045 | copy_flags(clone_flags, p); | 1044 | copy_flags(clone_flags, p); |
1046 | retval = -EFAULT; | ||
1047 | if (clone_flags & CLONE_PARENT_SETTID) | ||
1048 | if (put_user(p->pid, parent_tidptr)) | ||
1049 | goto bad_fork_cleanup_delays_binfmt; | ||
1050 | |||
1051 | INIT_LIST_HEAD(&p->children); | 1045 | INIT_LIST_HEAD(&p->children); |
1052 | INIT_LIST_HEAD(&p->sibling); | 1046 | INIT_LIST_HEAD(&p->sibling); |
1053 | p->vfork_done = NULL; | 1047 | p->vfork_done = NULL; |
@@ -1289,11 +1283,22 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1289 | __ptrace_link(p, current->parent); | 1283 | __ptrace_link(p, current->parent); |
1290 | 1284 | ||
1291 | if (thread_group_leader(p)) { | 1285 | if (thread_group_leader(p)) { |
1292 | p->signal->tty = current->signal->tty; | 1286 | if (clone_flags & CLONE_NEWPID) { |
1293 | p->signal->pgrp = task_pgrp_nr(current); | 1287 | p->nsproxy->pid_ns->child_reaper = p; |
1294 | set_task_session(p, task_session_nr(current)); | 1288 | p->signal->tty = NULL; |
1295 | attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); | 1289 | p->signal->pgrp = p->pid; |
1296 | attach_pid(p, PIDTYPE_SID, task_session(current)); | 1290 | set_task_session(p, p->pid); |
1291 | attach_pid(p, PIDTYPE_PGID, pid); | ||
1292 | attach_pid(p, PIDTYPE_SID, pid); | ||
1293 | } else { | ||
1294 | p->signal->tty = current->signal->tty; | ||
1295 | p->signal->pgrp = task_pgrp_nr(current); | ||
1296 | set_task_session(p, task_session_nr(current)); | ||
1297 | attach_pid(p, PIDTYPE_PGID, | ||
1298 | task_pgrp(current)); | ||
1299 | attach_pid(p, PIDTYPE_SID, | ||
1300 | task_session(current)); | ||
1301 | } | ||
1297 | 1302 | ||
1298 | list_add_tail_rcu(&p->tasks, &init_task.tasks); | 1303 | list_add_tail_rcu(&p->tasks, &init_task.tasks); |
1299 | __get_cpu_var(process_counts)++; | 1304 | __get_cpu_var(process_counts)++; |
@@ -1339,7 +1344,6 @@ bad_fork_cleanup_policy: | |||
1339 | bad_fork_cleanup_cgroup: | 1344 | bad_fork_cleanup_cgroup: |
1340 | #endif | 1345 | #endif |
1341 | cgroup_exit(p, cgroup_callbacks_done); | 1346 | cgroup_exit(p, cgroup_callbacks_done); |
1342 | bad_fork_cleanup_delays_binfmt: | ||
1343 | delayacct_tsk_free(p); | 1347 | delayacct_tsk_free(p); |
1344 | if (p->binfmt) | 1348 | if (p->binfmt) |
1345 | module_put(p->binfmt->module); | 1349 | module_put(p->binfmt->module); |
@@ -1366,7 +1370,7 @@ struct task_struct * __cpuinit fork_idle(int cpu) | |||
1366 | struct task_struct *task; | 1370 | struct task_struct *task; |
1367 | struct pt_regs regs; | 1371 | struct pt_regs regs; |
1368 | 1372 | ||
1369 | task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, | 1373 | task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, |
1370 | &init_struct_pid); | 1374 | &init_struct_pid); |
1371 | if (!IS_ERR(task)) | 1375 | if (!IS_ERR(task)) |
1372 | init_idle(task, cpu); | 1376 | init_idle(task, cpu); |
@@ -1414,7 +1418,7 @@ long do_fork(unsigned long clone_flags, | |||
1414 | } | 1418 | } |
1415 | 1419 | ||
1416 | p = copy_process(clone_flags, stack_start, regs, stack_size, | 1420 | p = copy_process(clone_flags, stack_start, regs, stack_size, |
1417 | parent_tidptr, child_tidptr, NULL); | 1421 | child_tidptr, NULL); |
1418 | /* | 1422 | /* |
1419 | * Do this prior waking up the new thread - the thread pointer | 1423 | * Do this prior waking up the new thread - the thread pointer |
1420 | * might get invalid after that point, if the thread exits quickly. | 1424 | * might get invalid after that point, if the thread exits quickly. |
@@ -1422,7 +1426,16 @@ long do_fork(unsigned long clone_flags, | |||
1422 | if (!IS_ERR(p)) { | 1426 | if (!IS_ERR(p)) { |
1423 | struct completion vfork; | 1427 | struct completion vfork; |
1424 | 1428 | ||
1425 | nr = pid_nr(task_pid(p)); | 1429 | /* |
1430 | * this is enough to call pid_nr_ns here, but this if | ||
1431 | * improves optimisation of regular fork() | ||
1432 | */ | ||
1433 | nr = (clone_flags & CLONE_NEWPID) ? | ||
1434 | task_pid_nr_ns(p, current->nsproxy->pid_ns) : | ||
1435 | task_pid_vnr(p); | ||
1436 | |||
1437 | if (clone_flags & CLONE_PARENT_SETTID) | ||
1438 | put_user(nr, parent_tidptr); | ||
1426 | 1439 | ||
1427 | if (clone_flags & CLONE_VFORK) { | 1440 | if (clone_flags & CLONE_VFORK) { |
1428 | p->vfork_done = &vfork; | 1441 | p->vfork_done = &vfork; |
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index c8ef7c2992ed..79f871bc0ef4 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
@@ -129,7 +129,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) | |||
129 | 129 | ||
130 | get_nsproxy(old_ns); | 130 | get_nsproxy(old_ns); |
131 | 131 | ||
132 | if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWNET))) | 132 | if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | |
133 | CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET))) | ||
133 | return 0; | 134 | return 0; |
134 | 135 | ||
135 | if (!capable(CAP_SYS_ADMIN)) { | 136 | if (!capable(CAP_SYS_ADMIN)) { |
diff --git a/kernel/pid.c b/kernel/pid.c index 4b17acdb862f..f76097c60475 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -18,6 +18,12 @@ | |||
18 | * allocation scenario when all but one out of 1 million PIDs possible are | 18 | * allocation scenario when all but one out of 1 million PIDs possible are |
19 | * allocated already: the scanning of 32 list entries and at most PAGE_SIZE | 19 | * allocated already: the scanning of 32 list entries and at most PAGE_SIZE |
20 | * bytes. The typical fastpath is a single successful setbit. Freeing is O(1). | 20 | * bytes. The typical fastpath is a single successful setbit. Freeing is O(1). |
21 | * | ||
22 | * Pid namespaces: | ||
23 | * (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc. | ||
24 | * (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM | ||
25 | * Many thanks to Oleg Nesterov for comments and help | ||
26 | * | ||
21 | */ | 27 | */ |
22 | 28 | ||
23 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
@@ -456,8 +462,8 @@ static struct kmem_cache *create_pid_cachep(int nr_ids) | |||
456 | 462 | ||
457 | snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids); | 463 | snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids); |
458 | cachep = kmem_cache_create(pcache->name, | 464 | cachep = kmem_cache_create(pcache->name, |
459 | /* FIXME add numerical ids here */ | 465 | sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid), |
460 | sizeof(struct pid), 0, SLAB_HWCACHE_ALIGN, NULL); | 466 | 0, SLAB_HWCACHE_ALIGN, NULL); |
461 | if (cachep == NULL) | 467 | if (cachep == NULL) |
462 | goto err_cachep; | 468 | goto err_cachep; |
463 | 469 | ||
@@ -475,19 +481,89 @@ err_alloc: | |||
475 | return NULL; | 481 | return NULL; |
476 | } | 482 | } |
477 | 483 | ||
484 | static struct pid_namespace *create_pid_namespace(int level) | ||
485 | { | ||
486 | struct pid_namespace *ns; | ||
487 | int i; | ||
488 | |||
489 | ns = kmalloc(sizeof(struct pid_namespace), GFP_KERNEL); | ||
490 | if (ns == NULL) | ||
491 | goto out; | ||
492 | |||
493 | ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); | ||
494 | if (!ns->pidmap[0].page) | ||
495 | goto out_free; | ||
496 | |||
497 | ns->pid_cachep = create_pid_cachep(level + 1); | ||
498 | if (ns->pid_cachep == NULL) | ||
499 | goto out_free_map; | ||
500 | |||
501 | kref_init(&ns->kref); | ||
502 | ns->last_pid = 0; | ||
503 | ns->child_reaper = NULL; | ||
504 | ns->level = level; | ||
505 | |||
506 | set_bit(0, ns->pidmap[0].page); | ||
507 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); | ||
508 | |||
509 | for (i = 1; i < PIDMAP_ENTRIES; i++) { | ||
510 | ns->pidmap[i].page = 0; | ||
511 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); | ||
512 | } | ||
513 | |||
514 | return ns; | ||
515 | |||
516 | out_free_map: | ||
517 | kfree(ns->pidmap[0].page); | ||
518 | out_free: | ||
519 | kfree(ns); | ||
520 | out: | ||
521 | return ERR_PTR(-ENOMEM); | ||
522 | } | ||
523 | |||
524 | static void destroy_pid_namespace(struct pid_namespace *ns) | ||
525 | { | ||
526 | int i; | ||
527 | |||
528 | for (i = 0; i < PIDMAP_ENTRIES; i++) | ||
529 | kfree(ns->pidmap[i].page); | ||
530 | kfree(ns); | ||
531 | } | ||
532 | |||
478 | struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) | 533 | struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) |
479 | { | 534 | { |
535 | struct pid_namespace *new_ns; | ||
536 | |||
480 | BUG_ON(!old_ns); | 537 | BUG_ON(!old_ns); |
481 | get_pid_ns(old_ns); | 538 | new_ns = get_pid_ns(old_ns); |
482 | return old_ns; | 539 | if (!(flags & CLONE_NEWPID)) |
540 | goto out; | ||
541 | |||
542 | new_ns = ERR_PTR(-EINVAL); | ||
543 | if (flags & CLONE_THREAD) | ||
544 | goto out_put; | ||
545 | |||
546 | new_ns = create_pid_namespace(old_ns->level + 1); | ||
547 | if (!IS_ERR(new_ns)) | ||
548 | new_ns->parent = get_pid_ns(old_ns); | ||
549 | |||
550 | out_put: | ||
551 | put_pid_ns(old_ns); | ||
552 | out: | ||
553 | return new_ns; | ||
483 | } | 554 | } |
484 | 555 | ||
485 | void free_pid_ns(struct kref *kref) | 556 | void free_pid_ns(struct kref *kref) |
486 | { | 557 | { |
487 | struct pid_namespace *ns; | 558 | struct pid_namespace *ns, *parent; |
488 | 559 | ||
489 | ns = container_of(kref, struct pid_namespace, kref); | 560 | ns = container_of(kref, struct pid_namespace, kref); |
490 | kfree(ns); | 561 | |
562 | parent = ns->parent; | ||
563 | destroy_pid_namespace(ns); | ||
564 | |||
565 | if (parent != NULL) | ||
566 | put_pid_ns(parent); | ||
491 | } | 567 | } |
492 | 568 | ||
493 | /* | 569 | /* |