about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- kernel/fork.c 133
1 files changed, 84 insertions, 49 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 2ce28f165e31..ddafdfac9456 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -29,7 +29,7 @@
29#include <linux/nsproxy.h> 29#include <linux/nsproxy.h>
30#include <linux/capability.h> 30#include <linux/capability.h>
31#include <linux/cpu.h> 31#include <linux/cpu.h>
32#include <linux/cpuset.h> 32#include <linux/cgroup.h>
33#include <linux/security.h> 33#include <linux/security.h>
34#include <linux/swap.h> 34#include <linux/swap.h>
35#include <linux/syscalls.h> 35#include <linux/syscalls.h>
@@ -50,6 +50,7 @@
50#include <linux/taskstats_kern.h> 50#include <linux/taskstats_kern.h>
51#include <linux/random.h> 51#include <linux/random.h>
52#include <linux/tty.h> 52#include <linux/tty.h>
53#include <linux/proc_fs.h>
53 54
54#include <asm/pgtable.h> 55#include <asm/pgtable.h>
55#include <asm/pgalloc.h> 56#include <asm/pgalloc.h>
@@ -116,7 +117,7 @@ EXPORT_SYMBOL(free_task);
116 117
117void __put_task_struct(struct task_struct *tsk) 118void __put_task_struct(struct task_struct *tsk)
118{ 119{
119 WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE))); 120 WARN_ON(!tsk->exit_state);
120 WARN_ON(atomic_read(&tsk->usage)); 121 WARN_ON(atomic_read(&tsk->usage));
121 WARN_ON(tsk == current); 122 WARN_ON(tsk == current);
122 123
@@ -205,7 +206,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
205} 206}
206 207
207#ifdef CONFIG_MMU 208#ifdef CONFIG_MMU
208static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) 209static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
209{ 210{
210 struct vm_area_struct *mpnt, *tmp, **pprev; 211 struct vm_area_struct *mpnt, *tmp, **pprev;
211 struct rb_node **rb_link, *rb_parent; 212 struct rb_node **rb_link, *rb_parent;
@@ -583,7 +584,7 @@ fail_nomem:
583 return retval; 584 return retval;
584} 585}
585 586
586static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old) 587static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
587{ 588{
588 struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); 589 struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
589 /* We don't need to lock fs - think why ;-) */ 590 /* We don't need to lock fs - think why ;-) */
@@ -615,7 +616,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
615 616
616EXPORT_SYMBOL_GPL(copy_fs_struct); 617EXPORT_SYMBOL_GPL(copy_fs_struct);
617 618
618static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk) 619static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
619{ 620{
620 if (clone_flags & CLONE_FS) { 621 if (clone_flags & CLONE_FS) {
621 atomic_inc(&current->fs->count); 622 atomic_inc(&current->fs->count);
@@ -818,7 +819,7 @@ int unshare_files(void)
818 819
819EXPORT_SYMBOL(unshare_files); 820EXPORT_SYMBOL(unshare_files);
820 821
821static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk) 822static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
822{ 823{
823 struct sighand_struct *sig; 824 struct sighand_struct *sig;
824 825
@@ -841,7 +842,7 @@ void __cleanup_sighand(struct sighand_struct *sighand)
841 kmem_cache_free(sighand_cachep, sighand); 842 kmem_cache_free(sighand_cachep, sighand);
842} 843}
843 844
844static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk) 845static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
845{ 846{
846 struct signal_struct *sig; 847 struct signal_struct *sig;
847 int ret; 848 int ret;
@@ -923,7 +924,7 @@ void __cleanup_signal(struct signal_struct *sig)
923 kmem_cache_free(signal_cachep, sig); 924 kmem_cache_free(signal_cachep, sig);
924} 925}
925 926
926static inline void cleanup_signal(struct task_struct *tsk) 927static void cleanup_signal(struct task_struct *tsk)
927{ 928{
928 struct signal_struct *sig = tsk->signal; 929 struct signal_struct *sig = tsk->signal;
929 930
@@ -933,7 +934,7 @@ static inline void cleanup_signal(struct task_struct *tsk)
933 __cleanup_signal(sig); 934 __cleanup_signal(sig);
934} 935}
935 936
936static inline void copy_flags(unsigned long clone_flags, struct task_struct *p) 937static void copy_flags(unsigned long clone_flags, struct task_struct *p)
937{ 938{
938 unsigned long new_flags = p->flags; 939 unsigned long new_flags = p->flags;
939 940
@@ -949,10 +950,10 @@ asmlinkage long sys_set_tid_address(int __user *tidptr)
949{ 950{
950 current->clear_child_tid = tidptr; 951 current->clear_child_tid = tidptr;
951 952
952 return current->pid; 953 return task_pid_vnr(current);
953} 954}
954 955
955static inline void rt_mutex_init_task(struct task_struct *p) 956static void rt_mutex_init_task(struct task_struct *p)
956{ 957{
957 spin_lock_init(&p->pi_lock); 958 spin_lock_init(&p->pi_lock);
958#ifdef CONFIG_RT_MUTEXES 959#ifdef CONFIG_RT_MUTEXES
@@ -973,12 +974,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
973 unsigned long stack_start, 974 unsigned long stack_start,
974 struct pt_regs *regs, 975 struct pt_regs *regs,
975 unsigned long stack_size, 976 unsigned long stack_size,
976 int __user *parent_tidptr,
977 int __user *child_tidptr, 977 int __user *child_tidptr,
978 struct pid *pid) 978 struct pid *pid)
979{ 979{
980 int retval; 980 int retval;
981 struct task_struct *p = NULL; 981 struct task_struct *p;
982 int cgroup_callbacks_done = 0;
982 983
983 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) 984 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
984 return ERR_PTR(-EINVAL); 985 return ERR_PTR(-EINVAL);
@@ -1042,12 +1043,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1042 p->did_exec = 0; 1043 p->did_exec = 0;
1043 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ 1044 delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
1044 copy_flags(clone_flags, p); 1045 copy_flags(clone_flags, p);
1045 p->pid = pid_nr(pid);
1046 retval = -EFAULT;
1047 if (clone_flags & CLONE_PARENT_SETTID)
1048 if (put_user(p->pid, parent_tidptr))
1049 goto bad_fork_cleanup_delays_binfmt;
1050
1051 INIT_LIST_HEAD(&p->children); 1046 INIT_LIST_HEAD(&p->children);
1052 INIT_LIST_HEAD(&p->sibling); 1047 INIT_LIST_HEAD(&p->sibling);
1053 p->vfork_done = NULL; 1048 p->vfork_done = NULL;
@@ -1087,13 +1082,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1087#endif 1082#endif
1088 p->io_context = NULL; 1083 p->io_context = NULL;
1089 p->audit_context = NULL; 1084 p->audit_context = NULL;
1090 cpuset_fork(p); 1085 cgroup_fork(p);
1091#ifdef CONFIG_NUMA 1086#ifdef CONFIG_NUMA
1092 p->mempolicy = mpol_copy(p->mempolicy); 1087 p->mempolicy = mpol_copy(p->mempolicy);
1093 if (IS_ERR(p->mempolicy)) { 1088 if (IS_ERR(p->mempolicy)) {
1094 retval = PTR_ERR(p->mempolicy); 1089 retval = PTR_ERR(p->mempolicy);
1095 p->mempolicy = NULL; 1090 p->mempolicy = NULL;
1096 goto bad_fork_cleanup_cpuset; 1091 goto bad_fork_cleanup_cgroup;
1097 } 1092 }
1098 mpol_fix_fork_child_flag(p); 1093 mpol_fix_fork_child_flag(p);
1099#endif 1094#endif
@@ -1126,10 +1121,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1126 p->blocked_on = NULL; /* not blocked yet */ 1121 p->blocked_on = NULL; /* not blocked yet */
1127#endif 1122#endif
1128 1123
1129 p->tgid = p->pid;
1130 if (clone_flags & CLONE_THREAD)
1131 p->tgid = current->tgid;
1132
1133 if ((retval = security_task_alloc(p))) 1124 if ((retval = security_task_alloc(p)))
1134 goto bad_fork_cleanup_policy; 1125 goto bad_fork_cleanup_policy;
1135 if ((retval = audit_alloc(p))) 1126 if ((retval = audit_alloc(p)))
@@ -1155,6 +1146,24 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1155 if (retval) 1146 if (retval)
1156 goto bad_fork_cleanup_namespaces; 1147 goto bad_fork_cleanup_namespaces;
1157 1148
1149 if (pid != &init_struct_pid) {
1150 retval = -ENOMEM;
1151 pid = alloc_pid(task_active_pid_ns(p));
1152 if (!pid)
1153 goto bad_fork_cleanup_namespaces;
1154
1155 if (clone_flags & CLONE_NEWPID) {
1156 retval = pid_ns_prepare_proc(task_active_pid_ns(p));
1157 if (retval < 0)
1158 goto bad_fork_free_pid;
1159 }
1160 }
1161
1162 p->pid = pid_nr(pid);
1163 p->tgid = p->pid;
1164 if (clone_flags & CLONE_THREAD)
1165 p->tgid = current->tgid;
1166
1158 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 1167 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
1159 /* 1168 /*
1160 * Clear TID on mm_release()? 1169 * Clear TID on mm_release()?
@@ -1204,6 +1213,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1204 /* Perform scheduler related setup. Assign this task to a CPU. */ 1213 /* Perform scheduler related setup. Assign this task to a CPU. */
1205 sched_fork(p, clone_flags); 1214 sched_fork(p, clone_flags);
1206 1215
1216 /* Now that the task is set up, run cgroup callbacks if
1217 * necessary. We need to run them before the task is visible
1218 * on the tasklist. */
1219 cgroup_fork_callbacks(p);
1220 cgroup_callbacks_done = 1;
1221
1207 /* Need tasklist lock for parent etc handling! */ 1222 /* Need tasklist lock for parent etc handling! */
1208 write_lock_irq(&tasklist_lock); 1223 write_lock_irq(&tasklist_lock);
1209 1224
@@ -1246,7 +1261,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1246 spin_unlock(&current->sighand->siglock); 1261 spin_unlock(&current->sighand->siglock);
1247 write_unlock_irq(&tasklist_lock); 1262 write_unlock_irq(&tasklist_lock);
1248 retval = -ERESTARTNOINTR; 1263 retval = -ERESTARTNOINTR;
1249 goto bad_fork_cleanup_namespaces; 1264 goto bad_fork_free_pid;
1250 } 1265 }
1251 1266
1252 if (clone_flags & CLONE_THREAD) { 1267 if (clone_flags & CLONE_THREAD) {
@@ -1275,11 +1290,22 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1275 __ptrace_link(p, current->parent); 1290 __ptrace_link(p, current->parent);
1276 1291
1277 if (thread_group_leader(p)) { 1292 if (thread_group_leader(p)) {
1278 p->signal->tty = current->signal->tty; 1293 if (clone_flags & CLONE_NEWPID) {
1279 p->signal->pgrp = process_group(current); 1294 p->nsproxy->pid_ns->child_reaper = p;
1280 set_signal_session(p->signal, process_session(current)); 1295 p->signal->tty = NULL;
1281 attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); 1296 set_task_pgrp(p, p->pid);
1282 attach_pid(p, PIDTYPE_SID, task_session(current)); 1297 set_task_session(p, p->pid);
1298 attach_pid(p, PIDTYPE_PGID, pid);
1299 attach_pid(p, PIDTYPE_SID, pid);
1300 } else {
1301 p->signal->tty = current->signal->tty;
1302 set_task_pgrp(p, task_pgrp_nr(current));
1303 set_task_session(p, task_session_nr(current));
1304 attach_pid(p, PIDTYPE_PGID,
1305 task_pgrp(current));
1306 attach_pid(p, PIDTYPE_SID,
1307 task_session(current));
1308 }
1283 1309
1284 list_add_tail_rcu(&p->tasks, &init_task.tasks); 1310 list_add_tail_rcu(&p->tasks, &init_task.tasks);
1285 __get_cpu_var(process_counts)++; 1311 __get_cpu_var(process_counts)++;
@@ -1292,8 +1318,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1292 spin_unlock(&current->sighand->siglock); 1318 spin_unlock(&current->sighand->siglock);
1293 write_unlock_irq(&tasklist_lock); 1319 write_unlock_irq(&tasklist_lock);
1294 proc_fork_connector(p); 1320 proc_fork_connector(p);
1321 cgroup_post_fork(p);
1295 return p; 1322 return p;
1296 1323
1324bad_fork_free_pid:
1325 if (pid != &init_struct_pid)
1326 free_pid(pid);
1297bad_fork_cleanup_namespaces: 1327bad_fork_cleanup_namespaces:
1298 exit_task_namespaces(p); 1328 exit_task_namespaces(p);
1299bad_fork_cleanup_keys: 1329bad_fork_cleanup_keys:
@@ -1318,10 +1348,9 @@ bad_fork_cleanup_security:
1318bad_fork_cleanup_policy: 1348bad_fork_cleanup_policy:
1319#ifdef CONFIG_NUMA 1349#ifdef CONFIG_NUMA
1320 mpol_free(p->mempolicy); 1350 mpol_free(p->mempolicy);
1321bad_fork_cleanup_cpuset: 1351bad_fork_cleanup_cgroup:
1322#endif 1352#endif
1323 cpuset_exit(p); 1353 cgroup_exit(p, cgroup_callbacks_done);
1324bad_fork_cleanup_delays_binfmt:
1325 delayacct_tsk_free(p); 1354 delayacct_tsk_free(p);
1326 if (p->binfmt) 1355 if (p->binfmt)
1327 module_put(p->binfmt->module); 1356 module_put(p->binfmt->module);
@@ -1348,7 +1377,7 @@ struct task_struct * __cpuinit fork_idle(int cpu)
1348 struct task_struct *task; 1377 struct task_struct *task;
1349 struct pt_regs regs; 1378 struct pt_regs regs;
1350 1379
1351 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 1380 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
1352 &init_struct_pid); 1381 &init_struct_pid);
1353 if (!IS_ERR(task)) 1382 if (!IS_ERR(task))
1354 init_idle(task, cpu); 1383 init_idle(task, cpu);
@@ -1356,7 +1385,7 @@ struct task_struct * __cpuinit fork_idle(int cpu)
1356 return task; 1385 return task;
1357} 1386}
1358 1387
1359static inline int fork_traceflag (unsigned clone_flags) 1388static int fork_traceflag(unsigned clone_flags)
1360{ 1389{
1361 if (clone_flags & CLONE_UNTRACED) 1390 if (clone_flags & CLONE_UNTRACED)
1362 return 0; 1391 return 0;
@@ -1387,19 +1416,16 @@ long do_fork(unsigned long clone_flags,
1387{ 1416{
1388 struct task_struct *p; 1417 struct task_struct *p;
1389 int trace = 0; 1418 int trace = 0;
1390 struct pid *pid = alloc_pid();
1391 long nr; 1419 long nr;
1392 1420
1393 if (!pid)
1394 return -EAGAIN;
1395 nr = pid->nr;
1396 if (unlikely(current->ptrace)) { 1421 if (unlikely(current->ptrace)) {
1397 trace = fork_traceflag (clone_flags); 1422 trace = fork_traceflag (clone_flags);
1398 if (trace) 1423 if (trace)
1399 clone_flags |= CLONE_PTRACE; 1424 clone_flags |= CLONE_PTRACE;
1400 } 1425 }
1401 1426
1402 p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid); 1427 p = copy_process(clone_flags, stack_start, regs, stack_size,
1428 child_tidptr, NULL);
1403 /* 1429 /*
1404 * Do this prior waking up the new thread - the thread pointer 1430 * Do this prior waking up the new thread - the thread pointer
1405 * might get invalid after that point, if the thread exits quickly. 1431 * might get invalid after that point, if the thread exits quickly.
@@ -1407,6 +1433,17 @@ long do_fork(unsigned long clone_flags,
1407 if (!IS_ERR(p)) { 1433 if (!IS_ERR(p)) {
1408 struct completion vfork; 1434 struct completion vfork;
1409 1435
1436 /*
1437 * this is enough to call pid_nr_ns here, but this if
1438 * improves optimisation of regular fork()
1439 */
1440 nr = (clone_flags & CLONE_NEWPID) ?
1441 task_pid_nr_ns(p, current->nsproxy->pid_ns) :
1442 task_pid_vnr(p);
1443
1444 if (clone_flags & CLONE_PARENT_SETTID)
1445 put_user(nr, parent_tidptr);
1446
1410 if (clone_flags & CLONE_VFORK) { 1447 if (clone_flags & CLONE_VFORK) {
1411 p->vfork_done = &vfork; 1448 p->vfork_done = &vfork;
1412 init_completion(&vfork); 1449 init_completion(&vfork);
@@ -1440,7 +1477,6 @@ long do_fork(unsigned long clone_flags,
1440 } 1477 }
1441 } 1478 }
1442 } else { 1479 } else {
1443 free_pid(pid);
1444 nr = PTR_ERR(p); 1480 nr = PTR_ERR(p);
1445 } 1481 }
1446 return nr; 1482 return nr;
@@ -1485,7 +1521,7 @@ void __init proc_caches_init(void)
1485 * Check constraints on flags passed to the unshare system call and 1521 * Check constraints on flags passed to the unshare system call and
1486 * force unsharing of additional process context as appropriate. 1522 * force unsharing of additional process context as appropriate.
1487 */ 1523 */
1488static inline void check_unshare_flags(unsigned long *flags_ptr) 1524static void check_unshare_flags(unsigned long *flags_ptr)
1489{ 1525{
1490 /* 1526 /*
1491 * If unsharing a thread from a thread group, must also 1527 * If unsharing a thread from a thread group, must also
@@ -1617,7 +1653,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
1617 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; 1653 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
1618 struct files_struct *fd, *new_fd = NULL; 1654 struct files_struct *fd, *new_fd = NULL;
1619 struct sem_undo_list *new_ulist = NULL; 1655 struct sem_undo_list *new_ulist = NULL;
1620 struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL; 1656 struct nsproxy *new_nsproxy = NULL;
1621 1657
1622 check_unshare_flags(&unshare_flags); 1658 check_unshare_flags(&unshare_flags);
1623 1659
@@ -1647,14 +1683,13 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
1647 1683
1648 if (new_fs || new_mm || new_fd || new_ulist || new_nsproxy) { 1684 if (new_fs || new_mm || new_fd || new_ulist || new_nsproxy) {
1649 1685
1650 task_lock(current);
1651
1652 if (new_nsproxy) { 1686 if (new_nsproxy) {
1653 old_nsproxy = current->nsproxy; 1687 switch_task_namespaces(current, new_nsproxy);
1654 current->nsproxy = new_nsproxy; 1688 new_nsproxy = NULL;
1655 new_nsproxy = old_nsproxy;
1656 } 1689 }
1657 1690
1691 task_lock(current);
1692
1658 if (new_fs) { 1693 if (new_fs) {
1659 fs = current->fs; 1694 fs = current->fs;
1660 current->fs = new_fs; 1695 current->fs = new_fs;