diff options
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 96 |
1 files changed, 52 insertions, 44 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 2a372a0e206f..43cbf30669e6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/mount.h> | 47 | #include <linux/mount.h> |
48 | #include <linux/audit.h> | 48 | #include <linux/audit.h> |
49 | #include <linux/memcontrol.h> | 49 | #include <linux/memcontrol.h> |
50 | #include <linux/ftrace.h> | ||
50 | #include <linux/profile.h> | 51 | #include <linux/profile.h> |
51 | #include <linux/rmap.h> | 52 | #include <linux/rmap.h> |
52 | #include <linux/acct.h> | 53 | #include <linux/acct.h> |
@@ -80,6 +81,8 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0; | |||
80 | 81 | ||
81 | __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ | 82 | __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ |
82 | 83 | ||
84 | DEFINE_TRACE(sched_process_fork); | ||
85 | |||
83 | int nr_processes(void) | 86 | int nr_processes(void) |
84 | { | 87 | { |
85 | int cpu; | 88 | int cpu; |
@@ -137,6 +140,7 @@ void free_task(struct task_struct *tsk) | |||
137 | prop_local_destroy_single(&tsk->dirties); | 140 | prop_local_destroy_single(&tsk->dirties); |
138 | free_thread_info(tsk->stack); | 141 | free_thread_info(tsk->stack); |
139 | rt_mutex_debug_task_free(tsk); | 142 | rt_mutex_debug_task_free(tsk); |
143 | ftrace_graph_exit_task(tsk); | ||
140 | free_task_struct(tsk); | 144 | free_task_struct(tsk); |
141 | } | 145 | } |
142 | EXPORT_SYMBOL(free_task); | 146 | EXPORT_SYMBOL(free_task); |
@@ -147,9 +151,8 @@ void __put_task_struct(struct task_struct *tsk) | |||
147 | WARN_ON(atomic_read(&tsk->usage)); | 151 | WARN_ON(atomic_read(&tsk->usage)); |
148 | WARN_ON(tsk == current); | 152 | WARN_ON(tsk == current); |
149 | 153 | ||
150 | security_task_free(tsk); | 154 | put_cred(tsk->real_cred); |
151 | free_uid(tsk->user); | 155 | put_cred(tsk->cred); |
152 | put_group_info(tsk->group_info); | ||
153 | delayacct_tsk_free(tsk); | 156 | delayacct_tsk_free(tsk); |
154 | 157 | ||
155 | if (!profile_handoff_task(tsk)) | 158 | if (!profile_handoff_task(tsk)) |
@@ -315,17 +318,20 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
315 | file = tmp->vm_file; | 318 | file = tmp->vm_file; |
316 | if (file) { | 319 | if (file) { |
317 | struct inode *inode = file->f_path.dentry->d_inode; | 320 | struct inode *inode = file->f_path.dentry->d_inode; |
321 | struct address_space *mapping = file->f_mapping; | ||
322 | |||
318 | get_file(file); | 323 | get_file(file); |
319 | if (tmp->vm_flags & VM_DENYWRITE) | 324 | if (tmp->vm_flags & VM_DENYWRITE) |
320 | atomic_dec(&inode->i_writecount); | 325 | atomic_dec(&inode->i_writecount); |
321 | 326 | spin_lock(&mapping->i_mmap_lock); | |
322 | /* insert tmp into the share list, just after mpnt */ | 327 | if (tmp->vm_flags & VM_SHARED) |
323 | spin_lock(&file->f_mapping->i_mmap_lock); | 328 | mapping->i_mmap_writable++; |
324 | tmp->vm_truncate_count = mpnt->vm_truncate_count; | 329 | tmp->vm_truncate_count = mpnt->vm_truncate_count; |
325 | flush_dcache_mmap_lock(file->f_mapping); | 330 | flush_dcache_mmap_lock(mapping); |
331 | /* insert tmp into the share list, just after mpnt */ | ||
326 | vma_prio_tree_add(tmp, mpnt); | 332 | vma_prio_tree_add(tmp, mpnt); |
327 | flush_dcache_mmap_unlock(file->f_mapping); | 333 | flush_dcache_mmap_unlock(mapping); |
328 | spin_unlock(&file->f_mapping->i_mmap_lock); | 334 | spin_unlock(&mapping->i_mmap_lock); |
329 | } | 335 | } |
330 | 336 | ||
331 | /* | 337 | /* |
@@ -409,8 +415,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | |||
409 | set_mm_counter(mm, file_rss, 0); | 415 | set_mm_counter(mm, file_rss, 0); |
410 | set_mm_counter(mm, anon_rss, 0); | 416 | set_mm_counter(mm, anon_rss, 0); |
411 | spin_lock_init(&mm->page_table_lock); | 417 | spin_lock_init(&mm->page_table_lock); |
412 | rwlock_init(&mm->ioctx_list_lock); | 418 | spin_lock_init(&mm->ioctx_lock); |
413 | mm->ioctx_list = NULL; | 419 | INIT_HLIST_HEAD(&mm->ioctx_list); |
414 | mm->free_area_cache = TASK_UNMAPPED_BASE; | 420 | mm->free_area_cache = TASK_UNMAPPED_BASE; |
415 | mm->cached_hole_size = ~0UL; | 421 | mm->cached_hole_size = ~0UL; |
416 | mm_init_owner(mm, p); | 422 | mm_init_owner(mm, p); |
@@ -815,12 +821,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
815 | if (!sig) | 821 | if (!sig) |
816 | return -ENOMEM; | 822 | return -ENOMEM; |
817 | 823 | ||
818 | ret = copy_thread_group_keys(tsk); | ||
819 | if (ret < 0) { | ||
820 | kmem_cache_free(signal_cachep, sig); | ||
821 | return ret; | ||
822 | } | ||
823 | |||
824 | atomic_set(&sig->count, 1); | 824 | atomic_set(&sig->count, 1); |
825 | atomic_set(&sig->live, 1); | 825 | atomic_set(&sig->live, 1); |
826 | init_waitqueue_head(&sig->wait_chldexit); | 826 | init_waitqueue_head(&sig->wait_chldexit); |
@@ -865,7 +865,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
865 | void __cleanup_signal(struct signal_struct *sig) | 865 | void __cleanup_signal(struct signal_struct *sig) |
866 | { | 866 | { |
867 | thread_group_cputime_free(sig); | 867 | thread_group_cputime_free(sig); |
868 | exit_thread_group_keys(sig); | ||
869 | tty_kref_put(sig->tty); | 868 | tty_kref_put(sig->tty); |
870 | kmem_cache_free(signal_cachep, sig); | 869 | kmem_cache_free(signal_cachep, sig); |
871 | } | 870 | } |
@@ -981,16 +980,16 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
981 | DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); | 980 | DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); |
982 | #endif | 981 | #endif |
983 | retval = -EAGAIN; | 982 | retval = -EAGAIN; |
984 | if (atomic_read(&p->user->processes) >= | 983 | if (atomic_read(&p->real_cred->user->processes) >= |
985 | p->signal->rlim[RLIMIT_NPROC].rlim_cur) { | 984 | p->signal->rlim[RLIMIT_NPROC].rlim_cur) { |
986 | if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && | 985 | if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && |
987 | p->user != current->nsproxy->user_ns->root_user) | 986 | p->real_cred->user != INIT_USER) |
988 | goto bad_fork_free; | 987 | goto bad_fork_free; |
989 | } | 988 | } |
990 | 989 | ||
991 | atomic_inc(&p->user->__count); | 990 | retval = copy_creds(p, clone_flags); |
992 | atomic_inc(&p->user->processes); | 991 | if (retval < 0) |
993 | get_group_info(p->group_info); | 992 | goto bad_fork_free; |
994 | 993 | ||
995 | /* | 994 | /* |
996 | * If multiple threads are within copy_process(), then this check | 995 | * If multiple threads are within copy_process(), then this check |
@@ -1045,10 +1044,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1045 | do_posix_clock_monotonic_gettime(&p->start_time); | 1044 | do_posix_clock_monotonic_gettime(&p->start_time); |
1046 | p->real_start_time = p->start_time; | 1045 | p->real_start_time = p->start_time; |
1047 | monotonic_to_bootbased(&p->real_start_time); | 1046 | monotonic_to_bootbased(&p->real_start_time); |
1048 | #ifdef CONFIG_SECURITY | ||
1049 | p->security = NULL; | ||
1050 | #endif | ||
1051 | p->cap_bset = current->cap_bset; | ||
1052 | p->io_context = NULL; | 1047 | p->io_context = NULL; |
1053 | p->audit_context = NULL; | 1048 | p->audit_context = NULL; |
1054 | cgroup_fork(p); | 1049 | cgroup_fork(p); |
@@ -1089,14 +1084,14 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1089 | #ifdef CONFIG_DEBUG_MUTEXES | 1084 | #ifdef CONFIG_DEBUG_MUTEXES |
1090 | p->blocked_on = NULL; /* not blocked yet */ | 1085 | p->blocked_on = NULL; /* not blocked yet */ |
1091 | #endif | 1086 | #endif |
1087 | if (unlikely(ptrace_reparented(current))) | ||
1088 | ptrace_fork(p, clone_flags); | ||
1092 | 1089 | ||
1093 | /* Perform scheduler related setup. Assign this task to a CPU. */ | 1090 | /* Perform scheduler related setup. Assign this task to a CPU. */ |
1094 | sched_fork(p, clone_flags); | 1091 | sched_fork(p, clone_flags); |
1095 | 1092 | ||
1096 | if ((retval = security_task_alloc(p))) | ||
1097 | goto bad_fork_cleanup_policy; | ||
1098 | if ((retval = audit_alloc(p))) | 1093 | if ((retval = audit_alloc(p))) |
1099 | goto bad_fork_cleanup_security; | 1094 | goto bad_fork_cleanup_policy; |
1100 | /* copy all the process information */ | 1095 | /* copy all the process information */ |
1101 | if ((retval = copy_semundo(clone_flags, p))) | 1096 | if ((retval = copy_semundo(clone_flags, p))) |
1102 | goto bad_fork_cleanup_audit; | 1097 | goto bad_fork_cleanup_audit; |
@@ -1110,10 +1105,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1110 | goto bad_fork_cleanup_sighand; | 1105 | goto bad_fork_cleanup_sighand; |
1111 | if ((retval = copy_mm(clone_flags, p))) | 1106 | if ((retval = copy_mm(clone_flags, p))) |
1112 | goto bad_fork_cleanup_signal; | 1107 | goto bad_fork_cleanup_signal; |
1113 | if ((retval = copy_keys(clone_flags, p))) | ||
1114 | goto bad_fork_cleanup_mm; | ||
1115 | if ((retval = copy_namespaces(clone_flags, p))) | 1108 | if ((retval = copy_namespaces(clone_flags, p))) |
1116 | goto bad_fork_cleanup_keys; | 1109 | goto bad_fork_cleanup_mm; |
1117 | if ((retval = copy_io(clone_flags, p))) | 1110 | if ((retval = copy_io(clone_flags, p))) |
1118 | goto bad_fork_cleanup_namespaces; | 1111 | goto bad_fork_cleanup_namespaces; |
1119 | retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); | 1112 | retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); |
@@ -1133,6 +1126,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1133 | } | 1126 | } |
1134 | } | 1127 | } |
1135 | 1128 | ||
1129 | ftrace_graph_init_task(p); | ||
1130 | |||
1136 | p->pid = pid_nr(pid); | 1131 | p->pid = pid_nr(pid); |
1137 | p->tgid = p->pid; | 1132 | p->tgid = p->pid; |
1138 | if (clone_flags & CLONE_THREAD) | 1133 | if (clone_flags & CLONE_THREAD) |
@@ -1141,7 +1136,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1141 | if (current->nsproxy != p->nsproxy) { | 1136 | if (current->nsproxy != p->nsproxy) { |
1142 | retval = ns_cgroup_clone(p, pid); | 1137 | retval = ns_cgroup_clone(p, pid); |
1143 | if (retval) | 1138 | if (retval) |
1144 | goto bad_fork_free_pid; | 1139 | goto bad_fork_free_graph; |
1145 | } | 1140 | } |
1146 | 1141 | ||
1147 | p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; | 1142 | p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; |
@@ -1234,7 +1229,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1234 | spin_unlock(&current->sighand->siglock); | 1229 | spin_unlock(&current->sighand->siglock); |
1235 | write_unlock_irq(&tasklist_lock); | 1230 | write_unlock_irq(&tasklist_lock); |
1236 | retval = -ERESTARTNOINTR; | 1231 | retval = -ERESTARTNOINTR; |
1237 | goto bad_fork_free_pid; | 1232 | goto bad_fork_free_graph; |
1238 | } | 1233 | } |
1239 | 1234 | ||
1240 | if (clone_flags & CLONE_THREAD) { | 1235 | if (clone_flags & CLONE_THREAD) { |
@@ -1271,6 +1266,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1271 | cgroup_post_fork(p); | 1266 | cgroup_post_fork(p); |
1272 | return p; | 1267 | return p; |
1273 | 1268 | ||
1269 | bad_fork_free_graph: | ||
1270 | ftrace_graph_exit_task(p); | ||
1274 | bad_fork_free_pid: | 1271 | bad_fork_free_pid: |
1275 | if (pid != &init_struct_pid) | 1272 | if (pid != &init_struct_pid) |
1276 | free_pid(pid); | 1273 | free_pid(pid); |
@@ -1278,8 +1275,6 @@ bad_fork_cleanup_io: | |||
1278 | put_io_context(p->io_context); | 1275 | put_io_context(p->io_context); |
1279 | bad_fork_cleanup_namespaces: | 1276 | bad_fork_cleanup_namespaces: |
1280 | exit_task_namespaces(p); | 1277 | exit_task_namespaces(p); |
1281 | bad_fork_cleanup_keys: | ||
1282 | exit_keys(p); | ||
1283 | bad_fork_cleanup_mm: | 1278 | bad_fork_cleanup_mm: |
1284 | if (p->mm) | 1279 | if (p->mm) |
1285 | mmput(p->mm); | 1280 | mmput(p->mm); |
@@ -1295,8 +1290,6 @@ bad_fork_cleanup_semundo: | |||
1295 | exit_sem(p); | 1290 | exit_sem(p); |
1296 | bad_fork_cleanup_audit: | 1291 | bad_fork_cleanup_audit: |
1297 | audit_free(p); | 1292 | audit_free(p); |
1298 | bad_fork_cleanup_security: | ||
1299 | security_task_free(p); | ||
1300 | bad_fork_cleanup_policy: | 1293 | bad_fork_cleanup_policy: |
1301 | #ifdef CONFIG_NUMA | 1294 | #ifdef CONFIG_NUMA |
1302 | mpol_put(p->mempolicy); | 1295 | mpol_put(p->mempolicy); |
@@ -1309,9 +1302,9 @@ bad_fork_cleanup_cgroup: | |||
1309 | bad_fork_cleanup_put_domain: | 1302 | bad_fork_cleanup_put_domain: |
1310 | module_put(task_thread_info(p)->exec_domain->module); | 1303 | module_put(task_thread_info(p)->exec_domain->module); |
1311 | bad_fork_cleanup_count: | 1304 | bad_fork_cleanup_count: |
1312 | put_group_info(p->group_info); | 1305 | atomic_dec(&p->cred->user->processes); |
1313 | atomic_dec(&p->user->processes); | 1306 | put_cred(p->real_cred); |
1314 | free_uid(p->user); | 1307 | put_cred(p->cred); |
1315 | bad_fork_free: | 1308 | bad_fork_free: |
1316 | free_task(p); | 1309 | free_task(p); |
1317 | fork_out: | 1310 | fork_out: |
@@ -1355,6 +1348,21 @@ long do_fork(unsigned long clone_flags, | |||
1355 | long nr; | 1348 | long nr; |
1356 | 1349 | ||
1357 | /* | 1350 | /* |
1351 | * Do some preliminary argument and permissions checking before we | ||
1352 | * actually start allocating stuff | ||
1353 | */ | ||
1354 | if (clone_flags & CLONE_NEWUSER) { | ||
1355 | if (clone_flags & CLONE_THREAD) | ||
1356 | return -EINVAL; | ||
1357 | /* hopefully this check will go away when userns support is | ||
1358 | * complete | ||
1359 | */ | ||
1360 | if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) || | ||
1361 | !capable(CAP_SETGID)) | ||
1362 | return -EPERM; | ||
1363 | } | ||
1364 | |||
1365 | /* | ||
1358 | * We hope to recycle these flags after 2.6.26 | 1366 | * We hope to recycle these flags after 2.6.26 |
1359 | */ | 1367 | */ |
1360 | if (unlikely(clone_flags & CLONE_STOPPED)) { | 1368 | if (unlikely(clone_flags & CLONE_STOPPED)) { |
@@ -1398,6 +1406,7 @@ long do_fork(unsigned long clone_flags, | |||
1398 | init_completion(&vfork); | 1406 | init_completion(&vfork); |
1399 | } | 1407 | } |
1400 | 1408 | ||
1409 | audit_finish_fork(p); | ||
1401 | tracehook_report_clone(trace, regs, clone_flags, nr, p); | 1410 | tracehook_report_clone(trace, regs, clone_flags, nr, p); |
1402 | 1411 | ||
1403 | /* | 1412 | /* |
@@ -1601,8 +1610,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
1601 | err = -EINVAL; | 1610 | err = -EINVAL; |
1602 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| | 1611 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| |
1603 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| | 1612 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| |
1604 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER| | 1613 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) |
1605 | CLONE_NEWNET)) | ||
1606 | goto bad_unshare_out; | 1614 | goto bad_unshare_out; |
1607 | 1615 | ||
1608 | /* | 1616 | /* |