diff options
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 101 |
1 files changed, 58 insertions, 43 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 495da2e9a8b4..4018308048cf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/mount.h> | 47 | #include <linux/mount.h> |
48 | #include <linux/audit.h> | 48 | #include <linux/audit.h> |
49 | #include <linux/memcontrol.h> | 49 | #include <linux/memcontrol.h> |
50 | #include <linux/ftrace.h> | ||
50 | #include <linux/profile.h> | 51 | #include <linux/profile.h> |
51 | #include <linux/rmap.h> | 52 | #include <linux/rmap.h> |
52 | #include <linux/acct.h> | 53 | #include <linux/acct.h> |
@@ -80,6 +81,8 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0; | |||
80 | 81 | ||
81 | __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ | 82 | __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ |
82 | 83 | ||
84 | DEFINE_TRACE(sched_process_fork); | ||
85 | |||
83 | int nr_processes(void) | 86 | int nr_processes(void) |
84 | { | 87 | { |
85 | int cpu; | 88 | int cpu; |
@@ -137,6 +140,7 @@ void free_task(struct task_struct *tsk) | |||
137 | prop_local_destroy_single(&tsk->dirties); | 140 | prop_local_destroy_single(&tsk->dirties); |
138 | free_thread_info(tsk->stack); | 141 | free_thread_info(tsk->stack); |
139 | rt_mutex_debug_task_free(tsk); | 142 | rt_mutex_debug_task_free(tsk); |
143 | ftrace_graph_exit_task(tsk); | ||
140 | free_task_struct(tsk); | 144 | free_task_struct(tsk); |
141 | } | 145 | } |
142 | EXPORT_SYMBOL(free_task); | 146 | EXPORT_SYMBOL(free_task); |
@@ -147,9 +151,8 @@ void __put_task_struct(struct task_struct *tsk) | |||
147 | WARN_ON(atomic_read(&tsk->usage)); | 151 | WARN_ON(atomic_read(&tsk->usage)); |
148 | WARN_ON(tsk == current); | 152 | WARN_ON(tsk == current); |
149 | 153 | ||
150 | security_task_free(tsk); | 154 | put_cred(tsk->real_cred); |
151 | free_uid(tsk->user); | 155 | put_cred(tsk->cred); |
152 | put_group_info(tsk->group_info); | ||
153 | delayacct_tsk_free(tsk); | 156 | delayacct_tsk_free(tsk); |
154 | 157 | ||
155 | if (!profile_handoff_task(tsk)) | 158 | if (!profile_handoff_task(tsk)) |
@@ -397,6 +400,18 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); | |||
397 | #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) | 400 | #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) |
398 | #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) | 401 | #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) |
399 | 402 | ||
403 | static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; | ||
404 | |||
405 | static int __init coredump_filter_setup(char *s) | ||
406 | { | ||
407 | default_dump_filter = | ||
408 | (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) & | ||
409 | MMF_DUMP_FILTER_MASK; | ||
410 | return 1; | ||
411 | } | ||
412 | |||
413 | __setup("coredump_filter=", coredump_filter_setup); | ||
414 | |||
400 | #include <linux/init_task.h> | 415 | #include <linux/init_task.h> |
401 | 416 | ||
402 | static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | 417 | static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) |
@@ -405,15 +420,14 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | |||
405 | atomic_set(&mm->mm_count, 1); | 420 | atomic_set(&mm->mm_count, 1); |
406 | init_rwsem(&mm->mmap_sem); | 421 | init_rwsem(&mm->mmap_sem); |
407 | INIT_LIST_HEAD(&mm->mmlist); | 422 | INIT_LIST_HEAD(&mm->mmlist); |
408 | mm->flags = (current->mm) ? current->mm->flags | 423 | mm->flags = (current->mm) ? current->mm->flags : default_dump_filter; |
409 | : MMF_DUMP_FILTER_DEFAULT; | ||
410 | mm->core_state = NULL; | 424 | mm->core_state = NULL; |
411 | mm->nr_ptes = 0; | 425 | mm->nr_ptes = 0; |
412 | set_mm_counter(mm, file_rss, 0); | 426 | set_mm_counter(mm, file_rss, 0); |
413 | set_mm_counter(mm, anon_rss, 0); | 427 | set_mm_counter(mm, anon_rss, 0); |
414 | spin_lock_init(&mm->page_table_lock); | 428 | spin_lock_init(&mm->page_table_lock); |
415 | rwlock_init(&mm->ioctx_list_lock); | 429 | spin_lock_init(&mm->ioctx_lock); |
416 | mm->ioctx_list = NULL; | 430 | INIT_HLIST_HEAD(&mm->ioctx_list); |
417 | mm->free_area_cache = TASK_UNMAPPED_BASE; | 431 | mm->free_area_cache = TASK_UNMAPPED_BASE; |
418 | mm->cached_hole_size = ~0UL; | 432 | mm->cached_hole_size = ~0UL; |
419 | mm_init_owner(mm, p); | 433 | mm_init_owner(mm, p); |
@@ -755,7 +769,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) | |||
755 | { | 769 | { |
756 | struct sighand_struct *sig; | 770 | struct sighand_struct *sig; |
757 | 771 | ||
758 | if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) { | 772 | if (clone_flags & CLONE_SIGHAND) { |
759 | atomic_inc(&current->sighand->count); | 773 | atomic_inc(&current->sighand->count); |
760 | return 0; | 774 | return 0; |
761 | } | 775 | } |
@@ -818,12 +832,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
818 | if (!sig) | 832 | if (!sig) |
819 | return -ENOMEM; | 833 | return -ENOMEM; |
820 | 834 | ||
821 | ret = copy_thread_group_keys(tsk); | ||
822 | if (ret < 0) { | ||
823 | kmem_cache_free(signal_cachep, sig); | ||
824 | return ret; | ||
825 | } | ||
826 | |||
827 | atomic_set(&sig->count, 1); | 835 | atomic_set(&sig->count, 1); |
828 | atomic_set(&sig->live, 1); | 836 | atomic_set(&sig->live, 1); |
829 | init_waitqueue_head(&sig->wait_chldexit); | 837 | init_waitqueue_head(&sig->wait_chldexit); |
@@ -868,7 +876,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
868 | void __cleanup_signal(struct signal_struct *sig) | 876 | void __cleanup_signal(struct signal_struct *sig) |
869 | { | 877 | { |
870 | thread_group_cputime_free(sig); | 878 | thread_group_cputime_free(sig); |
871 | exit_thread_group_keys(sig); | ||
872 | tty_kref_put(sig->tty); | 879 | tty_kref_put(sig->tty); |
873 | kmem_cache_free(signal_cachep, sig); | 880 | kmem_cache_free(signal_cachep, sig); |
874 | } | 881 | } |
@@ -984,16 +991,16 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
984 | DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); | 991 | DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); |
985 | #endif | 992 | #endif |
986 | retval = -EAGAIN; | 993 | retval = -EAGAIN; |
987 | if (atomic_read(&p->user->processes) >= | 994 | if (atomic_read(&p->real_cred->user->processes) >= |
988 | p->signal->rlim[RLIMIT_NPROC].rlim_cur) { | 995 | p->signal->rlim[RLIMIT_NPROC].rlim_cur) { |
989 | if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && | 996 | if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && |
990 | p->user != current->nsproxy->user_ns->root_user) | 997 | p->real_cred->user != INIT_USER) |
991 | goto bad_fork_free; | 998 | goto bad_fork_free; |
992 | } | 999 | } |
993 | 1000 | ||
994 | atomic_inc(&p->user->__count); | 1001 | retval = copy_creds(p, clone_flags); |
995 | atomic_inc(&p->user->processes); | 1002 | if (retval < 0) |
996 | get_group_info(p->group_info); | 1003 | goto bad_fork_free; |
997 | 1004 | ||
998 | /* | 1005 | /* |
999 | * If multiple threads are within copy_process(), then this check | 1006 | * If multiple threads are within copy_process(), then this check |
@@ -1048,10 +1055,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1048 | do_posix_clock_monotonic_gettime(&p->start_time); | 1055 | do_posix_clock_monotonic_gettime(&p->start_time); |
1049 | p->real_start_time = p->start_time; | 1056 | p->real_start_time = p->start_time; |
1050 | monotonic_to_bootbased(&p->real_start_time); | 1057 | monotonic_to_bootbased(&p->real_start_time); |
1051 | #ifdef CONFIG_SECURITY | ||
1052 | p->security = NULL; | ||
1053 | #endif | ||
1054 | p->cap_bset = current->cap_bset; | ||
1055 | p->io_context = NULL; | 1058 | p->io_context = NULL; |
1056 | p->audit_context = NULL; | 1059 | p->audit_context = NULL; |
1057 | cgroup_fork(p); | 1060 | cgroup_fork(p); |
@@ -1092,14 +1095,14 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1092 | #ifdef CONFIG_DEBUG_MUTEXES | 1095 | #ifdef CONFIG_DEBUG_MUTEXES |
1093 | p->blocked_on = NULL; /* not blocked yet */ | 1096 | p->blocked_on = NULL; /* not blocked yet */ |
1094 | #endif | 1097 | #endif |
1098 | if (unlikely(ptrace_reparented(current))) | ||
1099 | ptrace_fork(p, clone_flags); | ||
1095 | 1100 | ||
1096 | /* Perform scheduler related setup. Assign this task to a CPU. */ | 1101 | /* Perform scheduler related setup. Assign this task to a CPU. */ |
1097 | sched_fork(p, clone_flags); | 1102 | sched_fork(p, clone_flags); |
1098 | 1103 | ||
1099 | if ((retval = security_task_alloc(p))) | ||
1100 | goto bad_fork_cleanup_policy; | ||
1101 | if ((retval = audit_alloc(p))) | 1104 | if ((retval = audit_alloc(p))) |
1102 | goto bad_fork_cleanup_security; | 1105 | goto bad_fork_cleanup_policy; |
1103 | /* copy all the process information */ | 1106 | /* copy all the process information */ |
1104 | if ((retval = copy_semundo(clone_flags, p))) | 1107 | if ((retval = copy_semundo(clone_flags, p))) |
1105 | goto bad_fork_cleanup_audit; | 1108 | goto bad_fork_cleanup_audit; |
@@ -1113,10 +1116,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1113 | goto bad_fork_cleanup_sighand; | 1116 | goto bad_fork_cleanup_sighand; |
1114 | if ((retval = copy_mm(clone_flags, p))) | 1117 | if ((retval = copy_mm(clone_flags, p))) |
1115 | goto bad_fork_cleanup_signal; | 1118 | goto bad_fork_cleanup_signal; |
1116 | if ((retval = copy_keys(clone_flags, p))) | ||
1117 | goto bad_fork_cleanup_mm; | ||
1118 | if ((retval = copy_namespaces(clone_flags, p))) | 1119 | if ((retval = copy_namespaces(clone_flags, p))) |
1119 | goto bad_fork_cleanup_keys; | 1120 | goto bad_fork_cleanup_mm; |
1120 | if ((retval = copy_io(clone_flags, p))) | 1121 | if ((retval = copy_io(clone_flags, p))) |
1121 | goto bad_fork_cleanup_namespaces; | 1122 | goto bad_fork_cleanup_namespaces; |
1122 | retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); | 1123 | retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); |
@@ -1125,17 +1126,19 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1125 | 1126 | ||
1126 | if (pid != &init_struct_pid) { | 1127 | if (pid != &init_struct_pid) { |
1127 | retval = -ENOMEM; | 1128 | retval = -ENOMEM; |
1128 | pid = alloc_pid(task_active_pid_ns(p)); | 1129 | pid = alloc_pid(p->nsproxy->pid_ns); |
1129 | if (!pid) | 1130 | if (!pid) |
1130 | goto bad_fork_cleanup_io; | 1131 | goto bad_fork_cleanup_io; |
1131 | 1132 | ||
1132 | if (clone_flags & CLONE_NEWPID) { | 1133 | if (clone_flags & CLONE_NEWPID) { |
1133 | retval = pid_ns_prepare_proc(task_active_pid_ns(p)); | 1134 | retval = pid_ns_prepare_proc(p->nsproxy->pid_ns); |
1134 | if (retval < 0) | 1135 | if (retval < 0) |
1135 | goto bad_fork_free_pid; | 1136 | goto bad_fork_free_pid; |
1136 | } | 1137 | } |
1137 | } | 1138 | } |
1138 | 1139 | ||
1140 | ftrace_graph_init_task(p); | ||
1141 | |||
1139 | p->pid = pid_nr(pid); | 1142 | p->pid = pid_nr(pid); |
1140 | p->tgid = p->pid; | 1143 | p->tgid = p->pid; |
1141 | if (clone_flags & CLONE_THREAD) | 1144 | if (clone_flags & CLONE_THREAD) |
@@ -1144,7 +1147,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1144 | if (current->nsproxy != p->nsproxy) { | 1147 | if (current->nsproxy != p->nsproxy) { |
1145 | retval = ns_cgroup_clone(p, pid); | 1148 | retval = ns_cgroup_clone(p, pid); |
1146 | if (retval) | 1149 | if (retval) |
1147 | goto bad_fork_free_pid; | 1150 | goto bad_fork_free_graph; |
1148 | } | 1151 | } |
1149 | 1152 | ||
1150 | p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; | 1153 | p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; |
@@ -1237,7 +1240,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1237 | spin_unlock(&current->sighand->siglock); | 1240 | spin_unlock(&current->sighand->siglock); |
1238 | write_unlock_irq(&tasklist_lock); | 1241 | write_unlock_irq(&tasklist_lock); |
1239 | retval = -ERESTARTNOINTR; | 1242 | retval = -ERESTARTNOINTR; |
1240 | goto bad_fork_free_pid; | 1243 | goto bad_fork_free_graph; |
1241 | } | 1244 | } |
1242 | 1245 | ||
1243 | if (clone_flags & CLONE_THREAD) { | 1246 | if (clone_flags & CLONE_THREAD) { |
@@ -1274,6 +1277,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1274 | cgroup_post_fork(p); | 1277 | cgroup_post_fork(p); |
1275 | return p; | 1278 | return p; |
1276 | 1279 | ||
1280 | bad_fork_free_graph: | ||
1281 | ftrace_graph_exit_task(p); | ||
1277 | bad_fork_free_pid: | 1282 | bad_fork_free_pid: |
1278 | if (pid != &init_struct_pid) | 1283 | if (pid != &init_struct_pid) |
1279 | free_pid(pid); | 1284 | free_pid(pid); |
@@ -1281,8 +1286,6 @@ bad_fork_cleanup_io: | |||
1281 | put_io_context(p->io_context); | 1286 | put_io_context(p->io_context); |
1282 | bad_fork_cleanup_namespaces: | 1287 | bad_fork_cleanup_namespaces: |
1283 | exit_task_namespaces(p); | 1288 | exit_task_namespaces(p); |
1284 | bad_fork_cleanup_keys: | ||
1285 | exit_keys(p); | ||
1286 | bad_fork_cleanup_mm: | 1289 | bad_fork_cleanup_mm: |
1287 | if (p->mm) | 1290 | if (p->mm) |
1288 | mmput(p->mm); | 1291 | mmput(p->mm); |
@@ -1298,8 +1301,6 @@ bad_fork_cleanup_semundo: | |||
1298 | exit_sem(p); | 1301 | exit_sem(p); |
1299 | bad_fork_cleanup_audit: | 1302 | bad_fork_cleanup_audit: |
1300 | audit_free(p); | 1303 | audit_free(p); |
1301 | bad_fork_cleanup_security: | ||
1302 | security_task_free(p); | ||
1303 | bad_fork_cleanup_policy: | 1304 | bad_fork_cleanup_policy: |
1304 | #ifdef CONFIG_NUMA | 1305 | #ifdef CONFIG_NUMA |
1305 | mpol_put(p->mempolicy); | 1306 | mpol_put(p->mempolicy); |
@@ -1312,9 +1313,9 @@ bad_fork_cleanup_cgroup: | |||
1312 | bad_fork_cleanup_put_domain: | 1313 | bad_fork_cleanup_put_domain: |
1313 | module_put(task_thread_info(p)->exec_domain->module); | 1314 | module_put(task_thread_info(p)->exec_domain->module); |
1314 | bad_fork_cleanup_count: | 1315 | bad_fork_cleanup_count: |
1315 | put_group_info(p->group_info); | 1316 | atomic_dec(&p->cred->user->processes); |
1316 | atomic_dec(&p->user->processes); | 1317 | put_cred(p->real_cred); |
1317 | free_uid(p->user); | 1318 | put_cred(p->cred); |
1318 | bad_fork_free: | 1319 | bad_fork_free: |
1319 | free_task(p); | 1320 | free_task(p); |
1320 | fork_out: | 1321 | fork_out: |
@@ -1358,6 +1359,21 @@ long do_fork(unsigned long clone_flags, | |||
1358 | long nr; | 1359 | long nr; |
1359 | 1360 | ||
1360 | /* | 1361 | /* |
1362 | * Do some preliminary argument and permissions checking before we | ||
1363 | * actually start allocating stuff | ||
1364 | */ | ||
1365 | if (clone_flags & CLONE_NEWUSER) { | ||
1366 | if (clone_flags & CLONE_THREAD) | ||
1367 | return -EINVAL; | ||
1368 | /* hopefully this check will go away when userns support is | ||
1369 | * complete | ||
1370 | */ | ||
1371 | if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) || | ||
1372 | !capable(CAP_SETGID)) | ||
1373 | return -EPERM; | ||
1374 | } | ||
1375 | |||
1376 | /* | ||
1361 | * We hope to recycle these flags after 2.6.26 | 1377 | * We hope to recycle these flags after 2.6.26 |
1362 | */ | 1378 | */ |
1363 | if (unlikely(clone_flags & CLONE_STOPPED)) { | 1379 | if (unlikely(clone_flags & CLONE_STOPPED)) { |
@@ -1605,8 +1621,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
1605 | err = -EINVAL; | 1621 | err = -EINVAL; |
1606 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| | 1622 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| |
1607 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| | 1623 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| |
1608 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER| | 1624 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) |
1609 | CLONE_NEWNET)) | ||
1610 | goto bad_unshare_out; | 1625 | goto bad_unshare_out; |
1611 | 1626 | ||
1612 | /* | 1627 | /* |