aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/fork.c
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2008-10-15 07:46:29 -0400
committer Ingo Molnar <mingo@elte.hu> 2008-10-15 07:46:29 -0400
commit b2aaf8f74cdc84a9182f6cabf198b7763bcb9d40 (patch)
tree 53ccb1c2c14751fe69cf93102e76e97021f6df07 /kernel/fork.c
parent 4f962d4d65923d7b722192e729840cfb79af0a5a (diff)
parent 278429cff8809958d25415ba0ed32b59866ab1a8 (diff)
Merge branch 'linus' into stackprotector
Conflicts: arch/x86/kernel/Makefile include/asm-x86/pda.h
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- kernel/fork.c 143
1 file changed, 77 insertions, 66 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index d428336e7aa1..99c5c655b098 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -23,18 +23,22 @@
23#include <linux/sem.h> 23#include <linux/sem.h>
24#include <linux/file.h> 24#include <linux/file.h>
25#include <linux/fdtable.h> 25#include <linux/fdtable.h>
26#include <linux/iocontext.h>
26#include <linux/key.h> 27#include <linux/key.h>
27#include <linux/binfmts.h> 28#include <linux/binfmts.h>
28#include <linux/mman.h> 29#include <linux/mman.h>
30#include <linux/mmu_notifier.h>
29#include <linux/fs.h> 31#include <linux/fs.h>
30#include <linux/nsproxy.h> 32#include <linux/nsproxy.h>
31#include <linux/capability.h> 33#include <linux/capability.h>
32#include <linux/cpu.h> 34#include <linux/cpu.h>
33#include <linux/cgroup.h> 35#include <linux/cgroup.h>
34#include <linux/security.h> 36#include <linux/security.h>
37#include <linux/hugetlb.h>
35#include <linux/swap.h> 38#include <linux/swap.h>
36#include <linux/syscalls.h> 39#include <linux/syscalls.h>
37#include <linux/jiffies.h> 40#include <linux/jiffies.h>
41#include <linux/tracehook.h>
38#include <linux/futex.h> 42#include <linux/futex.h>
39#include <linux/task_io_accounting_ops.h> 43#include <linux/task_io_accounting_ops.h>
40#include <linux/rcupdate.h> 44#include <linux/rcupdate.h>
@@ -92,6 +96,23 @@ int nr_processes(void)
92static struct kmem_cache *task_struct_cachep; 96static struct kmem_cache *task_struct_cachep;
93#endif 97#endif
94 98
99#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
100static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
101{
102#ifdef CONFIG_DEBUG_STACK_USAGE
103 gfp_t mask = GFP_KERNEL | __GFP_ZERO;
104#else
105 gfp_t mask = GFP_KERNEL;
106#endif
107 return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
108}
109
110static inline void free_thread_info(struct thread_info *ti)
111{
112 free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
113}
114#endif
115
95/* SLAB cache for signal_struct structures (tsk->signal) */ 116/* SLAB cache for signal_struct structures (tsk->signal) */
96static struct kmem_cache *signal_cachep; 117static struct kmem_cache *signal_cachep;
97 118
@@ -311,6 +332,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
311 } 332 }
312 333
313 /* 334 /*
335 * Clear hugetlb-related page reserves for children. This only
336 * affects MAP_PRIVATE mappings. Faults generated by the child
337 * are not guaranteed to succeed, even if read-only
338 */
339 if (is_vm_hugetlb_page(tmp))
340 reset_vma_resv_huge_pages(tmp);
341
342 /*
314 * Link in the new vma and copy the page table entries. 343 * Link in the new vma and copy the page table entries.
315 */ 344 */
316 *pprev = tmp; 345 *pprev = tmp;
@@ -378,7 +407,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
378 INIT_LIST_HEAD(&mm->mmlist); 407 INIT_LIST_HEAD(&mm->mmlist);
379 mm->flags = (current->mm) ? current->mm->flags 408 mm->flags = (current->mm) ? current->mm->flags
380 : MMF_DUMP_FILTER_DEFAULT; 409 : MMF_DUMP_FILTER_DEFAULT;
381 mm->core_waiters = 0; 410 mm->core_state = NULL;
382 mm->nr_ptes = 0; 411 mm->nr_ptes = 0;
383 set_mm_counter(mm, file_rss, 0); 412 set_mm_counter(mm, file_rss, 0);
384 set_mm_counter(mm, anon_rss, 0); 413 set_mm_counter(mm, anon_rss, 0);
@@ -391,6 +420,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
391 420
392 if (likely(!mm_alloc_pgd(mm))) { 421 if (likely(!mm_alloc_pgd(mm))) {
393 mm->def_flags = 0; 422 mm->def_flags = 0;
423 mmu_notifier_mm_init(mm);
394 return mm; 424 return mm;
395 } 425 }
396 426
@@ -423,6 +453,7 @@ void __mmdrop(struct mm_struct *mm)
423 BUG_ON(mm == &init_mm); 453 BUG_ON(mm == &init_mm);
424 mm_free_pgd(mm); 454 mm_free_pgd(mm);
425 destroy_context(mm); 455 destroy_context(mm);
456 mmu_notifier_mm_destroy(mm);
426 free_mm(mm); 457 free_mm(mm);
427} 458}
428EXPORT_SYMBOL_GPL(__mmdrop); 459EXPORT_SYMBOL_GPL(__mmdrop);
@@ -452,7 +483,7 @@ EXPORT_SYMBOL_GPL(mmput);
452/** 483/**
453 * get_task_mm - acquire a reference to the task's mm 484 * get_task_mm - acquire a reference to the task's mm
454 * 485 *
455 * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning 486 * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning
456 * this kernel workthread has transiently adopted a user mm with use_mm, 487 * this kernel workthread has transiently adopted a user mm with use_mm,
457 * to do its AIO) is not set and if so returns a reference to it, after 488 * to do its AIO) is not set and if so returns a reference to it, after
458 * bumping up the use count. User must release the mm via mmput() 489 * bumping up the use count. User must release the mm via mmput()
@@ -465,7 +496,7 @@ struct mm_struct *get_task_mm(struct task_struct *task)
465 task_lock(task); 496 task_lock(task);
466 mm = task->mm; 497 mm = task->mm;
467 if (mm) { 498 if (mm) {
468 if (task->flags & PF_BORROWED_MM) 499 if (task->flags & PF_KTHREAD)
469 mm = NULL; 500 mm = NULL;
470 else 501 else
471 atomic_inc(&mm->mm_users); 502 atomic_inc(&mm->mm_users);
@@ -634,13 +665,6 @@ static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
634 path_get(&old->root); 665 path_get(&old->root);
635 fs->pwd = old->pwd; 666 fs->pwd = old->pwd;
636 path_get(&old->pwd); 667 path_get(&old->pwd);
637 if (old->altroot.dentry) {
638 fs->altroot = old->altroot;
639 path_get(&old->altroot);
640 } else {
641 fs->altroot.mnt = NULL;
642 fs->altroot.dentry = NULL;
643 }
644 read_unlock(&old->lock); 668 read_unlock(&old->lock);
645 } 669 }
646 return fs; 670 return fs;
@@ -783,6 +807,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
783 807
784 sig->leader = 0; /* session leadership doesn't inherit */ 808 sig->leader = 0; /* session leadership doesn't inherit */
785 sig->tty_old_pgrp = NULL; 809 sig->tty_old_pgrp = NULL;
810 sig->tty = NULL;
786 811
787 sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; 812 sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
788 sig->gtime = cputime_zero; 813 sig->gtime = cputime_zero;
@@ -790,6 +815,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
790 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; 815 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
791 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; 816 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
792 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; 817 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
818 task_io_accounting_init(&sig->ioac);
793 sig->sum_sched_runtime = 0; 819 sig->sum_sched_runtime = 0;
794 INIT_LIST_HEAD(&sig->cpu_timers[0]); 820 INIT_LIST_HEAD(&sig->cpu_timers[0]);
795 INIT_LIST_HEAD(&sig->cpu_timers[1]); 821 INIT_LIST_HEAD(&sig->cpu_timers[1]);
@@ -818,6 +844,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
818void __cleanup_signal(struct signal_struct *sig) 844void __cleanup_signal(struct signal_struct *sig)
819{ 845{
820 exit_thread_group_keys(sig); 846 exit_thread_group_keys(sig);
847 tty_kref_put(sig->tty);
821 kmem_cache_free(signal_cachep, sig); 848 kmem_cache_free(signal_cachep, sig);
822} 849}
823 850
@@ -837,8 +864,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
837 864
838 new_flags &= ~PF_SUPERPRIV; 865 new_flags &= ~PF_SUPERPRIV;
839 new_flags |= PF_FORKNOEXEC; 866 new_flags |= PF_FORKNOEXEC;
840 if (!(clone_flags & CLONE_PTRACE)) 867 new_flags |= PF_STARTING;
841 p->ptrace = 0;
842 p->flags = new_flags; 868 p->flags = new_flags;
843 clear_freeze_flag(p); 869 clear_freeze_flag(p);
844} 870}
@@ -879,7 +905,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
879 struct pt_regs *regs, 905 struct pt_regs *regs,
880 unsigned long stack_size, 906 unsigned long stack_size,
881 int __user *child_tidptr, 907 int __user *child_tidptr,
882 struct pid *pid) 908 struct pid *pid,
909 int trace)
883{ 910{
884 int retval; 911 int retval;
885 struct task_struct *p; 912 struct task_struct *p;
@@ -914,7 +941,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
914 941
915 rt_mutex_init_task(p); 942 rt_mutex_init_task(p);
916 943
917#ifdef CONFIG_TRACE_IRQFLAGS 944#ifdef CONFIG_PROVE_LOCKING
918 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); 945 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
919 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); 946 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
920#endif 947#endif
@@ -972,13 +999,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
972 p->last_switch_timestamp = 0; 999 p->last_switch_timestamp = 0;
973#endif 1000#endif
974 1001
975#ifdef CONFIG_TASK_XACCT 1002 task_io_accounting_init(&p->ioac);
976 p->rchar = 0; /* I/O counter: bytes read */
977 p->wchar = 0; /* I/O counter: bytes written */
978 p->syscr = 0; /* I/O counter: read syscalls */
979 p->syscw = 0; /* I/O counter: write syscalls */
980#endif
981 task_io_accounting_init(p);
982 acct_clear_integrals(p); 1003 acct_clear_integrals(p);
983 1004
984 p->it_virt_expires = cputime_zero; 1005 p->it_virt_expires = cputime_zero;
@@ -1085,6 +1106,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1085 if (clone_flags & CLONE_THREAD) 1106 if (clone_flags & CLONE_THREAD)
1086 p->tgid = current->tgid; 1107 p->tgid = current->tgid;
1087 1108
1109 if (current->nsproxy != p->nsproxy) {
1110 retval = ns_cgroup_clone(p, pid);
1111 if (retval)
1112 goto bad_fork_free_pid;
1113 }
1114
1088 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 1115 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
1089 /* 1116 /*
1090 * Clear TID on mm_release()? 1117 * Clear TID on mm_release()?
@@ -1129,8 +1156,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1129 */ 1156 */
1130 p->group_leader = p; 1157 p->group_leader = p;
1131 INIT_LIST_HEAD(&p->thread_group); 1158 INIT_LIST_HEAD(&p->thread_group);
1132 INIT_LIST_HEAD(&p->ptrace_children);
1133 INIT_LIST_HEAD(&p->ptrace_list);
1134 1159
1135 /* Now that the task is set up, run cgroup callbacks if 1160 /* Now that the task is set up, run cgroup callbacks if
1136 * necessary. We need to run them before the task is visible 1161 * necessary. We need to run them before the task is visible
@@ -1161,7 +1186,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1161 p->real_parent = current->real_parent; 1186 p->real_parent = current->real_parent;
1162 else 1187 else
1163 p->real_parent = current; 1188 p->real_parent = current;
1164 p->parent = p->real_parent;
1165 1189
1166 spin_lock(&current->sighand->siglock); 1190 spin_lock(&current->sighand->siglock);
1167 1191
@@ -1202,16 +1226,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1202 } 1226 }
1203 1227
1204 if (likely(p->pid)) { 1228 if (likely(p->pid)) {
1205 add_parent(p); 1229 list_add_tail(&p->sibling, &p->real_parent->children);
1206 if (unlikely(p->ptrace & PT_PTRACED)) 1230 tracehook_finish_clone(p, clone_flags, trace);
1207 __ptrace_link(p, current->parent);
1208 1231
1209 if (thread_group_leader(p)) { 1232 if (thread_group_leader(p)) {
1210 if (clone_flags & CLONE_NEWPID) 1233 if (clone_flags & CLONE_NEWPID)
1211 p->nsproxy->pid_ns->child_reaper = p; 1234 p->nsproxy->pid_ns->child_reaper = p;
1212 1235
1213 p->signal->leader_pid = pid; 1236 p->signal->leader_pid = pid;
1214 p->signal->tty = current->signal->tty; 1237 tty_kref_put(p->signal->tty);
1238 p->signal->tty = tty_kref_get(current->signal->tty);
1215 set_task_pgrp(p, task_pgrp_nr(current)); 1239 set_task_pgrp(p, task_pgrp_nr(current));
1216 set_task_session(p, task_session_nr(current)); 1240 set_task_session(p, task_session_nr(current));
1217 attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); 1241 attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
@@ -1289,29 +1313,13 @@ struct task_struct * __cpuinit fork_idle(int cpu)
1289 struct pt_regs regs; 1313 struct pt_regs regs;
1290 1314
1291 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, 1315 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
1292 &init_struct_pid); 1316 &init_struct_pid, 0);
1293 if (!IS_ERR(task)) 1317 if (!IS_ERR(task))
1294 init_idle(task, cpu); 1318 init_idle(task, cpu);
1295 1319
1296 return task; 1320 return task;
1297} 1321}
1298 1322
1299static int fork_traceflag(unsigned clone_flags)
1300{
1301 if (clone_flags & CLONE_UNTRACED)
1302 return 0;
1303 else if (clone_flags & CLONE_VFORK) {
1304 if (current->ptrace & PT_TRACE_VFORK)
1305 return PTRACE_EVENT_VFORK;
1306 } else if ((clone_flags & CSIGNAL) != SIGCHLD) {
1307 if (current->ptrace & PT_TRACE_CLONE)
1308 return PTRACE_EVENT_CLONE;
1309 } else if (current->ptrace & PT_TRACE_FORK)
1310 return PTRACE_EVENT_FORK;
1311
1312 return 0;
1313}
1314
1315/* 1323/*
1316 * Ok, this is the main fork-routine. 1324 * Ok, this is the main fork-routine.
1317 * 1325 *
@@ -1346,14 +1354,14 @@ long do_fork(unsigned long clone_flags,
1346 } 1354 }
1347 } 1355 }
1348 1356
1349 if (unlikely(current->ptrace)) { 1357 /*
1350 trace = fork_traceflag (clone_flags); 1358 * When called from kernel_thread, don't do user tracing stuff.
1351 if (trace) 1359 */
1352 clone_flags |= CLONE_PTRACE; 1360 if (likely(user_mode(regs)))
1353 } 1361 trace = tracehook_prepare_clone(clone_flags);
1354 1362
1355 p = copy_process(clone_flags, stack_start, regs, stack_size, 1363 p = copy_process(clone_flags, stack_start, regs, stack_size,
1356 child_tidptr, NULL); 1364 child_tidptr, NULL, trace);
1357 /* 1365 /*
1358 * Do this prior waking up the new thread - the thread pointer 1366 * Do this prior waking up the new thread - the thread pointer
1359 * might get invalid after that point, if the thread exits quickly. 1367 * might get invalid after that point, if the thread exits quickly.
@@ -1371,32 +1379,35 @@ long do_fork(unsigned long clone_flags,
1371 init_completion(&vfork); 1379 init_completion(&vfork);
1372 } 1380 }
1373 1381
1374 if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) { 1382 tracehook_report_clone(trace, regs, clone_flags, nr, p);
1383
1384 /*
1385 * We set PF_STARTING at creation in case tracing wants to
1386 * use this to distinguish a fully live task from one that
1387 * hasn't gotten to tracehook_report_clone() yet. Now we
1388 * clear it and set the child going.
1389 */
1390 p->flags &= ~PF_STARTING;
1391
1392 if (unlikely(clone_flags & CLONE_STOPPED)) {
1375 /* 1393 /*
1376 * We'll start up with an immediate SIGSTOP. 1394 * We'll start up with an immediate SIGSTOP.
1377 */ 1395 */
1378 sigaddset(&p->pending.signal, SIGSTOP); 1396 sigaddset(&p->pending.signal, SIGSTOP);
1379 set_tsk_thread_flag(p, TIF_SIGPENDING); 1397 set_tsk_thread_flag(p, TIF_SIGPENDING);
1380 }
1381
1382 if (!(clone_flags & CLONE_STOPPED))
1383 wake_up_new_task(p, clone_flags);
1384 else
1385 __set_task_state(p, TASK_STOPPED); 1398 __set_task_state(p, TASK_STOPPED);
1386 1399 } else {
1387 if (unlikely (trace)) { 1400 wake_up_new_task(p, clone_flags);
1388 current->ptrace_message = nr;
1389 ptrace_notify ((trace << 8) | SIGTRAP);
1390 } 1401 }
1391 1402
1403 tracehook_report_clone_complete(trace, regs,
1404 clone_flags, nr, p);
1405
1392 if (clone_flags & CLONE_VFORK) { 1406 if (clone_flags & CLONE_VFORK) {
1393 freezer_do_not_count(); 1407 freezer_do_not_count();
1394 wait_for_completion(&vfork); 1408 wait_for_completion(&vfork);
1395 freezer_count(); 1409 freezer_count();
1396 if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) { 1410 tracehook_report_vfork_done(p, nr);
1397 current->ptrace_message = nr;
1398 ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
1399 }
1400 } 1411 }
1401 } else { 1412 } else {
1402 nr = PTR_ERR(p); 1413 nr = PTR_ERR(p);
@@ -1408,7 +1419,7 @@ long do_fork(unsigned long clone_flags,
1408#define ARCH_MIN_MMSTRUCT_ALIGN 0 1419#define ARCH_MIN_MMSTRUCT_ALIGN 0
1409#endif 1420#endif
1410 1421
1411static void sighand_ctor(struct kmem_cache *cachep, void *data) 1422static void sighand_ctor(void *data)
1412{ 1423{
1413 struct sighand_struct *sighand = data; 1424 struct sighand_struct *sighand = data;
1414 1425