Diffstat (limited to 'kernel/fork.c')
 kernel/fork.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 99 insertions(+), 16 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index e7548dee636b..ca406d916713 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -59,7 +59,6 @@
 #include <linux/taskstats_kern.h>
 #include <linux/random.h>
 #include <linux/tty.h>
-#include <linux/proc_fs.h>
 #include <linux/blkdev.h>
 #include <linux/fs_struct.h>
 #include <linux/magic.h>
@@ -383,15 +382,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 			get_file(file);
 			if (tmp->vm_flags & VM_DENYWRITE)
 				atomic_dec(&inode->i_writecount);
-			spin_lock(&mapping->i_mmap_lock);
+			mutex_lock(&mapping->i_mmap_mutex);
 			if (tmp->vm_flags & VM_SHARED)
 				mapping->i_mmap_writable++;
-			tmp->vm_truncate_count = mpnt->vm_truncate_count;
 			flush_dcache_mmap_lock(mapping);
 			/* insert tmp into the share list, just after mpnt */
 			vma_prio_tree_add(tmp, mpnt);
 			flush_dcache_mmap_unlock(mapping);
-			spin_unlock(&mapping->i_mmap_lock);
+			mutex_unlock(&mapping->i_mmap_mutex);
 		}
 
 		/*
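
The hunk above is part of the tree-wide switch of the i_mmap lock in struct address_space from a spinlock to a mutex, so this path may now sleep while linking the new vma into the file's prio tree. A minimal sketch of the pattern every i_mmap user follows after this change (the helper name is hypothetical, for illustration only):

	/* Hypothetical helper showing the locking pattern: all readers and
	 * writers of mapping->i_mmap now serialize on i_mmap_mutex. */
	static void link_file_vma(struct address_space *mapping,
				  struct vm_area_struct *tmp,
				  struct vm_area_struct *mpnt)
	{
		mutex_lock(&mapping->i_mmap_mutex);   /* was spin_lock(&...->i_mmap_lock) */
		flush_dcache_mmap_lock(mapping);
		vma_prio_tree_add(tmp, mpnt);         /* insert tmp just after mpnt */
		flush_dcache_mmap_unlock(mapping);
		mutex_unlock(&mapping->i_mmap_mutex);
	}
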
@@ -486,6 +484,20 @@ static void mm_init_aio(struct mm_struct *mm)
 #endif
 }
 
+int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL))
+		return -ENOMEM;
+
+	if (oldmm)
+		cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm));
+	else
+		memset(mm_cpumask(mm), 0, cpumask_size());
+#endif
+	return 0;
+}
+
 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 {
 	atomic_set(&mm->mm_users, 1);
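
For context, the new mm_init_cpumask() can only fail when CONFIG_CPUMASK_OFFSTACK is set: with the option off, cpumask_var_t is an array embedded in its container and the alloc/free calls compile away. Paraphrased from the <linux/cpumask.h> definitions of this era:

	#ifndef CONFIG_CPUMASK_OFFSTACK
	/* The mask is a one-element array embedded in its container, so
	 * "allocation" cannot fail and "free" has nothing to do. */
	typedef struct cpumask cpumask_var_t[1];

	static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
	{
		return true;
	}

	static inline void free_cpumask_var(cpumask_var_t mask)
	{
	}
	#endif
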
@@ -522,10 +534,20 @@ struct mm_struct * mm_alloc(void)
 	struct mm_struct * mm;
 
 	mm = allocate_mm();
-	if (mm) {
-		memset(mm, 0, sizeof(*mm));
-		mm = mm_init(mm, current);
+	if (!mm)
+		return NULL;
+
+	memset(mm, 0, sizeof(*mm));
+	mm = mm_init(mm, current);
+	if (!mm)
+		return NULL;
+
+	if (mm_init_cpumask(mm, NULL)) {
+		mm_free_pgd(mm);
+		free_mm(mm);
+		return NULL;
 	}
+
 	return mm;
 }
 
@@ -537,6 +559,7 @@ struct mm_struct * mm_alloc(void)
 void __mmdrop(struct mm_struct *mm)
 {
 	BUG_ON(mm == &init_mm);
+	free_cpumask_var(mm->cpu_vm_mask_var);
 	mm_free_pgd(mm);
 	destroy_context(mm);
 	mmu_notifier_mm_destroy(mm);
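
The free_cpumask_var() added to __mmdrop() balances the alloc_cpumask_var() in mm_init_cpumask(): __mmdrop() runs when the final mm_count reference is dropped, so in the off-stack case the mask lives exactly as long as the mm itself. A lifecycle sketch, with the call chain in the comments stated as an assumption:

	struct mm_struct *mm = mm_alloc();	/* mm_init_cpumask() allocates
						 * cpu_vm_mask_var (off-stack case) */
	if (mm)
		mmput(mm);	/* drops the last mm_users reference; teardown
				 * ends in __mmdrop() -> free_cpumask_var() */
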
@@ -573,6 +596,57 @@ void mmput(struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(mmput);
 
+/*
+ * We added or removed a vma mapping the executable. The vmas are only mapped
+ * during exec and are not mapped with the mmap system call.
+ * Callers must hold down_write() on the mm's mmap_sem for these
+ */
+void added_exe_file_vma(struct mm_struct *mm)
+{
+	mm->num_exe_file_vmas++;
+}
+
+void removed_exe_file_vma(struct mm_struct *mm)
+{
+	mm->num_exe_file_vmas--;
+	if ((mm->num_exe_file_vmas == 0) && mm->exe_file) {
+		fput(mm->exe_file);
+		mm->exe_file = NULL;
+	}
+
+}
+
+void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
+{
+	if (new_exe_file)
+		get_file(new_exe_file);
+	if (mm->exe_file)
+		fput(mm->exe_file);
+	mm->exe_file = new_exe_file;
+	mm->num_exe_file_vmas = 0;
+}
+
+struct file *get_mm_exe_file(struct mm_struct *mm)
+{
+	struct file *exe_file;
+
+	/* We need mmap_sem to protect against races with removal of
+	 * VM_EXECUTABLE vmas */
+	down_read(&mm->mmap_sem);
+	exe_file = mm->exe_file;
+	if (exe_file)
+		get_file(exe_file);
+	up_read(&mm->mmap_sem);
+	return exe_file;
+}
+
+static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
+{
+	/* It's safe to write the exe_file pointer without exe_file_lock because
+	 * this is called during fork when the task is not yet in /proc */
+	newmm->exe_file = get_mm_exe_file(oldmm);
+}
+
 /**
  * get_task_mm - acquire a reference to the task's mm
  *
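
The exe_file helpers above keep mm->exe_file reference counted: set_mm_exe_file() takes a get_file() reference, removed_exe_file_vma() drops it with the last VM_EXECUTABLE vma, and readers must go through get_mm_exe_file(), which holds mmap_sem for reading to avoid racing that removal. A hypothetical consumer, for illustration only:

	/* Hypothetical reader: never dereference mm->exe_file directly;
	 * take a counted reference under mmap_sem via get_mm_exe_file(). */
	static int exe_file_present(struct mm_struct *mm)
	{
		struct file *exe = get_mm_exe_file(mm);	/* own reference or NULL */
		int ret = exe != NULL;

		if (exe)
			fput(exe);	/* drop the reference we took */
		return ret;
	}
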
@@ -691,6 +765,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 	if (!mm_init(mm, tsk))
 		goto fail_nomem;
 
+	if (mm_init_cpumask(mm, oldmm))
+		goto fail_nocpumask;
+
 	if (init_new_context(tsk, mm))
 		goto fail_nocontext;
 
@@ -717,6 +794,9 @@ fail_nomem:
 	return NULL;
 
 fail_nocontext:
+	free_cpumask_var(mm->cpu_vm_mask_var);
+
+fail_nocpumask:
 	/*
 	 * If init_new_context() failed, we cannot use mmput() to free the mm
 	 * because it calls destroy_context()
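
The new fail_nocpumask label slots into dup_mm()'s existing unwind chain, which runs in reverse order of initialization: a failure at step N jumps to the label that undoes steps N-1 down to 1. An abbreviated sketch of the shape (the real function has more steps on either side):

	if (mm_init_cpumask(mm, oldmm))		/* step 2 */
		goto fail_nocpumask;		/* undo step 1 only */

	if (init_new_context(tsk, mm))		/* step 3 */
		goto fail_nocontext;		/* undo step 2, then step 1 */
	/* ... */

fail_nocontext:
	free_cpumask_var(mm->cpu_vm_mask_var);	/* undo step 2 */

fail_nocpumask:
	mm_free_pgd(mm);			/* undo step 1 (mm_init) */
	free_mm(mm);
	return NULL;
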
@@ -927,6 +1007,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	tty_audit_fork(sig);
 	sched_autogroup_fork(sig);
 
+#ifdef CONFIG_CGROUPS
+	init_rwsem(&sig->threadgroup_fork_lock);
+#endif
+
 	sig->oom_adj = current->signal->oom_adj;
 	sig->oom_score_adj = current->signal->oom_score_adj;
 	sig->oom_score_adj_min = current->signal->oom_score_adj_min;
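
The threadgroup_fork_lock rwsem initialized above is what threadgroup_fork_read_lock()/_unlock() in the copy_process() hunks below operate on: every CLONE_THREAD fork holds the read side across the fork, while the cgroup code can take the write side to move an entire thread group atomically, with no new threads appearing mid-move. Paraphrased from the matching <linux/sched.h> helpers in this series:

	#ifdef CONFIG_CGROUPS
	static inline void threadgroup_fork_read_lock(struct task_struct *tsk)
	{
		down_read(&tsk->signal->threadgroup_fork_lock);
	}

	static inline void threadgroup_fork_read_unlock(struct task_struct *tsk)
	{
		up_read(&tsk->signal->threadgroup_fork_lock);
	}
	#endif
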
@@ -1103,12 +1187,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	posix_cpu_timers_init(p);
 
-	p->lock_depth = -1;		/* -1 = no lock */
 	do_posix_clock_monotonic_gettime(&p->start_time);
 	p->real_start_time = p->start_time;
 	monotonic_to_bootbased(&p->real_start_time);
 	p->io_context = NULL;
 	p->audit_context = NULL;
+	if (clone_flags & CLONE_THREAD)
+		threadgroup_fork_read_lock(current);
 	cgroup_fork(p);
 #ifdef CONFIG_NUMA
 	p->mempolicy = mpol_dup(p->mempolicy);
@@ -1153,7 +1238,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
-	sched_fork(p, clone_flags);
+	sched_fork(p);
 
 	retval = perf_event_init_task(p);
 	if (retval)
@@ -1194,12 +1279,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (clone_flags & CLONE_THREAD)
 		p->tgid = current->tgid;
 
-	if (current->nsproxy != p->nsproxy) {
-		retval = ns_cgroup_clone(p, pid);
-		if (retval)
-			goto bad_fork_free_pid;
-	}
-
 	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
 	/*
 	 * Clear TID on mm_release()?
@@ -1313,6 +1392,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
+	if (clone_flags & CLONE_THREAD)
+		threadgroup_fork_read_unlock(current);
 	perf_event_fork(p);
 	return p;
 
@@ -1351,6 +1432,8 @@ bad_fork_cleanup_policy:
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
 #endif
+	if (clone_flags & CLONE_THREAD)
+		threadgroup_fork_read_unlock(current);
 	cgroup_exit(p, cgroup_callbacks_done);
 	delayacct_tsk_free(p);
 	module_put(task_thread_info(p)->exec_domain->module);
@@ -1464,7 +1547,7 @@ long do_fork(unsigned long clone_flags,
 	 */
 	p->flags &= ~PF_STARTING;
 
-	wake_up_new_task(p, clone_flags);
+	wake_up_new_task(p);
 
 	tracehook_report_clone_complete(trace, regs,
 					clone_flags, nr, p);