Diffstat (limited to 'kernel/fork.c')

-rw-r--r--  kernel/fork.c  134
1 file changed, 90 insertions(+), 44 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 6a13c46cd87d..1380d8ace334 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -315,6 +315,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 		goto free_ti;
 
 	tsk->stack = ti;
+#ifdef CONFIG_SECCOMP
+	/*
+	 * We must handle setting up seccomp filters once we're under
+	 * the sighand lock in case orig has changed between now and
+	 * then. Until then, filter must be NULL to avoid messing up
+	 * the usage counts on the error path calling free_task.
+	 */
+	tsk->seccomp.filter = NULL;
+#endif
 
 	setup_thread_stack(tsk, orig);
 	clear_user_return_notifier(tsk);
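
The new comment encodes a defensive-initialization rule: any field that a shared error path (here, free_task()) may tear down must be made safe before the first failure exit can reach that path. A minimal userspace sketch of the same idiom, using hypothetical names rather than kernel types:

#include <stdlib.h>

struct filter { int refcount; };

struct task {
	struct filter *filter;	/* NULL until fully set up */
};

/* Shared teardown path: must tolerate a partially built task. */
static void free_task(struct task *t)
{
	if (t->filter && --t->filter->refcount == 0)
		free(t->filter);
	free(t);
}

static struct task *dup_task(void)
{
	struct task *t = malloc(sizeof(*t));

	if (!t)
		return NULL;
	/*
	 * Clear the pointer before anything else can fail, so that
	 * free_task() never drops a reference this copy does not own.
	 */
	t->filter = NULL;
	return t;
}

int main(void)
{
	struct task *t = dup_task();

	if (t)
		free_task(t);	/* safe even though filter was never assigned */
	return 0;
}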
@@ -365,12 +374,11 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	 */
 	down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
 
-	mm->locked_vm = 0;
-	mm->mmap = NULL;
-	mm->vmacache_seqnum = 0;
-	mm->map_count = 0;
-	cpumask_clear(mm_cpumask(mm));
-	mm->mm_rb = RB_ROOT;
+	mm->total_vm = oldmm->total_vm;
+	mm->shared_vm = oldmm->shared_vm;
+	mm->exec_vm = oldmm->exec_vm;
+	mm->stack_vm = oldmm->stack_vm;
+
 	rb_link = &mm->mm_rb.rb_node;
 	rb_parent = NULL;
 	pprev = &mm->mmap;
@@ -421,7 +429,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 				atomic_dec(&inode->i_writecount);
 			mutex_lock(&mapping->i_mmap_mutex);
 			if (tmp->vm_flags & VM_SHARED)
-				mapping->i_mmap_writable++;
+				atomic_inc(&mapping->i_mmap_writable);
 			flush_dcache_mmap_lock(mapping);
 			/* insert tmp into the share list, just after mpnt */
 			if (unlikely(tmp->vm_flags & VM_NONLINEAR))
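
The bare increment becomes atomic_inc(), presumably so that i_mmap_writable can also be examined and updated by paths that do not serialize on i_mmap_mutex; a plain ++ is a non-atomic read-modify-write and can lose updates when two tasks race. A standalone C11 sketch of why the atomic matters (not kernel code):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

/* Stand-in for i_mmap_writable: many tasks may bump it concurrently. */
static atomic_int writable_count;

static void *mapper(void *arg)
{
	(void)arg;
	for (int i = 0; i < 100000; i++)
		atomic_fetch_add(&writable_count, 1);	/* like atomic_inc() */
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, mapper, NULL);
	pthread_create(&b, NULL, mapper, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	/* With a plain int++ this could print less than 200000. */
	printf("%d\n", atomic_load(&writable_count));
	return 0;
}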
@@ -527,19 +535,37 @@ static void mm_init_aio(struct mm_struct *mm)
 #endif
 }
 
+static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
+{
+#ifdef CONFIG_MEMCG
+	mm->owner = p;
+#endif
+}
+
 static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 {
+	mm->mmap = NULL;
+	mm->mm_rb = RB_ROOT;
+	mm->vmacache_seqnum = 0;
 	atomic_set(&mm->mm_users, 1);
 	atomic_set(&mm->mm_count, 1);
 	init_rwsem(&mm->mmap_sem);
 	INIT_LIST_HEAD(&mm->mmlist);
 	mm->core_state = NULL;
 	atomic_long_set(&mm->nr_ptes, 0);
+	mm->map_count = 0;
+	mm->locked_vm = 0;
+	mm->pinned_vm = 0;
 	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
 	spin_lock_init(&mm->page_table_lock);
+	mm_init_cpumask(mm);
 	mm_init_aio(mm);
 	mm_init_owner(mm, p);
+	mmu_notifier_mm_init(mm);
 	clear_tlb_flush_pending(mm);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
+	mm->pmd_huge_pte = NULL;
+#endif
 
 	if (current->mm) {
 		mm->flags = current->mm->flags & MMF_INIT_MASK;
@@ -549,11 +575,17 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 		mm->def_flags = 0;
 	}
 
-	if (likely(!mm_alloc_pgd(mm))) {
-		mmu_notifier_mm_init(mm);
-		return mm;
-	}
+	if (mm_alloc_pgd(mm))
+		goto fail_nopgd;
+
+	if (init_new_context(p, mm))
+		goto fail_nocontext;
 
+	return mm;
+
+fail_nocontext:
+	mm_free_pgd(mm);
+fail_nopgd:
 	free_mm(mm);
 	return NULL;
 }
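
The rewritten tail of mm_init() switches from a nested success path to the kernel's usual goto-unwind style: each fallible step gets a label, and failures fall through the labels in reverse order of construction, so adding init_new_context() needs no duplicated cleanup. A compact userspace sketch of the idiom, with hypothetical stand-ins for mm_alloc_pgd() and init_new_context():

#include <stdio.h>
#include <stdlib.h>

struct mm { void *pgd; void *ctx; };

static int alloc_pgd(struct mm *mm)
{
	mm->pgd = malloc(64);
	return mm->pgd ? 0 : -1;
}

static int init_context(struct mm *mm)
{
	mm->ctx = malloc(64);
	return mm->ctx ? 0 : -1;
}

/* On failure, unwind exactly the steps that completed, then free mm. */
static struct mm *mm_init(struct mm *mm)
{
	if (alloc_pgd(mm))
		goto fail_nopgd;
	if (init_context(mm))
		goto fail_nocontext;
	return mm;

fail_nocontext:
	free(mm->pgd);
fail_nopgd:
	free(mm);
	return NULL;
}

int main(void)
{
	struct mm *mm = malloc(sizeof(*mm));

	if (!mm || !(mm = mm_init(mm)))
		return 1;
	printf("mm ready\n");
	free(mm->ctx);
	free(mm->pgd);
	free(mm);
	return 0;
}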
@@ -587,7 +619,6 @@ struct mm_struct *mm_alloc(void)
 		return NULL;
 
 	memset(mm, 0, sizeof(*mm));
-	mm_init_cpumask(mm);
 	return mm_init(mm, current);
 }
 
@@ -819,17 +850,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
 		goto fail_nomem;
 
 	memcpy(mm, oldmm, sizeof(*mm));
-	mm_init_cpumask(mm);
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
-	mm->pmd_huge_pte = NULL;
-#endif
 	if (!mm_init(mm, tsk))
 		goto fail_nomem;
 
-	if (init_new_context(tsk, mm))
-		goto fail_nocontext;
-
 	dup_mm_exe_file(oldmm, mm);
 
 	err = dup_mmap(mm, oldmm);
@@ -851,15 +875,6 @@ free_pt:
 
 fail_nomem:
 	return NULL;
-
-fail_nocontext:
-	/*
-	 * If init_new_context() failed, we cannot use mmput() to free the mm
-	 * because it calls destroy_context()
-	 */
-	mm_free_pgd(mm);
-	free_mm(mm);
-	return NULL;
 }
 
 static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
@@ -1081,6 +1096,39 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	return 0;
 }
 
+static void copy_seccomp(struct task_struct *p)
+{
+#ifdef CONFIG_SECCOMP
+	/*
+	 * Must be called with sighand->lock held, which is common to
+	 * all threads in the group. Holding cred_guard_mutex is not
+	 * needed because this new task is not yet running and cannot
+	 * be racing exec.
+	 */
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
+	/* Ref-count the new filter user, and assign it. */
+	get_seccomp_filter(current);
+	p->seccomp = current->seccomp;
+
+	/*
+	 * Explicitly enable no_new_privs here in case it got set
+	 * between the task_struct being duplicated and holding the
+	 * sighand lock. The seccomp state and nnp must be in sync.
+	 */
+	if (task_no_new_privs(current))
+		task_set_no_new_privs(p);
+
+	/*
+	 * If the parent gained a seccomp mode after copying thread
+	 * flags and before we held the sighand lock, we have to
+	 * manually enable the seccomp thread flag here.
+	 */
+	if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
+		set_tsk_thread_flag(p, TIF_SECCOMP);
+#endif
+}
+
 SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 {
 	current->clear_child_tid = tidptr;
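
The effect of copy_seccomp() is visible from userspace: the mode and filter the parent holds at fork are reference-counted and inherited by the child. A small probe along these lines should print the same Seccomp: value (2, i.e. filter mode) for parent and child on a kernel with CONFIG_SECCOMP_FILTER:

#include <linux/filter.h>
#include <linux/seccomp.h>
#include <stdio.h>
#include <string.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <unistd.h>

/* Print the "Seccomp:" line from /proc/self/status (0 = off, 2 = filter). */
static void show_seccomp_mode(const char *who)
{
	char line[128];
	FILE *f = fopen("/proc/self/status", "r");

	if (!f)
		return;
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "Seccomp:", 8))
			printf("%s %s", who, line);
	fclose(f);
}

int main(void)
{
	/* Allow-everything filter: enough to switch the task to filter mode. */
	struct sock_filter insns[] = {
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};

	/* no_new_privs lets an unprivileged task install a filter. */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
	    prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
		perror("seccomp setup");
		return 1;
	}

	show_seccomp_mode("parent:");
	if (fork() == 0) {
		/* State inherited via copy_seccomp() in copy_process(). */
		show_seccomp_mode("child: ");
		_exit(0);
	}
	wait(NULL);
	return 0;
}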
@@ -1095,17 +1143,9 @@ static void rt_mutex_init_task(struct task_struct *p)
 	p->pi_waiters = RB_ROOT;
 	p->pi_waiters_leftmost = NULL;
 	p->pi_blocked_on = NULL;
-	p->pi_top_task = NULL;
 #endif
 }
 
-#ifdef CONFIG_MEMCG
-void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
-{
-	mm->owner = p;
-}
-#endif /* CONFIG_MEMCG */
-
 /*
  * Initialize POSIX timer handling for a single task.
  */
@@ -1196,7 +1236,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto fork_out;
 
 	ftrace_graph_init_task(p);
-	get_seccomp_filter(p);
 
 	rt_mutex_init_task(p);
 
@@ -1262,9 +1301,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	posix_cpu_timers_init(p);
 
-	do_posix_clock_monotonic_gettime(&p->start_time);
-	p->real_start_time = p->start_time;
-	monotonic_to_bootbased(&p->real_start_time);
+	p->start_time = ktime_get_ns();
+	p->real_start_time = ktime_get_boot_ns();
 	p->io_context = NULL;
 	p->audit_context = NULL;
 	if (clone_flags & CLONE_THREAD)
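
The three timespec-based steps collapse into two direct nanosecond reads: start_time from the monotonic clock and real_start_time from the boot-based clock, which also keeps advancing across suspend, with no conversion step in between. A userspace analogue of the two clocks, for comparison:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Read a clock as a single nanosecond count, like ktime_get_ns(). */
static uint64_t clock_ns(clockid_t id)
{
	struct timespec ts;

	clock_gettime(id, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

int main(void)
{
	/* CLOCK_BOOTTIME also ticks while the system is suspended,
	 * so it can only run ahead of CLOCK_MONOTONIC. */
	printf("monotonic: %llu ns\n", (unsigned long long)clock_ns(CLOCK_MONOTONIC));
	printf("boottime:  %llu ns\n", (unsigned long long)clock_ns(CLOCK_BOOTTIME));
	return 0;
}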
@@ -1307,10 +1345,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
-#ifdef CONFIG_MEMCG
-	p->memcg_batch.do_batch = 0;
-	p->memcg_batch.memcg = NULL;
-#endif
 #ifdef CONFIG_BCACHE
 	p->sequential_io	= 0;
 	p->sequential_io_avg	= 0;
@@ -1328,6 +1362,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (retval)
 		goto bad_fork_cleanup_policy;
 	/* copy all the process information */
+	shm_init_task(p);
 	retval = copy_semundo(clone_flags, p);
 	if (retval)
 		goto bad_fork_cleanup_audit;
@@ -1437,6 +1472,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	spin_lock(&current->sighand->siglock);
 
 	/*
+	 * Copy seccomp details explicitly here, in case they were changed
+	 * before holding sighand lock.
+	 */
+	copy_seccomp(p);
+
+	/*
 	 * Process group and session signals need to be delivered to just the
 	 * parent before the fork or both the parent and the child after the
 	 * fork. Restart if a signal comes in before we add the new process to
@@ -1873,6 +1914,11 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 		 */
 		exit_sem(current);
 	}
+	if (unshare_flags & CLONE_NEWIPC) {
+		/* Orphan segments in old ns (see sem above). */
+		exit_shm(current);
+		shm_init_task(current);
+	}
 
 	if (new_nsproxy)
 		switch_task_namespaces(current, new_nsproxy);
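
The new CLONE_NEWIPC branch mirrors the sem handling just above it: shm segments created in the old namespace are orphaned there rather than carried along, and the task's shm bookkeeping is reset with shm_init_task(). A rough userspace demonstration (unshare(CLONE_NEWIPC) needs CAP_SYS_ADMIN); since SysV ids are namespace-local, the old segment id should stop resolving after the switch:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
	/* Create a segment in the current IPC namespace. */
	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);

	if (id < 0 || unshare(CLONE_NEWIPC) < 0) {
		perror("setup (unshare typically needs root)");
		return 1;
	}
	/*
	 * The segment still exists, but only in the old namespace;
	 * from this task's new namespace the id no longer resolves.
	 */
	if (shmat(id, NULL, 0) == (void *)-1)
		perror("shmat after unshare, expected to fail");
	return 0;
}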