path: root/kernel/fork.c
Diffstat (limited to 'kernel/fork.c')
 kernel/fork.c | 134 ++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 90 insertions(+), 44 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 6a13c46cd87d..1380d8ace334 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -315,6 +315,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 		goto free_ti;
 
 	tsk->stack = ti;
+#ifdef CONFIG_SECCOMP
+	/*
+	 * We must handle setting up seccomp filters once we're under
+	 * the sighand lock in case orig has changed between now and
+	 * then. Until then, filter must be NULL to avoid messing up
+	 * the usage counts on the error path calling free_task.
+	 */
+	tsk->seccomp.filter = NULL;
+#endif
 
 	setup_thread_stack(tsk, orig);
 	clear_user_return_notifier(tsk);
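The hunk above defers seccomp filter setup until the sighand lock is held, and nulling tsk->seccomp.filter first keeps the error path honest. A minimal userspace sketch of the refcount hazard being avoided (all names are illustrative stand-ins, not kernel API):

#include <stdlib.h>

struct filter {
	int usage;			/* shared reference count */
};

static void put_filter(struct filter *f)
{
	/* NULL is a no-op: exactly why the child starts with filter = NULL */
	if (f && --f->usage == 0)
		free(f);
}

/* Error path analogous to free_task(): runs before the child took a ref. */
static void free_child(struct filter *child_filter)
{
	put_filter(child_filter);
}

int main(void)
{
	struct filter *parent = calloc(1, sizeof(*parent));

	if (!parent)
		return 1;
	parent->usage = 1;

	/*
	 * If the child had blindly copied the parent's pointer, this call
	 * would drop a reference the child never acquired, corrupting the
	 * parent's count. With NULL it is safe.
	 */
	free_child(NULL);

	put_filter(parent);		/* parent drops its own reference */
	return 0;
}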
@@ -365,12 +374,11 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	 */
 	down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
 
-	mm->locked_vm = 0;
-	mm->mmap = NULL;
-	mm->vmacache_seqnum = 0;
-	mm->map_count = 0;
-	cpumask_clear(mm_cpumask(mm));
-	mm->mm_rb = RB_ROOT;
+	mm->total_vm = oldmm->total_vm;
+	mm->shared_vm = oldmm->shared_vm;
+	mm->exec_vm = oldmm->exec_vm;
+	mm->stack_vm = oldmm->stack_vm;
+
 	rb_link = &mm->mm_rb.rb_node;
 	rb_parent = NULL;
 	pprev = &mm->mmap;
@@ -421,7 +429,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 				atomic_dec(&inode->i_writecount);
 			mutex_lock(&mapping->i_mmap_mutex);
 			if (tmp->vm_flags & VM_SHARED)
-				mapping->i_mmap_writable++;
+				atomic_inc(&mapping->i_mmap_writable);
 			flush_dcache_mmap_lock(mapping);
 			/* insert tmp into the share list, just after mpnt */
 			if (unlikely(tmp->vm_flags & VM_NONLINEAR))
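For context on the i_mmap_writable change above: turning the plain counter into an atomic_t lets other paths test for shared writable mappings without holding i_mmap_mutex. A hedged userspace analogue using C11 atomics (function names are illustrative):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int i_mmap_writable;	/* was: plain int, guarded by a mutex */

static void map_shared_writable(void)
{
	atomic_fetch_add(&i_mmap_writable, 1);	/* was: i_mmap_writable++ */
}

static int mapping_writably_mapped(void)
{
	return atomic_load(&i_mmap_writable) > 0;	/* lock-free read */
}

int main(void)
{
	map_shared_writable();
	printf("writably mapped: %d\n", mapping_writably_mapped());
	return 0;
}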
@@ -527,19 +535,37 @@ static void mm_init_aio(struct mm_struct *mm)
 #endif
 }
 
+static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
+{
+#ifdef CONFIG_MEMCG
+	mm->owner = p;
+#endif
+}
+
 static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 {
+	mm->mmap = NULL;
+	mm->mm_rb = RB_ROOT;
+	mm->vmacache_seqnum = 0;
 	atomic_set(&mm->mm_users, 1);
 	atomic_set(&mm->mm_count, 1);
 	init_rwsem(&mm->mmap_sem);
 	INIT_LIST_HEAD(&mm->mmlist);
 	mm->core_state = NULL;
 	atomic_long_set(&mm->nr_ptes, 0);
+	mm->map_count = 0;
+	mm->locked_vm = 0;
+	mm->pinned_vm = 0;
 	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
 	spin_lock_init(&mm->page_table_lock);
+	mm_init_cpumask(mm);
 	mm_init_aio(mm);
 	mm_init_owner(mm, p);
+	mmu_notifier_mm_init(mm);
 	clear_tlb_flush_pending(mm);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
+	mm->pmd_huge_pte = NULL;
+#endif
 
 	if (current->mm) {
 		mm->flags = current->mm->flags & MMF_INIT_MASK;
@@ -549,11 +575,17 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 		mm->def_flags = 0;
 	}
 
-	if (likely(!mm_alloc_pgd(mm))) {
-		mmu_notifier_mm_init(mm);
-		return mm;
-	}
+	if (mm_alloc_pgd(mm))
+		goto fail_nopgd;
+
+	if (init_new_context(p, mm))
+		goto fail_nocontext;
 
+	return mm;
+
+fail_nocontext:
+	mm_free_pgd(mm);
+fail_nopgd:
 	free_mm(mm);
 	return NULL;
 }
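The reworked mm_init() above replaces the nested success path with the kernel's usual goto-based unwind: each acquisition gets a matching label, and a failure releases only what already succeeded, in reverse order. A standalone sketch of the same shape (ctx_init and its fields are hypothetical, not kernel code):

#include <stdlib.h>

struct ctx {
	void *pgd;		/* stands in for the page-global directory */
	void *arch_state;	/* stands in for the arch MMU context */
};

static struct ctx *ctx_init(struct ctx *c)
{
	c->pgd = malloc(64);
	if (!c->pgd)
		goto fail_nopgd;

	c->arch_state = malloc(64);
	if (!c->arch_state)
		goto fail_nocontext;

	return c;

fail_nocontext:
	free(c->pgd);		/* undo only what succeeded */
fail_nopgd:
	free(c);		/* mirrors free_mm() in the hunk above */
	return NULL;
}

int main(void)
{
	struct ctx *raw = malloc(sizeof(*raw));
	struct ctx *c = raw ? ctx_init(raw) : NULL;

	if (!c)
		return 1;	/* ctx_init already freed raw on failure */
	free(c->arch_state);
	free(c->pgd);
	free(c);
	return 0;
}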
@@ -587,7 +619,6 @@ struct mm_struct *mm_alloc(void)
 		return NULL;
 
 	memset(mm, 0, sizeof(*mm));
-	mm_init_cpumask(mm);
 	return mm_init(mm, current);
 }
 
@@ -819,17 +850,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
 		goto fail_nomem;
 
 	memcpy(mm, oldmm, sizeof(*mm));
-	mm_init_cpumask(mm);
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
-	mm->pmd_huge_pte = NULL;
-#endif
 	if (!mm_init(mm, tsk))
 		goto fail_nomem;
 
-	if (init_new_context(tsk, mm))
-		goto fail_nocontext;
-
 	dup_mm_exe_file(oldmm, mm);
 
 	err = dup_mmap(mm, oldmm);
@@ -851,15 +875,6 @@ free_pt:
 
 fail_nomem:
 	return NULL;
-
-fail_nocontext:
-	/*
-	 * If init_new_context() failed, we cannot use mmput() to free the mm
-	 * because it calls destroy_context()
-	 */
-	mm_free_pgd(mm);
-	free_mm(mm);
-	return NULL;
 }
 
 static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
@@ -1081,6 +1096,39 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	return 0;
 }
 
+static void copy_seccomp(struct task_struct *p)
+{
+#ifdef CONFIG_SECCOMP
+	/*
+	 * Must be called with sighand->lock held, which is common to
+	 * all threads in the group. Holding cred_guard_mutex is not
+	 * needed because this new task is not yet running and cannot
+	 * be racing exec.
+	 */
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
+	/* Ref-count the new filter user, and assign it. */
+	get_seccomp_filter(current);
+	p->seccomp = current->seccomp;
+
+	/*
+	 * Explicitly enable no_new_privs here in case it got set
+	 * between the task_struct being duplicated and holding the
+	 * sighand lock. The seccomp state and nnp must be in sync.
+	 */
+	if (task_no_new_privs(current))
+		task_set_no_new_privs(p);
+
+	/*
+	 * If the parent gained a seccomp mode after copying thread
+	 * flags and before we held the sighand lock, we have to
+	 * manually enable the seccomp thread flag here.
+	 */
+	if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
+		set_tsk_thread_flag(p, TIF_SECCOMP);
+#endif
+}
+
 SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 {
 	current->clear_child_tid = tidptr;
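The copy_seccomp() helper above is correct only because every seccomp writer also takes the sighand lock, so the snapshot taken here cannot tear or race a concurrent filter install. A minimal pthread sketch of that pattern (the structs and lock are illustrative stand-ins, not kernel types):

#include <pthread.h>

struct seccomp_state {
	int mode;
	void *filter;
};

struct task {
	pthread_mutex_t sighand_lock;	/* stand-in for sighand->siglock */
	struct seccomp_state seccomp;
};

/*
 * Writers (e.g. a filter install) take the same lock, so once we hold
 * it the parent's state is frozen and both fields copy as one
 * consistent unit.
 */
static void copy_seccomp_sketch(struct task *parent, struct task *child)
{
	pthread_mutex_lock(&parent->sighand_lock);
	child->seccomp = parent->seccomp;	/* struct copy, now stable */
	pthread_mutex_unlock(&parent->sighand_lock);
}

int main(void)
{
	struct task parent = { .sighand_lock = PTHREAD_MUTEX_INITIALIZER };
	struct task child = { .sighand_lock = PTHREAD_MUTEX_INITIALIZER };

	parent.seccomp.mode = 2;	/* a nonzero "filter" mode */
	copy_seccomp_sketch(&parent, &child);
	return child.seccomp.mode == 2 ? 0 : 1;
}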
@@ -1095,17 +1143,9 @@ static void rt_mutex_init_task(struct task_struct *p)
 	p->pi_waiters = RB_ROOT;
 	p->pi_waiters_leftmost = NULL;
 	p->pi_blocked_on = NULL;
-	p->pi_top_task = NULL;
 #endif
 }
 
-#ifdef CONFIG_MEMCG
-void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
-{
-	mm->owner = p;
-}
-#endif /* CONFIG_MEMCG */
-
 /*
  * Initialize POSIX timer handling for a single task.
  */
@@ -1196,7 +1236,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto fork_out;
 
 	ftrace_graph_init_task(p);
-	get_seccomp_filter(p);
 
 	rt_mutex_init_task(p);
 
@@ -1262,9 +1301,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	posix_cpu_timers_init(p);
 
-	do_posix_clock_monotonic_gettime(&p->start_time);
-	p->real_start_time = p->start_time;
-	monotonic_to_bootbased(&p->real_start_time);
+	p->start_time = ktime_get_ns();
+	p->real_start_time = ktime_get_boot_ns();
 	p->io_context = NULL;
 	p->audit_context = NULL;
 	if (clone_flags & CLONE_THREAD)
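On the start_time hunk above: both fields become plain nanosecond counters, read from the monotonic clock (ktime_get_ns()) and the boot-based clock (ktime_get_boot_ns()), the latter continuing to tick across suspend. A Linux-only userspace analogue of the two clocks:

#define _GNU_SOURCE
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec mono, boot;

	/* Monotonic, like ktime_get_ns(): excludes time spent suspended. */
	clock_gettime(CLOCK_MONOTONIC, &mono);
	/* Boot-based, like ktime_get_boot_ns(): includes suspend time. */
	clock_gettime(CLOCK_BOOTTIME, &boot);

	printf("monotonic: %lld.%09ld s\n", (long long)mono.tv_sec, mono.tv_nsec);
	printf("boottime:  %lld.%09ld s\n", (long long)boot.tv_sec, boot.tv_nsec);
	return 0;
}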
@@ -1307,10 +1345,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
-#ifdef CONFIG_MEMCG
-	p->memcg_batch.do_batch = 0;
-	p->memcg_batch.memcg = NULL;
-#endif
 #ifdef CONFIG_BCACHE
 	p->sequential_io	= 0;
 	p->sequential_io_avg	= 0;
@@ -1328,6 +1362,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (retval)
 		goto bad_fork_cleanup_policy;
 	/* copy all the process information */
+	shm_init_task(p);
 	retval = copy_semundo(clone_flags, p);
 	if (retval)
 		goto bad_fork_cleanup_audit;
@@ -1437,6 +1472,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	spin_lock(&current->sighand->siglock);
 
 	/*
+	 * Copy seccomp details explicitly here, in case they were changed
+	 * before holding sighand lock.
+	 */
+	copy_seccomp(p);
+
+	/*
 	 * Process group and session signals need to be delivered to just the
 	 * parent before the fork or both the parent and the child after the
 	 * fork. Restart if a signal comes in before we add the new process to
@@ -1873,6 +1914,11 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 		 */
 		exit_sem(current);
 	}
+	if (unshare_flags & CLONE_NEWIPC) {
+		/* Orphan segments in old ns (see sem above). */
+		exit_shm(current);
+		shm_init_task(current);
+	}
 
 	if (new_nsproxy)
 		switch_task_namespaces(current, new_nsproxy);
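The final hunk pairs exit_shm() with shm_init_task() so SysV shm segments attached in the old IPC namespace are orphaned rather than carried into the new one. A userspace demonstration of the visible effect (requires CAP_SYS_ADMIN; error handling trimmed):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
	/* Create a segment in the current IPC namespace. */
	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);

	if (id < 0 || unshare(CLONE_NEWIPC) != 0) {
		perror("setup");
		return 1;
	}

	/* In the new namespace the old segment id no longer resolves. */
	if (shmat(id, NULL, 0) == (void *)-1)
		perror("shmat (expected failure)");
	return 0;
}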