Diffstat (limited to 'kernel/exit.c')
-rw-r--r--	kernel/exit.c	188
1 file changed, 139 insertions(+), 49 deletions(-)
diff --git a/kernel/exit.c b/kernel/exit.c
index b9d3bc6c21ec..64879bdff921 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -50,6 +50,7 @@
 #include <linux/perf_event.h>
 #include <trace/events/sched.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/oom.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -70,7 +71,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
 
 		list_del_rcu(&p->tasks);
 		list_del_init(&p->sibling);
-		__get_cpu_var(process_counts)--;
+		__this_cpu_dec(process_counts);
 	}
 	list_del_rcu(&p->thread_group);
 }
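
The change above replaces an open-coded decrement of a per-cpu variable with the dedicated this_cpu operation, which avoids computing the per-cpu address separately (on x86 it compiles to a single %gs-prefixed instruction). A minimal sketch of the two styles, assuming kernel context with preemption already disabled, as it is under the locks held in __unhash_process(); demo_counts is a stand-in, not the scheduler's real process_counts:

	#include <linux/percpu.h>

	static DEFINE_PER_CPU(int, demo_counts);

	static void demo_unhash(void)
	{
		/* old style: materialize this CPU's lvalue, then decrement */
		__get_cpu_var(demo_counts)--;

		/* new style: one per-cpu decrement, no separate address math */
		__this_cpu_dec(demo_counts);
	}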
@@ -97,6 +98,14 @@ static void __exit_signal(struct task_struct *tsk)
 		sig->tty = NULL;
 	} else {
 		/*
+		 * This can only happen if the caller is de_thread().
+		 * FIXME: this is a temporary hack, we should teach
+		 * posix-cpu-timers to handle this case correctly.
+		 */
+		if (unlikely(has_group_leader_pid(tsk)))
+			posix_cpu_timers_exit_group(tsk);
+
+		/*
 		 * If there is any task waiting for the group exit
 		 * then notify it:
 		 */
@@ -151,9 +160,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 {
 	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
 
-#ifdef CONFIG_PERF_EVENTS
-	WARN_ON_ONCE(tsk->perf_event_ctxp);
-#endif
+	perf_event_delayed_put(tsk);
 	trace_sched_process_free(tsk);
 	put_task_struct(tsk);
 }
@@ -556,29 +563,28 @@ void exit_files(struct task_struct *tsk)
 
 #ifdef CONFIG_MM_OWNER
 /*
- * Task p is exiting and it owned mm, lets find a new owner for it
+ * A task is exiting.  If it owned this mm, find a new owner for the mm.
  */
-static inline int
-mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
-{
-	/*
-	 * If there are other users of the mm and the owner (us) is exiting
-	 * we need to find a new owner to take on the responsibility.
-	 */
-	if (atomic_read(&mm->mm_users) <= 1)
-		return 0;
-	if (mm->owner != p)
-		return 0;
-	return 1;
-}
-
 void mm_update_next_owner(struct mm_struct *mm)
 {
 	struct task_struct *c, *g, *p = current;
 
 retry:
-	if (!mm_need_new_owner(mm, p))
+	/*
+	 * If the exiting or execing task is not the owner, it's
+	 * someone else's problem.
+	 */
+	if (mm->owner != p)
+		return;
+	/*
+	 * The current owner is exiting/execing and there are no other
+	 * candidates.  Do not leave the mm pointing to a possibly
+	 * freed task structure.
+	 */
+	if (atomic_read(&mm->mm_users) <= 1) {
+		mm->owner = NULL;
 		return;
+	}
 
 	read_lock(&tasklist_lock);
 	/*
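
Besides folding mm_need_new_owner() into its caller, the rewrite above changes behavior in one case: when the last user of the mm is exiting, mm->owner is now cleared rather than left pointing at a task_struct that may soon be freed. Readers of mm->owner (a CONFIG_MM_OWNER field used by memory cgroups) must therefore tolerate NULL; a hedged sketch of such a reader under the usual RCU convention, where demo_owner_pid() is hypothetical and not part of this patch:

	#include <linux/mm_types.h>
	#include <linux/rcupdate.h>
	#include <linux/sched.h>

	/* Return the owner's pid, or -1 if the mm currently has no owner. */
	static pid_t demo_owner_pid(struct mm_struct *mm)
	{
		struct task_struct *owner;
		pid_t pid = -1;

		rcu_read_lock();
		owner = rcu_dereference(mm->owner);
		if (owner)
			pid = owner->pid;
		rcu_read_unlock();
		return pid;
	}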
@@ -691,6 +697,8 @@ static void exit_mm(struct task_struct * tsk)
 	enter_lazy_tlb(mm, current);
 	/* We don't want this task to be frozen prematurely */
 	clear_freeze_flag(tsk);
+	if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
+		atomic_dec(&mm->oom_disable_count);
 	task_unlock(tsk);
 	mm_update_next_owner(mm);
 	mmput(mm);
@@ -704,6 +712,8 @@ static void exit_mm(struct task_struct * tsk)
  * space.
  */
 static struct task_struct *find_new_reaper(struct task_struct *father)
+	__releases(&tasklist_lock)
+	__acquires(&tasklist_lock)
 {
 	struct pid_namespace *pid_ns = task_active_pid_ns(father);
 	struct task_struct *thread;
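
The two annotations added to find_new_reaper() are sparse lock-context hints: they document that the function is entered with tasklist_lock held, may drop it, and re-acquires it before returning, so the static checker does not flag the imbalance. They expand to nothing in a normal build. A minimal sketch of the idiom; demo_lock and demo_drop_and_retake() are illustrative, not from the patch:

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(demo_lock);

	/* Entered with demo_lock held; drops it, works, re-takes it. */
	static void demo_drop_and_retake(void)
		__releases(&demo_lock)
		__acquires(&demo_lock)
	{
		spin_unlock(&demo_lock);
		/* ... work that must not hold the lock ... */
		spin_lock(&demo_lock);
	}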
@@ -832,7 +842,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	/* Let father know we died
 	 *
 	 * Thread signals are configurable, but you aren't going to use
-	 * that to send signals to arbitary processes.
+	 * that to send signals to arbitrary processes.
 	 * That stops right now.
 	 *
 	 * If the parent exec id doesn't match the exec id we saved
@@ -899,12 +909,22 @@ NORET_TYPE void do_exit(long code)
 	profile_task_exit(tsk);
 
 	WARN_ON(atomic_read(&tsk->fs_excl));
+	WARN_ON(blk_needs_flush_plug(tsk));
 
 	if (unlikely(in_interrupt()))
 		panic("Aiee, killing interrupt handler!");
 	if (unlikely(!tsk->pid))
 		panic("Attempted to kill the idle task!");
 
+	/*
+	 * If do_exit is called because this process oopsed, it's possible
+	 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
+	 * continuing. Amongst other possible reasons, this is to prevent
+	 * mm_release()->clear_child_tid() from writing to a user-controlled
+	 * kernel address.
+	 */
+	set_fs(USER_DS);
+
 	tracehook_report_exit(&code);
 
 	validate_creds_for_do_exit(tsk);
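
The added set_fs(USER_DS) resets the task's address limit in case it oopsed inside a get_fs()/set_fs() pair. A sketch of the pattern being guarded against, using the era's mm_segment_t API; demo_kernel_read() is illustrative, not from the patch:

	#include <linux/fs.h>
	#include <linux/uaccess.h>

	static ssize_t demo_kernel_read(struct file *file, void *kbuf,
					size_t len, loff_t *pos)
	{
		mm_segment_t old_fs = get_fs();
		ssize_t ret;

		set_fs(KERNEL_DS);	/* access_ok() now passes kernel addresses */
		ret = vfs_read(file, (char __user *)kbuf, len, pos);
		set_fs(old_fs);		/* an oops inside vfs_read() skips this restore */
		return ret;
	}

If the oops fires before the restore, the dying task still has KERNEL_DS when do_exit() runs, and mm_release()'s put_user() to the user-supplied clear_child_tid pointer would pass access_ok() even for a kernel address.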
@@ -978,6 +998,15 @@ NORET_TYPE void do_exit(long code)
 	exit_fs(tsk);
 	check_stack_usage();
 	exit_thread();
+
+	/*
+	 * Flush inherited counters to the parent - before the parent
+	 * gets woken up by child-exit notifications.
+	 *
+	 * Because of cgroup mode, this must be called before cgroup_exit().
+	 */
+	perf_event_exit_task(tsk);
+
 	cgroup_exit(tsk, 1);
 
 	if (group_dead)
@@ -990,12 +1019,7 @@ NORET_TYPE void do_exit(long code)
 	/*
 	 * FIXME: do that only when needed, using sched_exit tracepoint
 	 */
-	flush_ptrace_hw_breakpoint(tsk);
-	/*
-	 * Flush inherited counters to the parent - before the parent
-	 * gets woken up by child-exit notifications.
-	 */
-	perf_event_exit_task(tsk);
+	ptrace_put_breakpoints(tsk);
 
 	exit_notify(tsk, group_dead);
 #ifdef CONFIG_NUMA
@@ -1356,11 +1380,23 @@ static int *task_stopped_code(struct task_struct *p, bool ptrace)
 	return NULL;
 }
 
-/*
- * Handle sys_wait4 work for one task in state TASK_STOPPED.  We hold
- * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
- * the lock and this task is uninteresting.  If we return nonzero, we have
- * released the lock and the system call should return.
+/**
+ * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED
+ * @wo: wait options
+ * @ptrace: is the wait for ptrace
+ * @p: task to wait for
+ *
+ * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED.
+ *
+ * CONTEXT:
+ * read_lock(&tasklist_lock), which is released if return value is
+ * non-zero.  Also, grabs and releases @p->sighand->siglock.
+ *
+ * RETURNS:
+ * 0 if wait condition didn't exist and search for other wait conditions
+ * should continue.  Non-zero return, -errno on failure and @p's pid on
+ * success, implies that tasklist_lock is released and wait condition
+ * search should terminate.
  */
 static int wait_task_stopped(struct wait_opts *wo,
 				int ptrace, struct task_struct *p)
@@ -1376,6 +1412,9 @@ static int wait_task_stopped(struct wait_opts *wo,
 	if (!ptrace && !(wo->wo_flags & WUNTRACED))
 		return 0;
 
+	if (!task_stopped_code(p, ptrace))
+		return 0;
+
 	exit_code = 0;
 	spin_lock_irq(&p->sighand->siglock);
 
@@ -1517,33 +1556,84 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
 		return 0;
 	}
 
-	if (likely(!ptrace) && unlikely(task_ptrace(p))) {
+	/* dead body doesn't have much to contribute */
+	if (p->exit_state == EXIT_DEAD)
+		return 0;
+
+	/* slay zombie? */
+	if (p->exit_state == EXIT_ZOMBIE) {
 		/*
-		 * This child is hidden by ptrace.
-		 * We aren't allowed to see it now, but eventually we will.
+		 * A zombie ptracee is only visible to its ptracer.
+		 * Notification and reaping will be cascaded to the real
+		 * parent when the ptracer detaches.
+		 */
+		if (likely(!ptrace) && unlikely(task_ptrace(p))) {
+			/* it will become visible, clear notask_error */
+			wo->notask_error = 0;
+			return 0;
+		}
+
+		/* we don't reap group leaders with subthreads */
+		if (!delay_group_leader(p))
+			return wait_task_zombie(wo, p);
+
+		/*
+		 * Allow access to stopped/continued state via zombie by
+		 * falling through.  Clearing of notask_error is complex.
+		 *
+		 * When !@ptrace:
+		 *
+		 * If WEXITED is set, notask_error should naturally be
+		 * cleared.  If not, subset of WSTOPPED|WCONTINUED is set,
+		 * so, if there are live subthreads, there are events to
+		 * wait for.  If all subthreads are dead, it's still safe
+		 * to clear - this function will be called again in a finite
+		 * amount of time once all the subthreads are released and
+		 * will then return without clearing.
+		 *
+		 * When @ptrace:
+		 *
+		 * Stopped state is per-task and thus can't change once the
+		 * target task dies.  Only continued and exited can happen.
+		 * Clear notask_error if WCONTINUED | WEXITED.
+		 */
+		if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED)))
+			wo->notask_error = 0;
+	} else {
+		/*
+		 * If @p is ptraced by a task in its real parent's group,
+		 * hide group stop/continued state when looking at @p as
+		 * the real parent; otherwise, a single stop can be
+		 * reported twice as group and ptrace stops.
+		 *
+		 * If a ptracer wants to distinguish the two events for its
+		 * own children, it should create a separate process which
+		 * takes the role of real parent.
+		 */
+		if (likely(!ptrace) && task_ptrace(p) &&
+		    same_thread_group(p->parent, p->real_parent))
+			return 0;
+
+		/*
+		 * @p is alive and it's gonna stop, continue or exit, so
+		 * there always is something to wait for.
 		 */
 		wo->notask_error = 0;
-		return 0;
 	}
 
-	if (p->exit_state == EXIT_DEAD)
-		return 0;
-
 	/*
-	 * We don't reap group leaders with subthreads.
+	 * Wait for stopped.  Depending on @ptrace, different stopped state
+	 * is used and the two don't interact with each other.
 	 */
-	if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p))
-		return wait_task_zombie(wo, p);
+	ret = wait_task_stopped(wo, ptrace, p);
+	if (ret)
+		return ret;
 
 	/*
-	 * It's stopped or running now, so it might
-	 * later continue, exit, or stop again.
+	 * Wait for continued.  There's only one continued state and the
+	 * ptracer can consume it which can confuse the real parent.  Don't
+	 * use WCONTINUED from ptracer.  You don't need or want it.
 	 */
-	wo->notask_error = 0;
-
-	if (task_stopped_code(p, ptrace))
-		return wait_task_stopped(wo, ptrace, p);
-
 	return wait_task_continued(wo, p);
 }
 
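
The reworked wait_consider_task() dispatches on exit_state first (dead, zombie, alive) and only then checks the stopped and continued conditions in a fixed order. The three reporting paths map directly onto the classic waitpid() status classes; a small runnable userspace illustration (plain POSIX, not part of the patch):

	#include <signal.h>
	#include <stdio.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		int status;
		pid_t pid = fork();

		if (pid == 0) {			/* child: just sit and take signals */
			for (;;)
				pause();
		}

		kill(pid, SIGSTOP);
		waitpid(pid, &status, WUNTRACED);	/* wait_task_stopped() path */
		if (WIFSTOPPED(status))
			printf("stopped by signal %d\n", WSTOPSIG(status));

		kill(pid, SIGCONT);
		waitpid(pid, &status, WCONTINUED);	/* wait_task_continued() path */
		if (WIFCONTINUED(status))
			printf("continued\n");

		kill(pid, SIGKILL);
		waitpid(pid, &status, 0);		/* wait_task_zombie() path */
		if (WIFSIGNALED(status))
			printf("killed by signal %d\n", WTERMSIG(status));
		return 0;
	}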