aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/exit.c
diff options
context:
space:
mode:
authorAlexey Kuznetsov <kuznet@ms2.inr.ac.ru>2007-06-08 16:47:00 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-06-08 20:23:34 -0400
commit778e9a9c3e7193ea9f434f382947155ffb59c755 (patch)
tree2ceb8c7ce1d55124982b77966dcd65cee5cc623b /kernel/exit.c
parent1a539a87280b3032fd12bc93a4a82f1d8aa97ca8 (diff)
pi-futex: fix exit races and locking problems
1. New entries can be added to tsk->pi_state_list after the task has completed exit_pi_state_list(). The result is memory leaks and deadlocks. 2. handle_mm_fault() is called under a spinlock. The result is obvious. 3. A self-inflicted deadlock inside glibc: sometimes futex_lock_pi() returns -ESRCH when it is not expected, and glibc enters a for(;;) sleep() loop to simulate deadlock. This problem is quite obvious and I think the patch is right. Though it looks like each "if" in futex_lock_pi() got some stupid special case "else if". :-) 4. Sometimes futex_lock_pi() returns -EDEADLK when nobody has the lock. The reason is also obvious (see the comment in the patch), but the correct fix is far beyond my comprehension. I guess someone already saw this; the chunk: if (rt_mutex_trylock(&q.pi_state->pi_mutex)) ret = 0; is obviously from the same opera. But it does not work, because the rtmutex is really taken at this point: wake_futex_pi() of the previous owner reassigned it to us. My fix works. But it looks very stupid. I would think about removing the shift of ownership in wake_futex_pi() and doing all the work in the context of the process taking the lock. From: Thomas Gleixner <tglx@linutronix.de> Fix 1) Avoid the tasklist lock variant of the exit race fix by adding an additional state transition to the exit code. This also fixes the issue where a task with recursive segfaults is not able to release the futexes. Fix 2) Clean up the lookup_pi_state() failure path and finally solve the -ESRCH problem. Fix 3) Solve the fixup_pi_state_owner() problem, which needs to do the fixup in the lock-protected section, by using the in_atomic userspace access functions. This also removes the ugly lock drop / unqueue inside of fixup_pi_state(). Fix 4) Fix a stale lock in the error path of futex_wake_pi(). Added some error checks for verification. The -EDEADLK problem is solved by the rtmutex fixups.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Ingo Molnar <mingo@elte.hu> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Ulrich Drepper <drepper@redhat.com> Cc: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/exit.c')
-rw-r--r--kernel/exit.c24
1 files changed, 23 insertions, 1 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 5b888c24e43e..5c8ecbaa19a5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -892,13 +892,29 @@ fastcall NORET_TYPE void do_exit(long code)
892 if (unlikely(tsk->flags & PF_EXITING)) { 892 if (unlikely(tsk->flags & PF_EXITING)) {
893 printk(KERN_ALERT 893 printk(KERN_ALERT
894 "Fixing recursive fault but reboot is needed!\n"); 894 "Fixing recursive fault but reboot is needed!\n");
895 /*
896 * We can do this unlocked here. The futex code uses
897 * this flag just to verify whether the pi state
898 * cleanup has been done or not. In the worst case it
899 * loops once more. We pretend that the cleanup was
900 * done as there is no way to return. Either the
901 * OWNER_DIED bit is set by now or we push the blocked
902 * task into the wait for ever nirwana as well.
903 */
904 tsk->flags |= PF_EXITPIDONE;
895 if (tsk->io_context) 905 if (tsk->io_context)
896 exit_io_context(); 906 exit_io_context();
897 set_current_state(TASK_UNINTERRUPTIBLE); 907 set_current_state(TASK_UNINTERRUPTIBLE);
898 schedule(); 908 schedule();
899 } 909 }
900 910
911 /*
912 * tsk->flags are checked in the futex code to protect against
913 * an exiting task cleaning up the robust pi futexes.
914 */
915 spin_lock_irq(&tsk->pi_lock);
901 tsk->flags |= PF_EXITING; 916 tsk->flags |= PF_EXITING;
917 spin_unlock_irq(&tsk->pi_lock);
902 918
903 if (unlikely(in_atomic())) 919 if (unlikely(in_atomic()))
904 printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", 920 printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
@@ -912,7 +928,7 @@ fastcall NORET_TYPE void do_exit(long code)
912 } 928 }
913 group_dead = atomic_dec_and_test(&tsk->signal->live); 929 group_dead = atomic_dec_and_test(&tsk->signal->live);
914 if (group_dead) { 930 if (group_dead) {
915 hrtimer_cancel(&tsk->signal->real_timer); 931 hrtimer_cancel(&tsk->signal->real_timer);
916 exit_itimers(tsk->signal); 932 exit_itimers(tsk->signal);
917 } 933 }
918 acct_collect(code, group_dead); 934 acct_collect(code, group_dead);
@@ -965,6 +981,12 @@ fastcall NORET_TYPE void do_exit(long code)
965 * Make sure we are holding no locks: 981 * Make sure we are holding no locks:
966 */ 982 */
967 debug_check_no_locks_held(tsk); 983 debug_check_no_locks_held(tsk);
984 /*
985 * We can do this unlocked here. The futex code uses this flag
986 * just to verify whether the pi state cleanup has been done
987 * or not. In the worst case it loops once more.
988 */
989 tsk->flags |= PF_EXITPIDONE;
968 990
969 if (tsk->io_context) 991 if (tsk->io_context)
970 exit_io_context(); 992 exit_io_context();