diff options
author | Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> | 2007-06-08 16:47:00 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-06-08 20:23:34 -0400 |
commit | 778e9a9c3e7193ea9f434f382947155ffb59c755 (patch) | |
tree | 2ceb8c7ce1d55124982b77966dcd65cee5cc623b /kernel/exit.c | |
parent | 1a539a87280b3032fd12bc93a4a82f1d8aa97ca8 (diff) |
pi-futex: fix exit races and locking problems
1. New entries can be added to tsk->pi_state_list after the task has completed
exit_pi_state_list(). The result is memory leaks and deadlocks.
2. handle_mm_fault() is called under spinlock. The result is obvious.
3. A self-inflicted deadlock inside glibc.
Sometimes futex_lock_pi() returns -ESRCH when it is not expected,
and glibc enters a for(;;) sleep() loop to simulate deadlock. This problem
is quite obvious and I think the patch is right. Though it looks like
each "if" in futex_lock_pi() got some stupid special case "else if". :-)
4. Sometimes futex_lock_pi() returns -EDEADLK
when nobody holds the lock. The reason is also obvious (see the comment
in the patch), but the correct fix is far beyond my comprehension.
I guess someone already saw this, the chunk:
if (rt_mutex_trylock(&q.pi_state->pi_mutex))
ret = 0;
is obviously an attempt to address the same problem. But it does not work, because the
rtmutex is really taken at this point: wake_futex_pi() of the previous
owner reassigned it to us. My fix works. But it looks very stupid.
I would consider removing the transfer of ownership in wake_futex_pi()
and doing all the work in the context of the process taking the lock.
From: Thomas Gleixner <tglx@linutronix.de>
Fix 1) Avoid the tasklist lock variant of the exit race fix by adding
an additional state transition to the exit code.
This also fixes the issue where a task with recursive segfaults
is not able to release the futexes.
Fix 2) Cleanup the lookup_pi_state() failure path and solve the -ESRCH
problem finally.
Fix 3) Solve the fixup_pi_state_owner() problem, which needs to do the fixup
in the lock-protected section, by using the in_atomic userspace access
functions.
This also removes the ugly lock drop / unqueue inside of fixup_pi_state_owner()
Fix 4) Fix a stale lock in the error path of futex_wake_pi()
Added some error checks for verification.
The -EDEADLK problem is solved by the rtmutex fixups.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/exit.c')
-rw-r--r-- | kernel/exit.c | 24 |
1 files changed, 23 insertions, 1 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index 5b888c24e43e..5c8ecbaa19a5 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -892,13 +892,29 @@ fastcall NORET_TYPE void do_exit(long code) | |||
892 | if (unlikely(tsk->flags & PF_EXITING)) { | 892 | if (unlikely(tsk->flags & PF_EXITING)) { |
893 | printk(KERN_ALERT | 893 | printk(KERN_ALERT |
894 | "Fixing recursive fault but reboot is needed!\n"); | 894 | "Fixing recursive fault but reboot is needed!\n"); |
895 | /* | ||
896 | * We can do this unlocked here. The futex code uses | ||
897 | * this flag just to verify whether the pi state | ||
898 | * cleanup has been done or not. In the worst case it | ||
899 | * loops once more. We pretend that the cleanup was | ||
900 | * done as there is no way to return. Either the | ||
901 | * OWNER_DIED bit is set by now or we push the blocked | ||
902 | * task into the wait for ever nirwana as well. | ||
903 | */ | ||
904 | tsk->flags |= PF_EXITPIDONE; | ||
895 | if (tsk->io_context) | 905 | if (tsk->io_context) |
896 | exit_io_context(); | 906 | exit_io_context(); |
897 | set_current_state(TASK_UNINTERRUPTIBLE); | 907 | set_current_state(TASK_UNINTERRUPTIBLE); |
898 | schedule(); | 908 | schedule(); |
899 | } | 909 | } |
900 | 910 | ||
911 | /* | ||
912 | * tsk->flags are checked in the futex code to protect against | ||
913 | * an exiting task cleaning up the robust pi futexes. | ||
914 | */ | ||
915 | spin_lock_irq(&tsk->pi_lock); | ||
901 | tsk->flags |= PF_EXITING; | 916 | tsk->flags |= PF_EXITING; |
917 | spin_unlock_irq(&tsk->pi_lock); | ||
902 | 918 | ||
903 | if (unlikely(in_atomic())) | 919 | if (unlikely(in_atomic())) |
904 | printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", | 920 | printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", |
@@ -912,7 +928,7 @@ fastcall NORET_TYPE void do_exit(long code) | |||
912 | } | 928 | } |
913 | group_dead = atomic_dec_and_test(&tsk->signal->live); | 929 | group_dead = atomic_dec_and_test(&tsk->signal->live); |
914 | if (group_dead) { | 930 | if (group_dead) { |
915 | hrtimer_cancel(&tsk->signal->real_timer); | 931 | hrtimer_cancel(&tsk->signal->real_timer); |
916 | exit_itimers(tsk->signal); | 932 | exit_itimers(tsk->signal); |
917 | } | 933 | } |
918 | acct_collect(code, group_dead); | 934 | acct_collect(code, group_dead); |
@@ -965,6 +981,12 @@ fastcall NORET_TYPE void do_exit(long code) | |||
965 | * Make sure we are holding no locks: | 981 | * Make sure we are holding no locks: |
966 | */ | 982 | */ |
967 | debug_check_no_locks_held(tsk); | 983 | debug_check_no_locks_held(tsk); |
984 | /* | ||
985 | * We can do this unlocked here. The futex code uses this flag | ||
986 | * just to verify whether the pi state cleanup has been done | ||
987 | * or not. In the worst case it loops once more. | ||
988 | */ | ||
989 | tsk->flags |= PF_EXITPIDONE; | ||
968 | 990 | ||
969 | if (tsk->io_context) | 991 | if (tsk->io_context) |
970 | exit_io_context(); | 992 | exit_io_context(); |