diff options
author | Ingo Molnar <mingo@elte.hu> | 2006-07-28 23:17:57 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-07-29 00:02:00 -0400 |
commit | e3f2ddeac718c768fdac4b7fe69d465172f788a8 (patch) | |
tree | 5428532dc6c87710c35a71858425f6d726f0e44c | |
parent | 627371d73cdd04ed23fe098755b4f855138ad9e0 (diff) |
[PATCH] pi-futex: robust-futex exit
Fix robust PI-futexes to be properly unlocked on unexpected exit.
For this to work, the kernel has to know whether a futex is a PI or a
non-PI one, because the semantics are different. Since space in the
relevant glibc data structures is extremely scarce, the best solution is
to encode the 'PI' information in bit 0 of the robust list pointer.
Existing (non-PI) glibc robust futexes always have this bit clear, so the
ABI is preserved. New glibc with PI-robust-futexes will set this bit.
Further fixes from Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | include/linux/futex.h | 3 | ||||
-rw-r--r-- | kernel/futex.c | 91 | ||||
-rw-r--r-- | kernel/futex_compat.c | 34 |
3 files changed, 89 insertions, 39 deletions
diff --git a/include/linux/futex.h b/include/linux/futex.h index 34c3a215f2cd..d097b5b72bc6 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h | |||
@@ -96,7 +96,8 @@ struct robust_list_head { | |||
96 | long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout, | 96 | long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout, |
97 | u32 __user *uaddr2, u32 val2, u32 val3); | 97 | u32 __user *uaddr2, u32 val2, u32 val3); |
98 | 98 | ||
99 | extern int handle_futex_death(u32 __user *uaddr, struct task_struct *curr); | 99 | extern int |
100 | handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi); | ||
100 | 101 | ||
101 | #ifdef CONFIG_FUTEX | 102 | #ifdef CONFIG_FUTEX |
102 | extern void exit_robust_list(struct task_struct *curr); | 103 | extern void exit_robust_list(struct task_struct *curr); |
diff --git a/kernel/futex.c b/kernel/futex.c index f59003b1d8f9..dda2049692a2 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -495,10 +495,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me) | |||
495 | } | 495 | } |
496 | 496 | ||
497 | /* | 497 | /* |
498 | * We are the first waiter - try to look up the real owner and | 498 | * We are the first waiter - try to look up the real owner and attach |
499 | * attach the new pi_state to it: | 499 | * the new pi_state to it, but bail out when the owner died bit is set |
500 | * and TID = 0: | ||
500 | */ | 501 | */ |
501 | pid = uval & FUTEX_TID_MASK; | 502 | pid = uval & FUTEX_TID_MASK; |
503 | if (!pid && (uval & FUTEX_OWNER_DIED)) | ||
504 | return -ESRCH; | ||
502 | p = futex_find_get_task(pid); | 505 | p = futex_find_get_task(pid); |
503 | if (!p) | 506 | if (!p) |
504 | return -ESRCH; | 507 | return -ESRCH; |
@@ -579,16 +582,17 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) | |||
579 | * kept enabled while there is PI state around. We must also | 582 | * kept enabled while there is PI state around. We must also |
580 | * preserve the owner died bit.) | 583 | * preserve the owner died bit.) |
581 | */ | 584 | */ |
582 | newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid; | 585 | if (!(uval & FUTEX_OWNER_DIED)) { |
583 | 586 | newval = FUTEX_WAITERS | new_owner->pid; | |
584 | inc_preempt_count(); | ||
585 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); | ||
586 | dec_preempt_count(); | ||
587 | 587 | ||
588 | if (curval == -EFAULT) | 588 | inc_preempt_count(); |
589 | return -EFAULT; | 589 | curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); |
590 | if (curval != uval) | 590 | dec_preempt_count(); |
591 | return -EINVAL; | 591 | if (curval == -EFAULT) |
592 | return -EFAULT; | ||
593 | if (curval != uval) | ||
594 | return -EINVAL; | ||
595 | } | ||
592 | 596 | ||
593 | spin_lock_irq(&pi_state->owner->pi_lock); | 597 | spin_lock_irq(&pi_state->owner->pi_lock); |
594 | WARN_ON(list_empty(&pi_state->list)); | 598 | WARN_ON(list_empty(&pi_state->list)); |
@@ -1443,9 +1447,11 @@ retry_locked: | |||
1443 | * again. If it succeeds then we can return without waking | 1447 | * again. If it succeeds then we can return without waking |
1444 | * anyone else up: | 1448 | * anyone else up: |
1445 | */ | 1449 | */ |
1446 | inc_preempt_count(); | 1450 | if (!(uval & FUTEX_OWNER_DIED)) { |
1447 | uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0); | 1451 | inc_preempt_count(); |
1448 | dec_preempt_count(); | 1452 | uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0); |
1453 | dec_preempt_count(); | ||
1454 | } | ||
1449 | 1455 | ||
1450 | if (unlikely(uval == -EFAULT)) | 1456 | if (unlikely(uval == -EFAULT)) |
1451 | goto pi_faulted; | 1457 | goto pi_faulted; |
@@ -1478,9 +1484,11 @@ retry_locked: | |||
1478 | /* | 1484 | /* |
1479 | * No waiters - kernel unlocks the futex: | 1485 | * No waiters - kernel unlocks the futex: |
1480 | */ | 1486 | */ |
1481 | ret = unlock_futex_pi(uaddr, uval); | 1487 | if (!(uval & FUTEX_OWNER_DIED)) { |
1482 | if (ret == -EFAULT) | 1488 | ret = unlock_futex_pi(uaddr, uval); |
1483 | goto pi_faulted; | 1489 | if (ret == -EFAULT) |
1490 | goto pi_faulted; | ||
1491 | } | ||
1484 | 1492 | ||
1485 | out_unlock: | 1493 | out_unlock: |
1486 | spin_unlock(&hb->lock); | 1494 | spin_unlock(&hb->lock); |
@@ -1699,9 +1707,9 @@ err_unlock: | |||
1699 | * Process a futex-list entry, check whether it's owned by the | 1707 | * Process a futex-list entry, check whether it's owned by the |
1700 | * dying task, and do notification if so: | 1708 | * dying task, and do notification if so: |
1701 | */ | 1709 | */ |
1702 | int handle_futex_death(u32 __user *uaddr, struct task_struct *curr) | 1710 | int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) |
1703 | { | 1711 | { |
1704 | u32 uval, nval; | 1712 | u32 uval, nval, mval; |
1705 | 1713 | ||
1706 | retry: | 1714 | retry: |
1707 | if (get_user(uval, uaddr)) | 1715 | if (get_user(uval, uaddr)) |
@@ -1718,21 +1726,45 @@ retry: | |||
1718 | * thread-death.) The rest of the cleanup is done in | 1726 | * thread-death.) The rest of the cleanup is done in |
1719 | * userspace. | 1727 | * userspace. |
1720 | */ | 1728 | */ |
1721 | nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, | 1729 | mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; |
1722 | uval | FUTEX_OWNER_DIED); | 1730 | nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval); |
1731 | |||
1723 | if (nval == -EFAULT) | 1732 | if (nval == -EFAULT) |
1724 | return -1; | 1733 | return -1; |
1725 | 1734 | ||
1726 | if (nval != uval) | 1735 | if (nval != uval) |
1727 | goto retry; | 1736 | goto retry; |
1728 | 1737 | ||
1729 | if (uval & FUTEX_WAITERS) | 1738 | /* |
1730 | futex_wake(uaddr, 1); | 1739 | * Wake robust non-PI futexes here. The wakeup of |
1740 | * PI futexes happens in exit_pi_state(): | ||
1741 | */ | ||
1742 | if (!pi) { | ||
1743 | if (uval & FUTEX_WAITERS) | ||
1744 | futex_wake(uaddr, 1); | ||
1745 | } | ||
1731 | } | 1746 | } |
1732 | return 0; | 1747 | return 0; |
1733 | } | 1748 | } |
1734 | 1749 | ||
1735 | /* | 1750 | /* |
1751 | * Fetch a robust-list pointer. Bit 0 signals PI futexes: | ||
1752 | */ | ||
1753 | static inline int fetch_robust_entry(struct robust_list __user **entry, | ||
1754 | struct robust_list __user **head, int *pi) | ||
1755 | { | ||
1756 | unsigned long uentry; | ||
1757 | |||
1758 | if (get_user(uentry, (unsigned long *)head)) | ||
1759 | return -EFAULT; | ||
1760 | |||
1761 | *entry = (void *)(uentry & ~1UL); | ||
1762 | *pi = uentry & 1; | ||
1763 | |||
1764 | return 0; | ||
1765 | } | ||
1766 | |||
1767 | /* | ||
1736 | * Walk curr->robust_list (very carefully, it's a userspace list!) | 1768 | * Walk curr->robust_list (very carefully, it's a userspace list!) |
1737 | * and mark any locks found there dead, and notify any waiters. | 1769 | * and mark any locks found there dead, and notify any waiters. |
1738 | * | 1770 | * |
@@ -1742,14 +1774,14 @@ void exit_robust_list(struct task_struct *curr) | |||
1742 | { | 1774 | { |
1743 | struct robust_list_head __user *head = curr->robust_list; | 1775 | struct robust_list_head __user *head = curr->robust_list; |
1744 | struct robust_list __user *entry, *pending; | 1776 | struct robust_list __user *entry, *pending; |
1745 | unsigned int limit = ROBUST_LIST_LIMIT; | 1777 | unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; |
1746 | unsigned long futex_offset; | 1778 | unsigned long futex_offset; |
1747 | 1779 | ||
1748 | /* | 1780 | /* |
1749 | * Fetch the list head (which was registered earlier, via | 1781 | * Fetch the list head (which was registered earlier, via |
1750 | * sys_set_robust_list()): | 1782 | * sys_set_robust_list()): |
1751 | */ | 1783 | */ |
1752 | if (get_user(entry, &head->list.next)) | 1784 | if (fetch_robust_entry(&entry, &head->list.next, &pi)) |
1753 | return; | 1785 | return; |
1754 | /* | 1786 | /* |
1755 | * Fetch the relative futex offset: | 1787 | * Fetch the relative futex offset: |
@@ -1760,10 +1792,11 @@ void exit_robust_list(struct task_struct *curr) | |||
1760 | * Fetch any possibly pending lock-add first, and handle it | 1792 | * Fetch any possibly pending lock-add first, and handle it |
1761 | * if it exists: | 1793 | * if it exists: |
1762 | */ | 1794 | */ |
1763 | if (get_user(pending, &head->list_op_pending)) | 1795 | if (fetch_robust_entry(&pending, &head->list_op_pending, &pip)) |
1764 | return; | 1796 | return; |
1797 | |||
1765 | if (pending) | 1798 | if (pending) |
1766 | handle_futex_death((void *)pending + futex_offset, curr); | 1799 | handle_futex_death((void *)pending + futex_offset, curr, pip); |
1767 | 1800 | ||
1768 | while (entry != &head->list) { | 1801 | while (entry != &head->list) { |
1769 | /* | 1802 | /* |
@@ -1772,12 +1805,12 @@ void exit_robust_list(struct task_struct *curr) | |||
1772 | */ | 1805 | */ |
1773 | if (entry != pending) | 1806 | if (entry != pending) |
1774 | if (handle_futex_death((void *)entry + futex_offset, | 1807 | if (handle_futex_death((void *)entry + futex_offset, |
1775 | curr)) | 1808 | curr, pi)) |
1776 | return; | 1809 | return; |
1777 | /* | 1810 | /* |
1778 | * Fetch the next entry in the list: | 1811 | * Fetch the next entry in the list: |
1779 | */ | 1812 | */ |
1780 | if (get_user(entry, &entry->next)) | 1813 | if (fetch_robust_entry(&entry, &entry->next, &pi)) |
1781 | return; | 1814 | return; |
1782 | /* | 1815 | /* |
1783 | * Avoid excessively long or circular lists: | 1816 | * Avoid excessively long or circular lists: |
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index d1d92b441fb7..d1aab1a452cc 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c | |||
@@ -12,6 +12,23 @@ | |||
12 | 12 | ||
13 | #include <asm/uaccess.h> | 13 | #include <asm/uaccess.h> |
14 | 14 | ||
15 | |||
16 | /* | ||
17 | * Fetch a robust-list pointer. Bit 0 signals PI futexes: | ||
18 | */ | ||
19 | static inline int | ||
20 | fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, | ||
21 | compat_uptr_t *head, int *pi) | ||
22 | { | ||
23 | if (get_user(*uentry, head)) | ||
24 | return -EFAULT; | ||
25 | |||
26 | *entry = compat_ptr((*uentry) & ~1); | ||
27 | *pi = (unsigned int)(*uentry) & 1; | ||
28 | |||
29 | return 0; | ||
30 | } | ||
31 | |||
15 | /* | 32 | /* |
16 | * Walk curr->robust_list (very carefully, it's a userspace list!) | 33 | * Walk curr->robust_list (very carefully, it's a userspace list!) |
17 | * and mark any locks found there dead, and notify any waiters. | 34 | * and mark any locks found there dead, and notify any waiters. |
@@ -22,17 +39,16 @@ void compat_exit_robust_list(struct task_struct *curr) | |||
22 | { | 39 | { |
23 | struct compat_robust_list_head __user *head = curr->compat_robust_list; | 40 | struct compat_robust_list_head __user *head = curr->compat_robust_list; |
24 | struct robust_list __user *entry, *pending; | 41 | struct robust_list __user *entry, *pending; |
42 | unsigned int limit = ROBUST_LIST_LIMIT, pi; | ||
25 | compat_uptr_t uentry, upending; | 43 | compat_uptr_t uentry, upending; |
26 | unsigned int limit = ROBUST_LIST_LIMIT; | ||
27 | compat_long_t futex_offset; | 44 | compat_long_t futex_offset; |
28 | 45 | ||
29 | /* | 46 | /* |
30 | * Fetch the list head (which was registered earlier, via | 47 | * Fetch the list head (which was registered earlier, via |
31 | * sys_set_robust_list()): | 48 | * sys_set_robust_list()): |
32 | */ | 49 | */ |
33 | if (get_user(uentry, &head->list.next)) | 50 | if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) |
34 | return; | 51 | return; |
35 | entry = compat_ptr(uentry); | ||
36 | /* | 52 | /* |
37 | * Fetch the relative futex offset: | 53 | * Fetch the relative futex offset: |
38 | */ | 54 | */ |
@@ -42,11 +58,11 @@ void compat_exit_robust_list(struct task_struct *curr) | |||
42 | * Fetch any possibly pending lock-add first, and handle it | 58 | * Fetch any possibly pending lock-add first, and handle it |
43 | * if it exists: | 59 | * if it exists: |
44 | */ | 60 | */ |
45 | if (get_user(upending, &head->list_op_pending)) | 61 | if (fetch_robust_entry(&upending, &pending, |
62 | &head->list_op_pending, &pi)) | ||
46 | return; | 63 | return; |
47 | pending = compat_ptr(upending); | ||
48 | if (upending) | 64 | if (upending) |
49 | handle_futex_death((void *)pending + futex_offset, curr); | 65 | handle_futex_death((void *)pending + futex_offset, curr, pi); |
50 | 66 | ||
51 | while (compat_ptr(uentry) != &head->list) { | 67 | while (compat_ptr(uentry) != &head->list) { |
52 | /* | 68 | /* |
@@ -55,15 +71,15 @@ void compat_exit_robust_list(struct task_struct *curr) | |||
55 | */ | 71 | */ |
56 | if (entry != pending) | 72 | if (entry != pending) |
57 | if (handle_futex_death((void *)entry + futex_offset, | 73 | if (handle_futex_death((void *)entry + futex_offset, |
58 | curr)) | 74 | curr, pi)) |
59 | return; | 75 | return; |
60 | 76 | ||
61 | /* | 77 | /* |
62 | * Fetch the next entry in the list: | 78 | * Fetch the next entry in the list: |
63 | */ | 79 | */ |
64 | if (get_user(uentry, (compat_uptr_t *)&entry->next)) | 80 | if (fetch_robust_entry(&uentry, &entry, |
81 | (compat_uptr_t *)&entry->next, &pi)) | ||
65 | return; | 82 | return; |
66 | entry = compat_ptr(uentry); | ||
67 | /* | 83 | /* |
68 | * Avoid excessively long or circular lists: | 84 | * Avoid excessively long or circular lists: |
69 | */ | 85 | */ |