about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/futex.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/futex.c')
-rw-r--r-- kernel/futex.c 140
1 files changed, 97 insertions, 43 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index cf0c8e21d1ab..b9b8aea5389e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -297,7 +297,7 @@ static int futex_handle_fault(unsigned long address, int attempt)
297 struct vm_area_struct * vma; 297 struct vm_area_struct * vma;
298 struct mm_struct *mm = current->mm; 298 struct mm_struct *mm = current->mm;
299 299
300 if (attempt >= 2 || !(vma = find_vma(mm, address)) || 300 if (attempt > 2 || !(vma = find_vma(mm, address)) ||
301 vma->vm_start > address || !(vma->vm_flags & VM_WRITE)) 301 vma->vm_start > address || !(vma->vm_flags & VM_WRITE))
302 return -EFAULT; 302 return -EFAULT;
303 303
@@ -397,7 +397,7 @@ static struct task_struct * futex_find_get_task(pid_t pid)
397 p = NULL; 397 p = NULL;
398 goto out_unlock; 398 goto out_unlock;
399 } 399 }
400 if (p->state == EXIT_ZOMBIE || p->exit_state == EXIT_ZOMBIE) { 400 if (p->exit_state != 0) {
401 p = NULL; 401 p = NULL;
402 goto out_unlock; 402 goto out_unlock;
403 } 403 }
@@ -415,15 +415,15 @@ out_unlock:
415 */ 415 */
416void exit_pi_state_list(struct task_struct *curr) 416void exit_pi_state_list(struct task_struct *curr)
417{ 417{
418 struct futex_hash_bucket *hb;
419 struct list_head *next, *head = &curr->pi_state_list; 418 struct list_head *next, *head = &curr->pi_state_list;
420 struct futex_pi_state *pi_state; 419 struct futex_pi_state *pi_state;
420 struct futex_hash_bucket *hb;
421 union futex_key key; 421 union futex_key key;
422 422
423 /* 423 /*
424 * We are a ZOMBIE and nobody can enqueue itself on 424 * We are a ZOMBIE and nobody can enqueue itself on
425 * pi_state_list anymore, but we have to be careful 425 * pi_state_list anymore, but we have to be careful
426 * versus waiters unqueueing themselfs 426 * versus waiters unqueueing themselves:
427 */ 427 */
428 spin_lock_irq(&curr->pi_lock); 428 spin_lock_irq(&curr->pi_lock);
429 while (!list_empty(head)) { 429 while (!list_empty(head)) {
@@ -431,21 +431,24 @@ void exit_pi_state_list(struct task_struct *curr)
431 next = head->next; 431 next = head->next;
432 pi_state = list_entry(next, struct futex_pi_state, list); 432 pi_state = list_entry(next, struct futex_pi_state, list);
433 key = pi_state->key; 433 key = pi_state->key;
434 hb = hash_futex(&key);
434 spin_unlock_irq(&curr->pi_lock); 435 spin_unlock_irq(&curr->pi_lock);
435 436
436 hb = hash_futex(&key);
437 spin_lock(&hb->lock); 437 spin_lock(&hb->lock);
438 438
439 spin_lock_irq(&curr->pi_lock); 439 spin_lock_irq(&curr->pi_lock);
440 /*
441 * We dropped the pi-lock, so re-check whether this
442 * task still owns the PI-state:
443 */
440 if (head->next != next) { 444 if (head->next != next) {
441 spin_unlock(&hb->lock); 445 spin_unlock(&hb->lock);
442 continue; 446 continue;
443 } 447 }
444 448
445 list_del_init(&pi_state->list);
446
447 WARN_ON(pi_state->owner != curr); 449 WARN_ON(pi_state->owner != curr);
448 450 WARN_ON(list_empty(&pi_state->list));
451 list_del_init(&pi_state->list);
449 pi_state->owner = NULL; 452 pi_state->owner = NULL;
450 spin_unlock_irq(&curr->pi_lock); 453 spin_unlock_irq(&curr->pi_lock);
451 454
@@ -470,7 +473,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
470 head = &hb->chain; 473 head = &hb->chain;
471 474
472 list_for_each_entry_safe(this, next, head, list) { 475 list_for_each_entry_safe(this, next, head, list) {
473 if (match_futex (&this->key, &me->key)) { 476 if (match_futex(&this->key, &me->key)) {
474 /* 477 /*
475 * Another waiter already exists - bump up 478 * Another waiter already exists - bump up
476 * the refcount and return its pi_state: 479 * the refcount and return its pi_state:
@@ -482,6 +485,8 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
482 if (unlikely(!pi_state)) 485 if (unlikely(!pi_state))
483 return -EINVAL; 486 return -EINVAL;
484 487
488 WARN_ON(!atomic_read(&pi_state->refcount));
489
485 atomic_inc(&pi_state->refcount); 490 atomic_inc(&pi_state->refcount);
486 me->pi_state = pi_state; 491 me->pi_state = pi_state;
487 492
@@ -490,10 +495,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
490 } 495 }
491 496
492 /* 497 /*
493 * We are the first waiter - try to look up the real owner and 498 * We are the first waiter - try to look up the real owner and attach
494 * attach the new pi_state to it: 499 * the new pi_state to it, but bail out when the owner died bit is set
500 * and TID = 0:
495 */ 501 */
496 pid = uval & FUTEX_TID_MASK; 502 pid = uval & FUTEX_TID_MASK;
503 if (!pid && (uval & FUTEX_OWNER_DIED))
504 return -ESRCH;
497 p = futex_find_get_task(pid); 505 p = futex_find_get_task(pid);
498 if (!p) 506 if (!p)
499 return -ESRCH; 507 return -ESRCH;
@@ -510,6 +518,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
510 pi_state->key = me->key; 518 pi_state->key = me->key;
511 519
512 spin_lock_irq(&p->pi_lock); 520 spin_lock_irq(&p->pi_lock);
521 WARN_ON(!list_empty(&pi_state->list));
513 list_add(&pi_state->list, &p->pi_state_list); 522 list_add(&pi_state->list, &p->pi_state_list);
514 pi_state->owner = p; 523 pi_state->owner = p;
515 spin_unlock_irq(&p->pi_lock); 524 spin_unlock_irq(&p->pi_lock);
@@ -573,20 +582,29 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
573 * kept enabled while there is PI state around. We must also 582 * kept enabled while there is PI state around. We must also
574 * preserve the owner died bit.) 583 * preserve the owner died bit.)
575 */ 584 */
576 newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid; 585 if (!(uval & FUTEX_OWNER_DIED)) {
586 newval = FUTEX_WAITERS | new_owner->pid;
577 587
578 inc_preempt_count(); 588 inc_preempt_count();
579 curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); 589 curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
580 dec_preempt_count(); 590 dec_preempt_count();
591 if (curval == -EFAULT)
592 return -EFAULT;
593 if (curval != uval)
594 return -EINVAL;
595 }
581 596
582 if (curval == -EFAULT) 597 spin_lock_irq(&pi_state->owner->pi_lock);
583 return -EFAULT; 598 WARN_ON(list_empty(&pi_state->list));
584 if (curval != uval) 599 list_del_init(&pi_state->list);
585 return -EINVAL; 600 spin_unlock_irq(&pi_state->owner->pi_lock);
586 601
587 list_del_init(&pi_state->owner->pi_state_list); 602 spin_lock_irq(&new_owner->pi_lock);
603 WARN_ON(!list_empty(&pi_state->list));
588 list_add(&pi_state->list, &new_owner->pi_state_list); 604 list_add(&pi_state->list, &new_owner->pi_state_list);
589 pi_state->owner = new_owner; 605 pi_state->owner = new_owner;
606 spin_unlock_irq(&new_owner->pi_lock);
607
590 rt_mutex_unlock(&pi_state->pi_mutex); 608 rt_mutex_unlock(&pi_state->pi_mutex);
591 609
592 return 0; 610 return 0;
@@ -729,8 +747,10 @@ retry:
729 */ 747 */
730 if (attempt++) { 748 if (attempt++) {
731 if (futex_handle_fault((unsigned long)uaddr2, 749 if (futex_handle_fault((unsigned long)uaddr2,
732 attempt)) 750 attempt)) {
751 ret = -EFAULT;
733 goto out; 752 goto out;
753 }
734 goto retry; 754 goto retry;
735 } 755 }
736 756
@@ -930,6 +950,7 @@ static int unqueue_me(struct futex_q *q)
930 /* In the common case we don't take the spinlock, which is nice. */ 950 /* In the common case we don't take the spinlock, which is nice. */
931 retry: 951 retry:
932 lock_ptr = q->lock_ptr; 952 lock_ptr = q->lock_ptr;
953 barrier();
933 if (lock_ptr != 0) { 954 if (lock_ptr != 0) {
934 spin_lock(lock_ptr); 955 spin_lock(lock_ptr);
935 /* 956 /*
@@ -1236,6 +1257,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
1236 /* Owner died? */ 1257 /* Owner died? */
1237 if (q.pi_state->owner != NULL) { 1258 if (q.pi_state->owner != NULL) {
1238 spin_lock_irq(&q.pi_state->owner->pi_lock); 1259 spin_lock_irq(&q.pi_state->owner->pi_lock);
1260 WARN_ON(list_empty(&q.pi_state->list));
1239 list_del_init(&q.pi_state->list); 1261 list_del_init(&q.pi_state->list);
1240 spin_unlock_irq(&q.pi_state->owner->pi_lock); 1262 spin_unlock_irq(&q.pi_state->owner->pi_lock);
1241 } else 1263 } else
@@ -1244,6 +1266,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
1244 q.pi_state->owner = current; 1266 q.pi_state->owner = current;
1245 1267
1246 spin_lock_irq(&current->pi_lock); 1268 spin_lock_irq(&current->pi_lock);
1269 WARN_ON(!list_empty(&q.pi_state->list));
1247 list_add(&q.pi_state->list, &current->pi_state_list); 1270 list_add(&q.pi_state->list, &current->pi_state_list);
1248 spin_unlock_irq(&current->pi_lock); 1271 spin_unlock_irq(&current->pi_lock);
1249 1272
@@ -1301,9 +1324,10 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
1301 * still holding the mmap_sem. 1324 * still holding the mmap_sem.
1302 */ 1325 */
1303 if (attempt++) { 1326 if (attempt++) {
1304 if (futex_handle_fault((unsigned long)uaddr, attempt)) 1327 if (futex_handle_fault((unsigned long)uaddr, attempt)) {
1328 ret = -EFAULT;
1305 goto out_unlock_release_sem; 1329 goto out_unlock_release_sem;
1306 1330 }
1307 goto retry_locked; 1331 goto retry_locked;
1308 } 1332 }
1309 1333
@@ -1427,9 +1451,11 @@ retry_locked:
1427 * again. If it succeeds then we can return without waking 1451 * again. If it succeeds then we can return without waking
1428 * anyone else up: 1452 * anyone else up:
1429 */ 1453 */
1430 inc_preempt_count(); 1454 if (!(uval & FUTEX_OWNER_DIED)) {
1431 uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0); 1455 inc_preempt_count();
1432 dec_preempt_count(); 1456 uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
1457 dec_preempt_count();
1458 }
1433 1459
1434 if (unlikely(uval == -EFAULT)) 1460 if (unlikely(uval == -EFAULT))
1435 goto pi_faulted; 1461 goto pi_faulted;
@@ -1462,9 +1488,11 @@ retry_locked:
1462 /* 1488 /*
1463 * No waiters - kernel unlocks the futex: 1489 * No waiters - kernel unlocks the futex:
1464 */ 1490 */
1465 ret = unlock_futex_pi(uaddr, uval); 1491 if (!(uval & FUTEX_OWNER_DIED)) {
1466 if (ret == -EFAULT) 1492 ret = unlock_futex_pi(uaddr, uval);
1467 goto pi_faulted; 1493 if (ret == -EFAULT)
1494 goto pi_faulted;
1495 }
1468 1496
1469out_unlock: 1497out_unlock:
1470 spin_unlock(&hb->lock); 1498 spin_unlock(&hb->lock);
@@ -1481,9 +1509,10 @@ pi_faulted:
1481 * still holding the mmap_sem. 1509 * still holding the mmap_sem.
1482 */ 1510 */
1483 if (attempt++) { 1511 if (attempt++) {
1484 if (futex_handle_fault((unsigned long)uaddr, attempt)) 1512 if (futex_handle_fault((unsigned long)uaddr, attempt)) {
1513 ret = -EFAULT;
1485 goto out_unlock; 1514 goto out_unlock;
1486 1515 }
1487 goto retry_locked; 1516 goto retry_locked;
1488 } 1517 }
1489 1518
@@ -1683,9 +1712,9 @@ err_unlock:
1683 * Process a futex-list entry, check whether it's owned by the 1712 * Process a futex-list entry, check whether it's owned by the
1684 * dying task, and do notification if so: 1713 * dying task, and do notification if so:
1685 */ 1714 */
1686int handle_futex_death(u32 __user *uaddr, struct task_struct *curr) 1715int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
1687{ 1716{
1688 u32 uval, nval; 1717 u32 uval, nval, mval;
1689 1718
1690retry: 1719retry:
1691 if (get_user(uval, uaddr)) 1720 if (get_user(uval, uaddr))
@@ -1702,21 +1731,45 @@ retry:
1702 * thread-death.) The rest of the cleanup is done in 1731 * thread-death.) The rest of the cleanup is done in
1703 * userspace. 1732 * userspace.
1704 */ 1733 */
1705 nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 1734 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
1706 uval | FUTEX_OWNER_DIED); 1735 nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
1736
1707 if (nval == -EFAULT) 1737 if (nval == -EFAULT)
1708 return -1; 1738 return -1;
1709 1739
1710 if (nval != uval) 1740 if (nval != uval)
1711 goto retry; 1741 goto retry;
1712 1742
1713 if (uval & FUTEX_WAITERS) 1743 /*
1714 futex_wake(uaddr, 1); 1744 * Wake robust non-PI futexes here. The wakeup of
1745 * PI futexes happens in exit_pi_state():
1746 */
1747 if (!pi) {
1748 if (uval & FUTEX_WAITERS)
1749 futex_wake(uaddr, 1);
1750 }
1715 } 1751 }
1716 return 0; 1752 return 0;
1717} 1753}
1718 1754
1719/* 1755/*
1756 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
1757 */
1758static inline int fetch_robust_entry(struct robust_list __user **entry,
1759 struct robust_list __user **head, int *pi)
1760{
1761 unsigned long uentry;
1762
1763 if (get_user(uentry, (unsigned long *)head))
1764 return -EFAULT;
1765
1766 *entry = (void *)(uentry & ~1UL);
1767 *pi = uentry & 1;
1768
1769 return 0;
1770}
1771
1772/*
1720 * Walk curr->robust_list (very carefully, it's a userspace list!) 1773 * Walk curr->robust_list (very carefully, it's a userspace list!)
1721 * and mark any locks found there dead, and notify any waiters. 1774 * and mark any locks found there dead, and notify any waiters.
1722 * 1775 *
@@ -1726,14 +1779,14 @@ void exit_robust_list(struct task_struct *curr)
1726{ 1779{
1727 struct robust_list_head __user *head = curr->robust_list; 1780 struct robust_list_head __user *head = curr->robust_list;
1728 struct robust_list __user *entry, *pending; 1781 struct robust_list __user *entry, *pending;
1729 unsigned int limit = ROBUST_LIST_LIMIT; 1782 unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
1730 unsigned long futex_offset; 1783 unsigned long futex_offset;
1731 1784
1732 /* 1785 /*
1733 * Fetch the list head (which was registered earlier, via 1786 * Fetch the list head (which was registered earlier, via
1734 * sys_set_robust_list()): 1787 * sys_set_robust_list()):
1735 */ 1788 */
1736 if (get_user(entry, &head->list.next)) 1789 if (fetch_robust_entry(&entry, &head->list.next, &pi))
1737 return; 1790 return;
1738 /* 1791 /*
1739 * Fetch the relative futex offset: 1792 * Fetch the relative futex offset:
@@ -1744,10 +1797,11 @@ void exit_robust_list(struct task_struct *curr)
1744 * Fetch any possibly pending lock-add first, and handle it 1797 * Fetch any possibly pending lock-add first, and handle it
1745 * if it exists: 1798 * if it exists:
1746 */ 1799 */
1747 if (get_user(pending, &head->list_op_pending)) 1800 if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
1748 return; 1801 return;
1802
1749 if (pending) 1803 if (pending)
1750 handle_futex_death((void *)pending + futex_offset, curr); 1804 handle_futex_death((void *)pending + futex_offset, curr, pip);
1751 1805
1752 while (entry != &head->list) { 1806 while (entry != &head->list) {
1753 /* 1807 /*
@@ -1756,12 +1810,12 @@ void exit_robust_list(struct task_struct *curr)
1756 */ 1810 */
1757 if (entry != pending) 1811 if (entry != pending)
1758 if (handle_futex_death((void *)entry + futex_offset, 1812 if (handle_futex_death((void *)entry + futex_offset,
1759 curr)) 1813 curr, pi))
1760 return; 1814 return;
1761 /* 1815 /*
1762 * Fetch the next entry in the list: 1816 * Fetch the next entry in the list:
1763 */ 1817 */
1764 if (get_user(entry, &entry->next)) 1818 if (fetch_robust_entry(&entry, &entry->next, &pi))
1765 return; 1819 return;
1766 /* 1820 /*
1767 * Avoid excessively long or circular lists: 1821 * Avoid excessively long or circular lists: