Diffstat (limited to 'kernel/futex.c')
-rw-r--r--  kernel/futex.c | 127
1 file changed, 91 insertions(+), 36 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index 1dc98e4dd287..dda2049692a2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -415,15 +415,15 @@ out_unlock:
  */
 void exit_pi_state_list(struct task_struct *curr)
 {
-	struct futex_hash_bucket *hb;
 	struct list_head *next, *head = &curr->pi_state_list;
 	struct futex_pi_state *pi_state;
+	struct futex_hash_bucket *hb;
 	union futex_key key;
 
 	/*
 	 * We are a ZOMBIE and nobody can enqueue itself on
 	 * pi_state_list anymore, but we have to be careful
-	 * versus waiters unqueueing themselfs
+	 * versus waiters unqueueing themselves:
 	 */
 	spin_lock_irq(&curr->pi_lock);
 	while (!list_empty(head)) {
@@ -431,21 +431,24 @@ void exit_pi_state_list(struct task_struct *curr)
 		next = head->next;
 		pi_state = list_entry(next, struct futex_pi_state, list);
 		key = pi_state->key;
+		hb = hash_futex(&key);
 		spin_unlock_irq(&curr->pi_lock);
 
-		hb = hash_futex(&key);
 		spin_lock(&hb->lock);
 
 		spin_lock_irq(&curr->pi_lock);
+		/*
+		 * We dropped the pi-lock, so re-check whether this
+		 * task still owns the PI-state:
+		 */
 		if (head->next != next) {
 			spin_unlock(&hb->lock);
 			continue;
 		}
 
-		list_del_init(&pi_state->list);
-
 		WARN_ON(pi_state->owner != curr);
-
+		WARN_ON(list_empty(&pi_state->list));
+		list_del_init(&pi_state->list);
 		pi_state->owner = NULL;
 		spin_unlock_irq(&curr->pi_lock);
 
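
A note on the reordering just above: pi_state may be freed the moment curr->pi_lock is dropped, which is why the key is copied and the hash bucket computed while the lock is still held, why hb->lock is then taken outside pi_lock (the required lock order), and why head->next is re-checked once both locks are held again. Below is a minimal, runnable userspace analog of that drop-retake-revalidate pattern; pthread mutexes stand in for the spinlocks and every name is illustrative, not kernel API:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t task_lock   = PTHREAD_MUTEX_INITIALIZER; /* ~ curr->pi_lock */
    static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER; /* ~ hb->lock */
    static int head_next = 3;                 /* stand-in for head->next */

    static void drain(void)
    {
        pthread_mutex_lock(&task_lock);
        while (head_next != 0) {
            int snapshot = head_next;         /* remember the first entry */

            /* bucket_lock nests outside task_lock: drop, take, retake */
            pthread_mutex_unlock(&task_lock);
            pthread_mutex_lock(&bucket_lock);
            pthread_mutex_lock(&task_lock);

            if (head_next != snapshot) {      /* a waiter unqueued meanwhile */
                pthread_mutex_unlock(&bucket_lock);
                continue;                     /* retry, task_lock still held */
            }
            head_next--;                      /* "process" the entry */
            pthread_mutex_unlock(&task_lock);
            pthread_mutex_unlock(&bucket_lock);
            pthread_mutex_lock(&task_lock);
        }
        pthread_mutex_unlock(&task_lock);
    }

    int main(void)
    {
        drain();
        puts("list drained");
        return 0;
    }

Build with cc -pthread; single-threaded it only walks the happy path, but the shape of the loop is the point.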
@@ -470,12 +473,20 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 	head = &hb->chain;
 
 	list_for_each_entry_safe(this, next, head, list) {
-		if (match_futex (&this->key, &me->key)) {
+		if (match_futex(&this->key, &me->key)) {
 			/*
 			 * Another waiter already exists - bump up
 			 * the refcount and return its pi_state:
 			 */
 			pi_state = this->pi_state;
+			/*
+			 * Userspace might have messed up non PI and PI futexes
+			 */
+			if (unlikely(!pi_state))
+				return -EINVAL;
+
+			WARN_ON(!atomic_read(&pi_state->refcount));
+
 			atomic_inc(&pi_state->refcount);
 			me->pi_state = pi_state;
 
@@ -484,10 +495,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 	}
 
 	/*
-	 * We are the first waiter - try to look up the real owner and
-	 * attach the new pi_state to it:
+	 * We are the first waiter - try to look up the real owner and attach
+	 * the new pi_state to it, but bail out when the owner died bit is set
+	 * and TID = 0:
 	 */
 	pid = uval & FUTEX_TID_MASK;
+	if (!pid && (uval & FUTEX_OWNER_DIED))
+		return -ESRCH;
 	p = futex_find_get_task(pid);
 	if (!p)
 		return -ESRCH;
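
The new bail-out reads straight off the PI futex word format: the low 30 bits hold the owner's TID, and the two top bits are the FUTEX_WAITERS and FUTEX_OWNER_DIED flags. TID == 0 with the owner-died bit set means robust-exit cleanup already ran and there is no owner task left to attach a pi_state to. A small standalone decoder, using the flag values from the futex ABI:

    #include <stdint.h>
    #include <stdio.h>

    #define FUTEX_WAITERS    0x80000000
    #define FUTEX_OWNER_DIED 0x40000000
    #define FUTEX_TID_MASK   0x3fffffff

    int main(void)
    {
        uint32_t uval = FUTEX_OWNER_DIED;  /* dead owner, TID already cleared */
        uint32_t pid  = uval & FUTEX_TID_MASK;

        if (!pid && (uval & FUTEX_OWNER_DIED))
            puts("no owner task to look up -> -ESRCH");
        else
            printf("owner TID = %u\n", pid);
        return 0;
    }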
@@ -504,6 +518,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 	pi_state->key = me->key;
 
 	spin_lock_irq(&p->pi_lock);
+	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &p->pi_state_list);
 	pi_state->owner = p;
 	spin_unlock_irq(&p->pi_lock);
@@ -567,20 +582,29 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	 * kept enabled while there is PI state around. We must also
 	 * preserve the owner died bit.)
 	 */
-	newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid;
-
-	inc_preempt_count();
-	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-	dec_preempt_count();
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		newval = FUTEX_WAITERS | new_owner->pid;
+
+		inc_preempt_count();
+		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+		dec_preempt_count();
+		if (curval == -EFAULT)
+			return -EFAULT;
+		if (curval != uval)
+			return -EINVAL;
+	}
 
-	if (curval == -EFAULT)
-		return -EFAULT;
-	if (curval != uval)
-		return -EINVAL;
+	spin_lock_irq(&pi_state->owner->pi_lock);
+	WARN_ON(list_empty(&pi_state->list));
+	list_del_init(&pi_state->list);
+	spin_unlock_irq(&pi_state->owner->pi_lock);
 
-	list_del_init(&pi_state->owner->pi_state_list);
+	spin_lock_irq(&new_owner->pi_lock);
+	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &new_owner->pi_state_list);
 	pi_state->owner = new_owner;
+	spin_unlock_irq(&new_owner->pi_lock);
+
 	rt_mutex_unlock(&pi_state->pi_mutex);
 
 	return 0;
@@ -1230,6 +1254,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 		/* Owner died? */
 		if (q.pi_state->owner != NULL) {
 			spin_lock_irq(&q.pi_state->owner->pi_lock);
+			WARN_ON(list_empty(&q.pi_state->list));
 			list_del_init(&q.pi_state->list);
 			spin_unlock_irq(&q.pi_state->owner->pi_lock);
 		} else
@@ -1238,6 +1263,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 		q.pi_state->owner = current;
 
 		spin_lock_irq(&current->pi_lock);
+		WARN_ON(!list_empty(&q.pi_state->list));
 		list_add(&q.pi_state->list, &current->pi_state_list);
 		spin_unlock_irq(&current->pi_lock);
 
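
The WARN_ON pairs added here (and in the exit_pi_state_list and lookup_pi_state hunks above) rely on a property of the kernel list primitives: list_del_init() leaves the deleted entry self-linked, so list_empty() applied to the entry itself distinguishes "still queued" from "already unlinked", and a double unlink shows up as a warning instead of silent corruption. A self-contained demonstration with a minimal userspace clone of those primitives:

    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };

    static void init_list_head(struct list_head *h) { h->next = h->prev = h; }
    static int list_empty(const struct list_head *h) { return h->next == h; }

    static void list_add(struct list_head *n, struct list_head *h)
    {
        n->next = h->next;
        n->prev = h;
        h->next->prev = n;
        h->next = n;
    }

    static void list_del_init(struct list_head *e)
    {
        e->prev->next = e->next;
        e->next->prev = e->prev;
        init_list_head(e);              /* entry now points at itself */
    }

    int main(void)
    {
        struct list_head head, node;

        init_list_head(&head);
        init_list_head(&node);

        list_add(&node, &head);
        printf("queued:   list_empty(&node) = %d\n", list_empty(&node)); /* 0 */
        list_del_init(&node);
        printf("unlinked: list_empty(&node) = %d\n", list_empty(&node)); /* 1 */
        return 0;
    }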
@@ -1421,9 +1447,11 @@ retry_locked:
 	 * again. If it succeeds then we can return without waking
 	 * anyone else up:
 	 */
-	inc_preempt_count();
-	uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
-	dec_preempt_count();
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		inc_preempt_count();
+		uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
+		dec_preempt_count();
+	}
 
 	if (unlikely(uval == -EFAULT))
 		goto pi_faulted;
@@ -1456,9 +1484,11 @@ retry_locked:
 	/*
 	 * No waiters - kernel unlocks the futex:
 	 */
-	ret = unlock_futex_pi(uaddr, uval);
-	if (ret == -EFAULT)
-		goto pi_faulted;
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		ret = unlock_futex_pi(uaddr, uval);
+		if (ret == -EFAULT)
+			goto pi_faulted;
+	}
 
 out_unlock:
 	spin_unlock(&hb->lock);
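
This hunk, the lock-release retry above it, and the wake_futex_pi change all enforce one rule introduced by this patch: once FUTEX_OWNER_DIED has been stamped on the user-space word, the kernel stops writing to that word, so the next acquirer is guaranteed to observe the death mark. A sketch of that decision as a plain function (an illustrative helper, not a kernel interface):

    #include <stdint.h>
    #include <stdio.h>

    #define FUTEX_WAITERS    0x80000000
    #define FUTEX_OWNER_DIED 0x40000000

    /* Hand the futex word to new_owner_pid, unless the owner died:
     * then leave the word untouched so the death mark stays visible. */
    static uint32_t handover(uint32_t uval, uint32_t new_owner_pid)
    {
        if (uval & FUTEX_OWNER_DIED)
            return uval;                          /* no user-space write */
        return FUTEX_WAITERS | new_owner_pid;     /* normal handover */
    }

    int main(void)
    {
        printf("%#x\n", handover(FUTEX_WAITERS | 42, 43));    /* 0x8000002b */
        printf("%#x\n", handover(FUTEX_OWNER_DIED | 42, 43)); /* 0x4000002a, unchanged */
        return 0;
    }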
@@ -1677,9 +1707,9 @@ err_unlock:
  * Process a futex-list entry, check whether it's owned by the
  * dying task, and do notification if so:
  */
-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
+int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
 {
-	u32 uval, nval;
+	u32 uval, nval, mval;
 
 retry:
 	if (get_user(uval, uaddr))
@@ -1696,21 +1726,45 @@ retry:
 		 * thread-death.) The rest of the cleanup is done in
 		 * userspace.
 		 */
-		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval,
-						     uval | FUTEX_OWNER_DIED);
+		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
+
 		if (nval == -EFAULT)
 			return -1;
 
 		if (nval != uval)
 			goto retry;
 
-		if (uval & FUTEX_WAITERS)
-			futex_wake(uaddr, 1);
+		/*
+		 * Wake robust non-PI futexes here. The wakeup of
+		 * PI futexes happens in exit_pi_state():
+		 */
+		if (!pi) {
+			if (uval & FUTEX_WAITERS)
+				futex_wake(uaddr, 1);
+		}
 	}
 	return 0;
 }
 
 /*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int fetch_robust_entry(struct robust_list __user **entry,
+				     struct robust_list __user **head, int *pi)
+{
+	unsigned long uentry;
+
+	if (get_user(uentry, (unsigned long *)head))
+		return -EFAULT;
+
+	*entry = (void *)(uentry & ~1UL);
+	*pi = uentry & 1;
+
+	return 0;
+}
+
+/*
  * Walk curr->robust_list (very carefully, it's a userspace list!)
  * and mark any locks found there dead, and notify any waiters.
 *
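
Note what the cmpxchg above now stores: the old code OR-ed FUTEX_OWNER_DIED into the whole word, leaving the dead owner's TID behind; the new mval keeps only the waiters bit and clears the TID. A quick check of the transformation:

    #include <stdint.h>
    #include <stdio.h>

    #define FUTEX_WAITERS    0x80000000
    #define FUTEX_OWNER_DIED 0x40000000
    #define FUTEX_TID_MASK   0x3fffffff

    int main(void)
    {
        uint32_t uval = FUTEX_WAITERS | 1234;  /* held by dead TID 1234, waiters queued */

        uint32_t old_style = uval | FUTEX_OWNER_DIED;               /* TID survives */
        uint32_t mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;  /* TID cleared */

        printf("old: %#x (tid=%u)  new: %#x (tid=%u)\n",
               old_style, old_style & FUTEX_TID_MASK,
               mval, mval & FUTEX_TID_MASK);
        return 0;
    }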
@@ -1720,14 +1774,14 @@ void exit_robust_list(struct task_struct *curr)
 {
 	struct robust_list_head __user *head = curr->robust_list;
 	struct robust_list __user *entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
 	unsigned long futex_offset;
 
 	/*
 	 * Fetch the list head (which was registered earlier, via
 	 * sys_set_robust_list()):
 	 */
-	if (get_user(entry, &head->list.next))
+	if (fetch_robust_entry(&entry, &head->list.next, &pi))
 		return;
 	/*
 	 * Fetch the relative futex offset:
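
The pi/pip values fetched above come out of fetch_robust_entry(): user space tags each robust-list pointer by setting bit 0 when the entry refers to a PI futex, which is safe to smuggle because the list nodes are word-aligned. A runnable userspace illustration of the encode/decode round trip:

    #include <stdint.h>
    #include <stdio.h>

    struct robust_list { struct robust_list *next; };

    int main(void)
    {
        struct robust_list node;

        /* Encode: the node's address with bit 0 set marks a PI entry. */
        uintptr_t uentry = (uintptr_t)&node | 1UL;

        /* Decode, exactly as fetch_robust_entry() does: */
        struct robust_list *entry = (struct robust_list *)(uentry & ~1UL);
        int pi = uentry & 1;

        printf("entry=%p pi=%d (round trip ok: %d)\n",
               (void *)entry, pi, entry == &node);
        return 0;
    }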
@@ -1738,10 +1792,11 @@ void exit_robust_list(struct task_struct *curr)
 	 * Fetch any possibly pending lock-add first, and handle it
 	 * if it exists:
 	 */
-	if (get_user(pending, &head->list_op_pending))
+	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
 		return;
+
 	if (pending)
-		handle_futex_death((void *)pending + futex_offset, curr);
+		handle_futex_death((void *)pending + futex_offset, curr, pip);
 
 	while (entry != &head->list) {
 		/*
@@ -1750,12 +1805,12 @@ void exit_robust_list(struct task_struct *curr)
 		 */
 		if (entry != pending)
 			if (handle_futex_death((void *)entry + futex_offset,
-						curr))
+						curr, pi))
 				return;
 		/*
 		 * Fetch the next entry in the list:
 		 */
-		if (get_user(entry, &entry->next))
+		if (fetch_robust_entry(&entry, &entry->next, &pi))
 			return;
 		/*
 		 * Avoid excessively long or circular lists: