| author | Dave Jones &lt;davej@redhat.com&gt; | 2006-09-05 17:20:21 -0400 |
|---|---|---|
| committer | Dave Jones &lt;davej@redhat.com&gt; | 2006-09-05 17:20:21 -0400 |
| commit | 115b384cf87249d76adb0b21aca11ee22128927d | |
| tree | f39a2a54863e9d82d1196906f92c82ab5991c6af (kernel/futex.c) | |
| parent | 8eb7925f93af75e66a240d148efdec212f95bcb7 | |
| parent | c336923b668fdcf0312efbec3b44895d713f4d81 | |
Merge ../linus
Diffstat (limited to 'kernel/futex.c')

| file | mode | lines changed |
|---|---|---|
| kernel/futex.c | -rw-r--r-- | 140 |

1 file changed, 97 insertions(+), 43 deletions(-)
```diff
diff --git a/kernel/futex.c b/kernel/futex.c
index cf0c8e21d1ab..b9b8aea5389e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -297,7 +297,7 @@ static int futex_handle_fault(unsigned long address, int attempt)
 	struct vm_area_struct * vma;
 	struct mm_struct *mm = current->mm;
 
-	if (attempt >= 2 || !(vma = find_vma(mm, address)) ||
+	if (attempt > 2 || !(vma = find_vma(mm, address)) ||
 	    vma->vm_start > address || !(vma->vm_flags & VM_WRITE))
 		return -EFAULT;
 
@@ -397,7 +397,7 @@ static struct task_struct * futex_find_get_task(pid_t pid)
 		p = NULL;
 		goto out_unlock;
 	}
-	if (p->state == EXIT_ZOMBIE || p->exit_state == EXIT_ZOMBIE) {
+	if (p->exit_state != 0) {
 		p = NULL;
 		goto out_unlock;
 	}
@@ -415,15 +415,15 @@ out_unlock:
  */
 void exit_pi_state_list(struct task_struct *curr)
 {
-	struct futex_hash_bucket *hb;
 	struct list_head *next, *head = &curr->pi_state_list;
 	struct futex_pi_state *pi_state;
+	struct futex_hash_bucket *hb;
 	union futex_key key;
 
 	/*
 	 * We are a ZOMBIE and nobody can enqueue itself on
 	 * pi_state_list anymore, but we have to be careful
-	 * versus waiters unqueueing themselfs
+	 * versus waiters unqueueing themselves:
 	 */
 	spin_lock_irq(&curr->pi_lock);
 	while (!list_empty(head)) {
@@ -431,21 +431,24 @@ void exit_pi_state_list(struct task_struct *curr)
 		next = head->next;
 		pi_state = list_entry(next, struct futex_pi_state, list);
 		key = pi_state->key;
+		hb = hash_futex(&key);
 		spin_unlock_irq(&curr->pi_lock);
 
-		hb = hash_futex(&key);
 		spin_lock(&hb->lock);
 
 		spin_lock_irq(&curr->pi_lock);
+		/*
+		 * We dropped the pi-lock, so re-check whether this
+		 * task still owns the PI-state:
+		 */
 		if (head->next != next) {
 			spin_unlock(&hb->lock);
 			continue;
 		}
 
-		list_del_init(&pi_state->list);
-
 		WARN_ON(pi_state->owner != curr);
-
+		WARN_ON(list_empty(&pi_state->list));
+		list_del_init(&pi_state->list);
 		pi_state->owner = NULL;
 		spin_unlock_irq(&curr->pi_lock);
 
```
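The exit_pi_state_list() hunk above is an instance of a common pattern: hb->lock must be taken before pi_lock, so the code snapshots state under pi_lock (hash_futex() only reads the copied key, so it can now run before the unlock), drops pi_lock, takes both locks in the correct order, and then re-validates the snapshot (head->next != next) because a waiter may have unqueued itself in the window. A minimal user-space sketch of the same drop-reacquire-revalidate pattern, with invented names and pthread mutexes standing in for the kernel spinlocks:

```c
#include <pthread.h>
#include <stddef.h>

static pthread_mutex_t outer = PTHREAD_MUTEX_INITIALIZER; /* plays hb->lock       */
static pthread_mutex_t inner = PTHREAD_MUTEX_INITIALIZER; /* plays curr->pi_lock  */
static void *head_next;                                   /* plays head->next     */

static void process_first_entry(void)
{
	void *next;

	pthread_mutex_lock(&inner);
	next = head_next;               /* snapshot state under 'inner' */
	pthread_mutex_unlock(&inner);   /* must drop: lock order is outer, then inner */

	pthread_mutex_lock(&outer);
	pthread_mutex_lock(&inner);
	if (head_next != next) {        /* state moved while unlocked: bail out */
		pthread_mutex_unlock(&inner);
		pthread_mutex_unlock(&outer);
		return;
	}
	/* ... the snapshot is still current, safe to operate on it ... */
	pthread_mutex_unlock(&inner);
	pthread_mutex_unlock(&outer);
}
```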
```diff
@@ -470,7 +473,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 	head = &hb->chain;
 
 	list_for_each_entry_safe(this, next, head, list) {
-		if (match_futex (&this->key, &me->key)) {
+		if (match_futex(&this->key, &me->key)) {
 			/*
 			 * Another waiter already exists - bump up
 			 * the refcount and return its pi_state:
@@ -482,6 +485,8 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 			if (unlikely(!pi_state))
 				return -EINVAL;
 
+			WARN_ON(!atomic_read(&pi_state->refcount));
+
 			atomic_inc(&pi_state->refcount);
 			me->pi_state = pi_state;
 
@@ -490,10 +495,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 	}
 
 	/*
-	 * We are the first waiter - try to look up the real owner and
-	 * attach the new pi_state to it:
+	 * We are the first waiter - try to look up the real owner and attach
+	 * the new pi_state to it, but bail out when the owner died bit is set
+	 * and TID = 0:
 	 */
 	pid = uval & FUTEX_TID_MASK;
+	if (!pid && (uval & FUTEX_OWNER_DIED))
+		return -ESRCH;
 	p = futex_find_get_task(pid);
 	if (!p)
 		return -ESRCH;
```
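The new -ESRCH bail-out keys off the PI-futex word layout: the low 30 bits hold the owner TID and the top two bits are flags. A small demonstration of the check, using the constant values from &lt;linux/futex.h&gt;:

```c
#include <stdint.h>
#include <stdio.h>

/* Flag and mask values as defined in <linux/futex.h>: */
#define FUTEX_WAITERS    0x80000000
#define FUTEX_OWNER_DIED 0x40000000
#define FUTEX_TID_MASK   0x3fffffff

int main(void)
{
	/* The state robust cleanup leaves behind: owner died, no live TID. */
	uint32_t uval = FUTEX_OWNER_DIED;

	/* lookup_pi_state() now refuses to attach a new pi_state in this
	 * case, since there is no owner task left to hang it off. */
	if (!(uval & FUTEX_TID_MASK) && (uval & FUTEX_OWNER_DIED))
		printf("owner died and TID == 0 -> -ESRCH\n");
	return 0;
}
```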
```diff
@@ -510,6 +518,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 	pi_state->key = me->key;
 
 	spin_lock_irq(&p->pi_lock);
+	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &p->pi_state_list);
 	pi_state->owner = p;
 	spin_unlock_irq(&p->pi_lock);
@@ -573,20 +582,29 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	 * kept enabled while there is PI state around. We must also
 	 * preserve the owner died bit.)
 	 */
-	newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid;
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		newval = FUTEX_WAITERS | new_owner->pid;
 
-	inc_preempt_count();
-	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-	dec_preempt_count();
+		inc_preempt_count();
+		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+		dec_preempt_count();
+		if (curval == -EFAULT)
+			return -EFAULT;
+		if (curval != uval)
+			return -EINVAL;
+	}
 
-	if (curval == -EFAULT)
-		return -EFAULT;
-	if (curval != uval)
-		return -EINVAL;
+	spin_lock_irq(&pi_state->owner->pi_lock);
+	WARN_ON(list_empty(&pi_state->list));
+	list_del_init(&pi_state->list);
+	spin_unlock_irq(&pi_state->owner->pi_lock);
 
-	list_del_init(&pi_state->owner->pi_state_list);
+	spin_lock_irq(&new_owner->pi_lock);
+	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &new_owner->pi_state_list);
 	pi_state->owner = new_owner;
+	spin_unlock_irq(&new_owner->pi_lock);
+
 	rt_mutex_unlock(&pi_state->pi_mutex);
 
 	return 0;
@@ -729,8 +747,10 @@ retry:
 	 */
 	if (attempt++) {
 		if (futex_handle_fault((unsigned long)uaddr2,
-				       attempt))
+				       attempt)) {
+			ret = -EFAULT;
 			goto out;
+		}
 		goto retry;
 	}
 
@@ -930,6 +950,7 @@ static int unqueue_me(struct futex_q *q)
 	/* In the common case we don't take the spinlock, which is nice. */
 retry:
 	lock_ptr = q->lock_ptr;
+	barrier();
 	if (lock_ptr != 0) {
 		spin_lock(lock_ptr);
 		/*
```
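The barrier() added to unqueue_me() is there for the compiler, not the CPU: without it, the compiler is free to reload q->lock_ptr after the NULL test, and a concurrent waker that sets q->lock_ptr to NULL between the test and the spin_lock() would make the waiter lock through a NULL pointer. A user-space sketch of the idiom, with hypothetical names; the inline asm is the classic compiler-barrier spelling that barrier() expands to:

```c
#include <pthread.h>
#include <stddef.h>

#define compiler_barrier() __asm__ __volatile__("" ::: "memory")

struct waiter {
	pthread_mutex_t *lock_ptr;	/* the waker sets this to NULL on wakeup */
};

static void careful_unqueue(struct waiter *q)
{
	pthread_mutex_t *lock_ptr = q->lock_ptr; /* read the pointer once */
	compiler_barrier();			 /* forbid re-reading q->lock_ptr */

	if (lock_ptr != NULL) {
		/* lock via the snapshot, never via q->lock_ptr directly */
		pthread_mutex_lock(lock_ptr);
		/* ... re-check q->lock_ptr under the lock, as unqueue_me() does ... */
		pthread_mutex_unlock(lock_ptr);
	}
}
```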
```diff
@@ -1236,6 +1257,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 	/* Owner died? */
 	if (q.pi_state->owner != NULL) {
 		spin_lock_irq(&q.pi_state->owner->pi_lock);
+		WARN_ON(list_empty(&q.pi_state->list));
 		list_del_init(&q.pi_state->list);
 		spin_unlock_irq(&q.pi_state->owner->pi_lock);
 	} else
@@ -1244,6 +1266,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 	q.pi_state->owner = current;
 
 	spin_lock_irq(&current->pi_lock);
+	WARN_ON(!list_empty(&q.pi_state->list));
 	list_add(&q.pi_state->list, &current->pi_state_list);
 	spin_unlock_irq(&current->pi_lock);
 
@@ -1301,9 +1324,10 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 	 * still holding the mmap_sem.
 	 */
 	if (attempt++) {
-		if (futex_handle_fault((unsigned long)uaddr, attempt))
+		if (futex_handle_fault((unsigned long)uaddr, attempt)) {
+			ret = -EFAULT;
 			goto out_unlock_release_sem;
-
+		}
 		goto retry_locked;
 	}
 
@@ -1427,9 +1451,11 @@ retry_locked:
 	 * again. If it succeeds then we can return without waking
 	 * anyone else up:
 	 */
-	inc_preempt_count();
-	uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
-	dec_preempt_count();
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		inc_preempt_count();
+		uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
+		dec_preempt_count();
+	}
 
 	if (unlikely(uval == -EFAULT))
 		goto pi_faulted;
@@ -1462,9 +1488,11 @@ retry_locked:
 	/*
 	 * No waiters - kernel unlocks the futex:
 	 */
-	ret = unlock_futex_pi(uaddr, uval);
-	if (ret == -EFAULT)
-		goto pi_faulted;
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		ret = unlock_futex_pi(uaddr, uval);
+		if (ret == -EFAULT)
+			goto pi_faulted;
+	}
 
 out_unlock:
 	spin_unlock(&hb->lock);
@@ -1481,9 +1509,10 @@ pi_faulted:
 	 * still holding the mmap_sem.
 	 */
 	if (attempt++) {
-		if (futex_handle_fault((unsigned long)uaddr, attempt))
+		if (futex_handle_fault((unsigned long)uaddr, attempt)) {
+			ret = -EFAULT;
 			goto out_unlock;
-
+		}
 		goto retry_locked;
 	}
 
@@ -1683,9 +1712,9 @@ err_unlock:
  * Process a futex-list entry, check whether it's owned by the
  * dying task, and do notification if so:
  */
-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
+int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
 {
-	u32 uval, nval;
+	u32 uval, nval, mval;
 
 retry:
 	if (get_user(uval, uaddr))
```
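The mval line in the next hunk changes what the dying task writes into the futex word: the old code set FUTEX_OWNER_DIED on top of the existing value, leaving the dead owner's TID in place, while the new code keeps only FUTEX_WAITERS and clears the TID to 0, which is exactly the state the new -ESRCH check in lookup_pi_state() recognizes. A small arithmetic check of the two formulas (constants as in &lt;linux/futex.h&gt;):

```c
#include <stdint.h>
#include <stdio.h>

#define FUTEX_WAITERS    0x80000000
#define FUTEX_OWNER_DIED 0x40000000
#define FUTEX_TID_MASK   0x3fffffff

int main(void)
{
	uint32_t uval = FUTEX_WAITERS | 1234;	/* dead owner TID 1234, waiters queued */

	uint32_t old_val = uval | FUTEX_OWNER_DIED;		    /* stale TID survives */
	uint32_t mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;  /* TID cleared */

	printf("old formula leaves tid=%u, new formula leaves tid=%u\n",
	       old_val & FUTEX_TID_MASK, mval & FUTEX_TID_MASK);    /* 1234 vs. 0 */
	return 0;
}
```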
```diff
@@ -1702,21 +1731,45 @@ retry:
 		 * thread-death.) The rest of the cleanup is done in
 		 * userspace.
 		 */
-		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval,
-						     uval | FUTEX_OWNER_DIED);
+		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
+
 		if (nval == -EFAULT)
 			return -1;
 
 		if (nval != uval)
 			goto retry;
 
-		if (uval & FUTEX_WAITERS)
-			futex_wake(uaddr, 1);
+		/*
+		 * Wake robust non-PI futexes here. The wakeup of
+		 * PI futexes happens in exit_pi_state():
+		 */
+		if (!pi) {
+			if (uval & FUTEX_WAITERS)
+				futex_wake(uaddr, 1);
+		}
 	}
 	return 0;
 }
 
 /*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int fetch_robust_entry(struct robust_list __user **entry,
+				     struct robust_list __user **head, int *pi)
+{
+	unsigned long uentry;
+
+	if (get_user(uentry, (unsigned long *)head))
+		return -EFAULT;
+
+	*entry = (void *)(uentry & ~1UL);
+	*pi = uentry & 1;
+
+	return 0;
+}
+
+/*
  * Walk curr->robust_list (very carefully, it's a userspace list!)
  * and mark any locks found there dead, and notify any waiters.
  *
```
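fetch_robust_entry() decodes a tagging scheme in which user space (glibc's robust-mutex code, in the usual setup) sets bit 0 of a robust-list pointer to say "this entry is a PI futex"; list entries are at least word-aligned, so the bit is otherwise always zero. A self-contained demonstration of the encode/decode round-trip, with names invented for the demo:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct robust_entry { struct robust_entry *next; };

/* Encode: user space ORs in bit 0 for PI entries before linking them. */
static struct robust_entry *tag_pi(struct robust_entry *e)
{
	return (struct robust_entry *)((uintptr_t)e | 1UL);
}

/* Decode: what fetch_robust_entry() does with the raw user-space value. */
static struct robust_entry *untag(struct robust_entry *e, int *pi)
{
	*pi = (int)((uintptr_t)e & 1);
	return (struct robust_entry *)((uintptr_t)e & ~1UL);
}

int main(void)
{
	struct robust_entry node;
	int pi;
	struct robust_entry *clean = untag(tag_pi(&node), &pi);

	assert(clean == &node && pi == 1);	/* round-trip preserves both */
	printf("entry=%p pi=%d\n", (void *)clean, pi);
	return 0;
}
```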
```diff
@@ -1726,14 +1779,14 @@ void exit_robust_list(struct task_struct *curr)
 {
 	struct robust_list_head __user *head = curr->robust_list;
 	struct robust_list __user *entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
 	unsigned long futex_offset;
 
 	/*
 	 * Fetch the list head (which was registered earlier, via
 	 * sys_set_robust_list()):
 	 */
-	if (get_user(entry, &head->list.next))
+	if (fetch_robust_entry(&entry, &head->list.next, &pi))
 		return;
 	/*
 	 * Fetch the relative futex offset:
@@ -1744,10 +1797,11 @@ void exit_robust_list(struct task_struct *curr)
 	 * Fetch any possibly pending lock-add first, and handle it
 	 * if it exists:
 	 */
-	if (get_user(pending, &head->list_op_pending))
+	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
 		return;
+
 	if (pending)
-		handle_futex_death((void *)pending + futex_offset, curr);
+		handle_futex_death((void *)pending + futex_offset, curr, pip);
 
 	while (entry != &head->list) {
 		/*
@@ -1756,12 +1810,12 @@ void exit_robust_list(struct task_struct *curr)
 		 */
 		if (entry != pending)
 			if (handle_futex_death((void *)entry + futex_offset,
-					       curr))
+					       curr, pi))
 				return;
 		/*
 		 * Fetch the next entry in the list:
 		 */
-		if (get_user(entry, &entry->next))
+		if (fetch_robust_entry(&entry, &entry->next, &pi))
 			return;
 		/*
 		 * Avoid excessively long or circular lists:
```
