Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/futex.c | 1188 |
| -rw-r--r-- | kernel/rtmutex.c | 240 |
| -rw-r--r-- | kernel/rtmutex_common.h | 8 |
3 files changed, 1084 insertions, 352 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index d546b2d53a62..80b5ce716596 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
| @@ -19,6 +19,10 @@ | |||
| 19 | * PRIVATE futexes by Eric Dumazet | 19 | * PRIVATE futexes by Eric Dumazet |
| 20 | * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com> | 20 | * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com> |
| 21 | * | 21 | * |
| 22 | * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com> | ||
| 23 | * Copyright (C) IBM Corporation, 2009 | ||
| 24 | * Thanks to Thomas Gleixner for conceptual design and careful reviews. | ||
| 25 | * | ||
| 22 | * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly | 26 | * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly |
| 23 | * enough at me, Linus for the original (flawed) idea, Matthew | 27 | * enough at me, Linus for the original (flawed) idea, Matthew |
| 24 | * Kirkwood for proof-of-concept implementation. | 28 | * Kirkwood for proof-of-concept implementation. |
| @@ -96,8 +100,8 @@ struct futex_pi_state { | |||
| 96 | */ | 100 | */ |
| 97 | struct futex_q { | 101 | struct futex_q { |
| 98 | struct plist_node list; | 102 | struct plist_node list; |
| 99 | /* There can only be a single waiter */ | 103 | /* Waiter reference */ |
| 100 | wait_queue_head_t waiter; | 104 | struct task_struct *task; |
| 101 | 105 | ||
| 102 | /* Which hash list lock to use: */ | 106 | /* Which hash list lock to use: */ |
| 103 | spinlock_t *lock_ptr; | 107 | spinlock_t *lock_ptr; |
| @@ -107,7 +111,9 @@ struct futex_q { | |||
| 107 | 111 | ||
| 108 | /* Optional priority inheritance state: */ | 112 | /* Optional priority inheritance state: */ |
| 109 | struct futex_pi_state *pi_state; | 113 | struct futex_pi_state *pi_state; |
| 110 | struct task_struct *task; | 114 | |
| 115 | /* rt_waiter storage for requeue_pi: */ | ||
| 116 | struct rt_mutex_waiter *rt_waiter; | ||
| 111 | 117 | ||
| 112 | /* Bitset for the optional bitmasked wakeup */ | 118 | /* Bitset for the optional bitmasked wakeup */ |
| 113 | u32 bitset; | 119 | u32 bitset; |
| @@ -278,6 +284,25 @@ void put_futex_key(int fshared, union futex_key *key) | |||
| 278 | drop_futex_key_refs(key); | 284 | drop_futex_key_refs(key); |
| 279 | } | 285 | } |
| 280 | 286 | ||
| 287 | /** | ||
| 288 | * futex_top_waiter() - Return the highest priority waiter on a futex | ||
| 289 | * @hb: the hash bucket the futex_q's reside in | ||
| 290 | * @key: the futex key (to distinguish it from futex_q's of other futexes) | ||
| 291 | * | ||
| 292 | * Must be called with the hb lock held. | ||
| 293 | */ | ||
| 294 | static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, | ||
| 295 | union futex_key *key) | ||
| 296 | { | ||
| 297 | struct futex_q *this; | ||
| 298 | |||
| 299 | plist_for_each_entry(this, &hb->chain, list) { | ||
| 300 | if (match_futex(&this->key, key)) | ||
| 301 | return this; | ||
| 302 | } | ||
| 303 | return NULL; | ||
| 304 | } | ||
| 305 | |||
| 281 | static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) | 306 | static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) |
| 282 | { | 307 | { |
| 283 | u32 curval; | 308 | u32 curval; |
| @@ -539,28 +564,160 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
| 539 | return 0; | 564 | return 0; |
| 540 | } | 565 | } |
| 541 | 566 | ||
| 567 | /** | ||
| 568 | * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex | ||
| 569 | * @uaddr: the pi futex user address | ||
| 570 | * @hb: the pi futex hash bucket | ||
| 571 | * @key: the futex key associated with uaddr and hb | ||
| 572 | * @ps: the pi_state pointer where we store the result of the | ||
| 573 | * lookup | ||
| 574 | * @task: the task to perform the atomic lock work for. This will | ||
| 575 | * be "current" except in the case of requeue pi. | ||
| 576 | * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) | ||
| 577 | * | ||
| 578 | * Returns: | ||
| 579 | * 0 - ready to wait | ||
| 580 | * 1 - acquired the lock | ||
| 581 | * <0 - error | ||
| 582 | * | ||
| 583 | * The hb->lock and futex_key refs shall be held by the caller. | ||
| 584 | */ | ||
| 585 | static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, | ||
| 586 | union futex_key *key, | ||
| 587 | struct futex_pi_state **ps, | ||
| 588 | struct task_struct *task, int set_waiters) | ||
| 589 | { | ||
| 590 | int lock_taken, ret, ownerdied = 0; | ||
| 591 | u32 uval, newval, curval; | ||
| 592 | |||
| 593 | retry: | ||
| 594 | ret = lock_taken = 0; | ||
| 595 | |||
| 596 | /* | ||
| 597 | * To avoid races, we attempt to take the lock here again | ||
| 598 | * (by doing a 0 -> TID atomic cmpxchg), while holding all | ||
| 599 | * the locks. It will most likely not succeed. | ||
| 600 | */ | ||
| 601 | newval = task_pid_vnr(task); | ||
| 602 | if (set_waiters) | ||
| 603 | newval |= FUTEX_WAITERS; | ||
| 604 | |||
| 605 | curval = cmpxchg_futex_value_locked(uaddr, 0, newval); | ||
| 606 | |||
| 607 | if (unlikely(curval == -EFAULT)) | ||
| 608 | return -EFAULT; | ||
| 609 | |||
| 610 | /* | ||
| 611 | * Detect deadlocks. | ||
| 612 | */ | ||
| 613 | if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task)))) | ||
| 614 | return -EDEADLK; | ||
| 615 | |||
| 616 | /* | ||
| 617 | * Surprise - we got the lock. Just return to userspace: | ||
| 618 | */ | ||
| 619 | if (unlikely(!curval)) | ||
| 620 | return 1; | ||
| 621 | |||
| 622 | uval = curval; | ||
| 623 | |||
| 624 | /* | ||
| 625 | * Set the FUTEX_WAITERS flag, so the owner will know it has someone | ||
| 626 | * to wake at the next unlock. | ||
| 627 | */ | ||
| 628 | newval = curval | FUTEX_WAITERS; | ||
| 629 | |||
| 630 | /* | ||
| 631 | * There are two cases, where a futex might have no owner (the | ||
| 632 | * owner TID is 0): OWNER_DIED. We take over the futex in this | ||
| 633 | * case. We also do an unconditional take over, when the owner | ||
| 634 | * of the futex died. | ||
| 635 | * | ||
| 636 | * This is safe as we are protected by the hash bucket lock ! | ||
| 637 | */ | ||
| 638 | if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { | ||
| 639 | /* Keep the OWNER_DIED bit */ | ||
| 640 | newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task); | ||
| 641 | ownerdied = 0; | ||
| 642 | lock_taken = 1; | ||
| 643 | } | ||
| 644 | |||
| 645 | curval = cmpxchg_futex_value_locked(uaddr, uval, newval); | ||
| 646 | |||
| 647 | if (unlikely(curval == -EFAULT)) | ||
| 648 | return -EFAULT; | ||
| 649 | if (unlikely(curval != uval)) | ||
| 650 | goto retry; | ||
| 651 | |||
| 652 | /* | ||
| 653 | * We took the lock due to owner died take over. | ||
| 654 | */ | ||
| 655 | if (unlikely(lock_taken)) | ||
| 656 | return 1; | ||
| 657 | |||
| 658 | /* | ||
| 659 | * We don't have the lock. Look up the PI state (or create it if | ||
| 660 | * we are the first waiter): | ||
| 661 | */ | ||
| 662 | ret = lookup_pi_state(uval, hb, key, ps); | ||
| 663 | |||
| 664 | if (unlikely(ret)) { | ||
| 665 | switch (ret) { | ||
| 666 | case -ESRCH: | ||
| 667 | /* | ||
| 668 | * No owner found for this futex. Check if the | ||
| 669 | * OWNER_DIED bit is set to figure out whether | ||
| 670 | * this is a robust futex or not. | ||
| 671 | */ | ||
| 672 | if (get_futex_value_locked(&curval, uaddr)) | ||
| 673 | return -EFAULT; | ||
| 674 | |||
| 675 | /* | ||
| 676 | * We simply start over in case of a robust | ||
| 677 | * futex. The code above will take the futex | ||
| 678 | * and return happy. | ||
| 679 | */ | ||
| 680 | if (curval & FUTEX_OWNER_DIED) { | ||
| 681 | ownerdied = 1; | ||
| 682 | goto retry; | ||
| 683 | } | ||
| 684 | default: | ||
| 685 | break; | ||
| 686 | } | ||
| 687 | } | ||
| 688 | |||
| 689 | return ret; | ||
| 690 | } | ||
| 691 | |||
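futex_lock_pi_atomic() above is the kernel half of the PI futex word protocol: a value of 0 means unlocked, the owner's TID means locked, and the FUTEX_WAITERS bit forces the unlocking task into the kernel. A minimal sketch of the userspace half, assuming GCC atomic builtins and the raw futex(2) syscall, could look like the following; pi_lock()/pi_unlock() are illustrative names and not part of this patch.

	#include <linux/futex.h>
	#include <stdint.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Illustrative userspace fast path; the kernel retries the same
	 * 0 -> TID cmpxchg under the hash bucket lock in futex_lock_pi_atomic(). */
	static void pi_lock(uint32_t *futex_word)
	{
		uint32_t expected = 0;
		uint32_t tid = (uint32_t)syscall(SYS_gettid);

		if (__atomic_compare_exchange_n(futex_word, &expected, tid, 0,
						__ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
			return;		/* uncontended: we now own the lock */

		/* Contended: the kernel queues us and applies priority inheritance. */
		syscall(SYS_futex, futex_word, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
	}

	static void pi_unlock(uint32_t *futex_word)
	{
		uint32_t expected = (uint32_t)syscall(SYS_gettid);

		/* TID -> 0 only succeeds if FUTEX_WAITERS/FUTEX_OWNER_DIED are clear. */
		if (__atomic_compare_exchange_n(futex_word, &expected, 0, 0,
						__ATOMIC_RELEASE, __ATOMIC_RELAXED))
			return;

		/* Waiters present: let the kernel hand the lock to the top waiter. */
		syscall(SYS_futex, futex_word, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0);
	}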
| 542 | /* | 692 | /* |
| 543 | * The hash bucket lock must be held when this is called. | 693 | * The hash bucket lock must be held when this is called. |
| 544 | * Afterwards, the futex_q must not be accessed. | 694 | * Afterwards, the futex_q must not be accessed. |
| 545 | */ | 695 | */ |
| 546 | static void wake_futex(struct futex_q *q) | 696 | static void wake_futex(struct futex_q *q) |
| 547 | { | 697 | { |
| 548 | plist_del(&q->list, &q->list.plist); | 698 | struct task_struct *p = q->task; |
| 699 | |||
| 549 | /* | 700 | /* |
| 550 | * The lock in wake_up_all() is a crucial memory barrier after the | 701 | * We set q->lock_ptr = NULL _before_ we wake up the task. If |
| 551 | * plist_del() and also before assigning to q->lock_ptr. | 702 | * a non futex wake up happens on another CPU then the task |
| 703 | * might exit and p would dereference a non existing task | ||
| 704 | * struct. Prevent this by holding a reference on p across the | ||
| 705 | * wake up. | ||
| 552 | */ | 706 | */ |
| 553 | wake_up(&q->waiter); | 707 | get_task_struct(p); |
| 708 | |||
| 709 | plist_del(&q->list, &q->list.plist); | ||
| 554 | /* | 710 | /* |
| 555 | * The waiting task can free the futex_q as soon as this is written, | 711 | * The waiting task can free the futex_q as soon as |
| 556 | * without taking any locks. This must come last. | 712 | * q->lock_ptr = NULL is written, without taking any locks. A |
| 557 | * | 713 | * memory barrier is required here to prevent the following |
| 558 | * A memory barrier is required here to prevent the following store to | 714 | * store to lock_ptr from getting ahead of the plist_del. |
| 559 | * lock_ptr from getting ahead of the wakeup. Clearing the lock at the | ||
| 560 | * end of wake_up() does not prevent this store from moving. | ||
| 561 | */ | 715 | */ |
| 562 | smp_wmb(); | 716 | smp_wmb(); |
| 563 | q->lock_ptr = NULL; | 717 | q->lock_ptr = NULL; |
| 718 | |||
| 719 | wake_up_state(p, TASK_NORMAL); | ||
| 720 | put_task_struct(p); | ||
| 564 | } | 721 | } |
| 565 | 722 | ||
| 566 | static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) | 723 | static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) |
| @@ -689,7 +846,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) | |||
| 689 | 846 | ||
| 690 | plist_for_each_entry_safe(this, next, head, list) { | 847 | plist_for_each_entry_safe(this, next, head, list) { |
| 691 | if (match_futex (&this->key, &key)) { | 848 | if (match_futex (&this->key, &key)) { |
| 692 | if (this->pi_state) { | 849 | if (this->pi_state || this->rt_waiter) { |
| 693 | ret = -EINVAL; | 850 | ret = -EINVAL; |
| 694 | break; | 851 | break; |
| 695 | } | 852 | } |
| @@ -802,24 +959,185 @@ out: | |||
| 802 | return ret; | 959 | return ret; |
| 803 | } | 960 | } |
| 804 | 961 | ||
| 805 | /* | 962 | /** |
| 806 | * Requeue all waiters hashed on one physical page to another | 963 | * requeue_futex() - Requeue a futex_q from one hb to another |
| 807 | * physical page. | 964 | * @q: the futex_q to requeue |
| 965 | * @hb1: the source hash_bucket | ||
| 966 | * @hb2: the target hash_bucket | ||
| 967 | * @key2: the new key for the requeued futex_q | ||
| 968 | */ | ||
| 969 | static inline | ||
| 970 | void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, | ||
| 971 | struct futex_hash_bucket *hb2, union futex_key *key2) | ||
| 972 | { | ||
| 973 | |||
| 974 | /* | ||
| 975 | * If key1 and key2 hash to the same bucket, no need to | ||
| 976 | * requeue. | ||
| 977 | */ | ||
| 978 | if (likely(&hb1->chain != &hb2->chain)) { | ||
| 979 | plist_del(&q->list, &hb1->chain); | ||
| 980 | plist_add(&q->list, &hb2->chain); | ||
| 981 | q->lock_ptr = &hb2->lock; | ||
| 982 | #ifdef CONFIG_DEBUG_PI_LIST | ||
| 983 | q->list.plist.lock = &hb2->lock; | ||
| 984 | #endif | ||
| 985 | } | ||
| 986 | get_futex_key_refs(key2); | ||
| 987 | q->key = *key2; | ||
| 988 | } | ||
| 989 | |||
| 990 | /** | ||
| 991 | * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue | ||
| 992 | * q: the futex_q | ||
| 993 | * key: the key of the requeue target futex | ||
| 994 | * | ||
| 995 | * During futex_requeue, with requeue_pi=1, it is possible to acquire the | ||
| 996 | * target futex if it is uncontended or via a lock steal. Set the futex_q key | ||
| 997 | * to the requeue target futex so the waiter can detect the wakeup on the right | ||
| 998 | * futex, but remove it from the hb and NULL the rt_waiter so it can detect | ||
| 999 | * atomic lock acquisition. Must be called with the q->lock_ptr held. | ||
| 1000 | */ | ||
| 1001 | static inline | ||
| 1002 | void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key) | ||
| 1003 | { | ||
| 1004 | drop_futex_key_refs(&q->key); | ||
| 1005 | get_futex_key_refs(key); | ||
| 1006 | q->key = *key; | ||
| 1007 | |||
| 1008 | WARN_ON(plist_node_empty(&q->list)); | ||
| 1009 | plist_del(&q->list, &q->list.plist); | ||
| 1010 | |||
| 1011 | WARN_ON(!q->rt_waiter); | ||
| 1012 | q->rt_waiter = NULL; | ||
| 1013 | |||
| 1014 | wake_up_state(q->task, TASK_NORMAL); | ||
| 1015 | } | ||
| 1016 | |||
| 1017 | /** | ||
| 1018 | * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter | ||
| 1019 | * @pifutex: the user address of the to futex | ||
| 1020 | * @hb1: the from futex hash bucket, must be locked by the caller | ||
| 1021 | * @hb2: the to futex hash bucket, must be locked by the caller | ||
| 1022 | * @key1: the from futex key | ||
| 1023 | * @key2: the to futex key | ||
| 1024 | * @ps: address to store the pi_state pointer | ||
| 1025 | * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) | ||
| 1026 | * | ||
| 1027 | * Try and get the lock on behalf of the top waiter if we can do it atomically. | ||
| 1028 | * Wake the top waiter if we succeed. If the caller specified set_waiters, | ||
| 1029 | * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. | ||
| 1030 | * hb1 and hb2 must be held by the caller. | ||
| 1031 | * | ||
| 1032 | * Returns: | ||
| 1033 | * 0 - failed to acquire the lock atomically | ||
| 1034 | * 1 - acquired the lock | ||
| 1035 | * <0 - error | ||
| 1036 | */ | ||
| 1037 | static int futex_proxy_trylock_atomic(u32 __user *pifutex, | ||
| 1038 | struct futex_hash_bucket *hb1, | ||
| 1039 | struct futex_hash_bucket *hb2, | ||
| 1040 | union futex_key *key1, union futex_key *key2, | ||
| 1041 | struct futex_pi_state **ps, int set_waiters) | ||
| 1042 | { | ||
| 1043 | struct futex_q *top_waiter = NULL; | ||
| 1044 | u32 curval; | ||
| 1045 | int ret; | ||
| 1046 | |||
| 1047 | if (get_futex_value_locked(&curval, pifutex)) | ||
| 1048 | return -EFAULT; | ||
| 1049 | |||
| 1050 | /* | ||
| 1051 | * Find the top_waiter and determine if there are additional waiters. | ||
| 1052 | * If the caller intends to requeue more than 1 waiter to pifutex, | ||
| 1053 | * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now, | ||
| 1054 | * as we have means to handle the possible fault. If not, don't set | ||
| 1055 | * the bit unnecessarily as it will force the subsequent unlock to enter | ||
| 1056 | * the kernel. | ||
| 1057 | */ | ||
| 1058 | top_waiter = futex_top_waiter(hb1, key1); | ||
| 1059 | |||
| 1060 | /* There are no waiters, nothing for us to do. */ | ||
| 1061 | if (!top_waiter) | ||
| 1062 | return 0; | ||
| 1063 | |||
| 1064 | /* | ||
| 1065 | * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in | ||
| 1066 | * the contended case or if set_waiters is 1. The pi_state is returned | ||
| 1067 | * in ps in contended cases. | ||
| 1068 | */ | ||
| 1069 | ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, | ||
| 1070 | set_waiters); | ||
| 1071 | if (ret == 1) | ||
| 1072 | requeue_pi_wake_futex(top_waiter, key2); | ||
| 1073 | |||
| 1074 | return ret; | ||
| 1075 | } | ||
| 1076 | |||
| 1077 | /** | ||
| 1078 | * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 | ||
| 1079 | * @uaddr1: source futex user address | ||
| 1080 | * @uaddr2: target futex user address | ||
| 1081 | * @nr_wake: number of waiters to wake (must be 1 for requeue_pi) | ||
| 1082 | * @nr_requeue: number of waiters to requeue (0-INT_MAX) | ||
| 1083 | * @requeue_pi: if we are attempting to requeue from a non-pi futex to a | ||
| 1084 | * pi futex (pi to pi requeue is not supported) | ||
| 1085 | * | ||
| 1086 | * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire | ||
| 1087 | * uaddr2 atomically on behalf of the top waiter. | ||
| 1088 | * | ||
| 1089 | * Returns: | ||
| 1090 | * >=0 - on success, the number of tasks requeued or woken | ||
| 1091 | * <0 - on error | ||
| 808 | */ | 1092 | */ |
| 809 | static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, | 1093 | static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, |
| 810 | int nr_wake, int nr_requeue, u32 *cmpval) | 1094 | int nr_wake, int nr_requeue, u32 *cmpval, |
| 1095 | int requeue_pi) | ||
| 811 | { | 1096 | { |
| 812 | union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; | 1097 | union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; |
| 1098 | int drop_count = 0, task_count = 0, ret; | ||
| 1099 | struct futex_pi_state *pi_state = NULL; | ||
| 813 | struct futex_hash_bucket *hb1, *hb2; | 1100 | struct futex_hash_bucket *hb1, *hb2; |
| 814 | struct plist_head *head1; | 1101 | struct plist_head *head1; |
| 815 | struct futex_q *this, *next; | 1102 | struct futex_q *this, *next; |
| 816 | int ret, drop_count = 0; | 1103 | u32 curval2; |
| 1104 | |||
| 1105 | if (requeue_pi) { | ||
| 1106 | /* | ||
| 1107 | * requeue_pi requires a pi_state, try to allocate it now | ||
| 1108 | * without any locks in case it fails. | ||
| 1109 | */ | ||
| 1110 | if (refill_pi_state_cache()) | ||
| 1111 | return -ENOMEM; | ||
| 1112 | /* | ||
| 1113 | * requeue_pi must wake as many tasks as it can, up to nr_wake | ||
| 1114 | * + nr_requeue, since it acquires the rt_mutex prior to | ||
| 1115 | * returning to userspace, so as to not leave the rt_mutex with | ||
| 1116 | * waiters and no owner. However, second and third wake-ups | ||
| 1117 | * cannot be predicted as they involve race conditions with the | ||
| 1118 | * first wake and a fault while looking up the pi_state. Both | ||
| 1119 | * pthread_cond_signal() and pthread_cond_broadcast() should | ||
| 1120 | * use nr_wake=1. | ||
| 1121 | */ | ||
| 1122 | if (nr_wake != 1) | ||
| 1123 | return -EINVAL; | ||
| 1124 | } | ||
| 817 | 1125 | ||
| 818 | retry: | 1126 | retry: |
| 1127 | if (pi_state != NULL) { | ||
| 1128 | /* | ||
| 1129 | * We will have to lookup the pi_state again, so free this one | ||
| 1130 | * to keep the accounting correct. | ||
| 1131 | */ | ||
| 1132 | free_pi_state(pi_state); | ||
| 1133 | pi_state = NULL; | ||
| 1134 | } | ||
| 1135 | |||
| 819 | ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); | 1136 | ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); |
| 820 | if (unlikely(ret != 0)) | 1137 | if (unlikely(ret != 0)) |
| 821 | goto out; | 1138 | goto out; |
| 822 | ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_READ); | 1139 | ret = get_futex_key(uaddr2, fshared, &key2, |
| 1140 | requeue_pi ? VERIFY_WRITE : VERIFY_READ); | ||
| 823 | if (unlikely(ret != 0)) | 1141 | if (unlikely(ret != 0)) |
| 824 | goto out_put_key1; | 1142 | goto out_put_key1; |
| 825 | 1143 | ||
| @@ -854,32 +1172,99 @@ retry_private: | |||
| 854 | } | 1172 | } |
| 855 | } | 1173 | } |
| 856 | 1174 | ||
| 1175 | if (requeue_pi && (task_count - nr_wake < nr_requeue)) { | ||
| 1176 | /* | ||
| 1177 | * Attempt to acquire uaddr2 and wake the top waiter. If we | ||
| 1178 | * intend to requeue waiters, force setting the FUTEX_WAITERS | ||
| 1179 | * bit. We force this here where we are able to easily handle | ||
| 1180 | * faults rather than in the requeue loop below. | ||
| 1181 | */ | ||
| 1182 | ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, | ||
| 1183 | &key2, &pi_state, nr_requeue); | ||
| 1184 | |||
| 1185 | /* | ||
| 1186 | * At this point the top_waiter has either taken uaddr2 or is | ||
| 1187 | * waiting on it. If the former, then the pi_state will not | ||
| 1188 | * exist yet, look it up one more time to ensure we have a | ||
| 1189 | * reference to it. | ||
| 1190 | */ | ||
| 1191 | if (ret == 1) { | ||
| 1192 | WARN_ON(pi_state); | ||
| 1193 | task_count++; | ||
| 1194 | ret = get_futex_value_locked(&curval2, uaddr2); | ||
| 1195 | if (!ret) | ||
| 1196 | ret = lookup_pi_state(curval2, hb2, &key2, | ||
| 1197 | &pi_state); | ||
| 1198 | } | ||
| 1199 | |||
| 1200 | switch (ret) { | ||
| 1201 | case 0: | ||
| 1202 | break; | ||
| 1203 | case -EFAULT: | ||
| 1204 | double_unlock_hb(hb1, hb2); | ||
| 1205 | put_futex_key(fshared, &key2); | ||
| 1206 | put_futex_key(fshared, &key1); | ||
| 1207 | ret = get_user(curval2, uaddr2); | ||
| 1208 | if (!ret) | ||
| 1209 | goto retry; | ||
| 1210 | goto out; | ||
| 1211 | case -EAGAIN: | ||
| 1212 | /* The owner was exiting, try again. */ | ||
| 1213 | double_unlock_hb(hb1, hb2); | ||
| 1214 | put_futex_key(fshared, &key2); | ||
| 1215 | put_futex_key(fshared, &key1); | ||
| 1216 | cond_resched(); | ||
| 1217 | goto retry; | ||
| 1218 | default: | ||
| 1219 | goto out_unlock; | ||
| 1220 | } | ||
| 1221 | } | ||
| 1222 | |||
| 857 | head1 = &hb1->chain; | 1223 | head1 = &hb1->chain; |
| 858 | plist_for_each_entry_safe(this, next, head1, list) { | 1224 | plist_for_each_entry_safe(this, next, head1, list) { |
| 859 | if (!match_futex (&this->key, &key1)) | 1225 | if (task_count - nr_wake >= nr_requeue) |
| 1226 | break; | ||
| 1227 | |||
| 1228 | if (!match_futex(&this->key, &key1)) | ||
| 860 | continue; | 1229 | continue; |
| 861 | if (++ret <= nr_wake) { | 1230 | |
| 1231 | WARN_ON(!requeue_pi && this->rt_waiter); | ||
| 1232 | WARN_ON(requeue_pi && !this->rt_waiter); | ||
| 1233 | |||
| 1234 | /* | ||
| 1235 | * Wake nr_wake waiters. For requeue_pi, if we acquired the | ||
| 1236 | * lock, we already woke the top_waiter. If not, it will be | ||
| 1237 | * woken by futex_unlock_pi(). | ||
| 1238 | */ | ||
| 1239 | if (++task_count <= nr_wake && !requeue_pi) { | ||
| 862 | wake_futex(this); | 1240 | wake_futex(this); |
| 863 | } else { | 1241 | continue; |
| 864 | /* | 1242 | } |
| 865 | * If key1 and key2 hash to the same bucket, no need to | ||
| 866 | * requeue. | ||
| 867 | */ | ||
| 868 | if (likely(head1 != &hb2->chain)) { | ||
| 869 | plist_del(&this->list, &hb1->chain); | ||
| 870 | plist_add(&this->list, &hb2->chain); | ||
| 871 | this->lock_ptr = &hb2->lock; | ||
| 872 | #ifdef CONFIG_DEBUG_PI_LIST | ||
| 873 | this->list.plist.lock = &hb2->lock; | ||
| 874 | #endif | ||
| 875 | } | ||
| 876 | this->key = key2; | ||
| 877 | get_futex_key_refs(&key2); | ||
| 878 | drop_count++; | ||
| 879 | 1243 | ||
| 880 | if (ret - nr_wake >= nr_requeue) | 1244 | /* |
| 881 | break; | 1245 | * Requeue nr_requeue waiters and possibly one more in the case |
| 1246 | * of requeue_pi if we couldn't acquire the lock atomically. | ||
| 1247 | */ | ||
| 1248 | if (requeue_pi) { | ||
| 1249 | /* Prepare the waiter to take the rt_mutex. */ | ||
| 1250 | atomic_inc(&pi_state->refcount); | ||
| 1251 | this->pi_state = pi_state; | ||
| 1252 | ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, | ||
| 1253 | this->rt_waiter, | ||
| 1254 | this->task, 1); | ||
| 1255 | if (ret == 1) { | ||
| 1256 | /* We got the lock. */ | ||
| 1257 | requeue_pi_wake_futex(this, &key2); | ||
| 1258 | continue; | ||
| 1259 | } else if (ret) { | ||
| 1260 | /* -EDEADLK */ | ||
| 1261 | this->pi_state = NULL; | ||
| 1262 | free_pi_state(pi_state); | ||
| 1263 | goto out_unlock; | ||
| 1264 | } | ||
| 882 | } | 1265 | } |
| 1266 | requeue_futex(this, hb1, hb2, &key2); | ||
| 1267 | drop_count++; | ||
| 883 | } | 1268 | } |
| 884 | 1269 | ||
| 885 | out_unlock: | 1270 | out_unlock: |
| @@ -899,7 +1284,9 @@ out_put_keys: | |||
| 899 | out_put_key1: | 1284 | out_put_key1: |
| 900 | put_futex_key(fshared, &key1); | 1285 | put_futex_key(fshared, &key1); |
| 901 | out: | 1286 | out: |
| 902 | return ret; | 1287 | if (pi_state != NULL) |
| 1288 | free_pi_state(pi_state); | ||
| 1289 | return ret ? ret : task_count; | ||
| 903 | } | 1290 | } |
| 904 | 1291 | ||
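The requeue_pi path of futex_requeue() is driven from userspace by the FUTEX_CMP_REQUEUE_PI op that this series adds to the futex syscall multiplexer. A rough sketch of a broadcast-style caller, assuming an illustrative condvar/mutex layout (not glibc's) and GCC atomic builtins, might look like this:

	#include <limits.h>
	#include <linux/futex.h>
	#include <stdint.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	struct cond  { uint32_t seq; };    /* non-PI futex the waiters sleep on    */
	struct mutex { uint32_t owner; };  /* PI futex: 0, or owner TID plus flags */

	static void cond_broadcast_pi(struct cond *c, struct mutex *m)
	{
		/* Bump the sequence so late waiters don't block on a stale value. */
		uint32_t val = __atomic_add_fetch(&c->seq, 1, __ATOMIC_SEQ_CST);

		/* args: uaddr1, op, nr_wake, nr_requeue (val2 slot), uaddr2, cmpval.
		 * Wake one waiter, requeue the rest onto the PI futex; the kernel
		 * returns -EAGAIN if c->seq no longer equals val. nr_wake must be 1,
		 * as futex_requeue() enforces above. */
		syscall(SYS_futex, &c->seq, FUTEX_CMP_REQUEUE_PI, 1, INT_MAX,
			&m->owner, val);
	}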
| 905 | /* The key must be already stored in q->key. */ | 1292 | /* The key must be already stored in q->key. */ |
| @@ -907,8 +1294,6 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) | |||
| 907 | { | 1294 | { |
| 908 | struct futex_hash_bucket *hb; | 1295 | struct futex_hash_bucket *hb; |
| 909 | 1296 | ||
| 910 | init_waitqueue_head(&q->waiter); | ||
| 911 | |||
| 912 | get_futex_key_refs(&q->key); | 1297 | get_futex_key_refs(&q->key); |
| 913 | hb = hash_futex(&q->key); | 1298 | hb = hash_futex(&q->key); |
| 914 | q->lock_ptr = &hb->lock; | 1299 | q->lock_ptr = &hb->lock; |
| @@ -1119,35 +1504,149 @@ handle_fault: | |||
| 1119 | */ | 1504 | */ |
| 1120 | #define FLAGS_SHARED 0x01 | 1505 | #define FLAGS_SHARED 0x01 |
| 1121 | #define FLAGS_CLOCKRT 0x02 | 1506 | #define FLAGS_CLOCKRT 0x02 |
| 1507 | #define FLAGS_HAS_TIMEOUT 0x04 | ||
| 1122 | 1508 | ||
| 1123 | static long futex_wait_restart(struct restart_block *restart); | 1509 | static long futex_wait_restart(struct restart_block *restart); |
| 1124 | 1510 | ||
| 1125 | static int futex_wait(u32 __user *uaddr, int fshared, | 1511 | /** |
| 1126 | u32 val, ktime_t *abs_time, u32 bitset, int clockrt) | 1512 | * fixup_owner() - Post lock pi_state and corner case management |
| 1513 | * @uaddr: user address of the futex | ||
| 1514 | * @fshared: whether the futex is shared (1) or not (0) | ||
| 1515 | * @q: futex_q (contains pi_state and access to the rt_mutex) | ||
| 1516 | * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0) | ||
| 1517 | * | ||
| 1518 | * After attempting to lock an rt_mutex, this function is called to cleanup | ||
| 1519 | * the pi_state owner as well as handle race conditions that may allow us to | ||
| 1520 | * acquire the lock. Must be called with the hb lock held. | ||
| 1521 | * | ||
| 1522 | * Returns: | ||
| 1523 | * 1 - success, lock taken | ||
| 1524 | * 0 - success, lock not taken | ||
| 1525 | * <0 - on error (-EFAULT) | ||
| 1526 | */ | ||
| 1527 | static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q, | ||
| 1528 | int locked) | ||
| 1127 | { | 1529 | { |
| 1128 | struct task_struct *curr = current; | 1530 | struct task_struct *owner; |
| 1129 | struct restart_block *restart; | 1531 | int ret = 0; |
| 1130 | DECLARE_WAITQUEUE(wait, curr); | ||
| 1131 | struct futex_hash_bucket *hb; | ||
| 1132 | struct futex_q q; | ||
| 1133 | u32 uval; | ||
| 1134 | int ret; | ||
| 1135 | struct hrtimer_sleeper t; | ||
| 1136 | int rem = 0; | ||
| 1137 | 1532 | ||
| 1138 | if (!bitset) | 1533 | if (locked) { |
| 1139 | return -EINVAL; | 1534 | /* |
| 1535 | * Got the lock. We might not be the anticipated owner if we | ||
| 1536 | * did a lock-steal - fix up the PI-state in that case: | ||
| 1537 | */ | ||
| 1538 | if (q->pi_state->owner != current) | ||
| 1539 | ret = fixup_pi_state_owner(uaddr, q, current, fshared); | ||
| 1540 | goto out; | ||
| 1541 | } | ||
| 1140 | 1542 | ||
| 1141 | q.pi_state = NULL; | 1543 | /* |
| 1142 | q.bitset = bitset; | 1544 | * Catch the rare case, where the lock was released when we were on the |
| 1143 | retry: | 1545 | * way back before we locked the hash bucket. |
| 1144 | q.key = FUTEX_KEY_INIT; | 1546 | */ |
| 1145 | ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_READ); | 1547 | if (q->pi_state->owner == current) { |
| 1146 | if (unlikely(ret != 0)) | 1548 | /* |
| 1549 | * Try to get the rt_mutex now. This might fail as some other | ||
| 1550 | * task acquired the rt_mutex after we removed ourself from the | ||
| 1551 | * rt_mutex waiters list. | ||
| 1552 | */ | ||
| 1553 | if (rt_mutex_trylock(&q->pi_state->pi_mutex)) { | ||
| 1554 | locked = 1; | ||
| 1555 | goto out; | ||
| 1556 | } | ||
| 1557 | |||
| 1558 | /* | ||
| 1559 | * pi_state is incorrect, some other task did a lock steal and | ||
| 1560 | * we returned due to timeout or signal without taking the | ||
| 1561 | * rt_mutex. Too late. We can access the rt_mutex_owner without | ||
| 1562 | * locking, as the other task is now blocked on the hash bucket | ||
| 1563 | * lock. Fix the state up. | ||
| 1564 | */ | ||
| 1565 | owner = rt_mutex_owner(&q->pi_state->pi_mutex); | ||
| 1566 | ret = fixup_pi_state_owner(uaddr, q, owner, fshared); | ||
| 1147 | goto out; | 1567 | goto out; |
| 1568 | } | ||
| 1148 | 1569 | ||
| 1149 | retry_private: | 1570 | /* |
| 1150 | hb = queue_lock(&q); | 1571 | * Paranoia check. If we did not take the lock, then we should not be |
| 1572 | * the owner, nor the pending owner, of the rt_mutex. | ||
| 1573 | */ | ||
| 1574 | if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) | ||
| 1575 | printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " | ||
| 1576 | "pi-state %p\n", ret, | ||
| 1577 | q->pi_state->pi_mutex.owner, | ||
| 1578 | q->pi_state->owner); | ||
| 1579 | |||
| 1580 | out: | ||
| 1581 | return ret ? ret : locked; | ||
| 1582 | } | ||
| 1583 | |||
| 1584 | /** | ||
| 1585 | * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal | ||
| 1586 | * @hb: the futex hash bucket, must be locked by the caller | ||
| 1587 | * @q: the futex_q to queue up on | ||
| 1588 | * @timeout: the prepared hrtimer_sleeper, or null for no timeout | ||
| 1589 | */ | ||
| 1590 | static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, | ||
| 1591 | struct hrtimer_sleeper *timeout) | ||
| 1592 | { | ||
| 1593 | queue_me(q, hb); | ||
| 1594 | |||
| 1595 | /* | ||
| 1596 | * There might have been scheduling since the queue_me(), as we | ||
| 1597 | * cannot hold a spinlock across the get_user() in case it | ||
| 1598 | * faults, and we cannot just set TASK_INTERRUPTIBLE state when | ||
| 1599 | * queueing ourselves into the futex hash. This code thus has to | ||
| 1600 | * rely on the futex_wake() code removing us from hash when it | ||
| 1601 | * wakes us up. | ||
| 1602 | */ | ||
| 1603 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 1604 | |||
| 1605 | /* Arm the timer */ | ||
| 1606 | if (timeout) { | ||
| 1607 | hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); | ||
| 1608 | if (!hrtimer_active(&timeout->timer)) | ||
| 1609 | timeout->task = NULL; | ||
| 1610 | } | ||
| 1611 | |||
| 1612 | /* | ||
| 1613 | * !plist_node_empty() is safe here without any lock. | ||
| 1614 | * q.lock_ptr != 0 is not safe, because of ordering against wakeup. | ||
| 1615 | */ | ||
| 1616 | if (likely(!plist_node_empty(&q->list))) { | ||
| 1617 | /* | ||
| 1618 | * If the timer has already expired, current will already be | ||
| 1619 | * flagged for rescheduling. Only call schedule if there | ||
| 1620 | * is no timeout, or if it has yet to expire. | ||
| 1621 | */ | ||
| 1622 | if (!timeout || timeout->task) | ||
| 1623 | schedule(); | ||
| 1624 | } | ||
| 1625 | __set_current_state(TASK_RUNNING); | ||
| 1626 | } | ||
| 1627 | |||
| 1628 | /** | ||
| 1629 | * futex_wait_setup() - Prepare to wait on a futex | ||
| 1630 | * @uaddr: the futex userspace address | ||
| 1631 | * @val: the expected value | ||
| 1632 | * @fshared: whether the futex is shared (1) or not (0) | ||
| 1633 | * @q: the associated futex_q | ||
| 1634 | * @hb: storage for hash_bucket pointer to be returned to caller | ||
| 1635 | * | ||
| 1636 | * Setup the futex_q and locate the hash_bucket. Get the futex value and | ||
| 1637 | * compare it with the expected value. Handle atomic faults internally. | ||
| 1638 | * Return with the hb lock held and a q.key reference on success, and unlocked | ||
| 1639 | * with no q.key reference on failure. | ||
| 1640 | * | ||
| 1641 | * Returns: | ||
| 1642 | * 0 - uaddr contains val and hb has been locked | ||
| 1643 | * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked | ||
| 1644 | */ | ||
| 1645 | static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared, | ||
| 1646 | struct futex_q *q, struct futex_hash_bucket **hb) | ||
| 1647 | { | ||
| 1648 | u32 uval; | ||
| 1649 | int ret; | ||
| 1151 | 1650 | ||
| 1152 | /* | 1651 | /* |
| 1153 | * Access the page AFTER the hash-bucket is locked. | 1652 | * Access the page AFTER the hash-bucket is locked. |
| @@ -1165,95 +1664,83 @@ retry_private: | |||
| 1165 | * A consequence is that futex_wait() can return zero and absorb | 1664 | * A consequence is that futex_wait() can return zero and absorb |
| 1166 | * a wakeup when *uaddr != val on entry to the syscall. This is | 1665 | * a wakeup when *uaddr != val on entry to the syscall. This is |
| 1167 | * rare, but normal. | 1666 | * rare, but normal. |
| 1168 | * | ||
| 1169 | * For shared futexes, we hold the mmap semaphore, so the mapping | ||
| 1170 | * cannot have changed since we looked it up in get_futex_key. | ||
| 1171 | */ | 1667 | */ |
| 1668 | retry: | ||
| 1669 | q->key = FUTEX_KEY_INIT; | ||
| 1670 | ret = get_futex_key(uaddr, fshared, &q->key, VERIFY_READ); | ||
| 1671 | if (unlikely(ret != 0)) | ||
| 1672 | return ret; | ||
| 1673 | |||
| 1674 | retry_private: | ||
| 1675 | *hb = queue_lock(q); | ||
| 1676 | |||
| 1172 | ret = get_futex_value_locked(&uval, uaddr); | 1677 | ret = get_futex_value_locked(&uval, uaddr); |
| 1173 | 1678 | ||
| 1174 | if (unlikely(ret)) { | 1679 | if (ret) { |
| 1175 | queue_unlock(&q, hb); | 1680 | queue_unlock(q, *hb); |
| 1176 | 1681 | ||
| 1177 | ret = get_user(uval, uaddr); | 1682 | ret = get_user(uval, uaddr); |
| 1178 | if (ret) | 1683 | if (ret) |
| 1179 | goto out_put_key; | 1684 | goto out; |
| 1180 | 1685 | ||
| 1181 | if (!fshared) | 1686 | if (!fshared) |
| 1182 | goto retry_private; | 1687 | goto retry_private; |
| 1183 | 1688 | ||
| 1184 | put_futex_key(fshared, &q.key); | 1689 | put_futex_key(fshared, &q->key); |
| 1185 | goto retry; | 1690 | goto retry; |
| 1186 | } | 1691 | } |
| 1187 | ret = -EWOULDBLOCK; | ||
| 1188 | if (unlikely(uval != val)) { | ||
| 1189 | queue_unlock(&q, hb); | ||
| 1190 | goto out_put_key; | ||
| 1191 | } | ||
| 1192 | 1692 | ||
| 1193 | /* Only actually queue if *uaddr contained val. */ | 1693 | if (uval != val) { |
| 1194 | queue_me(&q, hb); | 1694 | queue_unlock(q, *hb); |
| 1695 | ret = -EWOULDBLOCK; | ||
| 1696 | } | ||
| 1195 | 1697 | ||
| 1196 | /* | 1698 | out: |
| 1197 | * There might have been scheduling since the queue_me(), as we | 1699 | if (ret) |
| 1198 | * cannot hold a spinlock across the get_user() in case it | 1700 | put_futex_key(fshared, &q->key); |
| 1199 | * faults, and we cannot just set TASK_INTERRUPTIBLE state when | 1701 | return ret; |
| 1200 | * queueing ourselves into the futex hash. This code thus has to | 1702 | } |
| 1201 | * rely on the futex_wake() code removing us from hash when it | ||
| 1202 | * wakes us up. | ||
| 1203 | */ | ||
| 1204 | 1703 | ||
| 1205 | /* add_wait_queue is the barrier after __set_current_state. */ | 1704 | static int futex_wait(u32 __user *uaddr, int fshared, |
| 1206 | __set_current_state(TASK_INTERRUPTIBLE); | 1705 | u32 val, ktime_t *abs_time, u32 bitset, int clockrt) |
| 1207 | add_wait_queue(&q.waiter, &wait); | 1706 | { |
| 1208 | /* | 1707 | struct hrtimer_sleeper timeout, *to = NULL; |
| 1209 | * !plist_node_empty() is safe here without any lock. | 1708 | struct restart_block *restart; |
| 1210 | * q.lock_ptr != 0 is not safe, because of ordering against wakeup. | 1709 | struct futex_hash_bucket *hb; |
| 1211 | */ | 1710 | struct futex_q q; |
| 1212 | if (likely(!plist_node_empty(&q.list))) { | 1711 | int ret; |
| 1213 | if (!abs_time) | ||
| 1214 | schedule(); | ||
| 1215 | else { | ||
| 1216 | hrtimer_init_on_stack(&t.timer, | ||
| 1217 | clockrt ? CLOCK_REALTIME : | ||
| 1218 | CLOCK_MONOTONIC, | ||
| 1219 | HRTIMER_MODE_ABS); | ||
| 1220 | hrtimer_init_sleeper(&t, current); | ||
| 1221 | hrtimer_set_expires_range_ns(&t.timer, *abs_time, | ||
| 1222 | current->timer_slack_ns); | ||
| 1223 | |||
| 1224 | hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); | ||
| 1225 | if (!hrtimer_active(&t.timer)) | ||
| 1226 | t.task = NULL; | ||
| 1227 | 1712 | ||
| 1228 | /* | 1713 | if (!bitset) |
| 1229 | * the timer could have already expired, in which | 1714 | return -EINVAL; |
| 1230 | * case current would be flagged for rescheduling. | ||
| 1231 | * Don't bother calling schedule. | ||
| 1232 | */ | ||
| 1233 | if (likely(t.task)) | ||
| 1234 | schedule(); | ||
| 1235 | 1715 | ||
| 1236 | hrtimer_cancel(&t.timer); | 1716 | q.pi_state = NULL; |
| 1717 | q.bitset = bitset; | ||
| 1718 | q.rt_waiter = NULL; | ||
| 1237 | 1719 | ||
| 1238 | /* Flag if a timeout occured */ | 1720 | if (abs_time) { |
| 1239 | rem = (t.task == NULL); | 1721 | to = &timeout; |
| 1240 | 1722 | ||
| 1241 | destroy_hrtimer_on_stack(&t.timer); | 1723 | hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : |
| 1242 | } | 1724 | CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
| 1725 | hrtimer_init_sleeper(to, current); | ||
| 1726 | hrtimer_set_expires_range_ns(&to->timer, *abs_time, | ||
| 1727 | current->timer_slack_ns); | ||
| 1243 | } | 1728 | } |
| 1244 | __set_current_state(TASK_RUNNING); | ||
| 1245 | 1729 | ||
| 1246 | /* | 1730 | /* Prepare to wait on uaddr. */ |
| 1247 | * NOTE: we don't remove ourselves from the waitqueue because | 1731 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); |
| 1248 | * we are the only user of it. | 1732 | if (ret) |
| 1249 | */ | 1733 | goto out; |
| 1734 | |||
| 1735 | /* queue_me and wait for wakeup, timeout, or a signal. */ | ||
| 1736 | futex_wait_queue_me(hb, &q, to); | ||
| 1250 | 1737 | ||
| 1251 | /* If we were woken (and unqueued), we succeeded, whatever. */ | 1738 | /* If we were woken (and unqueued), we succeeded, whatever. */ |
| 1252 | ret = 0; | 1739 | ret = 0; |
| 1253 | if (!unqueue_me(&q)) | 1740 | if (!unqueue_me(&q)) |
| 1254 | goto out_put_key; | 1741 | goto out_put_key; |
| 1255 | ret = -ETIMEDOUT; | 1742 | ret = -ETIMEDOUT; |
| 1256 | if (rem) | 1743 | if (to && !to->task) |
| 1257 | goto out_put_key; | 1744 | goto out_put_key; |
| 1258 | 1745 | ||
| 1259 | /* | 1746 | /* |
| @@ -1270,7 +1757,7 @@ retry_private: | |||
| 1270 | restart->futex.val = val; | 1757 | restart->futex.val = val; |
| 1271 | restart->futex.time = abs_time->tv64; | 1758 | restart->futex.time = abs_time->tv64; |
| 1272 | restart->futex.bitset = bitset; | 1759 | restart->futex.bitset = bitset; |
| 1273 | restart->futex.flags = 0; | 1760 | restart->futex.flags = FLAGS_HAS_TIMEOUT; |
| 1274 | 1761 | ||
| 1275 | if (fshared) | 1762 | if (fshared) |
| 1276 | restart->futex.flags |= FLAGS_SHARED; | 1763 | restart->futex.flags |= FLAGS_SHARED; |
| @@ -1282,6 +1769,10 @@ retry_private: | |||
| 1282 | out_put_key: | 1769 | out_put_key: |
| 1283 | put_futex_key(fshared, &q.key); | 1770 | put_futex_key(fshared, &q.key); |
| 1284 | out: | 1771 | out: |
| 1772 | if (to) { | ||
| 1773 | hrtimer_cancel(&to->timer); | ||
| 1774 | destroy_hrtimer_on_stack(&to->timer); | ||
| 1775 | } | ||
| 1285 | return ret; | 1776 | return ret; |
| 1286 | } | 1777 | } |
| 1287 | 1778 | ||
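futex_wait() and futex_wake() back the classic userspace pattern in which FUTEX_WAIT only sleeps if *uaddr still holds the expected value, the same check futex_wait_setup() performs under the hb lock. A minimal sketch with illustrative helper names, not taken from the patch:

	#include <linux/futex.h>
	#include <stdint.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static void wait_for_flag(uint32_t *uaddr)
	{
		while (__atomic_load_n(uaddr, __ATOMIC_ACQUIRE) == 0) {
			/* Sleeps only while *uaddr == 0; returns on wakeup, on
			 * EWOULDBLOCK (value already changed) or on a signal,
			 * and the loop simply re-checks the flag. */
			syscall(SYS_futex, uaddr, FUTEX_WAIT, 0, NULL, NULL, 0);
		}
	}

	static void set_flag_and_wake(uint32_t *uaddr)
	{
		__atomic_store_n(uaddr, 1, __ATOMIC_RELEASE);
		syscall(SYS_futex, uaddr, FUTEX_WAKE, 1, NULL, NULL, 0);
	}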
| @@ -1290,13 +1781,16 @@ static long futex_wait_restart(struct restart_block *restart) | |||
| 1290 | { | 1781 | { |
| 1291 | u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; | 1782 | u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; |
| 1292 | int fshared = 0; | 1783 | int fshared = 0; |
| 1293 | ktime_t t; | 1784 | ktime_t t, *tp = NULL; |
| 1294 | 1785 | ||
| 1295 | t.tv64 = restart->futex.time; | 1786 | if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { |
| 1787 | t.tv64 = restart->futex.time; | ||
| 1788 | tp = &t; | ||
| 1789 | } | ||
| 1296 | restart->fn = do_no_restart_syscall; | 1790 | restart->fn = do_no_restart_syscall; |
| 1297 | if (restart->futex.flags & FLAGS_SHARED) | 1791 | if (restart->futex.flags & FLAGS_SHARED) |
| 1298 | fshared = 1; | 1792 | fshared = 1; |
| 1299 | return (long)futex_wait(uaddr, fshared, restart->futex.val, &t, | 1793 | return (long)futex_wait(uaddr, fshared, restart->futex.val, tp, |
| 1300 | restart->futex.bitset, | 1794 | restart->futex.bitset, |
| 1301 | restart->futex.flags & FLAGS_CLOCKRT); | 1795 | restart->futex.flags & FLAGS_CLOCKRT); |
| 1302 | } | 1796 | } |
| @@ -1312,11 +1806,10 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, | |||
| 1312 | int detect, ktime_t *time, int trylock) | 1806 | int detect, ktime_t *time, int trylock) |
| 1313 | { | 1807 | { |
| 1314 | struct hrtimer_sleeper timeout, *to = NULL; | 1808 | struct hrtimer_sleeper timeout, *to = NULL; |
| 1315 | struct task_struct *curr = current; | ||
| 1316 | struct futex_hash_bucket *hb; | 1809 | struct futex_hash_bucket *hb; |
| 1317 | u32 uval, newval, curval; | 1810 | u32 uval; |
| 1318 | struct futex_q q; | 1811 | struct futex_q q; |
| 1319 | int ret, lock_taken, ownerdied = 0; | 1812 | int res, ret; |
| 1320 | 1813 | ||
| 1321 | if (refill_pi_state_cache()) | 1814 | if (refill_pi_state_cache()) |
| 1322 | return -ENOMEM; | 1815 | return -ENOMEM; |
| @@ -1330,6 +1823,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, | |||
| 1330 | } | 1823 | } |
| 1331 | 1824 | ||
| 1332 | q.pi_state = NULL; | 1825 | q.pi_state = NULL; |
| 1826 | q.rt_waiter = NULL; | ||
| 1333 | retry: | 1827 | retry: |
| 1334 | q.key = FUTEX_KEY_INIT; | 1828 | q.key = FUTEX_KEY_INIT; |
| 1335 | ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE); | 1829 | ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE); |
| @@ -1339,81 +1833,15 @@ retry: | |||
| 1339 | retry_private: | 1833 | retry_private: |
| 1340 | hb = queue_lock(&q); | 1834 | hb = queue_lock(&q); |
| 1341 | 1835 | ||
| 1342 | retry_locked: | 1836 | ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0); |
| 1343 | ret = lock_taken = 0; | ||
| 1344 | |||
| 1345 | /* | ||
| 1346 | * To avoid races, we attempt to take the lock here again | ||
| 1347 | * (by doing a 0 -> TID atomic cmpxchg), while holding all | ||
| 1348 | * the locks. It will most likely not succeed. | ||
| 1349 | */ | ||
| 1350 | newval = task_pid_vnr(current); | ||
| 1351 | |||
| 1352 | curval = cmpxchg_futex_value_locked(uaddr, 0, newval); | ||
| 1353 | |||
| 1354 | if (unlikely(curval == -EFAULT)) | ||
| 1355 | goto uaddr_faulted; | ||
| 1356 | |||
| 1357 | /* | ||
| 1358 | * Detect deadlocks. In case of REQUEUE_PI this is a valid | ||
| 1359 | * situation and we return success to user space. | ||
| 1360 | */ | ||
| 1361 | if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) { | ||
| 1362 | ret = -EDEADLK; | ||
| 1363 | goto out_unlock_put_key; | ||
| 1364 | } | ||
| 1365 | |||
| 1366 | /* | ||
| 1367 | * Surprise - we got the lock. Just return to userspace: | ||
| 1368 | */ | ||
| 1369 | if (unlikely(!curval)) | ||
| 1370 | goto out_unlock_put_key; | ||
| 1371 | |||
| 1372 | uval = curval; | ||
| 1373 | |||
| 1374 | /* | ||
| 1375 | * Set the WAITERS flag, so the owner will know it has someone | ||
| 1376 | * to wake at next unlock | ||
| 1377 | */ | ||
| 1378 | newval = curval | FUTEX_WAITERS; | ||
| 1379 | |||
| 1380 | /* | ||
| 1381 | * There are two cases, where a futex might have no owner (the | ||
| 1382 | * owner TID is 0): OWNER_DIED. We take over the futex in this | ||
| 1383 | * case. We also do an unconditional take over, when the owner | ||
| 1384 | * of the futex died. | ||
| 1385 | * | ||
| 1386 | * This is safe as we are protected by the hash bucket lock ! | ||
| 1387 | */ | ||
| 1388 | if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { | ||
| 1389 | /* Keep the OWNER_DIED bit */ | ||
| 1390 | newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(current); | ||
| 1391 | ownerdied = 0; | ||
| 1392 | lock_taken = 1; | ||
| 1393 | } | ||
| 1394 | |||
| 1395 | curval = cmpxchg_futex_value_locked(uaddr, uval, newval); | ||
| 1396 | |||
| 1397 | if (unlikely(curval == -EFAULT)) | ||
| 1398 | goto uaddr_faulted; | ||
| 1399 | if (unlikely(curval != uval)) | ||
| 1400 | goto retry_locked; | ||
| 1401 | |||
| 1402 | /* | ||
| 1403 | * We took the lock due to owner died take over. | ||
| 1404 | */ | ||
| 1405 | if (unlikely(lock_taken)) | ||
| 1406 | goto out_unlock_put_key; | ||
| 1407 | |||
| 1408 | /* | ||
| 1409 | * We dont have the lock. Look up the PI state (or create it if | ||
| 1410 | * we are the first waiter): | ||
| 1411 | */ | ||
| 1412 | ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state); | ||
| 1413 | |||
| 1414 | if (unlikely(ret)) { | 1837 | if (unlikely(ret)) { |
| 1415 | switch (ret) { | 1838 | switch (ret) { |
| 1416 | 1839 | case 1: | |
| 1840 | /* We got the lock. */ | ||
| 1841 | ret = 0; | ||
| 1842 | goto out_unlock_put_key; | ||
| 1843 | case -EFAULT: | ||
| 1844 | goto uaddr_faulted; | ||
| 1417 | case -EAGAIN: | 1845 | case -EAGAIN: |
| 1418 | /* | 1846 | /* |
| 1419 | * Task is exiting and we just wait for the | 1847 | * Task is exiting and we just wait for the |
| @@ -1423,25 +1851,6 @@ retry_locked: | |||
| 1423 | put_futex_key(fshared, &q.key); | 1851 | put_futex_key(fshared, &q.key); |
| 1424 | cond_resched(); | 1852 | cond_resched(); |
| 1425 | goto retry; | 1853 | goto retry; |
| 1426 | |||
| 1427 | case -ESRCH: | ||
| 1428 | /* | ||
| 1429 | * No owner found for this futex. Check if the | ||
| 1430 | * OWNER_DIED bit is set to figure out whether | ||
| 1431 | * this is a robust futex or not. | ||
| 1432 | */ | ||
| 1433 | if (get_futex_value_locked(&curval, uaddr)) | ||
| 1434 | goto uaddr_faulted; | ||
| 1435 | |||
| 1436 | /* | ||
| 1437 | * We simply start over in case of a robust | ||
| 1438 | * futex. The code above will take the futex | ||
| 1439 | * and return happy. | ||
| 1440 | */ | ||
| 1441 | if (curval & FUTEX_OWNER_DIED) { | ||
| 1442 | ownerdied = 1; | ||
| 1443 | goto retry_locked; | ||
| 1444 | } | ||
| 1445 | default: | 1854 | default: |
| 1446 | goto out_unlock_put_key; | 1855 | goto out_unlock_put_key; |
| 1447 | } | 1856 | } |
| @@ -1465,71 +1874,21 @@ retry_locked: | |||
| 1465 | } | 1874 | } |
| 1466 | 1875 | ||
| 1467 | spin_lock(q.lock_ptr); | 1876 | spin_lock(q.lock_ptr); |
| 1468 | 1877 | /* | |
| 1469 | if (!ret) { | 1878 | * Fixup the pi_state owner and possibly acquire the lock if we |
| 1470 | /* | 1879 | * haven't already. |
| 1471 | * Got the lock. We might not be the anticipated owner | 1880 | */ |
| 1472 | * if we did a lock-steal - fix up the PI-state in | 1881 | res = fixup_owner(uaddr, fshared, &q, !ret); |
| 1473 | * that case: | 1882 | /* |
| 1474 | */ | 1883 | * If fixup_owner() returned an error, propagate that. If it acquired |
| 1475 | if (q.pi_state->owner != curr) | 1884 | * the lock, clear our -ETIMEDOUT or -EINTR. |
| 1476 | ret = fixup_pi_state_owner(uaddr, &q, curr, fshared); | 1885 | */ |
| 1477 | } else { | 1886 | if (res) |
| 1478 | /* | 1887 | ret = (res < 0) ? res : 0; |
| 1479 | * Catch the rare case, where the lock was released | ||
| 1480 | * when we were on the way back before we locked the | ||
| 1481 | * hash bucket. | ||
| 1482 | */ | ||
| 1483 | if (q.pi_state->owner == curr) { | ||
| 1484 | /* | ||
| 1485 | * Try to get the rt_mutex now. This might | ||
| 1486 | * fail as some other task acquired the | ||
| 1487 | * rt_mutex after we removed ourself from the | ||
| 1488 | * rt_mutex waiters list. | ||
| 1489 | */ | ||
| 1490 | if (rt_mutex_trylock(&q.pi_state->pi_mutex)) | ||
| 1491 | ret = 0; | ||
| 1492 | else { | ||
| 1493 | /* | ||
| 1494 | * pi_state is incorrect, some other | ||
| 1495 | * task did a lock steal and we | ||
| 1496 | * returned due to timeout or signal | ||
| 1497 | * without taking the rt_mutex. Too | ||
| 1498 | * late. We can access the | ||
| 1499 | * rt_mutex_owner without locking, as | ||
| 1500 | * the other task is now blocked on | ||
| 1501 | * the hash bucket lock. Fix the state | ||
| 1502 | * up. | ||
| 1503 | */ | ||
| 1504 | struct task_struct *owner; | ||
| 1505 | int res; | ||
| 1506 | |||
| 1507 | owner = rt_mutex_owner(&q.pi_state->pi_mutex); | ||
| 1508 | res = fixup_pi_state_owner(uaddr, &q, owner, | ||
| 1509 | fshared); | ||
| 1510 | |||
| 1511 | /* propagate -EFAULT, if the fixup failed */ | ||
| 1512 | if (res) | ||
| 1513 | ret = res; | ||
| 1514 | } | ||
| 1515 | } else { | ||
| 1516 | /* | ||
| 1517 | * Paranoia check. If we did not take the lock | ||
| 1518 | * in the trylock above, then we should not be | ||
| 1519 | * the owner of the rtmutex, neither the real | ||
| 1520 | * nor the pending one: | ||
| 1521 | */ | ||
| 1522 | if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr) | ||
| 1523 | printk(KERN_ERR "futex_lock_pi: ret = %d " | ||
| 1524 | "pi-mutex: %p pi-state %p\n", ret, | ||
| 1525 | q.pi_state->pi_mutex.owner, | ||
| 1526 | q.pi_state->owner); | ||
| 1527 | } | ||
| 1528 | } | ||
| 1529 | 1888 | ||
| 1530 | /* | 1889 | /* |
| 1531 | * If fixup_pi_state_owner() faulted and was unable to handle the | 1890 | * If fixup_owner() faulted and was unable to handle the fault, unlock |
| 1532 | * fault, unlock it and return the fault to userspace. | 1891 | * it and return the fault to userspace. |
| 1533 | */ | 1892 | */ |
| 1534 | if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) | 1893 | if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) |
| 1535 | rt_mutex_unlock(&q.pi_state->pi_mutex); | 1894 | rt_mutex_unlock(&q.pi_state->pi_mutex); |
| @@ -1537,9 +1896,7 @@ retry_locked: | |||
| 1537 | /* Unqueue and drop the lock */ | 1896 | /* Unqueue and drop the lock */ |
| 1538 | unqueue_me_pi(&q); | 1897 | unqueue_me_pi(&q); |
| 1539 | 1898 | ||
| 1540 | if (to) | 1899 | goto out; |
| 1541 | destroy_hrtimer_on_stack(&to->timer); | ||
| 1542 | return ret != -EINTR ? ret : -ERESTARTNOINTR; | ||
| 1543 | 1900 | ||
| 1544 | out_unlock_put_key: | 1901 | out_unlock_put_key: |
| 1545 | queue_unlock(&q, hb); | 1902 | queue_unlock(&q, hb); |
| @@ -1549,7 +1906,7 @@ out_put_key: | |||
| 1549 | out: | 1906 | out: |
| 1550 | if (to) | 1907 | if (to) |
| 1551 | destroy_hrtimer_on_stack(&to->timer); | 1908 | destroy_hrtimer_on_stack(&to->timer); |
| 1552 | return ret; | 1909 | return ret != -EINTR ? ret : -ERESTARTNOINTR; |
| 1553 | 1910 | ||
| 1554 | uaddr_faulted: | 1911 | uaddr_faulted: |
| 1555 | /* | 1912 | /* |
| @@ -1572,7 +1929,6 @@ uaddr_faulted: | |||
| 1572 | goto retry; | 1929 | goto retry; |
| 1573 | } | 1930 | } |
| 1574 | 1931 | ||
| 1575 | |||
| 1576 | /* | 1932 | /* |
| 1577 | * Userspace attempted a TID -> 0 atomic transition, and failed. | 1933 | * Userspace attempted a TID -> 0 atomic transition, and failed. |
| 1578 | * This is the in-kernel slowpath: we look up the PI state (if any), | 1934 | * This is the in-kernel slowpath: we look up the PI state (if any), |
| @@ -1674,6 +2030,229 @@ pi_faulted: | |||
| 1674 | return ret; | 2030 | return ret; |
| 1675 | } | 2031 | } |
| 1676 | 2032 | ||
| 2033 | /** | ||
| 2034 | * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex | ||
| 2035 | * @hb: the hash_bucket futex_q was originally enqueued on | ||
| 2036 | * @q: the futex_q woken while waiting to be requeued | ||
| 2037 | * @key2: the futex_key of the requeue target futex | ||
| 2038 | * @timeout: the timeout associated with the wait (NULL if none) | ||
| 2039 | * | ||
| 2040 | * Detect if the task was woken on the initial futex as opposed to the requeue | ||
| 2041 | * target futex. If so, determine if it was a timeout or a signal that caused | ||
| 2042 | * the wakeup and return the appropriate error code to the caller. Must be | ||
| 2043 | * called with the hb lock held. | ||
| 2044 | * | ||
| 2045 | * Returns: | ||
| 2046 | * 0 - no early wakeup detected | ||
| 2047 | * <0 - -ETIMEDOUT or -ERESTARTNOINTR | ||
| 2048 | */ | ||
| 2049 | static inline | ||
| 2050 | int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, | ||
| 2051 | struct futex_q *q, union futex_key *key2, | ||
| 2052 | struct hrtimer_sleeper *timeout) | ||
| 2053 | { | ||
| 2054 | int ret = 0; | ||
| 2055 | |||
| 2056 | /* | ||
| 2057 | * With the hb lock held, we avoid races while we process the wakeup. | ||
| 2058 | * We only need to hold hb (and not hb2) to ensure atomicity as the | ||
| 2059 | * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb. | ||
| 2060 | * It can't be requeued from uaddr2 to something else since we don't | ||
| 2061 | * support a PI aware source futex for requeue. | ||
| 2062 | */ | ||
| 2063 | if (!match_futex(&q->key, key2)) { | ||
| 2064 | WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr)); | ||
| 2065 | /* | ||
| 2066 | * We were woken prior to requeue by a timeout or a signal. | ||
| 2067 | * Unqueue the futex_q and determine which it was. | ||
| 2068 | */ | ||
| 2069 | plist_del(&q->list, &q->list.plist); | ||
| 2070 | drop_futex_key_refs(&q->key); | ||
| 2071 | |||
| 2072 | if (timeout && !timeout->task) | ||
| 2073 | ret = -ETIMEDOUT; | ||
| 2074 | else | ||
| 2075 | ret = -ERESTARTNOINTR; | ||
| 2076 | } | ||
| 2077 | return ret; | ||
| 2078 | } | ||
| 2079 | |||
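The function documented next, futex_wait_requeue_pi(), is the kernel side of the waiter in the requeue-PI pattern, reached through the FUTEX_WAIT_REQUEUE_PI op added elsewhere in this series. A simplified waiter-side sketch, using the same illustrative condvar/mutex layout as the broadcast sketch after futex_requeue() and glossing over errno handling, might look like this:

	#include <linux/futex.h>
	#include <stdint.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	struct cond  { uint32_t seq; };
	struct mutex { uint32_t owner; };

	/* Caller holds the PI mutex on entry and on return, as with a condvar. */
	static int cond_wait_pi(struct cond *c, struct mutex *m)
	{
		uint32_t seq = __atomic_load_n(&c->seq, __ATOMIC_ACQUIRE);
		int ret;

		/* Release the mutex via the kernel slow path (kept simple here). */
		syscall(SYS_futex, &m->owner, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0);

		/* Sleep on &c->seq; a later FUTEX_CMP_REQUEUE_PI moves us onto
		 * &m->owner and the kernel acquires that PI futex on our behalf,
		 * so a return of 0 means we already own the mutex. */
		ret = syscall(SYS_futex, &c->seq, FUTEX_WAIT_REQUEUE_PI, seq,
			      NULL, &m->owner, 0);

		if (ret != 0) {
			/* Timeout, signal or early wakeup: take the mutex ourselves. */
			syscall(SYS_futex, &m->owner, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
		}
		return ret;
	}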
| 2080 | /** | ||
| 2081 | * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 | ||
| 2082 | * @uaddr: the futex we initially wait on (non-pi) | ||
| 2083 | * @fshared: whether the futexes are shared (1) or not (0). They must be | ||
| 2084 | * the same type, no requeueing from private to shared, etc. | ||
| 2085 | * @val: the expected value of uaddr | ||
| 2086 | * @abs_time: absolute timeout | ||
| 2087 | * @bitset: 32 bit wakeup bitset set by userspace, defaults to all. | ||
| 2088 | * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0) | ||
| 2089 | * @uaddr2: the pi futex we will take prior to returning to user-space | ||
| 2090 | * | ||
| 2091 | * The caller will wait on uaddr and will be requeued by futex_requeue() to | ||
| 2092 | * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and | ||
| 2093 | * complete the acquisition of the rt_mutex prior to returning to userspace. | ||
| 2094 | * This ensures the rt_mutex maintains an owner when it has waiters; without | ||
| 2095 | * one, the pi logic wouldn't know which task to boost/deboost, if there was a | ||
| 2096 | * need to. | ||
| 2097 | * | ||
| 2098 | * We call schedule in futex_wait_queue_me() when we enqueue and return there | ||
| 2099 | * via the following: | ||
| 2100 | * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue() | ||
| 2101 | * 2) wakeup on uaddr2 after a requeue and subsequent unlock | ||
| 2102 | * 3) signal (before or after requeue) | ||
| 2103 | * 4) timeout (before or after requeue) | ||
| 2104 | * | ||
| 2105 | * If 3, we setup a restart_block with futex_wait_requeue_pi() as the function. | ||
| 2106 | * | ||
| 2107 | * If 2, we may then block on trying to take the rt_mutex and return via: | ||
| 2108 | * 5) successful lock | ||
| 2109 | * 6) signal | ||
| 2110 | * 7) timeout | ||
| 2111 | * 8) other lock acquisition failure | ||
| 2112 | * | ||
| 2113 | * If 6, we setup a restart_block with futex_lock_pi() as the function. | ||
| 2114 | * | ||
| 2115 | * If 4 or 7, we cleanup and return with -ETIMEDOUT. | ||
| 2116 | * | ||
| 2117 | * Returns: | ||
| 2118 | * 0 - On success | ||
| 2119 | * <0 - On error | ||
| 2120 | */ | ||
| 2121 | static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | ||
| 2122 | u32 val, ktime_t *abs_time, u32 bitset, | ||
| 2123 | int clockrt, u32 __user *uaddr2) | ||
| 2124 | { | ||
| 2125 | struct hrtimer_sleeper timeout, *to = NULL; | ||
| 2126 | struct rt_mutex_waiter rt_waiter; | ||
| 2127 | struct rt_mutex *pi_mutex = NULL; | ||
| 2128 | struct futex_hash_bucket *hb; | ||
| 2129 | union futex_key key2; | ||
| 2130 | struct futex_q q; | ||
| 2131 | int res, ret; | ||
| 2132 | |||
| 2133 | if (!bitset) | ||
| 2134 | return -EINVAL; | ||
| 2135 | |||
| 2136 | if (abs_time) { | ||
| 2137 | to = &timeout; | ||
| 2138 | hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : | ||
| 2139 | CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
| 2140 | hrtimer_init_sleeper(to, current); | ||
| 2141 | hrtimer_set_expires_range_ns(&to->timer, *abs_time, | ||
| 2142 | current->timer_slack_ns); | ||
| 2143 | } | ||
| 2144 | |||
| 2145 | /* | ||
| 2146 | * The waiter is allocated on our stack, manipulated by the requeue | ||
| 2147 | * code while we sleep on uaddr. | ||
| 2148 | */ | ||
| 2149 | debug_rt_mutex_init_waiter(&rt_waiter); | ||
| 2150 | rt_waiter.task = NULL; | ||
| 2151 | |||
| 2152 | q.pi_state = NULL; | ||
| 2153 | q.bitset = bitset; | ||
| 2154 | q.rt_waiter = &rt_waiter; | ||
| 2155 | |||
| 2156 | key2 = FUTEX_KEY_INIT; | ||
| 2157 | ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); | ||
| 2158 | if (unlikely(ret != 0)) | ||
| 2159 | goto out; | ||
| 2160 | |||
| 2161 | /* Prepare to wait on uaddr. */ | ||
| 2162 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); | ||
| 2163 | if (ret) | ||
| 2164 | goto out_key2; | ||
| 2165 | |||
| 2166 | /* Queue the futex_q, drop the hb lock, wait for wakeup. */ | ||
| 2167 | futex_wait_queue_me(hb, &q, to); | ||
| 2168 | |||
| 2169 | spin_lock(&hb->lock); | ||
| 2170 | ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); | ||
| 2171 | spin_unlock(&hb->lock); | ||
| 2172 | if (ret) | ||
| 2173 | goto out_put_keys; | ||
| 2174 | |||
| 2175 | /* | ||
| 2176 | * In order for us to be here, we know our q.key == key2, and since | ||
| 2177 | * we took the hb->lock above, we also know that futex_requeue() has | ||
| 2178 | * completed and we no longer have to concern ourselves with a wakeup | ||
| 2179 | * race with the atomic proxy lock acquisition by the requeue code. | ||
| 2180 | */ | ||
| 2181 | |||
| 2182 | /* Check if the requeue code acquired the second futex for us. */ | ||
| 2183 | if (!q.rt_waiter) { | ||
| 2184 | /* | ||
| 2185 | * Got the lock. We might not be the anticipated owner if we | ||
| 2186 | * did a lock-steal - fix up the PI-state in that case. | ||
| 2187 | */ | ||
| 2188 | if (q.pi_state && (q.pi_state->owner != current)) { | ||
| 2189 | spin_lock(q.lock_ptr); | ||
| 2190 | ret = fixup_pi_state_owner(uaddr2, &q, current, | ||
| 2191 | fshared); | ||
| 2192 | spin_unlock(q.lock_ptr); | ||
| 2193 | } | ||
| 2194 | } else { | ||
| 2195 | /* | ||
| 2196 | * We have been woken up by futex_unlock_pi(), a timeout, or a | ||
| 2197 | * signal. futex_unlock_pi() will not destroy the lock_ptr nor | ||
| 2198 | * the pi_state. | ||
| 2199 | */ | ||
| 2200 | WARN_ON(!q.pi_state); | ||
| 2201 | pi_mutex = &q.pi_state->pi_mutex; | ||
| 2202 | ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1); | ||
| 2203 | debug_rt_mutex_free_waiter(&rt_waiter); | ||
| 2204 | |||
| 2205 | spin_lock(q.lock_ptr); | ||
| 2206 | /* | ||
| 2207 | * Fixup the pi_state owner and possibly acquire the lock if we | ||
| 2208 | * haven't already. | ||
| 2209 | */ | ||
| 2210 | res = fixup_owner(uaddr2, fshared, &q, !ret); | ||
| 2211 | /* | ||
| 2212 | * If fixup_owner() returned an error, propagate that. If it | ||
| 2213 | * acquired the lock, clear our -ETIMEDOUT or -EINTR. | ||
| 2214 | */ | ||
| 2215 | if (res) | ||
| 2216 | ret = (res < 0) ? res : 0; | ||
| 2217 | |||
| 2218 | /* Unqueue and drop the lock. */ | ||
| 2219 | unqueue_me_pi(&q); | ||
| 2220 | } | ||
| 2221 | |||
| 2222 | /* | ||
| 2223 | * If fixup_pi_state_owner() faulted and was unable to handle the | ||
| 2224 | * fault, unlock the rt_mutex and return the fault to userspace. | ||
| 2225 | */ | ||
| 2226 | if (ret == -EFAULT) { | ||
| 2227 | if (rt_mutex_owner(pi_mutex) == current) | ||
| 2228 | rt_mutex_unlock(pi_mutex); | ||
| 2229 | } else if (ret == -EINTR) { | ||
| 2230 | /* | ||
| 2231 | * We've already been requeued, but we have no way to | ||
| 2232 | * restart by calling futex_lock_pi() directly. We | ||
| 2233 | * could restart the syscall, but that will look at | ||
| 2234 | * the user space value and return right away. So we | ||
| 2235 | * drop back with EWOULDBLOCK to tell user space that | ||
| 2236 | * "val" has been changed. That's the same what the | ||
| 2237 | * restart of the syscall would do in | ||
| 2238 | * futex_wait_setup(). | ||
| 2239 | */ | ||
| 2240 | ret = -EWOULDBLOCK; | ||
| 2241 | } | ||
| 2242 | |||
| 2243 | out_put_keys: | ||
| 2244 | put_futex_key(fshared, &q.key); | ||
| 2245 | out_key2: | ||
| 2246 | put_futex_key(fshared, &key2); | ||
| 2247 | |||
| 2248 | out: | ||
| 2249 | if (to) { | ||
| 2250 | hrtimer_cancel(&to->timer); | ||
| 2251 | destroy_hrtimer_on_stack(&to->timer); | ||
| 2252 | } | ||
| 2253 | return ret; | ||
| 2254 | } | ||
| 2255 | |||
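For orientation, here is a minimal user-space sketch of the waiter side of this new interface. It assumes a <linux/futex.h> that defines FUTEX_WAIT_REQUEUE_PI and a raw syscall(2) wrapper; the futex words cond_val and pi_mutex_val and the helper name are purely illustrative, not part of the patch:

#include <errno.h>
#include <stdint.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

/* Illustrative futex words; in real code these live in shared memory. */
static uint32_t cond_val;     /* the futex waited on (uaddr)              */
static uint32_t pi_mutex_val; /* the PI futex we are requeued to (uaddr2) */

/*
 * Wait on &cond_val; a 0 return means we were requeued to &pi_mutex_val and
 * now own it (acquired either atomically by the requeue or by ourselves in
 * futex_wait_requeue_pi()).  -EWOULDBLOCK means cond_val != expected.
 */
static int cond_wait_requeue_pi(uint32_t expected,
				const struct timespec *abs_timeout)
{
	if (syscall(SYS_futex, &cond_val, FUTEX_WAIT_REQUEUE_PI,
		    expected, abs_timeout, &pi_mutex_val, 0) == -1)
		return -errno;
	return 0;
}

The bitset argument (val3) is passed as 0 here because the dispatch below forces it to FUTEX_BITSET_MATCH_ANY for this command.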
| 1677 | /* | 2256 | /* |
| 1678 | * Support for robust futexes: the kernel cleans up held futexes at | 2257 | * Support for robust futexes: the kernel cleans up held futexes at |
| 1679 | * thread exit time. | 2258 | * thread exit time. |
| @@ -1896,7 +2475,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, | |||
| 1896 | fshared = 1; | 2475 | fshared = 1; |
| 1897 | 2476 | ||
| 1898 | clockrt = op & FUTEX_CLOCK_REALTIME; | 2477 | clockrt = op & FUTEX_CLOCK_REALTIME; |
| 1899 | if (clockrt && cmd != FUTEX_WAIT_BITSET) | 2478 | if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) |
| 1900 | return -ENOSYS; | 2479 | return -ENOSYS; |
| 1901 | 2480 | ||
| 1902 | switch (cmd) { | 2481 | switch (cmd) { |
| @@ -1911,10 +2490,11 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, | |||
| 1911 | ret = futex_wake(uaddr, fshared, val, val3); | 2490 | ret = futex_wake(uaddr, fshared, val, val3); |
| 1912 | break; | 2491 | break; |
| 1913 | case FUTEX_REQUEUE: | 2492 | case FUTEX_REQUEUE: |
| 1914 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL); | 2493 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0); |
| 1915 | break; | 2494 | break; |
| 1916 | case FUTEX_CMP_REQUEUE: | 2495 | case FUTEX_CMP_REQUEUE: |
| 1917 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3); | 2496 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, |
| 2497 | 0); | ||
| 1918 | break; | 2498 | break; |
| 1919 | case FUTEX_WAKE_OP: | 2499 | case FUTEX_WAKE_OP: |
| 1920 | ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); | 2500 | ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); |
| @@ -1931,6 +2511,15 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, | |||
| 1931 | if (futex_cmpxchg_enabled) | 2511 | if (futex_cmpxchg_enabled) |
| 1932 | ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); | 2512 | ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); |
| 1933 | break; | 2513 | break; |
| 2514 | case FUTEX_WAIT_REQUEUE_PI: | ||
| 2515 | val3 = FUTEX_BITSET_MATCH_ANY; | ||
| 2516 | ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3, | ||
| 2517 | clockrt, uaddr2); | ||
| 2518 | break; | ||
| 2519 | case FUTEX_CMP_REQUEUE_PI: | ||
| 2520 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, | ||
| 2521 | 1); | ||
| 2522 | break; | ||
| 1934 | default: | 2523 | default: |
| 1935 | ret = -ENOSYS; | 2524 | ret = -ENOSYS; |
| 1936 | } | 2525 | } |
| @@ -1948,7 +2537,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, | |||
| 1948 | int cmd = op & FUTEX_CMD_MASK; | 2537 | int cmd = op & FUTEX_CMD_MASK; |
| 1949 | 2538 | ||
| 1950 | if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || | 2539 | if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || |
| 1951 | cmd == FUTEX_WAIT_BITSET)) { | 2540 | cmd == FUTEX_WAIT_BITSET || |
| 2541 | cmd == FUTEX_WAIT_REQUEUE_PI)) { | ||
| 1952 | if (copy_from_user(&ts, utime, sizeof(ts)) != 0) | 2542 | if (copy_from_user(&ts, utime, sizeof(ts)) != 0) |
| 1953 | return -EFAULT; | 2543 | return -EFAULT; |
| 1954 | if (!timespec_valid(&ts)) | 2544 | if (!timespec_valid(&ts)) |
| @@ -1960,11 +2550,11 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, | |||
| 1960 | tp = &t; | 2550 | tp = &t; |
| 1961 | } | 2551 | } |
| 1962 | /* | 2552 | /* |
| 1963 | * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE. | 2553 | * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*. |
| 1964 | * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP. | 2554 | * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP. |
| 1965 | */ | 2555 | */ |
| 1966 | if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || | 2556 | if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || |
| 1967 | cmd == FUTEX_WAKE_OP) | 2557 | cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) |
| 1968 | val2 = (u32) (unsigned long) utime; | 2558 | val2 = (u32) (unsigned long) utime; |
| 1969 | 2559 | ||
| 1970 | return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); | 2560 | return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); |
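The matching requeue/wake side might look like the sketch below (again only illustrative, reusing the hypothetical cond_val and pi_mutex_val words from the waiter sketch above). As the comment in sys_futex() notes, the requeue count for FUTEX_CMP_REQUEUE_PI travels in the 'utime' argument slot, and nr_wake must be 1:

/*
 * Requeue waiters from &cond_val onto the PI futex &pi_mutex_val.  At most
 * one waiter is woken directly (when the PI futex can be acquired on its
 * behalf); the rest, up to nr_requeue, are requeued onto the PI futex.
 * 'expected' is compared against cond_val (the CMP part).
 */
static long cond_broadcast_requeue_pi(uint32_t expected, long nr_requeue)
{
	long ret = syscall(SYS_futex, &cond_val, FUTEX_CMP_REQUEUE_PI,
			   1 /* nr_wake */, nr_requeue, &pi_mutex_val,
			   expected);
	return ret == -1 ? -errno : ret; /* tasks woken or requeued */
}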
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 69d9cb921ffa..fec77e7e0562 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c | |||
| @@ -300,7 +300,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |||
| 300 | * assigned pending owner [which might not have taken the | 300 | * assigned pending owner [which might not have taken the |
| 301 | * lock yet]: | 301 | * lock yet]: |
| 302 | */ | 302 | */ |
| 303 | static inline int try_to_steal_lock(struct rt_mutex *lock) | 303 | static inline int try_to_steal_lock(struct rt_mutex *lock, |
| 304 | struct task_struct *task) | ||
| 304 | { | 305 | { |
| 305 | struct task_struct *pendowner = rt_mutex_owner(lock); | 306 | struct task_struct *pendowner = rt_mutex_owner(lock); |
| 306 | struct rt_mutex_waiter *next; | 307 | struct rt_mutex_waiter *next; |
| @@ -309,11 +310,11 @@ static inline int try_to_steal_lock(struct rt_mutex *lock) | |||
| 309 | if (!rt_mutex_owner_pending(lock)) | 310 | if (!rt_mutex_owner_pending(lock)) |
| 310 | return 0; | 311 | return 0; |
| 311 | 312 | ||
| 312 | if (pendowner == current) | 313 | if (pendowner == task) |
| 313 | return 1; | 314 | return 1; |
| 314 | 315 | ||
| 315 | spin_lock_irqsave(&pendowner->pi_lock, flags); | 316 | spin_lock_irqsave(&pendowner->pi_lock, flags); |
| 316 | if (current->prio >= pendowner->prio) { | 317 | if (task->prio >= pendowner->prio) { |
| 317 | spin_unlock_irqrestore(&pendowner->pi_lock, flags); | 318 | spin_unlock_irqrestore(&pendowner->pi_lock, flags); |
| 318 | return 0; | 319 | return 0; |
| 319 | } | 320 | } |
| @@ -338,21 +339,21 @@ static inline int try_to_steal_lock(struct rt_mutex *lock) | |||
| 338 | * We are going to steal the lock and a waiter was | 339 | * We are going to steal the lock and a waiter was |
| 339 | * enqueued on the pending owners pi_waiters queue. So | 340 | * enqueued on the pending owners pi_waiters queue. So |
| 340 | * we have to enqueue this waiter into | 341 | * we have to enqueue this waiter into |
| 341 | * current->pi_waiters list. This covers the case, | 342 | * task->pi_waiters list. This covers the case, |
| 342 | * where current is boosted because it holds another | 343 | * where task is boosted because it holds another |
| 343 | * lock and gets unboosted because the booster is | 344 | * lock and gets unboosted because the booster is |
| 344 | * interrupted, so we would delay a waiter with higher | 345 | * interrupted, so we would delay a waiter with higher |
| 345 | * priority as current->normal_prio. | 346 | * priority as task->normal_prio. |
| 346 | * | 347 | * |
| 347 | * Note: in the rare case of a SCHED_OTHER task changing | 348 | * Note: in the rare case of a SCHED_OTHER task changing |
| 348 | * its priority and thus stealing the lock, next->task | 349 | * its priority and thus stealing the lock, next->task |
| 349 | * might be current: | 350 | * might be task: |
| 350 | */ | 351 | */ |
| 351 | if (likely(next->task != current)) { | 352 | if (likely(next->task != task)) { |
| 352 | spin_lock_irqsave(¤t->pi_lock, flags); | 353 | spin_lock_irqsave(&task->pi_lock, flags); |
| 353 | plist_add(&next->pi_list_entry, ¤t->pi_waiters); | 354 | plist_add(&next->pi_list_entry, &task->pi_waiters); |
| 354 | __rt_mutex_adjust_prio(current); | 355 | __rt_mutex_adjust_prio(task); |
| 355 | spin_unlock_irqrestore(¤t->pi_lock, flags); | 356 | spin_unlock_irqrestore(&task->pi_lock, flags); |
| 356 | } | 357 | } |
| 357 | return 1; | 358 | return 1; |
| 358 | } | 359 | } |
| @@ -389,7 +390,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock) | |||
| 389 | */ | 390 | */ |
| 390 | mark_rt_mutex_waiters(lock); | 391 | mark_rt_mutex_waiters(lock); |
| 391 | 392 | ||
| 392 | if (rt_mutex_owner(lock) && !try_to_steal_lock(lock)) | 393 | if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current)) |
| 393 | return 0; | 394 | return 0; |
| 394 | 395 | ||
| 395 | /* We got the lock. */ | 396 | /* We got the lock. */ |
| @@ -411,6 +412,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock) | |||
| 411 | */ | 412 | */ |
| 412 | static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | 413 | static int task_blocks_on_rt_mutex(struct rt_mutex *lock, |
| 413 | struct rt_mutex_waiter *waiter, | 414 | struct rt_mutex_waiter *waiter, |
| 415 | struct task_struct *task, | ||
| 414 | int detect_deadlock) | 416 | int detect_deadlock) |
| 415 | { | 417 | { |
| 416 | struct task_struct *owner = rt_mutex_owner(lock); | 418 | struct task_struct *owner = rt_mutex_owner(lock); |
| @@ -418,21 +420,21 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |||
| 418 | unsigned long flags; | 420 | unsigned long flags; |
| 419 | int chain_walk = 0, res; | 421 | int chain_walk = 0, res; |
| 420 | 422 | ||
| 421 | spin_lock_irqsave(¤t->pi_lock, flags); | 423 | spin_lock_irqsave(&task->pi_lock, flags); |
| 422 | __rt_mutex_adjust_prio(current); | 424 | __rt_mutex_adjust_prio(task); |
| 423 | waiter->task = current; | 425 | waiter->task = task; |
| 424 | waiter->lock = lock; | 426 | waiter->lock = lock; |
| 425 | plist_node_init(&waiter->list_entry, current->prio); | 427 | plist_node_init(&waiter->list_entry, task->prio); |
| 426 | plist_node_init(&waiter->pi_list_entry, current->prio); | 428 | plist_node_init(&waiter->pi_list_entry, task->prio); |
| 427 | 429 | ||
| 428 | /* Get the top priority waiter on the lock */ | 430 | /* Get the top priority waiter on the lock */ |
| 429 | if (rt_mutex_has_waiters(lock)) | 431 | if (rt_mutex_has_waiters(lock)) |
| 430 | top_waiter = rt_mutex_top_waiter(lock); | 432 | top_waiter = rt_mutex_top_waiter(lock); |
| 431 | plist_add(&waiter->list_entry, &lock->wait_list); | 433 | plist_add(&waiter->list_entry, &lock->wait_list); |
| 432 | 434 | ||
| 433 | current->pi_blocked_on = waiter; | 435 | task->pi_blocked_on = waiter; |
| 434 | 436 | ||
| 435 | spin_unlock_irqrestore(¤t->pi_lock, flags); | 437 | spin_unlock_irqrestore(&task->pi_lock, flags); |
| 436 | 438 | ||
| 437 | if (waiter == rt_mutex_top_waiter(lock)) { | 439 | if (waiter == rt_mutex_top_waiter(lock)) { |
| 438 | spin_lock_irqsave(&owner->pi_lock, flags); | 440 | spin_lock_irqsave(&owner->pi_lock, flags); |
| @@ -460,7 +462,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |||
| 460 | spin_unlock(&lock->wait_lock); | 462 | spin_unlock(&lock->wait_lock); |
| 461 | 463 | ||
| 462 | res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, | 464 | res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, |
| 463 | current); | 465 | task); |
| 464 | 466 | ||
| 465 | spin_lock(&lock->wait_lock); | 467 | spin_lock(&lock->wait_lock); |
| 466 | 468 | ||
| @@ -605,37 +607,25 @@ void rt_mutex_adjust_pi(struct task_struct *task) | |||
| 605 | rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); | 607 | rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); |
| 606 | } | 608 | } |
| 607 | 609 | ||
| 608 | /* | 610 | /** |
| 609 | * Slow path lock function: | 611 | * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop |
| 612 | * @lock: the rt_mutex to take | ||
| 613 | * @state: the state the task should block in (TASK_INTERRUPTIBLE | ||
| 614 | * or TASK_UNINTERRUPTIBLE) | ||
| 615 | * @timeout: the pre-initialized and started timer, or NULL for none | ||
| 616 | * @waiter: the pre-initialized rt_mutex_waiter | ||
| 617 | * @detect_deadlock: passed to task_blocks_on_rt_mutex | ||
| 618 | * | ||
| 619 | * lock->wait_lock must be held by the caller. | ||
| 610 | */ | 620 | */ |
| 611 | static int __sched | 621 | static int __sched |
| 612 | rt_mutex_slowlock(struct rt_mutex *lock, int state, | 622 | __rt_mutex_slowlock(struct rt_mutex *lock, int state, |
| 613 | struct hrtimer_sleeper *timeout, | 623 | struct hrtimer_sleeper *timeout, |
| 614 | int detect_deadlock) | 624 | struct rt_mutex_waiter *waiter, |
| 625 | int detect_deadlock) | ||
| 615 | { | 626 | { |
| 616 | struct rt_mutex_waiter waiter; | ||
| 617 | int ret = 0; | 627 | int ret = 0; |
| 618 | 628 | ||
| 619 | debug_rt_mutex_init_waiter(&waiter); | ||
| 620 | waiter.task = NULL; | ||
| 621 | |||
| 622 | spin_lock(&lock->wait_lock); | ||
| 623 | |||
| 624 | /* Try to acquire the lock again: */ | ||
| 625 | if (try_to_take_rt_mutex(lock)) { | ||
| 626 | spin_unlock(&lock->wait_lock); | ||
| 627 | return 0; | ||
| 628 | } | ||
| 629 | |||
| 630 | set_current_state(state); | ||
| 631 | |||
| 632 | /* Setup the timer, when timeout != NULL */ | ||
| 633 | if (unlikely(timeout)) { | ||
| 634 | hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); | ||
| 635 | if (!hrtimer_active(&timeout->timer)) | ||
| 636 | timeout->task = NULL; | ||
| 637 | } | ||
| 638 | |||
| 639 | for (;;) { | 629 | for (;;) { |
| 640 | /* Try to acquire the lock: */ | 630 | /* Try to acquire the lock: */ |
| 641 | if (try_to_take_rt_mutex(lock)) | 631 | if (try_to_take_rt_mutex(lock)) |
| @@ -656,19 +646,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
| 656 | } | 646 | } |
| 657 | 647 | ||
| 658 | /* | 648 | /* |
| 659 | * waiter.task is NULL the first time we come here and | 649 | * waiter->task is NULL the first time we come here and |
| 660 | * when we have been woken up by the previous owner | 650 | * when we have been woken up by the previous owner |
| 661 | * but the lock got stolen by a higher prio task. | 651 | * but the lock got stolen by a higher prio task. |
| 662 | */ | 652 | */ |
| 663 | if (!waiter.task) { | 653 | if (!waiter->task) { |
| 664 | ret = task_blocks_on_rt_mutex(lock, &waiter, | 654 | ret = task_blocks_on_rt_mutex(lock, waiter, current, |
| 665 | detect_deadlock); | 655 | detect_deadlock); |
| 666 | /* | 656 | /* |
| 667 | * If we got woken up by the owner then start loop | 657 | * If we got woken up by the owner then start loop |
| 668 | * all over without going into schedule to try | 658 | * all over without going into schedule to try |
| 669 | * to get the lock now: | 659 | * to get the lock now: |
| 670 | */ | 660 | */ |
| 671 | if (unlikely(!waiter.task)) { | 661 | if (unlikely(!waiter->task)) { |
| 672 | /* | 662 | /* |
| 673 | * Reset the return value. We might | 663 | * Reset the return value. We might |
| 674 | * have returned with -EDEADLK and the | 664 | * have returned with -EDEADLK and the |
| @@ -684,15 +674,52 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
| 684 | 674 | ||
| 685 | spin_unlock(&lock->wait_lock); | 675 | spin_unlock(&lock->wait_lock); |
| 686 | 676 | ||
| 687 | debug_rt_mutex_print_deadlock(&waiter); | 677 | debug_rt_mutex_print_deadlock(waiter); |
| 688 | 678 | ||
| 689 | if (waiter.task) | 679 | if (waiter->task) |
| 690 | schedule_rt_mutex(lock); | 680 | schedule_rt_mutex(lock); |
| 691 | 681 | ||
| 692 | spin_lock(&lock->wait_lock); | 682 | spin_lock(&lock->wait_lock); |
| 693 | set_current_state(state); | 683 | set_current_state(state); |
| 694 | } | 684 | } |
| 695 | 685 | ||
| 686 | return ret; | ||
| 687 | } | ||
| 688 | |||
| 689 | /* | ||
| 690 | * Slow path lock function: | ||
| 691 | */ | ||
| 692 | static int __sched | ||
| 693 | rt_mutex_slowlock(struct rt_mutex *lock, int state, | ||
| 694 | struct hrtimer_sleeper *timeout, | ||
| 695 | int detect_deadlock) | ||
| 696 | { | ||
| 697 | struct rt_mutex_waiter waiter; | ||
| 698 | int ret = 0; | ||
| 699 | |||
| 700 | debug_rt_mutex_init_waiter(&waiter); | ||
| 701 | waiter.task = NULL; | ||
| 702 | |||
| 703 | spin_lock(&lock->wait_lock); | ||
| 704 | |||
| 705 | /* Try to acquire the lock again: */ | ||
| 706 | if (try_to_take_rt_mutex(lock)) { | ||
| 707 | spin_unlock(&lock->wait_lock); | ||
| 708 | return 0; | ||
| 709 | } | ||
| 710 | |||
| 711 | set_current_state(state); | ||
| 712 | |||
| 713 | /* Setup the timer, when timeout != NULL */ | ||
| 714 | if (unlikely(timeout)) { | ||
| 715 | hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); | ||
| 716 | if (!hrtimer_active(&timeout->timer)) | ||
| 717 | timeout->task = NULL; | ||
| 718 | } | ||
| 719 | |||
| 720 | ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, | ||
| 721 | detect_deadlock); | ||
| 722 | |||
| 696 | set_current_state(TASK_RUNNING); | 723 | set_current_state(TASK_RUNNING); |
| 697 | 724 | ||
| 698 | if (unlikely(waiter.task)) | 725 | if (unlikely(waiter.task)) |
| @@ -986,6 +1013,59 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, | |||
| 986 | } | 1013 | } |
| 987 | 1014 | ||
| 988 | /** | 1015 | /** |
| 1016 | * rt_mutex_start_proxy_lock() - Start lock acquisition for another task | ||
| 1017 | * @lock: the rt_mutex to take | ||
| 1018 | * @waiter: the pre-initialized rt_mutex_waiter | ||
| 1019 | * @task: the task to prepare | ||
| 1020 | * @detect_deadlock: perform deadlock detection (1) or not (0) | ||
| 1021 | * | ||
| 1022 | * Returns: | ||
| 1023 | * 0 - task blocked on lock | ||
| 1024 | * 1 - acquired the lock for task, caller should wake it up | ||
| 1025 | * <0 - error | ||
| 1026 | * | ||
| 1027 | * Special API call for FUTEX_REQUEUE_PI support. | ||
| 1028 | */ | ||
| 1029 | int rt_mutex_start_proxy_lock(struct rt_mutex *lock, | ||
| 1030 | struct rt_mutex_waiter *waiter, | ||
| 1031 | struct task_struct *task, int detect_deadlock) | ||
| 1032 | { | ||
| 1033 | int ret; | ||
| 1034 | |||
| 1035 | spin_lock(&lock->wait_lock); | ||
| 1036 | |||
| 1037 | mark_rt_mutex_waiters(lock); | ||
| 1038 | |||
| 1039 | if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) { | ||
| 1040 | /* We got the lock for task. */ | ||
| 1041 | debug_rt_mutex_lock(lock); | ||
| 1042 | |||
| 1043 | rt_mutex_set_owner(lock, task, 0); | ||
| 1044 | |||
| 1045 | rt_mutex_deadlock_account_lock(lock, task); | ||
| 1046 | return 1; | ||
| 1047 | } | ||
| 1048 | |||
| 1049 | ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); | ||
| 1050 | |||
| 1051 | |||
| 1052 | if (ret && !waiter->task) { | ||
| 1053 | /* | ||
| 1054 | * Reset the return value. We might have | ||
| 1055 | * returned with -EDEADLK and the owner | ||
| 1056 | * released the lock while we were walking the | ||
| 1057 | * pi chain. Let the waiter sort it out. | ||
| 1058 | */ | ||
| 1059 | ret = 0; | ||
| 1060 | } | ||
| 1061 | spin_unlock(&lock->wait_lock); | ||
| 1062 | |||
| 1063 | debug_rt_mutex_print_deadlock(waiter); | ||
| 1064 | |||
| 1065 | return ret; | ||
| 1066 | } | ||
| 1067 | |||
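To make the intended pairing concrete, here is a condensed, hypothetical kernel-side sketch of how a requeue path can drive this call (assuming rtmutex_common.h and linux/sched.h are in scope; this is not the literal futex_requeue() code, and hash-bucket locking plus error handling are elided):

/*
 * Sketch: acquire @pi_mutex on behalf of the sleeping @task.  A return of 1
 * means the lock was taken for @task, which only needs to be woken; 0 means
 * @task is now enqueued as an rt_mutex waiter and will complete the
 * acquisition itself via rt_mutex_finish_proxy_lock() after it is woken.
 */
static int proxy_acquire_sketch(struct rt_mutex *pi_mutex,
				struct rt_mutex_waiter *waiter,
				struct task_struct *task)
{
	int ret = rt_mutex_start_proxy_lock(pi_mutex, waiter, task, 1);

	if (ret == 1)
		wake_up_process(task);	/* the futex code uses its own wake helper */
	return ret;
}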
| 1068 | /** | ||
| 989 | * rt_mutex_next_owner - return the next owner of the lock | 1069 | * rt_mutex_next_owner - return the next owner of the lock |
| 990 | * | 1070 | * |
| 991 | * @lock: the rt lock query | 1071 | * @lock: the rt lock query |
| @@ -1004,3 +1084,57 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) | |||
| 1004 | 1084 | ||
| 1005 | return rt_mutex_top_waiter(lock)->task; | 1085 | return rt_mutex_top_waiter(lock)->task; |
| 1006 | } | 1086 | } |
| 1087 | |||
| 1088 | /** | ||
| 1089 | * rt_mutex_finish_proxy_lock() - Complete lock acquisition | ||
| 1090 | * @lock: the rt_mutex we were woken on | ||
| 1091 | * @to: the timeout, NULL if none. The hrtimer should already have | ||
| 1092 | * been started. | ||
| 1093 | * @waiter: the pre-initialized rt_mutex_waiter | ||
| 1094 | * @detect_deadlock: perform deadlock detection (1) or not (0) | ||
| 1095 | * | ||
| 1096 | * Complete the lock acquisition started on our behalf by another thread. | ||
| 1097 | * | ||
| 1098 | * Returns: | ||
| 1099 | * 0 - success | ||
| 1100 | * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK | ||
| 1101 | * | ||
| 1102 | * Special API call for PI-futex requeue support | ||
| 1103 | */ | ||
| 1104 | int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, | ||
| 1105 | struct hrtimer_sleeper *to, | ||
| 1106 | struct rt_mutex_waiter *waiter, | ||
| 1107 | int detect_deadlock) | ||
| 1108 | { | ||
| 1109 | int ret; | ||
| 1110 | |||
| 1111 | spin_lock(&lock->wait_lock); | ||
| 1112 | |||
| 1113 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 1114 | |||
| 1115 | ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, | ||
| 1116 | detect_deadlock); | ||
| 1117 | |||
| 1118 | set_current_state(TASK_RUNNING); | ||
| 1119 | |||
| 1120 | if (unlikely(waiter->task)) | ||
| 1121 | remove_waiter(lock, waiter); | ||
| 1122 | |||
| 1123 | /* | ||
| 1124 | * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might | ||
| 1125 | * have to fix that up. | ||
| 1126 | */ | ||
| 1127 | fixup_rt_mutex_waiters(lock); | ||
| 1128 | |||
| 1129 | spin_unlock(&lock->wait_lock); | ||
| 1130 | |||
| 1131 | /* | ||
| 1132 | * Readjust priority, when we did not get the lock. We might have been | ||
| 1133 | * the pending owner and boosted. Since we did not take the lock, the | ||
| 1134 | * PI boost has to go. | ||
| 1135 | */ | ||
| 1136 | if (unlikely(ret)) | ||
| 1137 | rt_mutex_adjust_prio(current); | ||
| 1138 | |||
| 1139 | return ret; | ||
| 1140 | } | ||
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h index e124bf5800ea..97a2f81866af 100644 --- a/kernel/rtmutex_common.h +++ b/kernel/rtmutex_common.h | |||
| @@ -120,6 +120,14 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, | |||
| 120 | struct task_struct *proxy_owner); | 120 | struct task_struct *proxy_owner); |
| 121 | extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, | 121 | extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, |
| 122 | struct task_struct *proxy_owner); | 122 | struct task_struct *proxy_owner); |
| 123 | extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, | ||
| 124 | struct rt_mutex_waiter *waiter, | ||
| 125 | struct task_struct *task, | ||
| 126 | int detect_deadlock); | ||
| 127 | extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, | ||
| 128 | struct hrtimer_sleeper *to, | ||
| 129 | struct rt_mutex_waiter *waiter, | ||
| 130 | int detect_deadlock); | ||
| 123 | 131 | ||
| 124 | #ifdef CONFIG_DEBUG_RT_MUTEXES | 132 | #ifdef CONFIG_DEBUG_RT_MUTEXES |
| 125 | # include "rtmutex-debug.h" | 133 | # include "rtmutex-debug.h" |
