diff options
Diffstat (limited to 'kernel/futex.c')
| -rw-r--r-- | kernel/futex.c | 85 |
1 files changed, 42 insertions, 43 deletions
diff --git a/kernel/futex.c b/kernel/futex.c index e7a35f1039e7..6c683b37f2ce 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
| @@ -91,6 +91,7 @@ struct futex_pi_state { | |||
| 91 | 91 | ||
| 92 | /** | 92 | /** |
| 93 | * struct futex_q - The hashed futex queue entry, one per waiting task | 93 | * struct futex_q - The hashed futex queue entry, one per waiting task |
| 94 | * @list: priority-sorted list of tasks waiting on this futex | ||
| 94 | * @task: the task waiting on the futex | 95 | * @task: the task waiting on the futex |
| 95 | * @lock_ptr: the hash bucket lock | 96 | * @lock_ptr: the hash bucket lock |
| 96 | * @key: the key the futex is hashed on | 97 | * @key: the key the futex is hashed on |
| @@ -104,7 +105,7 @@ struct futex_pi_state { | |||
| 104 | * | 105 | * |
| 105 | * A futex_q has a woken state, just like tasks have TASK_RUNNING. | 106 | * A futex_q has a woken state, just like tasks have TASK_RUNNING. |
| 106 | * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. | 107 | * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. |
| 107 | * The order of wakup is always to make the first condition true, then | 108 | * The order of wakeup is always to make the first condition true, then |
| 108 | * the second. | 109 | * the second. |
| 109 | * | 110 | * |
| 110 | * PI futexes are typically woken before they are removed from the hash list via | 111 | * PI futexes are typically woken before they are removed from the hash list via |
| @@ -168,7 +169,7 @@ static void get_futex_key_refs(union futex_key *key) | |||
| 168 | 169 | ||
| 169 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { | 170 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { |
| 170 | case FUT_OFF_INODE: | 171 | case FUT_OFF_INODE: |
| 171 | atomic_inc(&key->shared.inode->i_count); | 172 | ihold(key->shared.inode); |
| 172 | break; | 173 | break; |
| 173 | case FUT_OFF_MMSHARED: | 174 | case FUT_OFF_MMSHARED: |
| 174 | atomic_inc(&key->private.mm->mm_count); | 175 | atomic_inc(&key->private.mm->mm_count); |
| @@ -295,7 +296,7 @@ void put_futex_key(int fshared, union futex_key *key) | |||
| 295 | * Slow path to fixup the fault we just took in the atomic write | 296 | * Slow path to fixup the fault we just took in the atomic write |
| 296 | * access to @uaddr. | 297 | * access to @uaddr. |
| 297 | * | 298 | * |
| 298 | * We have no generic implementation of a non destructive write to the | 299 | * We have no generic implementation of a non-destructive write to the |
| 299 | * user address. We know that we faulted in the atomic pagefault | 300 | * user address. We know that we faulted in the atomic pagefault |
| 300 | * disabled section so we can as well avoid the #PF overhead by | 301 | * disabled section so we can as well avoid the #PF overhead by |
| 301 | * calling get_user_pages() right away. | 302 | * calling get_user_pages() right away. |
| @@ -429,20 +430,11 @@ static void free_pi_state(struct futex_pi_state *pi_state) | |||
| 429 | static struct task_struct * futex_find_get_task(pid_t pid) | 430 | static struct task_struct * futex_find_get_task(pid_t pid) |
| 430 | { | 431 | { |
| 431 | struct task_struct *p; | 432 | struct task_struct *p; |
| 432 | const struct cred *cred = current_cred(), *pcred; | ||
| 433 | 433 | ||
| 434 | rcu_read_lock(); | 434 | rcu_read_lock(); |
| 435 | p = find_task_by_vpid(pid); | 435 | p = find_task_by_vpid(pid); |
| 436 | if (!p) { | 436 | if (p) |
| 437 | p = ERR_PTR(-ESRCH); | 437 | get_task_struct(p); |
| 438 | } else { | ||
| 439 | pcred = __task_cred(p); | ||
| 440 | if (cred->euid != pcred->euid && | ||
| 441 | cred->euid != pcred->uid) | ||
| 442 | p = ERR_PTR(-ESRCH); | ||
| 443 | else | ||
| 444 | get_task_struct(p); | ||
| 445 | } | ||
| 446 | 438 | ||
| 447 | rcu_read_unlock(); | 439 | rcu_read_unlock(); |
| 448 | 440 | ||
| @@ -524,7 +516,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
| 524 | */ | 516 | */ |
| 525 | pi_state = this->pi_state; | 517 | pi_state = this->pi_state; |
| 526 | /* | 518 | /* |
| 527 | * Userspace might have messed up non PI and PI futexes | 519 | * Userspace might have messed up non-PI and PI futexes |
| 528 | */ | 520 | */ |
| 529 | if (unlikely(!pi_state)) | 521 | if (unlikely(!pi_state)) |
| 530 | return -EINVAL; | 522 | return -EINVAL; |
| @@ -564,8 +556,8 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
| 564 | if (!pid) | 556 | if (!pid) |
| 565 | return -ESRCH; | 557 | return -ESRCH; |
| 566 | p = futex_find_get_task(pid); | 558 | p = futex_find_get_task(pid); |
| 567 | if (IS_ERR(p)) | 559 | if (!p) |
| 568 | return PTR_ERR(p); | 560 | return -ESRCH; |
| 569 | 561 | ||
| 570 | /* | 562 | /* |
| 571 | * We need to look at the task state flags to figure out, | 563 | * We need to look at the task state flags to figure out, |
| @@ -745,8 +737,8 @@ static void wake_futex(struct futex_q *q) | |||
| 745 | 737 | ||
| 746 | /* | 738 | /* |
| 747 | * We set q->lock_ptr = NULL _before_ we wake up the task. If | 739 | * We set q->lock_ptr = NULL _before_ we wake up the task. If |
| 748 | * a non futex wake up happens on another CPU then the task | 740 | * a non-futex wake up happens on another CPU then the task |
| 749 | * might exit and p would dereference a non existing task | 741 | * might exit and p would dereference a non-existing task |
| 750 | * struct. Prevent this by holding a reference on p across the | 742 | * struct. Prevent this by holding a reference on p across the |
| 751 | * wake up. | 743 | * wake up. |
| 752 | */ | 744 | */ |
| @@ -1140,11 +1132,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, | |||
| 1140 | 1132 | ||
| 1141 | /** | 1133 | /** |
| 1142 | * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 | 1134 | * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 |
| 1143 | * uaddr1: source futex user address | 1135 | * @uaddr1: source futex user address |
| 1144 | * uaddr2: target futex user address | 1136 | * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED |
| 1145 | * nr_wake: number of waiters to wake (must be 1 for requeue_pi) | 1137 | * @uaddr2: target futex user address |
| 1146 | * nr_requeue: number of waiters to requeue (0-INT_MAX) | 1138 | * @nr_wake: number of waiters to wake (must be 1 for requeue_pi) |
| 1147 | * requeue_pi: if we are attempting to requeue from a non-pi futex to a | 1139 | * @nr_requeue: number of waiters to requeue (0-INT_MAX) |
| 1140 | * @cmpval: @uaddr1 expected value (or %NULL) | ||
| 1141 | * @requeue_pi: if we are attempting to requeue from a non-pi futex to a | ||
| 1148 | * pi futex (pi to pi requeue is not supported) | 1142 | * pi futex (pi to pi requeue is not supported) |
| 1149 | * | 1143 | * |
| 1150 | * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire | 1144 | * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire |
| @@ -1369,10 +1363,10 @@ out: | |||
| 1369 | 1363 | ||
| 1370 | /* The key must be already stored in q->key. */ | 1364 | /* The key must be already stored in q->key. */ |
| 1371 | static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) | 1365 | static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) |
| 1366 | __acquires(&hb->lock) | ||
| 1372 | { | 1367 | { |
| 1373 | struct futex_hash_bucket *hb; | 1368 | struct futex_hash_bucket *hb; |
| 1374 | 1369 | ||
| 1375 | get_futex_key_refs(&q->key); | ||
| 1376 | hb = hash_futex(&q->key); | 1370 | hb = hash_futex(&q->key); |
| 1377 | q->lock_ptr = &hb->lock; | 1371 | q->lock_ptr = &hb->lock; |
| 1378 | 1372 | ||
| @@ -1382,9 +1376,9 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) | |||
| 1382 | 1376 | ||
| 1383 | static inline void | 1377 | static inline void |
| 1384 | queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) | 1378 | queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) |
| 1379 | __releases(&hb->lock) | ||
| 1385 | { | 1380 | { |
| 1386 | spin_unlock(&hb->lock); | 1381 | spin_unlock(&hb->lock); |
| 1387 | drop_futex_key_refs(&q->key); | ||
| 1388 | } | 1382 | } |
| 1389 | 1383 | ||
| 1390 | /** | 1384 | /** |
| @@ -1400,6 +1394,7 @@ queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) | |||
| 1400 | * an example). | 1394 | * an example). |
| 1401 | */ | 1395 | */ |
| 1402 | static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) | 1396 | static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) |
| 1397 | __releases(&hb->lock) | ||
| 1403 | { | 1398 | { |
| 1404 | int prio; | 1399 | int prio; |
| 1405 | 1400 | ||
| @@ -1480,6 +1475,7 @@ retry: | |||
| 1480 | * and dropped here. | 1475 | * and dropped here. |
| 1481 | */ | 1476 | */ |
| 1482 | static void unqueue_me_pi(struct futex_q *q) | 1477 | static void unqueue_me_pi(struct futex_q *q) |
| 1478 | __releases(q->lock_ptr) | ||
| 1483 | { | 1479 | { |
| 1484 | WARN_ON(plist_node_empty(&q->list)); | 1480 | WARN_ON(plist_node_empty(&q->list)); |
| 1485 | plist_del(&q->list, &q->list.plist); | 1481 | plist_del(&q->list, &q->list.plist); |
| @@ -1489,8 +1485,6 @@ static void unqueue_me_pi(struct futex_q *q) | |||
| 1489 | q->pi_state = NULL; | 1485 | q->pi_state = NULL; |
| 1490 | 1486 | ||
| 1491 | spin_unlock(q->lock_ptr); | 1487 | spin_unlock(q->lock_ptr); |
| 1492 | |||
| 1493 | drop_futex_key_refs(&q->key); | ||
| 1494 | } | 1488 | } |
| 1495 | 1489 | ||
| 1496 | /* | 1490 | /* |
| @@ -1821,7 +1815,10 @@ static int futex_wait(u32 __user *uaddr, int fshared, | |||
| 1821 | } | 1815 | } |
| 1822 | 1816 | ||
| 1823 | retry: | 1817 | retry: |
| 1824 | /* Prepare to wait on uaddr. */ | 1818 | /* |
| 1819 | * Prepare to wait on uaddr. On success, holds hb lock and increments | ||
| 1820 | * q.key refs. | ||
| 1821 | */ | ||
| 1825 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); | 1822 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); |
| 1826 | if (ret) | 1823 | if (ret) |
| 1827 | goto out; | 1824 | goto out; |
| @@ -1831,28 +1828,27 @@ retry: | |||
| 1831 | 1828 | ||
| 1832 | /* If we were woken (and unqueued), we succeeded, whatever. */ | 1829 | /* If we were woken (and unqueued), we succeeded, whatever. */ |
| 1833 | ret = 0; | 1830 | ret = 0; |
| 1831 | /* unqueue_me() drops q.key ref */ | ||
| 1834 | if (!unqueue_me(&q)) | 1832 | if (!unqueue_me(&q)) |
| 1835 | goto out_put_key; | 1833 | goto out; |
| 1836 | ret = -ETIMEDOUT; | 1834 | ret = -ETIMEDOUT; |
| 1837 | if (to && !to->task) | 1835 | if (to && !to->task) |
| 1838 | goto out_put_key; | 1836 | goto out; |
| 1839 | 1837 | ||
| 1840 | /* | 1838 | /* |
| 1841 | * We expect signal_pending(current), but we might be the | 1839 | * We expect signal_pending(current), but we might be the |
| 1842 | * victim of a spurious wakeup as well. | 1840 | * victim of a spurious wakeup as well. |
| 1843 | */ | 1841 | */ |
| 1844 | if (!signal_pending(current)) { | 1842 | if (!signal_pending(current)) |
| 1845 | put_futex_key(fshared, &q.key); | ||
| 1846 | goto retry; | 1843 | goto retry; |
| 1847 | } | ||
| 1848 | 1844 | ||
| 1849 | ret = -ERESTARTSYS; | 1845 | ret = -ERESTARTSYS; |
| 1850 | if (!abs_time) | 1846 | if (!abs_time) |
| 1851 | goto out_put_key; | 1847 | goto out; |
| 1852 | 1848 | ||
| 1853 | restart = &current_thread_info()->restart_block; | 1849 | restart = &current_thread_info()->restart_block; |
| 1854 | restart->fn = futex_wait_restart; | 1850 | restart->fn = futex_wait_restart; |
| 1855 | restart->futex.uaddr = (u32 *)uaddr; | 1851 | restart->futex.uaddr = uaddr; |
| 1856 | restart->futex.val = val; | 1852 | restart->futex.val = val; |
| 1857 | restart->futex.time = abs_time->tv64; | 1853 | restart->futex.time = abs_time->tv64; |
| 1858 | restart->futex.bitset = bitset; | 1854 | restart->futex.bitset = bitset; |
| @@ -1865,8 +1861,6 @@ retry: | |||
| 1865 | 1861 | ||
| 1866 | ret = -ERESTART_RESTARTBLOCK; | 1862 | ret = -ERESTART_RESTARTBLOCK; |
| 1867 | 1863 | ||
| 1868 | out_put_key: | ||
| 1869 | put_futex_key(fshared, &q.key); | ||
| 1870 | out: | 1864 | out: |
| 1871 | if (to) { | 1865 | if (to) { |
| 1872 | hrtimer_cancel(&to->timer); | 1866 | hrtimer_cancel(&to->timer); |
| @@ -1878,7 +1872,7 @@ out: | |||
| 1878 | 1872 | ||
| 1879 | static long futex_wait_restart(struct restart_block *restart) | 1873 | static long futex_wait_restart(struct restart_block *restart) |
| 1880 | { | 1874 | { |
| 1881 | u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; | 1875 | u32 __user *uaddr = restart->futex.uaddr; |
| 1882 | int fshared = 0; | 1876 | int fshared = 0; |
| 1883 | ktime_t t, *tp = NULL; | 1877 | ktime_t t, *tp = NULL; |
| 1884 | 1878 | ||
| @@ -2245,7 +2239,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
| 2245 | q.rt_waiter = &rt_waiter; | 2239 | q.rt_waiter = &rt_waiter; |
| 2246 | q.requeue_pi_key = &key2; | 2240 | q.requeue_pi_key = &key2; |
| 2247 | 2241 | ||
| 2248 | /* Prepare to wait on uaddr. */ | 2242 | /* |
| 2243 | * Prepare to wait on uaddr. On success, increments q.key (key1) ref | ||
| 2244 | * count. | ||
| 2245 | */ | ||
| 2249 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); | 2246 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); |
| 2250 | if (ret) | 2247 | if (ret) |
| 2251 | goto out_key2; | 2248 | goto out_key2; |
| @@ -2263,7 +2260,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
| 2263 | * In order for us to be here, we know our q.key == key2, and since | 2260 | * In order for us to be here, we know our q.key == key2, and since |
| 2264 | * we took the hb->lock above, we also know that futex_requeue() has | 2261 | * we took the hb->lock above, we also know that futex_requeue() has |
| 2265 | * completed and we no longer have to concern ourselves with a wakeup | 2262 | * completed and we no longer have to concern ourselves with a wakeup |
| 2266 | * race with the atomic proxy lock acquition by the requeue code. | 2263 | * race with the atomic proxy lock acquisition by the requeue code. The |
| 2264 | * futex_requeue dropped our key1 reference and incremented our key2 | ||
| 2265 | * reference count. | ||
| 2267 | */ | 2266 | */ |
| 2268 | 2267 | ||
| 2269 | /* Check if the requeue code acquired the second futex for us. */ | 2268 | /* Check if the requeue code acquired the second futex for us. */ |
| @@ -2467,7 +2466,7 @@ retry: | |||
| 2467 | */ | 2466 | */ |
| 2468 | static inline int fetch_robust_entry(struct robust_list __user **entry, | 2467 | static inline int fetch_robust_entry(struct robust_list __user **entry, |
| 2469 | struct robust_list __user * __user *head, | 2468 | struct robust_list __user * __user *head, |
| 2470 | int *pi) | 2469 | unsigned int *pi) |
| 2471 | { | 2470 | { |
| 2472 | unsigned long uentry; | 2471 | unsigned long uentry; |
| 2473 | 2472 | ||
| @@ -2656,7 +2655,7 @@ static int __init futex_init(void) | |||
| 2656 | * of the complex code paths. Also we want to prevent | 2655 | * of the complex code paths. Also we want to prevent |
| 2657 | * registration of robust lists in that case. NULL is | 2656 | * registration of robust lists in that case. NULL is |
| 2658 | * guaranteed to fault and we get -EFAULT on functional | 2657 | * guaranteed to fault and we get -EFAULT on functional |
| 2659 | * implementation, the non functional ones will return | 2658 | * implementation, the non-functional ones will return |
| 2660 | * -ENOSYS. | 2659 | * -ENOSYS. |
| 2661 | */ | 2660 | */ |
| 2662 | curval = cmpxchg_futex_value_locked(NULL, 0, 0); | 2661 | curval = cmpxchg_futex_value_locked(NULL, 0, 0); |
