diff options
Diffstat (limited to 'kernel/futex.c')
-rw-r--r-- | kernel/futex.c | 192 |
1 files changed, 121 insertions, 71 deletions
diff --git a/kernel/futex.c b/kernel/futex.c index c3b73b0311bc..6262f1534ac9 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -1311,13 +1311,15 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval, | |||
1311 | 1311 | ||
1312 | static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) | 1312 | static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) |
1313 | { | 1313 | { |
1314 | int err; | ||
1314 | u32 uninitialized_var(curval); | 1315 | u32 uninitialized_var(curval); |
1315 | 1316 | ||
1316 | if (unlikely(should_fail_futex(true))) | 1317 | if (unlikely(should_fail_futex(true))) |
1317 | return -EFAULT; | 1318 | return -EFAULT; |
1318 | 1319 | ||
1319 | if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) | 1320 | err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); |
1320 | return -EFAULT; | 1321 | if (unlikely(err)) |
1322 | return err; | ||
1321 | 1323 | ||
1322 | /* If user space value changed, let the caller retry */ | 1324 | /* If user space value changed, let the caller retry */ |
1323 | return curval != uval ? -EAGAIN : 0; | 1325 | return curval != uval ? -EAGAIN : 0; |
@@ -1502,10 +1504,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ | |||
1502 | if (unlikely(should_fail_futex(true))) | 1504 | if (unlikely(should_fail_futex(true))) |
1503 | ret = -EFAULT; | 1505 | ret = -EFAULT; |
1504 | 1506 | ||
1505 | if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) { | 1507 | ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); |
1506 | ret = -EFAULT; | 1508 | if (!ret && (curval != uval)) { |
1507 | |||
1508 | } else if (curval != uval) { | ||
1509 | /* | 1509 | /* |
1510 | * If an unconditional UNLOCK_PI operation (user space did not | 1510 | * If an unconditional UNLOCK_PI operation (user space did not |
1511 | * try the TID->0 transition) raced with a waiter setting the | 1511 | * try the TID->0 transition) raced with a waiter setting the |
@@ -1700,32 +1700,32 @@ retry_private: | |||
1700 | double_lock_hb(hb1, hb2); | 1700 | double_lock_hb(hb1, hb2); |
1701 | op_ret = futex_atomic_op_inuser(op, uaddr2); | 1701 | op_ret = futex_atomic_op_inuser(op, uaddr2); |
1702 | if (unlikely(op_ret < 0)) { | 1702 | if (unlikely(op_ret < 0)) { |
1703 | |||
1704 | double_unlock_hb(hb1, hb2); | 1703 | double_unlock_hb(hb1, hb2); |
1705 | 1704 | ||
1706 | #ifndef CONFIG_MMU | 1705 | if (!IS_ENABLED(CONFIG_MMU) || |
1707 | /* | 1706 | unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) { |
1708 | * we don't get EFAULT from MMU faults if we don't have an MMU, | 1707 | /* |
1709 | * but we might get them from range checking | 1708 | * we don't get EFAULT from MMU faults if we don't have |
1710 | */ | 1709 | * an MMU, but we might get them from range checking |
1711 | ret = op_ret; | 1710 | */ |
1712 | goto out_put_keys; | ||
1713 | #endif | ||
1714 | |||
1715 | if (unlikely(op_ret != -EFAULT)) { | ||
1716 | ret = op_ret; | 1711 | ret = op_ret; |
1717 | goto out_put_keys; | 1712 | goto out_put_keys; |
1718 | } | 1713 | } |
1719 | 1714 | ||
1720 | ret = fault_in_user_writeable(uaddr2); | 1715 | if (op_ret == -EFAULT) { |
1721 | if (ret) | 1716 | ret = fault_in_user_writeable(uaddr2); |
1722 | goto out_put_keys; | 1717 | if (ret) |
1718 | goto out_put_keys; | ||
1719 | } | ||
1723 | 1720 | ||
1724 | if (!(flags & FLAGS_SHARED)) | 1721 | if (!(flags & FLAGS_SHARED)) { |
1722 | cond_resched(); | ||
1725 | goto retry_private; | 1723 | goto retry_private; |
1724 | } | ||
1726 | 1725 | ||
1727 | put_futex_key(&key2); | 1726 | put_futex_key(&key2); |
1728 | put_futex_key(&key1); | 1727 | put_futex_key(&key1); |
1728 | cond_resched(); | ||
1729 | goto retry; | 1729 | goto retry; |
1730 | } | 1730 | } |
1731 | 1731 | ||
@@ -2350,7 +2350,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | |||
2350 | u32 uval, uninitialized_var(curval), newval; | 2350 | u32 uval, uninitialized_var(curval), newval; |
2351 | struct task_struct *oldowner, *newowner; | 2351 | struct task_struct *oldowner, *newowner; |
2352 | u32 newtid; | 2352 | u32 newtid; |
2353 | int ret; | 2353 | int ret, err = 0; |
2354 | 2354 | ||
2355 | lockdep_assert_held(q->lock_ptr); | 2355 | lockdep_assert_held(q->lock_ptr); |
2356 | 2356 | ||
@@ -2421,14 +2421,17 @@ retry: | |||
2421 | if (!pi_state->owner) | 2421 | if (!pi_state->owner) |
2422 | newtid |= FUTEX_OWNER_DIED; | 2422 | newtid |= FUTEX_OWNER_DIED; |
2423 | 2423 | ||
2424 | if (get_futex_value_locked(&uval, uaddr)) | 2424 | err = get_futex_value_locked(&uval, uaddr); |
2425 | goto handle_fault; | 2425 | if (err) |
2426 | goto handle_err; | ||
2426 | 2427 | ||
2427 | for (;;) { | 2428 | for (;;) { |
2428 | newval = (uval & FUTEX_OWNER_DIED) | newtid; | 2429 | newval = (uval & FUTEX_OWNER_DIED) | newtid; |
2429 | 2430 | ||
2430 | if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) | 2431 | err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); |
2431 | goto handle_fault; | 2432 | if (err) |
2433 | goto handle_err; | ||
2434 | |||
2432 | if (curval == uval) | 2435 | if (curval == uval) |
2433 | break; | 2436 | break; |
2434 | uval = curval; | 2437 | uval = curval; |
@@ -2456,23 +2459,37 @@ retry: | |||
2456 | return 0; | 2459 | return 0; |
2457 | 2460 | ||
2458 | /* | 2461 | /* |
2459 | * To handle the page fault we need to drop the locks here. That gives | 2462 | * In order to reschedule or handle a page fault, we need to drop the |
2460 | * the other task (either the highest priority waiter itself or the | 2463 | * locks here. In the case of a fault, this gives the other task |
2461 | * task which stole the rtmutex) the chance to try the fixup of the | 2464 | * (either the highest priority waiter itself or the task which stole |
2462 | * pi_state. So once we are back from handling the fault we need to | 2465 | * the rtmutex) the chance to try the fixup of the pi_state. So once we |
2463 | * check the pi_state after reacquiring the locks and before trying to | 2466 | * are back from handling the fault we need to check the pi_state after |
2464 | * do another fixup. When the fixup has been done already we simply | 2467 | * reacquiring the locks and before trying to do another fixup. When |
2465 | * return. | 2468 | * the fixup has been done already we simply return. |
2466 | * | 2469 | * |
2467 | * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely | 2470 | * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely |
2468 | * drop hb->lock since the caller owns the hb -> futex_q relation. | 2471 | * drop hb->lock since the caller owns the hb -> futex_q relation. |
2469 | * Dropping the pi_mutex->wait_lock requires the state revalidate. | 2472 | * Dropping the pi_mutex->wait_lock requires the state revalidate. |
2470 | */ | 2473 | */ |
2471 | handle_fault: | 2474 | handle_err: |
2472 | raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); | 2475 | raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); |
2473 | spin_unlock(q->lock_ptr); | 2476 | spin_unlock(q->lock_ptr); |
2474 | 2477 | ||
2475 | ret = fault_in_user_writeable(uaddr); | 2478 | switch (err) { |
2479 | case -EFAULT: | ||
2480 | ret = fault_in_user_writeable(uaddr); | ||
2481 | break; | ||
2482 | |||
2483 | case -EAGAIN: | ||
2484 | cond_resched(); | ||
2485 | ret = 0; | ||
2486 | break; | ||
2487 | |||
2488 | default: | ||
2489 | WARN_ON_ONCE(1); | ||
2490 | ret = err; | ||
2491 | break; | ||
2492 | } | ||
2476 | 2493 | ||
2477 | spin_lock(q->lock_ptr); | 2494 | spin_lock(q->lock_ptr); |
2478 | raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); | 2495 | raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); |
@@ -3041,10 +3058,8 @@ retry: | |||
3041 | * An unconditional UNLOCK_PI op raced against a waiter | 3058 | * An unconditional UNLOCK_PI op raced against a waiter |
3042 | * setting the FUTEX_WAITERS bit. Try again. | 3059 | * setting the FUTEX_WAITERS bit. Try again. |
3043 | */ | 3060 | */ |
3044 | if (ret == -EAGAIN) { | 3061 | if (ret == -EAGAIN) |
3045 | put_futex_key(&key); | 3062 | goto pi_retry; |
3046 | goto retry; | ||
3047 | } | ||
3048 | /* | 3063 | /* |
3049 | * wake_futex_pi has detected invalid state. Tell user | 3064 | * wake_futex_pi has detected invalid state. Tell user |
3050 | * space. | 3065 | * space. |
@@ -3059,9 +3074,19 @@ retry: | |||
3059 | * preserve the WAITERS bit not the OWNER_DIED one. We are the | 3074 | * preserve the WAITERS bit not the OWNER_DIED one. We are the |
3060 | * owner. | 3075 | * owner. |
3061 | */ | 3076 | */ |
3062 | if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) { | 3077 | if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) { |
3063 | spin_unlock(&hb->lock); | 3078 | spin_unlock(&hb->lock); |
3064 | goto pi_faulted; | 3079 | switch (ret) { |
3080 | case -EFAULT: | ||
3081 | goto pi_faulted; | ||
3082 | |||
3083 | case -EAGAIN: | ||
3084 | goto pi_retry; | ||
3085 | |||
3086 | default: | ||
3087 | WARN_ON_ONCE(1); | ||
3088 | goto out_putkey; | ||
3089 | } | ||
3065 | } | 3090 | } |
3066 | 3091 | ||
3067 | /* | 3092 | /* |
@@ -3075,6 +3100,11 @@ out_putkey: | |||
3075 | put_futex_key(&key); | 3100 | put_futex_key(&key); |
3076 | return ret; | 3101 | return ret; |
3077 | 3102 | ||
3103 | pi_retry: | ||
3104 | put_futex_key(&key); | ||
3105 | cond_resched(); | ||
3106 | goto retry; | ||
3107 | |||
3078 | pi_faulted: | 3108 | pi_faulted: |
3079 | put_futex_key(&key); | 3109 | put_futex_key(&key); |
3080 | 3110 | ||
@@ -3435,47 +3465,67 @@ err_unlock: | |||
3435 | static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) | 3465 | static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) |
3436 | { | 3466 | { |
3437 | u32 uval, uninitialized_var(nval), mval; | 3467 | u32 uval, uninitialized_var(nval), mval; |
3468 | int err; | ||
3469 | |||
3470 | /* Futex address must be 32bit aligned */ | ||
3471 | if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0) | ||
3472 | return -1; | ||
3438 | 3473 | ||
3439 | retry: | 3474 | retry: |
3440 | if (get_user(uval, uaddr)) | 3475 | if (get_user(uval, uaddr)) |
3441 | return -1; | 3476 | return -1; |
3442 | 3477 | ||
3443 | if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) { | 3478 | if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr)) |
3444 | /* | 3479 | return 0; |
3445 | * Ok, this dying thread is truly holding a futex | 3480 | |
3446 | * of interest. Set the OWNER_DIED bit atomically | 3481 | /* |
3447 | * via cmpxchg, and if the value had FUTEX_WAITERS | 3482 | * Ok, this dying thread is truly holding a futex |
3448 | * set, wake up a waiter (if any). (We have to do a | 3483 | * of interest. Set the OWNER_DIED bit atomically |
3449 | * futex_wake() even if OWNER_DIED is already set - | 3484 | * via cmpxchg, and if the value had FUTEX_WAITERS |
3450 | * to handle the rare but possible case of recursive | 3485 | * set, wake up a waiter (if any). (We have to do a |
3451 | * thread-death.) The rest of the cleanup is done in | 3486 | * futex_wake() even if OWNER_DIED is already set - |
3452 | * userspace. | 3487 | * to handle the rare but possible case of recursive |
3453 | */ | 3488 | * thread-death.) The rest of the cleanup is done in |
3454 | mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; | 3489 | * userspace. |
3455 | /* | 3490 | */ |
3456 | * We are not holding a lock here, but we want to have | 3491 | mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; |
3457 | * the pagefault_disable/enable() protection because | 3492 | |
3458 | * we want to handle the fault gracefully. If the | 3493 | /* |
3459 | * access fails we try to fault in the futex with R/W | 3494 | * We are not holding a lock here, but we want to have |
3460 | * verification via get_user_pages. get_user() above | 3495 | * the pagefault_disable/enable() protection because |
3461 | * does not guarantee R/W access. If that fails we | 3496 | * we want to handle the fault gracefully. If the |
3462 | * give up and leave the futex locked. | 3497 | * access fails we try to fault in the futex with R/W |
3463 | */ | 3498 | * verification via get_user_pages. get_user() above |
3464 | if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) { | 3499 | * does not guarantee R/W access. If that fails we |
3500 | * give up and leave the futex locked. | ||
3501 | */ | ||
3502 | if ((err = cmpxchg_futex_value_locked(&nval, uaddr, uval, mval))) { | ||
3503 | switch (err) { | ||
3504 | case -EFAULT: | ||
3465 | if (fault_in_user_writeable(uaddr)) | 3505 | if (fault_in_user_writeable(uaddr)) |
3466 | return -1; | 3506 | return -1; |
3467 | goto retry; | 3507 | goto retry; |
3468 | } | 3508 | |
3469 | if (nval != uval) | 3509 | case -EAGAIN: |
3510 | cond_resched(); | ||
3470 | goto retry; | 3511 | goto retry; |
3471 | 3512 | ||
3472 | /* | 3513 | default: |
3473 | * Wake robust non-PI futexes here. The wakeup of | 3514 | WARN_ON_ONCE(1); |
3474 | * PI futexes happens in exit_pi_state(): | 3515 | return err; |
3475 | */ | 3516 | } |
3476 | if (!pi && (uval & FUTEX_WAITERS)) | ||
3477 | futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); | ||
3478 | } | 3517 | } |
3518 | |||
3519 | if (nval != uval) | ||
3520 | goto retry; | ||
3521 | |||
3522 | /* | ||
3523 | * Wake robust non-PI futexes here. The wakeup of | ||
3524 | * PI futexes happens in exit_pi_state(): | ||
3525 | */ | ||
3526 | if (!pi && (uval & FUTEX_WAITERS)) | ||
3527 | futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); | ||
3528 | |||
3479 | return 0; | 3529 | return 0; |
3480 | } | 3530 | } |
3481 | 3531 | ||