Diffstat (limited to 'kernel/futex.c')
 kernel/futex.c | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 170 insertions(+), 0 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index 5efa2f978032..9c9b2b6b22dd 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -8,6 +8,10 @@
  * Removed page pinning, fix privately mapped COW pages and other cleanups
  * (C) Copyright 2003, 2004 Jamie Lokier
  *
+ * Robust futex support started by Ingo Molnar
+ * (C) Copyright 2006 Red Hat Inc, All Rights Reserved
+ * Thanks to Thomas Gleixner for suggestions, analysis and fixes.
+ *
  * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
  * enough at me, Linus for the original (flawed) idea, Matthew
  * Kirkwood for proof-of-concept implementation.
@@ -829,6 +833,172 @@ error:
 	goto out;
 }
 
+/*
+ * Support for robust futexes: the kernel cleans up held futexes at
+ * thread exit time.
+ *
+ * Implementation: user-space maintains a per-thread list of locks it
+ * is holding. Upon do_exit(), the kernel carefully walks this list,
+ * and marks all locks that are owned by this thread with the
+ * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
+ * always manipulated with the lock held, so the list is private and
+ * per-thread. Userspace also maintains a per-thread 'list_op_pending'
+ * field, to allow the kernel to clean up if the thread dies after
+ * acquiring the lock, but just before it could have added itself to
+ * the list. There can only be one such pending lock.
+ */
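
The comment above relies on a userspace layout defined by the companion include/linux/futex.h change. A minimal sketch of how an application lock record might embed the list node (struct my_robust_mutex and my_futex_offset are hypothetical, for illustration only):

#include <stddef.h>
#include <stdint.h>
#include <linux/futex.h>	/* struct robust_list, struct robust_list_head */

/* Hypothetical application lock record: the futex word sits at a
 * fixed distance from the embedded list node. */
struct my_robust_mutex {
	struct robust_list list;	/* linked into the per-thread list */
	uint32_t futex;			/* owner TID plus FUTEX_* flag bits */
};

/* The per-thread head's futex_offset field would then be: */
static const long my_futex_offset =
	offsetof(struct my_robust_mutex, futex) -
	offsetof(struct my_robust_mutex, list);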
+
+/**
+ * sys_set_robust_list - set the robust-futex list head of a task
+ * @head: pointer to the list-head
+ * @len: length of the list-head, as userspace expects
+ */
+asmlinkage long
+sys_set_robust_list(struct robust_list_head __user *head,
+		    size_t len)
+{
+	/*
+	 * The kernel knows only one size for now:
+	 */
+	if (unlikely(len != sizeof(*head)))
+		return -EINVAL;
+
+	current->robust_list = head;
+
+	return 0;
+}
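
For context, a hedged sketch of how a thread might register its list from userspace; there is no libc wrapper at this point, so the raw syscall is used (assuming headers that define SYS_set_robust_list and struct robust_list_head):

#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/futex.h>

/* An empty robust list: the circular list points back at itself. */
static struct robust_list_head head = {
	.list		 = { &head.list },
	.futex_offset	 = 0,	/* would be my_futex_offset for real locks */
	.list_op_pending = NULL,
};

int main(void)
{
	/* len must equal sizeof(head), or the kernel returns -EINVAL */
	if (syscall(SYS_set_robust_list, &head, sizeof(head)) != 0) {
		perror("set_robust_list");
		return 1;
	}
	return 0;
}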
+
+/**
+ * sys_get_robust_list - get the robust-futex list head of a task
+ * @pid: pid of the process [zero for current task]
+ * @head_ptr: pointer to a list-head pointer, the kernel fills it in
+ * @len_ptr: pointer to a length field, the kernel fills in the header size
+ */
+asmlinkage long
+sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
+		    size_t __user *len_ptr)
+{
+	struct robust_list_head *head;
+	unsigned long ret;
+
+	if (!pid)
+		head = current->robust_list;
+	else {
+		struct task_struct *p;
+
+		ret = -ESRCH;
+		read_lock(&tasklist_lock);
+		p = find_task_by_pid(pid);
+		if (!p)
+			goto err_unlock;
+		ret = -EPERM;
+		if ((current->euid != p->euid) && (current->euid != p->uid) &&
+		    !capable(CAP_SYS_PTRACE))
+			goto err_unlock;
+		head = p->robust_list;
+		read_unlock(&tasklist_lock);
+	}
+
+	if (put_user(sizeof(*head), len_ptr))
+		return -EFAULT;
+	return put_user(head, head_ptr);
+
+err_unlock:
+	read_unlock(&tasklist_lock);
+
+	return ret;
+}
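
A matching sketch of the query side (again via raw syscall; pid 0 means the calling thread, and a nonzero pid is subject to the euid/uid/CAP_SYS_PTRACE check above):

#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/futex.h>

int main(void)
{
	struct robust_list_head *head;
	size_t len;

	if (syscall(SYS_get_robust_list, 0, &head, &len) != 0) {
		perror("get_robust_list");
		return 1;
	}
	printf("robust list head=%p, header size=%zu\n", (void *)head, len);
	return 0;
}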
+
+/*
+ * Process a futex-list entry, check whether it's owned by the
+ * dying task, and do notification if so:
+ */
+int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
+{
+	u32 uval;
+
+retry:
+	if (get_user(uval, uaddr))
+		return -1;
+
+	if ((uval & FUTEX_TID_MASK) == curr->pid) {
+		/*
+		 * Ok, this dying thread is truly holding a futex
+		 * of interest. Set the OWNER_DIED bit atomically
+		 * via cmpxchg, and if the value had FUTEX_WAITERS
+		 * set, wake up a waiter (if any). (We have to do a
+		 * futex_wake() even if OWNER_DIED is already set -
+		 * to handle the rare but possible case of recursive
+		 * thread-death.) The rest of the cleanup is done in
+		 * userspace.
+		 */
+		if (futex_atomic_cmpxchg_inatomic(uaddr, uval,
+						  uval | FUTEX_OWNER_DIED) != uval)
+			goto retry;
+
+		if (uval & FUTEX_WAITERS)
+			futex_wake((unsigned long)uaddr, 1);
+	}
+	return 0;
+}
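
On the userspace side, a lock-acquire path is what eventually observes the FUTEX_OWNER_DIED bit set here. A hedged sketch (robust_trylock and its return convention are hypothetical; real code must also handle FUTEX_WAITERS wakeups and blocking):

#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/futex.h>

/* Returns 0 on clean acquisition, 1 if the lock was taken over from a
 * dead owner (protected state may need recovery), -1 if contended. */
static int robust_trylock(uint32_t *futex)
{
	uint32_t tid = (uint32_t)syscall(SYS_gettid);
	uint32_t old = __sync_val_compare_and_swap(futex, 0, tid);

	if (old == 0)
		return 0;
	if (old & FUTEX_OWNER_DIED) {
		/* the kernel marked the word in handle_futex_death();
		 * take over ownership, preserving any waiters bit */
		uint32_t newval = tid | (old & FUTEX_WAITERS);
		if (__sync_val_compare_and_swap(futex, old, newval) == old)
			return 1;
	}
	return -1;
}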
+
+/*
+ * Walk curr->robust_list (very carefully, it's a userspace list!)
+ * and mark any locks found there dead, and notify any waiters.
+ *
+ * We silently return on any sign of list-walking problem.
+ */
+void exit_robust_list(struct task_struct *curr)
+{
+	struct robust_list_head __user *head = curr->robust_list;
+	struct robust_list __user *entry, *pending;
+	unsigned int limit = ROBUST_LIST_LIMIT;
+	unsigned long futex_offset;
+
+	/*
+	 * Fetch the list head (which was registered earlier, via
+	 * sys_set_robust_list()):
+	 */
+	if (get_user(entry, &head->list.next))
+		return;
+	/*
+	 * Fetch the relative futex offset:
+	 */
+	if (get_user(futex_offset, &head->futex_offset))
+		return;
+	/*
+	 * Fetch any possibly pending lock-add first, and handle it
+	 * if it exists:
+	 */
+	if (get_user(pending, &head->list_op_pending))
+		return;
+	if (pending)
+		handle_futex_death((void *)pending + futex_offset, curr);
+
+	while (entry != &head->list) {
+		/*
+		 * A pending lock might already be on the list, so
+		 * don't process it twice:
+		 */
+		if (entry != pending)
+			if (handle_futex_death((void *)entry + futex_offset,
+					       curr))
+				return;
+		/*
+		 * Fetch the next entry in the list:
+		 */
+		if (get_user(entry, &entry->next))
+			return;
+		/*
+		 * Avoid excessively long or circular lists:
+		 */
+		if (!--limit)
+			break;
+
+		cond_resched();
+	}
+}
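
The walk above only works if userspace maintains the list with a strict protocol: announce the operation in list_op_pending, perform it, then clear the marker, so a death at any instant leaves either the marker or the list entry visible to the kernel. A hedged sketch of the enqueue side (robust_lock_record is hypothetical; __sync_synchronize() is the GCC full-barrier builtin):

/* 'head' is the registered robust_list_head, 'm' the hypothetical
 * lock record from the earlier sketch. */
static void robust_lock_record(struct robust_list_head *head,
			       struct my_robust_mutex *m)
{
	head->list_op_pending = &m->list;	/* 1: announce intent */
	__sync_synchronize();
	/* ... acquire m->futex here (cmpxchg 0 -> TID) ... */
	m->list.next = head->list.next;		/* 2: link into the list */
	head->list.next = &m->list;
	__sync_synchronize();
	head->list_op_pending = NULL;		/* 3: clear the marker */
}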
+
 long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
 	      unsigned long uaddr2, int val2, int val3)
 {
