aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2006-03-27 04:16:22 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-27 11:44:49 -0500
commit0771dfefc9e538f077d0b43b6dec19a5a67d0e70 (patch)
tree696267e69228b7406b337f9651dedc75055a589e /kernel
parente9056f13bfcdd054a0c3d730e4e096748d8a363a (diff)
[PATCH] lightweight robust futexes: core
Add the core infrastructure for robust futexes: structure definitions, the new syscalls and the do_exit() based cleanup mechanism.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Acked-by: Ulrich Drepper <drepper@redhat.com>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/exit.c3
-rw-r--r--kernel/futex.c172
-rw-r--r--kernel/sys_ni.c4
3 files changed, 179 insertions, 0 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 8037405e136e..aecb48ca7370 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -31,6 +31,7 @@
31#include <linux/signal.h> 31#include <linux/signal.h>
32#include <linux/cn_proc.h> 32#include <linux/cn_proc.h>
33#include <linux/mutex.h> 33#include <linux/mutex.h>
34#include <linux/futex.h>
34 35
35#include <asm/uaccess.h> 36#include <asm/uaccess.h>
36#include <asm/unistd.h> 37#include <asm/unistd.h>
@@ -852,6 +853,8 @@ fastcall NORET_TYPE void do_exit(long code)
852 exit_itimers(tsk->signal); 853 exit_itimers(tsk->signal);
853 acct_process(code); 854 acct_process(code);
854 } 855 }
856 if (unlikely(tsk->robust_list))
857 exit_robust_list(tsk);
855 exit_mm(tsk); 858 exit_mm(tsk);
856 859
857 exit_sem(tsk); 860 exit_sem(tsk);
diff --git a/kernel/futex.c b/kernel/futex.c
index 5efa2f978032..feb724b2554e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -8,6 +8,10 @@
8 * Removed page pinning, fix privately mapped COW pages and other cleanups 8 * Removed page pinning, fix privately mapped COW pages and other cleanups
9 * (C) Copyright 2003, 2004 Jamie Lokier 9 * (C) Copyright 2003, 2004 Jamie Lokier
10 * 10 *
11 * Robust futex support started by Ingo Molnar
12 * (C) Copyright 2006 Red Hat Inc, All Rights Reserved
13 * Thanks to Thomas Gleixner for suggestions, analysis and fixes.
14 *
11 * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly 15 * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
12 * enough at me, Linus for the original (flawed) idea, Matthew 16 * enough at me, Linus for the original (flawed) idea, Matthew
13 * Kirkwood for proof-of-concept implementation. 17 * Kirkwood for proof-of-concept implementation.
@@ -829,6 +833,174 @@ error:
829 goto out; 833 goto out;
830} 834}
831 835
836/*
837 * Support for robust futexes: the kernel cleans up held futexes at
838 * thread exit time.
839 *
840 * Implementation: user-space maintains a per-thread list of locks it
841 * is holding. Upon do_exit(), the kernel carefully walks this list,
842 * and marks all locks that are owned by this thread with the
843 * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
844 * always manipulated with the lock held, so the list is private and
845 * per-thread. Userspace also maintains a per-thread 'list_op_pending'
846 * field, to allow the kernel to clean up if the thread dies after
847 * acquiring the lock, but just before it could have added itself to
848 * the list. There can only be one such pending lock.
849 */
850
851/**
852 * sys_set_robust_list - set the robust-futex list head of a task
853 * @head: pointer to the list-head
854 * @len: length of the list-head, as userspace expects
855 */
856asmlinkage long
857sys_set_robust_list(struct robust_list_head __user *head,
858 size_t len)
859{
860 /*
861 * The kernel knows only one size for now:
862 */
863 if (unlikely(len != sizeof(*head)))
864 return -EINVAL;
865
866 current->robust_list = head;
867
868 return 0;
869}
870
871/**
872 * sys_get_robust_list - get the robust-futex list head of a task
873 * @pid: pid of the process [zero for current task]
874 * @head_ptr: pointer to a list-head pointer, the kernel fills it in
875 * @len_ptr: pointer to a length field, the kernel fills in the header size
876 */
877asmlinkage long
878sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
879 size_t __user *len_ptr)
880{
881 struct robust_list_head *head;
882 unsigned long ret;
883
884 if (!pid)
885 head = current->robust_list;
886 else {
887 struct task_struct *p;
888
889 ret = -ESRCH;
890 read_lock(&tasklist_lock);
891 p = find_task_by_pid(pid);
892 if (!p)
893 goto err_unlock;
894 ret = -EPERM;
895 if ((current->euid != p->euid) && (current->euid != p->uid) &&
896 !capable(CAP_SYS_PTRACE))
897 goto err_unlock;
898 head = p->robust_list;
899 read_unlock(&tasklist_lock);
900 }
901
902 if (put_user(sizeof(*head), len_ptr))
903 return -EFAULT;
904 return put_user(head, head_ptr);
905
906err_unlock:
907 read_unlock(&tasklist_lock);
908
909 return ret;
910}
911
912/*
913 * Process a futex-list entry, check whether it's owned by the
914 * dying task, and do notification if so:
915 */
916int handle_futex_death(unsigned int *uaddr, struct task_struct *curr)
917{
918 unsigned int futex_val;
919
920repeat:
921 if (get_user(futex_val, uaddr))
922 return -1;
923
924 if ((futex_val & FUTEX_TID_MASK) == curr->pid) {
925 /*
926 * Ok, this dying thread is truly holding a futex
927 * of interest. Set the OWNER_DIED bit atomically
928 * via cmpxchg, and if the value had FUTEX_WAITERS
929 * set, wake up a waiter (if any). (We have to do a
930 * futex_wake() even if OWNER_DIED is already set -
931 * to handle the rare but possible case of recursive
932 * thread-death.) The rest of the cleanup is done in
933 * userspace.
934 */
935 if (futex_atomic_cmpxchg_inuser(uaddr, futex_val,
936 futex_val | FUTEX_OWNER_DIED) !=
937 futex_val)
938 goto repeat;
939
940 if (futex_val & FUTEX_WAITERS)
941 futex_wake((unsigned long)uaddr, 1);
942 }
943 return 0;
944}
945
946/*
947 * Walk curr->robust_list (very carefully, it's a userspace list!)
948 * and mark any locks found there dead, and notify any waiters.
949 *
950 * We silently return on any sign of list-walking problem.
951 */
952void exit_robust_list(struct task_struct *curr)
953{
954 struct robust_list_head __user *head = curr->robust_list;
955 struct robust_list __user *entry, *pending;
956 unsigned int limit = ROBUST_LIST_LIMIT;
957 unsigned long futex_offset;
958
959 /*
960 * Fetch the list head (which was registered earlier, via
961 * sys_set_robust_list()):
962 */
963 if (get_user(entry, &head->list.next))
964 return;
965 /*
966 * Fetch the relative futex offset:
967 */
968 if (get_user(futex_offset, &head->futex_offset))
969 return;
970 /*
971 * Fetch any possibly pending lock-add first, and handle it
972 * if it exists:
973 */
974 if (get_user(pending, &head->list_op_pending))
975 return;
976 if (pending)
977 handle_futex_death((void *)pending + futex_offset, curr);
978
979 while (entry != &head->list) {
980 /*
981 * A pending lock might already be on the list, so
982 * dont process it twice:
983 */
984 if (entry != pending)
985 if (handle_futex_death((void *)entry + futex_offset,
986 curr))
987 return;
988
989 /*
990 * Fetch the next entry in the list:
991 */
992 if (get_user(entry, &entry->next))
993 return;
994 /*
995 * Avoid excessively long or circular lists:
996 */
997 if (!--limit)
998 break;
999
1000 cond_resched();
1001 }
1002}
1003
832long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, 1004long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
833 unsigned long uaddr2, int val2, int val3) 1005 unsigned long uaddr2, int val2, int val3)
834{ 1006{
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 1067090db6b1..d82864c4a617 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -42,6 +42,10 @@ cond_syscall(sys_recvmsg);
42cond_syscall(sys_socketcall); 42cond_syscall(sys_socketcall);
43cond_syscall(sys_futex); 43cond_syscall(sys_futex);
44cond_syscall(compat_sys_futex); 44cond_syscall(compat_sys_futex);
45cond_syscall(sys_set_robust_list);
46cond_syscall(compat_sys_set_robust_list);
47cond_syscall(sys_get_robust_list);
48cond_syscall(compat_sys_get_robust_list);
45cond_syscall(sys_epoll_create); 49cond_syscall(sys_epoll_create);
46cond_syscall(sys_epoll_ctl); 50cond_syscall(sys_epoll_ctl);
47cond_syscall(sys_epoll_wait); 51cond_syscall(sys_epoll_wait);