diff options
author | Ingo Molnar <mingo@elte.hu> | 2006-03-27 04:16:22 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-27 11:44:49 -0500 |
commit | 0771dfefc9e538f077d0b43b6dec19a5a67d0e70 (patch) | |
tree | 696267e69228b7406b337f9651dedc75055a589e /include/linux | |
parent | e9056f13bfcdd054a0c3d730e4e096748d8a363a (diff) |
[PATCH] lightweight robust futexes: core
Add the core infrastructure for robust futexes: structure definitions, the new
syscalls and the do_exit() based cleanup mechanism.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Acked-by: Ulrich Drepper <drepper@redhat.com>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/futex.h | 95 | ||||
-rw-r--r-- | include/linux/sched.h | 3 | ||||
-rw-r--r-- | include/linux/threads.h | 3 |
3 files changed, 100 insertions, 1 deletions
diff --git a/include/linux/futex.h b/include/linux/futex.h index 10f96c31971e..20face6b798d 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _LINUX_FUTEX_H | 1 | #ifndef _LINUX_FUTEX_H |
2 | #define _LINUX_FUTEX_H | 2 | #define _LINUX_FUTEX_H |
3 | 3 | ||
4 | #include <linux/sched.h> | ||
5 | |||
4 | /* Second argument to futex syscall */ | 6 | /* Second argument to futex syscall */ |
5 | 7 | ||
6 | 8 | ||
@@ -11,10 +13,103 @@ | |||
11 | #define FUTEX_CMP_REQUEUE 4 | 13 | #define FUTEX_CMP_REQUEUE 4 |
12 | #define FUTEX_WAKE_OP 5 | 14 | #define FUTEX_WAKE_OP 5 |
13 | 15 | ||
16 | /* | ||
17 | * Support for robust futexes: the kernel cleans up held futexes at | ||
18 | * thread exit time. | ||
19 | */ | ||
20 | |||
21 | /* | ||
22 | * Per-lock list entry - embedded in user-space locks, somewhere close | ||
23 | * to the futex field. (Note: user-space uses a double-linked list to | ||
24 | * achieve O(1) list add and remove, but the kernel only needs to know | ||
25 | * about the forward link) | ||
26 | * | ||
27 | * NOTE: this structure is part of the syscall ABI, and must not be | ||
28 | * changed. | ||
29 | */ | ||
30 | struct robust_list { | ||
31 | struct robust_list __user *next; | ||
32 | }; | ||
33 | |||
34 | /* | ||
35 | * Per-thread list head: | ||
36 | * | ||
37 | * NOTE: this structure is part of the syscall ABI, and must only be | ||
38 | * changed if the change is first communicated with the glibc folks. | ||
39 | * (When an incompatible change is done, we'll increase the structure | ||
40 | * size, which glibc will detect) | ||
41 | */ | ||
42 | struct robust_list_head { | ||
43 | /* | ||
44 | * The head of the list. Points back to itself if empty: | ||
45 | */ | ||
46 | struct robust_list list; | ||
47 | |||
48 | /* | ||
49 | * This relative offset is set by user-space, it gives the kernel | ||
50 | * the relative position of the futex field to examine. This way | ||
51 | * we keep userspace flexible, to freely shape its data-structure, | ||
52 | * without hardcoding any particular offset into the kernel: | ||
53 | */ | ||
54 | long futex_offset; | ||
55 | |||
56 | /* | ||
57 | * The death of the thread may race with userspace setting | ||
58 | * up a lock's links. So to handle this race, userspace first | ||
59 | * sets this field to the address of the to-be-taken lock, | ||
60 | * then does the lock acquire, and then adds itself to the | ||
61 | * list, and then clears this field. Hence the kernel will | ||
62 | * always have full knowledge of all locks that the thread | ||
63 | * _might_ have taken. We check the owner TID in any case, | ||
64 | * so only truly owned locks will be handled. | ||
65 | */ | ||
66 | struct robust_list __user *list_op_pending; | ||
67 | }; | ||
68 | |||
69 | /* | ||
70 | * Are there any waiters for this robust futex: | ||
71 | */ | ||
72 | #define FUTEX_WAITERS 0x80000000 | ||
73 | |||
74 | /* | ||
75 | * The kernel signals via this bit that a thread holding a futex | ||
76 | * has exited without unlocking the futex. The kernel also does | ||
77 | * a FUTEX_WAKE on such futexes, after setting the bit, to wake | ||
78 | * up any possible waiters: | ||
79 | */ | ||
80 | #define FUTEX_OWNER_DIED 0x40000000 | ||
81 | |||
82 | /* | ||
83 | * Reserved bit: | ||
84 | */ | ||
85 | #define FUTEX_OWNER_PENDING 0x20000000 | ||
86 | |||
87 | /* | ||
88 | * The rest of the robust-futex field is for the TID: | ||
89 | */ | ||
90 | #define FUTEX_TID_MASK 0x1fffffff | ||
91 | |||
92 | /* | ||
93 | * A limit of one million locks held per thread (!) ought to be enough | ||
94 | * for some time. This also protects against a deliberately circular | ||
95 | * list. Not worth introducing an rlimit for this: | ||
96 | */ | ||
97 | #define ROBUST_LIST_LIMIT 1048576 | ||
98 | |||
14 | long do_futex(unsigned long uaddr, int op, int val, | 99 | long do_futex(unsigned long uaddr, int op, int val, |
15 | unsigned long timeout, unsigned long uaddr2, int val2, | 100 | unsigned long timeout, unsigned long uaddr2, int val2, |
16 | int val3); | 101 | int val3); |
17 | 102 | ||
103 | extern int handle_futex_death(unsigned int *uaddr, struct task_struct *curr); | ||
104 | |||
105 | #ifdef CONFIG_FUTEX | ||
106 | extern void exit_robust_list(struct task_struct *curr); | ||
107 | #else | ||
108 | static inline void exit_robust_list(struct task_struct *curr) | ||
109 | { | ||
110 | } | ||
111 | #endif | ||
112 | |||
18 | #define FUTEX_OP_SET 0 /* *(int *)UADDR2 = OPARG; */ | 113 | #define FUTEX_OP_SET 0 /* *(int *)UADDR2 = OPARG; */ |
19 | #define FUTEX_OP_ADD 1 /* *(int *)UADDR2 += OPARG; */ | 114 | #define FUTEX_OP_ADD 1 /* *(int *)UADDR2 += OPARG; */ |
20 | #define FUTEX_OP_OR 2 /* *(int *)UADDR2 |= OPARG; */ | 115 | #define FUTEX_OP_OR 2 /* *(int *)UADDR2 |= OPARG; */ |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 036d14d2bf90..fd4848f2d750 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/topology.h> | 35 | #include <linux/topology.h> |
36 | #include <linux/seccomp.h> | 36 | #include <linux/seccomp.h> |
37 | #include <linux/rcupdate.h> | 37 | #include <linux/rcupdate.h> |
38 | #include <linux/futex.h> | ||
38 | 39 | ||
39 | #include <linux/auxvec.h> /* For AT_VECTOR_SIZE */ | 40 | #include <linux/auxvec.h> /* For AT_VECTOR_SIZE */ |
40 | 41 | ||
@@ -872,6 +873,8 @@ struct task_struct { | |||
872 | int cpuset_mems_generation; | 873 | int cpuset_mems_generation; |
873 | int cpuset_mem_spread_rotor; | 874 | int cpuset_mem_spread_rotor; |
874 | #endif | 875 | #endif |
876 | struct robust_list_head __user *robust_list; | ||
877 | |||
875 | atomic_t fs_excl; /* holding fs exclusive resources */ | 878 | atomic_t fs_excl; /* holding fs exclusive resources */ |
876 | struct rcu_head rcu; | 879 | struct rcu_head rcu; |
877 | }; | 880 | }; |
diff --git a/include/linux/threads.h b/include/linux/threads.h index b59738ac6197..e646bcdf2614 100644 --- a/include/linux/threads.h +++ b/include/linux/threads.h | |||
@@ -28,7 +28,8 @@ | |||
28 | #define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000) | 28 | #define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000) |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * A maximum of 4 million PIDs should be enough for a while: | 31 | * A maximum of 4 million PIDs should be enough for a while. |
32 | * [NOTE: PID/TIDs are limited to 2^29 ~= 500+ million, see futex.h.] | ||
32 | */ | 33 | */ |
33 | #define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \ | 34 | #define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \ |
34 | (sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT)) | 35 | (sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT)) |