diff options
author | Pierre Peiffer <pierre.peiffer@bull.net> | 2007-05-09 05:35:02 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-09 15:30:55 -0400 |
commit | c19384b5b296905d4988c7c684ff540a0f9d65be (patch) | |
tree | 071cfe0855d409d63de80ec1b9b663738efb09de | |
parent | ec92d08292d3e9b0823eba138a4564d2d39f25c7 (diff) |
Make futex_wait() use an hrtimer for timeout
This patch modifies futex_wait() to use an hrtimer + schedule() in place of
schedule_timeout().
schedule_timeout() is tick based, therefore the timeout granularity is the
tick (1 ms, 4 ms or 10 ms depending on HZ). By using a high resolution timer
for timeout wakeup, we can attain a much finer timeout granularity (in the
microsecond range). This parallels what is already done for futex_lock_pi().
The timeout passed to the syscall is no longer converted to jiffies; it is
instead passed to do_futex() and futex_wait() as an absolute ktime_t,
thereby keeping nanosecond resolution.
This also removes the need to pass the nanosecond part of the timeout to
futex_lock_pi() in val2.
In futex_wait(), if there is no timeout then a regular schedule() is
performed. Otherwise, an hrtimer is started before schedule() is called.
[akpm@linux-foundation.org: fix `make headers_check']
Signed-off-by: Sebastien Dugue <sebastien.dugue@bull.net>
Signed-off-by: Pierre Peiffer <pierre.peiffer@bull.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/futex.h | 4 | ||||
-rw-r--r-- | kernel/futex.c | 89 | ||||
-rw-r--r-- | kernel/futex_compat.c | 19 |
3 files changed, 57 insertions, 55 deletions
diff --git a/include/linux/futex.h b/include/linux/futex.h index 820125c628c1..34e54f2b8997 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h | |||
@@ -3,6 +3,8 @@ | |||
3 | 3 | ||
4 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
5 | 5 | ||
6 | union ktime; | ||
7 | |||
6 | /* Second argument to futex syscall */ | 8 | /* Second argument to futex syscall */ |
7 | 9 | ||
8 | 10 | ||
@@ -94,7 +96,7 @@ struct robust_list_head { | |||
94 | #define ROBUST_LIST_LIMIT 2048 | 96 | #define ROBUST_LIST_LIMIT 2048 |
95 | 97 | ||
96 | #ifdef __KERNEL__ | 98 | #ifdef __KERNEL__ |
97 | long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout, | 99 | long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout, |
98 | u32 __user *uaddr2, u32 val2, u32 val3); | 100 | u32 __user *uaddr2, u32 val2, u32 val3); |
99 | 101 | ||
100 | extern int | 102 | extern int |
diff --git a/kernel/futex.c b/kernel/futex.c index 685ee2362a5e..e1246ccbf89a 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -1001,16 +1001,16 @@ static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb) | |||
1001 | } | 1001 | } |
1002 | 1002 | ||
1003 | static long futex_wait_restart(struct restart_block *restart); | 1003 | static long futex_wait_restart(struct restart_block *restart); |
1004 | static int futex_wait_abstime(u32 __user *uaddr, u32 val, | 1004 | static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time) |
1005 | int timed, unsigned long abs_time) | ||
1006 | { | 1005 | { |
1007 | struct task_struct *curr = current; | 1006 | struct task_struct *curr = current; |
1008 | DECLARE_WAITQUEUE(wait, curr); | 1007 | DECLARE_WAITQUEUE(wait, curr); |
1009 | struct futex_hash_bucket *hb; | 1008 | struct futex_hash_bucket *hb; |
1010 | struct futex_q q; | 1009 | struct futex_q q; |
1011 | unsigned long time_left = 0; | ||
1012 | u32 uval; | 1010 | u32 uval; |
1013 | int ret; | 1011 | int ret; |
1012 | struct hrtimer_sleeper t; | ||
1013 | int rem = 0; | ||
1014 | 1014 | ||
1015 | q.pi_state = NULL; | 1015 | q.pi_state = NULL; |
1016 | retry: | 1016 | retry: |
@@ -1088,20 +1088,29 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val, | |||
1088 | * !plist_node_empty() is safe here without any lock. | 1088 | * !plist_node_empty() is safe here without any lock. |
1089 | * q.lock_ptr != 0 is not safe, because of ordering against wakeup. | 1089 | * q.lock_ptr != 0 is not safe, because of ordering against wakeup. |
1090 | */ | 1090 | */ |
1091 | time_left = 0; | ||
1092 | if (likely(!plist_node_empty(&q.list))) { | 1091 | if (likely(!plist_node_empty(&q.list))) { |
1093 | unsigned long rel_time; | 1092 | if (!abs_time) |
1094 | 1093 | schedule(); | |
1095 | if (timed) { | 1094 | else { |
1096 | unsigned long now = jiffies; | 1095 | hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
1097 | if (time_after(now, abs_time)) | 1096 | hrtimer_init_sleeper(&t, current); |
1098 | rel_time = 0; | 1097 | t.timer.expires = *abs_time; |
1099 | else | 1098 | |
1100 | rel_time = abs_time - now; | 1099 | hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_ABS); |
1101 | } else | 1100 | |
1102 | rel_time = MAX_SCHEDULE_TIMEOUT; | 1101 | /* |
1102 | * the timer could have already expired, in which | ||
1103 | * case current would be flagged for rescheduling. | ||
1104 | * Don't bother calling schedule. | ||
1105 | */ | ||
1106 | if (likely(t.task)) | ||
1107 | schedule(); | ||
1108 | |||
1109 | hrtimer_cancel(&t.timer); | ||
1103 | 1110 | ||
1104 | time_left = schedule_timeout(rel_time); | 1111 | /* Flag if a timeout occured */ |
1112 | rem = (t.task == NULL); | ||
1113 | } | ||
1105 | } | 1114 | } |
1106 | __set_current_state(TASK_RUNNING); | 1115 | __set_current_state(TASK_RUNNING); |
1107 | 1116 | ||
@@ -1113,14 +1122,14 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val, | |||
1113 | /* If we were woken (and unqueued), we succeeded, whatever. */ | 1122 | /* If we were woken (and unqueued), we succeeded, whatever. */ |
1114 | if (!unqueue_me(&q)) | 1123 | if (!unqueue_me(&q)) |
1115 | return 0; | 1124 | return 0; |
1116 | if (time_left == 0) | 1125 | if (rem) |
1117 | return -ETIMEDOUT; | 1126 | return -ETIMEDOUT; |
1118 | 1127 | ||
1119 | /* | 1128 | /* |
1120 | * We expect signal_pending(current), but another thread may | 1129 | * We expect signal_pending(current), but another thread may |
1121 | * have handled it for us already. | 1130 | * have handled it for us already. |
1122 | */ | 1131 | */ |
1123 | if (time_left == MAX_SCHEDULE_TIMEOUT) | 1132 | if (!abs_time) |
1124 | return -ERESTARTSYS; | 1133 | return -ERESTARTSYS; |
1125 | else { | 1134 | else { |
1126 | struct restart_block *restart; | 1135 | struct restart_block *restart; |
@@ -1128,8 +1137,7 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val, | |||
1128 | restart->fn = futex_wait_restart; | 1137 | restart->fn = futex_wait_restart; |
1129 | restart->arg0 = (unsigned long)uaddr; | 1138 | restart->arg0 = (unsigned long)uaddr; |
1130 | restart->arg1 = (unsigned long)val; | 1139 | restart->arg1 = (unsigned long)val; |
1131 | restart->arg2 = (unsigned long)timed; | 1140 | restart->arg2 = (unsigned long)abs_time; |
1132 | restart->arg3 = abs_time; | ||
1133 | return -ERESTART_RESTARTBLOCK; | 1141 | return -ERESTART_RESTARTBLOCK; |
1134 | } | 1142 | } |
1135 | 1143 | ||
@@ -1141,21 +1149,15 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val, | |||
1141 | return ret; | 1149 | return ret; |
1142 | } | 1150 | } |
1143 | 1151 | ||
1144 | static int futex_wait(u32 __user *uaddr, u32 val, unsigned long rel_time) | ||
1145 | { | ||
1146 | int timed = (rel_time != MAX_SCHEDULE_TIMEOUT); | ||
1147 | return futex_wait_abstime(uaddr, val, timed, jiffies+rel_time); | ||
1148 | } | ||
1149 | 1152 | ||
1150 | static long futex_wait_restart(struct restart_block *restart) | 1153 | static long futex_wait_restart(struct restart_block *restart) |
1151 | { | 1154 | { |
1152 | u32 __user *uaddr = (u32 __user *)restart->arg0; | 1155 | u32 __user *uaddr = (u32 __user *)restart->arg0; |
1153 | u32 val = (u32)restart->arg1; | 1156 | u32 val = (u32)restart->arg1; |
1154 | int timed = (int)restart->arg2; | 1157 | ktime_t *abs_time = (ktime_t *)restart->arg2; |
1155 | unsigned long abs_time = restart->arg3; | ||
1156 | 1158 | ||
1157 | restart->fn = do_no_restart_syscall; | 1159 | restart->fn = do_no_restart_syscall; |
1158 | return (long)futex_wait_abstime(uaddr, val, timed, abs_time); | 1160 | return (long)futex_wait(uaddr, val, abs_time); |
1159 | } | 1161 | } |
1160 | 1162 | ||
1161 | 1163 | ||
@@ -1165,8 +1167,8 @@ static long futex_wait_restart(struct restart_block *restart) | |||
1165 | * if there are waiters then it will block, it does PI, etc. (Due to | 1167 | * if there are waiters then it will block, it does PI, etc. (Due to |
1166 | * races the kernel might see a 0 value of the futex too.) | 1168 | * races the kernel might see a 0 value of the futex too.) |
1167 | */ | 1169 | */ |
1168 | static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, | 1170 | static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time, |
1169 | long nsec, int trylock) | 1171 | int trylock) |
1170 | { | 1172 | { |
1171 | struct hrtimer_sleeper timeout, *to = NULL; | 1173 | struct hrtimer_sleeper timeout, *to = NULL; |
1172 | struct task_struct *curr = current; | 1174 | struct task_struct *curr = current; |
@@ -1178,11 +1180,11 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, | |||
1178 | if (refill_pi_state_cache()) | 1180 | if (refill_pi_state_cache()) |
1179 | return -ENOMEM; | 1181 | return -ENOMEM; |
1180 | 1182 | ||
1181 | if (sec != MAX_SCHEDULE_TIMEOUT) { | 1183 | if (time) { |
1182 | to = &timeout; | 1184 | to = &timeout; |
1183 | hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); | 1185 | hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); |
1184 | hrtimer_init_sleeper(to, current); | 1186 | hrtimer_init_sleeper(to, current); |
1185 | to->timer.expires = ktime_set(sec, nsec); | 1187 | to->timer.expires = *time; |
1186 | } | 1188 | } |
1187 | 1189 | ||
1188 | q.pi_state = NULL; | 1190 | q.pi_state = NULL; |
@@ -1818,7 +1820,7 @@ void exit_robust_list(struct task_struct *curr) | |||
1818 | } | 1820 | } |
1819 | } | 1821 | } |
1820 | 1822 | ||
1821 | long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout, | 1823 | long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, |
1822 | u32 __user *uaddr2, u32 val2, u32 val3) | 1824 | u32 __user *uaddr2, u32 val2, u32 val3) |
1823 | { | 1825 | { |
1824 | int ret; | 1826 | int ret; |
@@ -1844,13 +1846,13 @@ long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout, | |||
1844 | ret = futex_wake_op(uaddr, uaddr2, val, val2, val3); | 1846 | ret = futex_wake_op(uaddr, uaddr2, val, val2, val3); |
1845 | break; | 1847 | break; |
1846 | case FUTEX_LOCK_PI: | 1848 | case FUTEX_LOCK_PI: |
1847 | ret = futex_lock_pi(uaddr, val, timeout, val2, 0); | 1849 | ret = futex_lock_pi(uaddr, val, timeout, 0); |
1848 | break; | 1850 | break; |
1849 | case FUTEX_UNLOCK_PI: | 1851 | case FUTEX_UNLOCK_PI: |
1850 | ret = futex_unlock_pi(uaddr); | 1852 | ret = futex_unlock_pi(uaddr); |
1851 | break; | 1853 | break; |
1852 | case FUTEX_TRYLOCK_PI: | 1854 | case FUTEX_TRYLOCK_PI: |
1853 | ret = futex_lock_pi(uaddr, 0, timeout, val2, 1); | 1855 | ret = futex_lock_pi(uaddr, 0, timeout, 1); |
1854 | break; | 1856 | break; |
1855 | default: | 1857 | default: |
1856 | ret = -ENOSYS; | 1858 | ret = -ENOSYS; |
@@ -1863,21 +1865,20 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, | |||
1863 | struct timespec __user *utime, u32 __user *uaddr2, | 1865 | struct timespec __user *utime, u32 __user *uaddr2, |
1864 | u32 val3) | 1866 | u32 val3) |
1865 | { | 1867 | { |
1866 | struct timespec t; | 1868 | struct timespec ts; |
1867 | unsigned long timeout = MAX_SCHEDULE_TIMEOUT; | 1869 | ktime_t t, *tp = NULL; |
1868 | u32 val2 = 0; | 1870 | u32 val2 = 0; |
1869 | 1871 | ||
1870 | if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) { | 1872 | if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) { |
1871 | if (copy_from_user(&t, utime, sizeof(t)) != 0) | 1873 | if (copy_from_user(&ts, utime, sizeof(ts)) != 0) |
1872 | return -EFAULT; | 1874 | return -EFAULT; |
1873 | if (!timespec_valid(&t)) | 1875 | if (!timespec_valid(&ts)) |
1874 | return -EINVAL; | 1876 | return -EINVAL; |
1877 | |||
1878 | t = timespec_to_ktime(ts); | ||
1875 | if (op == FUTEX_WAIT) | 1879 | if (op == FUTEX_WAIT) |
1876 | timeout = timespec_to_jiffies(&t) + 1; | 1880 | t = ktime_add(ktime_get(), t); |
1877 | else { | 1881 | tp = &t; |
1878 | timeout = t.tv_sec; | ||
1879 | val2 = t.tv_nsec; | ||
1880 | } | ||
1881 | } | 1882 | } |
1882 | /* | 1883 | /* |
1883 | * requeue parameter in 'utime' if op == FUTEX_REQUEUE. | 1884 | * requeue parameter in 'utime' if op == FUTEX_REQUEUE. |
@@ -1885,7 +1886,7 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, | |||
1885 | if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE) | 1886 | if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE) |
1886 | val2 = (u32) (unsigned long) utime; | 1887 | val2 = (u32) (unsigned long) utime; |
1887 | 1888 | ||
1888 | return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3); | 1889 | return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); |
1889 | } | 1890 | } |
1890 | 1891 | ||
1891 | static int futexfs_get_sb(struct file_system_type *fs_type, | 1892 | static int futexfs_get_sb(struct file_system_type *fs_type, |
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 50f24eea6cd0..dff27c471ea6 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c | |||
@@ -141,24 +141,23 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, | |||
141 | struct compat_timespec __user *utime, u32 __user *uaddr2, | 141 | struct compat_timespec __user *utime, u32 __user *uaddr2, |
142 | u32 val3) | 142 | u32 val3) |
143 | { | 143 | { |
144 | struct timespec t; | 144 | struct timespec ts; |
145 | unsigned long timeout = MAX_SCHEDULE_TIMEOUT; | 145 | ktime_t t, *tp = NULL; |
146 | int val2 = 0; | 146 | int val2 = 0; |
147 | 147 | ||
148 | if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) { | 148 | if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) { |
149 | if (get_compat_timespec(&t, utime)) | 149 | if (get_compat_timespec(&ts, utime)) |
150 | return -EFAULT; | 150 | return -EFAULT; |
151 | if (!timespec_valid(&t)) | 151 | if (!timespec_valid(&ts)) |
152 | return -EINVAL; | 152 | return -EINVAL; |
153 | |||
154 | t = timespec_to_ktime(ts); | ||
153 | if (op == FUTEX_WAIT) | 155 | if (op == FUTEX_WAIT) |
154 | timeout = timespec_to_jiffies(&t) + 1; | 156 | t = ktime_add(ktime_get(), t); |
155 | else { | 157 | tp = &t; |
156 | timeout = t.tv_sec; | ||
157 | val2 = t.tv_nsec; | ||
158 | } | ||
159 | } | 158 | } |
160 | if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE) | 159 | if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE) |
161 | val2 = (int) (unsigned long) utime; | 160 | val2 = (int) (unsigned long) utime; |
162 | 161 | ||
163 | return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3); | 162 | return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); |
164 | } | 163 | } |