diff options
author | Shawn Bohrer <shawn.bohrer@gmail.com> | 2010-10-27 18:34:54 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-27 21:03:18 -0400 |
commit | 95aac7b1cd224f568fb83937044cd303ff11b029 (patch) | |
tree | 24c08cfe031ecc0549cf3f7900e992a27044c4ad | |
parent | 231f3d393f63f6e3b505afa179999bba491d0f08 (diff) |
epoll: make epoll_wait() use the hrtimer range feature
This makes epoll use hrtimers for the timeout value, which prevents
epoll_wait() from timing out up to a millisecond early.
This mirrors the behavior of select() and poll().
Signed-off-by: Shawn Bohrer <shawn.bohrer@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/eventpoll.c | 35 | ||||
-rw-r--r-- | fs/select.c | 2 | ||||
-rw-r--r-- | include/linux/poll.h | 2 |
3 files changed, 22 insertions, 17 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 256bb7bb102a..8cf07242067d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -77,9 +77,6 @@ | |||
77 | /* Maximum number of nesting allowed inside epoll sets */ | 77 | /* Maximum number of nesting allowed inside epoll sets */ |
78 | #define EP_MAX_NESTS 4 | 78 | #define EP_MAX_NESTS 4 |
79 | 79 | ||
80 | /* Maximum msec timeout value storeable in a long int */ | ||
81 | #define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) | ||
82 | |||
83 | #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) | 80 | #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) |
84 | 81 | ||
85 | #define EP_UNACTIVE_PTR ((void *) -1L) | 82 | #define EP_UNACTIVE_PTR ((void *) -1L) |
@@ -1117,18 +1114,22 @@ static int ep_send_events(struct eventpoll *ep, | |||
1117 | static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, | 1114 | static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, |
1118 | int maxevents, long timeout) | 1115 | int maxevents, long timeout) |
1119 | { | 1116 | { |
1120 | int res, eavail; | 1117 | int res, eavail, timed_out = 0; |
1121 | unsigned long flags; | 1118 | unsigned long flags; |
1122 | long jtimeout; | 1119 | long slack; |
1123 | wait_queue_t wait; | 1120 | wait_queue_t wait; |
1124 | 1121 | struct timespec end_time; | |
1125 | /* | 1122 | ktime_t expires, *to = NULL; |
1126 | * Calculate the timeout by checking for the "infinite" value (-1) | 1123 | |
1127 | * and the overflow condition. The passed timeout is in milliseconds, | 1124 | if (timeout > 0) { |
1128 | * that why (t * HZ) / 1000. | 1125 | ktime_get_ts(&end_time); |
1129 | */ | 1126 | timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC); |
1130 | jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ? | 1127 | slack = select_estimate_accuracy(&end_time); |
1131 | MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000; | 1128 | to = &expires; |
1129 | *to = timespec_to_ktime(end_time); | ||
1130 | } else if (timeout == 0) { | ||
1131 | timed_out = 1; | ||
1132 | } | ||
1132 | 1133 | ||
1133 | retry: | 1134 | retry: |
1134 | spin_lock_irqsave(&ep->lock, flags); | 1135 | spin_lock_irqsave(&ep->lock, flags); |
@@ -1150,7 +1151,7 @@ retry: | |||
1150 | * to TASK_INTERRUPTIBLE before doing the checks. | 1151 | * to TASK_INTERRUPTIBLE before doing the checks. |
1151 | */ | 1152 | */ |
1152 | set_current_state(TASK_INTERRUPTIBLE); | 1153 | set_current_state(TASK_INTERRUPTIBLE); |
1153 | if (!list_empty(&ep->rdllist) || !jtimeout) | 1154 | if (!list_empty(&ep->rdllist) || timed_out) |
1154 | break; | 1155 | break; |
1155 | if (signal_pending(current)) { | 1156 | if (signal_pending(current)) { |
1156 | res = -EINTR; | 1157 | res = -EINTR; |
@@ -1158,7 +1159,9 @@ retry: | |||
1158 | } | 1159 | } |
1159 | 1160 | ||
1160 | spin_unlock_irqrestore(&ep->lock, flags); | 1161 | spin_unlock_irqrestore(&ep->lock, flags); |
1161 | jtimeout = schedule_timeout(jtimeout); | 1162 | if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) |
1163 | timed_out = 1; | ||
1164 | |||
1162 | spin_lock_irqsave(&ep->lock, flags); | 1165 | spin_lock_irqsave(&ep->lock, flags); |
1163 | } | 1166 | } |
1164 | __remove_wait_queue(&ep->wq, &wait); | 1167 | __remove_wait_queue(&ep->wq, &wait); |
@@ -1176,7 +1179,7 @@ retry: | |||
1176 | * more luck. | 1179 | * more luck. |
1177 | */ | 1180 | */ |
1178 | if (!res && eavail && | 1181 | if (!res && eavail && |
1179 | !(res = ep_send_events(ep, events, maxevents)) && jtimeout) | 1182 | !(res = ep_send_events(ep, events, maxevents)) && !timed_out) |
1180 | goto retry; | 1183 | goto retry; |
1181 | 1184 | ||
1182 | return res; | 1185 | return res; |
diff --git a/fs/select.c b/fs/select.c index 5f023f911202..b7b10aa30861 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -67,7 +67,7 @@ static long __estimate_accuracy(struct timespec *tv) | |||
67 | return slack; | 67 | return slack; |
68 | } | 68 | } |
69 | 69 | ||
70 | static long select_estimate_accuracy(struct timespec *tv) | 70 | long select_estimate_accuracy(struct timespec *tv) |
71 | { | 71 | { |
72 | unsigned long ret; | 72 | unsigned long ret; |
73 | struct timespec now; | 73 | struct timespec now; |
diff --git a/include/linux/poll.h b/include/linux/poll.h index 600cc1fde64d..56e76af78102 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h | |||
@@ -73,6 +73,8 @@ extern void poll_initwait(struct poll_wqueues *pwq); | |||
73 | extern void poll_freewait(struct poll_wqueues *pwq); | 73 | extern void poll_freewait(struct poll_wqueues *pwq); |
74 | extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state, | 74 | extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state, |
75 | ktime_t *expires, unsigned long slack); | 75 | ktime_t *expires, unsigned long slack); |
76 | extern long select_estimate_accuracy(struct timespec *tv); | ||
77 | |||
76 | 78 | ||
77 | static inline int poll_schedule(struct poll_wqueues *pwq, int state) | 79 | static inline int poll_schedule(struct poll_wqueues *pwq, int state) |
78 | { | 80 | { |