aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2008-10-23 13:53:02 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-10-23 13:53:02 -0400
commit 1f6d6e8ebe73ba9d9d4c693f7f6f50f661dbd6e4 (patch)
tree be7a2d20b1728da5a0d844a6f4cd382b2c2569fb /fs
parent db563fc2e80534f98c7f9121a6f7dfe41f177a79 (diff)
parent 268a3dcfea2077fca60d3715caa5c96f9b5e6ea7 (diff)
Merge branch 'v28-range-hrtimers-for-linus-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'v28-range-hrtimers-for-linus-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (37 commits)
  hrtimers: add missing docbook comments to struct hrtimer
  hrtimers: simplify hrtimer_peek_ahead_timers()
  hrtimers: fix docbook comments
  DECLARE_PER_CPU needs linux/percpu.h
  hrtimers: fix typo
  rangetimers: fix the bug reported by Ingo for real
  rangetimer: fix BUG_ON reported by Ingo
  rangetimer: fix x86 build failure for the !HRTIMERS case
  select: fix alpha OSF wrapper
  select: fix alpha OSF wrapper
  hrtimer: peek at the timer queue just before going idle
  hrtimer: make the futex() system call use the per process slack value
  hrtimer: make the nanosleep() syscall use the per process slack
  hrtimer: fix signed/unsigned bug in slack estimator
  hrtimer: show the timer ranges in /proc/timer_list
  hrtimer: incorporate feedback from Peter Zijlstra
  hrtimer: add a hrtimer_start_range() function
  hrtimer: another build fix
  hrtimer: fix build bug found by Ingo
  hrtimer: make select() and poll() use the hrtimer range feature
  ...
Diffstat (limited to 'fs')
-rw-r--r-- fs/compat.c | 187
-rw-r--r-- fs/select.c | 396
-rw-r--r-- fs/timerfd.c | 8
3 files changed, 297 insertions, 294 deletions
diff --git a/fs/compat.c b/fs/compat.c
index cb36245f9fe0..fe3c9bf87608 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1469,6 +1469,57 @@ out_ret:
1469 1469
1470#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) 1470#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
1471 1471
1472static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
1473 int timeval, int ret)
1474{
1475 struct timespec ts;
1476
1477 if (!p)
1478 return ret;
1479
1480 if (current->personality & STICKY_TIMEOUTS)
1481 goto sticky;
1482
1483 /* No update for zero timeout */
1484 if (!end_time->tv_sec && !end_time->tv_nsec)
1485 return ret;
1486
1487 ktime_get_ts(&ts);
1488 ts = timespec_sub(*end_time, ts);
1489 if (ts.tv_sec < 0)
1490 ts.tv_sec = ts.tv_nsec = 0;
1491
1492 if (timeval) {
1493 struct compat_timeval rtv;
1494
1495 rtv.tv_sec = ts.tv_sec;
1496 rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
1497
1498 if (!copy_to_user(p, &rtv, sizeof(rtv)))
1499 return ret;
1500 } else {
1501 struct compat_timespec rts;
1502
1503 rts.tv_sec = ts.tv_sec;
1504 rts.tv_nsec = ts.tv_nsec;
1505
1506 if (!copy_to_user(p, &rts, sizeof(rts)))
1507 return ret;
1508 }
1509 /*
1510 * If an application puts its timeval in read-only memory, we
1511 * don't want the Linux-specific update to the timeval to
1512 * cause a fault after the select has completed
1513 * successfully. However, because we're not updating the
1514 * timeval, we can't restart the system call.
1515 */
1516
1517sticky:
1518 if (ret == -ERESTARTNOHAND)
1519 ret = -EINTR;
1520 return ret;
1521}
1522
1472/* 1523/*
1473 * Ooo, nasty. We need here to frob 32-bit unsigned longs to 1524 * Ooo, nasty. We need here to frob 32-bit unsigned longs to
1474 * 64-bit unsigned longs. 1525 * 64-bit unsigned longs.
@@ -1550,7 +1601,8 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1550 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 1601 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
1551 1602
1552int compat_core_sys_select(int n, compat_ulong_t __user *inp, 1603int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1553 compat_ulong_t __user *outp, compat_ulong_t __user *exp, s64 *timeout) 1604 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1605 struct timespec *end_time)
1554{ 1606{
1555 fd_set_bits fds; 1607 fd_set_bits fds;
1556 void *bits; 1608 void *bits;
@@ -1597,7 +1649,7 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1597 zero_fd_set(n, fds.res_out); 1649 zero_fd_set(n, fds.res_out);
1598 zero_fd_set(n, fds.res_ex); 1650 zero_fd_set(n, fds.res_ex);
1599 1651
1600 ret = do_select(n, &fds, timeout); 1652 ret = do_select(n, &fds, end_time);
1601 1653
1602 if (ret < 0) 1654 if (ret < 0)
1603 goto out; 1655 goto out;
@@ -1623,7 +1675,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
1623 compat_ulong_t __user *outp, compat_ulong_t __user *exp, 1675 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1624 struct compat_timeval __user *tvp) 1676 struct compat_timeval __user *tvp)
1625{ 1677{
1626 s64 timeout = -1; 1678 struct timespec end_time, *to = NULL;
1627 struct compat_timeval tv; 1679 struct compat_timeval tv;
1628 int ret; 1680 int ret;
1629 1681
@@ -1631,43 +1683,14 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
1631 if (copy_from_user(&tv, tvp, sizeof(tv))) 1683 if (copy_from_user(&tv, tvp, sizeof(tv)))
1632 return -EFAULT; 1684 return -EFAULT;
1633 1685
1634 if (tv.tv_sec < 0 || tv.tv_usec < 0) 1686 to = &end_time;
1687 if (poll_select_set_timeout(to, tv.tv_sec,
1688 tv.tv_usec * NSEC_PER_USEC))
1635 return -EINVAL; 1689 return -EINVAL;
1636
1637 /* Cast to u64 to make GCC stop complaining */
1638 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
1639 timeout = -1; /* infinite */
1640 else {
1641 timeout = DIV_ROUND_UP(tv.tv_usec, 1000000/HZ);
1642 timeout += tv.tv_sec * HZ;
1643 }
1644 } 1690 }
1645 1691
1646 ret = compat_core_sys_select(n, inp, outp, exp, &timeout); 1692 ret = compat_core_sys_select(n, inp, outp, exp, to);
1647 1693 ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
1648 if (tvp) {
1649 struct compat_timeval rtv;
1650
1651 if (current->personality & STICKY_TIMEOUTS)
1652 goto sticky;
1653 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
1654 rtv.tv_sec = timeout;
1655 if (compat_timeval_compare(&rtv, &tv) >= 0)
1656 rtv = tv;
1657 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
1658sticky:
1659 /*
1660 * If an application puts its timeval in read-only
1661 * memory, we don't want the Linux-specific update to
1662 * the timeval to cause a fault after the select has
1663 * completed successfully. However, because we're not
1664 * updating the timeval, we can't restart the system
1665 * call.
1666 */
1667 if (ret == -ERESTARTNOHAND)
1668 ret = -EINTR;
1669 }
1670 }
1671 1694
1672 return ret; 1695 return ret;
1673} 1696}
@@ -1680,15 +1703,16 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1680{ 1703{
1681 compat_sigset_t ss32; 1704 compat_sigset_t ss32;
1682 sigset_t ksigmask, sigsaved; 1705 sigset_t ksigmask, sigsaved;
1683 s64 timeout = MAX_SCHEDULE_TIMEOUT;
1684 struct compat_timespec ts; 1706 struct compat_timespec ts;
1707 struct timespec end_time, *to = NULL;
1685 int ret; 1708 int ret;
1686 1709
1687 if (tsp) { 1710 if (tsp) {
1688 if (copy_from_user(&ts, tsp, sizeof(ts))) 1711 if (copy_from_user(&ts, tsp, sizeof(ts)))
1689 return -EFAULT; 1712 return -EFAULT;
1690 1713
1691 if (ts.tv_sec < 0 || ts.tv_nsec < 0) 1714 to = &end_time;
1715 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
1692 return -EINVAL; 1716 return -EINVAL;
1693 } 1717 }
1694 1718
@@ -1703,51 +1727,8 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1703 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 1727 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1704 } 1728 }
1705 1729
1706 do { 1730 ret = compat_core_sys_select(n, inp, outp, exp, to);
1707 if (tsp) { 1731 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
1708 if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) {
1709 timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
1710 timeout += ts.tv_sec * (unsigned long)HZ;
1711 ts.tv_sec = 0;
1712 ts.tv_nsec = 0;
1713 } else {
1714 ts.tv_sec -= MAX_SELECT_SECONDS;
1715 timeout = MAX_SELECT_SECONDS * HZ;
1716 }
1717 }
1718
1719 ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
1720
1721 } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
1722
1723 if (tsp) {
1724 struct compat_timespec rts;
1725
1726 if (current->personality & STICKY_TIMEOUTS)
1727 goto sticky;
1728
1729 rts.tv_sec = timeout / HZ;
1730 rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
1731 if (rts.tv_nsec >= NSEC_PER_SEC) {
1732 rts.tv_sec++;
1733 rts.tv_nsec -= NSEC_PER_SEC;
1734 }
1735 if (compat_timespec_compare(&rts, &ts) >= 0)
1736 rts = ts;
1737 if (copy_to_user(tsp, &rts, sizeof(rts))) {
1738sticky:
1739 /*
1740 * If an application puts its timeval in read-only
1741 * memory, we don't want the Linux-specific update to
1742 * the timeval to cause a fault after the select has
1743 * completed successfully. However, because we're not
1744 * updating the timeval, we can't restart the system
1745 * call.
1746 */
1747 if (ret == -ERESTARTNOHAND)
1748 ret = -EINTR;
1749 }
1750 }
1751 1732
1752 if (ret == -ERESTARTNOHAND) { 1733 if (ret == -ERESTARTNOHAND) {
1753 /* 1734 /*
@@ -1792,18 +1773,16 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1792 compat_sigset_t ss32; 1773 compat_sigset_t ss32;
1793 sigset_t ksigmask, sigsaved; 1774 sigset_t ksigmask, sigsaved;
1794 struct compat_timespec ts; 1775 struct compat_timespec ts;
1795 s64 timeout = -1; 1776 struct timespec end_time, *to = NULL;
1796 int ret; 1777 int ret;
1797 1778
1798 if (tsp) { 1779 if (tsp) {
1799 if (copy_from_user(&ts, tsp, sizeof(ts))) 1780 if (copy_from_user(&ts, tsp, sizeof(ts)))
1800 return -EFAULT; 1781 return -EFAULT;
1801 1782
1802 /* We assume that ts.tv_sec is always lower than 1783 to = &end_time;
1803 the number of seconds that can be expressed in 1784 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
1804 an s64. Otherwise the compiler bitches at us */ 1785 return -EINVAL;
1805 timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
1806 timeout += ts.tv_sec * HZ;
1807 } 1786 }
1808 1787
1809 if (sigmask) { 1788 if (sigmask) {
@@ -1817,7 +1796,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1817 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 1796 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1818 } 1797 }
1819 1798
1820 ret = do_sys_poll(ufds, nfds, &timeout); 1799 ret = do_sys_poll(ufds, nfds, to);
1821 1800
1822 /* We can restart this syscall, usually */ 1801 /* We can restart this syscall, usually */
1823 if (ret == -EINTR) { 1802 if (ret == -EINTR) {
@@ -1835,31 +1814,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1835 } else if (sigmask) 1814 } else if (sigmask)
1836 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1815 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1837 1816
1838 if (tsp && timeout >= 0) { 1817 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
1839 struct compat_timespec rts;
1840
1841 if (current->personality & STICKY_TIMEOUTS)
1842 goto sticky;
1843 /* Yes, we know it's actually an s64, but it's also positive. */
1844 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
1845 1000;
1846 rts.tv_sec = timeout;
1847 if (compat_timespec_compare(&rts, &ts) >= 0)
1848 rts = ts;
1849 if (copy_to_user(tsp, &rts, sizeof(rts))) {
1850sticky:
1851 /*
1852 * If an application puts its timeval in read-only
1853 * memory, we don't want the Linux-specific update to
1854 * the timeval to cause a fault after the select has
1855 * completed successfully. However, because we're not
1856 * updating the timeval, we can't restart the system
1857 * call.
1858 */
1859 if (ret == -ERESTARTNOHAND && timeout >= 0)
1860 ret = -EINTR;
1861 }
1862 }
1863 1818
1864 return ret; 1819 return ret;
1865} 1820}
diff --git a/fs/select.c b/fs/select.c
index da0e88201c3a..448e44001286 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -24,9 +24,64 @@
24#include <linux/fdtable.h> 24#include <linux/fdtable.h>
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
27#include <linux/hrtimer.h>
27 28
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
29 30
31
32/*
33 * Estimate expected accuracy in ns from a timeval.
34 *
35 * After quite a bit of churning around, we've settled on
36 * a simple thing of taking 0.1% of the timeout as the
37 * slack, with a cap of 100 msec.
38 * "nice" tasks get a 0.5% slack instead.
39 *
40 * Consider this comment an open invitation to come up with even
41 * better solutions..
42 */
43
44static long __estimate_accuracy(struct timespec *tv)
45{
46 long slack;
47 int divfactor = 1000;
48
49 if (task_nice(current) > 0)
50 divfactor = divfactor / 5;
51
52 slack = tv->tv_nsec / divfactor;
53 slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);
54
55 if (slack > 100 * NSEC_PER_MSEC)
56 slack = 100 * NSEC_PER_MSEC;
57
58 if (slack < 0)
59 slack = 0;
60 return slack;
61}
62
63static long estimate_accuracy(struct timespec *tv)
64{
65 unsigned long ret;
66 struct timespec now;
67
68 /*
69 * Realtime tasks get a slack of 0 for obvious reasons.
70 */
71
72 if (rt_task(current))
73 return 0;
74
75 ktime_get_ts(&now);
76 now = timespec_sub(*tv, now);
77 ret = __estimate_accuracy(&now);
78 if (ret < current->timer_slack_ns)
79 return current->timer_slack_ns;
80 return ret;
81}
82
83
84
30struct poll_table_page { 85struct poll_table_page {
31 struct poll_table_page * next; 86 struct poll_table_page * next;
32 struct poll_table_entry * entry; 87 struct poll_table_entry * entry;
@@ -130,6 +185,79 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
130 add_wait_queue(wait_address, &entry->wait); 185 add_wait_queue(wait_address, &entry->wait);
131} 186}
132 187
188/**
189 * poll_select_set_timeout - helper function to setup the timeout value
190 * @to: pointer to timespec variable for the final timeout
191 * @sec: seconds (from user space)
192 * @nsec: nanoseconds (from user space)
193 *
194 * Note, we do not use a timespec for the user space value here, That
195 * way we can use the function for timeval and compat interfaces as well.
196 *
197 * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
198 */
199int poll_select_set_timeout(struct timespec *to, long sec, long nsec)
200{
201 struct timespec ts = {.tv_sec = sec, .tv_nsec = nsec};
202
203 if (!timespec_valid(&ts))
204 return -EINVAL;
205
206 /* Optimize for the zero timeout value here */
207 if (!sec && !nsec) {
208 to->tv_sec = to->tv_nsec = 0;
209 } else {
210 ktime_get_ts(to);
211 *to = timespec_add_safe(*to, ts);
212 }
213 return 0;
214}
215
216static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
217 int timeval, int ret)
218{
219 struct timespec rts;
220 struct timeval rtv;
221
222 if (!p)
223 return ret;
224
225 if (current->personality & STICKY_TIMEOUTS)
226 goto sticky;
227
228 /* No update for zero timeout */
229 if (!end_time->tv_sec && !end_time->tv_nsec)
230 return ret;
231
232 ktime_get_ts(&rts);
233 rts = timespec_sub(*end_time, rts);
234 if (rts.tv_sec < 0)
235 rts.tv_sec = rts.tv_nsec = 0;
236
237 if (timeval) {
238 rtv.tv_sec = rts.tv_sec;
239 rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
240
241 if (!copy_to_user(p, &rtv, sizeof(rtv)))
242 return ret;
243
244 } else if (!copy_to_user(p, &rts, sizeof(rts)))
245 return ret;
246
247 /*
248 * If an application puts its timeval in read-only memory, we
249 * don't want the Linux-specific update to the timeval to
250 * cause a fault after the select has completed
251 * successfully. However, because we're not updating the
252 * timeval, we can't restart the system call.
253 */
254
255sticky:
256 if (ret == -ERESTARTNOHAND)
257 ret = -EINTR;
258 return ret;
259}
260
133#define FDS_IN(fds, n) (fds->in + n) 261#define FDS_IN(fds, n) (fds->in + n)
134#define FDS_OUT(fds, n) (fds->out + n) 262#define FDS_OUT(fds, n) (fds->out + n)
135#define FDS_EX(fds, n) (fds->ex + n) 263#define FDS_EX(fds, n) (fds->ex + n)
@@ -182,11 +310,13 @@ get_max:
182#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) 310#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
183#define POLLEX_SET (POLLPRI) 311#define POLLEX_SET (POLLPRI)
184 312
185int do_select(int n, fd_set_bits *fds, s64 *timeout) 313int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
186{ 314{
315 ktime_t expire, *to = NULL;
187 struct poll_wqueues table; 316 struct poll_wqueues table;
188 poll_table *wait; 317 poll_table *wait;
189 int retval, i; 318 int retval, i, timed_out = 0;
319 unsigned long slack = 0;
190 320
191 rcu_read_lock(); 321 rcu_read_lock();
192 retval = max_select_fd(n, fds); 322 retval = max_select_fd(n, fds);
@@ -198,12 +328,17 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
198 328
199 poll_initwait(&table); 329 poll_initwait(&table);
200 wait = &table.pt; 330 wait = &table.pt;
201 if (!*timeout) 331 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
202 wait = NULL; 332 wait = NULL;
333 timed_out = 1;
334 }
335
336 if (end_time && !timed_out)
337 slack = estimate_accuracy(end_time);
338
203 retval = 0; 339 retval = 0;
204 for (;;) { 340 for (;;) {
205 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; 341 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
206 long __timeout;
207 342
208 set_current_state(TASK_INTERRUPTIBLE); 343 set_current_state(TASK_INTERRUPTIBLE);
209 344
@@ -259,27 +394,25 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
259 cond_resched(); 394 cond_resched();
260 } 395 }
261 wait = NULL; 396 wait = NULL;
262 if (retval || !*timeout || signal_pending(current)) 397 if (retval || timed_out || signal_pending(current))
263 break; 398 break;
264 if (table.error) { 399 if (table.error) {
265 retval = table.error; 400 retval = table.error;
266 break; 401 break;
267 } 402 }
268 403
269 if (*timeout < 0) { 404 /*
270 /* Wait indefinitely */ 405 * If this is the first loop and we have a timeout
271 __timeout = MAX_SCHEDULE_TIMEOUT; 406 * given, then we convert to ktime_t and set the to
272 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) { 407 * pointer to the expiry value.
273 /* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */ 408 */
274 __timeout = MAX_SCHEDULE_TIMEOUT - 1; 409 if (end_time && !to) {
275 *timeout -= __timeout; 410 expire = timespec_to_ktime(*end_time);
276 } else { 411 to = &expire;
277 __timeout = *timeout;
278 *timeout = 0;
279 } 412 }
280 __timeout = schedule_timeout(__timeout); 413
281 if (*timeout >= 0) 414 if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
282 *timeout += __timeout; 415 timed_out = 1;
283 } 416 }
284 __set_current_state(TASK_RUNNING); 417 __set_current_state(TASK_RUNNING);
285 418
@@ -300,7 +433,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
300 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 433 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
301 434
302int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 435int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
303 fd_set __user *exp, s64 *timeout) 436 fd_set __user *exp, struct timespec *end_time)
304{ 437{
305 fd_set_bits fds; 438 fd_set_bits fds;
306 void *bits; 439 void *bits;
@@ -351,7 +484,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
351 zero_fd_set(n, fds.res_out); 484 zero_fd_set(n, fds.res_out);
352 zero_fd_set(n, fds.res_ex); 485 zero_fd_set(n, fds.res_ex);
353 486
354 ret = do_select(n, &fds, timeout); 487 ret = do_select(n, &fds, end_time);
355 488
356 if (ret < 0) 489 if (ret < 0)
357 goto out; 490 goto out;
@@ -377,7 +510,7 @@ out_nofds:
377asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, 510asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
378 fd_set __user *exp, struct timeval __user *tvp) 511 fd_set __user *exp, struct timeval __user *tvp)
379{ 512{
380 s64 timeout = -1; 513 struct timespec end_time, *to = NULL;
381 struct timeval tv; 514 struct timeval tv;
382 int ret; 515 int ret;
383 516
@@ -385,43 +518,14 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
385 if (copy_from_user(&tv, tvp, sizeof(tv))) 518 if (copy_from_user(&tv, tvp, sizeof(tv)))
386 return -EFAULT; 519 return -EFAULT;
387 520
388 if (tv.tv_sec < 0 || tv.tv_usec < 0) 521 to = &end_time;
522 if (poll_select_set_timeout(to, tv.tv_sec,
523 tv.tv_usec * NSEC_PER_USEC))
389 return -EINVAL; 524 return -EINVAL;
390
391 /* Cast to u64 to make GCC stop complaining */
392 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
393 timeout = -1; /* infinite */
394 else {
395 timeout = DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ);
396 timeout += tv.tv_sec * HZ;
397 }
398 } 525 }
399 526
400 ret = core_sys_select(n, inp, outp, exp, &timeout); 527 ret = core_sys_select(n, inp, outp, exp, to);
401 528 ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
402 if (tvp) {
403 struct timeval rtv;
404
405 if (current->personality & STICKY_TIMEOUTS)
406 goto sticky;
407 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
408 rtv.tv_sec = timeout;
409 if (timeval_compare(&rtv, &tv) >= 0)
410 rtv = tv;
411 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
412sticky:
413 /*
414 * If an application puts its timeval in read-only
415 * memory, we don't want the Linux-specific update to
416 * the timeval to cause a fault after the select has
417 * completed successfully. However, because we're not
418 * updating the timeval, we can't restart the system
419 * call.
420 */
421 if (ret == -ERESTARTNOHAND)
422 ret = -EINTR;
423 }
424 }
425 529
426 return ret; 530 return ret;
427} 531}
@@ -431,25 +535,17 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
431 fd_set __user *exp, struct timespec __user *tsp, 535 fd_set __user *exp, struct timespec __user *tsp,
432 const sigset_t __user *sigmask, size_t sigsetsize) 536 const sigset_t __user *sigmask, size_t sigsetsize)
433{ 537{
434 s64 timeout = MAX_SCHEDULE_TIMEOUT;
435 sigset_t ksigmask, sigsaved; 538 sigset_t ksigmask, sigsaved;
436 struct timespec ts; 539 struct timespec ts, end_time, *to = NULL;
437 int ret; 540 int ret;
438 541
439 if (tsp) { 542 if (tsp) {
440 if (copy_from_user(&ts, tsp, sizeof(ts))) 543 if (copy_from_user(&ts, tsp, sizeof(ts)))
441 return -EFAULT; 544 return -EFAULT;
442 545
443 if (ts.tv_sec < 0 || ts.tv_nsec < 0) 546 to = &end_time;
547 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
444 return -EINVAL; 548 return -EINVAL;
445
446 /* Cast to u64 to make GCC stop complaining */
447 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
448 timeout = -1; /* infinite */
449 else {
450 timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
451 timeout += ts.tv_sec * HZ;
452 }
453 } 549 }
454 550
455 if (sigmask) { 551 if (sigmask) {
@@ -463,32 +559,8 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
463 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 559 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
464 } 560 }
465 561
466 ret = core_sys_select(n, inp, outp, exp, &timeout); 562 ret = core_sys_select(n, inp, outp, exp, &end_time);
467 563 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
468 if (tsp) {
469 struct timespec rts;
470
471 if (current->personality & STICKY_TIMEOUTS)
472 goto sticky;
473 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
474 1000;
475 rts.tv_sec = timeout;
476 if (timespec_compare(&rts, &ts) >= 0)
477 rts = ts;
478 if (copy_to_user(tsp, &rts, sizeof(rts))) {
479sticky:
480 /*
481 * If an application puts its timeval in read-only
482 * memory, we don't want the Linux-specific update to
483 * the timeval to cause a fault after the select has
484 * completed successfully. However, because we're not
485 * updating the timeval, we can't restart the system
486 * call.
487 */
488 if (ret == -ERESTARTNOHAND)
489 ret = -EINTR;
490 }
491 }
492 564
493 if (ret == -ERESTARTNOHAND) { 565 if (ret == -ERESTARTNOHAND) {
494 /* 566 /*
@@ -574,18 +646,24 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
574} 646}
575 647
576static int do_poll(unsigned int nfds, struct poll_list *list, 648static int do_poll(unsigned int nfds, struct poll_list *list,
577 struct poll_wqueues *wait, s64 *timeout) 649 struct poll_wqueues *wait, struct timespec *end_time)
578{ 650{
579 int count = 0;
580 poll_table* pt = &wait->pt; 651 poll_table* pt = &wait->pt;
652 ktime_t expire, *to = NULL;
653 int timed_out = 0, count = 0;
654 unsigned long slack = 0;
581 655
582 /* Optimise the no-wait case */ 656 /* Optimise the no-wait case */
583 if (!(*timeout)) 657 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
584 pt = NULL; 658 pt = NULL;
659 timed_out = 1;
660 }
661
662 if (end_time && !timed_out)
663 slack = estimate_accuracy(end_time);
585 664
586 for (;;) { 665 for (;;) {
587 struct poll_list *walk; 666 struct poll_list *walk;
588 long __timeout;
589 667
590 set_current_state(TASK_INTERRUPTIBLE); 668 set_current_state(TASK_INTERRUPTIBLE);
591 for (walk = list; walk != NULL; walk = walk->next) { 669 for (walk = list; walk != NULL; walk = walk->next) {
@@ -617,27 +695,21 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
617 if (signal_pending(current)) 695 if (signal_pending(current))
618 count = -EINTR; 696 count = -EINTR;
619 } 697 }
620 if (count || !*timeout) 698 if (count || timed_out)
621 break; 699 break;
622 700
623 if (*timeout < 0) { 701 /*
624 /* Wait indefinitely */ 702 * If this is the first loop and we have a timeout
625 __timeout = MAX_SCHEDULE_TIMEOUT; 703 * given, then we convert to ktime_t and set the to
626 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) { 704 * pointer to the expiry value.
627 /* 705 */
628 * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in 706 if (end_time && !to) {
629 * a loop 707 expire = timespec_to_ktime(*end_time);
630 */ 708 to = &expire;
631 __timeout = MAX_SCHEDULE_TIMEOUT - 1;
632 *timeout -= __timeout;
633 } else {
634 __timeout = *timeout;
635 *timeout = 0;
636 } 709 }
637 710
638 __timeout = schedule_timeout(__timeout); 711 if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
639 if (*timeout >= 0) 712 timed_out = 1;
640 *timeout += __timeout;
641 } 713 }
642 __set_current_state(TASK_RUNNING); 714 __set_current_state(TASK_RUNNING);
643 return count; 715 return count;
@@ -646,7 +718,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
646#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ 718#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \
647 sizeof(struct pollfd)) 719 sizeof(struct pollfd))
648 720
649int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) 721int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
722 struct timespec *end_time)
650{ 723{
651 struct poll_wqueues table; 724 struct poll_wqueues table;
652 int err = -EFAULT, fdcount, len, size; 725 int err = -EFAULT, fdcount, len, size;
@@ -686,7 +759,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
686 } 759 }
687 760
688 poll_initwait(&table); 761 poll_initwait(&table);
689 fdcount = do_poll(nfds, head, &table, timeout); 762 fdcount = do_poll(nfds, head, &table, end_time);
690 poll_freewait(&table); 763 poll_freewait(&table);
691 764
692 for (walk = head; walk; walk = walk->next) { 765 for (walk = head; walk; walk = walk->next) {
@@ -712,16 +785,21 @@ out_fds:
712 785
713static long do_restart_poll(struct restart_block *restart_block) 786static long do_restart_poll(struct restart_block *restart_block)
714{ 787{
715 struct pollfd __user *ufds = (struct pollfd __user*)restart_block->arg0; 788 struct pollfd __user *ufds = restart_block->poll.ufds;
716 int nfds = restart_block->arg1; 789 int nfds = restart_block->poll.nfds;
717 s64 timeout = ((s64)restart_block->arg3<<32) | (s64)restart_block->arg2; 790 struct timespec *to = NULL, end_time;
718 int ret; 791 int ret;
719 792
720 ret = do_sys_poll(ufds, nfds, &timeout); 793 if (restart_block->poll.has_timeout) {
794 end_time.tv_sec = restart_block->poll.tv_sec;
795 end_time.tv_nsec = restart_block->poll.tv_nsec;
796 to = &end_time;
797 }
798
799 ret = do_sys_poll(ufds, nfds, to);
800
721 if (ret == -EINTR) { 801 if (ret == -EINTR) {
722 restart_block->fn = do_restart_poll; 802 restart_block->fn = do_restart_poll;
723 restart_block->arg2 = timeout & 0xFFFFFFFF;
724 restart_block->arg3 = (u64)timeout >> 32;
725 ret = -ERESTART_RESTARTBLOCK; 803 ret = -ERESTART_RESTARTBLOCK;
726 } 804 }
727 return ret; 805 return ret;
@@ -730,31 +808,32 @@ static long do_restart_poll(struct restart_block *restart_block)
730asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, 808asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
731 long timeout_msecs) 809 long timeout_msecs)
732{ 810{
733 s64 timeout_jiffies; 811 struct timespec end_time, *to = NULL;
734 int ret; 812 int ret;
735 813
736 if (timeout_msecs > 0) { 814 if (timeout_msecs >= 0) {
737#if HZ > 1000 815 to = &end_time;
738 /* We can only overflow if HZ > 1000 */ 816 poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
739 if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ) 817 NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
740 timeout_jiffies = -1;
741 else
742#endif
743 timeout_jiffies = msecs_to_jiffies(timeout_msecs) + 1;
744 } else {
745 /* Infinite (< 0) or no (0) timeout */
746 timeout_jiffies = timeout_msecs;
747 } 818 }
748 819
749 ret = do_sys_poll(ufds, nfds, &timeout_jiffies); 820 ret = do_sys_poll(ufds, nfds, to);
821
750 if (ret == -EINTR) { 822 if (ret == -EINTR) {
751 struct restart_block *restart_block; 823 struct restart_block *restart_block;
824
752 restart_block = &current_thread_info()->restart_block; 825 restart_block = &current_thread_info()->restart_block;
753 restart_block->fn = do_restart_poll; 826 restart_block->fn = do_restart_poll;
754 restart_block->arg0 = (unsigned long)ufds; 827 restart_block->poll.ufds = ufds;
755 restart_block->arg1 = nfds; 828 restart_block->poll.nfds = nfds;
756 restart_block->arg2 = timeout_jiffies & 0xFFFFFFFF; 829
757 restart_block->arg3 = (u64)timeout_jiffies >> 32; 830 if (timeout_msecs >= 0) {
831 restart_block->poll.tv_sec = end_time.tv_sec;
832 restart_block->poll.tv_nsec = end_time.tv_nsec;
833 restart_block->poll.has_timeout = 1;
834 } else
835 restart_block->poll.has_timeout = 0;
836
758 ret = -ERESTART_RESTARTBLOCK; 837 ret = -ERESTART_RESTARTBLOCK;
759 } 838 }
760 return ret; 839 return ret;
@@ -766,21 +845,16 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
766 size_t sigsetsize) 845 size_t sigsetsize)
767{ 846{
768 sigset_t ksigmask, sigsaved; 847 sigset_t ksigmask, sigsaved;
769 struct timespec ts; 848 struct timespec ts, end_time, *to = NULL;
770 s64 timeout = -1;
771 int ret; 849 int ret;
772 850
773 if (tsp) { 851 if (tsp) {
774 if (copy_from_user(&ts, tsp, sizeof(ts))) 852 if (copy_from_user(&ts, tsp, sizeof(ts)))
775 return -EFAULT; 853 return -EFAULT;
776 854
777 /* Cast to u64 to make GCC stop complaining */ 855 to = &end_time;
778 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) 856 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
779 timeout = -1; /* infinite */ 857 return -EINVAL;
780 else {
781 timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
782 timeout += ts.tv_sec * HZ;
783 }
784 } 858 }
785 859
786 if (sigmask) { 860 if (sigmask) {
@@ -794,7 +868,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
794 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 868 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
795 } 869 }
796 870
797 ret = do_sys_poll(ufds, nfds, &timeout); 871 ret = do_sys_poll(ufds, nfds, to);
798 872
799 /* We can restart this syscall, usually */ 873 /* We can restart this syscall, usually */
800 if (ret == -EINTR) { 874 if (ret == -EINTR) {
@@ -812,31 +886,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
812 } else if (sigmask) 886 } else if (sigmask)
813 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 887 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
814 888
815 if (tsp && timeout >= 0) { 889 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
816 struct timespec rts;
817
818 if (current->personality & STICKY_TIMEOUTS)
819 goto sticky;
820 /* Yes, we know it's actually an s64, but it's also positive. */
821 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
822 1000;
823 rts.tv_sec = timeout;
824 if (timespec_compare(&rts, &ts) >= 0)
825 rts = ts;
826 if (copy_to_user(tsp, &rts, sizeof(rts))) {
827 sticky:
828 /*
829 * If an application puts its timeval in read-only
830 * memory, we don't want the Linux-specific update to
831 * the timeval to cause a fault after the select has
832 * completed successfully. However, because we're not
833 * updating the timeval, we can't restart the system
834 * call.
835 */
836 if (ret == -ERESTARTNOHAND && timeout >= 0)
837 ret = -EINTR;
838 }
839 }
840 890
841 return ret; 891 return ret;
842} 892}
diff --git a/fs/timerfd.c b/fs/timerfd.c
index c502c60e4f54..0862f0e49d0c 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -52,11 +52,9 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
52 52
53static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) 53static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
54{ 54{
55 ktime_t now, remaining; 55 ktime_t remaining;
56
57 now = ctx->tmr.base->get_time();
58 remaining = ktime_sub(ctx->tmr.expires, now);
59 56
57 remaining = hrtimer_expires_remaining(&ctx->tmr);
60 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; 58 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
61} 59}
62 60
@@ -74,7 +72,7 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
74 ctx->ticks = 0; 72 ctx->ticks = 0;
75 ctx->tintv = timespec_to_ktime(ktmr->it_interval); 73 ctx->tintv = timespec_to_ktime(ktmr->it_interval);
76 hrtimer_init(&ctx->tmr, ctx->clockid, htmode); 74 hrtimer_init(&ctx->tmr, ctx->clockid, htmode);
77 ctx->tmr.expires = texp; 75 hrtimer_set_expires(&ctx->tmr, texp);
78 ctx->tmr.function = timerfd_tmrproc; 76 ctx->tmr.function = timerfd_tmrproc;
79 if (texp.tv64 != 0) 77 if (texp.tv64 != 0)
80 hrtimer_start(&ctx->tmr, texp, htmode); 78 hrtimer_start(&ctx->tmr, texp, htmode);