Diffstat (limited to 'fs')
-rw-r--r--   fs/compat.c    187
-rw-r--r--   fs/select.c    396
-rw-r--r--   fs/timerfd.c     8
3 files changed, 297 insertions, 294 deletions
diff --git a/fs/compat.c b/fs/compat.c
index 5f9ec449c799..3b58c32be526 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1475,6 +1475,57 @@ out_ret:
 
 #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
 
+static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
+				      int timeval, int ret)
+{
+	struct timespec ts;
+
+	if (!p)
+		return ret;
+
+	if (current->personality & STICKY_TIMEOUTS)
+		goto sticky;
+
+	/* No update for zero timeout */
+	if (!end_time->tv_sec && !end_time->tv_nsec)
+		return ret;
+
+	ktime_get_ts(&ts);
+	ts = timespec_sub(*end_time, ts);
+	if (ts.tv_sec < 0)
+		ts.tv_sec = ts.tv_nsec = 0;
+
+	if (timeval) {
+		struct compat_timeval rtv;
+
+		rtv.tv_sec = ts.tv_sec;
+		rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
+
+		if (!copy_to_user(p, &rtv, sizeof(rtv)))
+			return ret;
+	} else {
+		struct compat_timespec rts;
+
+		rts.tv_sec = ts.tv_sec;
+		rts.tv_nsec = ts.tv_nsec;
+
+		if (!copy_to_user(p, &rts, sizeof(rts)))
+			return ret;
+	}
+	/*
+	 * If an application puts its timeval in read-only memory, we
+	 * don't want the Linux-specific update to the timeval to
+	 * cause a fault after the select has completed
+	 * successfully. However, because we're not updating the
+	 * timeval, we can't restart the system call.
+	 */
+
+sticky:
+	if (ret == -ERESTARTNOHAND)
+		ret = -EINTR;
+	return ret;
+}
+
 /*
  * Ooo, nasty. We need here to frob 32-bit unsigned longs to
  * 64-bit unsigned longs.
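The helper above centralizes the write-back of the unslept time that compat_sys_select(), pselect and ppoll previously each open-coded (the three near-identical "sticky" blocks removed further down in this patch). The write-back itself is a Linux extension; POSIX permits leaving the timeout untouched. A minimal userspace program, not part of the patch, that observes the behaviour the helper preserves:

/*
 * On Linux, select() rewrites its timeout argument with the time
 * that was left when it returned. Build with: cc -o sel sel.c
 */
#include <stdio.h>
#include <sys/select.h>
#include <unistd.h>

int main(void)
{
	fd_set rfds;
	struct timeval tv = { .tv_sec = 2, .tv_usec = 0 };

	FD_ZERO(&rfds);
	FD_SET(STDIN_FILENO, &rfds);

	/* Returns early if stdin becomes readable within 2 seconds */
	int n = select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv);

	/* tv now holds the unslept remainder (zero after a full timeout) */
	printf("select() = %d, remaining %ld.%06ld s\n",
	       n, (long)tv.tv_sec, (long)tv.tv_usec);
	return 0;
}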
@@ -1556,7 +1607,8 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
 	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
 
 int compat_core_sys_select(int n, compat_ulong_t __user *inp,
-	compat_ulong_t __user *outp, compat_ulong_t __user *exp, s64 *timeout)
+	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
+	struct timespec *end_time)
 {
 	fd_set_bits fds;
 	void *bits;
@@ -1603,7 +1655,7 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp,
 	zero_fd_set(n, fds.res_out);
 	zero_fd_set(n, fds.res_ex);
 
-	ret = do_select(n, &fds, timeout);
+	ret = do_select(n, &fds, end_time);
 
 	if (ret < 0)
 		goto out;
@@ -1629,7 +1681,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
 	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
 	struct compat_timeval __user *tvp)
 {
-	s64 timeout = -1;
+	struct timespec end_time, *to = NULL;
 	struct compat_timeval tv;
 	int ret;
 
@@ -1637,43 +1689,14 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
 		if (copy_from_user(&tv, tvp, sizeof(tv)))
 			return -EFAULT;
 
-		if (tv.tv_sec < 0 || tv.tv_usec < 0)
+		to = &end_time;
+		if (poll_select_set_timeout(to, tv.tv_sec,
+					    tv.tv_usec * NSEC_PER_USEC))
 			return -EINVAL;
-
-		/* Cast to u64 to make GCC stop complaining */
-		if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
-			timeout = -1; /* infinite */
-		else {
-			timeout = DIV_ROUND_UP(tv.tv_usec, 1000000/HZ);
-			timeout += tv.tv_sec * HZ;
-		}
 	}
 
-	ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
-
-	if (tvp) {
-		struct compat_timeval rtv;
-
-		if (current->personality & STICKY_TIMEOUTS)
-			goto sticky;
-		rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
-		rtv.tv_sec = timeout;
-		if (compat_timeval_compare(&rtv, &tv) >= 0)
-			rtv = tv;
-		if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
-sticky:
-			/*
-			 * If an application puts its timeval in read-only
-			 * memory, we don't want the Linux-specific update to
-			 * the timeval to cause a fault after the select has
-			 * completed successfully. However, because we're not
-			 * updating the timeval, we can't restart the system
-			 * call.
-			 */
-			if (ret == -ERESTARTNOHAND)
-				ret = -EINTR;
-		}
-	}
+	ret = compat_core_sys_select(n, inp, outp, exp, to);
+	ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
 
 	return ret;
 }
@@ -1686,15 +1709,16 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
 {
 	compat_sigset_t ss32;
 	sigset_t ksigmask, sigsaved;
-	s64 timeout = MAX_SCHEDULE_TIMEOUT;
 	struct compat_timespec ts;
+	struct timespec end_time, *to = NULL;
 	int ret;
 
 	if (tsp) {
 		if (copy_from_user(&ts, tsp, sizeof(ts)))
 			return -EFAULT;
 
-		if (ts.tv_sec < 0 || ts.tv_nsec < 0)
+		to = &end_time;
+		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
 			return -EINVAL;
 	}
 
@@ -1709,51 +1733,8 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
 		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
 	}
 
-	do {
-		if (tsp) {
-			if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) {
-				timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
-				timeout += ts.tv_sec * (unsigned long)HZ;
-				ts.tv_sec = 0;
-				ts.tv_nsec = 0;
-			} else {
-				ts.tv_sec -= MAX_SELECT_SECONDS;
-				timeout = MAX_SELECT_SECONDS * HZ;
-			}
-		}
-
-		ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
-
-	} while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
-
-	if (tsp) {
-		struct compat_timespec rts;
-
-		if (current->personality & STICKY_TIMEOUTS)
-			goto sticky;
-
-		rts.tv_sec = timeout / HZ;
-		rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
-		if (rts.tv_nsec >= NSEC_PER_SEC) {
-			rts.tv_sec++;
-			rts.tv_nsec -= NSEC_PER_SEC;
-		}
-		if (compat_timespec_compare(&rts, &ts) >= 0)
-			rts = ts;
-		if (copy_to_user(tsp, &rts, sizeof(rts))) {
-sticky:
-			/*
-			 * If an application puts its timeval in read-only
-			 * memory, we don't want the Linux-specific update to
-			 * the timeval to cause a fault after the select has
-			 * completed successfully. However, because we're not
-			 * updating the timeval, we can't restart the system
-			 * call.
-			 */
-			if (ret == -ERESTARTNOHAND)
-				ret = -EINTR;
-		}
-	}
+	ret = compat_core_sys_select(n, inp, outp, exp, to);
+	ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
 
 	if (ret == -ERESTARTNOHAND) {
 		/*
@@ -1798,18 +1779,16 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
 	compat_sigset_t ss32;
 	sigset_t ksigmask, sigsaved;
 	struct compat_timespec ts;
-	s64 timeout = -1;
+	struct timespec end_time, *to = NULL;
 	int ret;
 
 	if (tsp) {
 		if (copy_from_user(&ts, tsp, sizeof(ts)))
 			return -EFAULT;
 
-		/* We assume that ts.tv_sec is always lower than
-		   the number of seconds that can be expressed in
-		   an s64. Otherwise the compiler bitches at us */
-		timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
-		timeout += ts.tv_sec * HZ;
+		to = &end_time;
+		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
+			return -EINVAL;
 	}
 
 	if (sigmask) {
@@ -1823,7 +1802,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
 		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
 	}
 
-	ret = do_sys_poll(ufds, nfds, &timeout);
+	ret = do_sys_poll(ufds, nfds, to);
 
 	/* We can restart this syscall, usually */
 	if (ret == -EINTR) {
@@ -1841,31 +1820,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
 	} else if (sigmask)
 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 
-	if (tsp && timeout >= 0) {
-		struct compat_timespec rts;
-
-		if (current->personality & STICKY_TIMEOUTS)
-			goto sticky;
-		/* Yes, we know it's actually an s64, but it's also positive. */
-		rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
-						1000;
-		rts.tv_sec = timeout;
-		if (compat_timespec_compare(&rts, &ts) >= 0)
-			rts = ts;
-		if (copy_to_user(tsp, &rts, sizeof(rts))) {
-sticky:
-			/*
-			 * If an application puts its timeval in read-only
-			 * memory, we don't want the Linux-specific update to
-			 * the timeval to cause a fault after the select has
-			 * completed successfully. However, because we're not
-			 * updating the timeval, we can't restart the system
-			 * call.
-			 */
-			if (ret == -ERESTARTNOHAND && timeout >= 0)
-				ret = -EINTR;
-		}
-	}
+	ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
 
 	return ret;
 }
diff --git a/fs/select.c b/fs/select.c
index da0e88201c3a..448e44001286 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -24,9 +24,64 @@
 #include <linux/fdtable.h>
 #include <linux/fs.h>
 #include <linux/rcupdate.h>
+#include <linux/hrtimer.h>
 
 #include <asm/uaccess.h>
 
+
+/*
+ * Estimate expected accuracy in ns from a timeval.
+ *
+ * After quite a bit of churning around, we've settled on
+ * a simple thing of taking 0.1% of the timeout as the
+ * slack, with a cap of 100 msec.
+ * "nice" tasks get a 0.5% slack instead.
+ *
+ * Consider this comment an open invitation to come up with even
+ * better solutions..
+ */
+
+static long __estimate_accuracy(struct timespec *tv)
+{
+	long slack;
+	int divfactor = 1000;
+
+	if (task_nice(current) > 0)
+		divfactor = divfactor / 5;
+
+	slack = tv->tv_nsec / divfactor;
+	slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);
+
+	if (slack > 100 * NSEC_PER_MSEC)
+		slack = 100 * NSEC_PER_MSEC;
+
+	if (slack < 0)
+		slack = 0;
+	return slack;
+}
+
+static long estimate_accuracy(struct timespec *tv)
+{
+	unsigned long ret;
+	struct timespec now;
+
+	/*
+	 * Realtime tasks get a slack of 0 for obvious reasons.
+	 */
+
+	if (rt_task(current))
+		return 0;
+
+	ktime_get_ts(&now);
+	now = timespec_sub(*tv, now);
+	ret = __estimate_accuracy(&now);
+	if (ret < current->timer_slack_ns)
+		return current->timer_slack_ns;
+	return ret;
+}
+
+
+
 struct poll_table_page {
 	struct poll_table_page * next;
 	struct poll_table_entry * entry;
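For illustration, a userspace re-creation of the slack heuristic above; it is not part of the patch, the NSEC_* constants are spelled out by hand, and the nice level is passed in rather than read from the current task. Note the kernel applies the heuristic to the *remaining* time (end_time minus now), not to the timeout the caller originally passed in:

#include <stdio.h>

#define NSEC_PER_SEC	1000000000L
#define NSEC_PER_MSEC	1000000L

static long estimate_slack(long sec, long nsec, int nice)
{
	int divfactor = 1000;		/* 0.1% of the timeout... */
	long slack;

	if (nice > 0)
		divfactor /= 5;		/* ...0.5% for "nice" tasks */

	slack = nsec / divfactor + sec * (NSEC_PER_SEC / divfactor);

	if (slack > 100 * NSEC_PER_MSEC)
		slack = 100 * NSEC_PER_MSEC;	/* cap at 100 msec */
	if (slack < 0)
		slack = 0;
	return slack;
}

int main(void)
{
	/* 10 ms remaining -> 10 us slack */
	printf("%ld ns\n", estimate_slack(0, 10 * NSEC_PER_MSEC, 0));
	/* 5 min remaining -> 300 ms, capped to 100 ms */
	printf("%ld ns\n", estimate_slack(300, 0, 0));
	return 0;
}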
@@ -130,6 +185,79 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
 	add_wait_queue(wait_address, &entry->wait);
 }
 
+/**
+ * poll_select_set_timeout - helper function to setup the timeout value
+ * @to:		pointer to timespec variable for the final timeout
+ * @sec:	seconds (from user space)
+ * @nsec:	nanoseconds (from user space)
+ *
+ * Note, we do not use a timespec for the user space value here, That
+ * way we can use the function for timeval and compat interfaces as well.
+ *
+ * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
+ */
+int poll_select_set_timeout(struct timespec *to, long sec, long nsec)
+{
+	struct timespec ts = {.tv_sec = sec, .tv_nsec = nsec};
+
+	if (!timespec_valid(&ts))
+		return -EINVAL;
+
+	/* Optimize for the zero timeout value here */
+	if (!sec && !nsec) {
+		to->tv_sec = to->tv_nsec = 0;
+	} else {
+		ktime_get_ts(to);
+		*to = timespec_add_safe(*to, ts);
+	}
+	return 0;
+}
+
+static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
+				      int timeval, int ret)
+{
+	struct timespec rts;
+	struct timeval rtv;
+
+	if (!p)
+		return ret;
+
+	if (current->personality & STICKY_TIMEOUTS)
+		goto sticky;
+
+	/* No update for zero timeout */
+	if (!end_time->tv_sec && !end_time->tv_nsec)
+		return ret;
+
+	ktime_get_ts(&rts);
+	rts = timespec_sub(*end_time, rts);
+	if (rts.tv_sec < 0)
+		rts.tv_sec = rts.tv_nsec = 0;
+
+	if (timeval) {
+		rtv.tv_sec = rts.tv_sec;
+		rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
+
+		if (!copy_to_user(p, &rtv, sizeof(rtv)))
+			return ret;
+
+	} else if (!copy_to_user(p, &rts, sizeof(rts)))
+		return ret;
+
+	/*
+	 * If an application puts its timeval in read-only memory, we
+	 * don't want the Linux-specific update to the timeval to
+	 * cause a fault after the select has completed
+	 * successfully. However, because we're not updating the
+	 * timeval, we can't restart the system call.
+	 */
+
+sticky:
+	if (ret == -ERESTARTNOHAND)
+		ret = -EINTR;
+	return ret;
+}
+
 #define FDS_IN(fds, n)		(fds->in + n)
 #define FDS_OUT(fds, n)		(fds->out + n)
 #define FDS_EX(fds, n)		(fds->ex + n)
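poll_select_set_timeout() converts the relative user-supplied timeout into an absolute deadline on the monotonic clock once, up front; poll_select_copy_remaining() converts back to a relative value only when writing the result out. A userspace sketch of that round trip, not part of the patch, assuming CLOCK_MONOTONIC as the stand-in for the kernel's ktime_get_ts() time base:

#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC 1000000000L

/* relative (sec, nsec) -> absolute deadline, as poll_select_set_timeout() */
static int set_deadline(struct timespec *to, long sec, long nsec)
{
	if (sec < 0 || nsec < 0 || nsec >= NSEC_PER_SEC)
		return -1;			/* not normalized: -EINVAL */
	clock_gettime(CLOCK_MONOTONIC, to);
	to->tv_sec += sec;
	to->tv_nsec += nsec;
	if (to->tv_nsec >= NSEC_PER_SEC) {
		to->tv_sec++;
		to->tv_nsec -= NSEC_PER_SEC;
	}
	return 0;
}

/* absolute deadline -> time still left, as poll_select_copy_remaining() */
static void remaining(const struct timespec *to, struct timespec *rts)
{
	clock_gettime(CLOCK_MONOTONIC, rts);
	rts->tv_sec = to->tv_sec - rts->tv_sec;
	rts->tv_nsec = to->tv_nsec - rts->tv_nsec;
	if (rts->tv_nsec < 0) {
		rts->tv_sec--;
		rts->tv_nsec += NSEC_PER_SEC;
	}
	if (rts->tv_sec < 0)
		rts->tv_sec = rts->tv_nsec = 0;	/* already expired */
}

int main(void)
{
	struct timespec to, rem;

	set_deadline(&to, 1, 500000000);	/* 1.5 s from now */
	remaining(&to, &rem);
	printf("remaining: %ld.%09ld s\n", (long)rem.tv_sec, rem.tv_nsec);
	return 0;
}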
@@ -182,11 +310,13 @@ get_max:
 #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
 #define POLLEX_SET (POLLPRI)
 
-int do_select(int n, fd_set_bits *fds, s64 *timeout)
+int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
 {
+	ktime_t expire, *to = NULL;
 	struct poll_wqueues table;
 	poll_table *wait;
-	int retval, i;
+	int retval, i, timed_out = 0;
+	unsigned long slack = 0;
 
 	rcu_read_lock();
 	retval = max_select_fd(n, fds);
@@ -198,12 +328,17 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
 
 	poll_initwait(&table);
 	wait = &table.pt;
-	if (!*timeout)
+	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
 		wait = NULL;
+		timed_out = 1;
+	}
+
+	if (end_time && !timed_out)
+		slack = estimate_accuracy(end_time);
+
 	retval = 0;
 	for (;;) {
 		unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
-		long __timeout;
 
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -259,27 +394,25 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
 			cond_resched();
 		}
 		wait = NULL;
-		if (retval || !*timeout || signal_pending(current))
+		if (retval || timed_out || signal_pending(current))
 			break;
 		if (table.error) {
 			retval = table.error;
 			break;
 		}
 
-		if (*timeout < 0) {
-			/* Wait indefinitely */
-			__timeout = MAX_SCHEDULE_TIMEOUT;
-		} else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) {
-			/* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */
-			__timeout = MAX_SCHEDULE_TIMEOUT - 1;
-			*timeout -= __timeout;
-		} else {
-			__timeout = *timeout;
-			*timeout = 0;
+		/*
+		 * If this is the first loop and we have a timeout
+		 * given, then we convert to ktime_t and set the to
+		 * pointer to the expiry value.
+		 */
+		if (end_time && !to) {
+			expire = timespec_to_ktime(*end_time);
+			to = &expire;
 		}
-		__timeout = schedule_timeout(__timeout);
-		if (*timeout >= 0)
-			*timeout += __timeout;
+
+		if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
+			timed_out = 1;
 	}
 	__set_current_state(TASK_RUNNING);
 
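The rewritten loop sleeps against the absolute expiry (widened by the slack) instead of repeatedly counting down a relative jiffies budget, so spurious wakeups cannot stretch the total wait and the chunked MAX_SCHEDULE_TIMEOUT dance disappears. A userspace analogue of absolute-deadline sleeping, for illustration only (clock_nanosleep() exposes no slack parameter; the range is a kernel-internal detail):

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec deadline;

	clock_gettime(CLOCK_MONOTONIC, &deadline);
	deadline.tv_sec += 2;			/* absolute: now + 2 s */

	/* An interrupted sleep is simply retried with the same deadline,
	 * so no drift accumulates across wakeups. */
	while (clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME,
			       &deadline, NULL) != 0)
		;

	puts("deadline reached");
	return 0;
}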
@@ -300,7 +433,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
 	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
 
 int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
-			   fd_set __user *exp, s64 *timeout)
+			   fd_set __user *exp, struct timespec *end_time)
 {
 	fd_set_bits fds;
 	void *bits;
@@ -351,7 +484,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 	zero_fd_set(n, fds.res_out);
 	zero_fd_set(n, fds.res_ex);
 
-	ret = do_select(n, &fds, timeout);
+	ret = do_select(n, &fds, end_time);
 
 	if (ret < 0)
 		goto out;
@@ -377,7 +510,7 @@ out_nofds:
 asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 			fd_set __user *exp, struct timeval __user *tvp)
 {
-	s64 timeout = -1;
+	struct timespec end_time, *to = NULL;
 	struct timeval tv;
 	int ret;
 
@@ -385,43 +518,14 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 		if (copy_from_user(&tv, tvp, sizeof(tv)))
 			return -EFAULT;
 
-		if (tv.tv_sec < 0 || tv.tv_usec < 0)
+		to = &end_time;
+		if (poll_select_set_timeout(to, tv.tv_sec,
+					    tv.tv_usec * NSEC_PER_USEC))
 			return -EINVAL;
-
-		/* Cast to u64 to make GCC stop complaining */
-		if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
-			timeout = -1; /* infinite */
-		else {
-			timeout = DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ);
-			timeout += tv.tv_sec * HZ;
-		}
 	}
 
-	ret = core_sys_select(n, inp, outp, exp, &timeout);
-
-	if (tvp) {
-		struct timeval rtv;
-
-		if (current->personality & STICKY_TIMEOUTS)
-			goto sticky;
-		rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
-		rtv.tv_sec = timeout;
-		if (timeval_compare(&rtv, &tv) >= 0)
-			rtv = tv;
-		if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
-sticky:
-			/*
-			 * If an application puts its timeval in read-only
-			 * memory, we don't want the Linux-specific update to
-			 * the timeval to cause a fault after the select has
-			 * completed successfully. However, because we're not
-			 * updating the timeval, we can't restart the system
-			 * call.
-			 */
-			if (ret == -ERESTARTNOHAND)
-				ret = -EINTR;
-		}
-	}
+	ret = core_sys_select(n, inp, outp, exp, to);
+	ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
 
 	return ret;
 }
@@ -431,25 +535,17 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
 			fd_set __user *exp, struct timespec __user *tsp,
 			const sigset_t __user *sigmask, size_t sigsetsize)
 {
-	s64 timeout = MAX_SCHEDULE_TIMEOUT;
 	sigset_t ksigmask, sigsaved;
-	struct timespec ts;
+	struct timespec ts, end_time, *to = NULL;
 	int ret;
 
 	if (tsp) {
 		if (copy_from_user(&ts, tsp, sizeof(ts)))
 			return -EFAULT;
 
-		if (ts.tv_sec < 0 || ts.tv_nsec < 0)
+		to = &end_time;
+		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
 			return -EINVAL;
-
-		/* Cast to u64 to make GCC stop complaining */
-		if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
-			timeout = -1; /* infinite */
-		else {
-			timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
-			timeout += ts.tv_sec * HZ;
-		}
 	}
 
 	if (sigmask) {
@@ -463,32 +559,8 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
 		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
 	}
 
-	ret = core_sys_select(n, inp, outp, exp, &timeout);
-
-	if (tsp) {
-		struct timespec rts;
-
-		if (current->personality & STICKY_TIMEOUTS)
-			goto sticky;
-		rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
-						1000;
-		rts.tv_sec = timeout;
-		if (timespec_compare(&rts, &ts) >= 0)
-			rts = ts;
-		if (copy_to_user(tsp, &rts, sizeof(rts))) {
-sticky:
-			/*
-			 * If an application puts its timeval in read-only
-			 * memory, we don't want the Linux-specific update to
-			 * the timeval to cause a fault after the select has
-			 * completed successfully. However, because we're not
-			 * updating the timeval, we can't restart the system
-			 * call.
-			 */
-			if (ret == -ERESTARTNOHAND)
-				ret = -EINTR;
-		}
-	}
+	ret = core_sys_select(n, inp, outp, exp, &end_time);
+	ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
 
 	if (ret == -ERESTARTNOHAND) {
 		/*
@@ -574,18 +646,24 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
 }
 
 static int do_poll(unsigned int nfds, struct poll_list *list,
-		   struct poll_wqueues *wait, s64 *timeout)
+		   struct poll_wqueues *wait, struct timespec *end_time)
 {
-	int count = 0;
 	poll_table* pt = &wait->pt;
+	ktime_t expire, *to = NULL;
+	int timed_out = 0, count = 0;
+	unsigned long slack = 0;
 
 	/* Optimise the no-wait case */
-	if (!(*timeout))
+	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
 		pt = NULL;
+		timed_out = 1;
+	}
+
+	if (end_time && !timed_out)
+		slack = estimate_accuracy(end_time);
 
 	for (;;) {
 		struct poll_list *walk;
-		long __timeout;
 
 		set_current_state(TASK_INTERRUPTIBLE);
 		for (walk = list; walk != NULL; walk = walk->next) {
@@ -617,27 +695,21 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
 			if (signal_pending(current))
 				count = -EINTR;
 		}
-		if (count || !*timeout)
+		if (count || timed_out)
 			break;
 
-		if (*timeout < 0) {
-			/* Wait indefinitely */
-			__timeout = MAX_SCHEDULE_TIMEOUT;
-		} else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) {
-			/*
-			 * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in
-			 * a loop
-			 */
-			__timeout = MAX_SCHEDULE_TIMEOUT - 1;
-			*timeout -= __timeout;
-		} else {
-			__timeout = *timeout;
-			*timeout = 0;
+		/*
+		 * If this is the first loop and we have a timeout
+		 * given, then we convert to ktime_t and set the to
+		 * pointer to the expiry value.
+		 */
+		if (end_time && !to) {
+			expire = timespec_to_ktime(*end_time);
+			to = &expire;
 		}
 
-		__timeout = schedule_timeout(__timeout);
-		if (*timeout >= 0)
-			*timeout += __timeout;
+		if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
+			timed_out = 1;
 	}
 	__set_current_state(TASK_RUNNING);
 	return count;
@@ -646,7 +718,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
 #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \
 			sizeof(struct pollfd))
 
-int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
+int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
+		struct timespec *end_time)
 {
 	struct poll_wqueues table;
 	int err = -EFAULT, fdcount, len, size;
@@ -686,7 +759,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
 	}
 
 	poll_initwait(&table);
-	fdcount = do_poll(nfds, head, &table, timeout);
+	fdcount = do_poll(nfds, head, &table, end_time);
 	poll_freewait(&table);
 
 	for (walk = head; walk; walk = walk->next) {
@@ -712,16 +785,21 @@ out_fds:
 
 static long do_restart_poll(struct restart_block *restart_block)
 {
-	struct pollfd __user *ufds = (struct pollfd __user*)restart_block->arg0;
-	int nfds = restart_block->arg1;
-	s64 timeout = ((s64)restart_block->arg3<<32) | (s64)restart_block->arg2;
+	struct pollfd __user *ufds = restart_block->poll.ufds;
+	int nfds = restart_block->poll.nfds;
+	struct timespec *to = NULL, end_time;
 	int ret;
 
-	ret = do_sys_poll(ufds, nfds, &timeout);
+	if (restart_block->poll.has_timeout) {
+		end_time.tv_sec = restart_block->poll.tv_sec;
+		end_time.tv_nsec = restart_block->poll.tv_nsec;
+		to = &end_time;
+	}
+
+	ret = do_sys_poll(ufds, nfds, to);
+
 	if (ret == -EINTR) {
 		restart_block->fn = do_restart_poll;
-		restart_block->arg2 = timeout & 0xFFFFFFFF;
-		restart_block->arg3 = (u64)timeout >> 32;
 		ret = -ERESTART_RESTARTBLOCK;
 	}
 	return ret;
@@ -730,31 +808,32 @@ static long do_restart_poll(struct restart_block *restart_block)
 asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
 			long timeout_msecs)
 {
-	s64 timeout_jiffies;
+	struct timespec end_time, *to = NULL;
 	int ret;
 
-	if (timeout_msecs > 0) {
-#if HZ > 1000
-		/* We can only overflow if HZ > 1000 */
-		if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ)
-			timeout_jiffies = -1;
-		else
-#endif
-			timeout_jiffies = msecs_to_jiffies(timeout_msecs) + 1;
-	} else {
-		/* Infinite (< 0) or no (0) timeout */
-		timeout_jiffies = timeout_msecs;
+	if (timeout_msecs >= 0) {
+		to = &end_time;
+		poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
+			NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
 	}
 
-	ret = do_sys_poll(ufds, nfds, &timeout_jiffies);
+	ret = do_sys_poll(ufds, nfds, to);
+
 	if (ret == -EINTR) {
 		struct restart_block *restart_block;
+
 		restart_block = &current_thread_info()->restart_block;
 		restart_block->fn = do_restart_poll;
-		restart_block->arg0 = (unsigned long)ufds;
-		restart_block->arg1 = nfds;
-		restart_block->arg2 = timeout_jiffies & 0xFFFFFFFF;
-		restart_block->arg3 = (u64)timeout_jiffies >> 32;
+		restart_block->poll.ufds = ufds;
+		restart_block->poll.nfds = nfds;
+
+		if (timeout_msecs >= 0) {
+			restart_block->poll.tv_sec = end_time.tv_sec;
+			restart_block->poll.tv_nsec = end_time.tv_nsec;
+			restart_block->poll.has_timeout = 1;
+		} else
+			restart_block->poll.has_timeout = 0;
+
 		ret = -ERESTART_RESTARTBLOCK;
 	}
 	return ret;
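The restart path now records the absolute deadline, so a restarted poll resumes aiming at the same expiry instead of re-deriving a 64-bit jiffies count smeared across two 32-bit args. The poll member of struct restart_block is added to <linux/thread_info.h> elsewhere in this series; its layout, inferred here from the uses above rather than quoted from the header, is presumably a union member along these lines:

	/* sketch only; field layout inferred from the call sites above */
	struct {
		struct pollfd __user	*ufds;
		int			nfds;
		int			has_timeout;
		unsigned long		tv_sec;
		unsigned long		tv_nsec;
	} poll;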
@@ -766,21 +845,16 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
 		size_t sigsetsize)
 {
 	sigset_t ksigmask, sigsaved;
-	struct timespec ts;
-	s64 timeout = -1;
+	struct timespec ts, end_time, *to = NULL;
 	int ret;
 
 	if (tsp) {
 		if (copy_from_user(&ts, tsp, sizeof(ts)))
 			return -EFAULT;
 
-		/* Cast to u64 to make GCC stop complaining */
-		if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
-			timeout = -1; /* infinite */
-		else {
-			timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
-			timeout += ts.tv_sec * HZ;
-		}
+		to = &end_time;
+		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
+			return -EINVAL;
 	}
 
 	if (sigmask) {
@@ -794,7 +868,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
 		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
 	}
 
-	ret = do_sys_poll(ufds, nfds, &timeout);
+	ret = do_sys_poll(ufds, nfds, to);
 
 	/* We can restart this syscall, usually */
 	if (ret == -EINTR) {
@@ -812,31 +886,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
 	} else if (sigmask)
 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 
-	if (tsp && timeout >= 0) {
-		struct timespec rts;
-
-		if (current->personality & STICKY_TIMEOUTS)
-			goto sticky;
-		/* Yes, we know it's actually an s64, but it's also positive. */
-		rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
-						1000;
-		rts.tv_sec = timeout;
-		if (timespec_compare(&rts, &ts) >= 0)
-			rts = ts;
-		if (copy_to_user(tsp, &rts, sizeof(rts))) {
-	sticky:
-			/*
-			 * If an application puts its timeval in read-only
-			 * memory, we don't want the Linux-specific update to
-			 * the timeval to cause a fault after the select has
-			 * completed successfully. However, because we're not
-			 * updating the timeval, we can't restart the system
-			 * call.
-			 */
-			if (ret == -ERESTARTNOHAND && timeout >= 0)
-				ret = -EINTR;
-		}
-	}
+	ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
 
 	return ret;
 }
diff --git a/fs/timerfd.c b/fs/timerfd.c
index c502c60e4f54..0862f0e49d0c 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -52,11 +52,9 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
 
 static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
 {
-	ktime_t now, remaining;
-
-	now = ctx->tmr.base->get_time();
-	remaining = ktime_sub(ctx->tmr.expires, now);
+	ktime_t remaining;
 
+	remaining = hrtimer_expires_remaining(&ctx->tmr);
 	return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
 }
 
@@ -74,7 +72,7 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
 	ctx->ticks = 0;
 	ctx->tintv = timespec_to_ktime(ktmr->it_interval);
 	hrtimer_init(&ctx->tmr, ctx->clockid, htmode);
-	ctx->tmr.expires = texp;
+	hrtimer_set_expires(&ctx->tmr, texp);
 	ctx->tmr.function = timerfd_tmrproc;
 	if (texp.tv64 != 0)
 		hrtimer_start(&ctx->tmr, texp, htmode);
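timerfd now goes through the hrtimer expiry accessors instead of touching tmr.expires directly, which lets the range-hrtimer work this patch belongs to split a timer's expiry into a soft and a hard value without further changes here. Roughly what the two accessors amount to, sketched under the simplifying assumption of a single expiry field (the real helpers in the hrtimer headers hide the soft/hard split):

/* sketch only; the actual definitions live in the hrtimer headers */
static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
{
	timer->_expires = time;	/* assumed single-expiry layout */
}

static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
{
	/* expiry minus the current time of the timer's clock base */
	return ktime_sub(timer->_expires, timer->base->get_time());
}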