aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Arjan van de Ven <arjan@linux.intel.com> 2008-08-31 11:26:40 -0400
committer Arjan van de Ven <arjan@linux.intel.com> 2008-09-06 00:35:03 -0400
commit 8ff3e8e85fa6c312051134b3953e397feb639f51 (patch)
tree 526886377ca92a62f030c25bc8f91f13a6f991eb
parent be5dad20a55e054a35dac7f6f5f184dc72b379b4 (diff)
select: switch select() and poll() over to hrtimers
With lots of help, input and cleanups from Thomas Gleixner.

This patch switches select() and poll() over to hrtimers.

The core of the patch is replacing the "s64 timeout" with a "struct timespec end_time" in all the plumbing.

But most of the diffstat comes from using the just introduced helpers:
    poll_select_set_timeout
    poll_select_copy_remaining
    timespec_add_safe
which make manipulating the timespec easier and less error-prone.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- fs/compat.c 136
-rw-r--r-- fs/select.c 263
-rw-r--r-- include/linux/poll.h 6
3 files changed, 111 insertions, 294 deletions
diff --git a/fs/compat.c b/fs/compat.c
index 424767c954a0..133ed7f5d681 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1568,7 +1568,8 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1568 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 1568 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
1569 1569
1570int compat_core_sys_select(int n, compat_ulong_t __user *inp, 1570int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1571 compat_ulong_t __user *outp, compat_ulong_t __user *exp, s64 *timeout) 1571 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1572 struct timespec *end_time)
1572{ 1573{
1573 fd_set_bits fds; 1574 fd_set_bits fds;
1574 void *bits; 1575 void *bits;
@@ -1615,7 +1616,7 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1615 zero_fd_set(n, fds.res_out); 1616 zero_fd_set(n, fds.res_out);
1616 zero_fd_set(n, fds.res_ex); 1617 zero_fd_set(n, fds.res_ex);
1617 1618
1618 ret = do_select(n, &fds, timeout); 1619 ret = do_select(n, &fds, end_time);
1619 1620
1620 if (ret < 0) 1621 if (ret < 0)
1621 goto out; 1622 goto out;
@@ -1641,7 +1642,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
1641 compat_ulong_t __user *outp, compat_ulong_t __user *exp, 1642 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1642 struct compat_timeval __user *tvp) 1643 struct compat_timeval __user *tvp)
1643{ 1644{
1644 s64 timeout = -1; 1645 struct timespec end_time, *to = NULL;
1645 struct compat_timeval tv; 1646 struct compat_timeval tv;
1646 int ret; 1647 int ret;
1647 1648
@@ -1649,43 +1650,14 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
1649 if (copy_from_user(&tv, tvp, sizeof(tv))) 1650 if (copy_from_user(&tv, tvp, sizeof(tv)))
1650 return -EFAULT; 1651 return -EFAULT;
1651 1652
1652 if (tv.tv_sec < 0 || tv.tv_usec < 0) 1653 to = &end_time;
1654 if (poll_select_set_timeout(to, tv.tv_sec,
1655 tv.tv_usec * NSEC_PER_USEC))
1653 return -EINVAL; 1656 return -EINVAL;
1654
1655 /* Cast to u64 to make GCC stop complaining */
1656 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
1657 timeout = -1; /* infinite */
1658 else {
1659 timeout = DIV_ROUND_UP(tv.tv_usec, 1000000/HZ);
1660 timeout += tv.tv_sec * HZ;
1661 }
1662 } 1657 }
1663 1658
1664 ret = compat_core_sys_select(n, inp, outp, exp, &timeout); 1659 ret = compat_core_sys_select(n, inp, outp, exp, to);
1665 1660 ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
1666 if (tvp) {
1667 struct compat_timeval rtv;
1668
1669 if (current->personality & STICKY_TIMEOUTS)
1670 goto sticky;
1671 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
1672 rtv.tv_sec = timeout;
1673 if (compat_timeval_compare(&rtv, &tv) >= 0)
1674 rtv = tv;
1675 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
1676sticky:
1677 /*
1678 * If an application puts its timeval in read-only
1679 * memory, we don't want the Linux-specific update to
1680 * the timeval to cause a fault after the select has
1681 * completed successfully. However, because we're not
1682 * updating the timeval, we can't restart the system
1683 * call.
1684 */
1685 if (ret == -ERESTARTNOHAND)
1686 ret = -EINTR;
1687 }
1688 }
1689 1661
1690 return ret; 1662 return ret;
1691} 1663}
@@ -1698,15 +1670,16 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1698{ 1670{
1699 compat_sigset_t ss32; 1671 compat_sigset_t ss32;
1700 sigset_t ksigmask, sigsaved; 1672 sigset_t ksigmask, sigsaved;
1701 s64 timeout = MAX_SCHEDULE_TIMEOUT;
1702 struct compat_timespec ts; 1673 struct compat_timespec ts;
1674 struct timespec end_time, *to = NULL;
1703 int ret; 1675 int ret;
1704 1676
1705 if (tsp) { 1677 if (tsp) {
1706 if (copy_from_user(&ts, tsp, sizeof(ts))) 1678 if (copy_from_user(&ts, tsp, sizeof(ts)))
1707 return -EFAULT; 1679 return -EFAULT;
1708 1680
1709 if (ts.tv_sec < 0 || ts.tv_nsec < 0) 1681 to = &end_time;
1682 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
1710 return -EINVAL; 1683 return -EINVAL;
1711 } 1684 }
1712 1685
@@ -1721,51 +1694,8 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1721 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 1694 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1722 } 1695 }
1723 1696
1724 do { 1697 ret = compat_core_sys_select(n, inp, outp, exp, to);
1725 if (tsp) { 1698 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
1726 if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) {
1727 timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
1728 timeout += ts.tv_sec * (unsigned long)HZ;
1729 ts.tv_sec = 0;
1730 ts.tv_nsec = 0;
1731 } else {
1732 ts.tv_sec -= MAX_SELECT_SECONDS;
1733 timeout = MAX_SELECT_SECONDS * HZ;
1734 }
1735 }
1736
1737 ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
1738
1739 } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
1740
1741 if (tsp) {
1742 struct compat_timespec rts;
1743
1744 if (current->personality & STICKY_TIMEOUTS)
1745 goto sticky;
1746
1747 rts.tv_sec = timeout / HZ;
1748 rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
1749 if (rts.tv_nsec >= NSEC_PER_SEC) {
1750 rts.tv_sec++;
1751 rts.tv_nsec -= NSEC_PER_SEC;
1752 }
1753 if (compat_timespec_compare(&rts, &ts) >= 0)
1754 rts = ts;
1755 if (copy_to_user(tsp, &rts, sizeof(rts))) {
1756sticky:
1757 /*
1758 * If an application puts its timeval in read-only
1759 * memory, we don't want the Linux-specific update to
1760 * the timeval to cause a fault after the select has
1761 * completed successfully. However, because we're not
1762 * updating the timeval, we can't restart the system
1763 * call.
1764 */
1765 if (ret == -ERESTARTNOHAND)
1766 ret = -EINTR;
1767 }
1768 }
1769 1699
1770 if (ret == -ERESTARTNOHAND) { 1700 if (ret == -ERESTARTNOHAND) {
1771 /* 1701 /*
@@ -1810,18 +1740,16 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1810 compat_sigset_t ss32; 1740 compat_sigset_t ss32;
1811 sigset_t ksigmask, sigsaved; 1741 sigset_t ksigmask, sigsaved;
1812 struct compat_timespec ts; 1742 struct compat_timespec ts;
1813 s64 timeout = -1; 1743 struct timespec end_time, *to = NULL;
1814 int ret; 1744 int ret;
1815 1745
1816 if (tsp) { 1746 if (tsp) {
1817 if (copy_from_user(&ts, tsp, sizeof(ts))) 1747 if (copy_from_user(&ts, tsp, sizeof(ts)))
1818 return -EFAULT; 1748 return -EFAULT;
1819 1749
1820 /* We assume that ts.tv_sec is always lower than 1750 to = &end_time;
1821 the number of seconds that can be expressed in 1751 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
1822 an s64. Otherwise the compiler bitches at us */ 1752 return -EINVAL;
1823 timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
1824 timeout += ts.tv_sec * HZ;
1825 } 1753 }
1826 1754
1827 if (sigmask) { 1755 if (sigmask) {
@@ -1835,7 +1763,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1835 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 1763 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1836 } 1764 }
1837 1765
1838 ret = do_sys_poll(ufds, nfds, &timeout); 1766 ret = do_sys_poll(ufds, nfds, to);
1839 1767
1840 /* We can restart this syscall, usually */ 1768 /* We can restart this syscall, usually */
1841 if (ret == -EINTR) { 1769 if (ret == -EINTR) {
@@ -1853,31 +1781,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1853 } else if (sigmask) 1781 } else if (sigmask)
1854 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1782 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1855 1783
1856 if (tsp && timeout >= 0) { 1784 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
1857 struct compat_timespec rts;
1858
1859 if (current->personality & STICKY_TIMEOUTS)
1860 goto sticky;
1861 /* Yes, we know it's actually an s64, but it's also positive. */
1862 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
1863 1000;
1864 rts.tv_sec = timeout;
1865 if (compat_timespec_compare(&rts, &ts) >= 0)
1866 rts = ts;
1867 if (copy_to_user(tsp, &rts, sizeof(rts))) {
1868sticky:
1869 /*
1870 * If an application puts its timeval in read-only
1871 * memory, we don't want the Linux-specific update to
1872 * the timeval to cause a fault after the select has
1873 * completed successfully. However, because we're not
1874 * updating the timeval, we can't restart the system
1875 * call.
1876 */
1877 if (ret == -ERESTARTNOHAND && timeout >= 0)
1878 ret = -EINTR;
1879 }
1880 }
1881 1785
1882 return ret; 1786 return ret;
1883} 1787}
diff --git a/fs/select.c b/fs/select.c
index 1180a6207789..f6dceb56793f 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -24,6 +24,7 @@
24#include <linux/fdtable.h> 24#include <linux/fdtable.h>
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
27#include <linux/hrtimer.h>
27 28
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
29 30
@@ -203,8 +204,6 @@ sticky:
203 return ret; 204 return ret;
204} 205}
205 206
206
207
208#define FDS_IN(fds, n) (fds->in + n) 207#define FDS_IN(fds, n) (fds->in + n)
209#define FDS_OUT(fds, n) (fds->out + n) 208#define FDS_OUT(fds, n) (fds->out + n)
210#define FDS_EX(fds, n) (fds->ex + n) 209#define FDS_EX(fds, n) (fds->ex + n)
@@ -257,11 +256,12 @@ get_max:
257#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) 256#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
258#define POLLEX_SET (POLLPRI) 257#define POLLEX_SET (POLLPRI)
259 258
260int do_select(int n, fd_set_bits *fds, s64 *timeout) 259int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
261{ 260{
261 ktime_t expire, *to = NULL;
262 struct poll_wqueues table; 262 struct poll_wqueues table;
263 poll_table *wait; 263 poll_table *wait;
264 int retval, i; 264 int retval, i, timed_out = 0;
265 265
266 rcu_read_lock(); 266 rcu_read_lock();
267 retval = max_select_fd(n, fds); 267 retval = max_select_fd(n, fds);
@@ -273,12 +273,14 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
273 273
274 poll_initwait(&table); 274 poll_initwait(&table);
275 wait = &table.pt; 275 wait = &table.pt;
276 if (!*timeout) 276 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
277 wait = NULL; 277 wait = NULL;
278 timed_out = 1;
279 }
280
278 retval = 0; 281 retval = 0;
279 for (;;) { 282 for (;;) {
280 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; 283 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
281 long __timeout;
282 284
283 set_current_state(TASK_INTERRUPTIBLE); 285 set_current_state(TASK_INTERRUPTIBLE);
284 286
@@ -334,27 +336,25 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
334 cond_resched(); 336 cond_resched();
335 } 337 }
336 wait = NULL; 338 wait = NULL;
337 if (retval || !*timeout || signal_pending(current)) 339 if (retval || timed_out || signal_pending(current))
338 break; 340 break;
339 if (table.error) { 341 if (table.error) {
340 retval = table.error; 342 retval = table.error;
341 break; 343 break;
342 } 344 }
343 345
344 if (*timeout < 0) { 346 /*
345 /* Wait indefinitely */ 347 * If this is the first loop and we have a timeout
346 __timeout = MAX_SCHEDULE_TIMEOUT; 348 * given, then we convert to ktime_t and set the to
347 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) { 349 * pointer to the expiry value.
348 /* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */ 350 */
349 __timeout = MAX_SCHEDULE_TIMEOUT - 1; 351 if (end_time && !to) {
350 *timeout -= __timeout; 352 expire = timespec_to_ktime(*end_time);
351 } else { 353 to = &expire;
352 __timeout = *timeout;
353 *timeout = 0;
354 } 354 }
355 __timeout = schedule_timeout(__timeout); 355
356 if (*timeout >= 0) 356 if (!schedule_hrtimeout(to, HRTIMER_MODE_ABS))
357 *timeout += __timeout; 357 timed_out = 1;
358 } 358 }
359 __set_current_state(TASK_RUNNING); 359 __set_current_state(TASK_RUNNING);
360 360
@@ -375,7 +375,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
375 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 375 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
376 376
377int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 377int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
378 fd_set __user *exp, s64 *timeout) 378 fd_set __user *exp, struct timespec *end_time)
379{ 379{
380 fd_set_bits fds; 380 fd_set_bits fds;
381 void *bits; 381 void *bits;
@@ -426,7 +426,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
426 zero_fd_set(n, fds.res_out); 426 zero_fd_set(n, fds.res_out);
427 zero_fd_set(n, fds.res_ex); 427 zero_fd_set(n, fds.res_ex);
428 428
429 ret = do_select(n, &fds, timeout); 429 ret = do_select(n, &fds, end_time);
430 430
431 if (ret < 0) 431 if (ret < 0)
432 goto out; 432 goto out;
@@ -452,7 +452,7 @@ out_nofds:
452asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, 452asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
453 fd_set __user *exp, struct timeval __user *tvp) 453 fd_set __user *exp, struct timeval __user *tvp)
454{ 454{
455 s64 timeout = -1; 455 struct timespec end_time, *to = NULL;
456 struct timeval tv; 456 struct timeval tv;
457 int ret; 457 int ret;
458 458
@@ -460,43 +460,14 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
460 if (copy_from_user(&tv, tvp, sizeof(tv))) 460 if (copy_from_user(&tv, tvp, sizeof(tv)))
461 return -EFAULT; 461 return -EFAULT;
462 462
463 if (tv.tv_sec < 0 || tv.tv_usec < 0) 463 to = &end_time;
464 if (poll_select_set_timeout(to, tv.tv_sec,
465 tv.tv_usec * NSEC_PER_USEC))
464 return -EINVAL; 466 return -EINVAL;
465
466 /* Cast to u64 to make GCC stop complaining */
467 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
468 timeout = -1; /* infinite */
469 else {
470 timeout = DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ);
471 timeout += tv.tv_sec * HZ;
472 }
473 } 467 }
474 468
475 ret = core_sys_select(n, inp, outp, exp, &timeout); 469 ret = core_sys_select(n, inp, outp, exp, to);
476 470 ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
477 if (tvp) {
478 struct timeval rtv;
479
480 if (current->personality & STICKY_TIMEOUTS)
481 goto sticky;
482 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
483 rtv.tv_sec = timeout;
484 if (timeval_compare(&rtv, &tv) >= 0)
485 rtv = tv;
486 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
487sticky:
488 /*
489 * If an application puts its timeval in read-only
490 * memory, we don't want the Linux-specific update to
491 * the timeval to cause a fault after the select has
492 * completed successfully. However, because we're not
493 * updating the timeval, we can't restart the system
494 * call.
495 */
496 if (ret == -ERESTARTNOHAND)
497 ret = -EINTR;
498 }
499 }
500 471
501 return ret; 472 return ret;
502} 473}
@@ -506,25 +477,17 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
506 fd_set __user *exp, struct timespec __user *tsp, 477 fd_set __user *exp, struct timespec __user *tsp,
507 const sigset_t __user *sigmask, size_t sigsetsize) 478 const sigset_t __user *sigmask, size_t sigsetsize)
508{ 479{
509 s64 timeout = MAX_SCHEDULE_TIMEOUT;
510 sigset_t ksigmask, sigsaved; 480 sigset_t ksigmask, sigsaved;
511 struct timespec ts; 481 struct timespec ts, end_time, *to = NULL;
512 int ret; 482 int ret;
513 483
514 if (tsp) { 484 if (tsp) {
515 if (copy_from_user(&ts, tsp, sizeof(ts))) 485 if (copy_from_user(&ts, tsp, sizeof(ts)))
516 return -EFAULT; 486 return -EFAULT;
517 487
518 if (ts.tv_sec < 0 || ts.tv_nsec < 0) 488 to = &end_time;
489 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
519 return -EINVAL; 490 return -EINVAL;
520
521 /* Cast to u64 to make GCC stop complaining */
522 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
523 timeout = -1; /* infinite */
524 else {
525 timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
526 timeout += ts.tv_sec * HZ;
527 }
528 } 491 }
529 492
530 if (sigmask) { 493 if (sigmask) {
@@ -538,32 +501,8 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
538 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 501 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
539 } 502 }
540 503
541 ret = core_sys_select(n, inp, outp, exp, &timeout); 504 ret = core_sys_select(n, inp, outp, exp, &end_time);
542 505 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
543 if (tsp) {
544 struct timespec rts;
545
546 if (current->personality & STICKY_TIMEOUTS)
547 goto sticky;
548 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
549 1000;
550 rts.tv_sec = timeout;
551 if (timespec_compare(&rts, &ts) >= 0)
552 rts = ts;
553 if (copy_to_user(tsp, &rts, sizeof(rts))) {
554sticky:
555 /*
556 * If an application puts its timeval in read-only
557 * memory, we don't want the Linux-specific update to
558 * the timeval to cause a fault after the select has
559 * completed successfully. However, because we're not
560 * updating the timeval, we can't restart the system
561 * call.
562 */
563 if (ret == -ERESTARTNOHAND)
564 ret = -EINTR;
565 }
566 }
567 506
568 if (ret == -ERESTARTNOHAND) { 507 if (ret == -ERESTARTNOHAND) {
569 /* 508 /*
@@ -649,18 +588,20 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
649} 588}
650 589
651static int do_poll(unsigned int nfds, struct poll_list *list, 590static int do_poll(unsigned int nfds, struct poll_list *list,
652 struct poll_wqueues *wait, s64 *timeout) 591 struct poll_wqueues *wait, struct timespec *end_time)
653{ 592{
654 int count = 0;
655 poll_table* pt = &wait->pt; 593 poll_table* pt = &wait->pt;
594 ktime_t expire, *to = NULL;
595 int timed_out = 0, count = 0;
656 596
657 /* Optimise the no-wait case */ 597 /* Optimise the no-wait case */
658 if (!(*timeout)) 598 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
659 pt = NULL; 599 pt = NULL;
600 timed_out = 1;
601 }
660 602
661 for (;;) { 603 for (;;) {
662 struct poll_list *walk; 604 struct poll_list *walk;
663 long __timeout;
664 605
665 set_current_state(TASK_INTERRUPTIBLE); 606 set_current_state(TASK_INTERRUPTIBLE);
666 for (walk = list; walk != NULL; walk = walk->next) { 607 for (walk = list; walk != NULL; walk = walk->next) {
@@ -692,27 +633,21 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
692 if (signal_pending(current)) 633 if (signal_pending(current))
693 count = -EINTR; 634 count = -EINTR;
694 } 635 }
695 if (count || !*timeout) 636 if (count || timed_out)
696 break; 637 break;
697 638
698 if (*timeout < 0) { 639 /*
699 /* Wait indefinitely */ 640 * If this is the first loop and we have a timeout
700 __timeout = MAX_SCHEDULE_TIMEOUT; 641 * given, then we convert to ktime_t and set the to
701 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) { 642 * pointer to the expiry value.
702 /* 643 */
703 * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in 644 if (end_time && !to) {
704 * a loop 645 expire = timespec_to_ktime(*end_time);
705 */ 646 to = &expire;
706 __timeout = MAX_SCHEDULE_TIMEOUT - 1;
707 *timeout -= __timeout;
708 } else {
709 __timeout = *timeout;
710 *timeout = 0;
711 } 647 }
712 648
713 __timeout = schedule_timeout(__timeout); 649 if (!schedule_hrtimeout(to, HRTIMER_MODE_ABS))
714 if (*timeout >= 0) 650 timed_out = 1;
715 *timeout += __timeout;
716 } 651 }
717 __set_current_state(TASK_RUNNING); 652 __set_current_state(TASK_RUNNING);
718 return count; 653 return count;
@@ -721,7 +656,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
721#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ 656#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \
722 sizeof(struct pollfd)) 657 sizeof(struct pollfd))
723 658
724int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) 659int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
660 struct timespec *end_time)
725{ 661{
726 struct poll_wqueues table; 662 struct poll_wqueues table;
727 int err = -EFAULT, fdcount, len, size; 663 int err = -EFAULT, fdcount, len, size;
@@ -761,7 +697,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
761 } 697 }
762 698
763 poll_initwait(&table); 699 poll_initwait(&table);
764 fdcount = do_poll(nfds, head, &table, timeout); 700 fdcount = do_poll(nfds, head, &table, end_time);
765 poll_freewait(&table); 701 poll_freewait(&table);
766 702
767 for (walk = head; walk; walk = walk->next) { 703 for (walk = head; walk; walk = walk->next) {
@@ -787,16 +723,21 @@ out_fds:
787 723
788static long do_restart_poll(struct restart_block *restart_block) 724static long do_restart_poll(struct restart_block *restart_block)
789{ 725{
790 struct pollfd __user *ufds = (struct pollfd __user*)restart_block->arg0; 726 struct pollfd __user *ufds = restart_block->poll.ufds;
791 int nfds = restart_block->arg1; 727 int nfds = restart_block->poll.nfds;
792 s64 timeout = ((s64)restart_block->arg3<<32) | (s64)restart_block->arg2; 728 struct timespec *to = NULL, end_time;
793 int ret; 729 int ret;
794 730
795 ret = do_sys_poll(ufds, nfds, &timeout); 731 if (restart_block->poll.has_timeout) {
732 end_time.tv_sec = restart_block->poll.tv_sec;
733 end_time.tv_nsec = restart_block->poll.tv_nsec;
734 to = &end_time;
735 }
736
737 ret = do_sys_poll(ufds, nfds, to);
738
796 if (ret == -EINTR) { 739 if (ret == -EINTR) {
797 restart_block->fn = do_restart_poll; 740 restart_block->fn = do_restart_poll;
798 restart_block->arg2 = timeout & 0xFFFFFFFF;
799 restart_block->arg3 = (u64)timeout >> 32;
800 ret = -ERESTART_RESTARTBLOCK; 741 ret = -ERESTART_RESTARTBLOCK;
801 } 742 }
802 return ret; 743 return ret;
@@ -805,31 +746,32 @@ static long do_restart_poll(struct restart_block *restart_block)
805asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, 746asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
806 long timeout_msecs) 747 long timeout_msecs)
807{ 748{
808 s64 timeout_jiffies; 749 struct timespec end_time, *to = NULL;
809 int ret; 750 int ret;
810 751
811 if (timeout_msecs > 0) { 752 if (timeout_msecs >= 0) {
812#if HZ > 1000 753 to = &end_time;
813 /* We can only overflow if HZ > 1000 */ 754 poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
814 if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ) 755 NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
815 timeout_jiffies = -1;
816 else
817#endif
818 timeout_jiffies = msecs_to_jiffies(timeout_msecs) + 1;
819 } else {
820 /* Infinite (< 0) or no (0) timeout */
821 timeout_jiffies = timeout_msecs;
822 } 756 }
823 757
824 ret = do_sys_poll(ufds, nfds, &timeout_jiffies); 758 ret = do_sys_poll(ufds, nfds, to);
759
825 if (ret == -EINTR) { 760 if (ret == -EINTR) {
826 struct restart_block *restart_block; 761 struct restart_block *restart_block;
762
827 restart_block = &current_thread_info()->restart_block; 763 restart_block = &current_thread_info()->restart_block;
828 restart_block->fn = do_restart_poll; 764 restart_block->fn = do_restart_poll;
829 restart_block->arg0 = (unsigned long)ufds; 765 restart_block->poll.ufds = ufds;
830 restart_block->arg1 = nfds; 766 restart_block->poll.nfds = nfds;
831 restart_block->arg2 = timeout_jiffies & 0xFFFFFFFF; 767
832 restart_block->arg3 = (u64)timeout_jiffies >> 32; 768 if (timeout_msecs >= 0) {
769 restart_block->poll.tv_sec = end_time.tv_sec;
770 restart_block->poll.tv_nsec = end_time.tv_nsec;
771 restart_block->poll.has_timeout = 1;
772 } else
773 restart_block->poll.has_timeout = 0;
774
833 ret = -ERESTART_RESTARTBLOCK; 775 ret = -ERESTART_RESTARTBLOCK;
834 } 776 }
835 return ret; 777 return ret;
@@ -841,21 +783,16 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
841 size_t sigsetsize) 783 size_t sigsetsize)
842{ 784{
843 sigset_t ksigmask, sigsaved; 785 sigset_t ksigmask, sigsaved;
844 struct timespec ts; 786 struct timespec ts, end_time, *to = NULL;
845 s64 timeout = -1;
846 int ret; 787 int ret;
847 788
848 if (tsp) { 789 if (tsp) {
849 if (copy_from_user(&ts, tsp, sizeof(ts))) 790 if (copy_from_user(&ts, tsp, sizeof(ts)))
850 return -EFAULT; 791 return -EFAULT;
851 792
852 /* Cast to u64 to make GCC stop complaining */ 793 to = &end_time;
853 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) 794 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
854 timeout = -1; /* infinite */ 795 return -EINVAL;
855 else {
856 timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
857 timeout += ts.tv_sec * HZ;
858 }
859 } 796 }
860 797
861 if (sigmask) { 798 if (sigmask) {
@@ -869,7 +806,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
869 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 806 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
870 } 807 }
871 808
872 ret = do_sys_poll(ufds, nfds, &timeout); 809 ret = do_sys_poll(ufds, nfds, to);
873 810
874 /* We can restart this syscall, usually */ 811 /* We can restart this syscall, usually */
875 if (ret == -EINTR) { 812 if (ret == -EINTR) {
@@ -887,31 +824,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
887 } else if (sigmask) 824 } else if (sigmask)
888 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 825 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
889 826
890 if (tsp && timeout >= 0) { 827 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
891 struct timespec rts;
892
893 if (current->personality & STICKY_TIMEOUTS)
894 goto sticky;
895 /* Yes, we know it's actually an s64, but it's also positive. */
896 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
897 1000;
898 rts.tv_sec = timeout;
899 if (timespec_compare(&rts, &ts) >= 0)
900 rts = ts;
901 if (copy_to_user(tsp, &rts, sizeof(rts))) {
902 sticky:
903 /*
904 * If an application puts its timeval in read-only
905 * memory, we don't want the Linux-specific update to
906 * the timeval to cause a fault after the select has
907 * completed successfully. However, because we're not
908 * updating the timeval, we can't restart the system
909 * call.
910 */
911 if (ret == -ERESTARTNOHAND && timeout >= 0)
912 ret = -EINTR;
913 }
914 }
915 828
916 return ret; 829 return ret;
917} 830}
diff --git a/include/linux/poll.h b/include/linux/poll.h
index f65de5128a9e..badd98ab06f6 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -114,11 +114,11 @@ void zero_fd_set(unsigned long nr, unsigned long *fdset)
114 114
115#define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1) 115#define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1)
116 116
117extern int do_select(int n, fd_set_bits *fds, s64 *timeout); 117extern int do_select(int n, fd_set_bits *fds, struct timespec *end_time);
118extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds, 118extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds,
119 s64 *timeout); 119 struct timespec *end_time);
120extern int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 120extern int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
121 fd_set __user *exp, s64 *timeout); 121 fd_set __user *exp, struct timespec *end_time);
122 122
123extern int poll_select_set_timeout(struct timespec *to, long sec, long nsec); 123extern int poll_select_set_timeout(struct timespec *to, long sec, long nsec);
124 124