aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Woodhouse <dwmw2@infradead.org>2006-01-18 20:44:05 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-01-18 22:20:30 -0500
commit9f72949f679df06021c9e43886c9191494fdb007 (patch)
treef4d76ed281b34e195db7741b69a7d095e168a864
parent36a7878a224c18aa4a5e098dc93d19cf5601462b (diff)
[PATCH] Add pselect/ppoll system call implementation
The following implementation of ppoll() and pselect() system calls depends on the architecture providing a TIF_RESTORE_SIGMASK flag in the thread_info. These system calls have to change the signal mask during their operation, and signal handlers must be invoked using the new, temporary signal mask. The old signal mask must be restored either upon successful exit from the system call, or upon returning from the invoked signal handler if the system call is interrupted. We can't simply restore the original signal mask and return to userspace, since the restored signal mask may actually block the signal which interrupted the system call. The TIF_RESTORE_SIGMASK flag deals with this by causing the syscall exit path to trap into do_signal() just as TIF_SIGPENDING does, and by causing do_signal() to use the saved signal mask instead of the current signal mask when setting up the stack frame for the signal handler -- or by causing do_signal() to simply restore the saved signal mask in the case where there is no handler to be invoked. The first patch implements the sys_pselect() and sys_ppoll() system calls, which are present only if TIF_RESTORE_SIGMASK is defined. That #ifdef should go away in time when all architectures have implemented it. The second patch implements TIF_RESTORE_SIGMASK for the PowerPC kernel (in the -mm tree), and the third patch then removes the arch-specific implementations of sys_rt_sigsuspend() and replaces them with generic versions using the same trick. The fourth and fifth patches, provided by David Howells, implement TIF_RESTORE_SIGMASK for FR-V and i386 respectively, and the sixth patch adds the syscalls to the i386 syscall table. This patch: Add the pselect() and ppoll() system calls, providing core routines usable by the original select() and poll() system calls and also the new calls (with their semantics w.r.t timeouts). Signed-off-by: David Woodhouse <dwmw2@infradead.org> Cc: Michael Kerrisk <mtk-manpages@gmx.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--fs/compat.c260
-rw-r--r--fs/select.c348
-rw-r--r--include/linux/poll.h6
3 files changed, 519 insertions, 95 deletions
diff --git a/fs/compat.c b/fs/compat.c
index c6ba9deabada..18b21b4c9e3a 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -53,6 +53,8 @@
53#include <asm/mmu_context.h> 53#include <asm/mmu_context.h>
54#include <asm/ioctls.h> 54#include <asm/ioctls.h>
55 55
56extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
57
56/* 58/*
57 * Not all architectures have sys_utime, so implement this in terms 59 * Not all architectures have sys_utime, so implement this in terms
58 * of sys_utimes. 60 * of sys_utimes.
@@ -1657,36 +1659,14 @@ static void select_bits_free(void *bits, int size)
1657#define MAX_SELECT_SECONDS \ 1659#define MAX_SELECT_SECONDS \
1658 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 1660 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
1659 1661
1660asmlinkage long 1662int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1661compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, 1663 compat_ulong_t __user *outp, compat_ulong_t __user *exp, s64 *timeout)
1662 compat_ulong_t __user *exp, struct compat_timeval __user *tvp)
1663{ 1664{
1664 fd_set_bits fds; 1665 fd_set_bits fds;
1665 char *bits; 1666 char *bits;
1666 long timeout;
1667 int size, max_fdset, ret = -EINVAL; 1667 int size, max_fdset, ret = -EINVAL;
1668 struct fdtable *fdt; 1668 struct fdtable *fdt;
1669 1669
1670 timeout = MAX_SCHEDULE_TIMEOUT;
1671 if (tvp) {
1672 time_t sec, usec;
1673
1674 if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp))
1675 || __get_user(sec, &tvp->tv_sec)
1676 || __get_user(usec, &tvp->tv_usec)) {
1677 ret = -EFAULT;
1678 goto out_nofds;
1679 }
1680
1681 if (sec < 0 || usec < 0)
1682 goto out_nofds;
1683
1684 if ((unsigned long) sec < MAX_SELECT_SECONDS) {
1685 timeout = ROUND_UP(usec, 1000000/HZ);
1686 timeout += sec * (unsigned long) HZ;
1687 }
1688 }
1689
1690 if (n < 0) 1670 if (n < 0)
1691 goto out_nofds; 1671 goto out_nofds;
1692 1672
@@ -1723,19 +1703,7 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp
1723 zero_fd_set(n, fds.res_out); 1703 zero_fd_set(n, fds.res_out);
1724 zero_fd_set(n, fds.res_ex); 1704 zero_fd_set(n, fds.res_ex);
1725 1705
1726 ret = do_select(n, &fds, &timeout); 1706 ret = do_select(n, &fds, timeout);
1727
1728 if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
1729 time_t sec = 0, usec = 0;
1730 if (timeout) {
1731 sec = timeout / HZ;
1732 usec = timeout % HZ;
1733 usec *= (1000000/HZ);
1734 }
1735 if (put_user(sec, &tvp->tv_sec) ||
1736 put_user(usec, &tvp->tv_usec))
1737 ret = -EFAULT;
1738 }
1739 1707
1740 if (ret < 0) 1708 if (ret < 0)
1741 goto out; 1709 goto out;
@@ -1756,6 +1724,224 @@ out_nofds:
1756 return ret; 1724 return ret;
1757} 1725}
1758 1726
1727asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
1728 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1729 struct compat_timeval __user *tvp)
1730{
1731 s64 timeout = -1;
1732 struct compat_timeval tv;
1733 int ret;
1734
1735 if (tvp) {
1736 if (copy_from_user(&tv, tvp, sizeof(tv)))
1737 return -EFAULT;
1738
1739 if (tv.tv_sec < 0 || tv.tv_usec < 0)
1740 return -EINVAL;
1741
1742 /* Cast to u64 to make GCC stop complaining */
1743 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
1744 timeout = -1; /* infinite */
1745 else {
1746 timeout = ROUND_UP(tv.tv_sec, 1000000/HZ);
1747 timeout += tv.tv_sec * HZ;
1748 }
1749 }
1750
1751 ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
1752
1753 if (tvp) {
1754 if (current->personality & STICKY_TIMEOUTS)
1755 goto sticky;
1756 tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
1757 tv.tv_sec = timeout;
1758 if (copy_to_user(tvp, &tv, sizeof(tv))) {
1759sticky:
1760 /*
1761 * If an application puts its timeval in read-only
1762 * memory, we don't want the Linux-specific update to
1763 * the timeval to cause a fault after the select has
1764 * completed successfully. However, because we're not
1765 * updating the timeval, we can't restart the system
1766 * call.
1767 */
1768 if (ret == -ERESTARTNOHAND)
1769 ret = -EINTR;
1770 }
1771 }
1772
1773 return ret;
1774}
1775
1776#ifdef TIF_RESTORE_SIGMASK
1777asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1778 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1779 struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
1780 compat_size_t sigsetsize)
1781{
1782 compat_sigset_t ss32;
1783 sigset_t ksigmask, sigsaved;
1784 long timeout = MAX_SCHEDULE_TIMEOUT;
1785 struct compat_timespec ts;
1786 int ret;
1787
1788 if (tsp) {
1789 if (copy_from_user(&ts, tsp, sizeof(ts)))
1790 return -EFAULT;
1791
1792 if (ts.tv_sec < 0 || ts.tv_nsec < 0)
1793 return -EINVAL;
1794 }
1795
1796 if (sigmask) {
1797 if (sigsetsize != sizeof(compat_sigset_t))
1798 return -EINVAL;
1799 if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
1800 return -EFAULT;
1801 sigset_from_compat(&ksigmask, &ss32);
1802
1803 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
1804 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1805 }
1806
1807 do {
1808 if (tsp) {
1809 if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) {
1810 timeout = ROUND_UP(ts.tv_nsec, 1000000000/HZ);
1811 timeout += ts.tv_sec * (unsigned long)HZ;
1812 ts.tv_sec = 0;
1813 ts.tv_nsec = 0;
1814 } else {
1815 ts.tv_sec -= MAX_SELECT_SECONDS;
1816 timeout = MAX_SELECT_SECONDS * HZ;
1817 }
1818 }
1819
1820 ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
1821
1822 } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
1823
1824 if (tsp && !(current->personality & STICKY_TIMEOUTS)) {
1825 ts.tv_sec += timeout / HZ;
1826 ts.tv_nsec += (timeout % HZ) * (1000000000/HZ);
1827 if (ts.tv_nsec >= 1000000000) {
1828 ts.tv_sec++;
1829 ts.tv_nsec -= 1000000000;
1830 }
1831 (void)copy_to_user(tsp, &ts, sizeof(ts));
1832 }
1833
1834 if (ret == -ERESTARTNOHAND) {
1835 /*
1836 * Don't restore the signal mask yet. Let do_signal() deliver
1837 * the signal on the way back to userspace, before the signal
1838 * mask is restored.
1839 */
1840 if (sigmask) {
1841 memcpy(&current->saved_sigmask, &sigsaved,
1842 sizeof(sigsaved));
1843 set_thread_flag(TIF_RESTORE_SIGMASK);
1844 }
1845 } else if (sigmask)
1846 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1847
1848 return ret;
1849}
1850
1851asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp,
1852 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1853 struct compat_timespec __user *tsp, void __user *sig)
1854{
1855 compat_size_t sigsetsize = 0;
1856 compat_uptr_t up = 0;
1857
1858 if (sig) {
1859 if (!access_ok(VERIFY_READ, sig,
1860 sizeof(compat_uptr_t)+sizeof(compat_size_t)) ||
1861 __get_user(up, (compat_uptr_t __user *)sig) ||
1862 __get_user(sigsetsize,
1863 (compat_size_t __user *)(sig+sizeof(up))))
1864 return -EFAULT;
1865 }
1866 return compat_sys_pselect7(n, inp, outp, exp, tsp, compat_ptr(up),
1867 sigsetsize);
1868}
1869
1870asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1871 unsigned int nfds, struct compat_timespec __user *tsp,
1872 const compat_sigset_t __user *sigmask, compat_size_t sigsetsize)
1873{
1874 compat_sigset_t ss32;
1875 sigset_t ksigmask, sigsaved;
1876 struct compat_timespec ts;
1877 s64 timeout = -1;
1878 int ret;
1879
1880 if (tsp) {
1881 if (copy_from_user(&ts, tsp, sizeof(ts)))
1882 return -EFAULT;
1883
1884 /* We assume that ts.tv_sec is always lower than
1885 the number of seconds that can be expressed in
1886 an s64. Otherwise the compiler bitches at us */
1887 timeout = ROUND_UP(ts.tv_sec, 1000000000/HZ);
1888 timeout += ts.tv_sec * HZ;
1889 }
1890
1891 if (sigmask) {
1892 if (sigsetsize |= sizeof(compat_sigset_t))
1893 return -EINVAL;
1894 if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
1895 return -EFAULT;
1896 sigset_from_compat(&ksigmask, &ss32);
1897
1898 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
1899 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1900 }
1901
1902 ret = do_sys_poll(ufds, nfds, &timeout);
1903
1904 /* We can restart this syscall, usually */
1905 if (ret == -EINTR) {
1906 /*
1907 * Don't restore the signal mask yet. Let do_signal() deliver
1908 * the signal on the way back to userspace, before the signal
1909 * mask is restored.
1910 */
1911 if (sigmask) {
1912 memcpy(&current->saved_sigmask, &sigsaved,
1913 sizeof(sigsaved));
1914 set_thread_flag(TIF_RESTORE_SIGMASK);
1915 }
1916 ret = -ERESTARTNOHAND;
1917 } else if (sigmask)
1918 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1919
1920 if (tsp && timeout >= 0) {
1921 if (current->personality & STICKY_TIMEOUTS)
1922 goto sticky;
1923 /* Yes, we know it's actually an s64, but it's also positive. */
1924 ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000;
1925 ts.tv_sec = timeout;
1926 if (copy_to_user(tsp, &ts, sizeof(ts))) {
1927sticky:
1928 /*
1929 * If an application puts its timeval in read-only
1930 * memory, we don't want the Linux-specific update to
1931 * the timeval to cause a fault after the select has
1932 * completed successfully. However, because we're not
1933 * updating the timeval, we can't restart the system
1934 * call.
1935 */
1936 if (ret == -ERESTARTNOHAND && timeout >= 0)
1937 ret = -EINTR;
1938 }
1939 }
1940
1941 return ret;
1942}
1943#endif /* TIF_RESTORE_SIGMASK */
1944
1759#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) 1945#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)
1760/* Stuff for NFS server syscalls... */ 1946/* Stuff for NFS server syscalls... */
1761struct compat_nfsctl_svc { 1947struct compat_nfsctl_svc {
diff --git a/fs/select.c b/fs/select.c
index f10a10317d54..c0f02d36c60e 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -179,12 +179,11 @@ get_max:
179#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) 179#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
180#define POLLEX_SET (POLLPRI) 180#define POLLEX_SET (POLLPRI)
181 181
182int do_select(int n, fd_set_bits *fds, long *timeout) 182int do_select(int n, fd_set_bits *fds, s64 *timeout)
183{ 183{
184 struct poll_wqueues table; 184 struct poll_wqueues table;
185 poll_table *wait; 185 poll_table *wait;
186 int retval, i; 186 int retval, i;
187 long __timeout = *timeout;
188 187
189 rcu_read_lock(); 188 rcu_read_lock();
190 retval = max_select_fd(n, fds); 189 retval = max_select_fd(n, fds);
@@ -196,11 +195,12 @@ int do_select(int n, fd_set_bits *fds, long *timeout)
196 195
197 poll_initwait(&table); 196 poll_initwait(&table);
198 wait = &table.pt; 197 wait = &table.pt;
199 if (!__timeout) 198 if (!*timeout)
200 wait = NULL; 199 wait = NULL;
201 retval = 0; 200 retval = 0;
202 for (;;) { 201 for (;;) {
203 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; 202 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
203 long __timeout;
204 204
205 set_current_state(TASK_INTERRUPTIBLE); 205 set_current_state(TASK_INTERRUPTIBLE);
206 206
@@ -255,22 +255,32 @@ int do_select(int n, fd_set_bits *fds, long *timeout)
255 *rexp = res_ex; 255 *rexp = res_ex;
256 } 256 }
257 wait = NULL; 257 wait = NULL;
258 if (retval || !__timeout || signal_pending(current)) 258 if (retval || !*timeout || signal_pending(current))
259 break; 259 break;
260 if(table.error) { 260 if(table.error) {
261 retval = table.error; 261 retval = table.error;
262 break; 262 break;
263 } 263 }
264
265 if (*timeout < 0) {
266 /* Wait indefinitely */
267 __timeout = MAX_SCHEDULE_TIMEOUT;
268 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) {
269 /* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */
270 __timeout = MAX_SCHEDULE_TIMEOUT - 1;
271 *timeout -= __timeout;
272 } else {
273 __timeout = *timeout;
274 *timeout = 0;
275 }
264 __timeout = schedule_timeout(__timeout); 276 __timeout = schedule_timeout(__timeout);
277 if (*timeout >= 0)
278 *timeout += __timeout;
265 } 279 }
266 __set_current_state(TASK_RUNNING); 280 __set_current_state(TASK_RUNNING);
267 281
268 poll_freewait(&table); 282 poll_freewait(&table);
269 283
270 /*
271 * Up-to-date the caller timeout.
272 */
273 *timeout = __timeout;
274 return retval; 284 return retval;
275} 285}
276 286
@@ -295,36 +305,14 @@ static void select_bits_free(void *bits, int size)
295#define MAX_SELECT_SECONDS \ 305#define MAX_SELECT_SECONDS \
296 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 306 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
297 307
298asmlinkage long 308static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
299sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp) 309 fd_set __user *exp, s64 *timeout)
300{ 310{
301 fd_set_bits fds; 311 fd_set_bits fds;
302 char *bits; 312 char *bits;
303 long timeout;
304 int ret, size, max_fdset; 313 int ret, size, max_fdset;
305 struct fdtable *fdt; 314 struct fdtable *fdt;
306 315
307 timeout = MAX_SCHEDULE_TIMEOUT;
308 if (tvp) {
309 time_t sec, usec;
310
311 if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp))
312 || __get_user(sec, &tvp->tv_sec)
313 || __get_user(usec, &tvp->tv_usec)) {
314 ret = -EFAULT;
315 goto out_nofds;
316 }
317
318 ret = -EINVAL;
319 if (sec < 0 || usec < 0)
320 goto out_nofds;
321
322 if ((unsigned long) sec < MAX_SELECT_SECONDS) {
323 timeout = ROUND_UP(usec, 1000000/HZ);
324 timeout += sec * (unsigned long) HZ;
325 }
326 }
327
328 ret = -EINVAL; 316 ret = -EINVAL;
329 if (n < 0) 317 if (n < 0)
330 goto out_nofds; 318 goto out_nofds;
@@ -362,18 +350,7 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
362 zero_fd_set(n, fds.res_out); 350 zero_fd_set(n, fds.res_out);
363 zero_fd_set(n, fds.res_ex); 351 zero_fd_set(n, fds.res_ex);
364 352
365 ret = do_select(n, &fds, &timeout); 353 ret = do_select(n, &fds, timeout);
366
367 if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
368 time_t sec = 0, usec = 0;
369 if (timeout) {
370 sec = timeout / HZ;
371 usec = timeout % HZ;
372 usec *= (1000000/HZ);
373 }
374 put_user(sec, &tvp->tv_sec);
375 put_user(usec, &tvp->tv_usec);
376 }
377 354
378 if (ret < 0) 355 if (ret < 0)
379 goto out; 356 goto out;
@@ -395,6 +372,154 @@ out_nofds:
395 return ret; 372 return ret;
396} 373}
397 374
375asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
376 fd_set __user *exp, struct timeval __user *tvp)
377{
378 s64 timeout = -1;
379 struct timeval tv;
380 int ret;
381
382 if (tvp) {
383 if (copy_from_user(&tv, tvp, sizeof(tv)))
384 return -EFAULT;
385
386 if (tv.tv_sec < 0 || tv.tv_usec < 0)
387 return -EINVAL;
388
389 /* Cast to u64 to make GCC stop complaining */
390 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
391 timeout = -1; /* infinite */
392 else {
393 timeout = ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ);
394 timeout += tv.tv_sec * HZ;
395 }
396 }
397
398 ret = core_sys_select(n, inp, outp, exp, &timeout);
399
400 if (tvp) {
401 if (current->personality & STICKY_TIMEOUTS)
402 goto sticky;
403 tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
404 tv.tv_sec = timeout;
405 if (copy_to_user(tvp, &tv, sizeof(tv))) {
406sticky:
407 /*
408 * If an application puts its timeval in read-only
409 * memory, we don't want the Linux-specific update to
410 * the timeval to cause a fault after the select has
411 * completed successfully. However, because we're not
412 * updating the timeval, we can't restart the system
413 * call.
414 */
415 if (ret == -ERESTARTNOHAND)
416 ret = -EINTR;
417 }
418 }
419
420 return ret;
421}
422
423#ifdef TIF_RESTORE_SIGMASK
424asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
425 fd_set __user *exp, struct timespec __user *tsp,
426 const sigset_t __user *sigmask, size_t sigsetsize)
427{
428 s64 timeout = MAX_SCHEDULE_TIMEOUT;
429 sigset_t ksigmask, sigsaved;
430 struct timespec ts;
431 int ret;
432
433 if (tsp) {
434 if (copy_from_user(&ts, tsp, sizeof(ts)))
435 return -EFAULT;
436
437 if (ts.tv_sec < 0 || ts.tv_nsec < 0)
438 return -EINVAL;
439
440 /* Cast to u64 to make GCC stop complaining */
441 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
442 timeout = -1; /* infinite */
443 else {
444 timeout = ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
445 timeout += ts.tv_sec * HZ;
446 }
447 }
448
449 if (sigmask) {
450 /* XXX: Don't preclude handling different sized sigset_t's. */
451 if (sigsetsize != sizeof(sigset_t))
452 return -EINVAL;
453 if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
454 return -EFAULT;
455
456 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
457 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
458 }
459
460 ret = core_sys_select(n, inp, outp, exp, &timeout);
461
462 if (tsp) {
463 if (current->personality & STICKY_TIMEOUTS)
464 goto sticky;
465 ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000;
466 ts.tv_sec = timeout;
467 if (copy_to_user(tsp, &ts, sizeof(ts))) {
468sticky:
469 /*
470 * If an application puts its timeval in read-only
471 * memory, we don't want the Linux-specific update to
472 * the timeval to cause a fault after the select has
473 * completed successfully. However, because we're not
474 * updating the timeval, we can't restart the system
475 * call.
476 */
477 if (ret == -ERESTARTNOHAND)
478 ret = -EINTR;
479 }
480 }
481
482 if (ret == -ERESTARTNOHAND) {
483 /*
484 * Don't restore the signal mask yet. Let do_signal() deliver
485 * the signal on the way back to userspace, before the signal
486 * mask is restored.
487 */
488 if (sigmask) {
489 memcpy(&current->saved_sigmask, &sigsaved,
490 sizeof(sigsaved));
491 set_thread_flag(TIF_RESTORE_SIGMASK);
492 }
493 } else if (sigmask)
494 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
495
496 return ret;
497}
498
499/*
500 * Most architectures can't handle 7-argument syscalls. So we provide a
501 * 6-argument version where the sixth argument is a pointer to a structure
502 * which has a pointer to the sigset_t itself followed by a size_t containing
503 * the sigset size.
504 */
505asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp,
506 fd_set __user *exp, struct timespec __user *tsp, void __user *sig)
507{
508 size_t sigsetsize = 0;
509 sigset_t __user *up = NULL;
510
511 if (sig) {
512 if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t))
513 || __get_user(up, (sigset_t * __user *)sig)
514 || __get_user(sigsetsize,
515 (size_t * __user)(sig+sizeof(void *))))
516 return -EFAULT;
517 }
518
519 return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize);
520}
521#endif /* TIF_RESTORE_SIGMASK */
522
398struct poll_list { 523struct poll_list {
399 struct poll_list *next; 524 struct poll_list *next;
400 int len; 525 int len;
@@ -436,16 +561,19 @@ static void do_pollfd(unsigned int num, struct pollfd * fdpage,
436} 561}
437 562
438static int do_poll(unsigned int nfds, struct poll_list *list, 563static int do_poll(unsigned int nfds, struct poll_list *list,
439 struct poll_wqueues *wait, long timeout) 564 struct poll_wqueues *wait, s64 *timeout)
440{ 565{
441 int count = 0; 566 int count = 0;
442 poll_table* pt = &wait->pt; 567 poll_table* pt = &wait->pt;
443 568
444 if (!timeout) 569 /* Optimise the no-wait case */
570 if (!(*timeout))
445 pt = NULL; 571 pt = NULL;
446 572
447 for (;;) { 573 for (;;) {
448 struct poll_list *walk; 574 struct poll_list *walk;
575 long __timeout;
576
449 set_current_state(TASK_INTERRUPTIBLE); 577 set_current_state(TASK_INTERRUPTIBLE);
450 walk = list; 578 walk = list;
451 while(walk != NULL) { 579 while(walk != NULL) {
@@ -453,18 +581,36 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
453 walk = walk->next; 581 walk = walk->next;
454 } 582 }
455 pt = NULL; 583 pt = NULL;
456 if (count || !timeout || signal_pending(current)) 584 if (count || !*timeout || signal_pending(current))
457 break; 585 break;
458 count = wait->error; 586 count = wait->error;
459 if (count) 587 if (count)
460 break; 588 break;
461 timeout = schedule_timeout(timeout); 589
590 if (*timeout < 0) {
591 /* Wait indefinitely */
592 __timeout = MAX_SCHEDULE_TIMEOUT;
593 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) {
594 /*
595 * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in
596 * a loop
597 */
598 __timeout = MAX_SCHEDULE_TIMEOUT - 1;
599 *timeout -= __timeout;
600 } else {
601 __timeout = *timeout;
602 *timeout = 0;
603 }
604
605 __timeout = schedule_timeout(__timeout);
606 if (*timeout >= 0)
607 *timeout += __timeout;
462 } 608 }
463 __set_current_state(TASK_RUNNING); 609 __set_current_state(TASK_RUNNING);
464 return count; 610 return count;
465} 611}
466 612
467asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long timeout) 613int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
468{ 614{
469 struct poll_wqueues table; 615 struct poll_wqueues table;
470 int fdcount, err; 616 int fdcount, err;
@@ -482,14 +628,6 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti
482 if (nfds > max_fdset && nfds > OPEN_MAX) 628 if (nfds > max_fdset && nfds > OPEN_MAX)
483 return -EINVAL; 629 return -EINVAL;
484 630
485 if (timeout) {
486 /* Careful about overflow in the intermediate values */
487 if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
488 timeout = (unsigned long)(timeout*HZ+999)/1000+1;
489 else /* Negative or overflow */
490 timeout = MAX_SCHEDULE_TIMEOUT;
491 }
492
493 poll_initwait(&table); 631 poll_initwait(&table);
494 632
495 head = NULL; 633 head = NULL;
@@ -519,6 +657,7 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti
519 } 657 }
520 i -= pp->len; 658 i -= pp->len;
521 } 659 }
660
522 fdcount = do_poll(nfds, head, &table, timeout); 661 fdcount = do_poll(nfds, head, &table, timeout);
523 662
524 /* OK, now copy the revents fields back to user space. */ 663 /* OK, now copy the revents fields back to user space. */
@@ -547,3 +686,98 @@ out_fds:
547 poll_freewait(&table); 686 poll_freewait(&table);
548 return err; 687 return err;
549} 688}
689
690asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
691 long timeout_msecs)
692{
693 s64 timeout_jiffies = 0;
694
695 if (timeout_msecs) {
696#if HZ > 1000
697 /* We can only overflow if HZ > 1000 */
698 if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ)
699 timeout_jiffies = -1;
700 else
701#endif
702 timeout_jiffies = msecs_to_jiffies(timeout_msecs);
703 }
704
705 return do_sys_poll(ufds, nfds, &timeout_jiffies);
706}
707
708#ifdef TIF_RESTORE_SIGMASK
709asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
710 struct timespec __user *tsp, const sigset_t __user *sigmask,
711 size_t sigsetsize)
712{
713 sigset_t ksigmask, sigsaved;
714 struct timespec ts;
715 s64 timeout = -1;
716 int ret;
717
718 if (tsp) {
719 if (copy_from_user(&ts, tsp, sizeof(ts)))
720 return -EFAULT;
721
722 /* Cast to u64 to make GCC stop complaining */
723 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
724 timeout = -1; /* infinite */
725 else {
726 timeout = ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
727 timeout += ts.tv_sec * HZ;
728 }
729 }
730
731 if (sigmask) {
732 /* XXX: Don't preclude handling different sized sigset_t's. */
733 if (sigsetsize != sizeof(sigset_t))
734 return -EINVAL;
735 if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
736 return -EFAULT;
737
738 sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
739 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
740 }
741
742 ret = do_sys_poll(ufds, nfds, &timeout);
743
744 /* We can restart this syscall, usually */
745 if (ret == -EINTR) {
746 /*
747 * Don't restore the signal mask yet. Let do_signal() deliver
748 * the signal on the way back to userspace, before the signal
749 * mask is restored.
750 */
751 if (sigmask) {
752 memcpy(&current->saved_sigmask, &sigsaved,
753 sizeof(sigsaved));
754 set_thread_flag(TIF_RESTORE_SIGMASK);
755 }
756 ret = -ERESTARTNOHAND;
757 } else if (sigmask)
758 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
759
760 if (tsp && timeout >= 0) {
761 if (current->personality & STICKY_TIMEOUTS)
762 goto sticky;
763 /* Yes, we know it's actually an s64, but it's also positive. */
764 ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000;
765 ts.tv_sec = timeout;
766 if (copy_to_user(tsp, &ts, sizeof(ts))) {
767 sticky:
768 /*
769 * If an application puts its timeval in read-only
770 * memory, we don't want the Linux-specific update to
771 * the timeval to cause a fault after the select has
772 * completed successfully. However, because we're not
773 * updating the timeval, we can't restart the system
774 * call.
775 */
776 if (ret == -ERESTARTNOHAND && timeout >= 0)
777 ret = -EINTR;
778 }
779 }
780
781 return ret;
782}
783#endif /* TIF_RESTORE_SIGMASK */
diff --git a/include/linux/poll.h b/include/linux/poll.h
index f6da702088f4..8e8f6098508a 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -92,7 +92,11 @@ void zero_fd_set(unsigned long nr, unsigned long *fdset)
92 memset(fdset, 0, FDS_BYTES(nr)); 92 memset(fdset, 0, FDS_BYTES(nr));
93} 93}
94 94
95extern int do_select(int n, fd_set_bits *fds, long *timeout); 95#define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1)
96
97extern int do_select(int n, fd_set_bits *fds, s64 *timeout);
98extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds,
99 s64 *timeout);
96 100
97#endif /* KERNEL */ 101#endif /* KERNEL */
98 102