aboutsummaryrefslogtreecommitdiffstats
path: root/fs/select.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/select.c')
-rw-r--r--fs/select.c263
1 files changed, 88 insertions, 175 deletions
diff --git a/fs/select.c b/fs/select.c
index 1180a6207789..f6dceb56793f 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -24,6 +24,7 @@
24#include <linux/fdtable.h> 24#include <linux/fdtable.h>
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
27#include <linux/hrtimer.h>
27 28
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
29 30
@@ -203,8 +204,6 @@ sticky:
203 return ret; 204 return ret;
204} 205}
205 206
206
207
208#define FDS_IN(fds, n) (fds->in + n) 207#define FDS_IN(fds, n) (fds->in + n)
209#define FDS_OUT(fds, n) (fds->out + n) 208#define FDS_OUT(fds, n) (fds->out + n)
210#define FDS_EX(fds, n) (fds->ex + n) 209#define FDS_EX(fds, n) (fds->ex + n)
@@ -257,11 +256,12 @@ get_max:
257#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) 256#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
258#define POLLEX_SET (POLLPRI) 257#define POLLEX_SET (POLLPRI)
259 258
260int do_select(int n, fd_set_bits *fds, s64 *timeout) 259int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
261{ 260{
261 ktime_t expire, *to = NULL;
262 struct poll_wqueues table; 262 struct poll_wqueues table;
263 poll_table *wait; 263 poll_table *wait;
264 int retval, i; 264 int retval, i, timed_out = 0;
265 265
266 rcu_read_lock(); 266 rcu_read_lock();
267 retval = max_select_fd(n, fds); 267 retval = max_select_fd(n, fds);
@@ -273,12 +273,14 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
273 273
274 poll_initwait(&table); 274 poll_initwait(&table);
275 wait = &table.pt; 275 wait = &table.pt;
276 if (!*timeout) 276 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
277 wait = NULL; 277 wait = NULL;
278 timed_out = 1;
279 }
280
278 retval = 0; 281 retval = 0;
279 for (;;) { 282 for (;;) {
280 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; 283 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
281 long __timeout;
282 284
283 set_current_state(TASK_INTERRUPTIBLE); 285 set_current_state(TASK_INTERRUPTIBLE);
284 286
@@ -334,27 +336,25 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
334 cond_resched(); 336 cond_resched();
335 } 337 }
336 wait = NULL; 338 wait = NULL;
337 if (retval || !*timeout || signal_pending(current)) 339 if (retval || timed_out || signal_pending(current))
338 break; 340 break;
339 if (table.error) { 341 if (table.error) {
340 retval = table.error; 342 retval = table.error;
341 break; 343 break;
342 } 344 }
343 345
344 if (*timeout < 0) { 346 /*
345 /* Wait indefinitely */ 347 * If this is the first loop and we have a timeout
346 __timeout = MAX_SCHEDULE_TIMEOUT; 348 * given, then we convert to ktime_t and set the to
347 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) { 349 * pointer to the expiry value.
348 /* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */ 350 */
349 __timeout = MAX_SCHEDULE_TIMEOUT - 1; 351 if (end_time && !to) {
350 *timeout -= __timeout; 352 expire = timespec_to_ktime(*end_time);
351 } else { 353 to = &expire;
352 __timeout = *timeout;
353 *timeout = 0;
354 } 354 }
355 __timeout = schedule_timeout(__timeout); 355
356 if (*timeout >= 0) 356 if (!schedule_hrtimeout(to, HRTIMER_MODE_ABS))
357 *timeout += __timeout; 357 timed_out = 1;
358 } 358 }
359 __set_current_state(TASK_RUNNING); 359 __set_current_state(TASK_RUNNING);
360 360
@@ -375,7 +375,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
375 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 375 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
376 376
377int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 377int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
378 fd_set __user *exp, s64 *timeout) 378 fd_set __user *exp, struct timespec *end_time)
379{ 379{
380 fd_set_bits fds; 380 fd_set_bits fds;
381 void *bits; 381 void *bits;
@@ -426,7 +426,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
426 zero_fd_set(n, fds.res_out); 426 zero_fd_set(n, fds.res_out);
427 zero_fd_set(n, fds.res_ex); 427 zero_fd_set(n, fds.res_ex);
428 428
429 ret = do_select(n, &fds, timeout); 429 ret = do_select(n, &fds, end_time);
430 430
431 if (ret < 0) 431 if (ret < 0)
432 goto out; 432 goto out;
@@ -452,7 +452,7 @@ out_nofds:
452asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, 452asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
453 fd_set __user *exp, struct timeval __user *tvp) 453 fd_set __user *exp, struct timeval __user *tvp)
454{ 454{
455 s64 timeout = -1; 455 struct timespec end_time, *to = NULL;
456 struct timeval tv; 456 struct timeval tv;
457 int ret; 457 int ret;
458 458
@@ -460,43 +460,14 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
460 if (copy_from_user(&tv, tvp, sizeof(tv))) 460 if (copy_from_user(&tv, tvp, sizeof(tv)))
461 return -EFAULT; 461 return -EFAULT;
462 462
463 if (tv.tv_sec < 0 || tv.tv_usec < 0) 463 to = &end_time;
464 if (poll_select_set_timeout(to, tv.tv_sec,
465 tv.tv_usec * NSEC_PER_USEC))
464 return -EINVAL; 466 return -EINVAL;
465
466 /* Cast to u64 to make GCC stop complaining */
467 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
468 timeout = -1; /* infinite */
469 else {
470 timeout = DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ);
471 timeout += tv.tv_sec * HZ;
472 }
473 } 467 }
474 468
475 ret = core_sys_select(n, inp, outp, exp, &timeout); 469 ret = core_sys_select(n, inp, outp, exp, to);
476 470 ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
477 if (tvp) {
478 struct timeval rtv;
479
480 if (current->personality & STICKY_TIMEOUTS)
481 goto sticky;
482 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
483 rtv.tv_sec = timeout;
484 if (timeval_compare(&rtv, &tv) >= 0)
485 rtv = tv;
486 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
487sticky:
488 /*
489 * If an application puts its timeval in read-only
490 * memory, we don't want the Linux-specific update to
491 * the timeval to cause a fault after the select has
492 * completed successfully. However, because we're not
493 * updating the timeval, we can't restart the system
494 * call.
495 */
496 if (ret == -ERESTARTNOHAND)
497 ret = -EINTR;
498 }
499 }
500 471
501 return ret; 472 return ret;
502} 473}
@@ -506,25 +477,17 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
506 fd_set __user *exp, struct timespec __user *tsp, 477 fd_set __user *exp, struct timespec __user *tsp,
507 const sigset_t __user *sigmask, size_t sigsetsize) 478 const sigset_t __user *sigmask, size_t sigsetsize)
508{ 479{
509 s64 timeout = MAX_SCHEDULE_TIMEOUT;
510 sigset_t ksigmask, sigsaved; 480 sigset_t ksigmask, sigsaved;
511 struct timespec ts; 481 struct timespec ts, end_time, *to = NULL;
512 int ret; 482 int ret;
513 483
514 if (tsp) { 484 if (tsp) {
515 if (copy_from_user(&ts, tsp, sizeof(ts))) 485 if (copy_from_user(&ts, tsp, sizeof(ts)))
516 return -EFAULT; 486 return -EFAULT;
517 487
518 if (ts.tv_sec < 0 || ts.tv_nsec < 0) 488 to = &end_time;
489 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
519 return -EINVAL; 490 return -EINVAL;
520
521 /* Cast to u64 to make GCC stop complaining */
522 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
523 timeout = -1; /* infinite */
524 else {
525 timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
526 timeout += ts.tv_sec * HZ;
527 }
528 } 491 }
529 492
530 if (sigmask) { 493 if (sigmask) {
@@ -538,32 +501,8 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
538 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 501 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
539 } 502 }
540 503
541 ret = core_sys_select(n, inp, outp, exp, &timeout); 504 ret = core_sys_select(n, inp, outp, exp, &end_time);
542 505 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
543 if (tsp) {
544 struct timespec rts;
545
546 if (current->personality & STICKY_TIMEOUTS)
547 goto sticky;
548 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
549 1000;
550 rts.tv_sec = timeout;
551 if (timespec_compare(&rts, &ts) >= 0)
552 rts = ts;
553 if (copy_to_user(tsp, &rts, sizeof(rts))) {
554sticky:
555 /*
556 * If an application puts its timeval in read-only
557 * memory, we don't want the Linux-specific update to
558 * the timeval to cause a fault after the select has
559 * completed successfully. However, because we're not
560 * updating the timeval, we can't restart the system
561 * call.
562 */
563 if (ret == -ERESTARTNOHAND)
564 ret = -EINTR;
565 }
566 }
567 506
568 if (ret == -ERESTARTNOHAND) { 507 if (ret == -ERESTARTNOHAND) {
569 /* 508 /*
@@ -649,18 +588,20 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
649} 588}
650 589
651static int do_poll(unsigned int nfds, struct poll_list *list, 590static int do_poll(unsigned int nfds, struct poll_list *list,
652 struct poll_wqueues *wait, s64 *timeout) 591 struct poll_wqueues *wait, struct timespec *end_time)
653{ 592{
654 int count = 0;
655 poll_table* pt = &wait->pt; 593 poll_table* pt = &wait->pt;
594 ktime_t expire, *to = NULL;
595 int timed_out = 0, count = 0;
656 596
657 /* Optimise the no-wait case */ 597 /* Optimise the no-wait case */
658 if (!(*timeout)) 598 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
659 pt = NULL; 599 pt = NULL;
600 timed_out = 1;
601 }
660 602
661 for (;;) { 603 for (;;) {
662 struct poll_list *walk; 604 struct poll_list *walk;
663 long __timeout;
664 605
665 set_current_state(TASK_INTERRUPTIBLE); 606 set_current_state(TASK_INTERRUPTIBLE);
666 for (walk = list; walk != NULL; walk = walk->next) { 607 for (walk = list; walk != NULL; walk = walk->next) {
@@ -692,27 +633,21 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
692 if (signal_pending(current)) 633 if (signal_pending(current))
693 count = -EINTR; 634 count = -EINTR;
694 } 635 }
695 if (count || !*timeout) 636 if (count || timed_out)
696 break; 637 break;
697 638
698 if (*timeout < 0) { 639 /*
699 /* Wait indefinitely */ 640 * If this is the first loop and we have a timeout
700 __timeout = MAX_SCHEDULE_TIMEOUT; 641 * given, then we convert to ktime_t and set the to
701 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) { 642 * pointer to the expiry value.
702 /* 643 */
703 * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in 644 if (end_time && !to) {
704 * a loop 645 expire = timespec_to_ktime(*end_time);
705 */ 646 to = &expire;
706 __timeout = MAX_SCHEDULE_TIMEOUT - 1;
707 *timeout -= __timeout;
708 } else {
709 __timeout = *timeout;
710 *timeout = 0;
711 } 647 }
712 648
713 __timeout = schedule_timeout(__timeout); 649 if (!schedule_hrtimeout(to, HRTIMER_MODE_ABS))
714 if (*timeout >= 0) 650 timed_out = 1;
715 *timeout += __timeout;
716 } 651 }
717 __set_current_state(TASK_RUNNING); 652 __set_current_state(TASK_RUNNING);
718 return count; 653 return count;
@@ -721,7 +656,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
721#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ 656#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \
722 sizeof(struct pollfd)) 657 sizeof(struct pollfd))
723 658
724int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) 659int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
660 struct timespec *end_time)
725{ 661{
726 struct poll_wqueues table; 662 struct poll_wqueues table;
727 int err = -EFAULT, fdcount, len, size; 663 int err = -EFAULT, fdcount, len, size;
@@ -761,7 +697,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
761 } 697 }
762 698
763 poll_initwait(&table); 699 poll_initwait(&table);
764 fdcount = do_poll(nfds, head, &table, timeout); 700 fdcount = do_poll(nfds, head, &table, end_time);
765 poll_freewait(&table); 701 poll_freewait(&table);
766 702
767 for (walk = head; walk; walk = walk->next) { 703 for (walk = head; walk; walk = walk->next) {
@@ -787,16 +723,21 @@ out_fds:
787 723
788static long do_restart_poll(struct restart_block *restart_block) 724static long do_restart_poll(struct restart_block *restart_block)
789{ 725{
790 struct pollfd __user *ufds = (struct pollfd __user*)restart_block->arg0; 726 struct pollfd __user *ufds = restart_block->poll.ufds;
791 int nfds = restart_block->arg1; 727 int nfds = restart_block->poll.nfds;
792 s64 timeout = ((s64)restart_block->arg3<<32) | (s64)restart_block->arg2; 728 struct timespec *to = NULL, end_time;
793 int ret; 729 int ret;
794 730
795 ret = do_sys_poll(ufds, nfds, &timeout); 731 if (restart_block->poll.has_timeout) {
732 end_time.tv_sec = restart_block->poll.tv_sec;
733 end_time.tv_nsec = restart_block->poll.tv_nsec;
734 to = &end_time;
735 }
736
737 ret = do_sys_poll(ufds, nfds, to);
738
796 if (ret == -EINTR) { 739 if (ret == -EINTR) {
797 restart_block->fn = do_restart_poll; 740 restart_block->fn = do_restart_poll;
798 restart_block->arg2 = timeout & 0xFFFFFFFF;
799 restart_block->arg3 = (u64)timeout >> 32;
800 ret = -ERESTART_RESTARTBLOCK; 741 ret = -ERESTART_RESTARTBLOCK;
801 } 742 }
802 return ret; 743 return ret;
@@ -805,31 +746,32 @@ static long do_restart_poll(struct restart_block *restart_block)
805asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, 746asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
806 long timeout_msecs) 747 long timeout_msecs)
807{ 748{
808 s64 timeout_jiffies; 749 struct timespec end_time, *to = NULL;
809 int ret; 750 int ret;
810 751
811 if (timeout_msecs > 0) { 752 if (timeout_msecs >= 0) {
812#if HZ > 1000 753 to = &end_time;
813 /* We can only overflow if HZ > 1000 */ 754 poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
814 if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ) 755 NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
815 timeout_jiffies = -1;
816 else
817#endif
818 timeout_jiffies = msecs_to_jiffies(timeout_msecs) + 1;
819 } else {
820 /* Infinite (< 0) or no (0) timeout */
821 timeout_jiffies = timeout_msecs;
822 } 756 }
823 757
824 ret = do_sys_poll(ufds, nfds, &timeout_jiffies); 758 ret = do_sys_poll(ufds, nfds, to);
759
825 if (ret == -EINTR) { 760 if (ret == -EINTR) {
826 struct restart_block *restart_block; 761 struct restart_block *restart_block;
762
827 restart_block = &current_thread_info()->restart_block; 763 restart_block = &current_thread_info()->restart_block;
828 restart_block->fn = do_restart_poll; 764 restart_block->fn = do_restart_poll;
829 restart_block->arg0 = (unsigned long)ufds; 765 restart_block->poll.ufds = ufds;
830 restart_block->arg1 = nfds; 766 restart_block->poll.nfds = nfds;
831 restart_block->arg2 = timeout_jiffies & 0xFFFFFFFF; 767
832 restart_block->arg3 = (u64)timeout_jiffies >> 32; 768 if (timeout_msecs >= 0) {
769 restart_block->poll.tv_sec = end_time.tv_sec;
770 restart_block->poll.tv_nsec = end_time.tv_nsec;
771 restart_block->poll.has_timeout = 1;
772 } else
773 restart_block->poll.has_timeout = 0;
774
833 ret = -ERESTART_RESTARTBLOCK; 775 ret = -ERESTART_RESTARTBLOCK;
834 } 776 }
835 return ret; 777 return ret;
@@ -841,21 +783,16 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
841 size_t sigsetsize) 783 size_t sigsetsize)
842{ 784{
843 sigset_t ksigmask, sigsaved; 785 sigset_t ksigmask, sigsaved;
844 struct timespec ts; 786 struct timespec ts, end_time, *to = NULL;
845 s64 timeout = -1;
846 int ret; 787 int ret;
847 788
848 if (tsp) { 789 if (tsp) {
849 if (copy_from_user(&ts, tsp, sizeof(ts))) 790 if (copy_from_user(&ts, tsp, sizeof(ts)))
850 return -EFAULT; 791 return -EFAULT;
851 792
852 /* Cast to u64 to make GCC stop complaining */ 793 to = &end_time;
853 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) 794 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
854 timeout = -1; /* infinite */ 795 return -EINVAL;
855 else {
856 timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
857 timeout += ts.tv_sec * HZ;
858 }
859 } 796 }
860 797
861 if (sigmask) { 798 if (sigmask) {
@@ -869,7 +806,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
869 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 806 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
870 } 807 }
871 808
872 ret = do_sys_poll(ufds, nfds, &timeout); 809 ret = do_sys_poll(ufds, nfds, to);
873 810
874 /* We can restart this syscall, usually */ 811 /* We can restart this syscall, usually */
875 if (ret == -EINTR) { 812 if (ret == -EINTR) {
@@ -887,31 +824,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
887 } else if (sigmask) 824 } else if (sigmask)
888 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 825 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
889 826
890 if (tsp && timeout >= 0) { 827 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
891 struct timespec rts;
892
893 if (current->personality & STICKY_TIMEOUTS)
894 goto sticky;
895 /* Yes, we know it's actually an s64, but it's also positive. */
896 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
897 1000;
898 rts.tv_sec = timeout;
899 if (timespec_compare(&rts, &ts) >= 0)
900 rts = ts;
901 if (copy_to_user(tsp, &rts, sizeof(rts))) {
902 sticky:
903 /*
904 * If an application puts its timeval in read-only
905 * memory, we don't want the Linux-specific update to
906 * the timeval to cause a fault after the select has
907 * completed successfully. However, because we're not
908 * updating the timeval, we can't restart the system
909 * call.
910 */
911 if (ret == -ERESTARTNOHAND && timeout >= 0)
912 ret = -EINTR;
913 }
914 }
915 828
916 return ret; 829 return ret;
917} 830}