aboutsummaryrefslogtreecommitdiffstats
path: root/fs/select.c
diff options
context:
space:
mode:
authorArjan van de Ven <arjan@linux.intel.com>2008-08-31 11:26:40 -0400
committerArjan van de Ven <arjan@linux.intel.com>2008-09-06 00:35:03 -0400
commit8ff3e8e85fa6c312051134b3953e397feb639f51 (patch)
tree526886377ca92a62f030c25bc8f91f13a6f991eb /fs/select.c
parentbe5dad20a55e054a35dac7f6f5f184dc72b379b4 (diff)
select: switch select() and poll() over to hrtimers
With lots of help, input and cleanups from Thomas Gleixner This patch switches select() and poll() over to hrtimers. The core of the patch is replacing the "s64 timeout" with a "struct timespec end_time" in all the plumbing. But most of the diffstat comes from using the just introduced helpers: poll_select_set_timeout poll_select_copy_remaining timespec_add_safe which make manipulating the timespec easier and less error-prone. Signed-off-by: Arjan van de Ven <arjan@linux.intel.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'fs/select.c')
-rw-r--r--fs/select.c263
1 files changed, 88 insertions, 175 deletions
diff --git a/fs/select.c b/fs/select.c
index 1180a6207789..f6dceb56793f 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -24,6 +24,7 @@
24#include <linux/fdtable.h> 24#include <linux/fdtable.h>
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
27#include <linux/hrtimer.h>
27 28
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
29 30
@@ -203,8 +204,6 @@ sticky:
203 return ret; 204 return ret;
204} 205}
205 206
206
207
208#define FDS_IN(fds, n) (fds->in + n) 207#define FDS_IN(fds, n) (fds->in + n)
209#define FDS_OUT(fds, n) (fds->out + n) 208#define FDS_OUT(fds, n) (fds->out + n)
210#define FDS_EX(fds, n) (fds->ex + n) 209#define FDS_EX(fds, n) (fds->ex + n)
@@ -257,11 +256,12 @@ get_max:
257#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) 256#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
258#define POLLEX_SET (POLLPRI) 257#define POLLEX_SET (POLLPRI)
259 258
260int do_select(int n, fd_set_bits *fds, s64 *timeout) 259int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
261{ 260{
261 ktime_t expire, *to = NULL;
262 struct poll_wqueues table; 262 struct poll_wqueues table;
263 poll_table *wait; 263 poll_table *wait;
264 int retval, i; 264 int retval, i, timed_out = 0;
265 265
266 rcu_read_lock(); 266 rcu_read_lock();
267 retval = max_select_fd(n, fds); 267 retval = max_select_fd(n, fds);
@@ -273,12 +273,14 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
273 273
274 poll_initwait(&table); 274 poll_initwait(&table);
275 wait = &table.pt; 275 wait = &table.pt;
276 if (!*timeout) 276 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
277 wait = NULL; 277 wait = NULL;
278 timed_out = 1;
279 }
280
278 retval = 0; 281 retval = 0;
279 for (;;) { 282 for (;;) {
280 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; 283 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
281 long __timeout;
282 284
283 set_current_state(TASK_INTERRUPTIBLE); 285 set_current_state(TASK_INTERRUPTIBLE);
284 286
@@ -334,27 +336,25 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
334 cond_resched(); 336 cond_resched();
335 } 337 }
336 wait = NULL; 338 wait = NULL;
337 if (retval || !*timeout || signal_pending(current)) 339 if (retval || timed_out || signal_pending(current))
338 break; 340 break;
339 if (table.error) { 341 if (table.error) {
340 retval = table.error; 342 retval = table.error;
341 break; 343 break;
342 } 344 }
343 345
344 if (*timeout < 0) { 346 /*
345 /* Wait indefinitely */ 347 * If this is the first loop and we have a timeout
346 __timeout = MAX_SCHEDULE_TIMEOUT; 348 * given, then we convert to ktime_t and set the to
347 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) { 349 * pointer to the expiry value.
348 /* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */ 350 */
349 __timeout = MAX_SCHEDULE_TIMEOUT - 1; 351 if (end_time && !to) {
350 *timeout -= __timeout; 352 expire = timespec_to_ktime(*end_time);
351 } else { 353 to = &expire;
352 __timeout = *timeout;
353 *timeout = 0;
354 } 354 }
355 __timeout = schedule_timeout(__timeout); 355
356 if (*timeout >= 0) 356 if (!schedule_hrtimeout(to, HRTIMER_MODE_ABS))
357 *timeout += __timeout; 357 timed_out = 1;
358 } 358 }
359 __set_current_state(TASK_RUNNING); 359 __set_current_state(TASK_RUNNING);
360 360
@@ -375,7 +375,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
375 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 375 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
376 376
377int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 377int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
378 fd_set __user *exp, s64 *timeout) 378 fd_set __user *exp, struct timespec *end_time)
379{ 379{
380 fd_set_bits fds; 380 fd_set_bits fds;
381 void *bits; 381 void *bits;
@@ -426,7 +426,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
426 zero_fd_set(n, fds.res_out); 426 zero_fd_set(n, fds.res_out);
427 zero_fd_set(n, fds.res_ex); 427 zero_fd_set(n, fds.res_ex);
428 428
429 ret = do_select(n, &fds, timeout); 429 ret = do_select(n, &fds, end_time);
430 430
431 if (ret < 0) 431 if (ret < 0)
432 goto out; 432 goto out;
@@ -452,7 +452,7 @@ out_nofds:
452asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, 452asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
453 fd_set __user *exp, struct timeval __user *tvp) 453 fd_set __user *exp, struct timeval __user *tvp)
454{ 454{
455 s64 timeout = -1; 455 struct timespec end_time, *to = NULL;
456 struct timeval tv; 456 struct timeval tv;
457 int ret; 457 int ret;
458 458
@@ -460,43 +460,14 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
460 if (copy_from_user(&tv, tvp, sizeof(tv))) 460 if (copy_from_user(&tv, tvp, sizeof(tv)))
461 return -EFAULT; 461 return -EFAULT;
462 462
463 if (tv.tv_sec < 0 || tv.tv_usec < 0) 463 to = &end_time;
464 if (poll_select_set_timeout(to, tv.tv_sec,
465 tv.tv_usec * NSEC_PER_USEC))
464 return -EINVAL; 466 return -EINVAL;
465
466 /* Cast to u64 to make GCC stop complaining */
467 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
468 timeout = -1; /* infinite */
469 else {
470 timeout = DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ);
471 timeout += tv.tv_sec * HZ;
472 }
473 } 467 }
474 468
475 ret = core_sys_select(n, inp, outp, exp, &timeout); 469 ret = core_sys_select(n, inp, outp, exp, to);
476 470 ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
477 if (tvp) {
478 struct timeval rtv;
479
480 if (current->personality & STICKY_TIMEOUTS)
481 goto sticky;
482 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
483 rtv.tv_sec = timeout;
484 if (timeval_compare(&rtv, &tv) >= 0)
485 rtv = tv;
486 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
487sticky:
488 /*
489 * If an application puts its timeval in read-only
490 * memory, we don't want the Linux-specific update to
491 * the timeval to cause a fault after the select has
492 * completed successfully. However, because we're not
493 * updating the timeval, we can't restart the system
494 * call.
495 */
496 if (ret == -ERESTARTNOHAND)
497 ret = -EINTR;
498 }
499 }
500 471
501 return ret; 472 return ret;
502} 473}
@@ -506,25 +477,17 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
506 fd_set __user *exp, struct timespec __user *tsp, 477 fd_set __user *exp, struct timespec __user *tsp,
507 const sigset_t __user *sigmask, size_t sigsetsize) 478 const sigset_t __user *sigmask, size_t sigsetsize)
508{ 479{
509 s64 timeout = MAX_SCHEDULE_TIMEOUT;
510 sigset_t ksigmask, sigsaved; 480 sigset_t ksigmask, sigsaved;
511 struct timespec ts; 481 struct timespec ts, end_time, *to = NULL;
512 int ret; 482 int ret;
513 483
514 if (tsp) { 484 if (tsp) {
515 if (copy_from_user(&ts, tsp, sizeof(ts))) 485 if (copy_from_user(&ts, tsp, sizeof(ts)))
516 return -EFAULT; 486 return -EFAULT;
517 487
518 if (ts.tv_sec < 0 || ts.tv_nsec < 0) 488 to = &end_time;
489 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
519 return -EINVAL; 490 return -EINVAL;
520
521 /* Cast to u64 to make GCC stop complaining */
522 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
523 timeout = -1; /* infinite */
524 else {
525 timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
526 timeout += ts.tv_sec * HZ;
527 }
528 } 491 }
529 492
530 if (sigmask) { 493 if (sigmask) {
@@ -538,32 +501,8 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
538 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 501 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
539 } 502 }
540 503
541 ret = core_sys_select(n, inp, outp, exp, &timeout); 504 ret = core_sys_select(n, inp, outp, exp, &end_time);
542 505 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
543 if (tsp) {
544 struct timespec rts;
545
546 if (current->personality & STICKY_TIMEOUTS)
547 goto sticky;
548 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
549 1000;
550 rts.tv_sec = timeout;
551 if (timespec_compare(&rts, &ts) >= 0)
552 rts = ts;
553 if (copy_to_user(tsp, &rts, sizeof(rts))) {
554sticky:
555 /*
556 * If an application puts its timeval in read-only
557 * memory, we don't want the Linux-specific update to
558 * the timeval to cause a fault after the select has
559 * completed successfully. However, because we're not
560 * updating the timeval, we can't restart the system
561 * call.
562 */
563 if (ret == -ERESTARTNOHAND)
564 ret = -EINTR;
565 }
566 }
567 506
568 if (ret == -ERESTARTNOHAND) { 507 if (ret == -ERESTARTNOHAND) {
569 /* 508 /*
@@ -649,18 +588,20 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
649} 588}
650 589
651static int do_poll(unsigned int nfds, struct poll_list *list, 590static int do_poll(unsigned int nfds, struct poll_list *list,
652 struct poll_wqueues *wait, s64 *timeout) 591 struct poll_wqueues *wait, struct timespec *end_time)
653{ 592{
654 int count = 0;
655 poll_table* pt = &wait->pt; 593 poll_table* pt = &wait->pt;
594 ktime_t expire, *to = NULL;
595 int timed_out = 0, count = 0;
656 596
657 /* Optimise the no-wait case */ 597 /* Optimise the no-wait case */
658 if (!(*timeout)) 598 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
659 pt = NULL; 599 pt = NULL;
600 timed_out = 1;
601 }
660 602
661 for (;;) { 603 for (;;) {
662 struct poll_list *walk; 604 struct poll_list *walk;
663 long __timeout;
664 605
665 set_current_state(TASK_INTERRUPTIBLE); 606 set_current_state(TASK_INTERRUPTIBLE);
666 for (walk = list; walk != NULL; walk = walk->next) { 607 for (walk = list; walk != NULL; walk = walk->next) {
@@ -692,27 +633,21 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
692 if (signal_pending(current)) 633 if (signal_pending(current))
693 count = -EINTR; 634 count = -EINTR;
694 } 635 }
695 if (count || !*timeout) 636 if (count || timed_out)
696 break; 637 break;
697 638
698 if (*timeout < 0) { 639 /*
699 /* Wait indefinitely */ 640 * If this is the first loop and we have a timeout
700 __timeout = MAX_SCHEDULE_TIMEOUT; 641 * given, then we convert to ktime_t and set the to
701 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) { 642 * pointer to the expiry value.
702 /* 643 */
703 * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in 644 if (end_time && !to) {
704 * a loop 645 expire = timespec_to_ktime(*end_time);
705 */ 646 to = &expire;
706 __timeout = MAX_SCHEDULE_TIMEOUT - 1;
707 *timeout -= __timeout;
708 } else {
709 __timeout = *timeout;
710 *timeout = 0;
711 } 647 }
712 648
713 __timeout = schedule_timeout(__timeout); 649 if (!schedule_hrtimeout(to, HRTIMER_MODE_ABS))
714 if (*timeout >= 0) 650 timed_out = 1;
715 *timeout += __timeout;
716 } 651 }
717 __set_current_state(TASK_RUNNING); 652 __set_current_state(TASK_RUNNING);
718 return count; 653 return count;
@@ -721,7 +656,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
721#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ 656#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \
722 sizeof(struct pollfd)) 657 sizeof(struct pollfd))
723 658
724int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) 659int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
660 struct timespec *end_time)
725{ 661{
726 struct poll_wqueues table; 662 struct poll_wqueues table;
727 int err = -EFAULT, fdcount, len, size; 663 int err = -EFAULT, fdcount, len, size;
@@ -761,7 +697,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
761 } 697 }
762 698
763 poll_initwait(&table); 699 poll_initwait(&table);
764 fdcount = do_poll(nfds, head, &table, timeout); 700 fdcount = do_poll(nfds, head, &table, end_time);
765 poll_freewait(&table); 701 poll_freewait(&table);
766 702
767 for (walk = head; walk; walk = walk->next) { 703 for (walk = head; walk; walk = walk->next) {
@@ -787,16 +723,21 @@ out_fds:
787 723
788static long do_restart_poll(struct restart_block *restart_block) 724static long do_restart_poll(struct restart_block *restart_block)
789{ 725{
790 struct pollfd __user *ufds = (struct pollfd __user*)restart_block->arg0; 726 struct pollfd __user *ufds = restart_block->poll.ufds;
791 int nfds = restart_block->arg1; 727 int nfds = restart_block->poll.nfds;
792 s64 timeout = ((s64)restart_block->arg3<<32) | (s64)restart_block->arg2; 728 struct timespec *to = NULL, end_time;
793 int ret; 729 int ret;
794 730
795 ret = do_sys_poll(ufds, nfds, &timeout); 731 if (restart_block->poll.has_timeout) {
732 end_time.tv_sec = restart_block->poll.tv_sec;
733 end_time.tv_nsec = restart_block->poll.tv_nsec;
734 to = &end_time;
735 }
736
737 ret = do_sys_poll(ufds, nfds, to);
738
796 if (ret == -EINTR) { 739 if (ret == -EINTR) {
797 restart_block->fn = do_restart_poll; 740 restart_block->fn = do_restart_poll;
798 restart_block->arg2 = timeout & 0xFFFFFFFF;
799 restart_block->arg3 = (u64)timeout >> 32;
800 ret = -ERESTART_RESTARTBLOCK; 741 ret = -ERESTART_RESTARTBLOCK;
801 } 742 }
802 return ret; 743 return ret;
@@ -805,31 +746,32 @@ static long do_restart_poll(struct restart_block *restart_block)
805asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, 746asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
806 long timeout_msecs) 747 long timeout_msecs)
807{ 748{
808 s64 timeout_jiffies; 749 struct timespec end_time, *to = NULL;
809 int ret; 750 int ret;
810 751
811 if (timeout_msecs > 0) { 752 if (timeout_msecs >= 0) {
812#if HZ > 1000 753 to = &end_time;
813 /* We can only overflow if HZ > 1000 */ 754 poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
814 if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ) 755 NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
815 timeout_jiffies = -1;
816 else
817#endif
818 timeout_jiffies = msecs_to_jiffies(timeout_msecs) + 1;
819 } else {
820 /* Infinite (< 0) or no (0) timeout */
821 timeout_jiffies = timeout_msecs;
822 } 756 }
823 757
824 ret = do_sys_poll(ufds, nfds, &timeout_jiffies); 758 ret = do_sys_poll(ufds, nfds, to);
759
825 if (ret == -EINTR) { 760 if (ret == -EINTR) {
826 struct restart_block *restart_block; 761 struct restart_block *restart_block;
762
827 restart_block = &current_thread_info()->restart_block; 763 restart_block = &current_thread_info()->restart_block;
828 restart_block->fn = do_restart_poll; 764 restart_block->fn = do_restart_poll;
829 restart_block->arg0 = (unsigned long)ufds; 765 restart_block->poll.ufds = ufds;
830 restart_block->arg1 = nfds; 766 restart_block->poll.nfds = nfds;
831 restart_block->arg2 = timeout_jiffies & 0xFFFFFFFF; 767
832 restart_block->arg3 = (u64)timeout_jiffies >> 32; 768 if (timeout_msecs >= 0) {
769 restart_block->poll.tv_sec = end_time.tv_sec;
770 restart_block->poll.tv_nsec = end_time.tv_nsec;
771 restart_block->poll.has_timeout = 1;
772 } else
773 restart_block->poll.has_timeout = 0;
774
833 ret = -ERESTART_RESTARTBLOCK; 775 ret = -ERESTART_RESTARTBLOCK;
834 } 776 }
835 return ret; 777 return ret;
@@ -841,21 +783,16 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
841 size_t sigsetsize) 783 size_t sigsetsize)
842{ 784{
843 sigset_t ksigmask, sigsaved; 785 sigset_t ksigmask, sigsaved;
844 struct timespec ts; 786 struct timespec ts, end_time, *to = NULL;
845 s64 timeout = -1;
846 int ret; 787 int ret;
847 788
848 if (tsp) { 789 if (tsp) {
849 if (copy_from_user(&ts, tsp, sizeof(ts))) 790 if (copy_from_user(&ts, tsp, sizeof(ts)))
850 return -EFAULT; 791 return -EFAULT;
851 792
852 /* Cast to u64 to make GCC stop complaining */ 793 to = &end_time;
853 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) 794 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
854 timeout = -1; /* infinite */ 795 return -EINVAL;
855 else {
856 timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
857 timeout += ts.tv_sec * HZ;
858 }
859 } 796 }
860 797
861 if (sigmask) { 798 if (sigmask) {
@@ -869,7 +806,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
869 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 806 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
870 } 807 }
871 808
872 ret = do_sys_poll(ufds, nfds, &timeout); 809 ret = do_sys_poll(ufds, nfds, to);
873 810
874 /* We can restart this syscall, usually */ 811 /* We can restart this syscall, usually */
875 if (ret == -EINTR) { 812 if (ret == -EINTR) {
@@ -887,31 +824,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
887 } else if (sigmask) 824 } else if (sigmask)
888 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 825 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
889 826
890 if (tsp && timeout >= 0) { 827 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
891 struct timespec rts;
892
893 if (current->personality & STICKY_TIMEOUTS)
894 goto sticky;
895 /* Yes, we know it's actually an s64, but it's also positive. */
896 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
897 1000;
898 rts.tv_sec = timeout;
899 if (timespec_compare(&rts, &ts) >= 0)
900 rts = ts;
901 if (copy_to_user(tsp, &rts, sizeof(rts))) {
902 sticky:
903 /*
904 * If an application puts its timeval in read-only
905 * memory, we don't want the Linux-specific update to
906 * the timeval to cause a fault after the select has
907 * completed successfully. However, because we're not
908 * updating the timeval, we can't restart the system
909 * call.
910 */
911 if (ret == -ERESTARTNOHAND && timeout >= 0)
912 ret = -EINTR;
913 }
914 }
915 828
916 return ret; 829 return ret;
917} 830}