diff options
author | Eliezer Tamir <eliezer.tamir@linux.intel.com> | 2013-06-24 03:28:03 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-06-25 19:35:52 -0400 |
commit | 2d48d67fa8cd129ea85ea02d91b4a793286866f8 (patch) | |
tree | be47e2406605760d949b08d29d988d46c4a20799 /fs | |
parent | e4f2379db6c6823c5d4a4c2c912df00c65de51d7 (diff) |
net: poll/select low latency socket support
Add busy-poll support to select/poll.
Split the sysctl value into two separate ones, one for read and one for poll.
Update Documentation/sysctl/net.txt accordingly.
Add a new poll flag, POLL_LL. When this flag is set, sock_poll will call
sk_poll_ll if possible. sock_poll sets this flag in its return value
to tell select/poll that it found a socket that can busy poll.
When poll/select have nothing to report, call the low-level
sock_poll again until we are out of time or we find something.
Once the system call finds something, it stops setting POLL_LL, so it can
return the result to the user ASAP.
Signed-off-by: Eliezer Tamir <eliezer.tamir@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/select.c | 34 |
1 files changed, 29 insertions, 5 deletions
diff --git a/fs/select.c b/fs/select.c index 8c1c96c27062..79b876eb91da 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/rcupdate.h> | 27 | #include <linux/rcupdate.h> |
28 | #include <linux/hrtimer.h> | 28 | #include <linux/hrtimer.h> |
29 | #include <linux/sched/rt.h> | 29 | #include <linux/sched/rt.h> |
30 | #include <net/ll_poll.h> | ||
30 | 31 | ||
31 | #include <asm/uaccess.h> | 32 | #include <asm/uaccess.h> |
32 | 33 | ||
@@ -384,9 +385,10 @@ get_max: | |||
384 | #define POLLEX_SET (POLLPRI) | 385 | #define POLLEX_SET (POLLPRI) |
385 | 386 | ||
386 | static inline void wait_key_set(poll_table *wait, unsigned long in, | 387 | static inline void wait_key_set(poll_table *wait, unsigned long in, |
387 | unsigned long out, unsigned long bit) | 388 | unsigned long out, unsigned long bit, |
389 | unsigned int ll_flag) | ||
388 | { | 390 | { |
389 | wait->_key = POLLEX_SET; | 391 | wait->_key = POLLEX_SET | ll_flag; |
390 | if (in & bit) | 392 | if (in & bit) |
391 | wait->_key |= POLLIN_SET; | 393 | wait->_key |= POLLIN_SET; |
392 | if (out & bit) | 394 | if (out & bit) |
@@ -400,6 +402,8 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
400 | poll_table *wait; | 402 | poll_table *wait; |
401 | int retval, i, timed_out = 0; | 403 | int retval, i, timed_out = 0; |
402 | unsigned long slack = 0; | 404 | unsigned long slack = 0; |
405 | unsigned int ll_flag = POLL_LL; | ||
406 | u64 ll_time = ll_end_time(); | ||
403 | 407 | ||
404 | rcu_read_lock(); | 408 | rcu_read_lock(); |
405 | retval = max_select_fd(n, fds); | 409 | retval = max_select_fd(n, fds); |
@@ -422,6 +426,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
422 | retval = 0; | 426 | retval = 0; |
423 | for (;;) { | 427 | for (;;) { |
424 | unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; | 428 | unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; |
429 | bool can_ll = false; | ||
425 | 430 | ||
426 | inp = fds->in; outp = fds->out; exp = fds->ex; | 431 | inp = fds->in; outp = fds->out; exp = fds->ex; |
427 | rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; | 432 | rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; |
@@ -449,7 +454,8 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
449 | f_op = f.file->f_op; | 454 | f_op = f.file->f_op; |
450 | mask = DEFAULT_POLLMASK; | 455 | mask = DEFAULT_POLLMASK; |
451 | if (f_op && f_op->poll) { | 456 | if (f_op && f_op->poll) { |
452 | wait_key_set(wait, in, out, bit); | 457 | wait_key_set(wait, in, out, |
458 | bit, ll_flag); | ||
453 | mask = (*f_op->poll)(f.file, wait); | 459 | mask = (*f_op->poll)(f.file, wait); |
454 | } | 460 | } |
455 | fdput(f); | 461 | fdput(f); |
@@ -468,6 +474,11 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
468 | retval++; | 474 | retval++; |
469 | wait->_qproc = NULL; | 475 | wait->_qproc = NULL; |
470 | } | 476 | } |
477 | if (mask & POLL_LL) | ||
478 | can_ll = true; | ||
479 | /* got something, stop busy polling */ | ||
480 | if (retval) | ||
481 | ll_flag = 0; | ||
471 | } | 482 | } |
472 | } | 483 | } |
473 | if (res_in) | 484 | if (res_in) |
@@ -486,6 +497,9 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
486 | break; | 497 | break; |
487 | } | 498 | } |
488 | 499 | ||
500 | if (can_ll && can_poll_ll(ll_time)) | ||
501 | continue; | ||
502 | |||
489 | /* | 503 | /* |
490 | * If this is the first loop and we have a timeout | 504 | * If this is the first loop and we have a timeout |
491 | * given, then we convert to ktime_t and set the to | 505 | * given, then we convert to ktime_t and set the to |
@@ -717,7 +731,8 @@ struct poll_list { | |||
717 | * pwait poll_table will be used by the fd-provided poll handler for waiting, | 731 | * pwait poll_table will be used by the fd-provided poll handler for waiting, |
718 | * if pwait->_qproc is non-NULL. | 732 | * if pwait->_qproc is non-NULL. |
719 | */ | 733 | */ |
720 | static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) | 734 | static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait, |
735 | bool *can_ll, unsigned int ll_flag) | ||
721 | { | 736 | { |
722 | unsigned int mask; | 737 | unsigned int mask; |
723 | int fd; | 738 | int fd; |
@@ -731,7 +746,10 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) | |||
731 | mask = DEFAULT_POLLMASK; | 746 | mask = DEFAULT_POLLMASK; |
732 | if (f.file->f_op && f.file->f_op->poll) { | 747 | if (f.file->f_op && f.file->f_op->poll) { |
733 | pwait->_key = pollfd->events|POLLERR|POLLHUP; | 748 | pwait->_key = pollfd->events|POLLERR|POLLHUP; |
749 | pwait->_key |= ll_flag; | ||
734 | mask = f.file->f_op->poll(f.file, pwait); | 750 | mask = f.file->f_op->poll(f.file, pwait); |
751 | if (mask & POLL_LL) | ||
752 | *can_ll = true; | ||
735 | } | 753 | } |
736 | /* Mask out unneeded events. */ | 754 | /* Mask out unneeded events. */ |
737 | mask &= pollfd->events | POLLERR | POLLHUP; | 755 | mask &= pollfd->events | POLLERR | POLLHUP; |
@@ -750,6 +768,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
750 | ktime_t expire, *to = NULL; | 768 | ktime_t expire, *to = NULL; |
751 | int timed_out = 0, count = 0; | 769 | int timed_out = 0, count = 0; |
752 | unsigned long slack = 0; | 770 | unsigned long slack = 0; |
771 | unsigned int ll_flag = POLL_LL; | ||
772 | u64 ll_time = ll_end_time(); | ||
753 | 773 | ||
754 | /* Optimise the no-wait case */ | 774 | /* Optimise the no-wait case */ |
755 | if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { | 775 | if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { |
@@ -762,6 +782,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
762 | 782 | ||
763 | for (;;) { | 783 | for (;;) { |
764 | struct poll_list *walk; | 784 | struct poll_list *walk; |
785 | bool can_ll = false; | ||
765 | 786 | ||
766 | for (walk = list; walk != NULL; walk = walk->next) { | 787 | for (walk = list; walk != NULL; walk = walk->next) { |
767 | struct pollfd * pfd, * pfd_end; | 788 | struct pollfd * pfd, * pfd_end; |
@@ -776,9 +797,10 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
776 | * this. They'll get immediately deregistered | 797 | * this. They'll get immediately deregistered |
777 | * when we break out and return. | 798 | * when we break out and return. |
778 | */ | 799 | */ |
779 | if (do_pollfd(pfd, pt)) { | 800 | if (do_pollfd(pfd, pt, &can_ll, ll_flag)) { |
780 | count++; | 801 | count++; |
781 | pt->_qproc = NULL; | 802 | pt->_qproc = NULL; |
803 | ll_flag = 0; | ||
782 | } | 804 | } |
783 | } | 805 | } |
784 | } | 806 | } |
@@ -795,6 +817,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
795 | if (count || timed_out) | 817 | if (count || timed_out) |
796 | break; | 818 | break; |
797 | 819 | ||
820 | if (can_ll && can_poll_ll(ll_time)) | ||
821 | continue; | ||
798 | /* | 822 | /* |
799 | * If this is the first loop and we have a timeout | 823 | * If this is the first loop and we have a timeout |
800 | * given, then we convert to ktime_t and set the to | 824 | * given, then we convert to ktime_t and set the to |