about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Documentation/sysctl/net.txt | 12
-rw-r--r-- fs/select.c | 60
-rw-r--r-- include/net/ll_poll.h | 46
-rw-r--r-- include/uapi/asm-generic/poll.h | 2
-rw-r--r-- net/core/datagram.c | 3
-rw-r--r-- net/ipv4/tcp.c | 6
-rw-r--r-- net/socket.c | 12
7 files changed, 80 insertions, 61 deletions
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index e658bbfb641f..7323b88e26be 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -53,22 +53,24 @@ Default: 64
53low_latency_read 53low_latency_read
54---------------- 54----------------
55Low latency busy poll timeout for socket reads. (needs CONFIG_NET_LL_RX_POLL) 55Low latency busy poll timeout for socket reads. (needs CONFIG_NET_LL_RX_POLL)
56Approximate time in us to spin waiting for packets on the device queue. 56Approximate time in us to busy loop waiting for packets on the device queue.
57This sets the default value of the SO_LL socket option. 57This sets the default value of the SO_LL socket option.
58Can be set or overridden per socket by setting socket option SO_LL. 58Can be set or overridden per socket by setting socket option SO_LL, which is
59Recommended value is 50. May increase power usage. 59the preferred method of enabling.
60If you need to enable the feature globally via sysctl, a value of 50 is recommended.
61Will increase power usage.
60Default: 0 (off) 62Default: 0 (off)
61 63
62low_latency_poll 64low_latency_poll
63---------------- 65----------------
64Low latency busy poll timeout for poll and select. (needs CONFIG_NET_LL_RX_POLL) 66Low latency busy poll timeout for poll and select. (needs CONFIG_NET_LL_RX_POLL)
65Approximate time in us to spin waiting for packets on the device queue. 67Approximate time in us to busy loop waiting for events.
66Recommended value depends on the number of sockets you poll on. 68Recommended value depends on the number of sockets you poll on.
67For several sockets 50, for several hundreds 100. 69For several sockets 50, for several hundreds 100.
68For more than that you probably want to use epoll. 70For more than that you probably want to use epoll.
69Note that only sockets with SO_LL set will be busy polled, so you want to either 71Note that only sockets with SO_LL set will be busy polled, so you want to either
70selectively set SO_LL on those sockets or set sysctl.net.low_latency_read globally. 72selectively set SO_LL on those sockets or set sysctl.net.low_latency_read globally.
71May increase power usage. 73Will increase power usage.
72Default: 0 (off) 74Default: 0 (off)
73 75
74rmem_default 76rmem_default
diff --git a/fs/select.c b/fs/select.c
index f28a58592725..25cac5faf6d6 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -402,9 +402,9 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
402 poll_table *wait; 402 poll_table *wait;
403 int retval, i, timed_out = 0; 403 int retval, i, timed_out = 0;
404 unsigned long slack = 0; 404 unsigned long slack = 0;
405 unsigned int ll_flag = ll_get_flag(); 405 unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
406 u64 ll_start = ll_start_time(ll_flag); 406 u64 busy_start = busy_loop_start_time(busy_flag);
407 u64 ll_time = ll_run_time(); 407 u64 busy_end = busy_loop_end_time();
408 408
409 rcu_read_lock(); 409 rcu_read_lock();
410 retval = max_select_fd(n, fds); 410 retval = max_select_fd(n, fds);
@@ -427,7 +427,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
427 retval = 0; 427 retval = 0;
428 for (;;) { 428 for (;;) {
429 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; 429 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
430 bool can_ll = false; 430 bool can_busy_loop = false;
431 431
432 inp = fds->in; outp = fds->out; exp = fds->ex; 432 inp = fds->in; outp = fds->out; exp = fds->ex;
433 rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; 433 rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;
@@ -456,7 +456,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
456 mask = DEFAULT_POLLMASK; 456 mask = DEFAULT_POLLMASK;
457 if (f_op && f_op->poll) { 457 if (f_op && f_op->poll) {
458 wait_key_set(wait, in, out, 458 wait_key_set(wait, in, out,
459 bit, ll_flag); 459 bit, busy_flag);
460 mask = (*f_op->poll)(f.file, wait); 460 mask = (*f_op->poll)(f.file, wait);
461 } 461 }
462 fdput(f); 462 fdput(f);
@@ -475,11 +475,18 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
475 retval++; 475 retval++;
476 wait->_qproc = NULL; 476 wait->_qproc = NULL;
477 } 477 }
478 if (mask & POLL_LL)
479 can_ll = true;
480 /* got something, stop busy polling */ 478 /* got something, stop busy polling */
481 if (retval) 479 if (retval) {
482 ll_flag = 0; 480 can_busy_loop = false;
481 busy_flag = 0;
482
483 /*
484 * only remember a returned
485 * POLL_BUSY_LOOP if we asked for it
486 */
487 } else if (busy_flag & mask)
488 can_busy_loop = true;
489
483 } 490 }
484 } 491 }
485 if (res_in) 492 if (res_in)
@@ -498,8 +505,9 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
498 break; 505 break;
499 } 506 }
500 507
501 /* only if on, have sockets with POLL_LL and not out of time */ 508 /* only if found POLL_BUSY_LOOP sockets && not out of time */
502 if (ll_flag && can_ll && can_poll_ll(ll_start, ll_time)) 509 if (!need_resched() && can_busy_loop &&
510 busy_loop_range(busy_start, busy_end))
503 continue; 511 continue;
504 512
505 /* 513 /*
@@ -734,7 +742,8 @@ struct poll_list {
734 * if pwait->_qproc is non-NULL. 742 * if pwait->_qproc is non-NULL.
735 */ 743 */
736static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait, 744static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait,
737 bool *can_ll, unsigned int ll_flag) 745 bool *can_busy_poll,
746 unsigned int busy_flag)
738{ 747{
739 unsigned int mask; 748 unsigned int mask;
740 int fd; 749 int fd;
@@ -748,10 +757,10 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait,
748 mask = DEFAULT_POLLMASK; 757 mask = DEFAULT_POLLMASK;
749 if (f.file->f_op && f.file->f_op->poll) { 758 if (f.file->f_op && f.file->f_op->poll) {
750 pwait->_key = pollfd->events|POLLERR|POLLHUP; 759 pwait->_key = pollfd->events|POLLERR|POLLHUP;
751 pwait->_key |= ll_flag; 760 pwait->_key |= busy_flag;
752 mask = f.file->f_op->poll(f.file, pwait); 761 mask = f.file->f_op->poll(f.file, pwait);
753 if (mask & POLL_LL) 762 if (mask & busy_flag)
754 *can_ll = true; 763 *can_busy_poll = true;
755 } 764 }
756 /* Mask out unneeded events. */ 765 /* Mask out unneeded events. */
757 mask &= pollfd->events | POLLERR | POLLHUP; 766 mask &= pollfd->events | POLLERR | POLLHUP;
@@ -770,9 +779,10 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
770 ktime_t expire, *to = NULL; 779 ktime_t expire, *to = NULL;
771 int timed_out = 0, count = 0; 780 int timed_out = 0, count = 0;
772 unsigned long slack = 0; 781 unsigned long slack = 0;
773 unsigned int ll_flag = ll_get_flag(); 782 unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
774 u64 ll_start = ll_start_time(ll_flag); 783 u64 busy_start = busy_loop_start_time(busy_flag);
775 u64 ll_time = ll_run_time(); 784 u64 busy_end = busy_loop_end_time();
785
776 786
777 /* Optimise the no-wait case */ 787 /* Optimise the no-wait case */
778 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { 788 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
@@ -785,7 +795,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
785 795
786 for (;;) { 796 for (;;) {
787 struct poll_list *walk; 797 struct poll_list *walk;
788 bool can_ll = false; 798 bool can_busy_loop = false;
789 799
790 for (walk = list; walk != NULL; walk = walk->next) { 800 for (walk = list; walk != NULL; walk = walk->next) {
791 struct pollfd * pfd, * pfd_end; 801 struct pollfd * pfd, * pfd_end;
@@ -800,10 +810,13 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
800 * this. They'll get immediately deregistered 810 * this. They'll get immediately deregistered
801 * when we break out and return. 811 * when we break out and return.
802 */ 812 */
803 if (do_pollfd(pfd, pt, &can_ll, ll_flag)) { 813 if (do_pollfd(pfd, pt, &can_busy_loop,
814 busy_flag)) {
804 count++; 815 count++;
805 pt->_qproc = NULL; 816 pt->_qproc = NULL;
806 ll_flag = 0; 817 /* found something, stop busy polling */
818 busy_flag = 0;
819 can_busy_loop = false;
807 } 820 }
808 } 821 }
809 } 822 }
@@ -820,8 +833,9 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
820 if (count || timed_out) 833 if (count || timed_out)
821 break; 834 break;
822 835
823 /* only if on, have sockets with POLL_LL and not out of time */ 836 /* only if found POLL_BUSY_LOOP sockets && not out of time */
824 if (ll_flag && can_ll && can_poll_ll(ll_start, ll_time)) 837 if (!need_resched() && can_busy_loop &&
838 busy_loop_range(busy_start, busy_end))
825 continue; 839 continue;
826 840
827 /* 841 /*
diff --git a/include/net/ll_poll.h b/include/net/ll_poll.h
index 0d620ba19bc5..f14dd88dafc8 100644
--- a/include/net/ll_poll.h
+++ b/include/net/ll_poll.h
@@ -37,9 +37,9 @@ extern unsigned int sysctl_net_ll_poll __read_mostly;
37#define LL_FLUSH_FAILED -1 37#define LL_FLUSH_FAILED -1
38#define LL_FLUSH_BUSY -2 38#define LL_FLUSH_BUSY -2
39 39
40static inline unsigned int ll_get_flag(void) 40static inline bool net_busy_loop_on(void)
41{ 41{
42 return sysctl_net_ll_poll ? POLL_LL : 0; 42 return sysctl_net_ll_poll;
43} 43}
44 44
45/* a wrapper to make debug_smp_processor_id() happy 45/* a wrapper to make debug_smp_processor_id() happy
@@ -47,7 +47,7 @@ static inline unsigned int ll_get_flag(void)
47 * we only care that the average is bounded 47 * we only care that the average is bounded
48 */ 48 */
49#ifdef CONFIG_DEBUG_PREEMPT 49#ifdef CONFIG_DEBUG_PREEMPT
50static inline u64 ll_sched_clock(void) 50static inline u64 busy_loop_sched_clock(void)
51{ 51{
52 u64 rc; 52 u64 rc;
53 53
@@ -58,7 +58,7 @@ static inline u64 ll_sched_clock(void)
58 return rc; 58 return rc;
59} 59}
60#else /* CONFIG_DEBUG_PREEMPT */ 60#else /* CONFIG_DEBUG_PREEMPT */
61static inline u64 ll_sched_clock(void) 61static inline u64 busy_loop_sched_clock(void)
62{ 62{
63 return sched_clock(); 63 return sched_clock();
64} 64}
@@ -67,7 +67,7 @@ static inline u64 ll_sched_clock(void)
67/* we don't mind a ~2.5% imprecision so <<10 instead of *1000 67/* we don't mind a ~2.5% imprecision so <<10 instead of *1000
68 * sk->sk_ll_usec is a u_int so this can't overflow 68 * sk->sk_ll_usec is a u_int so this can't overflow
69 */ 69 */
70static inline u64 ll_sk_run_time(struct sock *sk) 70static inline u64 sk_busy_loop_end_time(struct sock *sk)
71{ 71{
72 return (u64)ACCESS_ONCE(sk->sk_ll_usec) << 10; 72 return (u64)ACCESS_ONCE(sk->sk_ll_usec) << 10;
73} 73}
@@ -75,27 +75,29 @@ static inline u64 ll_sk_run_time(struct sock *sk)
75/* in poll/select we use the global sysctl_net_ll_poll value 75/* in poll/select we use the global sysctl_net_ll_poll value
76 * only call sched_clock() if enabled 76 * only call sched_clock() if enabled
77 */ 77 */
78static inline u64 ll_run_time(void) 78static inline u64 busy_loop_end_time(void)
79{ 79{
80 return (u64)ACCESS_ONCE(sysctl_net_ll_poll) << 10; 80 return (u64)ACCESS_ONCE(sysctl_net_ll_poll) << 10;
81} 81}
82 82
83/* if flag is not set we don't need to know the time */ 83/* if flag is not set we don't need to know the time
84static inline u64 ll_start_time(unsigned int flag) 84 * so we want to avoid a potentially expensive sched_clock()
85 */
86static inline u64 busy_loop_start_time(unsigned int flag)
85{ 87{
86 return flag ? ll_sched_clock() : 0; 88 return flag ? busy_loop_sched_clock() : 0;
87} 89}
88 90
89static inline bool sk_valid_ll(struct sock *sk) 91static inline bool sk_can_busy_loop(struct sock *sk)
90{ 92{
91 return sk->sk_ll_usec && sk->sk_napi_id && 93 return sk->sk_ll_usec && sk->sk_napi_id &&
92 !need_resched() && !signal_pending(current); 94 !need_resched() && !signal_pending(current);
93} 95}
94 96
95/* careful! time_in_range64 will evaluate now twice */ 97/* careful! time_in_range64 will evaluate now twice */
96static inline bool can_poll_ll(u64 start_time, u64 run_time) 98static inline bool busy_loop_range(u64 start_time, u64 run_time)
97{ 99{
98 u64 now = ll_sched_clock(); 100 u64 now = busy_loop_sched_clock();
99 101
100 return time_in_range64(now, start_time, start_time + run_time); 102 return time_in_range64(now, start_time, start_time + run_time);
101} 103}
@@ -103,10 +105,10 @@ static inline bool can_poll_ll(u64 start_time, u64 run_time)
103/* when used in sock_poll() nonblock is known at compile time to be true 105/* when used in sock_poll() nonblock is known at compile time to be true
104 * so the loop and end_time will be optimized out 106 * so the loop and end_time will be optimized out
105 */ 107 */
106static inline bool sk_poll_ll(struct sock *sk, int nonblock) 108static inline bool sk_busy_loop(struct sock *sk, int nonblock)
107{ 109{
108 u64 start_time = ll_start_time(!nonblock); 110 u64 start_time = busy_loop_start_time(!nonblock);
109 u64 run_time = ll_sk_run_time(sk); 111 u64 end_time = sk_busy_loop_end_time(sk);
110 const struct net_device_ops *ops; 112 const struct net_device_ops *ops;
111 struct napi_struct *napi; 113 struct napi_struct *napi;
112 int rc = false; 114 int rc = false;
@@ -137,7 +139,7 @@ static inline bool sk_poll_ll(struct sock *sk, int nonblock)
137 LINUX_MIB_LOWLATENCYRXPACKETS, rc); 139 LINUX_MIB_LOWLATENCYRXPACKETS, rc);
138 140
139 } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && 141 } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
140 can_poll_ll(start_time, run_time)); 142 busy_loop_range(start_time, end_time));
141 143
142 rc = !skb_queue_empty(&sk->sk_receive_queue); 144 rc = !skb_queue_empty(&sk->sk_receive_queue);
143out: 145out:
@@ -158,27 +160,27 @@ static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb)
158} 160}
159 161
160#else /* CONFIG_NET_LL_RX_POLL */ 162#else /* CONFIG_NET_LL_RX_POLL */
161static inline unsigned long ll_get_flag(void) 163static inline unsigned long net_busy_loop_on(void)
162{ 164{
163 return 0; 165 return 0;
164} 166}
165 167
166static inline u64 ll_start_time(unsigned int flag) 168static inline u64 busy_loop_start_time(unsigned int flag)
167{ 169{
168 return 0; 170 return 0;
169} 171}
170 172
171static inline u64 ll_run_time(void) 173static inline u64 busy_loop_end_time(void)
172{ 174{
173 return 0; 175 return 0;
174} 176}
175 177
176static inline bool sk_valid_ll(struct sock *sk) 178static inline bool sk_can_busy_loop(struct sock *sk)
177{ 179{
178 return false; 180 return false;
179} 181}
180 182
181static inline bool sk_poll_ll(struct sock *sk, int nonblock) 183static inline bool sk_busy_poll(struct sock *sk, int nonblock)
182{ 184{
183 return false; 185 return false;
184} 186}
@@ -191,7 +193,7 @@ static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb)
191{ 193{
192} 194}
193 195
194static inline bool can_poll_ll(u64 start_time, u64 run_time) 196static inline bool busy_loop_range(u64 start_time, u64 run_time)
195{ 197{
196 return false; 198 return false;
197} 199}
diff --git a/include/uapi/asm-generic/poll.h b/include/uapi/asm-generic/poll.h
index 4aee586979ca..a9694982689f 100644
--- a/include/uapi/asm-generic/poll.h
+++ b/include/uapi/asm-generic/poll.h
@@ -30,7 +30,7 @@
30 30
31#define POLLFREE 0x4000 /* currently only for epoll */ 31#define POLLFREE 0x4000 /* currently only for epoll */
32 32
33#define POLL_LL 0x8000 33#define POLL_BUSY_LOOP 0x8000
34 34
35struct pollfd { 35struct pollfd {
36 int fd; 36 int fd;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 9cbaba98ce4c..6e9ab31e457e 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -208,7 +208,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
208 } 208 }
209 spin_unlock_irqrestore(&queue->lock, cpu_flags); 209 spin_unlock_irqrestore(&queue->lock, cpu_flags);
210 210
211 if (sk_valid_ll(sk) && sk_poll_ll(sk, flags & MSG_DONTWAIT)) 211 if (sk_can_busy_loop(sk) &&
212 sk_busy_loop(sk, flags & MSG_DONTWAIT))
212 continue; 213 continue;
213 214
214 /* User doesn't want to wait */ 215 /* User doesn't want to wait */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 46ed9afd1f5e..15cbfa94bd8e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1554,9 +1554,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1554 struct sk_buff *skb; 1554 struct sk_buff *skb;
1555 u32 urg_hole = 0; 1555 u32 urg_hole = 0;
1556 1556
1557 if (sk_valid_ll(sk) && skb_queue_empty(&sk->sk_receive_queue) 1557 if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
1558 && (sk->sk_state == TCP_ESTABLISHED)) 1558 (sk->sk_state == TCP_ESTABLISHED))
1559 sk_poll_ll(sk, nonblock); 1559 sk_busy_loop(sk, nonblock);
1560 1560
1561 lock_sock(sk); 1561 lock_sock(sk);
1562 1562
diff --git a/net/socket.c b/net/socket.c
index 4da14cbd49b6..45afa648364a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1148,7 +1148,7 @@ EXPORT_SYMBOL(sock_create_lite);
1148/* No kernel lock held - perfect */ 1148/* No kernel lock held - perfect */
1149static unsigned int sock_poll(struct file *file, poll_table *wait) 1149static unsigned int sock_poll(struct file *file, poll_table *wait)
1150{ 1150{
1151 unsigned int ll_flag = 0; 1151 unsigned int busy_flag = 0;
1152 struct socket *sock; 1152 struct socket *sock;
1153 1153
1154 /* 1154 /*
@@ -1156,16 +1156,16 @@ static unsigned int sock_poll(struct file *file, poll_table *wait)
1156 */ 1156 */
1157 sock = file->private_data; 1157 sock = file->private_data;
1158 1158
1159 if (sk_valid_ll(sock->sk)) { 1159 if (sk_can_busy_loop(sock->sk)) {
1160 /* this socket can poll_ll so tell the system call */ 1160 /* this socket can poll_ll so tell the system call */
1161 ll_flag = POLL_LL; 1161 busy_flag = POLL_BUSY_LOOP;
1162 1162
1163 /* once, only if requested by syscall */ 1163 /* once, only if requested by syscall */
1164 if (wait && (wait->_key & POLL_LL)) 1164 if (wait && (wait->_key & POLL_BUSY_LOOP))
1165 sk_poll_ll(sock->sk, 1); 1165 sk_busy_loop(sock->sk, 1);
1166 } 1166 }
1167 1167
1168 return ll_flag | sock->ops->poll(file, sock, wait); 1168 return busy_flag | sock->ops->poll(file, sock, wait);
1169} 1169}
1170 1170
1171static int sock_mmap(struct file *file, struct vm_area_struct *vma) 1171static int sock_mmap(struct file *file, struct vm_area_struct *vma)