path: root/net/ipv4/tcp.c
author		Florian Westphal <fw@strlen.de>	2017-07-29 21:57:18 -0400
committer	David S. Miller <davem@davemloft.net>	2017-07-31 17:37:49 -0400
commit		e7942d0633c47c791ece6afa038be9cf977226de (patch)
tree		27dddb46a5358137f6cb6e63bddab14a77a840ec /net/ipv4/tcp.c
parent		764646b08d09d29adced740c26447ecdaabc9088 (diff)
tcp: remove prequeue support
prequeue is a tcp receive optimization that moves part of rx processing from bh to process context.

This only works if the socket being processed belongs to a process that is blocked in recv on that socket.

In practice, this no longer happens very often because nowadays servers tend to use an event-driven (epoll) model. Even normal client applications (web browsers) commonly use many tcp connections in parallel.

This change has a measurable impact only in netperf (which uses plain recv and thus allows prequeue use) from host to a locally running vm (~4%); there were no changes when running netperf between two physical hosts with ixgbe interfaces.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
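The condition described above is easiest to see from userspace: prequeue could only engage while a task was sleeping in a blocking recv() on the socket (the pattern netperf uses), whereas an epoll-driven server calls recv() only after the data is already queued, so the prequeue path never got a chance to run. Below is a minimal userspace sketch of the two receive patterns, using plain POSIX/Linux calls; it is illustrative only and not part of this patch.

#include <sys/epoll.h>
#include <sys/socket.h>

/* Pattern 1: blocking reader (netperf-style).  The task sleeps inside
 * recv(); this is the case where prequeue could defer part of rx
 * processing to this task's context on wakeup. */
static ssize_t read_blocking(int fd, char *buf, size_t len)
{
	return recv(fd, buf, len, 0);	/* blocks until data arrives */
}

/* Pattern 2: event-driven reader (epoll-style, common in servers).
 * recv() is called only after epoll_wait() reports readiness, i.e. the
 * data already sits in the receive queue and no task is blocked in
 * recv(), so prequeue could not trigger.  Assumes each socket was
 * registered with ev.data.fd set to its file descriptor. */
static ssize_t read_epoll(int epfd, char *buf, size_t len)
{
	struct epoll_event ev;

	if (epoll_wait(epfd, &ev, 1, -1) <= 0)
		return -1;
	return recv(ev.data.fd, buf, len, MSG_DONTWAIT);
}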
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--	net/ipv4/tcp.c	105
1 file changed, 0 insertions(+), 105 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 71ce33decd97..62018ea6f45f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -400,7 +400,6 @@ void tcp_init_sock(struct sock *sk)
 
 	tp->out_of_order_queue = RB_ROOT;
 	tcp_init_xmit_timers(sk);
-	tcp_prequeue_init(tp);
 	INIT_LIST_HEAD(&tp->tsq_node);
 
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
@@ -1525,20 +1524,6 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied)
 		tcp_send_ack(sk);
 }
 
-static void tcp_prequeue_process(struct sock *sk)
-{
-	struct sk_buff *skb;
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
-
-	while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
-		sk_backlog_rcv(sk, skb);
-
-	/* Clear memory counter. */
-	tp->ucopy.memory = 0;
-}
-
 static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
 {
 	struct sk_buff *skb;
@@ -1671,7 +1656,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 	int err;
 	int target;		/* Read at least this many bytes */
 	long timeo;
-	struct task_struct *user_recv = NULL;
 	struct sk_buff *skb, *last;
 	u32 urg_hole = 0;
 
@@ -1806,51 +1790,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 
 		tcp_cleanup_rbuf(sk, copied);
 
-		if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
-			/* Install new reader */
-			if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
-				user_recv = current;
-				tp->ucopy.task = user_recv;
-				tp->ucopy.msg = msg;
-			}
-
-			tp->ucopy.len = len;
-
-			WARN_ON(tp->copied_seq != tp->rcv_nxt &&
-				!(flags & (MSG_PEEK | MSG_TRUNC)));
-
-			/* Ugly... If prequeue is not empty, we have to
-			 * process it before releasing socket, otherwise
-			 * order will be broken at second iteration.
-			 * More elegant solution is required!!!
-			 *
-			 * Look: we have the following (pseudo)queues:
-			 *
-			 * 1. packets in flight
-			 * 2. backlog
-			 * 3. prequeue
-			 * 4. receive_queue
-			 *
-			 * Each queue can be processed only if the next ones
-			 * are empty. At this point we have empty receive_queue.
-			 * But prequeue _can_ be not empty after 2nd iteration,
-			 * when we jumped to start of loop because backlog
-			 * processing added something to receive_queue.
-			 * We cannot release_sock(), because backlog contains
-			 * packets arrived _after_ prequeued ones.
-			 *
-			 * Shortly, algorithm is clear --- to process all
-			 * the queues in order. We could make it more directly,
-			 * requeueing packets from backlog to prequeue, if
-			 * is not empty. It is more elegant, but eats cycles,
-			 * unfortunately.
-			 */
-			if (!skb_queue_empty(&tp->ucopy.prequeue))
-				goto do_prequeue;
-
-			/* __ Set realtime policy in scheduler __ */
-		}
-
 		if (copied >= target) {
 			/* Do not sleep, just process backlog. */
 			release_sock(sk);
@@ -1859,31 +1798,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 			sk_wait_data(sk, &timeo, last);
 		}
 
-		if (user_recv) {
-			int chunk;
-
-			/* __ Restore normal policy in scheduler __ */
-
-			chunk = len - tp->ucopy.len;
-			if (chunk != 0) {
-				NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
-				len -= chunk;
-				copied += chunk;
-			}
-
-			if (tp->rcv_nxt == tp->copied_seq &&
-			    !skb_queue_empty(&tp->ucopy.prequeue)) {
-do_prequeue:
-				tcp_prequeue_process(sk);
-
-				chunk = len - tp->ucopy.len;
-				if (chunk != 0) {
-					NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
-					len -= chunk;
-					copied += chunk;
-				}
-			}
-		}
 		if ((flags & MSG_PEEK) &&
 		    (peek_seq - copied - urg_hole != tp->copied_seq)) {
 			net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n",
@@ -1955,25 +1869,6 @@ skip_copy:
 		break;
 	} while (len > 0);
 
-	if (user_recv) {
-		if (!skb_queue_empty(&tp->ucopy.prequeue)) {
-			int chunk;
-
-			tp->ucopy.len = copied > 0 ? len : 0;
-
-			tcp_prequeue_process(sk);
-
-			if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
-				NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
-				len -= chunk;
-				copied += chunk;
-			}
-		}
-
-		tp->ucopy.task = NULL;
-		tp->ucopy.len = 0;
-	}
-
 	/* According to UNIX98, msg_name/msg_namelen are ignored
 	 * on connected socket. I was just happy when found this 8) --ANK
 	 */