path: root/net/ipv4/tcp_ipv4.c
author     Florian Westphal <fw@strlen.de>          2017-07-29 21:57:18 -0400
committer  David S. Miller <davem@davemloft.net>    2017-07-31 17:37:49 -0400
commit     e7942d0633c47c791ece6afa038be9cf977226de (patch)
tree       27dddb46a5358137f6cb6e63bddab14a77a840ec /net/ipv4/tcp_ipv4.c
parent     764646b08d09d29adced740c26447ecdaabc9088 (diff)
tcp: remove prequeue support
prequeue is a tcp receive optimization that moves part of rx processing from bh to process context.

This only works if the socket being processed belongs to a process that is blocked in recv on that socket.

In practice, this doesn't happen that often anymore because nowadays servers tend to use an event-driven (epoll) model. Even normal client applications (web browsers) commonly use many tcp connections in parallel.

This has a measurable impact only in netperf (which uses plain recv and thus allows prequeue use) from host to a locally running vm (~4%); however, there were no changes when using netperf between two physical hosts with ixgbe interfaces.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
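For illustration, a minimal userspace sketch of the two receive styles the changelog contrasts (the helpers blocking_reader()/epoll_reader() and the connected socket sock_fd are hypothetical; connection setup is omitted). Prequeue could only engage for the first style, where a task sleeps inside recv() on the socket and tp->ucopy.task is set; an event-driven server never leaves a reader blocked in recv(), so its packets took the normal bh path regardless.

/* Illustration only: the two receive styles described in the changelog.
 * blocking_reader()/epoll_reader() and sock_fd are hypothetical; the
 * socket is assumed to be connected elsewhere.
 */
#include <sys/epoll.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>

/* Style 1: netperf-like blocking reader.  The task sleeps inside recv(),
 * so the kernel saw a waiting reader (tp->ucopy.task != NULL) and the old
 * prequeue path could hand rx processing to this process context.
 */
void blocking_reader(int sock_fd)
{
        char buf[4096];

        while (recv(sock_fd, buf, sizeof(buf), 0) > 0)
                ;       /* consume data */
}

/* Style 2: event-driven reader.  No task is ever blocked in recv() on the
 * socket; recv() only runs after epoll reports readiness, so the prequeue
 * condition was not met and packets stayed on the regular bh path anyway.
 */
void epoll_reader(int sock_fd)
{
        struct epoll_event ev = { .events = EPOLLIN, .data.fd = sock_fd };
        struct epoll_event events[16];
        char buf[4096];
        int epfd = epoll_create1(0);

        if (epfd < 0)
                return;
        if (epoll_ctl(epfd, EPOLL_CTL_ADD, sock_fd, &ev) < 0) {
                close(epfd);
                return;
        }

        for (;;) {
                int nfds = epoll_wait(epfd, events, 16, -1);

                if (nfds < 0)
                        break;
                for (int i = 0; i < nfds; i++)
                        if (recv(events[i].data.fd, buf, sizeof(buf),
                                 MSG_DONTWAIT) <= 0)
                                goto out;
        }
out:
        close(epfd);
}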
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--  net/ipv4/tcp_ipv4.c  61
1 file changed, 1 insertion(+), 60 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3a19ea28339f..a68eb4577d36 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1541,61 +1541,6 @@ void tcp_v4_early_demux(struct sk_buff *skb)
         }
 }
 
-/* Packet is added to VJ-style prequeue for processing in process
- * context, if a reader task is waiting. Apparently, this exciting
- * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
- * failed somewhere. Latency? Burstiness? Well, at least now we will
- * see, why it failed. 8)8) --ANK
- *
- */
-bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
-{
-        struct tcp_sock *tp = tcp_sk(sk);
-
-        if (sysctl_tcp_low_latency || !tp->ucopy.task)
-                return false;
-
-        if (skb->len <= tcp_hdrlen(skb) &&
-            skb_queue_len(&tp->ucopy.prequeue) == 0)
-                return false;
-
-        /* Before escaping RCU protected region, we need to take care of skb
-         * dst. Prequeue is only enabled for established sockets.
-         * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
-         * Instead of doing full sk_rx_dst validity here, let's perform
-         * an optimistic check.
-         */
-        if (likely(sk->sk_rx_dst))
-                skb_dst_drop(skb);
-        else
-                skb_dst_force_safe(skb);
-
-        __skb_queue_tail(&tp->ucopy.prequeue, skb);
-        tp->ucopy.memory += skb->truesize;
-        if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
-            tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
-                struct sk_buff *skb1;
-
-                BUG_ON(sock_owned_by_user(sk));
-                __NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
-                                skb_queue_len(&tp->ucopy.prequeue));
-
-                while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
-                        sk_backlog_rcv(sk, skb1);
-
-                tp->ucopy.memory = 0;
-        } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
-                wake_up_interruptible_sync_poll(sk_sleep(sk),
-                                                POLLIN | POLLRDNORM | POLLRDBAND);
-                if (!inet_csk_ack_scheduled(sk))
-                        inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-                                                  (3 * tcp_rto_min(sk)) / 4,
-                                                  TCP_RTO_MAX);
-        }
-        return true;
-}
-EXPORT_SYMBOL(tcp_prequeue);
-
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
         u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
@@ -1770,8 +1715,7 @@ process:
         tcp_segs_in(tcp_sk(sk), skb);
         ret = 0;
         if (!sock_owned_by_user(sk)) {
-                if (!tcp_prequeue(sk, skb))
-                        ret = tcp_v4_do_rcv(sk, skb);
+                ret = tcp_v4_do_rcv(sk, skb);
         } else if (tcp_add_backlog(sk, skb)) {
                 goto discard_and_relse;
         }
@@ -1936,9 +1880,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
         }
 #endif
 
-        /* Clean prequeue, it must be empty really */
-        __skb_queue_purge(&tp->ucopy.prequeue);
-
         /* Clean up a referenced TCP bind bucket. */
         if (inet_csk(sk)->icsk_bind_hash)
                 inet_put_port(sk);
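For context on the tcp_prequeue() function removed above: it appended each packet to a per-socket queue, woke the blocked reader when the first packet was queued (and armed a shortened delayed-ACK timer), and drained the whole queue through sk_backlog_rcv() once it reached 32 skbs or the accounted memory plus sk_rmem_alloc exceeded sk_rcvbuf. The following standalone sketch, with hypothetical names and the memory accounting simplified to a single budget, shows the same batch-until-threshold pattern.

/* Standalone sketch (hypothetical names, no kernel APIs) of the batching
 * policy the removed tcp_prequeue() implemented: queue packets for a
 * sleeping reader, wake it when the first one arrives, and drain the whole
 * batch through the slow path once a count or memory limit is exceeded.
 */
#include <stddef.h>
#include <stdio.h>

#define PREQUEUE_MAX_PKTS 32    /* mirrors the skb_queue_len() >= 32 check */

struct pkt {
        struct pkt *next;
        size_t truesize;        /* memory charged to this packet */
};

struct prequeue {
        struct pkt *head, *tail;
        unsigned int len;
        size_t memory;          /* queued bytes, like tp->ucopy.memory */
        size_t rcvbuf;          /* budget, like sk->sk_rcvbuf */
};

/* Stand-ins for the real kernel actions. */
static void wake_reader(struct prequeue *q)
{
        (void)q;
        puts("wake blocked reader");            /* wake_up_interruptible_sync_poll() */
}

static void slow_path_process(struct pkt *p)
{
        (void)p;
        puts("process packet in slow path");    /* sk_backlog_rcv() */
}

static void prequeue_add(struct prequeue *q, struct pkt *p)
{
        /* Append the packet and account its memory. */
        p->next = NULL;
        if (q->tail)
                q->tail->next = p;
        else
                q->head = p;
        q->tail = p;
        q->len++;
        q->memory += p->truesize;

        if (q->len >= PREQUEUE_MAX_PKTS || q->memory > q->rcvbuf) {
                /* Too much pending work for the reader: flush everything
                 * now, as tcp_prequeue() did via sk_backlog_rcv().
                 */
                struct pkt *cur = q->head, *next;

                for (; cur; cur = next) {
                        next = cur->next;
                        slow_path_process(cur);
                }
                q->head = q->tail = NULL;
                q->len = 0;
                q->memory = 0;
        } else if (q->len == 1) {
                /* First packet of a batch: wake the reader so it can do
                 * the protocol work in its own (process) context.
                 */
                wake_reader(q);
        }
}

int main(void)
{
        struct prequeue q = { .rcvbuf = 4096 };
        struct pkt pkts[4] = {
                { .truesize = 1500 }, { .truesize = 1500 },
                { .truesize = 1500 }, { .truesize = 1500 },
        };

        /* The third packet pushes memory past rcvbuf and forces a flush. */
        for (int i = 0; i < 4; i++)
                prequeue_add(&q, &pkts[i]);
        return 0;
}

Running the sketch, the first and fourth packets trigger a reader wakeup, while the third exceeds the 4096-byte budget and pushes the queued batch through the slow path, mirroring the drop-to-backlog behaviour counted by LINUX_MIB_TCPPREQUEUEDROPPED in the removed code.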