author    Eric Dumazet <edumazet@google.com>     2016-04-27 13:12:25 -0400
committer David S. Miller <davem@davemloft.net>  2016-04-28 17:14:35 -0400
commit    0cef6a4c34b56a9a6894f2dad2fad4be789990e1 (patch)
tree      149b97c4eeb62be7aaf5b96d948c411b385179d7 /net/ipv4/tcp_ipv4.c
parent    b43e7199a9061562e28c72192a1d07e00ec4e97f (diff)
tcp: give prequeue mode some care
The TCP prequeue's goal is to defer processing of incoming packets to the user-space thread currently blocked in a recvmsg() system call. The intent is to spend less time processing these packets on behalf of the softirq handler, since the softirq handler is unfair to normal process-scheduler decisions: it might interrupt threads that do not even use networking.

The current prequeue implementation has the following issues:

1) It only checks the size of the prequeue against sk_rcvbuf. That was fine 15 years ago, when sk_rcvbuf was in the 64KB vicinity, but we now have ~8MB values to cope with modern networking needs. We have to add sk_rmem_alloc to the equation, since out-of-order packets can themselves use up to sk_rcvbuf of memory.

2) Even with a fixed memory truesize check, the prequeue can be filled by thousands of packets. When the prequeue needs to be flushed, either from softirq context (in tcp_prequeue() or timer code) or from process context (in tcp_prequeue_process()), this adds a latency spike, which is often not desirable. I added a fixed limit of 32 packets, as this translated to a maximum flush time of 60 us on my test hosts.

Also note that packets sitting in the prequeue are not accounted against tcp_mem, since they are not charged to sk_forward_alloc at this point. This is probably not a big deal.

Note that this might increase LINUX_MIB_TCPPREQUEUEDROPPED counts; that counter is misnamed, as packets are not dropped at all, but rather pushed to the stack (where they can be either consumed or dropped).

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
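For readers skimming the diff below, here is a minimal sketch of the new flush decision with the kernel types reduced to plain integers. prequeue_should_flush() is a hypothetical helper for illustration only, not a function introduced by the patch; the real check lives inline in tcp_prequeue().

/* Hypothetical standalone restatement of the flush condition. */
#include <stdbool.h>

#define PREQUEUE_MAX_PKTS 32	/* hard packet cap added by this patch */

static bool prequeue_should_flush(unsigned int queue_len,
				  unsigned int prequeue_truesize,
				  unsigned int rmem_alloc,
				  unsigned int rcvbuf)
{
	/* Flush when the packet count reaches the cap (bounding the flush
	 * latency, ~60 us on the author's test hosts), or when prequeue
	 * memory plus already-charged receive memory (out-of-order packets
	 * sit in sk_rmem_alloc) would exceed sk_rcvbuf.
	 */
	return queue_len >= PREQUEUE_MAX_PKTS ||
	       prequeue_truesize + rmem_alloc > rcvbuf;
}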
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--  net/ipv4/tcp_ipv4.c  |  10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 510f7a3c758b..87b173b563b0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1506,16 +1506,16 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
 
 	__skb_queue_tail(&tp->ucopy.prequeue, skb);
 	tp->ucopy.memory += skb->truesize;
-	if (tp->ucopy.memory > sk->sk_rcvbuf) {
+	if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
+	    tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
 		struct sk_buff *skb1;
 
 		BUG_ON(sock_owned_by_user(sk));
+		__NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
+				skb_queue_len(&tp->ucopy.prequeue));
 
-		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
+		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
 			sk_backlog_rcv(sk, skb1);
-			__NET_INC_STATS(sock_net(sk),
-					LINUX_MIB_TCPPREQUEUEDROPPED);
-		}
 
 		tp->ucopy.memory = 0;
 	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {