aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
author Eric Dumazet <edumazet@google.com> 2018-07-23 12:28:17 -0400
committer David S. Miller <davem@davemloft.net> 2018-07-23 15:01:36 -0400
commit 72cd43ba64fc172a443410ce01645895850844c8 (patch)
tree 37fdff3fb8646f8324564338ee992ddb1d3bb980 /net/ipv4/tcp_input.c
parent 3dd1c9a1270736029ffca670e9bd0265f4120600 (diff)
tcp: free batches of packets in tcp_prune_ofo_queue()
Juha-Matti Tilli reported that malicious peers could inject tiny packets in out_of_order_queue, forcing very expensive calls to tcp_collapse_ofo_queue() and tcp_prune_ofo_queue() for every incoming packet. The out_of_order_queue rb-tree can contain thousands of nodes; iterating over all of them is not nice.

Before linux-4.9, we would have pruned all packets in ofo_queue in one go, every XXXX packets. XXXX depends on sk_rcvbuf and skbs truesize, but is about 7000 packets with tcp_rmem[2] default of 6 MB.

Since we plan to increase tcp_rmem[2] in the future to cope with modern BDP, we cannot revert to the old behavior without great pain.

Strategy taken in this patch is to purge ~12.5 % of the queue capacity.

Fixes: 36a6503fedda ("tcp: refine tcp_prune_ofo_queue() to not drop all packets")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Juha-Matti Tilli <juha-matti.tilli@iki.fi>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- net/ipv4/tcp_input.c 15
1 files changed, 11 insertions, 4 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6bade06aaf72..64e45b279431 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4942,6 +4942,7 @@ new_range:
4942 * 2) not add too big latencies if thousands of packets sit there. 4942 * 2) not add too big latencies if thousands of packets sit there.
4943 * (But if application shrinks SO_RCVBUF, we could still end up 4943 * (But if application shrinks SO_RCVBUF, we could still end up
4944 * freeing whole queue here) 4944 * freeing whole queue here)
4945 * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
4945 * 4946 *
4946 * Return true if queue has shrunk. 4947 * Return true if queue has shrunk.
4947 */ 4948 */
@@ -4949,20 +4950,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
4949{ 4950{
4950 struct tcp_sock *tp = tcp_sk(sk); 4951 struct tcp_sock *tp = tcp_sk(sk);
4951 struct rb_node *node, *prev; 4952 struct rb_node *node, *prev;
4953 int goal;
4952 4954
4953 if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) 4955 if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
4954 return false; 4956 return false;
4955 4957
4956 NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); 4958 NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
4959 goal = sk->sk_rcvbuf >> 3;
4957 node = &tp->ooo_last_skb->rbnode; 4960 node = &tp->ooo_last_skb->rbnode;
4958 do { 4961 do {
4959 prev = rb_prev(node); 4962 prev = rb_prev(node);
4960 rb_erase(node, &tp->out_of_order_queue); 4963 rb_erase(node, &tp->out_of_order_queue);
4964 goal -= rb_to_skb(node)->truesize;
4961 tcp_drop(sk, rb_to_skb(node)); 4965 tcp_drop(sk, rb_to_skb(node));
4962 sk_mem_reclaim(sk); 4966 if (!prev || goal <= 0) {
4963 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && 4967 sk_mem_reclaim(sk);
4964 !tcp_under_memory_pressure(sk)) 4968 if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
4965 break; 4969 !tcp_under_memory_pressure(sk))
4970 break;
4971 goal = sk->sk_rcvbuf >> 3;
4972 }
4966 node = prev; 4973 node = prev;
4967 } while (node); 4974 } while (node);
4968 tp->ooo_last_skb = rb_to_skb(prev); 4975 tp->ooo_last_skb = rb_to_skb(prev);