author		Eric Dumazet <edumazet@google.com>	2018-07-23 12:28:17 -0400
committer	David S. Miller <davem@davemloft.net>	2018-07-23 15:01:36 -0400
commit		72cd43ba64fc172a443410ce01645895850844c8
tree		37fdff3fb8646f8324564338ee992ddb1d3bb980 /net/ipv4/tcp_input.c
parent		3dd1c9a1270736029ffca670e9bd0265f4120600
tcp: free batches of packets in tcp_prune_ofo_queue()
Juha-Matti Tilli reported that malicious peers could inject tiny
packets in out_of_order_queue, forcing very expensive calls
to tcp_collapse_ofo_queue() and tcp_prune_ofo_queue() for
every incoming packet. out_of_order_queue rb-tree can contain
thousands of nodes, iterating over all of them is not nice.

Before linux-4.9, we would have pruned all packets in ofo_queue
in one go, every XXXX packets. XXXX depends on sk_rcvbuf and skb
truesize, but is about 7000 packets with tcp_rmem[2] default of 6 MB.
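
A back-of-the-envelope check of the "about 7000 packets" figure, as a
rough sketch rather than anything taken from the patch itself: the
~896-byte truesize assumed for one tiny out-of-order skb is an
assumption and varies with kernel config and driver.

#include <stdio.h>

int main(void)
{
	long rcvbuf   = 6 * 1024 * 1024;  /* tcp_rmem[2] default of 6 MB          */
	long truesize = 896;              /* assumed truesize of one tiny ofo skb */

	/* Before 4.9, pruning the whole ofo queue only ran once sk_rmem_alloc
	 * grew past sk_rcvbuf, i.e. roughly every rcvbuf / truesize tiny packets.
	 */
	printf("packets between full prunes: ~%ld\n", rcvbuf / truesize);  /* ~7022 */
	return 0;
}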

Since we plan to increase tcp_rmem[2] in the future to cope with
modern BDP, we cannot revert to the old behavior without great pain.

The strategy taken in this patch is to purge ~12.5 % of the queue capacity.
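
To make the new batch size concrete (again a rough sketch; the 6 MB
rcvbuf and ~896-byte tiny-skb truesize are the same assumptions as
above), goal = sk_rcvbuf >> 3 lets the loop free about one eighth of
the receive buffer worth of truesize before paying sk_mem_reclaim()
and the rcvbuf re-check:

#include <stdio.h>

int main(void)
{
	long sk_rcvbuf = 6 * 1024 * 1024;   /* assumed: tcp_rmem[2] default of 6 MB */
	long truesize  = 896;               /* assumed truesize of one tiny ofo skb */
	long goal      = sk_rcvbuf >> 3;    /* 1/8 == 12.5 % of the receive buffer  */

	/* Draining a full 6 MB ofo queue of tiny packets: the old loop paid
	 * sk_mem_reclaim() plus the rcvbuf check once per packet, the batched
	 * loop pays it once per "goal" bytes of freed truesize.
	 */
	printf("reclaim/check rounds, old loop: ~%ld\n", sk_rcvbuf / truesize);  /* ~7022 */
	printf("reclaim/check rounds, batched : %ld\n", sk_rcvbuf / goal);       /* 8     */
	return 0;
}
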
Fixes: 36a6503fedda ("tcp: refine tcp_prune_ofo_queue() to not drop all packets")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Juha-Matti Tilli <juha-matti.tilli@iki.fi>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6bade06aaf72..64e45b279431 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4942,6 +4942,7 @@ new_range:
  * 2) not add too big latencies if thousands of packets sit there.
  *    (But if application shrinks SO_RCVBUF, we could still end up
  *    freeing whole queue here)
+ * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
  *
  * Return true if queue has shrunk.
  */
@@ -4949,20 +4950,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct rb_node *node, *prev;
+	int goal;
 
 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
 		return false;
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+	goal = sk->sk_rcvbuf >> 3;
 	node = &tp->ooo_last_skb->rbnode;
 	do {
 		prev = rb_prev(node);
 		rb_erase(node, &tp->out_of_order_queue);
+		goal -= rb_to_skb(node)->truesize;
 		tcp_drop(sk, rb_to_skb(node));
-		sk_mem_reclaim(sk);
-		if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
-		    !tcp_under_memory_pressure(sk))
-			break;
+		if (!prev || goal <= 0) {
+			sk_mem_reclaim(sk);
+			if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+			    !tcp_under_memory_pressure(sk))
+				break;
+			goal = sk->sk_rcvbuf >> 3;
+		}
 		node = prev;
 	} while (node);
 	tp->ooo_last_skb = rb_to_skb(prev);