author     David S. Miller <davem@davemloft.net>    2018-07-23 15:01:36 -0400
committer  David S. Miller <davem@davemloft.net>    2018-07-23 15:01:48 -0400
commit     1a4f14bab1868b443f0dd3c55b689a478f82e72e (patch)
tree       67c0e66dbb4895d1c03116df3a63896c67431680
parent     3dd1c9a1270736029ffca670e9bd0265f4120600 (diff)
parent     58152ecbbcc6a0ce7fddd5bf5f6ee535834ece0c (diff)
Merge branch 'tcp-robust-ooo'
Eric Dumazet says:
====================
Juha-Matti Tilli reported that malicious peers could inject tiny
packets in out_of_order_queue, forcing very expensive calls
to tcp_collapse_ofo_queue() and tcp_prune_ofo_queue() for
every incoming packet.
With tcp_rmem[2] default of 6MB, the ooo queue could
contain ~7000 nodes.
This patch series makes sure we cut cpu cycles enough to
render the attack not critical.
We might in the future go further, like disconnecting
or black-holing proven malicious flows.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--   net/ipv4/tcp_input.c   62
1 file changed, 50 insertions(+), 12 deletions(-)
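
As a rough back-of-the-envelope illustration of the "~7000 nodes" figure in the cover letter: a 1-byte out-of-order segment still costs a full struct sk_buff plus its rounded-up data allocation in truesize. The ~880-byte per-skb figure below is an assumption for illustration only; the real truesize depends on the architecture and on allocator rounding.

#include <stdio.h>

/* Rough estimate of how many tiny skbs fit under the default tcp_rmem[2]
 * budget. The per-skb truesize is an assumed figure for a 1-byte
 * out-of-order segment, not a value taken from the patch.
 */
int main(void)
{
	const unsigned long tcp_rmem_2 = 6UL * 1024 * 1024;	/* default tcp_rmem[2]: 6 MB */
	const unsigned long tiny_skb_truesize = 880;		/* assumed truesize per tiny skb */

	printf("approx. ooo queue nodes: %lu\n",
	       tcp_rmem_2 / tiny_skb_truesize);		/* ~7150, matching the "~7000" above */
	return 0;
}
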
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6bade06aaf72..3bcd30a2ba06 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4358,6 +4358,23 @@ static bool tcp_try_coalesce(struct sock *sk,
 	return true;
 }
 
+static bool tcp_ooo_try_coalesce(struct sock *sk,
+			     struct sk_buff *to,
+			     struct sk_buff *from,
+			     bool *fragstolen)
+{
+	bool res = tcp_try_coalesce(sk, to, from, fragstolen);
+
+	/* In case tcp_drop() is called later, update to->gso_segs */
+	if (res) {
+		u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
+			       max_t(u16, 1, skb_shinfo(from)->gso_segs);
+
+		skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+	}
+	return res;
+}
+
 static void tcp_drop(struct sock *sk, struct sk_buff *skb)
 {
 	sk_drops_add(sk, skb);
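
The new tcp_ooo_try_coalesce() helper keeps to->gso_segs roughly equal to the number of segments merged into the skb, so that a later tcp_drop() can account for all of the coalesced segments rather than just one. A minimal user-space sketch of that saturating update, assuming only that gso_segs is a 16-bit count:

#include <stdint.h>
#include <stdio.h>

/* Sketch of the saturating gso_segs update done by tcp_ooo_try_coalesce():
 * a zero count stands for one segment, and the sum is clamped so it still
 * fits the 16-bit gso_segs field.
 */
static uint16_t ooo_coalesce_gso_segs(uint16_t to_segs, uint16_t from_segs)
{
	uint32_t sum = (uint32_t)(to_segs ? to_segs : 1) +
		       (from_segs ? from_segs : 1);

	return sum > 0xFFFF ? 0xFFFF : (uint16_t)sum;
}

int main(void)
{
	printf("%u\n", ooo_coalesce_gso_segs(0, 0));		/* 2 */
	printf("%u\n", ooo_coalesce_gso_segs(60000, 60000));	/* clamped to 65535 */
	return 0;
}
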
@@ -4481,8 +4498,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	/* In the typical case, we are adding an skb to the end of the list.
 	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
 	 */
-	if (tcp_try_coalesce(sk, tp->ooo_last_skb,
-			     skb, &fragstolen)) {
+	if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
+				 skb, &fragstolen)) {
 coalesce_done:
 		tcp_grow_window(sk, skb);
 		kfree_skb_partial(skb, fragstolen);
@@ -4510,7 +4527,7 @@ coalesce_done:
 				/* All the bits are present. Drop. */
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				__kfree_skb(skb);
+				tcp_drop(sk, skb);
 				skb = NULL;
 				tcp_dsack_set(sk, seq, end_seq);
 				goto add_sack;
@@ -4529,11 +4546,11 @@ coalesce_done:
 						 TCP_SKB_CB(skb1)->end_seq);
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				__kfree_skb(skb1);
+				tcp_drop(sk, skb1);
 				goto merge_right;
 			}
-		} else if (tcp_try_coalesce(sk, skb1,
-					    skb, &fragstolen)) {
+		} else if (tcp_ooo_try_coalesce(sk, skb1,
+						skb, &fragstolen)) {
 			goto coalesce_done;
 		}
 		p = &parent->rb_right;
@@ -4902,6 +4919,7 @@ end:
 static void tcp_collapse_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	u32 range_truesize, sum_tiny = 0;
 	struct sk_buff *skb, *head;
 	u32 start, end;
 
@@ -4913,6 +4931,7 @@ new_range:
 	}
 	start = TCP_SKB_CB(skb)->seq;
 	end = TCP_SKB_CB(skb)->end_seq;
+	range_truesize = skb->truesize;
 
 	for (head = skb;;) {
 		skb = skb_rb_next(skb);
@@ -4923,11 +4942,20 @@ new_range:
 		if (!skb ||
 		    after(TCP_SKB_CB(skb)->seq, end) ||
 		    before(TCP_SKB_CB(skb)->end_seq, start)) {
-			tcp_collapse(sk, NULL, &tp->out_of_order_queue,
-				     head, skb, start, end);
+			/* Do not attempt collapsing tiny skbs */
+			if (range_truesize != head->truesize ||
+			    end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
+				tcp_collapse(sk, NULL, &tp->out_of_order_queue,
+					     head, skb, start, end);
+			} else {
+				sum_tiny += range_truesize;
+				if (sum_tiny > sk->sk_rcvbuf >> 3)
+					return;
+			}
 			goto new_range;
 		}
 
+		range_truesize += skb->truesize;
 		if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
 			start = TCP_SKB_CB(skb)->seq;
 		if (after(TCP_SKB_CB(skb)->end_seq, end))
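
The collapse pass above now skips ranges that consist of a single small skb: when the range is one skb (range_truesize == head->truesize) and spans less than SKB_WITH_OVERHEAD(SK_MEM_QUANTUM) bytes of sequence space, collapsing it cannot reclaim meaningful memory, so it is left alone; once more than sk_rcvbuf/8 worth of such tiny ranges has been skipped, the whole pass gives up. A sketch of what that byte threshold works out to, using assumed x86-64-style constants rather than the kernel's real, architecture-dependent values:

#include <stdio.h>

/* Sketch of the "tiny range" threshold. Both constants below are
 * assumptions for illustration: SK_MEM_QUANTUM == PAGE_SIZE == 4096,
 * and a skb_shared_info of 320 bytes (already cache-line aligned).
 */
#define ASSUMED_PAGE_SIZE	4096
#define ASSUMED_SHINFO_SIZE	320

int main(void)
{
	int threshold = ASSUMED_PAGE_SIZE - ASSUMED_SHINFO_SIZE;

	/* A single-skb range narrower than this is not worth collapsing. */
	printf("SKB_WITH_OVERHEAD(SK_MEM_QUANTUM) ~= %d bytes\n", threshold);
	return 0;
}
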
@@ -4942,6 +4970,7 @@ new_range:
  * 2) not add too big latencies if thousands of packets sit there.
  *    (But if application shrinks SO_RCVBUF, we could still end up
  *    freeing whole queue here)
+ * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
  *
  * Return true if queue has shrunk.
  */
@@ -4949,20 +4978,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct rb_node *node, *prev;
+	int goal;
 
 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
 		return false;
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+	goal = sk->sk_rcvbuf >> 3;
 	node = &tp->ooo_last_skb->rbnode;
 	do {
 		prev = rb_prev(node);
 		rb_erase(node, &tp->out_of_order_queue);
+		goal -= rb_to_skb(node)->truesize;
 		tcp_drop(sk, rb_to_skb(node));
-		sk_mem_reclaim(sk);
-		if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
-		    !tcp_under_memory_pressure(sk))
-			break;
+		if (!prev || goal <= 0) {
+			sk_mem_reclaim(sk);
+			if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+			    !tcp_under_memory_pressure(sk))
+				break;
+			goal = sk->sk_rcvbuf >> 3;
+		}
 		node = prev;
 	} while (node);
 	tp->ooo_last_skb = rb_to_skb(prev);
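
tcp_prune_ofo_queue() previously called sk_mem_reclaim() and re-checked the memory limits after every single dropped skb; it now frees at least sk_rcvbuf >> 3 (12.5%) worth of truesize per batch before doing so. A sketch of the batch size, reusing the same assumed per-skb truesize as the estimate above:

#include <stdio.h>

/* Rough illustration of the batching in tcp_prune_ofo_queue(): skbs are
 * dropped until at least sk_rcvbuf >> 3 bytes of truesize are gone before
 * sk_mem_reclaim() runs again. Both numbers below are assumptions.
 */
int main(void)
{
	const long sk_rcvbuf = 6L * 1024 * 1024;	/* assumed receive buffer */
	const long goal = sk_rcvbuf >> 3;		/* 12.5% batch goal */
	const long tiny_skb_truesize = 880;		/* assumed truesize per tiny skb */

	printf("batch goal: %ld bytes (~%ld tiny skbs per reclaim pass)\n",
	       goal, goal / tiny_skb_truesize);
	return 0;
}
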
@@ -4997,6 +5032,9 @@ static int tcp_prune_queue(struct sock *sk)
 	else if (tcp_under_memory_pressure(sk))
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
+	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+		return 0;
+
 	tcp_collapse_ofo_queue(sk);
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		tcp_collapse(sk, &sk->sk_receive_queue, NULL,