author		Linus Torvalds <torvalds@linux-foundation.org>	2018-07-24 20:31:47 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-07-24 20:31:47 -0400
commit		0723090656a03940c5ea536342f109e34b8d1257 (patch)
tree		e44648aec5b63bbdd7ab71501b6207d6431dc709 /net/ipv4/tcp_input.c
parent		f89ed2f880ccb117246ba095e12087d9c3df89c5 (diff)
parent		03bc7cab7d7218088412a75e141696a89059ab00 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:
1) Handle stations tied to AP_VLANs properly during mac80211 hw
reconfig. From Manikanta Pubbisetty.
2) Fix jump stack depth validation in nf_tables, from Taehee Yoo.
3) Fix quota handling in aRFS flow expiration of mlx5 driver, from Eran
Ben Elisha.
4) Exit path handling fix in powerpc64 BPF JIT, from Daniel Borkmann.
5) Use ptr_ring_consume_bh() in page pool code, from Tariq Toukan.
6) Fix cached netdev name leak in nf_tables, from Florian Westphal.
7) Fix memory leaks on chain rename, also from Florian Westphal.
8) Several fixes to DCTCP congestion control ACK handling, from Yuchung
Cheng.
9) Missing rcu_read_unlock() in CAIF protocol code, from Yue Haibing.
10) Fix link local address handling with VRF, from David Ahern.
11) Don't clobber 'err' on a successful call to __skb_linearize() in
skb_segment(). From Eric Dumazet.
12) Fix vxlan fdb notification races, from Roopa Prabhu.
13) Hash UDP fragments consistently, from Paolo Abeni.
14) If TCP receives lots of out-of-order tiny packets, we do really
silly stuff. Make the out-of-order queue handling more robust to this
kind of behavior, from Eric Dumazet.
15) Don't leak netlink dump state in nf_tables, from Florian Westphal.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (76 commits)
net: axienet: Fix double deregister of mdio
qmi_wwan: fix interface number for DW5821e production firmware
ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull
bnx2x: Fix invalid memory access in rss hash config path.
net/mlx4_core: Save the qpn from the input modifier in RST2INIT wrapper
r8169: restore previous behavior to accept BIOS WoL settings
cfg80211: never ignore user regulatory hint
sock: fix sg page frag coalescing in sk_alloc_sg
netfilter: nf_tables: move dumper state allocation into ->start
tcp: add tcp_ooo_try_coalesce() helper
tcp: call tcp_drop() from tcp_data_queue_ofo()
tcp: detect malicious patterns in tcp_collapse_ofo_queue()
tcp: avoid collapses in tcp_prune_queue() if possible
tcp: free batches of packets in tcp_prune_ofo_queue()
ip: hash fragments consistently
ipv6: use fib6_info_hold_safe() when necessary
can: xilinx_can: fix power management handling
can: xilinx_can: fix incorrect clear of non-processed interrupts
can: xilinx_can: fix RX overflow interrupt not being enabled
can: xilinx_can: keep only 1-2 frames in TX FIFO to fix TX accounting
...
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c	65
1 file changed, 52 insertions(+), 13 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8e5522c6833a..3bcd30a2ba06 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -215,7 +215,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
 	icsk->icsk_ack.quick = quickacks;
 }
 
-static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
@@ -223,6 +223,7 @@ static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
 	icsk->icsk_ack.pingpong = 0;
 	icsk->icsk_ack.ato = TCP_ATO_MIN;
 }
+EXPORT_SYMBOL(tcp_enter_quickack_mode);
 
 /* Send ACKs quickly, if "quick" count is not exhausted
  * and the session is not interactive.
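Why the export matters: the DCTCP ACK fixes in this pull (item 8 above) need to force an immediate ACK from module code when the CE state changes. A minimal sketch of such a caller follows; the function name is illustrative, and the real call sites are in net/ipv4/tcp_dctcp.c:

/* Illustrative only: with the symbol exported, a congestion control
 * module can request an immediate ACK instead of waiting on the
 * delayed-ACK timer.
 */
static void dctcp_ce_state_changed(struct sock *sk)
{
	/* one quick ACK is enough to signal the CE transition */
	tcp_enter_quickack_mode(sk, 1);
}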
@@ -4357,6 +4358,23 @@ static bool tcp_try_coalesce(struct sock *sk,
 	return true;
 }
 
+static bool tcp_ooo_try_coalesce(struct sock *sk,
+				 struct sk_buff *to,
+				 struct sk_buff *from,
+				 bool *fragstolen)
+{
+	bool res = tcp_try_coalesce(sk, to, from, fragstolen);
+
+	/* In case tcp_drop() is called later, update to->gso_segs */
+	if (res) {
+		u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
+			       max_t(u16, 1, skb_shinfo(from)->gso_segs);
+
+		skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+	}
+	return res;
+}
+
 static void tcp_drop(struct sock *sk, struct sk_buff *skb)
 {
 	sk_drops_add(sk, skb);
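Context for the gso_segs clamp in tcp_ooo_try_coalesce(): tcp_drop() feeds sk_drops_add(), which credits one drop per GSO segment, so a coalesced skb must carry the combined segment count or drops would be under-counted. The helper, paraphrased from include/net/sock.h of this era:

/* Each dropped skb counts for all the segments it carries,
 * never fewer than one.
 */
static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb)
{
	int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs);

	atomic_add(segs, &sk->sk_drops);
}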
@@ -4480,8 +4498,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	/* In the typical case, we are adding an skb to the end of the list.
 	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
 	 */
-	if (tcp_try_coalesce(sk, tp->ooo_last_skb,
-			     skb, &fragstolen)) {
+	if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
+				 skb, &fragstolen)) {
 coalesce_done:
 		tcp_grow_window(sk, skb);
 		kfree_skb_partial(skb, fragstolen);
@@ -4509,7 +4527,7 @@ coalesce_done:
 				/* All the bits are present. Drop. */
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				__kfree_skb(skb);
+				tcp_drop(sk, skb);
 				skb = NULL;
 				tcp_dsack_set(sk, seq, end_seq);
 				goto add_sack;
@@ -4528,11 +4546,11 @@ coalesce_done:
 						 TCP_SKB_CB(skb1)->end_seq);
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				__kfree_skb(skb1);
+				tcp_drop(sk, skb1);
 				goto merge_right;
 			}
-		} else if (tcp_try_coalesce(sk, skb1,
-					    skb, &fragstolen)) {
+		} else if (tcp_ooo_try_coalesce(sk, skb1,
+						skb, &fragstolen)) {
 			goto coalesce_done;
 		}
 		p = &parent->rb_right;
@@ -4901,6 +4919,7 @@ end:
 static void tcp_collapse_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	u32 range_truesize, sum_tiny = 0;
 	struct sk_buff *skb, *head;
 	u32 start, end;
 
@@ -4912,6 +4931,7 @@ new_range:
 	}
 	start = TCP_SKB_CB(skb)->seq;
 	end = TCP_SKB_CB(skb)->end_seq;
+	range_truesize = skb->truesize;
 
 	for (head = skb;;) {
 		skb = skb_rb_next(skb);
@@ -4922,11 +4942,20 @@ new_range:
 		if (!skb ||
 		    after(TCP_SKB_CB(skb)->seq, end) ||
 		    before(TCP_SKB_CB(skb)->end_seq, start)) {
-			tcp_collapse(sk, NULL, &tp->out_of_order_queue,
-				     head, skb, start, end);
+			/* Do not attempt collapsing tiny skbs */
+			if (range_truesize != head->truesize ||
+			    end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
+				tcp_collapse(sk, NULL, &tp->out_of_order_queue,
+					     head, skb, start, end);
+			} else {
+				sum_tiny += range_truesize;
+				if (sum_tiny > sk->sk_rcvbuf >> 3)
+					return;
+			}
 			goto new_range;
 		}
 
+		range_truesize += skb->truesize;
 		if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
 			start = TCP_SKB_CB(skb)->seq;
 		if (after(TCP_SKB_CB(skb)->end_seq, end))
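Worked numbers for the tiny-range test above, as a standalone sketch. The 4 KiB page size, cacheline size, and skb_shared_info size are assumptions for illustration (SK_MEM_QUANTUM equals PAGE_SIZE on most configurations), not ABI:

#include <stdio.h>

#define PAGE_SZ        4096	/* SK_MEM_QUANTUM, assuming 4 KiB pages */
#define CACHELINE      64	/* SMP_CACHE_BYTES, assumed */
#define SHINFO_SZ      320	/* approx sizeof(struct skb_shared_info) */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	/* SKB_WITH_OVERHEAD(SK_MEM_QUANTUM): payload that fits in one
	 * page once the cacheline-aligned shared info is subtracted.
	 */
	unsigned int thresh = PAGE_SZ - ALIGN_UP(SHINFO_SZ, CACHELINE);

	/* A single-skb range shorter than this cannot be packed any
	 * tighter by tcp_collapse(), so it is only charged against the
	 * sum_tiny budget (one eighth of sk_rcvbuf) instead.
	 */
	printf("collapse threshold ~= %u bytes\n", thresh); /* ~3776 */
	return 0;
}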
@@ -4941,6 +4970,7 @@ new_range:
  * 2) not add too big latencies if thousands of packets sit there.
  *    (But if application shrinks SO_RCVBUF, we could still end up
  *    freeing whole queue here)
+ * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
  *
  * Return true if queue has shrunk.
  */
@@ -4948,20 +4978,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct rb_node *node, *prev;
+	int goal;
 
 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
 		return false;
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+	goal = sk->sk_rcvbuf >> 3;
 	node = &tp->ooo_last_skb->rbnode;
 	do {
 		prev = rb_prev(node);
 		rb_erase(node, &tp->out_of_order_queue);
+		goal -= rb_to_skb(node)->truesize;
 		tcp_drop(sk, rb_to_skb(node));
-		sk_mem_reclaim(sk);
-		if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
-		    !tcp_under_memory_pressure(sk))
-			break;
+		if (!prev || goal <= 0) {
+			sk_mem_reclaim(sk);
+			if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+			    !tcp_under_memory_pressure(sk))
+				break;
+			goal = sk->sk_rcvbuf >> 3;
+		}
 		node = prev;
 	} while (node);
 	tp->ooo_last_skb = rb_to_skb(prev);
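Worked numbers for the batch budget: assuming the common tcp_rmem[2] default of 6 MB (a tunable sysctl, so an assumption here), each batch frees roughly 768 KB of truesize before one sk_mem_reclaim() call, amortizing accounting work that was previously paid per skb:

#include <stdio.h>

int main(void)
{
	int sk_rcvbuf = 6 * 1024 * 1024;	/* assumed tcp_rmem[2] default */
	int goal = sk_rcvbuf >> 3;		/* 12.5 % of the receive buffer */

	/* prints 786432 bytes (768 KB) freed per sk_mem_reclaim() batch */
	printf("per-batch free budget: %d bytes\n", goal);
	return 0;
}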
@@ -4996,6 +5032,9 @@ static int tcp_prune_queue(struct sock *sk)
 	else if (tcp_under_memory_pressure(sk))
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
+	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+		return 0;
+
 	tcp_collapse_ofo_queue(sk);
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		tcp_collapse(sk, &sk->sk_receive_queue, NULL,
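For context on the new early return: tcp_prune_queue() is reached from tcp_try_rmem_schedule() when an incoming segment does not fit, and the added check skips the costly collapse passes whenever trimming rcv_ssthresh alone brought rmem back under sk_rcvbuf. The caller, sketched from the same file (shape recalled from this kernel version, not quoted verbatim):

static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
				 unsigned int size)
{
	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
	    !sk_rmem_schedule(sk, skb, size)) {
		/* trim rcv_ssthresh, collapse queues, prune OFO queue */
		if (tcp_prune_queue(sk) < 0)
			return -1;

		while (!sk_rmem_schedule(sk, skb, size)) {
			if (!tcp_prune_ofo_queue(sk))
				return -1;
		}
	}
	return 0;
}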