author    Linus Torvalds <torvalds@linux-foundation.org>  2018-07-24 20:31:47 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2018-07-24 20:31:47 -0400
commit    0723090656a03940c5ea536342f109e34b8d1257 (patch)
tree      e44648aec5b63bbdd7ab71501b6207d6431dc709 /net/ipv4/tcp_input.c
parent    f89ed2f880ccb117246ba095e12087d9c3df89c5 (diff)
parent    03bc7cab7d7218088412a75e141696a89059ab00 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:

 1) Handle stations tied to AP_VLANs properly during mac80211 hw reconfig. From Manikanta Pubbisetty.

 2) Fix jump stack depth validation in nf_tables, from Taehee Yoo.

 3) Fix quota handling in aRFS flow expiration of mlx5 driver, from Eran Ben Elisha.

 4) Exit path handling fix in powerpc64 BPF JIT, from Daniel Borkmann.

 5) Use ptr_ring_consume_bh() in page pool code, from Tariq Toukan.

 6) Fix cached netdev name leak in nf_tables, from Florian Westphal.

 7) Fix memory leaks on chain rename, also from Florian Westphal.

 8) Several fixes to DCTCP congestion control ACK handling, from Yuchung Cheng.

 9) Missing rcu_read_unlock() in CAIF protocol code, from Yue Haibing.

10) Fix link local address handling with VRF, from David Ahern.

11) Don't clobber 'err' on a successful call to __skb_linearize() in skb_segment(). From Eric Dumazet.

12) Fix vxlan fdb notification races, from Roopa Prabhu.

13) Hash UDP fragments consistently, from Paolo Abeni.

14) If TCP receives lots of out of order tiny packets, we do really silly stuff. Make the out-of-order queue handling more robust to this kind of behavior, from Eric Dumazet.

15) Don't leak netlink dump state in nf_tables, from Florian Westphal.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (76 commits)
  net: axienet: Fix double deregister of mdio
  qmi_wwan: fix interface number for DW5821e production firmware
  ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull
  bnx2x: Fix invalid memory access in rss hash config path.
  net/mlx4_core: Save the qpn from the input modifier in RST2INIT wrapper
  r8169: restore previous behavior to accept BIOS WoL settings
  cfg80211: never ignore user regulatory hint
  sock: fix sg page frag coalescing in sk_alloc_sg
  netfilter: nf_tables: move dumper state allocation into ->start
  tcp: add tcp_ooo_try_coalesce() helper
  tcp: call tcp_drop() from tcp_data_queue_ofo()
  tcp: detect malicious patterns in tcp_collapse_ofo_queue()
  tcp: avoid collapses in tcp_prune_queue() if possible
  tcp: free batches of packets in tcp_prune_ofo_queue()
  ip: hash fragments consistently
  ipv6: use fib6_info_hold_safe() when necessary
  can: xilinx_can: fix power management handling
  can: xilinx_can: fix incorrect clear of non-processed interrupts
  can: xilinx_can: fix RX overflow interrupt not being enabled
  can: xilinx_can: keep only 1-2 frames in TX FIFO to fix TX accounting
  ...
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c | 65 ++++++++++++++++++++++++++++++---------
 1 file changed, 52 insertions(+), 13 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8e5522c6833a..3bcd30a2ba06 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -215,7 +215,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
         icsk->icsk_ack.quick = quickacks;
 }
 
-static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
 {
         struct inet_connection_sock *icsk = inet_csk(sk);
 
@@ -223,6 +223,7 @@ static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
         icsk->icsk_ack.pingpong = 0;
         icsk->icsk_ack.ato = TCP_ATO_MIN;
 }
+EXPORT_SYMBOL(tcp_enter_quickack_mode);
 
 /* Send ACKs quickly, if "quick" count is not exhausted
  * and the session is not interactive.
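The two hunks above drop the static qualifier from tcp_enter_quickack_mode() and export the symbol so that code outside tcp_input.c can force quick-ACK mode; the DCTCP ACK-handling fixes listed in the pull message are the likely consumer, though that is an inference from the commit list rather than something this diff shows. As a minimal, purely illustrative user-space sketch of what external linkage plus a shared prototype buys (every name below is a made-up stand-in, not kernel code):

/* Sketch only: plain user-space C showing what removing "static" buys.  The
 * prototype stands in for the declaration a shared header would carry, so
 * that code compiled "elsewhere" (simulated here by another_module_hook())
 * can call the formerly file-local function.  All names are made up.
 */
#include <stdio.h>

/* "shared header": visible to every file that includes it */
void enter_quickack_mode(int *quick_ack_budget, unsigned int max_quickacks);

/* "tcp_input.c": definition, no longer static, so it has external linkage */
void enter_quickack_mode(int *quick_ack_budget, unsigned int max_quickacks)
{
        *quick_ack_budget = (int)max_quickacks;  /* send the next ACKs at once */
}

/* "tcp_dctcp.c": a separate file could now call it through the header */
static void another_module_hook(int *quick_ack_budget)
{
        enter_quickack_mode(quick_ack_budget, 1);
}

int main(void)
{
        int budget = 0;

        another_module_hook(&budget);
        printf("quick ACK budget = %d\n", budget);  /* prints 1 */
        return 0;
}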
@@ -4357,6 +4358,23 @@ static bool tcp_try_coalesce(struct sock *sk,
         return true;
 }
 
+static bool tcp_ooo_try_coalesce(struct sock *sk,
+                             struct sk_buff *to,
+                             struct sk_buff *from,
+                             bool *fragstolen)
+{
+        bool res = tcp_try_coalesce(sk, to, from, fragstolen);
+
+        /* In case tcp_drop() is called later, update to->gso_segs */
+        if (res) {
+                u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
+                               max_t(u16, 1, skb_shinfo(from)->gso_segs);
+
+                skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+        }
+        return res;
+}
+
 static void tcp_drop(struct sock *sk, struct sk_buff *skb)
 {
         sk_drops_add(sk, skb);
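tcp_ooo_try_coalesce() behaves like tcp_try_coalesce() but also folds the source skb's segment count into the destination, counting each skb as at least one segment and saturating at the 16-bit gso_segs limit, so a later tcp_drop() of the coalesced skb is charged as the right number of segments. A tiny user-space sketch of that saturating arithmetic (the helper name is made up, this is not kernel code):

/* Sketch only: the saturating segment accounting done by
 * tcp_ooo_try_coalesce() above, as a standalone user-space helper.  Each skb
 * counts as at least one segment, and the sum is clamped to the 16-bit
 * gso_segs field (0xFFFF).
 */
#include <stdio.h>
#include <stdint.h>

static uint16_t coalesced_gso_segs(uint16_t to_segs, uint16_t from_segs)
{
        uint32_t sum = (to_segs ? to_segs : 1) + (from_segs ? from_segs : 1);

        return sum > 0xFFFF ? 0xFFFF : (uint16_t)sum;
}

int main(void)
{
        /* Two coalesced skbs with gso_segs == 0 still account as 2 segments. */
        printf("%u\n", (unsigned)coalesced_gso_segs(0, 0));          /* 2 */
        printf("%u\n", (unsigned)coalesced_gso_segs(65000, 60000));  /* 65535 */
        return 0;
}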
@@ -4480,8 +4498,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
         /* In the typical case, we are adding an skb to the end of the list.
          * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
          */
-        if (tcp_try_coalesce(sk, tp->ooo_last_skb,
-                             skb, &fragstolen)) {
+        if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
+                                 skb, &fragstolen)) {
 coalesce_done:
                 tcp_grow_window(sk, skb);
                 kfree_skb_partial(skb, fragstolen);
@@ -4509,7 +4527,7 @@ coalesce_done:
                                 /* All the bits are present. Drop. */
                                 NET_INC_STATS(sock_net(sk),
                                               LINUX_MIB_TCPOFOMERGE);
-                                __kfree_skb(skb);
+                                tcp_drop(sk, skb);
                                 skb = NULL;
                                 tcp_dsack_set(sk, seq, end_seq);
                                 goto add_sack;
@@ -4528,11 +4546,11 @@ coalesce_done:
                                                  TCP_SKB_CB(skb1)->end_seq);
                                 NET_INC_STATS(sock_net(sk),
                                               LINUX_MIB_TCPOFOMERGE);
-                                __kfree_skb(skb1);
+                                tcp_drop(sk, skb1);
                                 goto merge_right;
                         }
-                } else if (tcp_try_coalesce(sk, skb1,
-                                            skb, &fragstolen)) {
+                } else if (tcp_ooo_try_coalesce(sk, skb1,
+                                                skb, &fragstolen)) {
                         goto coalesce_done;
                 }
                 p = &parent->rb_right;
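In tcp_data_queue_ofo(), the coalescing attempts now go through the new helper and the two __kfree_skb() calls become tcp_drop(), so duplicates discarded from the out-of-order tree are reflected in the socket's drop statistics rather than silently freed. A small user-space sketch of the account-then-free idea (fake types and names, not the kernel skb API):

/* Sketch only: why tcp_drop() is preferred over __kfree_skb() here.  The
 * buffer is still freed, but the socket's drop counter is updated first, so
 * the discarded out-of-order duplicates remain visible in drop statistics.
 * Everything below is a user-space stand-in, not kernel code.
 */
#include <stdio.h>
#include <stdlib.h>

struct fake_sock { unsigned long drops; };
struct fake_skb  { unsigned int segs; void *data; };

static void fake_kfree_skb(struct fake_skb *skb)
{
        free(skb->data);
        free(skb);
}

/* Counterpart of tcp_drop(): account first, then free. */
static void fake_tcp_drop(struct fake_sock *sk, struct fake_skb *skb)
{
        sk->drops += skb->segs ? skb->segs : 1;
        fake_kfree_skb(skb);
}

int main(void)
{
        struct fake_sock sk = { 0 };
        struct fake_skb *dup = calloc(1, sizeof(*dup));

        if (!dup)
                return 1;
        dup->segs = 3;            /* e.g. a coalesced 3-segment duplicate */
        dup->data = malloc(64);

        fake_tcp_drop(&sk, dup);  /* freed *and* counted */
        printf("drops = %lu\n", sk.drops);  /* 3 */
        return 0;
}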
@@ -4901,6 +4919,7 @@ end:
 static void tcp_collapse_ofo_queue(struct sock *sk)
 {
         struct tcp_sock *tp = tcp_sk(sk);
+        u32 range_truesize, sum_tiny = 0;
         struct sk_buff *skb, *head;
         u32 start, end;
 
@@ -4912,6 +4931,7 @@ new_range:
         }
         start = TCP_SKB_CB(skb)->seq;
         end = TCP_SKB_CB(skb)->end_seq;
+        range_truesize = skb->truesize;
 
         for (head = skb;;) {
                 skb = skb_rb_next(skb);
@@ -4922,11 +4942,20 @@ new_range:
                 if (!skb ||
                     after(TCP_SKB_CB(skb)->seq, end) ||
                     before(TCP_SKB_CB(skb)->end_seq, start)) {
-                        tcp_collapse(sk, NULL, &tp->out_of_order_queue,
-                                     head, skb, start, end);
+                        /* Do not attempt collapsing tiny skbs */
+                        if (range_truesize != head->truesize ||
+                            end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
+                                tcp_collapse(sk, NULL, &tp->out_of_order_queue,
+                                             head, skb, start, end);
+                        } else {
+                                sum_tiny += range_truesize;
+                                if (sum_tiny > sk->sk_rcvbuf >> 3)
+                                        return;
+                        }
                         goto new_range;
                 }
 
+                range_truesize += skb->truesize;
                 if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
                         start = TCP_SKB_CB(skb)->seq;
                 if (after(TCP_SKB_CB(skb)->end_seq, end))
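tcp_collapse_ofo_queue() now tracks the truesize of each contiguous range and refuses to collapse a range that consists of a single small skb, since collapsing such ranges costs CPU without reclaiming memory; once the skipped tiny ranges add up to more than an eighth of sk_rcvbuf it stops altogether. A standalone sketch of that policy (illustrative names, and MIN_RANGE_BYTES is only a placeholder for SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)):

/* Sketch only (user-space, illustrative names): the range policy added to
 * tcp_collapse_ofo_queue().  A contiguous out-of-order range is worth
 * collapsing if it holds more than one skb (its total truesize differs from
 * the head skb's truesize) or spans a sizeable chunk of sequence space;
 * otherwise it counts as "tiny", and once tiny ranges exceed 1/8 of the
 * receive buffer the walk gives up.  MIN_RANGE_BYTES is just a placeholder
 * for SKB_WITH_OVERHEAD(SK_MEM_QUANTUM).
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MIN_RANGE_BYTES 3200u

struct ofo_range {
        uint32_t start, end;     /* sequence numbers covered by the range */
        uint32_t truesize;       /* memory used by every skb in the range */
        uint32_t head_truesize;  /* memory used by the first skb alone */
};

static bool worth_collapsing(const struct ofo_range *r)
{
        return r->truesize != r->head_truesize ||    /* more than one skb */
               r->end - r->start >= MIN_RANGE_BYTES; /* or a sizeable range */
}

int main(void)
{
        struct ofo_range tiny = { .start = 0, .end = 1,
                                  .truesize = 768, .head_truesize = 768 };
        struct ofo_range big  = { .start = 0, .end = 64000,
                                  .truesize = 90000, .head_truesize = 768 };
        uint32_t rcvbuf = 131072, sum_tiny = 0;

        printf("tiny range collapsible: %d\n", worth_collapsing(&tiny)); /* 0 */
        if (!worth_collapsing(&tiny)) {
                sum_tiny += tiny.truesize;
                if (sum_tiny > rcvbuf / 8)
                        puts("too many tiny ranges, stop collapsing");
        }
        printf("big range collapsible:  %d\n", worth_collapsing(&big));  /* 1 */
        return 0;
}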
@@ -4941,6 +4970,7 @@ new_range:
  * 2) not add too big latencies if thousands of packets sit there.
  *    (But if application shrinks SO_RCVBUF, we could still end up
  *    freeing whole queue here)
+ * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
  *
  * Return true if queue has shrunk.
  */
@@ -4948,20 +4978,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 {
         struct tcp_sock *tp = tcp_sk(sk);
         struct rb_node *node, *prev;
+        int goal;
 
         if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
                 return false;
 
         NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+        goal = sk->sk_rcvbuf >> 3;
         node = &tp->ooo_last_skb->rbnode;
         do {
                 prev = rb_prev(node);
                 rb_erase(node, &tp->out_of_order_queue);
+                goal -= rb_to_skb(node)->truesize;
                 tcp_drop(sk, rb_to_skb(node));
-                sk_mem_reclaim(sk);
-                if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
-                    !tcp_under_memory_pressure(sk))
-                        break;
+                if (!prev || goal <= 0) {
+                        sk_mem_reclaim(sk);
+                        if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+                            !tcp_under_memory_pressure(sk))
+                                break;
+                        goal = sk->sk_rcvbuf >> 3;
+                }
                 node = prev;
         } while (node);
         tp->ooo_last_skb = rb_to_skb(prev);
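tcp_prune_ofo_queue() still drops the highest-sequence skbs first, but it now frees roughly sk_rcvbuf/8 worth of truesize per batch before running the memory-reclaim and recheck step, rather than reclaiming after every single packet, which matches the new point 3) in the comment above. A user-space sketch of that batching, with made-up stand-ins for the kernel helpers:

/* Sketch only: the batching added to tcp_prune_ofo_queue().  Packets are
 * still freed from the newest (highest sequence) end, but the relatively
 * costly memory-accounting pass runs once per ~12.5% of the receive buffer
 * worth of freed truesize, not once per packet.  All names are user-space
 * stand-ins, not kernel APIs.
 */
#include <stdbool.h>
#include <stdio.h>

struct pkt { int truesize; };

static void free_pkt(struct pkt *p)  { (void)p; /* drop the packet */ }
static void reclaim_memory(void)     { puts("reclaim pass"); }
static bool back_under_budget(void)  { return false; /* placeholder check */ }

static void prune_ofo_batched(struct pkt *pkts, int n, int rcvbuf)
{
        int goal = rcvbuf >> 3;            /* free at least 12.5% per batch */

        for (int i = n - 1; i >= 0; i--) { /* walk from the newest packet */
                goal -= pkts[i].truesize;
                free_pkt(&pkts[i]);
                if (i == 0 || goal <= 0) { /* batch boundary or list end */
                        reclaim_memory();
                        if (back_under_budget())
                                break;     /* queue shrank enough, stop */
                        goal = rcvbuf >> 3;
                }
        }
}

int main(void)
{
        struct pkt pkts[6] = { {700}, {700}, {700}, {700}, {700}, {700} };

        prune_ofo_batched(pkts, 6, 16384); /* two reclaim passes, not six */
        return 0;
}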
@@ -4996,6 +5032,9 @@ static int tcp_prune_queue(struct sock *sk)
         else if (tcp_under_memory_pressure(sk))
                 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
+        if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+                return 0;
+
         tcp_collapse_ofo_queue(sk);
         if (!skb_queue_empty(&sk->sk_receive_queue))
                 tcp_collapse(sk, &sk->sk_receive_queue, NULL,
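The last hunk adds an early exit to tcp_prune_queue(): if clamping the window has already brought sk_rmem_alloc back under sk_rcvbuf, the function returns before any of the collapse work below runs. A minimal user-space sketch of that guard-clause shape, with hypothetical stand-ins rather than kernel APIs:

/* Sketch only: the "bail out early once the memory budget is met" shape that
 * the hunk above gives tcp_prune_queue(), using fake types and helpers.
 */
#include <stdio.h>

struct fake_sock {
        long rmem_alloc;  /* stand-in for sk_rmem_alloc */
        long rcvbuf;      /* stand-in for sk_rcvbuf */
};

static void clamp_window(struct fake_sock *sk)    { (void)sk; }
static void collapse_queues(struct fake_sock *sk)
{
        (void)sk;
        puts("expensive collapse work");
}

static int prune_queue(struct fake_sock *sk)
{
        clamp_window(sk);

        /* New early exit: nothing more to do if we are back under budget. */
        if (sk->rmem_alloc <= sk->rcvbuf)
                return 0;

        collapse_queues(sk);
        return 1;
}

int main(void)
{
        struct fake_sock ok   = { .rmem_alloc = 1000, .rcvbuf = 4096 };
        struct fake_sock over = { .rmem_alloc = 8192, .rcvbuf = 4096 };

        printf("within budget -> %d\n", prune_queue(&ok));   /* 0, no collapse */
        printf("over budget   -> %d\n", prune_queue(&over)); /* collapse runs  */
        return 0;
}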