author    | David S. Miller <davem@davemloft.net> | 2018-07-24 22:21:58 -0400
committer | David S. Miller <davem@davemloft.net> | 2018-07-24 22:21:58 -0400
commit    | 19725496da5602b401eae389736ab00d1817e264 (patch)
tree      | 4c1a94bf0121769110f1b9c08ee337a55679a48a /net/ipv4/tcp_input.c
parent    | aea5f654e6b78a0c976f7a25950155932c77a53f (diff)
parent    | 9981b4fb8684883dcc0daf088891ff32260b9794 (diff)
Merge ra.kernel.org:/pub/scm/linux/kernel/git/davem/net
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 65
1 file changed, 52 insertions, 13 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 91dbb9afb950..d51fa358b2b1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -216,7 +216,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
 	icsk->icsk_ack.quick = quickacks;
 }
 
-static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
@@ -224,6 +224,7 @@ static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
 	icsk->icsk_ack.pingpong = 0;
 	icsk->icsk_ack.ato = TCP_ATO_MIN;
 }
+EXPORT_SYMBOL(tcp_enter_quickack_mode);
 
 /* Send ACKs quickly, if "quick" count is not exhausted
  * and the session is not interactive.
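Dropping the static qualifier and adding EXPORT_SYMBOL() makes tcp_enter_quickack_mode() callable from outside tcp_input.c, including from modules. A minimal sketch of a hypothetical caller follows; the function name example_force_quick_acks() and the cap of 2 are illustrative only, and it assumes the matching declaration was added to include/net/tcp.h elsewhere in this merge:

	#include <net/tcp.h>

	/* Hypothetical module code: ask that the next ACKs go out
	 * immediately instead of waiting on the delayed-ACK timer.
	 * The second argument caps how many quick ACKs are granted
	 * (see tcp_incr_quickack() in the hunk above).
	 */
	static void example_force_quick_acks(struct sock *sk)
	{
		tcp_enter_quickack_mode(sk, 2);
	}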
@@ -4366,6 +4367,23 @@ static bool tcp_try_coalesce(struct sock *sk,
 	return true;
 }
 
+static bool tcp_ooo_try_coalesce(struct sock *sk,
+			     struct sk_buff *to,
+			     struct sk_buff *from,
+			     bool *fragstolen)
+{
+	bool res = tcp_try_coalesce(sk, to, from, fragstolen);
+
+	/* In case tcp_drop() is called later, update to->gso_segs */
+	if (res) {
+		u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
+			       max_t(u16, 1, skb_shinfo(from)->gso_segs);
+
+		skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+	}
+	return res;
+}
+
 static void tcp_drop(struct sock *sk, struct sk_buff *skb)
 {
 	sk_drops_add(sk, skb);
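The new tcp_ooo_try_coalesce() wrapper behaves like tcp_try_coalesce() but, when the merge succeeds, stores a saturating sum of both skbs' gso_segs in the surviving skb, so a later tcp_drop() on that skb accounts for every original segment it absorbed rather than just one. A worked example of the arithmetic, as a standalone userspace sketch with the kernel's max_t/min_t spelled out in plain C:

	#include <stdint.h>
	#include <stdio.h>

	/* Mirrors the max_t(u16, 1, ...) + max_t(u16, 1, ...) sum and the
	 * min_t(u32, ..., 0xFFFF) clamp from tcp_ooo_try_coalesce().
	 * gso_segs == 0 means the skb carries a single segment, hence the
	 * lower bound of 1 on each operand.
	 */
	static uint16_t coalesced_gso_segs(uint16_t to_segs, uint16_t from_segs)
	{
		uint32_t sum = (to_segs ? to_segs : 1) +
			       (uint32_t)(from_segs ? from_segs : 1);

		return sum > 0xFFFF ? 0xFFFF : (uint16_t)sum;
	}

	int main(void)
	{
		printf("%u\n", coalesced_gso_segs(0, 0));            /* 2 */
		printf("%u\n", coalesced_gso_segs(45, 3));           /* 48 */
		printf("%u\n", coalesced_gso_segs(0xFFFF, 0xFFFF));  /* 65535 (clamped) */
		return 0;
	}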
@@ -4489,8 +4507,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	/* In the typical case, we are adding an skb to the end of the list.
 	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
 	 */
-	if (tcp_try_coalesce(sk, tp->ooo_last_skb,
-			     skb, &fragstolen)) {
+	if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
+				 skb, &fragstolen)) {
 coalesce_done:
 		tcp_grow_window(sk, skb);
 		kfree_skb_partial(skb, fragstolen);
@@ -4518,7 +4536,7 @@ coalesce_done:
 				/* All the bits are present. Drop. */
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				__kfree_skb(skb);
+				tcp_drop(sk, skb);
 				skb = NULL;
 				tcp_dsack_set(sk, seq, end_seq);
 				goto add_sack;
@@ -4537,11 +4555,11 @@ coalesce_done:
 						 TCP_SKB_CB(skb1)->end_seq);
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				__kfree_skb(skb1);
+				tcp_drop(sk, skb1);
 				goto merge_right;
 			}
-		} else if (tcp_try_coalesce(sk, skb1,
-					    skb, &fragstolen)) {
+		} else if (tcp_ooo_try_coalesce(sk, skb1,
+						skb, &fragstolen)) {
 			goto coalesce_done;
 		}
 		p = &parent->rb_right;
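Both merge paths in tcp_data_queue_ofo() now free duplicate skbs through tcp_drop() instead of __kfree_skb(), and both coalesce call sites use the new helper, so every packet discarded from the out-of-order queue shows up in the socket's drop counter. tcp_drop() (visible in the context of the earlier hunk) calls sk_drops_add() before freeing, which roughly credits max(1, gso_segs) to sk->sk_drops; that is exactly why tcp_ooo_try_coalesce() keeps gso_segs up to date. A simplified model of that accounting, with made-up struct names standing in for the kernel's sock and sk_buff:

	#include <stdio.h>

	struct fake_sk  { long drops; };
	struct fake_skb { unsigned short gso_segs; };

	/* Rough stand-in for sk_drops_add() + tcp_drop(): a coalesced ooo
	 * skb that represented N original segments bumps the counter by N.
	 */
	static void fake_tcp_drop(struct fake_sk *sk, const struct fake_skb *skb)
	{
		unsigned int segs = skb->gso_segs ? skb->gso_segs : 1;

		sk->drops += segs;
		/* the real code then frees the skb */
	}

	int main(void)
	{
		struct fake_sk sk = { 0 };
		struct fake_skb merged = { .gso_segs = 48 };  /* e.g. after coalescing */

		fake_tcp_drop(&sk, &merged);
		printf("drops = %ld\n", sk.drops);  /* 48, not 1 */
		return 0;
	}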
@@ -4924,6 +4942,7 @@ end:
 static void tcp_collapse_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	u32 range_truesize, sum_tiny = 0;
 	struct sk_buff *skb, *head;
 	u32 start, end;
 
@@ -4935,6 +4954,7 @@ new_range:
 	}
 	start = TCP_SKB_CB(skb)->seq;
 	end = TCP_SKB_CB(skb)->end_seq;
+	range_truesize = skb->truesize;
 
 	for (head = skb;;) {
 		skb = skb_rb_next(skb);
@@ -4945,11 +4965,20 @@ new_range:
 		if (!skb ||
 		    after(TCP_SKB_CB(skb)->seq, end) ||
 		    before(TCP_SKB_CB(skb)->end_seq, start)) {
-			tcp_collapse(sk, NULL, &tp->out_of_order_queue,
-				     head, skb, start, end);
+			/* Do not attempt collapsing tiny skbs */
+			if (range_truesize != head->truesize ||
+			    end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
+				tcp_collapse(sk, NULL, &tp->out_of_order_queue,
+					     head, skb, start, end);
+			} else {
+				sum_tiny += range_truesize;
+				if (sum_tiny > sk->sk_rcvbuf >> 3)
+					return;
+			}
 			goto new_range;
 		}
 
+		range_truesize += skb->truesize;
 		if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
 			start = TCP_SKB_CB(skb)->seq;
 		if (after(TCP_SKB_CB(skb)->end_seq, end))
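The new branch skips tcp_collapse() for a range that consists of a single skb (range_truesize == head->truesize) whose sequence span is smaller than the usable payload of one memory quantum; collapsing such a range cannot free memory, and repeatedly trying would let a peer sending many tiny out-of-order segments burn CPU for free. Skipped ranges accumulate in sum_tiny, and once they exceed one eighth of sk_rcvbuf the whole walk bails out. A back-of-the-envelope check of the two thresholds; the concrete numbers below are assumptions for illustration (one 4 KiB quantum, roughly 320 bytes of skb_shared_info overhead, an 87380-byte rcvbuf) and will differ by architecture and tuning:

	#include <stdio.h>

	int main(void)
	{
		unsigned int sk_mem_quantum = 4096;   /* stand-in for SK_MEM_QUANTUM */
		unsigned int shinfo_overhead = 320;   /* approx. skb_shared_info overhead */
		unsigned int sk_rcvbuf = 87380;       /* a common default receive buffer */

		/* Single-skb ranges shorter than this are considered "tiny". */
		printf("min span worth collapsing: %u bytes\n",
		       sk_mem_quantum - shinfo_overhead);        /* ~3776 */

		/* Budget of tiny ranges before the collapse walk gives up. */
		printf("tiny-range budget:         %u bytes\n",
		       sk_rcvbuf >> 3);                          /* 10922 */
		return 0;
	}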
@@ -4964,6 +4993,7 @@ new_range:
  * 2) not add too big latencies if thousands of packets sit there.
  *    (But if application shrinks SO_RCVBUF, we could still end up
  *    freeing whole queue here)
+ * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
  *
  * Return true if queue has shrunk.
  */
@@ -4971,20 +5001,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct rb_node *node, *prev;
+	int goal;
 
 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
 		return false;
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+	goal = sk->sk_rcvbuf >> 3;
 	node = &tp->ooo_last_skb->rbnode;
 	do {
 		prev = rb_prev(node);
 		rb_erase(node, &tp->out_of_order_queue);
+		goal -= rb_to_skb(node)->truesize;
 		tcp_drop(sk, rb_to_skb(node));
-		sk_mem_reclaim(sk);
-		if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
-		    !tcp_under_memory_pressure(sk))
-			break;
+		if (!prev || goal <= 0) {
+			sk_mem_reclaim(sk);
+			if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+			    !tcp_under_memory_pressure(sk))
+				break;
+			goal = sk->sk_rcvbuf >> 3;
+		}
 		node = prev;
 	} while (node);
 	tp->ooo_last_skb = rb_to_skb(prev);
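Previously tcp_prune_ofo_queue() ran sk_mem_reclaim() and rechecked the memory limits after every single freed skb. With the goal counter it now drops skbs from the tail of the rbtree in batches worth roughly one eighth of sk_rcvbuf (goal = sk_rcvbuf >> 3, the 12.5 % mentioned in the new comment above) before reclaiming and re-evaluating, which amortizes the reclaim cost and guarantees each pass frees a meaningful fraction of the buffer. The batching pattern in isolation, as a hedged sketch with plain integers standing in for skb truesize (it omits the early break the kernel takes once memory is back under the limit):

	#include <stdio.h>

	int main(void)
	{
		int truesize[] = { 700, 700, 700, 700, 700, 700, 700, 700 };
		int n = sizeof(truesize) / sizeof(truesize[0]);
		int sk_rcvbuf = 16384;
		int budget = sk_rcvbuf >> 3;   /* 12.5% of sk_rcvbuf = 2048 */
		int goal = budget;
		int reclaims = 0;

		/* Walk from the tail, only paying the reclaim/recheck cost
		 * once a batch of at least `budget` bytes has been dropped
		 * (or the list is exhausted).
		 */
		for (int i = n - 1; i >= 0; i--) {
			goal -= truesize[i];            /* drop this entry */
			if (i == 0 || goal <= 0) {
				reclaims++;             /* sk_mem_reclaim() point */
				goal = budget;
			}
		}
		printf("%d entries freed, %d reclaim passes\n", n, reclaims);  /* 8, 3 */
		return 0;
	}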
@@ -5019,6 +5055,9 @@ static int tcp_prune_queue(struct sock *sk)
 	else if (tcp_under_memory_pressure(sk))
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
+	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+		return 0;
+
 	tcp_collapse_ofo_queue(sk);
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		tcp_collapse(sk, &sk->sk_receive_queue, NULL,