Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c	65
1 file changed, 52 insertions(+), 13 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8e5522c6833a..3bcd30a2ba06 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -215,7 +215,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
 	icsk->icsk_ack.quick = quickacks;
 }
 
-static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
@@ -223,6 +223,7 @@ static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
 	icsk->icsk_ack.pingpong = 0;
 	icsk->icsk_ack.ato = TCP_ATO_MIN;
 }
+EXPORT_SYMBOL(tcp_enter_quickack_mode);
 
 /* Send ACKs quickly, if "quick" count is not exhausted
  * and the session is not interactive.
@@ -4357,6 +4358,23 @@ static bool tcp_try_coalesce(struct sock *sk,
 	return true;
 }
 
+static bool tcp_ooo_try_coalesce(struct sock *sk,
+			     struct sk_buff *to,
+			     struct sk_buff *from,
+			     bool *fragstolen)
+{
+	bool res = tcp_try_coalesce(sk, to, from, fragstolen);
+
+	/* In case tcp_drop() is called later, update to->gso_segs */
+	if (res) {
+		u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
+			       max_t(u16, 1, skb_shinfo(from)->gso_segs);
+
+		skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+	}
+	return res;
+}
+
 static void tcp_drop(struct sock *sk, struct sk_buff *skb)
 {
 	sk_drops_add(sk, skb);
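
Note, not part of the patch: the helper added above is tcp_try_coalesce() plus a saturating sum of the two skbs' gso_segs, so that a later tcp_drop() on the coalesced skb still accounts for every original segment. A minimal user-space sketch of that arithmetic follows; coalesced_gso_segs(), main() and the sample values are invented for illustration, only the "count each skb as at least one segment, cap the total at 0xFFFF" rule comes from the hunk itself.

#include <stdint.h>
#include <stdio.h>

/* Saturating segment count for a coalesced skb: each side counts as at
 * least one segment, and the result is clamped to 0xFFFF because
 * shinfo->gso_segs is a 16-bit field. */
static uint16_t coalesced_gso_segs(uint16_t to_segs, uint16_t from_segs)
{
	uint32_t sum = (to_segs ? to_segs : 1) + (from_segs ? from_segs : 1);

	return sum > 0xFFFF ? 0xFFFF : (uint16_t)sum;
}

int main(void)
{
	printf("%u\n", (unsigned)coalesced_gso_segs(0, 0));	/* two plain skbs -> 2 */
	printf("%u\n", (unsigned)coalesced_gso_segs(45, 3));	/* 48 */
	printf("%u\n", (unsigned)coalesced_gso_segs(65535, 9));	/* clamped to 65535 */
	return 0;
}
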
@@ -4480,8 +4498,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	/* In the typical case, we are adding an skb to the end of the list.
 	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
 	 */
-	if (tcp_try_coalesce(sk, tp->ooo_last_skb,
-			     skb, &fragstolen)) {
+	if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
+				 skb, &fragstolen)) {
 coalesce_done:
 		tcp_grow_window(sk, skb);
 		kfree_skb_partial(skb, fragstolen);
@@ -4509,7 +4527,7 @@ coalesce_done:
 				/* All the bits are present. Drop. */
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				__kfree_skb(skb);
+				tcp_drop(sk, skb);
 				skb = NULL;
 				tcp_dsack_set(sk, seq, end_seq);
 				goto add_sack;
@@ -4528,11 +4546,11 @@ coalesce_done:
 						 TCP_SKB_CB(skb1)->end_seq);
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				__kfree_skb(skb1);
+				tcp_drop(sk, skb1);
 				goto merge_right;
 			}
-		} else if (tcp_try_coalesce(sk, skb1,
-					    skb, &fragstolen)) {
+		} else if (tcp_ooo_try_coalesce(sk, skb1,
+						skb, &fragstolen)) {
 			goto coalesce_done;
 		}
 		p = &parent->rb_right;
@@ -4901,6 +4919,7 @@ end:
 static void tcp_collapse_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	u32 range_truesize, sum_tiny = 0;
 	struct sk_buff *skb, *head;
 	u32 start, end;
 
@@ -4912,6 +4931,7 @@ new_range:
 	}
 	start = TCP_SKB_CB(skb)->seq;
 	end = TCP_SKB_CB(skb)->end_seq;
+	range_truesize = skb->truesize;
 
 	for (head = skb;;) {
 		skb = skb_rb_next(skb);
@@ -4922,11 +4942,20 @@ new_range:
 		if (!skb ||
 		    after(TCP_SKB_CB(skb)->seq, end) ||
 		    before(TCP_SKB_CB(skb)->end_seq, start)) {
-			tcp_collapse(sk, NULL, &tp->out_of_order_queue,
-				     head, skb, start, end);
+			/* Do not attempt collapsing tiny skbs */
+			if (range_truesize != head->truesize ||
+			    end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
+				tcp_collapse(sk, NULL, &tp->out_of_order_queue,
+					     head, skb, start, end);
+			} else {
+				sum_tiny += range_truesize;
+				if (sum_tiny > sk->sk_rcvbuf >> 3)
+					return;
+			}
 			goto new_range;
 		}
 
+		range_truesize += skb->truesize;
 		if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
 			start = TCP_SKB_CB(skb)->seq;
 		if (after(TCP_SKB_CB(skb)->end_seq, end))
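
Note, not part of the patch: the new branch above only calls tcp_collapse() when a range is worth rewriting, i.e. when it holds more than one skb (range_truesize != head->truesize) or spans at least roughly one memory quantum of sequence space; a lone tiny skb is skipped, and once the skipped ranges add up to more than sk_rcvbuf/8 the scan gives up. A rough standalone sketch of that decision follows; worth_collapsing(), RANGE_QUANTUM and all sample sizes are invented, while the kernel uses SKB_WITH_OVERHEAD(SK_MEM_QUANTUM) as its threshold.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Invented stand-in for SKB_WITH_OVERHEAD(SK_MEM_QUANTUM). */
#define RANGE_QUANTUM 3500u

/* A range is collapsed only if it holds more than one skb or spans at
 * least one quantum of sequence space; a lone tiny skb is not worth it. */
static bool worth_collapsing(uint32_t range_truesize, uint32_t head_truesize,
			     uint32_t start, uint32_t end)
{
	return range_truesize != head_truesize || end - start >= RANGE_QUANTUM;
}

int main(void)
{
	uint32_t sk_rcvbuf = 131072;			/* sample SO_RCVBUF */
	uint32_t sum_tiny = 0;
	/* two sample ranges: a lone small skb, then a two-skb range */
	uint32_t truesize[] = { 768, 4608 }, head[] = { 768, 2304 };
	uint32_t start[]    = { 1000, 2000 }, end[]  = { 1100, 6000 };

	for (int i = 0; i < 2; i++) {
		if (worth_collapsing(truesize[i], head[i], start[i], end[i])) {
			printf("range %d: collapse\n", i);
			continue;
		}
		sum_tiny += truesize[i];
		if (sum_tiny > sk_rcvbuf >> 3) {	/* tiny budget: 12.5 % */
			printf("range %d: tiny budget exhausted, stop\n", i);
			break;
		}
		printf("range %d: skip, too small to collapse\n", i);
	}
	return 0;
}
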
@@ -4941,6 +4970,7 @@ new_range:
  * 2) not add too big latencies if thousands of packets sit there.
  *    (But if application shrinks SO_RCVBUF, we could still end up
  *    freeing whole queue here)
+ * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
  *
  * Return true if queue has shrunk.
  */
@@ -4948,20 +4978,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct rb_node *node, *prev;
+	int goal;
 
 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
 		return false;
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+	goal = sk->sk_rcvbuf >> 3;
 	node = &tp->ooo_last_skb->rbnode;
 	do {
 		prev = rb_prev(node);
 		rb_erase(node, &tp->out_of_order_queue);
+		goal -= rb_to_skb(node)->truesize;
 		tcp_drop(sk, rb_to_skb(node));
-		sk_mem_reclaim(sk);
-		if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
-		    !tcp_under_memory_pressure(sk))
-			break;
+		if (!prev || goal <= 0) {
+			sk_mem_reclaim(sk);
+			if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+			    !tcp_under_memory_pressure(sk))
+				break;
+			goal = sk->sk_rcvbuf >> 3;
+		}
 		node = prev;
 	} while (node);
 	tp->ooo_last_skb = rb_to_skb(prev);
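
Note, not part of the patch: the reworked loop above batches the expensive part of pruning. Instead of calling sk_mem_reclaim() and re-reading sk_rmem_alloc after every dropped skb, it keeps a goal of sk_rcvbuf >> 3 (12.5 % of the receive buffer), subtracts each freed skb's truesize from it, and only reclaims and re-checks once the goal is spent or the queue is exhausted. A simplified user-space sketch of that pattern follows; struct entry, prune(), reclaim() and the sizes are invented, and the memory-pressure check is omitted.

#include <stdio.h>

/* Illustrative queue entry; the kernel walks an rbtree of sk_buffs. */
struct entry {
	unsigned int truesize;
};

/* Invented stand-ins for the socket state the patch works with. */
static unsigned int rmem_alloc;
static const unsigned int sk_rcvbuf = 65536;

/* Stand-in for sk_mem_reclaim(): treat this as the costly step. */
static void reclaim(void)
{
	printf("reclaim, rmem_alloc now %u\n", rmem_alloc);
}

/* Prune from the tail. Rather than reclaiming and re-checking rmem after
 * every single drop, batch the work: reclaim only once at least
 * sk_rcvbuf/8 (12.5 %) worth of truesize has been freed, or when the
 * queue is exhausted. */
static void prune(struct entry *q, int len)
{
	int goal = sk_rcvbuf >> 3;

	for (int i = len - 1; i >= 0; i--) {
		goal -= (int)q[i].truesize;
		rmem_alloc -= q[i].truesize;		/* drop the entry */
		if (i == 0 || goal <= 0) {
			reclaim();
			if (rmem_alloc <= sk_rcvbuf)
				break;
			goal = sk_rcvbuf >> 3;		/* start a new batch */
		}
	}
}

int main(void)
{
	struct entry q[32];

	for (int i = 0; i < 32; i++)
		q[i].truesize = 4096;
	rmem_alloc = 32 * 4096;		/* 128 KB queued against a 64 KB budget */
	prune(q, 32);
	return 0;
}
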
@@ -4996,6 +5032,9 @@ static int tcp_prune_queue(struct sock *sk)
 	else if (tcp_under_memory_pressure(sk))
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
+	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+		return 0;
+
 	tcp_collapse_ofo_queue(sk);
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		tcp_collapse(sk, &sk->sk_receive_queue, NULL,