aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2014-09-19 11:26:20 -0400
committerDavid S. Miller <davem@davemloft.net>2014-09-23 12:47:38 -0400
commitbd1e75abf4b3c666f61a5cf90c896aa928a735d5 (patch)
treef7d05e03763428dd6c65d6b1a2af66ebf3c68c48
parent4cdf507d54525842dfd9f6313fdafba039084046 (diff)
tcp: add coalescing attempt in tcp_ofo_queue()
In order to make TCP more resilient in presence of reorders, we need to allow coalescing to happen when skbs from out of order queue are transferred into receive queue. LRO/GRO can be completely canceled in some pathological cases, like per packet load balancing on aggregated links. I had to move tcp_try_coalesce() up in the file above tcp_ofo_queue() Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  net/ipv4/tcp_input.c | 89 ++++++++++++++++++++++++++++---------------------------
 1 file changed, 47 insertions(+), 42 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 13f3da4762e3..f3f016a15c5a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4061,6 +4061,44 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 	tp->rx_opt.num_sacks = num_sacks;
 }
 
+/**
+ * tcp_try_coalesce - try to merge skb to prior one
+ * @sk: socket
+ * @to: prior buffer
+ * @from: buffer to add in queue
+ * @fragstolen: pointer to boolean
+ *
+ * Before queueing skb @from after @to, try to merge them
+ * to reduce overall memory use and queue lengths, if cost is small.
+ * Packets in ofo or receive queues can stay a long time.
+ * Better try to coalesce them right now to avoid future collapses.
+ * Returns true if caller should free @from instead of queueing it
+ */
+static bool tcp_try_coalesce(struct sock *sk,
+			     struct sk_buff *to,
+			     struct sk_buff *from,
+			     bool *fragstolen)
+{
+	int delta;
+
+	*fragstolen = false;
+
+	/* Its possible this segment overlaps with prior segment in queue */
+	if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
+		return false;
+
+	if (!skb_try_coalesce(to, from, fragstolen, &delta))
+		return false;
+
+	atomic_add(delta, &sk->sk_rmem_alloc);
+	sk_mem_charge(sk, delta);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
+	TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
+	TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
+	TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
+	return true;
+}
+
 /* This one checks to see if we can put data from the
  * out_of_order queue into the receive_queue.
  */
@@ -4068,7 +4106,8 @@ static void tcp_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 dsack_high = tp->rcv_nxt;
-	struct sk_buff *skb;
+	struct sk_buff *skb, *tail;
+	bool fragstolen, eaten;
 
 	while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
 		if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
@@ -4081,9 +4120,9 @@ static void tcp_ofo_queue(struct sock *sk)
 			tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
 		}
 
+		__skb_unlink(skb, &tp->out_of_order_queue);
 		if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
 			SOCK_DEBUG(sk, "ofo packet was already received\n");
-			__skb_unlink(skb, &tp->out_of_order_queue);
 			__kfree_skb(skb);
 			continue;
 		}
@@ -4091,11 +4130,15 @@ static void tcp_ofo_queue(struct sock *sk)
 			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
 			   TCP_SKB_CB(skb)->end_seq);
 
-		__skb_unlink(skb, &tp->out_of_order_queue);
-		__skb_queue_tail(&sk->sk_receive_queue, skb);
+		tail = skb_peek_tail(&sk->sk_receive_queue);
+		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		if (!eaten)
+			__skb_queue_tail(&sk->sk_receive_queue, skb);
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
 			tcp_fin(sk);
+		if (eaten)
+			kfree_skb_partial(skb, fragstolen);
 	}
 }
 
@@ -4122,44 +4165,6 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
 	return 0;
 }
 
-/**
- * tcp_try_coalesce - try to merge skb to prior one
- * @sk: socket
- * @to: prior buffer
- * @from: buffer to add in queue
- * @fragstolen: pointer to boolean
- *
- * Before queueing skb @from after @to, try to merge them
- * to reduce overall memory use and queue lengths, if cost is small.
- * Packets in ofo or receive queues can stay a long time.
- * Better try to coalesce them right now to avoid future collapses.
- * Returns true if caller should free @from instead of queueing it
- */
-static bool tcp_try_coalesce(struct sock *sk,
-			     struct sk_buff *to,
-			     struct sk_buff *from,
-			     bool *fragstolen)
-{
-	int delta;
-
-	*fragstolen = false;
-
-	/* Its possible this segment overlaps with prior segment in queue */
-	if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
-		return false;
-
-	if (!skb_try_coalesce(to, from, fragstolen, &delta))
-		return false;
-
-	atomic_add(delta, &sk->sk_rmem_alloc);
-	sk_mem_charge(sk, delta);
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
-	TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
-	TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
-	TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
-	return true;
-}
-
 static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);