author		Eric Dumazet <edumazet@google.com>	2014-09-19 11:26:20 -0400
committer	David S. Miller <davem@davemloft.net>	2014-09-23 12:47:38 -0400
commit		bd1e75abf4b3c666f61a5cf90c896aa928a735d5
tree		f7d05e03763428dd6c65d6b1a2af66ebf3c68c48
parent		4cdf507d54525842dfd9f6313fdafba039084046
tcp: add coalescing attempt in tcp_ofo_queue()
In order to make TCP more resilient in the presence of reordering, we need
to allow coalescing to happen when skbs from the out-of-order queue are
transferred into the receive queue. LRO/GRO aggregation can otherwise be
completely defeated in some pathological cases, such as per-packet load
balancing on aggregated links.

I had to move tcp_try_coalesce() up in the file, above tcp_ofo_queue().
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
 net/ipv4/tcp_input.c | 89
 1 file changed, 47 insertions(+), 42 deletions(-)
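Before reading the diff, the pattern it introduces can be summarized: when an
out-of-order skb becomes deliverable, first try to merge it into the tail of
the receive queue, and only append it as a separate skb if the merge is
refused. Below is a minimal userspace sketch of that pattern, assuming
copy-based merging and a singly linked queue; struct seg, try_coalesce() and
deliver() are illustrative names, not kernel interfaces.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct seg {
		uint32_t seq;          /* first sequence number covered */
		uint32_t end_seq;      /* one past the last covered number */
		size_t len;
		struct seg *next;
		char data[2048];
	};

	/* Mirror of the guard in tcp_try_coalesce(): merge only when @from
	 * starts exactly where @to ends, and only when it is cheap. */
	static bool try_coalesce(struct seg *to, const struct seg *from)
	{
		if (from->seq != to->end_seq)   /* hole or overlap: keep apart */
			return false;
		if (to->len + from->len > sizeof(to->data))
			return false;           /* "cost too high" stand-in */
		memcpy(to->data + to->len, from->data, from->len);
		to->len += from->len;
		to->end_seq = from->end_seq;    /* tail now covers both ranges */
		return true;
	}

	/* Deliver one in-order segment, as the patched tcp_ofo_queue() does:
	 * coalesced segments are freed ("eaten"), others join the queue. */
	static void deliver(struct seg **tail, struct seg *skb)
	{
		if (*tail && try_coalesce(*tail, skb)) {
			free(skb);
			return;
		}
		skb->next = NULL;
		if (*tail)
			(*tail)->next = skb;
		*tail = skb;
	}

	int main(void)
	{
		struct seg *a = calloc(1, sizeof(*a));
		struct seg *b = calloc(1, sizeof(*b));
		struct seg *tail = NULL;

		a->seq = 1000; a->end_seq = 1100; a->len = 100;
		b->seq = 1100; b->end_seq = 1200; b->len = 100;
		deliver(&tail, a);
		deliver(&tail, b);  /* contiguous with a: merged, b is freed */
		printf("tail covers [%u,%u), one segment: %s\n",
		       (unsigned)tail->seq, (unsigned)tail->end_seq,
		       tail->next ? "no" : "yes");
		free(a);
		return 0;
	}

Compiled with a stock C compiler this prints "tail covers [1000,1200), one
segment: yes": the second segment's payload lands in the first one's buffer,
which is the effect the patch wants for GRO-defeating reorder patterns.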
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 13f3da4762e3..f3f016a15c5a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4061,6 +4061,44 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 	tp->rx_opt.num_sacks = num_sacks;
 }
 
+/**
+ * tcp_try_coalesce - try to merge skb to prior one
+ * @sk: socket
+ * @to: prior buffer
+ * @from: buffer to add in queue
+ * @fragstolen: pointer to boolean
+ *
+ * Before queueing skb @from after @to, try to merge them
+ * to reduce overall memory use and queue lengths, if cost is small.
+ * Packets in ofo or receive queues can stay a long time.
+ * Better try to coalesce them right now to avoid future collapses.
+ * Returns true if caller should free @from instead of queueing it
+ */
+static bool tcp_try_coalesce(struct sock *sk,
+			     struct sk_buff *to,
+			     struct sk_buff *from,
+			     bool *fragstolen)
+{
+	int delta;
+
+	*fragstolen = false;
+
+	/* Its possible this segment overlaps with prior segment in queue */
+	if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
+		return false;
+
+	if (!skb_try_coalesce(to, from, fragstolen, &delta))
+		return false;
+
+	atomic_add(delta, &sk->sk_rmem_alloc);
+	sk_mem_charge(sk, delta);
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
+	TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
+	TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
+	TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
+	return true;
+}
+
 /* This one checks to see if we can put data from the
  * out_of_order queue into the receive_queue.
  */
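A note on the bookkeeping in tcp_try_coalesce() above: skb_try_coalesce()
reports in delta how much the truesize of @to grew, and that amount is added
to sk_rmem_alloc and charged via sk_mem_charge() so receive-memory accounting
stays exact. On success only the control block of @to needs fixing up:
end_seq advances to cover @from, ack_seq is taken from the newer segment, and
the flag bits are OR-ed so a FIN or PSH carried by @from is not lost.
Concretely (with made-up numbers): if the tail skb covers [1000,1448), an
arriving segment with seq == 1448 can be merged, while seq == 1500 (a hole)
or seq == 1400 (an overlap) fails the seq != end_seq test and is queued as a
separate skb.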
@@ -4068,7 +4106,8 @@ static void tcp_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 dsack_high = tp->rcv_nxt;
-	struct sk_buff *skb;
+	struct sk_buff *skb, *tail;
+	bool fragstolen, eaten;
 
 	while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
 		if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
@@ -4081,9 +4120,9 @@ static void tcp_ofo_queue(struct sock *sk)
 			tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
 		}
 
+		__skb_unlink(skb, &tp->out_of_order_queue);
 		if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
 			SOCK_DEBUG(sk, "ofo packet was already received\n");
-			__skb_unlink(skb, &tp->out_of_order_queue);
 			__kfree_skb(skb);
 			continue;
 		}
@@ -4091,11 +4130,15 @@ static void tcp_ofo_queue(struct sock *sk)
 			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
 			   TCP_SKB_CB(skb)->end_seq);
 
-		__skb_unlink(skb, &tp->out_of_order_queue);
-		__skb_queue_tail(&sk->sk_receive_queue, skb);
+		tail = skb_peek_tail(&sk->sk_receive_queue);
+		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		if (!eaten)
+			__skb_queue_tail(&sk->sk_receive_queue, skb);
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
 			tcp_fin(sk);
+		if (eaten)
+			kfree_skb_partial(skb, fragstolen);
 	}
 }
 
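Two ordering details in the tcp_ofo_queue() hunks above are worth spelling
out. First, __skb_unlink() is now done once, before the "packet was already
received" check, so each peeked skb leaves the out-of-order queue on a single
path instead of two. Second, when the skb was eaten by the coalesce, it is
freed only after the FIN test: TCP_SKB_CB(skb)->tcp_flags is still read
there, so kfree_skb_partial() must come last. That helper frees just the
sk_buff head when fragstolen is true, since the payload was stolen into the
tail skb, and the whole skb otherwise.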
@@ -4122,44 +4165,6 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
 	return 0;
 }
 
-/**
- * tcp_try_coalesce - try to merge skb to prior one
- * @sk: socket
- * @to: prior buffer
- * @from: buffer to add in queue
- * @fragstolen: pointer to boolean
- *
- * Before queueing skb @from after @to, try to merge them
- * to reduce overall memory use and queue lengths, if cost is small.
- * Packets in ofo or receive queues can stay a long time.
- * Better try to coalesce them right now to avoid future collapses.
- * Returns true if caller should free @from instead of queueing it
- */
-static bool tcp_try_coalesce(struct sock *sk,
-			     struct sk_buff *to,
-			     struct sk_buff *from,
-			     bool *fragstolen)
-{
-	int delta;
-
-	*fragstolen = false;
-
-	/* Its possible this segment overlaps with prior segment in queue */
-	if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
-		return false;
-
-	if (!skb_try_coalesce(to, from, fragstolen, &delta))
-		return false;
-
-	atomic_add(delta, &sk->sk_rmem_alloc);
-	sk_mem_charge(sk, delta);
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
-	TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
-	TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
-	TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
-	return true;
-}
-
 static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
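Finally, note that the last hunk does not delete functionality:
tcp_try_coalesce() is a pre-existing static helper with other callers later
in this file, and the two hunks together simply move its definition above
tcp_ofo_queue() so the out-of-order drain path can call it without a forward
declaration, exactly as the commit message says.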