aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
author Eric Dumazet <edumazet@google.com> 2016-11-01 13:53:42 -0400
committer David S. Miller <davem@davemloft.net> 2016-11-02 15:21:36 -0400
commit 2331ccc5b3231c35b758fe274f45fbed38467f94 (patch)
tree 79bd2f09682d7b72370e389a1c4426223dfb5be3 /net/ipv4/tcp_output.c
parent b646cf299e14338c7348a68acc006d1673f1ba0a (diff)
tcp: enhance tcp collapsing
As Ilya Lesokhin suggested, we can collapse two skbs at retransmit time even if the skb at the right has fragments.

We simply have to use more generic skb_copy_bits() instead of skb_copy_from_linear_data() in tcp_collapse_retrans().

Also need to guard this skb_copy_bits() in case there is nothing to copy, otherwise skb_put() could panic if left skb has frags.

Tested:

Used following packetdrill test

// Establish a connection.
0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 8>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+.100 < . 1:1(0) ack 1 win 257
+0 accept(3, ..., ...) = 4
+0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+0 write(4, ..., 200) = 200
+0 > P. 1:201(200) ack 1
+.001 write(4, ..., 200) = 200
+0 > P. 201:401(200) ack 1
+.001 write(4, ..., 200) = 200
+0 > P. 401:601(200) ack 1
+.001 write(4, ..., 200) = 200
+0 > P. 601:801(200) ack 1
+.001 write(4, ..., 200) = 200
+0 > P. 801:1001(200) ack 1
+.001 write(4, ..., 100) = 100
+0 > P. 1001:1101(100) ack 1
+.001 write(4, ..., 100) = 100
+0 > P. 1101:1201(100) ack 1
+.001 write(4, ..., 100) = 100
+0 > P. 1201:1301(100) ack 1
+.001 write(4, ..., 100) = 100
+0 > P. 1301:1401(100) ack 1
+.100 < . 1:1(0) ack 1 win 257 <nop,nop,sack 1001:1401>
// Check that TCP collapse works :
+0 > P. 1:1001(1000) ack 1

Reported-by: Ilya Lesokhin <ilyal@mellanox.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- net/ipv4/tcp_output.c 10
1 files changed, 4 insertions, 6 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 896e9dfbdb5c..f57b5aa51b59 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2529,8 +2529,9 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2529 2529
2530 tcp_unlink_write_queue(next_skb, sk); 2530 tcp_unlink_write_queue(next_skb, sk);
2531 2531
2532 skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size), 2532 if (next_skb_size)
2533 next_skb_size); 2533 skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
2534 next_skb_size);
2534 2535
2535 if (next_skb->ip_summed == CHECKSUM_PARTIAL) 2536 if (next_skb->ip_summed == CHECKSUM_PARTIAL)
2536 skb->ip_summed = CHECKSUM_PARTIAL; 2537 skb->ip_summed = CHECKSUM_PARTIAL;
@@ -2567,14 +2568,11 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
2567{ 2568{
2568 if (tcp_skb_pcount(skb) > 1) 2569 if (tcp_skb_pcount(skb) > 1)
2569 return false; 2570 return false;
2570 /* TODO: SACK collapsing could be used to remove this condition */
2571 if (skb_shinfo(skb)->nr_frags != 0)
2572 return false;
2573 if (skb_cloned(skb)) 2571 if (skb_cloned(skb))
2574 return false; 2572 return false;
2575 if (skb == tcp_send_head(sk)) 2573 if (skb == tcp_send_head(sk))
2576 return false; 2574 return false;
2577 /* Some heurestics for collapsing over SACK'd could be invented */ 2575 /* Some heuristics for collapsing over SACK'd could be invented */
2578 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) 2576 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2579 return false; 2577 return false;
2580 2578