From 5db92c994982ed826cf38f38d58bd09bc326aef6 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 25 Jun 2014 17:09:59 +0300 Subject: tcp: unify tcp_v4_rtx_synack and tcp_v6_rtx_synack Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d92bce0ea24e..f8f2a944a1ce 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3299,3 +3299,18 @@ void tcp_send_probe0(struct sock *sk) TCP_RTO_MAX); } } + +int tcp_rtx_synack(struct sock *sk, struct request_sock *req) +{ + const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific; + struct flowi fl; + int res; + + res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL); + if (!res) { + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + } + return res; +} +EXPORT_SYMBOL(tcp_rtx_synack); -- cgit v1.2.2 From b73c3d0e4f0e1961e15bec18720e48aabebe2109 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 1 Jul 2014 21:32:17 -0700 Subject: net: Save TX flow hash in sock and set in skbuf on xmit For a connected socket we can precompute the flow hash for setting in skb->hash on output. This is a performance advantage over calculating the skb->hash for every packet on the connection. The computation is done using the common hash algorithm to be consistent with computations done for packets of the connection in other states where thers is no socket (e.g. time-wait, syn-recv, syn-cookies). This patch adds sk_txhash to the sock structure. inet_set_txhash and ip6_set_txhash functions are added which are called from points in TCP and UDP where socket moves to established state. skb_set_hash_from_sk is a function which sets skb->hash from the sock txhash value. This is called in UDP and TCP transmit path when transmitting within the context of a socket. Tested: ran super_netperf with 200 TCP_RR streams over a vxlan interface (in this case skb_get_hash called on every TX packet to create a UDP source port). Before fix: 95.02% CPU utilization 154/256/505 90/95/99% latencies 1.13042e+06 tps Time in functions: 0.28% skb_flow_dissect 0.21% __skb_get_hash After fix: 94.95% CPU utilization 156/254/485 90/95/99% latencies 1.15447e+06 Neither __skb_get_hash nor skb_flow_dissect appear in perf Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f8f2a944a1ce..bcee13c4627c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -916,6 +916,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_orphan(skb); skb->sk = sk; skb->destructor = tcp_wfree; + skb_set_hash_from_sk(skb, sk); atomic_add(skb->truesize, &sk->sk_wmem_alloc); /* Build TCP header and checksum it. */ -- cgit v1.2.2 From 5ee2c941b5969eb1b5592f9731b3ee76a784641f Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 14 Jul 2014 16:58:32 +0200 Subject: tcp: Remove unnecessary arg from tcp_enter_cwr and tcp_init_cwnd_reduction Since Yuchung's 9b44190dc11 (tcp: refactor F-RTO), tcp_enter_cwr is always called with set_ssthresh = 1. Thus, we can remove this argument from tcp_enter_cwr. Further, as we remove this one, tcp_init_cwnd_reduction is then always called with set_ssthresh = true, and so we can get rid of this argument as well. Cc: Yuchung Cheng Signed-off-by: Christoph Paasch Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bcee13c4627c..306dd5dead7d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -979,7 +979,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (likely(err <= 0)) return err; - tcp_enter_cwr(sk, 1); + tcp_enter_cwr(sk); return net_xmit_eval(err); } -- cgit v1.2.2 From 490cc7d03c21871eef5949ba77a18b0c7ef70ef5 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 12 Aug 2014 15:08:12 -0400 Subject: net-timestamp: fix missing tcp fragmentation cases Bytestream timestamps are correlated with a single byte in the skbuff, recorded in skb_shinfo(skb)->tskey. When fragmenting skbuffs, ensure that the tskey is set for the fragment in which the tskey falls (seqno <= tskey < end_seqno). The original implementation did not address fragmentation in tcp_fragment or tso_fragment. Add code to inspect the sequence numbers and move both tskey and the relevant tx_flags if necessary. Reported-by: Eric Dumazet Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8fcfc91964ec..ef4a051de018 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1069,6 +1069,21 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de tcp_verify_left_out(tp); } +static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + + if (unlikely(shinfo->tx_flags & SKBTX_ANY_TSTAMP) && + !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) { + struct skb_shared_info *shinfo2 = skb_shinfo(skb2); + u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP; + + shinfo->tx_flags &= ~tsflags; + shinfo2->tx_flags |= tsflags; + swap(shinfo->tskey, shinfo2->tskey); + } +} + /* Function to create two new TCP segments. Shrinks the given segment * to the specified size and appends a new segment with the rest of the * packet to the list. This won't be called frequently, I hope. @@ -1136,6 +1151,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, */ TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; buff->tstamp = skb->tstamp; + tcp_fragment_tstamp(skb, buff); old_factor = tcp_skb_pcount(skb); @@ -1652,6 +1668,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL; skb_split(skb, buff, len); + tcp_fragment_tstamp(skb, buff); /* Fix up tso_factor for both original and new SKB. */ tcp_set_skb_tso_segs(sk, skb, mss_now); -- cgit v1.2.2 From 9d186cac7ffb1831e9f34cb4a3a8b22abb9dd9d4 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Wed, 13 Aug 2014 16:03:10 +0400 Subject: tcp: don't use timestamp from repaired skb-s to calculate RTT (v2) We don't know right timestamp for repaired skb-s. Wrong RTT estimations isn't good, because some congestion modules heavily depends on it. This patch adds the TCPCB_REPAIRED flag, which is included in TCPCB_RETRANS. Thanks to Eric for the advice how to fix this issue. This patch fixes the warning: [ 879.562947] WARNING: CPU: 0 PID: 2825 at net/ipv4/tcp_input.c:3078 tcp_ack+0x11f5/0x1380() [ 879.567253] CPU: 0 PID: 2825 Comm: socket-tcpbuf-l Not tainted 3.16.0-next-20140811 #1 [ 879.567829] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 879.568177] 0000000000000000 00000000c532680c ffff880039643d00 ffffffff817aa2d2 [ 879.568776] 0000000000000000 ffff880039643d38 ffffffff8109afbd ffff880039d6ba80 [ 879.569386] ffff88003a449800 000000002983d6bd 0000000000000000 000000002983d6bc [ 879.569982] Call Trace: [ 879.570264] [] dump_stack+0x4d/0x66 [ 879.570599] [] warn_slowpath_common+0x7d/0xa0 [ 879.570935] [] warn_slowpath_null+0x1a/0x20 [ 879.571292] [] tcp_ack+0x11f5/0x1380 [ 879.571614] [] tcp_rcv_established+0x1ed/0x710 [ 879.571958] [] tcp_v4_do_rcv+0x10a/0x370 [ 879.572315] [] release_sock+0x89/0x1d0 [ 879.572642] [] do_tcp_setsockopt.isra.36+0x120/0x860 [ 879.573000] [] ? rcu_read_lock_held+0x6e/0x80 [ 879.573352] [] tcp_setsockopt+0x32/0x40 [ 879.573678] [] sock_common_setsockopt+0x14/0x20 [ 879.574031] [] SyS_setsockopt+0x80/0xf0 [ 879.574393] [] system_call_fastpath+0x16/0x1b [ 879.574730] ---[ end trace a17cbc38eb8c5c00 ]--- v2: moving setting of skb->when for repaired skb-s in tcp_write_xmit, where it's set for other skb-s. Fixes: 431a91242d8d ("tcp: timestamp SYN+DATA messages") Fixes: 740b0f1841f6 ("tcp: switch rtt estimations to usec resolution") Cc: Eric Dumazet Cc: Pavel Emelyanov Cc: "David S. Miller" Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ef4a051de018..ff3f0f75cc6c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1934,8 +1934,11 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, tso_segs = tcp_init_tso_segs(sk, skb, mss_now); BUG_ON(!tso_segs); - if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) + if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { + /* "when" is used as a start point for the retransmit timer */ + TCP_SKB_CB(skb)->when = tcp_time_stamp; goto repair; /* Skip network transmission */ + } cwnd_quota = tcp_cwnd_test(tp, skb); if (!cwnd_quota) { -- cgit v1.2.2 From 4fab9071950c2021d846e18351e0f46a1cffd67b Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 14 Aug 2014 12:40:05 -0400 Subject: tcp: fix tcp_release_cb() to dispatch via address family for mtu_reduced() Make sure we use the correct address-family-specific function for handling MTU reductions from within tcp_release_cb(). Previously AF_INET6 sockets were incorrectly always using the IPv6 code path when sometimes they were handling IPv4 traffic and thus had an IPv4 dst. Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Diagnosed-by: Willem de Bruijn Fixes: 563d34d057862 ("tcp: dont drop MTU reduction indications") Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ff3f0f75cc6c..5a7c41fbc6d3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -800,7 +800,7 @@ void tcp_release_cb(struct sock *sk) __sock_put(sk); } if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { - sk->sk_prot->mtu_reduced(sk); + inet_csk(sk)->icsk_af_ops->mtu_reduced(sk); __sock_put(sk); } } -- cgit v1.2.2