author	Yuchung Cheng <ycheng@google.com>	2019-01-16 18:05:29 -0500
committer	David S. Miller <davem@davemloft.net>	2019-01-17 18:12:26 -0500
commit	7f12422c4873e9b274bc151ea59cb0cdf9415cf1
tree	4fcc9a7b9a4c31aa9c470fc8fc5a4a8c59b01177	/net/ipv4/tcp_output.c
parent	88f8598d0a302a08380eadefd09b9f5cb1c4c428
tcp: always timestamp on every skb transmission
Previously, TCP skbs were not always timestamped if the transmission failed due to memory or other local issues. This made deciding when to abort a socket tricky and complicated, because the first unacknowledged skb's timestamp may be 0 on TCP timeout.

The straightforward fix is to always timestamp the skb on every transmission attempt. In addition, every skb retransmission needs to be flagged properly to avoid RTT under-estimation, which can happen upon receiving an ACK for the original packet when a previous (spurious) retransmission has failed.

It is worth noting that this reverts to the old timestamping style used before commit 8c72c65b426b ("tcp: update skb->skb_mstamp more carefully"), which addressed a problem in computing the elapsed time of a stalled window-probing socket. That problem will be addressed differently, with a simpler approach, in the next patches.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
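For readers following the reasoning rather than the diff, below is a minimal user-space sketch of the two rules the patch enforces. It is not kernel code: struct pkt, now_ns(), transmit(), retransmit(), and on_ack() are hypothetical stand-ins invented for illustration; only the logic mirrors the patch.

/*
 * Sketch (assumptions: user-space C, POSIX clock_gettime): rule 1 is
 * to stamp the packet on *every* transmission attempt, even attempts
 * that fail locally; rule 2 is to flag every retransmission attempt
 * so ACK processing can discard ambiguous RTT samples (Karn's rule).
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

struct pkt {
	uint64_t mstamp_ns;    /* last transmission attempt, like skb->skb_mstamp_ns */
	bool	 ever_retrans; /* like TCPCB_EVER_RETRANS in skb->sacked */
};

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Stand-in for the real send path; pretend it can fail locally. */
static int transmit(struct pkt *p, bool fail)
{
	/* Rule 1: timestamp before we know whether the send worked. */
	p->mstamp_ns = now_ns();
	return fail ? -1 : 0;
}

static int retransmit(struct pkt *p, bool fail)
{
	int err = transmit(p, fail);

	/* Rule 2: mark the packet whether or not the attempt succeeded,
	 * so an ACK of the *original* copy is never timed against this
	 * newer stamp.
	 */
	p->ever_retrans = true;
	return err;
}

static void on_ack(const struct pkt *p)
{
	if (p->ever_retrans) {
		puts("ambiguous ACK: skip RTT sample");
		return;
	}
	printf("RTT sample: %llu ns\n",
	       (unsigned long long)(now_ns() - p->mstamp_ns));
}

int main(void)
{
	struct pkt p = { 0 };

	transmit(&p, false);  /* original send */
	retransmit(&p, true); /* spurious retransmit that fails locally */
	on_ack(&p);           /* ACK for the original: no bogus sample */
	return 0;
}

Without rule 2, the failed retransmit would have overwritten mstamp_ns, and the ACK for the original send would yield a spuriously low RTT sample, which is exactly the hazard the patch closes.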
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--	net/ipv4/tcp_output.c	16
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 730bc44dbad9..57a56e205070 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -980,7 +980,6 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
 	if (sk->sk_pacing_status != SK_PACING_NONE) {
 		unsigned long rate = sk->sk_pacing_rate;
 
@@ -1028,7 +1027,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 
 	BUG_ON(!skb || !tcp_skb_pcount(skb));
 	tp = tcp_sk(sk);
-
+	prior_wstamp = tp->tcp_wstamp_ns;
+	tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
+	skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
 	if (clone_it) {
 		TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
 			- tp->snd_una;
@@ -1045,11 +1046,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 		return -ENOBUFS;
 	}
 
-	prior_wstamp = tp->tcp_wstamp_ns;
-	tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
-
-	skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
-
 	inet = inet_sk(sk);
 	tcb = TCP_SKB_CB(skb);
 	memset(&opts, 0, sizeof(opts));
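Taken together, the two hunks above move the prior_wstamp snapshot and the skb->skb_mstamp_ns update from after the clone to the top of __tcp_transmit_skb(), ahead of the clone_it block, so the stamp is applied on every attempt, including ones that bail out early with -ENOBUFS.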
@@ -2937,12 +2933,16 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 	}
 
+	/* To avoid taking spuriously low RTT samples based on a timestamp
+	 * for a transmit that never happened, always mark EVER_RETRANS
+	 */
+	TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+
 	if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
 		tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
 				  TCP_SKB_CB(skb)->seq, segs, err);
 
 	if (likely(!err)) {
-		TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
 		trace_tcp_retransmit_skb(sk, skb);
 	} else if (err != -EBUSY) {
 		NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
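The last hunk is the flagging half of the fix: TCPCB_EVER_RETRANS is now set before the err check rather than only in the likely(!err) branch, so even a retransmission attempt that fails locally leaves the skb marked. Because the attempt above re-stamped skb->skb_mstamp_ns, an ACK for the original transmission could otherwise be timed against the newer stamp and produce a spuriously low RTT sample; the flag lets ACK processing discard that ambiguous sample, in the spirit of Karn's algorithm.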