diff options
author | Yuchung Cheng <ycheng@google.com> | 2019-01-16 18:05:29 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-01-17 18:12:26 -0500 |
commit | 7f12422c4873e9b274bc151ea59cb0cdf9415cf1 (patch) | |
tree | 4fcc9a7b9a4c31aa9c470fc8fc5a4a8c59b01177 /net/ipv4/tcp_output.c | |
parent | 88f8598d0a302a08380eadefd09b9f5cb1c4c428 (diff) |
tcp: always timestamp on every skb transmission
Previously TCP skbs are not always timestamped if the transmission
failed due to memory or other local issues. This makes deciding
when to abort a socket tricky and complicated because the first
unacknowledged skb's timestamp may be 0 on TCP timeout.
The straightforward fix is to always timestamp skb on every
transmission attempt. Also every skb retransmission needs to be
flagged properly to avoid RTT under-estimation. This can happen
upon receiving an ACK for the original packet when a previous
(spurious) retransmission has failed.
It's worth noting that this reverts to the old time-stamping
style before commit 8c72c65b426b ("tcp: update skb->skb_mstamp more
carefully") which addresses a problem in computing the elapsed time
of a stalled window-probing socket. The problem will be addressed
differently in the next patches with a simpler approach.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- | net/ipv4/tcp_output.c | 16 |
1 file changed, 8 insertions, 8 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 730bc44dbad9..57a56e205070 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -980,7 +980,6 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb, | |||
980 | { | 980 | { |
981 | struct tcp_sock *tp = tcp_sk(sk); | 981 | struct tcp_sock *tp = tcp_sk(sk); |
982 | 982 | ||
983 | skb->skb_mstamp_ns = tp->tcp_wstamp_ns; | ||
984 | if (sk->sk_pacing_status != SK_PACING_NONE) { | 983 | if (sk->sk_pacing_status != SK_PACING_NONE) { |
985 | unsigned long rate = sk->sk_pacing_rate; | 984 | unsigned long rate = sk->sk_pacing_rate; |
986 | 985 | ||
@@ -1028,7 +1027,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, | |||
1028 | 1027 | ||
1029 | BUG_ON(!skb || !tcp_skb_pcount(skb)); | 1028 | BUG_ON(!skb || !tcp_skb_pcount(skb)); |
1030 | tp = tcp_sk(sk); | 1029 | tp = tcp_sk(sk); |
1031 | 1030 | prior_wstamp = tp->tcp_wstamp_ns; | |
1031 | tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache); | ||
1032 | skb->skb_mstamp_ns = tp->tcp_wstamp_ns; | ||
1032 | if (clone_it) { | 1033 | if (clone_it) { |
1033 | TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq | 1034 | TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq |
1034 | - tp->snd_una; | 1035 | - tp->snd_una; |
@@ -1045,11 +1046,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, | |||
1045 | return -ENOBUFS; | 1046 | return -ENOBUFS; |
1046 | } | 1047 | } |
1047 | 1048 | ||
1048 | prior_wstamp = tp->tcp_wstamp_ns; | ||
1049 | tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache); | ||
1050 | |||
1051 | skb->skb_mstamp_ns = tp->tcp_wstamp_ns; | ||
1052 | |||
1053 | inet = inet_sk(sk); | 1049 | inet = inet_sk(sk); |
1054 | tcb = TCP_SKB_CB(skb); | 1050 | tcb = TCP_SKB_CB(skb); |
1055 | memset(&opts, 0, sizeof(opts)); | 1051 | memset(&opts, 0, sizeof(opts)); |
@@ -2937,12 +2933,16 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) | |||
2937 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); | 2933 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
2938 | } | 2934 | } |
2939 | 2935 | ||
2936 | /* To avoid taking spuriously low RTT samples based on a timestamp | ||
2937 | * for a transmit that never happened, always mark EVER_RETRANS | ||
2938 | */ | ||
2939 | TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; | ||
2940 | |||
2940 | if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG)) | 2941 | if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG)) |
2941 | tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB, | 2942 | tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB, |
2942 | TCP_SKB_CB(skb)->seq, segs, err); | 2943 | TCP_SKB_CB(skb)->seq, segs, err); |
2943 | 2944 | ||
2944 | if (likely(!err)) { | 2945 | if (likely(!err)) { |
2945 | TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; | ||
2946 | trace_tcp_retransmit_skb(sk, skb); | 2946 | trace_tcp_retransmit_skb(sk, skb); |
2947 | } else if (err != -EBUSY) { | 2947 | } else if (err != -EBUSY) { |
2948 | NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs); | 2948 | NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs); |