aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYuchung Cheng <ycheng@google.com>2019-01-16 18:05:30 -0500
committerDavid S. Miller <davem@davemloft.net>2019-01-17 18:12:26 -0500
commit7ae189759cc48cf8b54beebff566e9fd2d4e7d7c (patch)
tree352db05710fad943494d48107a6fb4d7555d0a70
parent7f12422c4873e9b274bc151ea59cb0cdf9415cf1 (diff)
tcp: always set retrans_stamp on recovery
Previously TCP socket's retrans_stamp is not set if the retransmission has failed to send. As a result if a socket is experiencing local issues to retransmit packets, determining when to abort a socket is complicated w/o knowning the starting time of the recovery since retrans_stamp may remain zero. This complication causes sub-optimal behavior that TCP may use the latest, instead of the first, retransmission time to compute the elapsed time of a stalling connection due to local issues. Then TCP may disrecard TCP retries settings and keep retrying until it finally succeed: not a good idea when the local host is already strained. The simple fix is to always timestamp the start of a recovery. It's worth noting that retrans_stamp is also used to compare echo timestamp values to detect spurious recovery. This patch does not break that because retrans_stamp is still later than when the original packet was sent. Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Neal Cardwell <ncardwell@google.com> Reviewed-by: Soheil Hassas Yeganeh <soheil@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/ipv4/tcp_output.c9
-rw-r--r--net/ipv4/tcp_timer.c23
2 files changed, 7 insertions, 25 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 57a56e205070..d2d494c74811 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2963,13 +2963,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
2963#endif 2963#endif
2964 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; 2964 TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
2965 tp->retrans_out += tcp_skb_pcount(skb); 2965 tp->retrans_out += tcp_skb_pcount(skb);
2966
2967 /* Save stamp of the first retransmit. */
2968 if (!tp->retrans_stamp)
2969 tp->retrans_stamp = tcp_skb_timestamp(skb);
2970
2971 } 2966 }
2972 2967
2968 /* Save stamp of the first (attempted) retransmit. */
2969 if (!tp->retrans_stamp)
2970 tp->retrans_stamp = tcp_skb_timestamp(skb);
2971
2973 if (tp->undo_retrans < 0) 2972 if (tp->undo_retrans < 0)
2974 tp->undo_retrans = 0; 2973 tp->undo_retrans = 0;
2975 tp->undo_retrans += tcp_skb_pcount(skb); 2974 tp->undo_retrans += tcp_skb_pcount(skb);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index e7d09e3705b8..1e61f0bd6e24 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,28 +22,14 @@
22#include <linux/gfp.h> 22#include <linux/gfp.h>
23#include <net/tcp.h> 23#include <net/tcp.h>
24 24
25static u32 tcp_retransmit_stamp(const struct sock *sk)
26{
27 u32 start_ts = tcp_sk(sk)->retrans_stamp;
28
29 if (unlikely(!start_ts)) {
30 struct sk_buff *head = tcp_rtx_queue_head(sk);
31
32 if (!head)
33 return 0;
34 start_ts = tcp_skb_timestamp(head);
35 }
36 return start_ts;
37}
38
39static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) 25static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
40{ 26{
41 struct inet_connection_sock *icsk = inet_csk(sk); 27 struct inet_connection_sock *icsk = inet_csk(sk);
42 u32 elapsed, start_ts; 28 u32 elapsed, start_ts;
43 s32 remaining; 29 s32 remaining;
44 30
45 start_ts = tcp_retransmit_stamp(sk); 31 start_ts = tcp_sk(sk)->retrans_stamp;
46 if (!icsk->icsk_user_timeout || !start_ts) 32 if (!icsk->icsk_user_timeout)
47 return icsk->icsk_rto; 33 return icsk->icsk_rto;
48 elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts; 34 elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
49 remaining = icsk->icsk_user_timeout - elapsed; 35 remaining = icsk->icsk_user_timeout - elapsed;
@@ -197,10 +183,7 @@ static bool retransmits_timed_out(struct sock *sk,
197 if (!inet_csk(sk)->icsk_retransmits) 183 if (!inet_csk(sk)->icsk_retransmits)
198 return false; 184 return false;
199 185
200 start_ts = tcp_retransmit_stamp(sk); 186 start_ts = tcp_sk(sk)->retrans_stamp;
201 if (!start_ts)
202 return false;
203
204 if (likely(timeout == 0)) { 187 if (likely(timeout == 0)) {
205 linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base); 188 linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
206 189