aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarcelo Leitner <mleitner@redhat.com>2014-11-04 14:15:08 -0500
committerDavid S. Miller <davem@davemloft.net>2014-11-05 16:59:49 -0500
commit1f37bf87aa7523d28e7e4c4f7bb5dba98faa3e00 (patch)
tree587ea2b8eaff79f04b9f6cadce4089c1dd765b32
parent46d3802627f60be1c17659a44c8d2d7a5e247023 (diff)
tcp: zero retrans_stamp if all retrans were acked
Ueki Kohei reported that when we are using NewReno with connections that have a very low traffic, we may timeout the connection too early if a second loss occurs after the first one was successfully acked but no data was transfered later. Below is his description of it: When SACK is disabled, and a socket suffers multiple separate TCP retransmissions, that socket's ETIMEDOUT value is calculated from the time of the *first* retransmission instead of the *latest* retransmission. This happens because the tcp_sock's retrans_stamp is set once then never cleared. Take the following connection: Linux remote-machine | | send#1---->(*1)|--------> data#1 --------->| | | | RTO : : | | | ---(*2)|----> data#1(retrans) ---->| | (*3)|<---------- ACK <----------| | | | | : : | : : | : : 16 minutes (or more) : | : : | : : | : : | | | send#2---->(*4)|--------> data#2 --------->| | | | RTO : : | | | ---(*5)|----> data#2(retrans) ---->| | | | | | | RTO*2 : : | | | | | | ETIMEDOUT<----(*6)| | (*1) One data packet sent. (*2) Because no ACK packet is received, the packet is retransmitted. (*3) The ACK packet is received. The transmitted packet is acknowledged. At this point the first "retransmission event" has passed and been recovered from. Any future retransmission is a completely new "event". (*4) After 16 minutes (to correspond with retries2=15), a new data packet is sent. Note: No data is transmitted between (*3) and (*4). The socket's timeout SHOULD be calculated from this point in time, but instead it's calculated from the prior "event" 16 minutes ago. (*5) Because no ACK packet is received, the packet is retransmitted. (*6) At the time of the 2nd retransmission, the socket returns ETIMEDOUT. Therefore, now we clear retrans_stamp as soon as all data during the loss window is fully acked. Reported-by: Ueki Kohei Cc: Neal Cardwell <ncardwell@google.com> Cc: Yuchung Cheng <ycheng@google.com> Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com> Acked-by: Neal Cardwell <ncardwell@google.com> Tested-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/ipv4/tcp_input.c60
1 files changed, 31 insertions, 29 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a12b455928e5..88fa2d160685 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2315,6 +2315,35 @@ static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
2315 2315
2316/* Undo procedures. */ 2316/* Undo procedures. */
2317 2317
2318/* We can clear retrans_stamp when there are no retransmissions in the
2319 * window. It would seem that it is trivially available for us in
2320 * tp->retrans_out, however, that kind of assumptions doesn't consider
2321 * what will happen if errors occur when sending retransmission for the
2322 * second time. ...It could the that such segment has only
2323 * TCPCB_EVER_RETRANS set at the present time. It seems that checking
2324 * the head skb is enough except for some reneging corner cases that
2325 * are not worth the effort.
2326 *
2327 * Main reason for all this complexity is the fact that connection dying
2328 * time now depends on the validity of the retrans_stamp, in particular,
2329 * that successive retransmissions of a segment must not advance
2330 * retrans_stamp under any conditions.
2331 */
2332static bool tcp_any_retrans_done(const struct sock *sk)
2333{
2334 const struct tcp_sock *tp = tcp_sk(sk);
2335 struct sk_buff *skb;
2336
2337 if (tp->retrans_out)
2338 return true;
2339
2340 skb = tcp_write_queue_head(sk);
2341 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2342 return true;
2343
2344 return false;
2345}
2346
2318#if FASTRETRANS_DEBUG > 1 2347#if FASTRETRANS_DEBUG > 1
2319static void DBGUNDO(struct sock *sk, const char *msg) 2348static void DBGUNDO(struct sock *sk, const char *msg)
2320{ 2349{
@@ -2410,6 +2439,8 @@ static bool tcp_try_undo_recovery(struct sock *sk)
2410 * is ACKed. For Reno it is MUST to prevent false 2439 * is ACKed. For Reno it is MUST to prevent false
2411 * fast retransmits (RFC2582). SACK TCP is safe. */ 2440 * fast retransmits (RFC2582). SACK TCP is safe. */
2412 tcp_moderate_cwnd(tp); 2441 tcp_moderate_cwnd(tp);
2442 if (!tcp_any_retrans_done(sk))
2443 tp->retrans_stamp = 0;
2413 return true; 2444 return true;
2414 } 2445 }
2415 tcp_set_ca_state(sk, TCP_CA_Open); 2446 tcp_set_ca_state(sk, TCP_CA_Open);
@@ -2430,35 +2461,6 @@ static bool tcp_try_undo_dsack(struct sock *sk)
2430 return false; 2461 return false;
2431} 2462}
2432 2463
2433/* We can clear retrans_stamp when there are no retransmissions in the
2434 * window. It would seem that it is trivially available for us in
2435 * tp->retrans_out, however, that kind of assumptions doesn't consider
2436 * what will happen if errors occur when sending retransmission for the
2437 * second time. ...It could the that such segment has only
2438 * TCPCB_EVER_RETRANS set at the present time. It seems that checking
2439 * the head skb is enough except for some reneging corner cases that
2440 * are not worth the effort.
2441 *
2442 * Main reason for all this complexity is the fact that connection dying
2443 * time now depends on the validity of the retrans_stamp, in particular,
2444 * that successive retransmissions of a segment must not advance
2445 * retrans_stamp under any conditions.
2446 */
2447static bool tcp_any_retrans_done(const struct sock *sk)
2448{
2449 const struct tcp_sock *tp = tcp_sk(sk);
2450 struct sk_buff *skb;
2451
2452 if (tp->retrans_out)
2453 return true;
2454
2455 skb = tcp_write_queue_head(sk);
2456 if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
2457 return true;
2458
2459 return false;
2460}
2461
2462/* Undo during loss recovery after partial ACK or using F-RTO. */ 2464/* Undo during loss recovery after partial ACK or using F-RTO. */
2463static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) 2465static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
2464{ 2466{