aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Wei Wang <weiwan@google.com> 2018-06-20 00:42:50 -0400
committer: David S. Miller <davem@davemloft.net> 2018-06-22 00:45:01 -0400
commit: 3f6c65d6255a872846c44182c82c78d3dc6239f5 (patch)
tree: 7dabd565296b1e797140820da5205e4c0f60aae2
parent: 66caeeb99d0db139c894b1b650546c982701aa27 (diff)
tcp: ignore rcv_rtt sample with old ts ecr value
When receiving multiple packets with the same ts ecr value, only try to compute an rcv_rtt sample with the earliest received packet.

This is because the rcv_rtt calculated by later received packets could possibly include long idle time or other types of delay.

For example:
(1) server sends last packet of reply with TS val V1
(2) client ACKs last packet of reply with TS ecr V1
(3) long idle time passes
(4) client sends next request data packet with TS ecr V1 (again!)

At this time, the rcv_rtt computed on the server with TS ecr V1 will be inflated with the idle time and should be ignored.

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/linux/tcp.h | 1
-rw-r--r-- net/ipv4/tcp.c | 1
-rw-r--r-- net/ipv4/tcp_input.c | 14
3 files changed, 13 insertions, 3 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 72705eaf4b84..3dbea6610304 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -350,6 +350,7 @@ struct tcp_sock {
350#endif 350#endif
351 351
352/* Receiver side RTT estimation */ 352/* Receiver side RTT estimation */
353 u32 rcv_rtt_last_tsecr;
353 struct { 354 struct {
354 u32 rtt_us; 355 u32 rtt_us;
355 u32 seq; 356 u32 seq;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 141acd92e58a..47c45d5be9f9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2563,6 +2563,7 @@ int tcp_disconnect(struct sock *sk, int flags)
2563 sk->sk_shutdown = 0; 2563 sk->sk_shutdown = 0;
2564 sock_reset_flag(sk, SOCK_DONE); 2564 sock_reset_flag(sk, SOCK_DONE);
2565 tp->srtt_us = 0; 2565 tp->srtt_us = 0;
2566 tp->rcv_rtt_last_tsecr = 0;
2566 tp->write_seq += tp->max_window + 2; 2567 tp->write_seq += tp->max_window + 2;
2567 if (tp->write_seq == 0) 2568 if (tp->write_seq == 0)
2568 tp->write_seq = 1; 2569 tp->write_seq = 1;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 355d3dffd021..76ca88f63b70 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -582,9 +582,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
582{ 582{
583 struct tcp_sock *tp = tcp_sk(sk); 583 struct tcp_sock *tp = tcp_sk(sk);
584 584
585 if (tp->rx_opt.rcv_tsecr && 585 if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
586 (TCP_SKB_CB(skb)->end_seq - 586 return;
587 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) { 587 tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
588
589 if (TCP_SKB_CB(skb)->end_seq -
590 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
588 u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; 591 u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
589 u32 delta_us; 592 u32 delta_us;
590 593
@@ -5475,6 +5478,11 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
5475 tcp_ack(sk, skb, 0); 5478 tcp_ack(sk, skb, 0);
5476 __kfree_skb(skb); 5479 __kfree_skb(skb);
5477 tcp_data_snd_check(sk); 5480 tcp_data_snd_check(sk);
5481 /* When receiving pure ack in fast path, update
5482 * last ts ecr directly instead of calling
5483 * tcp_rcv_rtt_measure_ts()
5484 */
5485 tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
5478 return; 5486 return;
5479 } else { /* Header too small */ 5487 } else { /* Header too small */
5480 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 5488 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);