aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_metrics.c
diff options
context:
space:
mode:
author Soheil Hassas Yeganeh <soheil@google.com> 2017-03-15 16:30:45 -0400
committer David S. Miller <davem@davemloft.net> 2017-03-16 23:33:56 -0400
commit d82bae12dc38d79a2b77473f5eb0612a3d69c55b (patch)
tree 677ef02fce0b832e01c02c21f37fa7d653663104 /net/ipv4/tcp_metrics.c
parent 8b705f5241adb2d0b5d009abea5a865601666974 (diff)
tcp: remove per-destination timestamp cache
Commit 8a5bd45f6616 (tcp: randomize tcp timestamp offsets for each connection) randomizes TCP timestamps per connection. After this commit, there is no guarantee that the timestamps received from the same destination are monotonically increasing. As a result, the per-destination timestamp cache in TCP metrics (i.e., tcpm_ts in struct tcp_metrics_block) is broken and cannot be relied upon.

Remove the per-destination timestamp cache and all related code paths.

Note that this cache was already broken for caching timestamps of multiple machines behind a NAT sharing the same address.

Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Cc: Lutz Vieweg <lvml@5t9.de>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_metrics.c')
-rw-r--r-- net/ipv4/tcp_metrics.c 147
1 file changed, 4 insertions, 143 deletions
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 0f46e5fe31ad..9d0d4f39e42b 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -45,8 +45,6 @@ struct tcp_metrics_block {
45 struct inetpeer_addr tcpm_saddr; 45 struct inetpeer_addr tcpm_saddr;
46 struct inetpeer_addr tcpm_daddr; 46 struct inetpeer_addr tcpm_daddr;
47 unsigned long tcpm_stamp; 47 unsigned long tcpm_stamp;
48 u32 tcpm_ts;
49 u32 tcpm_ts_stamp;
50 u32 tcpm_lock; 48 u32 tcpm_lock;
51 u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1]; 49 u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];
52 struct tcp_fastopen_metrics tcpm_fastopen; 50 struct tcp_fastopen_metrics tcpm_fastopen;
@@ -123,8 +121,6 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
123 tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); 121 tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
124 tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); 122 tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
125 tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); 123 tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
126 tm->tcpm_ts = 0;
127 tm->tcpm_ts_stamp = 0;
128 if (fastopen_clear) { 124 if (fastopen_clear) {
129 tm->tcpm_fastopen.mss = 0; 125 tm->tcpm_fastopen.mss = 0;
130 tm->tcpm_fastopen.syn_loss = 0; 126 tm->tcpm_fastopen.syn_loss = 0;
@@ -273,48 +269,6 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
273 return tm; 269 return tm;
274} 270}
275 271
276static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
277{
278 struct tcp_metrics_block *tm;
279 struct inetpeer_addr saddr, daddr;
280 unsigned int hash;
281 struct net *net;
282
283 if (tw->tw_family == AF_INET) {
284 inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
285 inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
286 hash = ipv4_addr_hash(tw->tw_daddr);
287 }
288#if IS_ENABLED(CONFIG_IPV6)
289 else if (tw->tw_family == AF_INET6) {
290 if (ipv6_addr_v4mapped(&tw->tw_v6_daddr)) {
291 inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr);
292 inetpeer_set_addr_v4(&daddr, tw->tw_daddr);
293 hash = ipv4_addr_hash(tw->tw_daddr);
294 } else {
295 inetpeer_set_addr_v6(&saddr, &tw->tw_v6_rcv_saddr);
296 inetpeer_set_addr_v6(&daddr, &tw->tw_v6_daddr);
297 hash = ipv6_addr_hash(&tw->tw_v6_daddr);
298 }
299 }
300#endif
301 else
302 return NULL;
303
304 net = twsk_net(tw);
305 hash ^= net_hash_mix(net);
306 hash = hash_32(hash, tcp_metrics_hash_log);
307
308 for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
309 tm = rcu_dereference(tm->tcpm_next)) {
310 if (addr_same(&tm->tcpm_saddr, &saddr) &&
311 addr_same(&tm->tcpm_daddr, &daddr) &&
312 net_eq(tm_net(tm), net))
313 break;
314 }
315 return tm;
316}
317
318static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, 272static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
319 struct dst_entry *dst, 273 struct dst_entry *dst,
320 bool create) 274 bool create)
@@ -573,8 +527,7 @@ reset:
573 tp->snd_cwnd_stamp = tcp_time_stamp; 527 tp->snd_cwnd_stamp = tcp_time_stamp;
574} 528}
575 529
576bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, 530bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
577 bool paws_check, bool timestamps)
578{ 531{
579 struct tcp_metrics_block *tm; 532 struct tcp_metrics_block *tm;
580 bool ret; 533 bool ret;
@@ -584,94 +537,10 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
584 537
585 rcu_read_lock(); 538 rcu_read_lock();
586 tm = __tcp_get_metrics_req(req, dst); 539 tm = __tcp_get_metrics_req(req, dst);
587 if (paws_check) { 540 if (tm && tcp_metric_get(tm, TCP_METRIC_RTT))
588 if (tm &&
589 (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL &&
590 ((s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW ||
591 !timestamps))
592 ret = false;
593 else
594 ret = true;
595 } else {
596 if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp)
597 ret = true;
598 else
599 ret = false;
600 }
601 rcu_read_unlock();
602
603 return ret;
604}
605
606void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst)
607{
608 struct tcp_metrics_block *tm;
609
610 rcu_read_lock();
611 tm = tcp_get_metrics(sk, dst, true);
612 if (tm) {
613 struct tcp_sock *tp = tcp_sk(sk);
614
615 if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) {
616 tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp;
617 tp->rx_opt.ts_recent = tm->tcpm_ts;
618 }
619 }
620 rcu_read_unlock();
621}
622EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp);
623
624/* VJ's idea. Save last timestamp seen from this destination and hold
625 * it at least for normal timewait interval to use for duplicate
626 * segment detection in subsequent connections, before they enter
627 * synchronized state.
628 */
629bool tcp_remember_stamp(struct sock *sk)
630{
631 struct dst_entry *dst = __sk_dst_get(sk);
632 bool ret = false;
633
634 if (dst) {
635 struct tcp_metrics_block *tm;
636
637 rcu_read_lock();
638 tm = tcp_get_metrics(sk, dst, true);
639 if (tm) {
640 struct tcp_sock *tp = tcp_sk(sk);
641
642 if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 ||
643 ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
644 tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
645 tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
646 tm->tcpm_ts = tp->rx_opt.ts_recent;
647 }
648 ret = true;
649 }
650 rcu_read_unlock();
651 }
652 return ret;
653}
654
655bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
656{
657 struct tcp_metrics_block *tm;
658 bool ret = false;
659
660 rcu_read_lock();
661 tm = __tcp_get_metrics_tw(tw);
662 if (tm) {
663 const struct tcp_timewait_sock *tcptw;
664 struct sock *sk = (struct sock *) tw;
665
666 tcptw = tcp_twsk(sk);
667 if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 ||
668 ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
669 tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
670 tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
671 tm->tcpm_ts = tcptw->tw_ts_recent;
672 }
673 ret = true; 541 ret = true;
674 } 542 else
543 ret = false;
675 rcu_read_unlock(); 544 rcu_read_unlock();
676 545
677 return ret; 546 return ret;
@@ -791,14 +660,6 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
791 jiffies - tm->tcpm_stamp, 660 jiffies - tm->tcpm_stamp,
792 TCP_METRICS_ATTR_PAD) < 0) 661 TCP_METRICS_ATTR_PAD) < 0)
793 goto nla_put_failure; 662 goto nla_put_failure;
794 if (tm->tcpm_ts_stamp) {
795 if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP,
796 (s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0)
797 goto nla_put_failure;
798 if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL,
799 tm->tcpm_ts) < 0)
800 goto nla_put_failure;
801 }
802 663
803 { 664 {
804 int n = 0; 665 int n = 0;