about summary refs log tree commit diff stats
path: root/net/ipv4
diff options
context:
space:
mode:
author	David S. Miller <davem@davemloft.net>	2012-07-10 06:14:24 -0400
committer	David S. Miller <davem@davemloft.net>	2012-07-11 01:40:08 -0400
commit	81166dd6fa8eb780b2132d32fbc77eb6ac04e44e (patch)
tree	7a41b12950640648d3bb95ca74bfa1391efa0421 /net/ipv4
parent	94334d5ed4b64ebcd2c4b421e133b921f8ccf75d (diff)
tcp: Move timestamps from inetpeer to metrics cache.
With help from Lin Ming. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--	net/ipv4/inetpeer.c	1
-rw-r--r--	net/ipv4/route.c	8
-rw-r--r--	net/ipv4/tcp_ipv4.c	30
-rw-r--r--	net/ipv4/tcp_metrics.c	136
-rw-r--r--	net/ipv4/tcp_minisocks.c	46
5 files changed, 138 insertions, 83 deletions
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index da90a8cab61..f457bcb4135 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -508,7 +508,6 @@ relookup:
508 (daddr->family == AF_INET) ? 508 (daddr->family == AF_INET) ?
509 secure_ip_id(daddr->addr.a4) : 509 secure_ip_id(daddr->addr.a4) :
510 secure_ipv6_id(daddr->addr.a6)); 510 secure_ipv6_id(daddr->addr.a6));
511 p->tcp_ts_stamp = 0;
512 p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; 511 p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
513 p->rate_tokens = 0; 512 p->rate_tokens = 0;
514 p->rate_last = 0; 513 p->rate_last = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d02c91177d3..78d81543766 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2846,7 +2846,7 @@ static int rt_fill_info(struct net *net,
2846 struct rtmsg *r; 2846 struct rtmsg *r;
2847 struct nlmsghdr *nlh; 2847 struct nlmsghdr *nlh;
2848 unsigned long expires = 0; 2848 unsigned long expires = 0;
2849 u32 id = 0, ts = 0, tsage = 0, error; 2849 u32 id = 0, error;
2850 2850
2851 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); 2851 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
2852 if (nlh == NULL) 2852 if (nlh == NULL)
@@ -2903,10 +2903,6 @@ static int rt_fill_info(struct net *net,
2903 const struct inet_peer *peer = rt_peer_ptr(rt); 2903 const struct inet_peer *peer = rt_peer_ptr(rt);
2904 inet_peer_refcheck(peer); 2904 inet_peer_refcheck(peer);
2905 id = atomic_read(&peer->ip_id_count) & 0xffff; 2905 id = atomic_read(&peer->ip_id_count) & 0xffff;
2906 if (peer->tcp_ts_stamp) {
2907 ts = peer->tcp_ts;
2908 tsage = get_seconds() - peer->tcp_ts_stamp;
2909 }
2910 expires = ACCESS_ONCE(peer->pmtu_expires); 2906 expires = ACCESS_ONCE(peer->pmtu_expires);
2911 if (expires) { 2907 if (expires) {
2912 if (time_before(jiffies, expires)) 2908 if (time_before(jiffies, expires))
@@ -2942,7 +2938,7 @@ static int rt_fill_info(struct net *net,
2942 goto nla_put_failure; 2938 goto nla_put_failure;
2943 } 2939 }
2944 2940
2945 if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, 2941 if (rtnl_put_cacheinfo(skb, &rt->dst, id, 0, 0,
2946 expires, error) < 0) 2942 expires, error) < 0)
2947 goto nla_put_failure; 2943 goto nla_put_failure;
2948 2944
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e9312a8f33a..d406bf7f37d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -209,22 +209,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
209 } 209 }
210 210
211 if (tcp_death_row.sysctl_tw_recycle && 211 if (tcp_death_row.sysctl_tw_recycle &&
212 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) { 212 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
213 struct inet_peer *peer = rt_get_peer(rt, fl4->daddr); 213 tcp_fetch_timewait_stamp(sk, &rt->dst);
214 /*
215 * VJ's idea. We save last timestamp seen from
216 * the destination in peer table, when entering state
217 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
218 * when trying new connection.
219 */
220 if (peer) {
221 inet_peer_refcheck(peer);
222 if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
223 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
224 tp->rx_opt.ts_recent = peer->tcp_ts;
225 }
226 }
227 }
228 214
229 inet->inet_dport = usin->sin_port; 215 inet->inet_dport = usin->sin_port;
230 inet->inet_daddr = daddr; 216 inet->inet_daddr = daddr;
@@ -1375,7 +1361,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1375 isn = cookie_v4_init_sequence(sk, skb, &req->mss); 1361 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1376 req->cookie_ts = tmp_opt.tstamp_ok; 1362 req->cookie_ts = tmp_opt.tstamp_ok;
1377 } else if (!isn) { 1363 } else if (!isn) {
1378 struct inet_peer *peer = NULL;
1379 struct flowi4 fl4; 1364 struct flowi4 fl4;
1380 1365
1381 /* VJ's idea. We save last timestamp seen 1366 /* VJ's idea. We save last timestamp seen
@@ -1390,12 +1375,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1390 if (tmp_opt.saw_tstamp && 1375 if (tmp_opt.saw_tstamp &&
1391 tcp_death_row.sysctl_tw_recycle && 1376 tcp_death_row.sysctl_tw_recycle &&
1392 (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL && 1377 (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL &&
1393 fl4.daddr == saddr && 1378 fl4.daddr == saddr) {
1394 (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) { 1379 if (!tcp_peer_is_proven(req, dst, true)) {
1395 inet_peer_refcheck(peer);
1396 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1397 (s32)(peer->tcp_ts - req->ts_recent) >
1398 TCP_PAWS_WINDOW) {
1399 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); 1380 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1400 goto drop_and_release; 1381 goto drop_and_release;
1401 } 1382 }
@@ -1404,8 +1385,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1404 else if (!sysctl_tcp_syncookies && 1385 else if (!sysctl_tcp_syncookies &&
1405 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < 1386 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1406 (sysctl_max_syn_backlog >> 2)) && 1387 (sysctl_max_syn_backlog >> 2)) &&
1407 (!peer || !peer->tcp_ts_stamp) && 1388 !tcp_peer_is_proven(req, dst, false)) {
1408 !tcp_peer_is_proven(req, dst)) {
1409 /* Without syncookies last quarter of 1389 /* Without syncookies last quarter of
1410 * backlog is filled with destinations, 1390 * backlog is filled with destinations,
1411 * proven to be alive. 1391 * proven to be alive.
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 56223bab251..1fd83d3118f 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -34,6 +34,8 @@ struct tcp_metrics_block {
34 struct tcp_metrics_block __rcu *tcpm_next; 34 struct tcp_metrics_block __rcu *tcpm_next;
35 struct inetpeer_addr tcpm_addr; 35 struct inetpeer_addr tcpm_addr;
36 unsigned long tcpm_stamp; 36 unsigned long tcpm_stamp;
37 u32 tcpm_ts;
38 u32 tcpm_ts_stamp;
37 u32 tcpm_lock; 39 u32 tcpm_lock;
38 u32 tcpm_vals[TCP_METRIC_MAX]; 40 u32 tcpm_vals[TCP_METRIC_MAX];
39}; 41};
@@ -114,6 +116,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst)
114 tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); 116 tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
115 tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); 117 tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
116 tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); 118 tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
119 tm->tcpm_ts = 0;
120 tm->tcpm_ts_stamp = 0;
117} 121}
118 122
119static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, 123static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
@@ -230,6 +234,45 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
230 return tm; 234 return tm;
231} 235}
232 236
237static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
238{
239 struct inet6_timewait_sock *tw6;
240 struct tcp_metrics_block *tm;
241 struct inetpeer_addr addr;
242 unsigned int hash;
243 struct net *net;
244
245 addr.family = tw->tw_family;
246 switch (addr.family) {
247 case AF_INET:
248 addr.addr.a4 = tw->tw_daddr;
249 hash = (__force unsigned int) addr.addr.a4;
250 break;
251 case AF_INET6:
252 tw6 = inet6_twsk((struct sock *)tw);
253 *(struct in6_addr *)addr.addr.a6 = tw6->tw_v6_daddr;
254 hash = ((__force unsigned int) addr.addr.a6[0] ^
255 (__force unsigned int) addr.addr.a6[1] ^
256 (__force unsigned int) addr.addr.a6[2] ^
257 (__force unsigned int) addr.addr.a6[3]);
258 break;
259 default:
260 return NULL;
261 }
262
263 hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8);
264
265 net = twsk_net(tw);
266 hash &= net->ipv4.tcp_metrics_hash_mask;
267
268 for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
269 tm = rcu_dereference(tm->tcpm_next)) {
270 if (addr_same(&tm->tcpm_addr, &addr))
271 break;
272 }
273 return tm;
274}
275
233static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, 276static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
234 struct dst_entry *dst, 277 struct dst_entry *dst,
235 bool create) 278 bool create)
@@ -496,7 +539,7 @@ reset:
496 tp->snd_cwnd_stamp = tcp_time_stamp; 539 tp->snd_cwnd_stamp = tcp_time_stamp;
497} 540}
498 541
499bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) 542bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check)
500{ 543{
501 struct tcp_metrics_block *tm; 544 struct tcp_metrics_block *tm;
502 bool ret; 545 bool ret;
@@ -506,16 +549,99 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
506 549
507 rcu_read_lock(); 550 rcu_read_lock();
508 tm = __tcp_get_metrics_req(req, dst); 551 tm = __tcp_get_metrics_req(req, dst);
509 if (tm && tcp_metric_get(tm, TCP_METRIC_RTT)) 552 if (paws_check) {
510 ret = true; 553 if (tm &&
511 else 554 (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL &&
512 ret = false; 555 (s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW)
556 ret = false;
557 else
558 ret = true;
559 } else {
560 if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp)
561 ret = true;
562 else
563 ret = false;
564 }
513 rcu_read_unlock(); 565 rcu_read_unlock();
514 566
515 return ret; 567 return ret;
516} 568}
517EXPORT_SYMBOL_GPL(tcp_peer_is_proven); 569EXPORT_SYMBOL_GPL(tcp_peer_is_proven);
518 570
571void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst)
572{
573 struct tcp_metrics_block *tm;
574
575 rcu_read_lock();
576 tm = tcp_get_metrics(sk, dst, true);
577 if (tm) {
578 struct tcp_sock *tp = tcp_sk(sk);
579
580 if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) {
581 tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp;
582 tp->rx_opt.ts_recent = tm->tcpm_ts;
583 }
584 }
585 rcu_read_unlock();
586}
587EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp);
588
589/* VJ's idea. Save last timestamp seen from this destination and hold
590 * it at least for normal timewait interval to use for duplicate
591 * segment detection in subsequent connections, before they enter
592 * synchronized state.
593 */
594bool tcp_remember_stamp(struct sock *sk)
595{
596 struct dst_entry *dst = __sk_dst_get(sk);
597 bool ret = false;
598
599 if (dst) {
600 struct tcp_metrics_block *tm;
601
602 rcu_read_lock();
603 tm = tcp_get_metrics(sk, dst, true);
604 if (tm) {
605 struct tcp_sock *tp = tcp_sk(sk);
606
607 if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 ||
608 ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
609 tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
610 tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
611 tm->tcpm_ts = tp->rx_opt.ts_recent;
612 }
613 ret = true;
614 }
615 rcu_read_unlock();
616 }
617 return ret;
618}
619
620bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
621{
622 struct tcp_metrics_block *tm;
623 bool ret = false;
624
625 rcu_read_lock();
626 tm = __tcp_get_metrics_tw(tw);
627 if (tw) {
628 const struct tcp_timewait_sock *tcptw;
629 struct sock *sk = (struct sock *) tw;
630
631 tcptw = tcp_twsk(sk);
632 if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 ||
633 ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
634 tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
635 tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
636 tm->tcpm_ts = tcptw->tw_ts_recent;
637 }
638 ret = true;
639 }
640 rcu_read_unlock();
641
642 return ret;
643}
644
519static unsigned long tcpmhash_entries; 645static unsigned long tcpmhash_entries;
520static int __init set_tcpmhash_entries(char *str) 646static int __init set_tcpmhash_entries(char *str)
521{ 647{
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 72b7c63b1a3..a51aa534dab 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -49,52 +49,6 @@ struct inet_timewait_death_row tcp_death_row = {
49}; 49};
50EXPORT_SYMBOL_GPL(tcp_death_row); 50EXPORT_SYMBOL_GPL(tcp_death_row);
51 51
52/* VJ's idea. Save last timestamp seen from this destination
53 * and hold it at least for normal timewait interval to use for duplicate
54 * segment detection in subsequent connections, before they enter synchronized
55 * state.
56 */
57
58static bool tcp_remember_stamp(struct sock *sk)
59{
60 const struct inet_connection_sock *icsk = inet_csk(sk);
61 struct tcp_sock *tp = tcp_sk(sk);
62 struct inet_peer *peer;
63
64 peer = icsk->icsk_af_ops->get_peer(sk);
65 if (peer) {
66 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
67 ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
68 peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
69 peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
70 peer->tcp_ts = tp->rx_opt.ts_recent;
71 }
72 return true;
73 }
74
75 return false;
76}
77
78static bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
79{
80 const struct tcp_timewait_sock *tcptw;
81 struct sock *sk = (struct sock *) tw;
82 struct inet_peer *peer;
83
84 tcptw = tcp_twsk(sk);
85 peer = tcptw->tw_peer;
86 if (peer) {
87 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
88 ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
89 peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
90 peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
91 peer->tcp_ts = tcptw->tw_ts_recent;
92 }
93 return true;
94 }
95 return false;
96}
97
98static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) 52static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
99{ 53{
100 if (seq == s_win) 54 if (seq == s_win)