path: root/net/ipv6/tcp_ipv6.c
author    David S. Miller <davem@davemloft.net>  2010-12-02 15:14:29 -0500
committer David S. Miller <davem@davemloft.net>  2010-12-02 15:14:29 -0500
commit    493f377d6dd56f4e98b198d637fe714ab124681b (patch)
tree      2e505bb0908fabd1f5cec1e40302e9f5b2d33077 /net/ipv6/tcp_ipv6.c
parent    ae4694b2d3e4c0f47c0e804a68417be57e5daf85 (diff)
tcp: Add timewait recycling bits to ipv6 connect code.
This will also improve handling of ipv6 tcp socket request backlog when
syncookies are not enabled. When backlog becomes very deep, last quarter
of backlog is limited to validated destinations. Previously only ipv4
implemented this logic, but now ipv6 does too.

Now we are only one step away from enabling timewait recycling for ipv6,
and that step is simply filling in the implementation of
tcp_v6_get_peer() and tcp_v6_tw_get_peer().

Signed-off-by: David S. Miller <davem@davemloft.net>
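As a rough, hypothetical sketch of the "last quarter of the backlog" gate described above (this is not kernel code; struct peer_hint, accept_into_backlog and the main() driver are invented here purely for illustration), the behaviour with syncookies disabled is roughly:

/*
 * Simplified, illustrative model of the backlog gate this patch
 * mirrors from ipv4 into ipv6.  All names below are made up; the
 * real logic lives in tcp_v6_conn_request().
 */
#include <stdbool.h>
#include <stdio.h>

struct peer_hint {
	unsigned int tcp_ts_stamp;	/* last TCP timestamp cached for this peer, 0 if none */
	unsigned int rtt_metric;	/* cached route RTT metric, 0 if unknown */
};

static bool accept_into_backlog(int max_syn_backlog, int queue_len,
				const struct peer_hint *hint)
{
	/* More than a quarter of the backlog still free: accept anyone. */
	if (max_syn_backlog - queue_len >= (max_syn_backlog >> 2))
		return true;

	/*
	 * Last quarter of the backlog (and no syncookies): reserve these
	 * slots for destinations we have prior evidence about, i.e. a
	 * cached timestamp or a measured RTT.
	 */
	return hint && (hint->tcp_ts_stamp || hint->rtt_metric);
}

int main(void)
{
	struct peer_hint known = { .tcp_ts_stamp = 123456 };

	/* Unknown peer with the backlog nearly full: dropped (0). */
	printf("unknown peer:   %d\n", accept_into_backlog(1024, 1000, NULL));
	/* Previously validated peer in the same situation: accepted (1). */
	printf("validated peer: %d\n", accept_into_backlog(1024, 1000, &known));
	return 0;
}

In the patch itself this corresponds to the "else if (!sysctl_tcp_syncookies && ...)" clause added to tcp_v6_conn_request(): the evidence is peer->tcp_ts_stamp from the inet_peer table or dst_metric(dst, RTAX_RTT), and the request is dropped via LIMIT_NETDEBUG/goto drop_and_release when neither is present and fewer than a quarter of sysctl_max_syn_backlog slots remain.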
Diffstat (limited to 'net/ipv6/tcp_ipv6.c')
-rw-r--r--	net/ipv6/tcp_ipv6.c	101
1 files changed, 76 insertions, 25 deletions
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5f73a1808e36..c2ebbe1c5a47 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -130,6 +130,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct in6_addr *saddr = NULL, *final_p, final;
+	struct rt6_info *rt;
 	struct flowi fl;
 	struct dst_entry *dst;
 	int addr_type;
@@ -280,6 +281,26 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	sk->sk_gso_type = SKB_GSO_TCPV6;
 	__ip6_dst_store(sk, dst, NULL, NULL);
 
+	rt = (struct rt6_info *) dst;
+	if (tcp_death_row.sysctl_tw_recycle &&
+	    !tp->rx_opt.ts_recent_stamp &&
+	    ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) {
+		struct inet_peer *peer = rt6_get_peer(rt);
+		/*
+		 * VJ's idea. We save last timestamp seen from
+		 * the destination in peer table, when entering state
+		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
+		 * when trying new connection.
+		 */
+		if (peer) {
+			inet_peer_refcheck(peer);
+			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
+				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
+				tp->rx_opt.ts_recent = peer->tcp_ts;
+			}
+		}
+	}
+
 	icsk->icsk_ext_hdr_len = 0;
 	if (np->opt)
 		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
@@ -1170,6 +1191,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 isn = TCP_SKB_CB(skb)->when;
+	struct dst_entry *dst = NULL;
 #ifdef CONFIG_SYN_COOKIES
 	int want_cookie = 0;
 #else
@@ -1267,6 +1289,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	TCP_ECN_create_request(req, tcp_hdr(skb));
 
 	if (!isn) {
+		struct inet_peer *peer = NULL;
+
 		if (ipv6_opt_accepted(sk, skb) ||
 		    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 		    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -1279,13 +1303,57 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		if (!sk->sk_bound_dev_if &&
 		    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
 			treq->iif = inet6_iif(skb);
-		if (!want_cookie) {
-			isn = tcp_v6_init_sequence(skb);
-		} else {
+
+		if (want_cookie) {
 			isn = cookie_v6_init_sequence(sk, skb, &req->mss);
 			req->cookie_ts = tmp_opt.tstamp_ok;
+			goto have_isn;
+		}
+
+		/* VJ's idea. We save last timestamp seen
+		 * from the destination in peer table, when entering
+		 * state TIME-WAIT, and check against it before
+		 * accepting new connection request.
+		 *
+		 * If "isn" is not zero, this request hit alive
+		 * timewait bucket, so that all the necessary checks
+		 * are made in the function processing timewait state.
+		 */
+		if (tmp_opt.saw_tstamp &&
+		    tcp_death_row.sysctl_tw_recycle &&
+		    (dst = inet6_csk_route_req(sk, req)) != NULL &&
+		    (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
+		    ipv6_addr_equal((struct in6_addr *)peer->daddr.a6,
+				    &treq->rmt_addr)) {
+			inet_peer_refcheck(peer);
+			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
+			    (s32)(peer->tcp_ts - req->ts_recent) >
+							TCP_PAWS_WINDOW) {
+				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
+				goto drop_and_release;
+			}
+		}
+		/* Kill the following clause, if you dislike this way. */
+		else if (!sysctl_tcp_syncookies &&
+			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+			  (sysctl_max_syn_backlog >> 2)) &&
+			 (!peer || !peer->tcp_ts_stamp) &&
+			 (!dst || !dst_metric(dst, RTAX_RTT))) {
+			/* Without syncookies last quarter of
+			 * backlog is filled with destinations,
+			 * proven to be alive.
+			 * It means that we continue to communicate
+			 * to destinations, already remembered
+			 * to the moment of synflood.
+			 */
+			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
+				       &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
+			goto drop_and_release;
 		}
+
+		isn = tcp_v6_init_sequence(skb);
 	}
+have_isn:
 	tcp_rsk(req)->snt_isn = isn;
 
 	security_inet_conn_request(sk, skb, req);
@@ -1298,6 +1366,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 	return 0;
 
+drop_and_release:
+	dst_release(dst);
 drop_and_free:
 	reqsk_free(req);
 drop:
@@ -1376,28 +1446,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	if (sk_acceptq_is_full(sk))
 		goto out_overflow;
 
-	if (dst == NULL) {
-		struct in6_addr *final_p, final;
-		struct flowi fl;
-
-		memset(&fl, 0, sizeof(fl));
-		fl.proto = IPPROTO_TCP;
-		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
-		final_p = fl6_update_dst(&fl, opt, &final);
-		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
-		fl.oif = sk->sk_bound_dev_if;
-		fl.mark = sk->sk_mark;
-		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-		fl.fl_ip_sport = inet_rsk(req)->loc_port;
-		security_req_classify_flow(req, &fl);
-
-		if (ip6_dst_lookup(sk, &dst, &fl))
-			goto out;
-
-		if (final_p)
-			ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-		if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
+	if (!dst) {
+		dst = inet6_csk_route_req(sk, req);
+		if (!dst)
 			goto out;
 	}
 