diff options
author | David S. Miller <davem@davemloft.net> | 2010-12-02 15:14:29 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-12-02 15:14:29 -0500 |
commit | 493f377d6dd56f4e98b198d637fe714ab124681b (patch) | |
tree | 2e505bb0908fabd1f5cec1e40302e9f5b2d33077 /net/ipv6 | |
parent | ae4694b2d3e4c0f47c0e804a68417be57e5daf85 (diff) |
tcp: Add timewait recycling bits to ipv6 connect code.
This will also improve handling of the ipv6 tcp socket request
backlog when syncookies are not enabled. When the backlog
becomes very deep, the last quarter of the backlog is limited to
validated destinations. Previously only ipv4 implemented
this logic, but now ipv6 does too.
Now we are only one step away from enabling timewait
recycling for ipv6, and that step is simply filling in
the implementation of tcp_v6_get_peer() and
tcp_v6_tw_get_peer().
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 101 |
1 files changed, 76 insertions, 25 deletions
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5f73a1808e36..c2ebbe1c5a47 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -130,6 +130,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
130 | struct ipv6_pinfo *np = inet6_sk(sk); | 130 | struct ipv6_pinfo *np = inet6_sk(sk); |
131 | struct tcp_sock *tp = tcp_sk(sk); | 131 | struct tcp_sock *tp = tcp_sk(sk); |
132 | struct in6_addr *saddr = NULL, *final_p, final; | 132 | struct in6_addr *saddr = NULL, *final_p, final; |
133 | struct rt6_info *rt; | ||
133 | struct flowi fl; | 134 | struct flowi fl; |
134 | struct dst_entry *dst; | 135 | struct dst_entry *dst; |
135 | int addr_type; | 136 | int addr_type; |
@@ -280,6 +281,26 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
280 | sk->sk_gso_type = SKB_GSO_TCPV6; | 281 | sk->sk_gso_type = SKB_GSO_TCPV6; |
281 | __ip6_dst_store(sk, dst, NULL, NULL); | 282 | __ip6_dst_store(sk, dst, NULL, NULL); |
282 | 283 | ||
284 | rt = (struct rt6_info *) dst; | ||
285 | if (tcp_death_row.sysctl_tw_recycle && | ||
286 | !tp->rx_opt.ts_recent_stamp && | ||
287 | ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) { | ||
288 | struct inet_peer *peer = rt6_get_peer(rt); | ||
289 | /* | ||
290 | * VJ's idea. We save last timestamp seen from | ||
291 | * the destination in peer table, when entering state | ||
292 | * TIME-WAIT * and initialize rx_opt.ts_recent from it, | ||
293 | * when trying new connection. | ||
294 | */ | ||
295 | if (peer) { | ||
296 | inet_peer_refcheck(peer); | ||
297 | if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { | ||
298 | tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; | ||
299 | tp->rx_opt.ts_recent = peer->tcp_ts; | ||
300 | } | ||
301 | } | ||
302 | } | ||
303 | |||
283 | icsk->icsk_ext_hdr_len = 0; | 304 | icsk->icsk_ext_hdr_len = 0; |
284 | if (np->opt) | 305 | if (np->opt) |
285 | icsk->icsk_ext_hdr_len = (np->opt->opt_flen + | 306 | icsk->icsk_ext_hdr_len = (np->opt->opt_flen + |
@@ -1170,6 +1191,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1170 | struct ipv6_pinfo *np = inet6_sk(sk); | 1191 | struct ipv6_pinfo *np = inet6_sk(sk); |
1171 | struct tcp_sock *tp = tcp_sk(sk); | 1192 | struct tcp_sock *tp = tcp_sk(sk); |
1172 | __u32 isn = TCP_SKB_CB(skb)->when; | 1193 | __u32 isn = TCP_SKB_CB(skb)->when; |
1194 | struct dst_entry *dst = NULL; | ||
1173 | #ifdef CONFIG_SYN_COOKIES | 1195 | #ifdef CONFIG_SYN_COOKIES |
1174 | int want_cookie = 0; | 1196 | int want_cookie = 0; |
1175 | #else | 1197 | #else |
@@ -1267,6 +1289,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1267 | TCP_ECN_create_request(req, tcp_hdr(skb)); | 1289 | TCP_ECN_create_request(req, tcp_hdr(skb)); |
1268 | 1290 | ||
1269 | if (!isn) { | 1291 | if (!isn) { |
1292 | struct inet_peer *peer = NULL; | ||
1293 | |||
1270 | if (ipv6_opt_accepted(sk, skb) || | 1294 | if (ipv6_opt_accepted(sk, skb) || |
1271 | np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || | 1295 | np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || |
1272 | np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { | 1296 | np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { |
@@ -1279,13 +1303,57 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1279 | if (!sk->sk_bound_dev_if && | 1303 | if (!sk->sk_bound_dev_if && |
1280 | ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) | 1304 | ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) |
1281 | treq->iif = inet6_iif(skb); | 1305 | treq->iif = inet6_iif(skb); |
1282 | if (!want_cookie) { | 1306 | |
1283 | isn = tcp_v6_init_sequence(skb); | 1307 | if (want_cookie) { |
1284 | } else { | ||
1285 | isn = cookie_v6_init_sequence(sk, skb, &req->mss); | 1308 | isn = cookie_v6_init_sequence(sk, skb, &req->mss); |
1286 | req->cookie_ts = tmp_opt.tstamp_ok; | 1309 | req->cookie_ts = tmp_opt.tstamp_ok; |
1310 | goto have_isn; | ||
1311 | } | ||
1312 | |||
1313 | /* VJ's idea. We save last timestamp seen | ||
1314 | * from the destination in peer table, when entering | ||
1315 | * state TIME-WAIT, and check against it before | ||
1316 | * accepting new connection request. | ||
1317 | * | ||
1318 | * If "isn" is not zero, this request hit alive | ||
1319 | * timewait bucket, so that all the necessary checks | ||
1320 | * are made in the function processing timewait state. | ||
1321 | */ | ||
1322 | if (tmp_opt.saw_tstamp && | ||
1323 | tcp_death_row.sysctl_tw_recycle && | ||
1324 | (dst = inet6_csk_route_req(sk, req)) != NULL && | ||
1325 | (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL && | ||
1326 | ipv6_addr_equal((struct in6_addr *)peer->daddr.a6, | ||
1327 | &treq->rmt_addr)) { | ||
1328 | inet_peer_refcheck(peer); | ||
1329 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && | ||
1330 | (s32)(peer->tcp_ts - req->ts_recent) > | ||
1331 | TCP_PAWS_WINDOW) { | ||
1332 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); | ||
1333 | goto drop_and_release; | ||
1334 | } | ||
1335 | } | ||
1336 | /* Kill the following clause, if you dislike this way. */ | ||
1337 | else if (!sysctl_tcp_syncookies && | ||
1338 | (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < | ||
1339 | (sysctl_max_syn_backlog >> 2)) && | ||
1340 | (!peer || !peer->tcp_ts_stamp) && | ||
1341 | (!dst || !dst_metric(dst, RTAX_RTT))) { | ||
1342 | /* Without syncookies last quarter of | ||
1343 | * backlog is filled with destinations, | ||
1344 | * proven to be alive. | ||
1345 | * It means that we continue to communicate | ||
1346 | * to destinations, already remembered | ||
1347 | * to the moment of synflood. | ||
1348 | */ | ||
1349 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n", | ||
1350 | &treq->rmt_addr, ntohs(tcp_hdr(skb)->source)); | ||
1351 | goto drop_and_release; | ||
1287 | } | 1352 | } |
1353 | |||
1354 | isn = tcp_v6_init_sequence(skb); | ||
1288 | } | 1355 | } |
1356 | have_isn: | ||
1289 | tcp_rsk(req)->snt_isn = isn; | 1357 | tcp_rsk(req)->snt_isn = isn; |
1290 | 1358 | ||
1291 | security_inet_conn_request(sk, skb, req); | 1359 | security_inet_conn_request(sk, skb, req); |
@@ -1298,6 +1366,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1298 | inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); | 1366 | inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); |
1299 | return 0; | 1367 | return 0; |
1300 | 1368 | ||
1369 | drop_and_release: | ||
1370 | dst_release(dst); | ||
1301 | drop_and_free: | 1371 | drop_and_free: |
1302 | reqsk_free(req); | 1372 | reqsk_free(req); |
1303 | drop: | 1373 | drop: |
@@ -1376,28 +1446,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1376 | if (sk_acceptq_is_full(sk)) | 1446 | if (sk_acceptq_is_full(sk)) |
1377 | goto out_overflow; | 1447 | goto out_overflow; |
1378 | 1448 | ||
1379 | if (dst == NULL) { | 1449 | if (!dst) { |
1380 | struct in6_addr *final_p, final; | 1450 | dst = inet6_csk_route_req(sk, req); |
1381 | struct flowi fl; | 1451 | if (!dst) |
1382 | |||
1383 | memset(&fl, 0, sizeof(fl)); | ||
1384 | fl.proto = IPPROTO_TCP; | ||
1385 | ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); | ||
1386 | final_p = fl6_update_dst(&fl, opt, &final); | ||
1387 | ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); | ||
1388 | fl.oif = sk->sk_bound_dev_if; | ||
1389 | fl.mark = sk->sk_mark; | ||
1390 | fl.fl_ip_dport = inet_rsk(req)->rmt_port; | ||
1391 | fl.fl_ip_sport = inet_rsk(req)->loc_port; | ||
1392 | security_req_classify_flow(req, &fl); | ||
1393 | |||
1394 | if (ip6_dst_lookup(sk, &dst, &fl)) | ||
1395 | goto out; | ||
1396 | |||
1397 | if (final_p) | ||
1398 | ipv6_addr_copy(&fl.fl6_dst, final_p); | ||
1399 | |||
1400 | if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) | ||
1401 | goto out; | 1452 | goto out; |
1402 | } | 1453 | } |
1403 | 1454 | ||