path: root/net/ipv4/tcp_ipv4.c
author		Jerry Chu <hkchu@google.com>	2012-08-31 08:29:13 -0400
committer	David S. Miller <davem@davemloft.net>	2012-08-31 20:02:19 -0400
commit		168a8f58059a22feb9e9a2dcc1b8053dbbbc12ef
tree		0d5b9181b840c9b6b08b1452004f0746e8eebab8 /net/ipv4/tcp_ipv4.c
parent		8336886f786fdacbc19b719c1f7ea91eb70706d4
tcp: TCP Fast Open Server - main code path
This patch adds the main processing path to complete the TFO server patches.

A TFO request (i.e., a SYN+data packet with a TFO cookie option) first gets processed in tcp_v4_conn_request(). If it passes the various TFO checks in tcp_fastopen_check(), a child socket will be created right away to be accepted by applications, rather than waiting for the 3WHS to finish.

In addition to the use of the TFO cookie, a simple max_qlen based scheme is put in place to fend off spoofed TFO attacks.

When a valid ACK comes back to tcp_rcv_state_process(), it will cause the state of the child socket to switch from either TCP_SYN_RECV to TCP_ESTABLISHED, or TCP_FIN_WAIT1 to TCP_FIN_WAIT2. At this time retransmission will resume for any unack'ed (data, FIN,...) segments.

Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
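For context only (not part of this patch): the server side of TFO introduced by this series is opted into from userspace via the TCP_FASTOPEN socket option on a listening socket, whose argument becomes the per-listener fastopenq->max_qlen checked by tcp_fastopen_check() below, and the net.ipv4.tcp_fastopen sysctl must have the server bit (0x2) set. The following is a minimal, illustrative userspace sketch under those assumptions; the function name tfo_listen and the qlen value are hypothetical.

/* Minimal sketch of a TFO-enabled listener, assuming
 * net.ipv4.tcp_fastopen has the server bit (0x2) set and the
 * TCP_FASTOPEN socket option from this patch series is available.
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef TCP_FASTOPEN
#define TCP_FASTOPEN 23			/* option value used by this series */
#endif

static int tfo_listen(uint16_t port)
{
	struct sockaddr_in addr;
	int qlen = 16;			/* max pending TFO requests */
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	addr.sin_port = htons(port);

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0 ||
	    listen(fd, 128) < 0) {
		close(fd);
		return -1;
	}
	/* A SYN carrying data and a valid cookie is now accept()able
	 * before the 3WHS completes; read() on the child returns the
	 * data that arrived in the SYN.
	 */
	return fd;
}

On the client side, data is carried in the SYN via sendto(..., MSG_FASTOPEN) from the earlier client-side TFO patches; that path is not touched here.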
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--	net/ipv4/tcp_ipv4.c	265
1 files changed, 251 insertions, 14 deletions
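While reading tcp_fastopen_check() in the diff below, it helps to keep the sysctl_tcp_fastopen bit flags in mind. They are defined in include/net/tcp.h by the parent commit 8336886f786f ("tcp: TCP Fast Open Server - header & support functions"); the values reproduced here are recalled from that header and should be treated as illustrative, not authoritative.

/* Bit flags for sysctl_tcp_fastopen (include/net/tcp.h, parent commit) */
#define TFO_CLIENT_ENABLE		1
#define TFO_SERVER_ENABLE		2
/* Process SYN data but skip cookie validation */
#define TFO_SERVER_COOKIE_NOT_CHKED	0x100
/* Accept SYN data without any cookie option present */
#define TFO_SERVER_COOKIE_NOT_REQD	0x200
/* Always create TFO child sockets on a TFO listener, even when
 * cookie/data are not present (intended for testing).
 */
#define TFO_SERVER_ALWAYS		0x1000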
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index bb148dee1edd..e64abed249cc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -352,6 +352,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	const int code = icmp_hdr(icmp_skb)->code;
 	struct sock *sk;
 	struct sk_buff *skb;
+	struct request_sock *req;
 	__u32 seq;
 	__u32 remaining;
 	int err;
@@ -394,9 +395,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
 	icsk = inet_csk(sk);
 	tp = tcp_sk(sk);
+	req = tp->fastopen_rsk;
 	seq = ntohl(th->seq);
 	if (sk->sk_state != TCP_LISTEN &&
-	    !between(seq, tp->snd_una, tp->snd_nxt)) {
+	    !between(seq, tp->snd_una, tp->snd_nxt) &&
+	    (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
+		/* For a Fast Open socket, allow seq to be snt_isn. */
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 		goto out;
 	}
@@ -435,6 +439,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		    !icsk->icsk_backoff)
 			break;
 
+		/* XXX (TFO) - revisit the following logic for TFO */
+
 		if (sock_owned_by_user(sk))
 			break;
 
@@ -466,6 +472,14 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		goto out;
 	}
 
+	/* XXX (TFO) - if it's a TFO socket and has been accepted, rather
+	 * than following the TCP_SYN_RECV case and closing the socket,
+	 * we ignore the ICMP error and keep trying like a fully established
+	 * socket. Is this the right thing to do?
+	 */
+	if (req && req->sk == NULL)
+		goto out;
+
 	switch (sk->sk_state) {
 		struct request_sock *req, **prev;
 	case TCP_LISTEN:
@@ -498,7 +512,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
 	case TCP_SYN_SENT:
 	case TCP_SYN_RECV:  /* Cannot happen.
-			       It can f.e. if SYNs crossed.
+			       It can f.e. if SYNs crossed,
+			       or Fast Open.
 			     */
 		if (!sock_owned_by_user(sk)) {
 			sk->sk_err = err;
@@ -809,8 +824,12 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 				  struct request_sock *req)
 {
-	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
-			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
+	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
+	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+	 */
+	tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
+			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+			tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
 			req->ts_recent,
 			0,
 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
@@ -1272,6 +1291,178 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
 };
 #endif
 
+static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
+			       struct request_sock *req,
+			       struct tcp_fastopen_cookie *foc,
+			       struct tcp_fastopen_cookie *valid_foc)
+{
+	bool skip_cookie = false;
+	struct fastopen_queue *fastopenq;
+
+	if (likely(!fastopen_cookie_present(foc))) {
+		/* See include/net/tcp.h for the meaning of these knobs */
+		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
+		    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
+		    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
+			skip_cookie = true; /* no cookie to validate */
+		else
+			return false;
+	}
+	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
+	/* A FO option is present; bump the counter. */
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
+
+	/* Make sure the listener has enabled fastopen, and we don't
+	 * exceed the max # of pending TFO requests allowed before trying
+	 * to validating the cookie in order to avoid burning CPU cycles
+	 * unnecessarily.
+	 *
+	 * XXX (TFO) - The implication of checking the max_qlen before
+	 * processing a cookie request is that clients can't differentiate
+	 * between qlen overflow causing Fast Open to be disabled
+	 * temporarily vs a server not supporting Fast Open at all.
+	 */
+	if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
+	    fastopenq == NULL || fastopenq->max_qlen == 0)
+		return false;
+
+	if (fastopenq->qlen >= fastopenq->max_qlen) {
+		struct request_sock *req1;
+		spin_lock(&fastopenq->lock);
+		req1 = fastopenq->rskq_rst_head;
+		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
+			spin_unlock(&fastopenq->lock);
+			NET_INC_STATS_BH(sock_net(sk),
+			    LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
+			/* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
+			foc->len = -1;
+			return false;
+		}
+		fastopenq->rskq_rst_head = req1->dl_next;
+		fastopenq->qlen--;
+		spin_unlock(&fastopenq->lock);
+		reqsk_free(req1);
+	}
+	if (skip_cookie) {
+		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		return true;
+	}
+	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
+		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
+			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
+			    memcmp(&foc->val[0], &valid_foc->val[0],
+			    TCP_FASTOPEN_COOKIE_SIZE) != 0)
+				return false;
+			valid_foc->len = -1;
+		}
+		/* Acknowledge the data received from the peer. */
+		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		return true;
+	} else if (foc->len == 0) { /* Client requesting a cookie */
+		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+		NET_INC_STATS_BH(sock_net(sk),
+		    LINUX_MIB_TCPFASTOPENCOOKIEREQD);
+	} else {
+		/* Client sent a cookie with wrong size. Treat it
+		 * the same as invalid and return a valid one.
+		 */
+		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+	}
+	return false;
+}
+
+static int tcp_v4_conn_req_fastopen(struct sock *sk,
+				    struct sk_buff *skb,
+				    struct sk_buff *skb_synack,
+				    struct request_sock *req,
+				    struct request_values *rvp)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+	const struct inet_request_sock *ireq = inet_rsk(req);
+	struct sock *child;
+
+	req->retrans = 0;
+	req->sk = NULL;
+
+	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
+	if (child == NULL) {
+		NET_INC_STATS_BH(sock_net(sk),
+				 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
+		kfree_skb(skb_synack);
+		return -1;
+	}
+	ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
+			      ireq->rmt_addr, ireq->opt);
+	/* XXX (TFO) - is it ok to ignore error and continue? */
+
+	spin_lock(&queue->fastopenq->lock);
+	queue->fastopenq->qlen++;
+	spin_unlock(&queue->fastopenq->lock);
+
+	/* Initialize the child socket. Have to fix some values to take
+	 * into account the child is a Fast Open socket and is created
+	 * only out of the bits carried in the SYN packet.
+	 */
+	tp = tcp_sk(child);
+
+	tp->fastopen_rsk = req;
+	/* Do a hold on the listner sk so that if the listener is being
+	 * closed, the child that has been accepted can live on and still
+	 * access listen_lock.
+	 */
+	sock_hold(sk);
+	tcp_rsk(req)->listener = sk;
+
+	/* RFC1323: The window in SYN & SYN/ACK segments is never
+	 * scaled. So correct it appropriately.
+	 */
+	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
+
+	/* Activate the retrans timer so that SYNACK can be retransmitted.
+	 * The request socket is not added to the SYN table of the parent
+	 * because it's been added to the accept queue directly.
+	 */
+	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
+	    TCP_TIMEOUT_INIT, TCP_RTO_MAX);
+
+	/* Add the child socket directly into the accept queue */
+	inet_csk_reqsk_queue_add(sk, req, child);
+
+	/* Now finish processing the fastopen child socket. */
+	inet_csk(child)->icsk_af_ops->rebuild_header(child);
+	tcp_init_congestion_control(child);
+	tcp_mtup_init(child);
+	tcp_init_buffer_space(child);
+	tcp_init_metrics(child);
+
+	/* Queue the data carried in the SYN packet. We need to first
+	 * bump skb's refcnt because the caller will attempt to free it.
+	 *
+	 * XXX (TFO) - we honor a zero-payload TFO request for now.
+	 * (Any reason not to?)
+	 */
+	if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
+		/* Don't queue the skb if there is no payload in SYN.
+		 * XXX (TFO) - How about SYN+FIN?
+		 */
+		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	} else {
+		skb = skb_get(skb);
+		skb_dst_drop(skb);
+		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
+		skb_set_owner_r(skb, child);
+		__skb_queue_tail(&child->sk_receive_queue, skb);
+		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	}
+	sk->sk_data_ready(sk, 0);
+	bh_unlock_sock(child);
+	sock_put(child);
+	WARN_ON(req->sk == NULL);
+	return 0;
+}
+
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_extend_values tmp_ext;
@@ -1285,6 +1476,11 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	__be32 daddr = ip_hdr(skb)->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
 	bool want_cookie = false;
+	struct flowi4 fl4;
+	struct tcp_fastopen_cookie foc = { .len = -1 };
+	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
+	struct sk_buff *skb_synack;
+	int do_fastopen;
 
 	/* Never answer to SYNs send to broadcast or multicast */
 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1319,7 +1515,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
 	tmp_opt.user_mss = tp->rx_opt.user_mss;
-	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
+	tcp_parse_options(skb, &tmp_opt, &hash_location, 0,
+	    want_cookie ? NULL : &foc);
 
 	if (tmp_opt.cookie_plus > 0 &&
 	    tmp_opt.saw_tstamp &&
@@ -1377,8 +1574,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
 		req->cookie_ts = tmp_opt.tstamp_ok;
 	} else if (!isn) {
-		struct flowi4 fl4;
-
 		/* VJ's idea. We save last timestamp seen
 		 * from the destination in peer table, when entering
 		 * state TIME-WAIT, and check against it before
@@ -1419,14 +1614,52 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_rsk(req)->snt_isn = isn;
 	tcp_rsk(req)->snt_synack = tcp_time_stamp;
 
-	if (tcp_v4_send_synack(sk, dst, req,
-			       (struct request_values *)&tmp_ext,
-			       skb_get_queue_mapping(skb),
-			       want_cookie) ||
-	    want_cookie)
+	if (dst == NULL) {
+		dst = inet_csk_route_req(sk, &fl4, req);
+		if (dst == NULL)
+			goto drop_and_free;
+	}
+	do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
+
+	/* We don't call tcp_v4_send_synack() directly because we need
+	 * to make sure a child socket can be created successfully before
+	 * sending back synack!
+	 *
+	 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
+	 * (or better yet, call tcp_send_synack() in the child context
+	 * directly, but will have to fix bunch of other code first)
+	 * after syn_recv_sock() except one will need to first fix the
+	 * latter to remove its dependency on the current implementation
+	 * of tcp_v4_send_synack()->tcp_select_initial_window().
+	 */
+	skb_synack = tcp_make_synack(sk, dst, req,
+	    (struct request_values *)&tmp_ext,
+	    fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
+
+	if (skb_synack) {
+		__tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
+		skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
+	} else
+		goto drop_and_free;
+
+	if (likely(!do_fastopen)) {
+		int err;
+		err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
+		     ireq->rmt_addr, ireq->opt);
+		err = net_xmit_eval(err);
+		if (err || want_cookie)
+			goto drop_and_free;
+
+		tcp_rsk(req)->listener = NULL;
+		/* Add the request_sock to the SYN table */
+		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+		if (fastopen_cookie_present(&foc) && foc.len != 0)
+			NET_INC_STATS_BH(sock_net(sk),
+			    LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
+	} else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req,
+	    (struct request_values *)&tmp_ext))
 		goto drop_and_free;
 
-	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 	return 0;
 
 drop_and_release:
@@ -1977,6 +2210,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 			 tcp_cookie_values_release);
 		tp->cookie_values = NULL;
 	}
+	BUG_ON(tp->fastopen_rsk != NULL);
 
 	/* If socket is aborted during connect operation */
 	tcp_free_fastopen_req(tp);
@@ -2425,6 +2659,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
 	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct inet_sock *inet = inet_sk(sk);
+	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
 	__be32 dest = inet->inet_daddr;
 	__be32 src = inet->inet_rcv_saddr;
 	__u16 destp = ntohs(inet->inet_dport);
@@ -2469,7 +2704,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
 		jiffies_to_clock_t(icsk->icsk_ack.ato),
 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
 		tp->snd_cwnd,
-		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
+		sk->sk_state == TCP_LISTEN ?
+		    (fastopenq ? fastopenq->max_qlen : 0) :
+		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh),
 		len);
 }
 