Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--	net/ipv4/tcp_ipv4.c	277
1 file changed, 30 insertions(+), 247 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ad166dcc278f..a2780e5334c9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -336,8 +336,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	const int code = icmp_hdr(icmp_skb)->code;
 	struct sock *sk;
 	struct sk_buff *skb;
-	struct request_sock *req;
-	__u32 seq;
+	struct request_sock *fastopen;
+	__u32 seq, snd_una;
 	__u32 remaining;
 	int err;
 	struct net *net = dev_net(icmp_skb->dev);
@@ -378,12 +378,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
 	icsk = inet_csk(sk);
 	tp = tcp_sk(sk);
-	req = tp->fastopen_rsk;
 	seq = ntohl(th->seq);
+	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
+	fastopen = tp->fastopen_rsk;
+	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
 	if (sk->sk_state != TCP_LISTEN &&
-	    !between(seq, tp->snd_una, tp->snd_nxt) &&
-	    (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
-		/* For a Fast Open socket, allow seq to be snt_isn. */
+	    !between(seq, snd_una, tp->snd_nxt)) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 		goto out;
 	}
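
To make the reworked validation concrete, here is a small userspace model (not kernel code, with made-up values) of between() as defined in include/net/tcp.h and of the snd_una selection above. For a fast open socket whose handshake has not finished, the sequence number quoted in the ICMP payload is matched against the SYN-ACK's ISN rather than tp->snd_una, and the wrap-safe comparison keeps the test correct across the 32-bit sequence wrap:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same wrap-safe test as the kernel's between(): true when
 * seq2 <= seq1 <= seq3 in modular 32-bit sequence arithmetic. */
static bool between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
{
	return seq3 - seq2 >= seq1 - seq2;
}

int main(void)
{
	bool fastopen = true;		/* pretend tp->fastopen_rsk != NULL */
	uint32_t snt_isn = 0xfffffff0;	/* SYN-ACK ISN near the wrap point */
	uint32_t tp_snd_una = 42;	/* stale on an unaccepted TFO child */
	uint32_t snd_nxt = 0xfffffff1;	/* ISN + 1 */
	uint32_t seq = 0xfffffff0;	/* sequence echoed in the ICMP error */

	/* The patch folds the fast open special case into one window test
	 * by picking the right lower bound up front. */
	uint32_t snd_una = fastopen ? snt_isn : tp_snd_una;

	printf("ICMP seq in window: %s\n",
	       between(seq, snd_una, snd_nxt) ? "yes" : "no");
	return 0;
}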
@@ -426,11 +426,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
 			break;
 		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
-		    !icsk->icsk_backoff)
+		    !icsk->icsk_backoff || fastopen)
 			break;
 
-		/* XXX (TFO) - revisit the following logic for TFO */
-
 		if (sock_owned_by_user(sk))
 			break;
 
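
Read as a predicate, the amended gate says: adjust the retransmit state from an ICMP unreachable only when the error refers to the oldest unacknowledged byte, retransmissions with backoff are actually in flight, and the socket is not in a fast open exchange (the new "|| fastopen" clause makes fast open sockets skip this tuning entirely). A compilable restatement, illustrative only, with plain C types standing in for the kernel's:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the condition above: any failing clause means "break", i.e.
 * the retransmit timer and backoff are left untouched. */
static bool icmp_may_tune_rto(uint32_t seq, uint32_t snd_una,
			      uint8_t retransmits, uint8_t backoff,
			      bool fastopen)
{
	return seq == snd_una && retransmits && backoff && !fastopen;
}

int main(void)
{
	/* A fast open socket never takes the tuning path. */
	printf("%s\n", icmp_may_tune_rto(1000, 1000, 1, 2, true)
		       ? "tune" : "skip");
	return 0;
}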
@@ -462,14 +460,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		goto out;
 	}
 
-	/* XXX (TFO) - if it's a TFO socket and has been accepted, rather
-	 * than following the TCP_SYN_RECV case and closing the socket,
-	 * we ignore the ICMP error and keep trying like a fully established
-	 * socket. Is this the right thing to do?
-	 */
-	if (req && req->sk == NULL)
-		goto out;
-
 	switch (sk->sk_state) {
 		struct request_sock *req, **prev;
 	case TCP_LISTEN:
@@ -502,10 +492,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		goto out;
 
 	case TCP_SYN_SENT:
-	case TCP_SYN_RECV:  /* Cannot happen.
-			       It can f.e. if SYNs crossed,
-			       or Fast Open.
-			     */
+	case TCP_SYN_RECV:
+		/* Only in fast or simultaneous open. If a fast open socket is
+		 * is already accepted it is treated as a connected one below.
+		 */
+		if (fastopen && fastopen->sk == NULL)
+			break;
+
 		if (!sock_owned_by_user(sk)) {
 			sk->sk_err = err;
 
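
A gloss on the new TCP_SYN_RECV branch, as a runnable model with hypothetical stand-in types (not kernel code): tp->fastopen_rsk is only set on a fast open child, and req->sk stays NULL until accept() picks the child up. Until then, ICMP errors are deliberately left to the SYN-ACK retransmit machinery instead of being reported:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct sock;				/* opaque stand-in */
struct request_sock {
	struct sock *sk;		/* set once the child is accept()ed */
};

/* Mirrors "if (fastopen && fastopen->sk == NULL) break;" above. */
static bool defer_to_synack_rtx(const struct request_sock *fastopen)
{
	return fastopen && fastopen->sk == NULL;
}

int main(void)
{
	struct request_sock req = { .sk = NULL };

	printf("%s\n", defer_to_synack_rtx(&req)
		       ? "defer to SYN-ACK retransmit" : "report error");
	return 0;
}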
@@ -822,7 +815,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  */
 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct request_sock *req,
-			      u16 queue_mapping)
+			      u16 queue_mapping,
+			      struct tcp_fastopen_cookie *foc)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct flowi4 fl4;
@@ -833,7 +827,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		return -1;
 
-	skb = tcp_make_synack(sk, dst, req, NULL);
+	skb = tcp_make_synack(sk, dst, req, foc);
 
 	if (skb) {
 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
@@ -852,7 +846,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 
 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
 {
-	int res = tcp_v4_send_synack(sk, NULL, req, 0);
+	int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL);
 
 	if (!res) {
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
@@ -1260,187 +1254,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
 };
 #endif
 
-static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
-			       struct request_sock *req,
-			       struct tcp_fastopen_cookie *foc,
-			       struct tcp_fastopen_cookie *valid_foc)
-{
-	bool skip_cookie = false;
-	struct fastopen_queue *fastopenq;
-
-	if (likely(!fastopen_cookie_present(foc))) {
-		/* See include/net/tcp.h for the meaning of these knobs */
-		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
-		    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
-		    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
-			skip_cookie = true; /* no cookie to validate */
-		else
-			return false;
-	}
-	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
-	/* A FO option is present; bump the counter. */
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
-
-	/* Make sure the listener has enabled fastopen, and we don't
-	 * exceed the max # of pending TFO requests allowed before trying
-	 * to validating the cookie in order to avoid burning CPU cycles
-	 * unnecessarily.
-	 *
-	 * XXX (TFO) - The implication of checking the max_qlen before
-	 * processing a cookie request is that clients can't differentiate
-	 * between qlen overflow causing Fast Open to be disabled
-	 * temporarily vs a server not supporting Fast Open at all.
-	 */
-	if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
-	    fastopenq == NULL || fastopenq->max_qlen == 0)
-		return false;
-
-	if (fastopenq->qlen >= fastopenq->max_qlen) {
-		struct request_sock *req1;
-		spin_lock(&fastopenq->lock);
-		req1 = fastopenq->rskq_rst_head;
-		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
-			spin_unlock(&fastopenq->lock);
-			NET_INC_STATS_BH(sock_net(sk),
-					 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
-			/* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
-			foc->len = -1;
-			return false;
-		}
-		fastopenq->rskq_rst_head = req1->dl_next;
-		fastopenq->qlen--;
-		spin_unlock(&fastopenq->lock);
-		reqsk_free(req1);
-	}
-	if (skip_cookie) {
-		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		return true;
-	}
-
-	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
-		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
-			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-						ip_hdr(skb)->daddr, valid_foc);
-			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
-			    memcmp(&foc->val[0], &valid_foc->val[0],
-			    TCP_FASTOPEN_COOKIE_SIZE) != 0)
-				return false;
-			valid_foc->len = -1;
-		}
-		/* Acknowledge the data received from the peer. */
-		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		return true;
-	} else if (foc->len == 0) { /* Client requesting a cookie */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-					ip_hdr(skb)->daddr, valid_foc);
-		NET_INC_STATS_BH(sock_net(sk),
-				 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
-	} else {
-		/* Client sent a cookie with wrong size. Treat it
-		 * the same as invalid and return a valid one.
-		 */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-					ip_hdr(skb)->daddr, valid_foc);
-	}
-	return false;
-}
-
-static int tcp_v4_conn_req_fastopen(struct sock *sk,
-				    struct sk_buff *skb,
-				    struct sk_buff *skb_synack,
-				    struct request_sock *req)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
-	const struct inet_request_sock *ireq = inet_rsk(req);
-	struct sock *child;
-	int err;
-
-	req->num_retrans = 0;
-	req->num_timeout = 0;
-	req->sk = NULL;
-
-	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
-	if (child == NULL) {
-		NET_INC_STATS_BH(sock_net(sk),
-				 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-		kfree_skb(skb_synack);
-		return -1;
-	}
-	err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
-				    ireq->ir_rmt_addr, ireq->opt);
-	err = net_xmit_eval(err);
-	if (!err)
-		tcp_rsk(req)->snt_synack = tcp_time_stamp;
-	/* XXX (TFO) - is it ok to ignore error and continue? */
-
-	spin_lock(&queue->fastopenq->lock);
-	queue->fastopenq->qlen++;
-	spin_unlock(&queue->fastopenq->lock);
-
-	/* Initialize the child socket. Have to fix some values to take
-	 * into account the child is a Fast Open socket and is created
-	 * only out of the bits carried in the SYN packet.
-	 */
-	tp = tcp_sk(child);
-
-	tp->fastopen_rsk = req;
-	/* Do a hold on the listner sk so that if the listener is being
-	 * closed, the child that has been accepted can live on and still
-	 * access listen_lock.
-	 */
-	sock_hold(sk);
-	tcp_rsk(req)->listener = sk;
-
-	/* RFC1323: The window in SYN & SYN/ACK segments is never
-	 * scaled. So correct it appropriately.
-	 */
-	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
-
-	/* Activate the retrans timer so that SYNACK can be retransmitted.
-	 * The request socket is not added to the SYN table of the parent
-	 * because it's been added to the accept queue directly.
-	 */
-	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
-				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);
-
-	/* Add the child socket directly into the accept queue */
-	inet_csk_reqsk_queue_add(sk, req, child);
-
-	/* Now finish processing the fastopen child socket. */
-	inet_csk(child)->icsk_af_ops->rebuild_header(child);
-	tcp_init_congestion_control(child);
-	tcp_mtup_init(child);
-	tcp_init_metrics(child);
-	tcp_init_buffer_space(child);
-
-	/* Queue the data carried in the SYN packet. We need to first
-	 * bump skb's refcnt because the caller will attempt to free it.
-	 *
-	 * XXX (TFO) - we honor a zero-payload TFO request for now.
-	 * (Any reason not to?)
-	 */
-	if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
-		/* Don't queue the skb if there is no payload in SYN.
-		 * XXX (TFO) - How about SYN+FIN?
-		 */
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-	} else {
-		skb = skb_get(skb);
-		skb_dst_drop(skb);
-		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
-		skb_set_owner_r(skb, child);
-		__skb_queue_tail(&child->sk_receive_queue, skb);
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		tp->syn_data_acked = 1;
-	}
-	sk->sk_data_ready(sk);
-	bh_unlock_sock(child);
-	sock_put(child);
-	WARN_ON(req->sk == NULL);
-	return 0;
-}
-
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_options_received tmp_opt;
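
The two functions deleted above carried the whole server-side Fast Open policy in this file; after this change the new tcp_v4_conn_request() below calls tcp_try_fastopen() instead, so the logic presumably lives on in common code (net/ipv4/tcp_fastopen.c in trees of this vintage). One rule worth keeping in mind from the deleted tcp_fastopen_check() is the listen-queue overflow policy. A compilable userspace model of it, with hypothetical stand-in types:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Stand-ins for the kernel structures, illustration only. */
struct rst_req {
	unsigned long expires;		/* jiffies deadline */
	struct rst_req *dl_next;
};

struct fo_queue {
	int qlen, max_qlen;
	struct rst_req *rskq_rst_head;	/* TFO requests that drew a RST */
};

/* Same wrap-safe comparison as the kernel's time_after(a, b). */
static bool time_after(unsigned long a, unsigned long b)
{
	return (long)(b - a) < 0;
}

/* Models the deleted overflow branch: when the TFO queue is full, a new
 * request is admitted only by reaping an expired RST-ed entry; otherwise
 * Fast Open is refused and the SYN falls back to a plain SYN-ACK. */
static bool fo_queue_admit(struct fo_queue *q, unsigned long now)
{
	if (q->qlen < q->max_qlen)
		return true;
	if (q->rskq_rst_head == NULL ||
	    time_after(q->rskq_rst_head->expires, now))
		return false;
	q->rskq_rst_head = q->rskq_rst_head->dl_next;	/* reap one entry */
	q->qlen--;
	return true;
}

int main(void)
{
	struct rst_req stale = { .expires = 100, .dl_next = NULL };
	struct fo_queue q = { .qlen = 16, .max_qlen = 16,
			      .rskq_rst_head = &stale };

	/* At "time" 200 the head entry has expired, so the full queue
	 * still admits the new request. */
	printf("admit: %s\n", fo_queue_admit(&q, 200) ? "yes" : "no");
	return 0;
}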
@@ -1451,12 +1264,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	__be32 saddr = ip_hdr(skb)->saddr;
 	__be32 daddr = ip_hdr(skb)->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
-	bool want_cookie = false;
+	bool want_cookie = false, fastopen;
 	struct flowi4 fl4;
 	struct tcp_fastopen_cookie foc = { .len = -1 };
-	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
-	struct sk_buff *skb_synack;
-	int do_fastopen;
+	int err;
 
 	/* Never answer to SYNs send to broadcast or multicast */
 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1555,52 +1366,24 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 
 		isn = tcp_v4_init_sequence(skb);
 	}
-	tcp_rsk(req)->snt_isn = isn;
-
-	if (dst == NULL) {
-		dst = inet_csk_route_req(sk, &fl4, req);
-		if (dst == NULL)
-			goto drop_and_free;
-	}
-	do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
-
-	/* We don't call tcp_v4_send_synack() directly because we need
-	 * to make sure a child socket can be created successfully before
-	 * sending back synack!
-	 *
-	 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
-	 * (or better yet, call tcp_send_synack() in the child context
-	 * directly, but will have to fix bunch of other code first)
-	 * after syn_recv_sock() except one will need to first fix the
-	 * latter to remove its dependency on the current implementation
-	 * of tcp_v4_send_synack()->tcp_select_initial_window().
-	 */
-	skb_synack = tcp_make_synack(sk, dst, req,
-			fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
-
-	if (skb_synack) {
-		__tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr);
-		skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
-	} else
+	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		goto drop_and_free;
 
-	if (likely(!do_fastopen)) {
-		int err;
-		err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
-					    ireq->ir_rmt_addr, ireq->opt);
-		err = net_xmit_eval(err);
+	tcp_rsk(req)->snt_isn = isn;
+	tcp_rsk(req)->snt_synack = tcp_time_stamp;
+	tcp_openreq_init_rwin(req, sk, dst);
+	fastopen = !want_cookie &&
+		   tcp_try_fastopen(sk, skb, req, &foc, dst);
+	err = tcp_v4_send_synack(sk, dst, req,
+				 skb_get_queue_mapping(skb), &foc);
+	if (!fastopen) {
 		if (err || want_cookie)
 			goto drop_and_free;
 
 		tcp_rsk(req)->snt_synack = tcp_time_stamp;
 		tcp_rsk(req)->listener = NULL;
-		/* Add the request_sock to the SYN table */
 		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-		if (fastopen_cookie_present(&foc) && foc.len != 0)
-			NET_INC_STATS_BH(sock_net(sk),
-					 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-	} else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
-		goto drop_and_free;
+	}
 
 	return 0;
 
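
For context on exercising this path from userspace: the per-listener fast open queue that the code above polices is sized with the TCP_FASTOPEN socket option, and the server side of the net.ipv4.tcp_fastopen sysctl must be enabled. A minimal sketch of such a listener; the port and queue length are arbitrary and error handling is omitted:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int qlen = 16;			/* max pending TFO requests */
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	struct sockaddr_in addr;

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	addr.sin_port = htons(8080);

	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen));
	listen(fd, 128);

	/* accept() may now return a child that was created directly from
	 * a data-carrying SYN, per the fast open path in this file. */
	int cfd = accept(fd, NULL, NULL);
	if (cfd >= 0)
		close(cfd);
	close(fd);
	return 0;
}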