Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--	net/ipv4/tcp_ipv4.c	277
1 file changed, 30 insertions(+), 247 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ad166dcc278f..a2780e5334c9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -336,8 +336,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	const int code = icmp_hdr(icmp_skb)->code;
 	struct sock *sk;
 	struct sk_buff *skb;
-	struct request_sock *req;
-	__u32 seq;
+	struct request_sock *fastopen;
+	__u32 seq, snd_una;
 	__u32 remaining;
 	int err;
 	struct net *net = dev_net(icmp_skb->dev);
@@ -378,12 +378,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
 	icsk = inet_csk(sk);
 	tp = tcp_sk(sk);
-	req = tp->fastopen_rsk;
 	seq = ntohl(th->seq);
+	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
+	fastopen = tp->fastopen_rsk;
+	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
 	if (sk->sk_state != TCP_LISTEN &&
-	    !between(seq, tp->snd_una, tp->snd_nxt) &&
-	    (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
-		/* For a Fast Open socket, allow seq to be snt_isn. */
+	    !between(seq, snd_una, tp->snd_nxt)) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 		goto out;
 	}
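
For reference, the new acceptance test above reduces to the standalone sketch below. The helper names icmp_seq_valid() and fastopen_pending are hypothetical and not part of the patch; the point is that an ICMP error quotes the sequence number of the offending segment, which is only plausible if it lies in [snd_una, snd_nxt], and a fast open server socket still attached to its request_sock has had nothing acked yet, so its lower bound is the initial sequence number snt_isn.

#include <stdbool.h>
#include <stdint.h>

/* Serial-number arithmetic, same shape as between() in include/net/tcp.h:
 * true iff seq lies in the circular interval [low, high].
 */
static bool between(uint32_t seq, uint32_t low, uint32_t high)
{
	return high - low >= seq - low;
}

/* Hypothetical helper mirroring the hunk above: substitute snt_isn for
 * snd_una while the fastopen request_sock is still attached.
 */
static bool icmp_seq_valid(uint32_t seq, uint32_t snd_una, uint32_t snd_nxt,
			   bool fastopen_pending, uint32_t snt_isn)
{
	uint32_t snd_una_eff = fastopen_pending ? snt_isn : snd_una;

	return between(seq, snd_una_eff, snd_nxt);
}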
@@ -426,11 +426,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
 			break;
 		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
-		    !icsk->icsk_backoff)
+		    !icsk->icsk_backoff || fastopen)
 			break;
 
-		/* XXX (TFO) - revisit the following logic for TFO */
-
 		if (sock_owned_by_user(sk))
 			break;
 
@@ -462,14 +460,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		goto out;
 	}
 
-	/* XXX (TFO) - if it's a TFO socket and has been accepted, rather
-	 * than following the TCP_SYN_RECV case and closing the socket,
-	 * we ignore the ICMP error and keep trying like a fully established
-	 * socket. Is this the right thing to do?
-	 */
-	if (req && req->sk == NULL)
-		goto out;
-
 	switch (sk->sk_state) {
 		struct request_sock *req, **prev;
 	case TCP_LISTEN:
@@ -502,10 +492,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		goto out;
 
 	case TCP_SYN_SENT:
-	case TCP_SYN_RECV:  /* Cannot happen.
-			       It can f.e. if SYNs crossed,
-			       or Fast Open.
-			     */
+	case TCP_SYN_RECV:
+		/* Only in fast or simultaneous open. If a fast open socket
+		 * is already accepted it is treated as a connected one below.
+		 */
+		if (fastopen && fastopen->sk == NULL)
+			break;
+
 		if (!sock_owned_by_user(sk)) {
 			sk->sk_err = err;
 
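
The effect of the new SYN_RECV branch can be read as the following standalone sketch (hypothetical types and function name, not kernel code): an error on a fast open request that has not yet produced an accepted child (fastopen->sk == NULL) is deliberately swallowed, leaving recovery to the SYN-ACK retransmit timer, while an accepted fast open child falls through and is handled like an established connection.

#include <stdbool.h>
#include <stddef.h>

enum state { SYN_SENT, SYN_RECV, ESTABLISHED };

struct fastopen_req {		/* stand-in for struct request_sock */
	void *sk;		/* accepted child, or NULL while pending */
};

/* Hypothetical predicate: should this ICMP error be ignored and the
 * SYN-ACK retransmit timer be left to recover the handshake?
 */
static bool leave_to_synack_rtx(enum state st,
				const struct fastopen_req *fastopen)
{
	return (st == SYN_SENT || st == SYN_RECV) &&
	       fastopen && fastopen->sk == NULL;
}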
@@ -822,7 +815,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  */
 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct request_sock *req,
-			      u16 queue_mapping)
+			      u16 queue_mapping,
+			      struct tcp_fastopen_cookie *foc)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct flowi4 fl4;
@@ -833,7 +827,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		return -1;
 
-	skb = tcp_make_synack(sk, dst, req, NULL);
+	skb = tcp_make_synack(sk, dst, req, foc);
 
 	if (skb) {
 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
@@ -852,7 +846,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 
 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
 {
-	int res = tcp_v4_send_synack(sk, NULL, req, 0);
+	int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL);
 
 	if (!res) {
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
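
The widened signature lets one helper serve both call sites: the first SYN-ACK passes the cookie to be echoed, while the retransmit path passes NULL so no cookie option is emitted. A minimal, self-contained sketch of that optional-argument pattern (hypothetical types and names, not kernel code):

#include <stdio.h>

struct cookie { int len; };

/* Hypothetical analogue of tcp_v4_send_synack(): foc == NULL means
 * "build the reply without a fast open cookie option".
 */
static int send_synack(const struct cookie *foc)
{
	if (foc && foc->len > 0)
		printf("SYN-ACK + cookie(len=%d)\n", foc->len);
	else
		printf("SYN-ACK\n");
	return 0;
}

int main(void)
{
	struct cookie foc = { .len = 8 };

	send_synack(&foc);	/* first SYN-ACK: carry the cookie */
	send_synack(NULL);	/* retransmit: no cookie option */
	return 0;
}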
@@ -1260,187 +1254,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
 };
 #endif
 
-static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
-			       struct request_sock *req,
-			       struct tcp_fastopen_cookie *foc,
-			       struct tcp_fastopen_cookie *valid_foc)
-{
-	bool skip_cookie = false;
-	struct fastopen_queue *fastopenq;
-
-	if (likely(!fastopen_cookie_present(foc))) {
-		/* See include/net/tcp.h for the meaning of these knobs */
-		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
-		    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
-		    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
-			skip_cookie = true; /* no cookie to validate */
-		else
-			return false;
-	}
-	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
-	/* A FO option is present; bump the counter. */
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
-
-	/* Make sure the listener has enabled fastopen, and we don't
-	 * exceed the max # of pending TFO requests allowed before trying
-	 * to validating the cookie in order to avoid burning CPU cycles
-	 * unnecessarily.
-	 *
-	 * XXX (TFO) - The implication of checking the max_qlen before
-	 * processing a cookie request is that clients can't differentiate
-	 * between qlen overflow causing Fast Open to be disabled
-	 * temporarily vs a server not supporting Fast Open at all.
-	 */
-	if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
-	    fastopenq == NULL || fastopenq->max_qlen == 0)
-		return false;
-
-	if (fastopenq->qlen >= fastopenq->max_qlen) {
-		struct request_sock *req1;
-		spin_lock(&fastopenq->lock);
-		req1 = fastopenq->rskq_rst_head;
-		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
-			spin_unlock(&fastopenq->lock);
-			NET_INC_STATS_BH(sock_net(sk),
-					 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
-			/* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
-			foc->len = -1;
-			return false;
-		}
-		fastopenq->rskq_rst_head = req1->dl_next;
-		fastopenq->qlen--;
-		spin_unlock(&fastopenq->lock);
-		reqsk_free(req1);
-	}
-	if (skip_cookie) {
-		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		return true;
-	}
-
-	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
-		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
-			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-						ip_hdr(skb)->daddr, valid_foc);
-			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
-			    memcmp(&foc->val[0], &valid_foc->val[0],
-				   TCP_FASTOPEN_COOKIE_SIZE) != 0)
-				return false;
-			valid_foc->len = -1;
-		}
-		/* Acknowledge the data received from the peer. */
-		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		return true;
-	} else if (foc->len == 0) { /* Client requesting a cookie */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-					ip_hdr(skb)->daddr, valid_foc);
-		NET_INC_STATS_BH(sock_net(sk),
-				 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
-	} else {
-		/* Client sent a cookie with wrong size. Treat it
-		 * the same as invalid and return a valid one.
-		 */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-					ip_hdr(skb)->daddr, valid_foc);
-	}
-	return false;
-}
-
-static int tcp_v4_conn_req_fastopen(struct sock *sk,
-				    struct sk_buff *skb,
-				    struct sk_buff *skb_synack,
-				    struct request_sock *req)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
-	const struct inet_request_sock *ireq = inet_rsk(req);
-	struct sock *child;
-	int err;
-
-	req->num_retrans = 0;
-	req->num_timeout = 0;
-	req->sk = NULL;
-
-	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
-	if (child == NULL) {
-		NET_INC_STATS_BH(sock_net(sk),
-				 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-		kfree_skb(skb_synack);
-		return -1;
-	}
-	err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
-				    ireq->ir_rmt_addr, ireq->opt);
-	err = net_xmit_eval(err);
-	if (!err)
-		tcp_rsk(req)->snt_synack = tcp_time_stamp;
-	/* XXX (TFO) - is it ok to ignore error and continue? */
-
-	spin_lock(&queue->fastopenq->lock);
-	queue->fastopenq->qlen++;
-	spin_unlock(&queue->fastopenq->lock);
-
-	/* Initialize the child socket. Have to fix some values to take
-	 * into account the child is a Fast Open socket and is created
-	 * only out of the bits carried in the SYN packet.
-	 */
-	tp = tcp_sk(child);
-
-	tp->fastopen_rsk = req;
-	/* Do a hold on the listner sk so that if the listener is being
-	 * closed, the child that has been accepted can live on and still
-	 * access listen_lock.
-	 */
-	sock_hold(sk);
-	tcp_rsk(req)->listener = sk;
-
-	/* RFC1323: The window in SYN & SYN/ACK segments is never
-	 * scaled. So correct it appropriately.
-	 */
-	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
-
-	/* Activate the retrans timer so that SYNACK can be retransmitted.
-	 * The request socket is not added to the SYN table of the parent
-	 * because it's been added to the accept queue directly.
-	 */
-	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
-				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);
-
-	/* Add the child socket directly into the accept queue */
-	inet_csk_reqsk_queue_add(sk, req, child);
-
-	/* Now finish processing the fastopen child socket. */
-	inet_csk(child)->icsk_af_ops->rebuild_header(child);
-	tcp_init_congestion_control(child);
-	tcp_mtup_init(child);
-	tcp_init_metrics(child);
-	tcp_init_buffer_space(child);
-
-	/* Queue the data carried in the SYN packet. We need to first
-	 * bump skb's refcnt because the caller will attempt to free it.
-	 *
-	 * XXX (TFO) - we honor a zero-payload TFO request for now.
-	 * (Any reason not to?)
-	 */
-	if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
-		/* Don't queue the skb if there is no payload in SYN.
-		 * XXX (TFO) - How about SYN+FIN?
-		 */
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-	} else {
-		skb = skb_get(skb);
-		skb_dst_drop(skb);
-		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
-		skb_set_owner_r(skb, child);
-		__skb_queue_tail(&child->sk_receive_queue, skb);
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		tp->syn_data_acked = 1;
-	}
-	sk->sk_data_ready(sk);
-	bh_unlock_sock(child);
-	sock_put(child);
-	WARN_ON(req->sk == NULL);
-	return 0;
-}
-
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_options_received tmp_opt;
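
The two functions deleted above live on, in refactored form, as the shared tcp_try_fastopen() in net/ipv4/tcp_fastopen.c, which the new tcp_v4_conn_request() below calls. As orientation, the cookie check's decision can be condensed to the sketch below; the names and flattened boolean inputs are hypothetical, where the real code works on the request_sock, the fastopen queue, and the sysctl_tcp_fastopen bits.

#include <stdbool.h>

enum tfo_verdict {
	TFO_REJECT,		/* fall back to a regular 3WHS */
	TFO_SEND_COOKIE,	/* answer with a (fresh) cookie option */
	TFO_ACCEPT_DATA,	/* create the child, accept SYN data now */
};

/* Condensed, hypothetical view of the deleted cookie check. */
static enum tfo_verdict tfo_check(bool server_enabled, int qlen, int max_qlen,
				  bool cookie_present, int cookie_len,
				  bool cookie_valid)
{
	if (!server_enabled || max_qlen == 0)
		return TFO_REJECT;		/* listener has TFO off */
	if (qlen >= max_qlen)
		return TFO_REJECT;		/* pending-TFO queue full */
	if (!cookie_present)
		return TFO_REJECT;		/* plain SYN */
	if (cookie_len == 0)
		return TFO_SEND_COOKIE;		/* client requests a cookie */
	return cookie_valid ? TFO_ACCEPT_DATA	/* good cookie */
			    : TFO_SEND_COOKIE;	/* bad or stale: reissue */
}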
@@ -1451,12 +1264,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	__be32 saddr = ip_hdr(skb)->saddr;
 	__be32 daddr = ip_hdr(skb)->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
-	bool want_cookie = false;
+	bool want_cookie = false, fastopen;
 	struct flowi4 fl4;
 	struct tcp_fastopen_cookie foc = { .len = -1 };
-	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
-	struct sk_buff *skb_synack;
-	int do_fastopen;
+	int err;
 
 	/* Never answer to SYNs send to broadcast or multicast */
 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1555,52 +1366,24 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 
 		isn = tcp_v4_init_sequence(skb);
 	}
-	tcp_rsk(req)->snt_isn = isn;
-
-	if (dst == NULL) {
-		dst = inet_csk_route_req(sk, &fl4, req);
-		if (dst == NULL)
-			goto drop_and_free;
-	}
-	do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
-
-	/* We don't call tcp_v4_send_synack() directly because we need
-	 * to make sure a child socket can be created successfully before
-	 * sending back synack!
-	 *
-	 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
-	 * (or better yet, call tcp_send_synack() in the child context
-	 * directly, but will have to fix bunch of other code first)
-	 * after syn_recv_sock() except one will need to first fix the
-	 * latter to remove its dependency on the current implementation
-	 * of tcp_v4_send_synack()->tcp_select_initial_window().
-	 */
-	skb_synack = tcp_make_synack(sk, dst, req,
-	    fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
-
-	if (skb_synack) {
-		__tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr);
-		skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
-	} else
+	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		goto drop_and_free;
 
-	if (likely(!do_fastopen)) {
-		int err;
-		err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
-					    ireq->ir_rmt_addr, ireq->opt);
-		err = net_xmit_eval(err);
+	tcp_rsk(req)->snt_isn = isn;
+	tcp_rsk(req)->snt_synack = tcp_time_stamp;
+	tcp_openreq_init_rwin(req, sk, dst);
+	fastopen = !want_cookie &&
+		   tcp_try_fastopen(sk, skb, req, &foc, dst);
+	err = tcp_v4_send_synack(sk, dst, req,
+				 skb_get_queue_mapping(skb), &foc);
+	if (!fastopen) {
 		if (err || want_cookie)
 			goto drop_and_free;
 
 		tcp_rsk(req)->snt_synack = tcp_time_stamp;
 		tcp_rsk(req)->listener = NULL;
-		/* Add the request_sock to the SYN table */
 		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-		if (fastopen_cookie_present(&foc) && foc.len != 0)
-			NET_INC_STATS_BH(sock_net(sk),
-					 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-	} else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
-		goto drop_and_free;
+	}
 
 	return 0;
 
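
After this hunk there is a single SYN-ACK transmission path: tcp_try_fastopen() decides whether a fast open child is created and queued, and tcp_v4_send_synack() carries the cookie either way. The remaining bookkeeping can be summarized in the sketch below (hypothetical wrapper function; the drop_and_free error path is folded into a boolean result):

#include <stdbool.h>

/* Hypothetical summary of the rewritten tail of tcp_v4_conn_request():
 * returns true when the request_sock must be hashed into the SYN table
 * to wait for the final ACK, false when nothing more is needed.
 */
static bool needs_syn_table(bool fastopen, bool send_err, bool want_cookie)
{
	if (fastopen)
		return false;	/* child already sits on the accept queue */
	if (send_err || want_cookie)
		return false;	/* dropped, or syncookies keep no state */
	return true;		/* regular 3WHS: arm SYN-ACK rtx and wait */
}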