Diffstat (limited to 'net/ipv4/tcp_minisocks.c')
-rw-r--r--  net/ipv4/tcp_minisocks.c | 76
1 file changed, 61 insertions(+), 15 deletions(-)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6ff7f10dce9d..a7302d974f32 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -85,6 +85,8 @@ static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
  * spinlock it. I do not want! Well, probability of misbehaviour
  * is ridiculously low and, seems, we could use some mb() tricks
  * to avoid misread sequence numbers, states etc. --ANK
+ *
+ * We don't need to initialize tmp_opt.sack_ok as we don't use the results
  */
 enum tcp_tw_status
 tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
@@ -507,6 +509,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
 		newtp->rx_opt.mss_clamp = req->mss;
 		TCP_ECN_openreq_child(newtp, req);
+		newtp->fastopen_rsk = NULL;
+		newtp->syn_data_acked = 0;
 
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
 	}
@@ -515,13 +519,20 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 EXPORT_SYMBOL(tcp_create_openreq_child);
 
 /*
- * Process an incoming packet for SYN_RECV sockets represented
- * as a request_sock.
+ * Process an incoming packet for SYN_RECV sockets represented as a
+ * request_sock. Normally sk is the listener socket but for TFO it
+ * points to the child socket.
+ *
+ * XXX (TFO) - The current impl contains a special check for ack
+ * validation inside tcp_v4_reqsk_send_ack(). Can we do better?
+ *
+ * We don't need to initialize tmp_opt.sack_ok as we don't use the results
  */
 
 struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			   struct request_sock *req,
-			   struct request_sock **prev)
+			   struct request_sock **prev,
+			   bool fastopen)
 {
 	struct tcp_options_received tmp_opt;
 	const u8 *hash_location;
@@ -530,6 +541,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
 	bool paws_reject = false;
 
+	BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN));
+
 	tmp_opt.saw_tstamp = 0;
 	if (th->doff > (sizeof(struct tcphdr)>>2)) {
 		tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
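The new BUG_ON() encodes the contract stated in the function comment above:
the caller passes fastopen == true exactly when sk is a TFO child rather
than the listener. A minimal userspace sketch of that invariant (the enum
and helper below are illustrative only, not kernel code):

#include <assert.h>
#include <stdbool.h>

enum sk_state { TCP_LISTEN, TCP_SYN_RECV, TCP_ESTABLISHED };

/* Mirrors BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN)). */
static void check_req_entry(enum sk_state state, bool fastopen)
{
	assert(fastopen != (state == TCP_LISTEN));
}

int main(void)
{
	check_req_entry(TCP_LISTEN, false);	/* normal path: listener */
	check_req_entry(TCP_SYN_RECV, true);	/* TFO path: child socket */
	return 0;
}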
@@ -565,6 +578,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	 *
 	 * Enforce "SYN-ACK" according to figure 8, figure 6
 	 * of RFC793, fixed by RFC1122.
+	 *
+	 * Note that even if there is new data in the SYN packet
+	 * it will be thrown away too.
 	 */
 	req->rsk_ops->rtx_syn_ack(sk, req, NULL);
 	return NULL;
@@ -622,9 +638,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	 * sent (the segment carries an unacceptable ACK) ...
 	 * a reset is sent."
 	 *
-	 * Invalid ACK: reset will be sent by listening socket
+	 * Invalid ACK: reset will be sent by listening socket.
+	 * Note that the ACK validity check for a Fast Open socket is done
+	 * elsewhere and is checked directly against the child socket rather
+	 * than req because user data may have been sent out.
 	 */
-	if ((flg & TCP_FLAG_ACK) &&
+	if ((flg & TCP_FLAG_ACK) && !fastopen &&
 	    (TCP_SKB_CB(skb)->ack_seq !=
 	     tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk))))
 		return sk;
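Why the !fastopen guard: for a plain request_sock the only acceptable
ack_seq is snt_isn + 1 (plus any tcp_s_data_size() payload), but a TFO
child may already have sent real payload on the connection, so a
legitimate ACK can fall anywhere in a range. A small illustration with
made-up numbers (not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t snt_isn = 5000;
	uint32_t sent = 1460;	/* payload the TFO child already sent */

	printf("strict check accepts only: %u\n", snt_isn + 1);
	printf("TFO-legal ack_seq range:   %u..%u\n",
	       snt_isn + 1, snt_isn + 1 + sent);
	return 0;
}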
@@ -637,7 +656,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	/* RFC793: "first check sequence number". */
 
 	if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
-					  tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
+					  tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rcv_wnd)) {
 		/* Out of window: send ACK and drop. */
 		if (!(flg & TCP_FLAG_RST))
 			req->rsk_ops->send_ack(sk, skb, req);
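The left edge of the receive window moves from rcv_isn + 1 to rcv_nxt
because a TFO server may already have accepted data carried on the SYN.
A standalone sketch, modeled on the tcp_in_window() helper named in the
first hunk header (before()/after() redefined locally; the values are
made up):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define before(a, b)	((int32_t)((uint32_t)(a) - (uint32_t)(b)) < 0)
#define after(a, b)	before(b, a)

static bool in_window(uint32_t seq, uint32_t end_seq,
		      uint32_t s_win, uint32_t e_win)
{
	if (seq == s_win)
		return true;
	if (after(end_seq, s_win) && before(seq, e_win))
		return true;
	return seq == e_win && seq == end_seq;
}

int main(void)
{
	uint32_t rcv_isn = 1000, rcv_wnd = 100;
	uint32_t rcv_nxt = rcv_isn + 1 + 10;	/* 10 bytes arrived on the SYN */
	/* A duplicate copy of those 10 bytes shows up again: */
	uint32_t seq = rcv_isn + 1, end_seq = rcv_isn + 11;

	printf("old edge accepts dup: %d\n",	/* 1: would be reprocessed */
	       in_window(seq, end_seq, rcv_isn + 1, rcv_isn + 1 + rcv_wnd));
	printf("new edge accepts dup: %d\n",	/* 0: ACKed and dropped */
	       in_window(seq, end_seq, rcv_nxt, rcv_nxt + rcv_wnd));
	return 0;
}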
@@ -648,7 +667,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 
 	/* In sequence, PAWS is OK. */
 
-	if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
+	if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
 		req->ts_recent = tmp_opt.rcv_tsval;
 
 	if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
@@ -667,10 +686,25 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 
 	/* ACK sequence verified above, just make sure ACK is
 	 * set. If ACK not set, just silently drop the packet.
+	 *
+	 * XXX (TFO) - if we ever allow "data after SYN", the
+	 * following check needs to be removed.
 	 */
 	if (!(flg & TCP_FLAG_ACK))
 		return NULL;
 
+	/* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */
+	if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
+		tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
+	else if (req->retrans) /* don't take RTT sample if retrans && ~TS */
+		tcp_rsk(req)->snt_synack = 0;
+
+	/* For Fast Open no more processing is needed (sk is the
+	 * child socket).
+	 */
+	if (fastopen)
+		return sk;
+
 	/* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
 	if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
 	    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
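The snt_synack update has to move above the new early return so the TFO
path still seeds the SYNACK RTT sample; its old location, removed in the
next hunk, would never be reached when fastopen is true. A compact model
of the baseline logic itself (the struct and names are illustrative):

#include <stdint.h>

struct rtt_baseline {
	uint32_t snt_synack;	/* timestamp baseline for the SYNACK RTT */
	uint8_t  retrans;	/* SYNACK retransmission count */
};

static void update_baseline(struct rtt_baseline *req,
			    int saw_tstamp, uint32_t rcv_tsecr)
{
	if (saw_tstamp && rcv_tsecr)
		req->snt_synack = rcv_tsecr;	/* echoed send time is exact */
	else if (req->retrans)
		req->snt_synack = 0;	/* retransmitted and no TS: ambiguous */
}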
@@ -678,10 +712,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
 		return NULL;
 	}
-	if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
-		tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
-	else if (req->retrans) /* don't take RTT sample if retrans && ~TS */
-		tcp_rsk(req)->snt_synack = 0;
 
 	/* OK, ACK is valid, create big socket and
 	 * feed this segment to it. It will repeat all
@@ -706,11 +736,21 @@ listen_overflow:
 	}
 
 embryonic_reset:
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
-	if (!(flg & TCP_FLAG_RST))
+	if (!(flg & TCP_FLAG_RST)) {
+		/* Received a bad SYN pkt - for TFO we try not to reset
+		 * the local connection unless it's really necessary to
+		 * avoid becoming vulnerable to outside attack aiming at
+		 * resetting legit local connections.
+		 */
 		req->rsk_ops->send_reset(sk, skb);
-
-	inet_csk_reqsk_queue_drop(sk, req, prev);
+	} else if (fastopen) { /* received a valid RST pkt */
+		reqsk_fastopen_remove(sk, req, true);
+		tcp_reset(sk);
+	}
+	if (!fastopen) {
+		inet_csk_reqsk_queue_drop(sk, req, prev);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
+	}
 	return NULL;
 }
 EXPORT_SYMBOL(tcp_check_req);
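The rewritten tail separates outcomes the old code conflated. A
hypothetical decision helper summarizing the branching (not kernel code):

#include <stdbool.h>

enum rst_outcome { SEND_RESET, TEARDOWN_TFO_CHILD, DROP_ONLY };

static enum rst_outcome embryonic_outcome(bool rst_flag, bool fastopen)
{
	if (!rst_flag)
		return SEND_RESET;	/* bad SYN: reset the peer */
	if (fastopen)
		return TEARDOWN_TFO_CHILD;	/* valid RST: reqsk_fastopen_remove()
						 * plus tcp_reset() on the child */
	return DROP_ONLY;	/* plain request sock: just unlinked */
}

On top of whichever branch runs, the !fastopen path always unlinks the
request sock and bumps LINUX_MIB_EMBRYONICRSTS, which is why the counter
moved inside that conditional.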
@@ -719,6 +759,12 @@ EXPORT_SYMBOL(tcp_check_req);
  * Queue segment on the new socket if the new socket is active,
  * otherwise we just shortcircuit this and continue with
  * the new socket.
+ *
+ * For the vast majority of cases child->sk_state will be TCP_SYN_RECV
+ * when entering. But other states are possible due to a race condition
+ * where, after __inet_lookup_established() fails but before the
+ * listener's lock is obtained, other packets cause the same connection
+ * to be created.
  */
 
 int tcp_child_process(struct sock *parent, struct sock *child,
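The entry state matters because tcp_child_process() (its body is not
shown in this hunk) wakes the listening parent only when the child leaves
TCP_SYN_RECV during this very call. A toy model of that
record-then-compare idiom, with hypothetical names:

#include <stdio.h>

enum child_state { SYN_RECV, ESTABLISHED, CLOSED };

static void process_child(enum child_state *st, enum child_state next)
{
	enum child_state entry = *st;	/* may already differ from SYN_RECV
					 * due to the race described above */
	*st = next;			/* stands in for segment processing */
	if (entry == SYN_RECV && *st != entry)
		printf("wake up listener\n");	/* signal accept() exactly once */
}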