diff options
Diffstat (limited to 'net/ipv4/tcp_minisocks.c')
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 76 |
1 files changed, 61 insertions, 15 deletions
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 6ff7f10dce9d..a7302d974f32 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -85,6 +85,8 @@ static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) | |||
85 | * spinlock it. I do not want! Well, probability of misbehaviour | 85 | * spinlock it. I do not want! Well, probability of misbehaviour |
86 | * is ridiculously low and, seems, we could use some mb() tricks | 86 | * is ridiculously low and, seems, we could use some mb() tricks |
87 | * to avoid misread sequence numbers, states etc. --ANK | 87 | * to avoid misread sequence numbers, states etc. --ANK |
88 | * | ||
89 | * We don't need to initialize tmp_opt.sack_ok as we don't use the results | ||
88 | */ | 90 | */ |
89 | enum tcp_tw_status | 91 | enum tcp_tw_status |
90 | tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, | 92 | tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, |
@@ -507,6 +509,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
507 | newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; | 509 | newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; |
508 | newtp->rx_opt.mss_clamp = req->mss; | 510 | newtp->rx_opt.mss_clamp = req->mss; |
509 | TCP_ECN_openreq_child(newtp, req); | 511 | TCP_ECN_openreq_child(newtp, req); |
512 | newtp->fastopen_rsk = NULL; | ||
513 | newtp->syn_data_acked = 0; | ||
510 | 514 | ||
511 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); | 515 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); |
512 | } | 516 | } |
@@ -515,13 +519,20 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
515 | EXPORT_SYMBOL(tcp_create_openreq_child); | 519 | EXPORT_SYMBOL(tcp_create_openreq_child); |
516 | 520 | ||
517 | /* | 521 | /* |
518 | * Process an incoming packet for SYN_RECV sockets represented | 522 | * Process an incoming packet for SYN_RECV sockets represented as a |
519 | * as a request_sock. | 523 | * request_sock. Normally sk is the listener socket but for TFO it |
524 | * points to the child socket. | ||
525 | * | ||
526 | * XXX (TFO) - The current impl contains a special check for ack | ||
527 | * validation and inside tcp_v4_reqsk_send_ack(). Can we do better? | ||
528 | * | ||
529 | * We don't need to initialize tmp_opt.sack_ok as we don't use the results | ||
520 | */ | 530 | */ |
521 | 531 | ||
522 | struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | 532 | struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, |
523 | struct request_sock *req, | 533 | struct request_sock *req, |
524 | struct request_sock **prev) | 534 | struct request_sock **prev, |
535 | bool fastopen) | ||
525 | { | 536 | { |
526 | struct tcp_options_received tmp_opt; | 537 | struct tcp_options_received tmp_opt; |
527 | const u8 *hash_location; | 538 | const u8 *hash_location; |
@@ -530,6 +541,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
530 | __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); | 541 | __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); |
531 | bool paws_reject = false; | 542 | bool paws_reject = false; |
532 | 543 | ||
544 | BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN)); | ||
545 | |||
533 | tmp_opt.saw_tstamp = 0; | 546 | tmp_opt.saw_tstamp = 0; |
534 | if (th->doff > (sizeof(struct tcphdr)>>2)) { | 547 | if (th->doff > (sizeof(struct tcphdr)>>2)) { |
535 | tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); | 548 | tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); |
@@ -565,6 +578,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
565 | * | 578 | * |
566 | * Enforce "SYN-ACK" according to figure 8, figure 6 | 579 | * Enforce "SYN-ACK" according to figure 8, figure 6 |
567 | * of RFC793, fixed by RFC1122. | 580 | * of RFC793, fixed by RFC1122. |
581 | * | ||
582 | * Note that even if there is new data in the SYN packet | ||
583 | * they will be thrown away too. | ||
568 | */ | 584 | */ |
569 | req->rsk_ops->rtx_syn_ack(sk, req, NULL); | 585 | req->rsk_ops->rtx_syn_ack(sk, req, NULL); |
570 | return NULL; | 586 | return NULL; |
@@ -622,9 +638,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
622 | * sent (the segment carries an unacceptable ACK) ... | 638 | * sent (the segment carries an unacceptable ACK) ... |
623 | * a reset is sent." | 639 | * a reset is sent." |
624 | * | 640 | * |
625 | * Invalid ACK: reset will be sent by listening socket | 641 | * Invalid ACK: reset will be sent by listening socket. |
642 | * Note that the ACK validity check for a Fast Open socket is done | ||
643 | * elsewhere and is checked directly against the child socket rather | ||
644 | * than req because user data may have been sent out. | ||
626 | */ | 645 | */ |
627 | if ((flg & TCP_FLAG_ACK) && | 646 | if ((flg & TCP_FLAG_ACK) && !fastopen && |
628 | (TCP_SKB_CB(skb)->ack_seq != | 647 | (TCP_SKB_CB(skb)->ack_seq != |
629 | tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) | 648 | tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) |
630 | return sk; | 649 | return sk; |
@@ -637,7 +656,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
637 | /* RFC793: "first check sequence number". */ | 656 | /* RFC793: "first check sequence number". */ |
638 | 657 | ||
639 | if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, | 658 | if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, |
640 | tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) { | 659 | tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rcv_wnd)) { |
641 | /* Out of window: send ACK and drop. */ | 660 | /* Out of window: send ACK and drop. */ |
642 | if (!(flg & TCP_FLAG_RST)) | 661 | if (!(flg & TCP_FLAG_RST)) |
643 | req->rsk_ops->send_ack(sk, skb, req); | 662 | req->rsk_ops->send_ack(sk, skb, req); |
@@ -648,7 +667,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
648 | 667 | ||
649 | /* In sequence, PAWS is OK. */ | 668 | /* In sequence, PAWS is OK. */ |
650 | 669 | ||
651 | if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1)) | 670 | if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) |
652 | req->ts_recent = tmp_opt.rcv_tsval; | 671 | req->ts_recent = tmp_opt.rcv_tsval; |
653 | 672 | ||
654 | if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { | 673 | if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { |
@@ -667,10 +686,25 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
667 | 686 | ||
668 | /* ACK sequence verified above, just make sure ACK is | 687 | /* ACK sequence verified above, just make sure ACK is |
669 | * set. If ACK not set, just silently drop the packet. | 688 | * set. If ACK not set, just silently drop the packet. |
689 | * | ||
690 | * XXX (TFO) - if we ever allow "data after SYN", the | ||
691 | * following check needs to be removed. | ||
670 | */ | 692 | */ |
671 | if (!(flg & TCP_FLAG_ACK)) | 693 | if (!(flg & TCP_FLAG_ACK)) |
672 | return NULL; | 694 | return NULL; |
673 | 695 | ||
696 | /* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */ | ||
697 | if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) | ||
698 | tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; | ||
699 | else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ | ||
700 | tcp_rsk(req)->snt_synack = 0; | ||
701 | |||
702 | /* For Fast Open no more processing is needed (sk is the | ||
703 | * child socket). | ||
704 | */ | ||
705 | if (fastopen) | ||
706 | return sk; | ||
707 | |||
674 | /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ | 708 | /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ |
675 | if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && | 709 | if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && |
676 | TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { | 710 | TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { |
@@ -678,10 +712,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
678 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); | 712 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); |
679 | return NULL; | 713 | return NULL; |
680 | } | 714 | } |
681 | if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) | ||
682 | tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; | ||
683 | else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ | ||
684 | tcp_rsk(req)->snt_synack = 0; | ||
685 | 715 | ||
686 | /* OK, ACK is valid, create big socket and | 716 | /* OK, ACK is valid, create big socket and |
687 | * feed this segment to it. It will repeat all | 717 | * feed this segment to it. It will repeat all |
@@ -706,11 +736,21 @@ listen_overflow: | |||
706 | } | 736 | } |
707 | 737 | ||
708 | embryonic_reset: | 738 | embryonic_reset: |
709 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); | 739 | if (!(flg & TCP_FLAG_RST)) { |
710 | if (!(flg & TCP_FLAG_RST)) | 740 | /* Received a bad SYN pkt - for TFO We try not to reset |
741 | * the local connection unless it's really necessary to | ||
742 | * avoid becoming vulnerable to outside attack aiming at | ||
743 | * resetting legit local connections. | ||
744 | */ | ||
711 | req->rsk_ops->send_reset(sk, skb); | 745 | req->rsk_ops->send_reset(sk, skb); |
712 | 746 | } else if (fastopen) { /* received a valid RST pkt */ | |
713 | inet_csk_reqsk_queue_drop(sk, req, prev); | 747 | reqsk_fastopen_remove(sk, req, true); |
748 | tcp_reset(sk); | ||
749 | } | ||
750 | if (!fastopen) { | ||
751 | inet_csk_reqsk_queue_drop(sk, req, prev); | ||
752 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); | ||
753 | } | ||
714 | return NULL; | 754 | return NULL; |
715 | } | 755 | } |
716 | EXPORT_SYMBOL(tcp_check_req); | 756 | EXPORT_SYMBOL(tcp_check_req); |
@@ -719,6 +759,12 @@ EXPORT_SYMBOL(tcp_check_req); | |||
719 | * Queue segment on the new socket if the new socket is active, | 759 | * Queue segment on the new socket if the new socket is active, |
720 | * otherwise we just shortcircuit this and continue with | 760 | * otherwise we just shortcircuit this and continue with |
721 | * the new socket. | 761 | * the new socket. |
762 | * | ||
763 | * For the vast majority of cases child->sk_state will be TCP_SYN_RECV | ||
764 | * when entering. But other states are possible due to a race condition | ||
765 | * where after __inet_lookup_established() fails but before the listener | ||
766 | * locked is obtained, other packets cause the same connection to | ||
767 | * be created. | ||
722 | */ | 768 | */ |
723 | 769 | ||
724 | int tcp_child_process(struct sock *parent, struct sock *child, | 770 | int tcp_child_process(struct sock *parent, struct sock *child, |