diff options
author | Jerry Chu <hkchu@google.com> | 2012-08-31 08:29:13 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-08-31 20:02:19 -0400 |
commit | 168a8f58059a22feb9e9a2dcc1b8053dbbbc12ef (patch) | |
tree | 0d5b9181b840c9b6b08b1452004f0746e8eebab8 /net/ipv4/tcp_input.c | |
parent | 8336886f786fdacbc19b719c1f7ea91eb70706d4 (diff) |
tcp: TCP Fast Open Server - main code path
This patch adds the main processing path to complete the TFO server
patches.
A TFO request (i.e., SYN+data packet with a TFO cookie option) first
gets processed in tcp_v4_conn_request(). If it passes the various TFO
checks by tcp_fastopen_check(), a child socket will be created right
away to be accepted by applications, rather than waiting for the 3WHS
to finish.
In addition to the use of TFO cookie, a simple max_qlen based scheme
is put in place to fend off spoofed TFO attack.
When a valid ACK comes back to tcp_rcv_state_process(), it will cause
the state of the child socket to switch from either TCP_SYN_RECV to
TCP_ESTABLISHED, or TCP_FIN_WAIT1 to TCP_FIN_WAIT2. At this time
retransmission will resume for any unack'ed (data, FIN,...) segments.
Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 71 |
1 files changed, 58 insertions, 13 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d47d5fe8f3f0..8c304a400798 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -3127,6 +3127,12 @@ void tcp_rearm_rto(struct sock *sk) | |||
3127 | { | 3127 | { |
3128 | struct tcp_sock *tp = tcp_sk(sk); | 3128 | struct tcp_sock *tp = tcp_sk(sk); |
3129 | 3129 | ||
3130 | /* If the retrans timer is currently being used by Fast Open | ||
3131 | * for SYN-ACK retrans purpose, stay put. | ||
3132 | */ | ||
3133 | if (tp->fastopen_rsk) | ||
3134 | return; | ||
3135 | |||
3130 | if (!tp->packets_out) { | 3136 | if (!tp->packets_out) { |
3131 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); | 3137 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); |
3132 | } else { | 3138 | } else { |
@@ -5895,7 +5901,9 @@ discard: | |||
5895 | tcp_send_synack(sk); | 5901 | tcp_send_synack(sk); |
5896 | #if 0 | 5902 | #if 0 |
5897 | /* Note, we could accept data and URG from this segment. | 5903 | /* Note, we could accept data and URG from this segment. |
5898 | * There are no obstacles to make this. | 5904 | * There are no obstacles to make this (except that we must |
5905 | * either change tcp_recvmsg() to prevent it from returning data | ||
5906 | * before 3WHS completes per RFC793, or employ TCP Fast Open). | ||
5899 | * | 5907 | * |
5900 | * However, if we ignore data in ACKless segments sometimes, | 5908 | * However, if we ignore data in ACKless segments sometimes, |
5901 | * we have no reasons to accept it sometimes. | 5909 | * we have no reasons to accept it sometimes. |
@@ -5935,6 +5943,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5935 | { | 5943 | { |
5936 | struct tcp_sock *tp = tcp_sk(sk); | 5944 | struct tcp_sock *tp = tcp_sk(sk); |
5937 | struct inet_connection_sock *icsk = inet_csk(sk); | 5945 | struct inet_connection_sock *icsk = inet_csk(sk); |
5946 | struct request_sock *req; | ||
5938 | int queued = 0; | 5947 | int queued = 0; |
5939 | 5948 | ||
5940 | tp->rx_opt.saw_tstamp = 0; | 5949 | tp->rx_opt.saw_tstamp = 0; |
@@ -5990,7 +5999,14 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5990 | return 0; | 5999 | return 0; |
5991 | } | 6000 | } |
5992 | 6001 | ||
5993 | if (!tcp_validate_incoming(sk, skb, th, 0)) | 6002 | req = tp->fastopen_rsk; |
6003 | if (req != NULL) { | ||
6004 | BUG_ON(sk->sk_state != TCP_SYN_RECV && | ||
6005 | sk->sk_state != TCP_FIN_WAIT1); | ||
6006 | |||
6007 | if (tcp_check_req(sk, skb, req, NULL, true) == NULL) | ||
6008 | goto discard; | ||
6009 | } else if (!tcp_validate_incoming(sk, skb, th, 0)) | ||
5994 | return 0; | 6010 | return 0; |
5995 | 6011 | ||
5996 | /* step 5: check the ACK field */ | 6012 | /* step 5: check the ACK field */ |
@@ -6000,7 +6016,22 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
6000 | switch (sk->sk_state) { | 6016 | switch (sk->sk_state) { |
6001 | case TCP_SYN_RECV: | 6017 | case TCP_SYN_RECV: |
6002 | if (acceptable) { | 6018 | if (acceptable) { |
6003 | tp->copied_seq = tp->rcv_nxt; | 6019 | /* Once we leave TCP_SYN_RECV, we no longer |
6020 | * need req so release it. | ||
6021 | */ | ||
6022 | if (req) { | ||
6023 | reqsk_fastopen_remove(sk, req, false); | ||
6024 | } else { | ||
6025 | /* Make sure socket is routed, for | ||
6026 | * correct metrics. | ||
6027 | */ | ||
6028 | icsk->icsk_af_ops->rebuild_header(sk); | ||
6029 | tcp_init_congestion_control(sk); | ||
6030 | |||
6031 | tcp_mtup_init(sk); | ||
6032 | tcp_init_buffer_space(sk); | ||
6033 | tp->copied_seq = tp->rcv_nxt; | ||
6034 | } | ||
6004 | smp_mb(); | 6035 | smp_mb(); |
6005 | tcp_set_state(sk, TCP_ESTABLISHED); | 6036 | tcp_set_state(sk, TCP_ESTABLISHED); |
6006 | sk->sk_state_change(sk); | 6037 | sk->sk_state_change(sk); |
@@ -6022,23 +6053,27 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
6022 | if (tp->rx_opt.tstamp_ok) | 6053 | if (tp->rx_opt.tstamp_ok) |
6023 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; | 6054 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; |
6024 | 6055 | ||
6025 | /* Make sure socket is routed, for | 6056 | if (req) { |
6026 | * correct metrics. | 6057 | /* Re-arm the timer because data may |
6027 | */ | 6058 | * have been sent out. This is similar |
6028 | icsk->icsk_af_ops->rebuild_header(sk); | 6059 | * to the regular data transmission case |
6029 | 6060 | * when new data has just been ack'ed. | |
6030 | tcp_init_metrics(sk); | 6061 | * |
6031 | 6062 | * (TFO) - we could try to be more | |
6032 | tcp_init_congestion_control(sk); | 6063 | * aggressive and retranmitting any data |
6064 | * sooner based on when they were sent | ||
6065 | * out. | ||
6066 | */ | ||
6067 | tcp_rearm_rto(sk); | ||
6068 | } else | ||
6069 | tcp_init_metrics(sk); | ||
6033 | 6070 | ||
6034 | /* Prevent spurious tcp_cwnd_restart() on | 6071 | /* Prevent spurious tcp_cwnd_restart() on |
6035 | * first data packet. | 6072 | * first data packet. |
6036 | */ | 6073 | */ |
6037 | tp->lsndtime = tcp_time_stamp; | 6074 | tp->lsndtime = tcp_time_stamp; |
6038 | 6075 | ||
6039 | tcp_mtup_init(sk); | ||
6040 | tcp_initialize_rcv_mss(sk); | 6076 | tcp_initialize_rcv_mss(sk); |
6041 | tcp_init_buffer_space(sk); | ||
6042 | tcp_fast_path_on(tp); | 6077 | tcp_fast_path_on(tp); |
6043 | } else { | 6078 | } else { |
6044 | return 1; | 6079 | return 1; |
@@ -6046,6 +6081,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
6046 | break; | 6081 | break; |
6047 | 6082 | ||
6048 | case TCP_FIN_WAIT1: | 6083 | case TCP_FIN_WAIT1: |
6084 | /* If we enter the TCP_FIN_WAIT1 state and we are a | ||
6085 | * Fast Open socket and this is the first acceptable | ||
6086 | * ACK we have received, this would have acknowledged | ||
6087 | * our SYNACK so stop the SYNACK timer. | ||
6088 | */ | ||
6089 | if (acceptable && req != NULL) { | ||
6090 | /* We no longer need the request sock. */ | ||
6091 | reqsk_fastopen_remove(sk, req, false); | ||
6092 | tcp_rearm_rto(sk); | ||
6093 | } | ||
6049 | if (tp->snd_una == tp->write_seq) { | 6094 | if (tp->snd_una == tp->write_seq) { |
6050 | struct dst_entry *dst; | 6095 | struct dst_entry *dst; |
6051 | 6096 | ||