aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYuchung Cheng <ycheng@google.com>2012-07-19 02:43:07 -0400
committerDavid S. Miller <davem@davemloft.net>2012-07-19 14:02:03 -0400
commit783237e8daf13481ee234997cbbbb823872ac388 (patch)
treecf6e9adf54eedbc155d4fc43bcf6ec87aaac820e
parent1fe4c481ba637660793217769695c146a037bd54 (diff)
net-tcp: Fast Open client - sending SYN-data
This patch implements sending SYN-data in tcp_connect(). The data is from tcp_sendmsg() with flag MSG_FASTOPEN (implemented in a later patch). The length of the cookie in tcp_fastopen_req, init'd to 0, controls the type of the SYN. If the cookie is not cached (len==0), the host sends a data-less SYN with Fast Open cookie request option to solicit a cookie from the remote. If the cookie is available (len > 0), the host sends a SYN-data with Fast Open cookie option. If cookie length is negative, the SYN will not include any Fast Open option (for fall back operations). To deal with middleboxes that may drop SYN with data or experimental TCP option, the SYN-data is only sent once. SYN retransmits do not include data or Fast Open options. The connection will fall back to regular TCP handshake. Signed-off-by: Yuchung Cheng <ycheng@google.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/snmp.h1
-rw-r--r--include/linux/tcp.h6
-rw-r--r--include/net/tcp.h9
-rw-r--r--net/ipv4/af_inet.c10
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/tcp_output.c115
6 files changed, 130 insertions, 12 deletions
diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index e5fcbd079e4a..00bc189cb395 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -238,6 +238,7 @@ enum
238 LINUX_MIB_TCPOFOMERGE, /* TCPOFOMerge */ 238 LINUX_MIB_TCPOFOMERGE, /* TCPOFOMerge */
239 LINUX_MIB_TCPCHALLENGEACK, /* TCPChallengeACK */ 239 LINUX_MIB_TCPCHALLENGEACK, /* TCPChallengeACK */
240 LINUX_MIB_TCPSYNCHALLENGE, /* TCPSYNChallenge */ 240 LINUX_MIB_TCPSYNCHALLENGE, /* TCPSYNChallenge */
241 LINUX_MIB_TCPFASTOPENACTIVE, /* TCPFastOpenActive */
241 __LINUX_MIB_MAX 242 __LINUX_MIB_MAX
242}; 243};
243 244
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 12948f543839..1edf96afab44 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -386,7 +386,8 @@ struct tcp_sock {
386 unused : 1; 386 unused : 1;
387 u8 repair_queue; 387 u8 repair_queue;
388 u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ 388 u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */
389 early_retrans_delayed:1; /* Delayed ER timer installed */ 389 early_retrans_delayed:1, /* Delayed ER timer installed */
390 syn_fastopen:1; /* SYN includes Fast Open option */
390 391
391/* RTT measurement */ 392/* RTT measurement */
392 u32 srtt; /* smoothed round trip time << 3 */ 393 u32 srtt; /* smoothed round trip time << 3 */
@@ -500,6 +501,9 @@ struct tcp_sock {
500 struct tcp_md5sig_info __rcu *md5sig_info; 501 struct tcp_md5sig_info __rcu *md5sig_info;
501#endif 502#endif
502 503
504/* TCP fastopen related information */
505 struct tcp_fastopen_request *fastopen_req;
506
503 /* When the cookie options are generated and exchanged, then this 507 /* When the cookie options are generated and exchanged, then this
504 * object holds a reference to them (cookie_values->kref). Also 508 * object holds a reference to them (cookie_values->kref). Also
505 * contains related tcp_cookie_transactions fields. 509 * contains related tcp_cookie_transactions fields.
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e601da197361..867557b4244a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1289,6 +1289,15 @@ extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff
1289extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, 1289extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
1290 const struct tcp_md5sig_key *key); 1290 const struct tcp_md5sig_key *key);
1291 1291
/* State of one active (client side) Fast Open attempt. struct tcp_sock
 * refers to it through its fastopen_req pointer.
 */
1292struct tcp_fastopen_request {
1293 /* Fast Open cookie. Size 0 means a cookie request.
 * cookie.len selects what tcp_syn_options() puts in the SYN:
 *   len == 0 : Fast Open option requesting a cookie
 *   len  > 0 : Fast Open option carrying this cookie
 *   len  < 0 : no Fast Open option at all (set to -1 by
 *              tcp_send_syn_data() so SYN retries are plain SYNs)
 */
1294 struct tcp_fastopen_cookie cookie;
1295 struct msghdr *data; /* data in MSG_FASTOPEN; set to NULL by tcp_send_syn_data() when the final iovec fits in the SYN payload */
1296 u16 copied; /* queued in tcp_connect(): length of the data-only skb placed on the write queue */
1297};
1298
1299void tcp_free_fastopen_req(struct tcp_sock *tp);
1300
1292/* write queue abstraction */ 1301/* write queue abstraction */
1293static inline void tcp_write_queue_purge(struct sock *sk) 1302static inline void tcp_write_queue_purge(struct sock *sk)
1294{ 1303{
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 07a02f6e9696..edc414625be2 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -556,11 +556,12 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
556} 556}
557EXPORT_SYMBOL(inet_dgram_connect); 557EXPORT_SYMBOL(inet_dgram_connect);
558 558
559static long inet_wait_for_connect(struct sock *sk, long timeo) 559static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
560{ 560{
561 DEFINE_WAIT(wait); 561 DEFINE_WAIT(wait);
562 562
563 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 563 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
564 sk->sk_write_pending += writebias;
564 565
565 /* Basic assumption: if someone sets sk->sk_err, he _must_ 566 /* Basic assumption: if someone sets sk->sk_err, he _must_
566 * change state of the socket from TCP_SYN_*. 567 * change state of the socket from TCP_SYN_*.
@@ -576,6 +577,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo)
576 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 577 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
577 } 578 }
578 finish_wait(sk_sleep(sk), &wait); 579 finish_wait(sk_sleep(sk), &wait);
580 sk->sk_write_pending -= writebias;
579 return timeo; 581 return timeo;
580} 582}
581 583
@@ -634,8 +636,12 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
634 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 636 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
635 637
636 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { 638 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
639 int writebias = (sk->sk_protocol == IPPROTO_TCP) &&
640 tcp_sk(sk)->fastopen_req &&
641 tcp_sk(sk)->fastopen_req->data ? 1 : 0;
642
637 /* Error code is set above */ 643 /* Error code is set above */
638 if (!timeo || !inet_wait_for_connect(sk, timeo)) 644 if (!timeo || !inet_wait_for_connect(sk, timeo, writebias))
639 goto out; 645 goto out;
640 646
641 err = sock_intr_errno(timeo); 647 err = sock_intr_errno(timeo);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 2a5240b2ea61..957acd12250b 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -262,6 +262,7 @@ static const struct snmp_mib snmp4_net_list[] = {
262 SNMP_MIB_ITEM("TCPOFOMerge", LINUX_MIB_TCPOFOMERGE), 262 SNMP_MIB_ITEM("TCPOFOMerge", LINUX_MIB_TCPOFOMERGE),
263 SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), 263 SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK),
264 SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE), 264 SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE),
265 SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE),
265 SNMP_MIB_SENTINEL 266 SNMP_MIB_SENTINEL
266}; 267};
267 268
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4849be76ccd6..88693281da4c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -596,6 +596,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
596 u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? 596 u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
597 tcp_cookie_size_check(cvp->cookie_desired) : 597 tcp_cookie_size_check(cvp->cookie_desired) :
598 0; 598 0;
599 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
599 600
600#ifdef CONFIG_TCP_MD5SIG 601#ifdef CONFIG_TCP_MD5SIG
601 *md5 = tp->af_specific->md5_lookup(sk, sk); 602 *md5 = tp->af_specific->md5_lookup(sk, sk);
@@ -636,6 +637,16 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
636 remaining -= TCPOLEN_SACKPERM_ALIGNED; 637 remaining -= TCPOLEN_SACKPERM_ALIGNED;
637 } 638 }
638 639
640 if (fastopen && fastopen->cookie.len >= 0) {
641 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len;
642 need = (need + 3) & ~3U; /* Align to 32 bits */
643 if (remaining >= need) {
644 opts->options |= OPTION_FAST_OPEN_COOKIE;
645 opts->fastopen_cookie = &fastopen->cookie;
646 remaining -= need;
647 tp->syn_fastopen = 1;
648 }
649 }
639 /* Note that timestamps are required by the specification. 650 /* Note that timestamps are required by the specification.
640 * 651 *
641 * Odd numbers of bytes are prohibited by the specification, ensuring 652 * Odd numbers of bytes are prohibited by the specification, ensuring
@@ -2824,6 +2835,96 @@ void tcp_connect_init(struct sock *sk)
2824 tcp_clear_retrans(tp); 2835 tcp_clear_retrans(tp);
2825} 2836}
2826 2837
/* Append skb to the tail of sk's write queue and update the socket
 * bookkeeping for it. Called for the SYN in tcp_connect() and for the
 * data-only skb in tcp_send_syn_data().
 */
2838static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
2839{
2840 struct tcp_sock *tp = tcp_sk(sk);
2841 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
2842
2843 tcb->end_seq += skb->len; /* extend the skb's end sequence by its length */
2844 skb_header_release(skb);
2845 __tcp_add_write_queue_tail(sk, skb);
2846 sk->sk_wmem_queued += skb->truesize; /* account the skb's truesize as queued ... */
2847 sk_mem_charge(sk, skb->truesize); /* ... and charge the same amount to the socket */
2848 tp->write_seq = tcb->end_seq; /* next byte to queue follows this skb */
2849 tp->packets_out += tcp_skb_pcount(skb);
2850}
2851
2852/* Build and send a SYN with data and (cached) Fast Open cookie. However,
2853 * queue a data-only packet after the regular SYN, such that regular SYNs
2854 * are retransmitted on timeouts. Also if the remote SYN-ACK acknowledges
2855 * only the SYN sequence, the data are retransmitted in the first ACK.
2856 * If cookie is not cached or other error occurs, falls back to send a
2857 * regular SYN with Fast Open cookie request option.
 *
 * sk:  connecting socket; tcp_sk(sk)->fastopen_req and its ->data are
 *      dereferenced without a NULL check, so the caller must set both.
 * syn: the plain SYN skb; it is copied for the SYN-data attempt and is the
 *      skb transmitted on the fallback path.
 *
 * Returns 0 when the SYN-data was transmitted, otherwise the return value
 * of tcp_transmit_skb() for the plain SYN. On every path fo->cookie.len is
 * left at -1 so that later SYNs carry no Fast Open option.
2858 */
2859static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
2860{
2861 struct tcp_sock *tp = tcp_sk(sk);
2862 struct tcp_fastopen_request *fo = tp->fastopen_req;
2863 int space, i, err = 0, iovlen = fo->data->msg_iovlen;
2864 struct sk_buff *syn_data = NULL, *data;
2865
/* Look up the cookie (and MSS clamp) for this destination; the helper is
 * defined outside this view. A non-positive length means there is nothing
 * to send data with, so take the plain-SYN path.
 */
2866 tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie);
2867 if (fo->cookie.len <= 0)
2868 goto fallback;
2869
2870 /* MSS for SYN-data is based on cached MSS and bounded by PMTU and
2871 * user-MSS. Reserve maximum option space for middleboxes that add
2872 * private TCP options. The cost is reduced data space in SYN :(
2873 */
2874 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
2875 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
2876 space = tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
2877 MAX_TCP_OPTION_SPACE;
2878
/* Copy of the SYN with 'space' bytes of tailroom for the payload */
2879 syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
2880 sk->sk_allocation);
2881 if (syn_data == NULL)
2882 goto fallback;
2883
/* Append user data iovec by iovec until 'space' bytes are in the skb */
2884 for (i = 0; i < iovlen && syn_data->len < space; ++i) {
2885 struct iovec *iov = &fo->data->msg_iov[i];
2886 unsigned char __user *from = iov->iov_base;
2887 int len = iov->iov_len;
2888
2889 if (syn_data->len + len > space)
2890 len = space - syn_data->len; /* truncate to what still fits */
2891 else if (i + 1 == iovlen)
2892 /* No more data pending in inet_wait_for_connect() */
2893 fo->data = NULL;
2894
/* NOTE(review): fo->data is cleared above before this copy runs, so
 * it stays NULL even if the copy fails and we fall back.
 */
2895 if (skb_add_data(syn_data, from, len))
2896 goto fallback;
2897 }
2898
2899 /* Queue a data-only packet after the regular SYN for retransmission */
2900 data = pskb_copy(syn_data, sk->sk_allocation);
2901 if (data == NULL)
2902 goto fallback;
2903 TCP_SKB_CB(data)->seq++; /* data starts one past the SYN's sequence number */
2904 TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN; /* NOTE(review): redundant, overwritten by the next line */
2905 TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
2906 tcp_connect_queue_skb(sk, data);
2907 fo->copied = data->len;
2908
2909 if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
2910 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
2911 goto done;
2912 }
/* NOTE(review): the pointer is dropped rather than freed, so the
 * kfree_skb() below becomes a no-op; presumably tcp_transmit_skb() owns
 * the skb when its third argument is 0 - confirm.
 */
2913 syn_data = NULL;
2914
2915fallback:
2916 /* Send a regular SYN with Fast Open cookie request option */
2917 if (fo->cookie.len > 0)
2918 fo->cookie.len = 0; /* len 0 makes tcp_syn_options() emit a cookie request */
2919 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
2920 if (err)
2921 tp->syn_fastopen = 0;
2922 kfree_skb(syn_data);
/* The fallback path falls through into done */
2923done:
2924 fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */
2925 return err;
2926}
2927
2827/* Build a SYN and send it off. */ 2928/* Build a SYN and send it off. */
2828int tcp_connect(struct sock *sk) 2929int tcp_connect(struct sock *sk)
2829{ 2930{
@@ -2841,17 +2942,13 @@ int tcp_connect(struct sock *sk)
2841 skb_reserve(buff, MAX_TCP_HEADER); 2942 skb_reserve(buff, MAX_TCP_HEADER);
2842 2943
2843 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); 2944 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
2945 tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp;
2946 tcp_connect_queue_skb(sk, buff);
2844 TCP_ECN_send_syn(sk, buff); 2947 TCP_ECN_send_syn(sk, buff);
2845 2948
2846 /* Send it off. */ 2949 /* Send off SYN; include data in Fast Open. */
2847 TCP_SKB_CB(buff)->when = tcp_time_stamp; 2950 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
2848 tp->retrans_stamp = TCP_SKB_CB(buff)->when; 2951 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
2849 skb_header_release(buff);
2850 __tcp_add_write_queue_tail(sk, buff);
2851 sk->sk_wmem_queued += buff->truesize;
2852 sk_mem_charge(sk, buff->truesize);
2853 tp->packets_out += tcp_skb_pcount(buff);
2854 err = tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
2855 if (err == -ECONNREFUSED) 2952 if (err == -ECONNREFUSED)
2856 return err; 2953 return err;
2857 2954