aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Yuchung Cheng <ycheng@google.com> 2012-07-19 02:43:09 -0400
committer David S. Miller <davem@davemloft.net> 2012-07-19 14:02:03 -0400
commit cf60af03ca4e71134206809ea892e49b92a88896 (patch)
tree 478ee362f10d0737fbc4e6642e2966abe0cd1397
parent 8e4178c1c7b52f7c99f5fd22ef7af6b2bff409e3 (diff)
net-tcp: Fast Open client - sendmsg(MSG_FASTOPEN)
sendmsg() (or sendto()) with MSG_FASTOPEN is a combination of connect(2) and write(2). The application should use it in place of connect() to send data in the opening SYN packet. For a blocking socket, sendmsg() blocks until all the data is buffered locally and the handshake is completed, like a connect() call. If the TCP handshake fails, it returns an errno similar to that of connect(). For a non-blocking socket, it returns the number of bytes queued (and transmitted in the SYN-data packet) if a cookie is available. If a cookie is not available, it transmits a data-less SYN packet with the Fast Open cookie request option and returns -EINPROGRESS, like connect(). Using MSG_FASTOPEN on a connecting or connected socket will result in an errno similar to that of repeated connect() calls. Therefore the application should only use this flag on new sockets. The buffer size of sendmsg() is independent of the MSS of the connection. Signed-off-by: Yuchung Cheng <ycheng@google.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- Documentation/networking/ip-sysctl.txt 11
-rw-r--r-- include/linux/socket.h 1
-rw-r--r-- include/net/inet_common.h 6
-rw-r--r-- include/net/tcp.h 3
-rw-r--r-- net/ipv4/af_inet.c 19
-rw-r--r-- net/ipv4/tcp.c 61
-rw-r--r-- net/ipv4/tcp_ipv4.c 3
7 files changed, 92 insertions, 12 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index e1e021594cff..03964e088180 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -468,6 +468,17 @@ tcp_syncookies - BOOLEAN
468 SYN flood warnings in logs not being really flooded, your server 468 SYN flood warnings in logs not being really flooded, your server
469 is seriously misconfigured. 469 is seriously misconfigured.
470 470
471tcp_fastopen - INTEGER
472 Enable TCP Fast Open feature (draft-ietf-tcpm-fastopen) to send data
473 in the opening SYN packet. To use this feature, the client application
474 must not use connect(). Instead, it should use sendmsg() or sendto()
475 with MSG_FASTOPEN flag which performs a TCP handshake automatically.
476
477 The values (bitmap) are:
478 1: Enables sending data in the opening SYN on the client
479
480 Default: 0
481
471tcp_syn_retries - INTEGER 482tcp_syn_retries - INTEGER
472 Number of times initial SYNs for an active TCP connection attempt 483 Number of times initial SYNs for an active TCP connection attempt
473 will be retransmitted. Should not be higher than 255. Default value 484 will be retransmitted. Should not be higher than 255. Default value
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 25d6322fb635..ba7b2e817cfa 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -268,6 +268,7 @@ struct ucred {
268#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */ 268#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */
269#define MSG_EOF MSG_FIN 269#define MSG_EOF MSG_FIN
270 270
271#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */
271#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exit for file 272#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exit for file
272 descriptor received through 273 descriptor received through
273 SCM_RIGHTS */ 274 SCM_RIGHTS */
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 22fac9892b16..234008782c8c 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -14,9 +14,11 @@ struct sockaddr;
14struct socket; 14struct socket;
15 15
16extern int inet_release(struct socket *sock); 16extern int inet_release(struct socket *sock);
17extern int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr, 17extern int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
18 int addr_len, int flags); 18 int addr_len, int flags);
19extern int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, 19extern int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
20 int addr_len, int flags);
21extern int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
20 int addr_len, int flags); 22 int addr_len, int flags);
21extern int inet_accept(struct socket *sock, struct socket *newsock, int flags); 23extern int inet_accept(struct socket *sock, struct socket *newsock, int flags);
22extern int inet_sendmsg(struct kiocb *iocb, struct socket *sock, 24extern int inet_sendmsg(struct kiocb *iocb, struct socket *sock,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 867557b4244a..c0258100d70c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -212,6 +212,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
212/* TCP initial congestion window as per draft-hkchu-tcpm-initcwnd-01 */ 212/* TCP initial congestion window as per draft-hkchu-tcpm-initcwnd-01 */
213#define TCP_INIT_CWND 10 213#define TCP_INIT_CWND 10
214 214
215/* Bit Flags for sysctl_tcp_fastopen */
216#define TFO_CLIENT_ENABLE 1
217
215extern struct inet_timewait_death_row tcp_death_row; 218extern struct inet_timewait_death_row tcp_death_row;
216 219
217/* sysctl variables for tcp */ 220/* sysctl variables for tcp */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index edc414625be2..fe4582ca969a 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -585,8 +585,8 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
585 * Connect to a remote host. There is regrettably still a little 585 * Connect to a remote host. There is regrettably still a little
586 * TCP 'magic' in here. 586 * TCP 'magic' in here.
587 */ 587 */
588int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, 588int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
589 int addr_len, int flags) 589 int addr_len, int flags)
590{ 590{
591 struct sock *sk = sock->sk; 591 struct sock *sk = sock->sk;
592 int err; 592 int err;
@@ -595,8 +595,6 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
595 if (addr_len < sizeof(uaddr->sa_family)) 595 if (addr_len < sizeof(uaddr->sa_family))
596 return -EINVAL; 596 return -EINVAL;
597 597
598 lock_sock(sk);
599
600 if (uaddr->sa_family == AF_UNSPEC) { 598 if (uaddr->sa_family == AF_UNSPEC) {
601 err = sk->sk_prot->disconnect(sk, flags); 599 err = sk->sk_prot->disconnect(sk, flags);
602 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; 600 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
@@ -663,7 +661,6 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
663 sock->state = SS_CONNECTED; 661 sock->state = SS_CONNECTED;
664 err = 0; 662 err = 0;
665out: 663out:
666 release_sock(sk);
667 return err; 664 return err;
668 665
669sock_error: 666sock_error:
@@ -673,6 +670,18 @@ sock_error:
673 sock->state = SS_DISCONNECTING; 670 sock->state = SS_DISCONNECTING;
674 goto out; 671 goto out;
675} 672}
673EXPORT_SYMBOL(__inet_stream_connect);
674
675int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
676 int addr_len, int flags)
677{
678 int err;
679
680 lock_sock(sock->sk);
681 err = __inet_stream_connect(sock, uaddr, addr_len, flags);
682 release_sock(sock->sk);
683 return err;
684}
676EXPORT_SYMBOL(inet_stream_connect); 685EXPORT_SYMBOL(inet_stream_connect);
677 686
678/* 687/*
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4252cd8f39fd..581ecf02c6b5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -270,6 +270,7 @@
270#include <linux/slab.h> 270#include <linux/slab.h>
271 271
272#include <net/icmp.h> 272#include <net/icmp.h>
273#include <net/inet_common.h>
273#include <net/tcp.h> 274#include <net/tcp.h>
274#include <net/xfrm.h> 275#include <net/xfrm.h>
275#include <net/ip.h> 276#include <net/ip.h>
@@ -982,26 +983,67 @@ static inline int select_size(const struct sock *sk, bool sg)
982 return tmp; 983 return tmp;
983} 984}
984 985
986void tcp_free_fastopen_req(struct tcp_sock *tp)
987{
988 if (tp->fastopen_req != NULL) {
989 kfree(tp->fastopen_req);
990 tp->fastopen_req = NULL;
991 }
992}
993
994static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
995{
996 struct tcp_sock *tp = tcp_sk(sk);
997 int err, flags;
998
999 if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE))
1000 return -EOPNOTSUPP;
1001 if (tp->fastopen_req != NULL)
1002 return -EALREADY; /* Another Fast Open is in progress */
1003
1004 tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request),
1005 sk->sk_allocation);
1006 if (unlikely(tp->fastopen_req == NULL))
1007 return -ENOBUFS;
1008 tp->fastopen_req->data = msg;
1009
1010 flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
1011 err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
1012 msg->msg_namelen, flags);
1013 *size = tp->fastopen_req->copied;
1014 tcp_free_fastopen_req(tp);
1015 return err;
1016}
1017
985int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 1018int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
986 size_t size) 1019 size_t size)
987{ 1020{
988 struct iovec *iov; 1021 struct iovec *iov;
989 struct tcp_sock *tp = tcp_sk(sk); 1022 struct tcp_sock *tp = tcp_sk(sk);
990 struct sk_buff *skb; 1023 struct sk_buff *skb;
991 int iovlen, flags, err, copied; 1024 int iovlen, flags, err, copied = 0;
992 int mss_now = 0, size_goal; 1025 int mss_now = 0, size_goal, copied_syn = 0, offset = 0;
993 bool sg; 1026 bool sg;
994 long timeo; 1027 long timeo;
995 1028
996 lock_sock(sk); 1029 lock_sock(sk);
997 1030
998 flags = msg->msg_flags; 1031 flags = msg->msg_flags;
1032 if (flags & MSG_FASTOPEN) {
1033 err = tcp_sendmsg_fastopen(sk, msg, &copied_syn);
1034 if (err == -EINPROGRESS && copied_syn > 0)
1035 goto out;
1036 else if (err)
1037 goto out_err;
1038 offset = copied_syn;
1039 }
1040
999 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 1041 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
1000 1042
1001 /* Wait for a connection to finish. */ 1043 /* Wait for a connection to finish. */
1002 if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) 1044 if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
1003 if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) 1045 if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
1004 goto out_err; 1046 goto do_error;
1005 1047
1006 if (unlikely(tp->repair)) { 1048 if (unlikely(tp->repair)) {
1007 if (tp->repair_queue == TCP_RECV_QUEUE) { 1049 if (tp->repair_queue == TCP_RECV_QUEUE) {
@@ -1037,6 +1079,15 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1037 unsigned char __user *from = iov->iov_base; 1079 unsigned char __user *from = iov->iov_base;
1038 1080
1039 iov++; 1081 iov++;
1082 if (unlikely(offset > 0)) { /* Skip bytes copied in SYN */
1083 if (offset >= seglen) {
1084 offset -= seglen;
1085 continue;
1086 }
1087 seglen -= offset;
1088 from += offset;
1089 offset = 0;
1090 }
1040 1091
1041 while (seglen > 0) { 1092 while (seglen > 0) {
1042 int copy = 0; 1093 int copy = 0;
@@ -1199,7 +1250,7 @@ out:
1199 if (copied && likely(!tp->repair)) 1250 if (copied && likely(!tp->repair))
1200 tcp_push(sk, flags, mss_now, tp->nonagle); 1251 tcp_push(sk, flags, mss_now, tp->nonagle);
1201 release_sock(sk); 1252 release_sock(sk);
1202 return copied; 1253 return copied + copied_syn;
1203 1254
1204do_fault: 1255do_fault:
1205 if (!skb->len) { 1256 if (!skb->len) {
@@ -1212,7 +1263,7 @@ do_fault:
1212 } 1263 }
1213 1264
1214do_error: 1265do_error:
1215 if (copied) 1266 if (copied + copied_syn)
1216 goto out; 1267 goto out;
1217out_err: 1268out_err:
1218 err = sk_stream_error(sk, flags, err); 1269 err = sk_stream_error(sk, flags, err);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 01aa77a97020..1d8b75a58981 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1952,6 +1952,9 @@ void tcp_v4_destroy_sock(struct sock *sk)
1952 tp->cookie_values = NULL; 1952 tp->cookie_values = NULL;
1953 } 1953 }
1954 1954
1955 /* If socket is aborted during connect operation */
1956 tcp_free_fastopen_req(tp);
1957
1955 sk_sockets_allocated_dec(sk); 1958 sk_sockets_allocated_dec(sk);
1956 sock_release_memcg(sk); 1959 sock_release_memcg(sk);
1957} 1960}