diff options
author | Yuchung Cheng <ycheng@google.com> | 2012-07-19 02:43:09 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-07-19 14:02:03 -0400 |
commit | cf60af03ca4e71134206809ea892e49b92a88896 (patch) | |
tree | 478ee362f10d0737fbc4e6642e2966abe0cd1397 /net/ipv4 | |
parent | 8e4178c1c7b52f7c99f5fd22ef7af6b2bff409e3 (diff) |
net-tcp: Fast Open client - sendmsg(MSG_FASTOPEN)
sendmsg() (or sendto()) with MSG_FASTOPEN is a combo of connect(2)
and write(2). The application should replace connect() with it to
send data in the opening SYN packet.
For a blocking socket, sendmsg() blocks until all the data is buffered
locally and the handshake has completed, like a connect() call. It
returns an errno similar to that of connect() if the TCP handshake fails.
For a non-blocking socket, it returns the number of bytes queued (and
transmitted in the SYN-data packet) if a cookie is available. If a cookie
is not available, it transmits a data-less SYN packet with a Fast Open
cookie request option and returns -EINPROGRESS like connect().
Using MSG_FASTOPEN on a connecting or connected socket will result in
an errno similar to that of repeated connect() calls. Therefore the
application should only use this flag on new sockets.
The buffer size of sendmsg() is independent of the MSS of the connection.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 19 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 61 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 3 |
3 files changed, 73 insertions, 10 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index edc414625be2..fe4582ca969a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -585,8 +585,8 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) | |||
585 | * Connect to a remote host. There is regrettably still a little | 585 | * Connect to a remote host. There is regrettably still a little |
586 | * TCP 'magic' in here. | 586 | * TCP 'magic' in here. |
587 | */ | 587 | */ |
588 | int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, | 588 | int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, |
589 | int addr_len, int flags) | 589 | int addr_len, int flags) |
590 | { | 590 | { |
591 | struct sock *sk = sock->sk; | 591 | struct sock *sk = sock->sk; |
592 | int err; | 592 | int err; |
@@ -595,8 +595,6 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, | |||
595 | if (addr_len < sizeof(uaddr->sa_family)) | 595 | if (addr_len < sizeof(uaddr->sa_family)) |
596 | return -EINVAL; | 596 | return -EINVAL; |
597 | 597 | ||
598 | lock_sock(sk); | ||
599 | |||
600 | if (uaddr->sa_family == AF_UNSPEC) { | 598 | if (uaddr->sa_family == AF_UNSPEC) { |
601 | err = sk->sk_prot->disconnect(sk, flags); | 599 | err = sk->sk_prot->disconnect(sk, flags); |
602 | sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; | 600 | sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; |
@@ -663,7 +661,6 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, | |||
663 | sock->state = SS_CONNECTED; | 661 | sock->state = SS_CONNECTED; |
664 | err = 0; | 662 | err = 0; |
665 | out: | 663 | out: |
666 | release_sock(sk); | ||
667 | return err; | 664 | return err; |
668 | 665 | ||
669 | sock_error: | 666 | sock_error: |
@@ -673,6 +670,18 @@ sock_error: | |||
673 | sock->state = SS_DISCONNECTING; | 670 | sock->state = SS_DISCONNECTING; |
674 | goto out; | 671 | goto out; |
675 | } | 672 | } |
673 | EXPORT_SYMBOL(__inet_stream_connect); | ||
674 | |||
675 | int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, | ||
676 | int addr_len, int flags) | ||
677 | { | ||
678 | int err; | ||
679 | |||
680 | lock_sock(sock->sk); | ||
681 | err = __inet_stream_connect(sock, uaddr, addr_len, flags); | ||
682 | release_sock(sock->sk); | ||
683 | return err; | ||
684 | } | ||
676 | EXPORT_SYMBOL(inet_stream_connect); | 685 | EXPORT_SYMBOL(inet_stream_connect); |
677 | 686 | ||
678 | /* | 687 | /* |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4252cd8f39fd..581ecf02c6b5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -270,6 +270,7 @@ | |||
270 | #include <linux/slab.h> | 270 | #include <linux/slab.h> |
271 | 271 | ||
272 | #include <net/icmp.h> | 272 | #include <net/icmp.h> |
273 | #include <net/inet_common.h> | ||
273 | #include <net/tcp.h> | 274 | #include <net/tcp.h> |
274 | #include <net/xfrm.h> | 275 | #include <net/xfrm.h> |
275 | #include <net/ip.h> | 276 | #include <net/ip.h> |
@@ -982,26 +983,67 @@ static inline int select_size(const struct sock *sk, bool sg) | |||
982 | return tmp; | 983 | return tmp; |
983 | } | 984 | } |
984 | 985 | ||
986 | void tcp_free_fastopen_req(struct tcp_sock *tp) | ||
987 | { | ||
988 | if (tp->fastopen_req != NULL) { | ||
989 | kfree(tp->fastopen_req); | ||
990 | tp->fastopen_req = NULL; | ||
991 | } | ||
992 | } | ||
993 | |||
994 | static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size) | ||
995 | { | ||
996 | struct tcp_sock *tp = tcp_sk(sk); | ||
997 | int err, flags; | ||
998 | |||
999 | if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) | ||
1000 | return -EOPNOTSUPP; | ||
1001 | if (tp->fastopen_req != NULL) | ||
1002 | return -EALREADY; /* Another Fast Open is in progress */ | ||
1003 | |||
1004 | tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request), | ||
1005 | sk->sk_allocation); | ||
1006 | if (unlikely(tp->fastopen_req == NULL)) | ||
1007 | return -ENOBUFS; | ||
1008 | tp->fastopen_req->data = msg; | ||
1009 | |||
1010 | flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; | ||
1011 | err = __inet_stream_connect(sk->sk_socket, msg->msg_name, | ||
1012 | msg->msg_namelen, flags); | ||
1013 | *size = tp->fastopen_req->copied; | ||
1014 | tcp_free_fastopen_req(tp); | ||
1015 | return err; | ||
1016 | } | ||
1017 | |||
985 | int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | 1018 | int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
986 | size_t size) | 1019 | size_t size) |
987 | { | 1020 | { |
988 | struct iovec *iov; | 1021 | struct iovec *iov; |
989 | struct tcp_sock *tp = tcp_sk(sk); | 1022 | struct tcp_sock *tp = tcp_sk(sk); |
990 | struct sk_buff *skb; | 1023 | struct sk_buff *skb; |
991 | int iovlen, flags, err, copied; | 1024 | int iovlen, flags, err, copied = 0; |
992 | int mss_now = 0, size_goal; | 1025 | int mss_now = 0, size_goal, copied_syn = 0, offset = 0; |
993 | bool sg; | 1026 | bool sg; |
994 | long timeo; | 1027 | long timeo; |
995 | 1028 | ||
996 | lock_sock(sk); | 1029 | lock_sock(sk); |
997 | 1030 | ||
998 | flags = msg->msg_flags; | 1031 | flags = msg->msg_flags; |
1032 | if (flags & MSG_FASTOPEN) { | ||
1033 | err = tcp_sendmsg_fastopen(sk, msg, &copied_syn); | ||
1034 | if (err == -EINPROGRESS && copied_syn > 0) | ||
1035 | goto out; | ||
1036 | else if (err) | ||
1037 | goto out_err; | ||
1038 | offset = copied_syn; | ||
1039 | } | ||
1040 | |||
999 | timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); | 1041 | timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); |
1000 | 1042 | ||
1001 | /* Wait for a connection to finish. */ | 1043 | /* Wait for a connection to finish. */ |
1002 | if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) | 1044 | if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) |
1003 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) | 1045 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) |
1004 | goto out_err; | 1046 | goto do_error; |
1005 | 1047 | ||
1006 | if (unlikely(tp->repair)) { | 1048 | if (unlikely(tp->repair)) { |
1007 | if (tp->repair_queue == TCP_RECV_QUEUE) { | 1049 | if (tp->repair_queue == TCP_RECV_QUEUE) { |
@@ -1037,6 +1079,15 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1037 | unsigned char __user *from = iov->iov_base; | 1079 | unsigned char __user *from = iov->iov_base; |
1038 | 1080 | ||
1039 | iov++; | 1081 | iov++; |
1082 | if (unlikely(offset > 0)) { /* Skip bytes copied in SYN */ | ||
1083 | if (offset >= seglen) { | ||
1084 | offset -= seglen; | ||
1085 | continue; | ||
1086 | } | ||
1087 | seglen -= offset; | ||
1088 | from += offset; | ||
1089 | offset = 0; | ||
1090 | } | ||
1040 | 1091 | ||
1041 | while (seglen > 0) { | 1092 | while (seglen > 0) { |
1042 | int copy = 0; | 1093 | int copy = 0; |
@@ -1199,7 +1250,7 @@ out: | |||
1199 | if (copied && likely(!tp->repair)) | 1250 | if (copied && likely(!tp->repair)) |
1200 | tcp_push(sk, flags, mss_now, tp->nonagle); | 1251 | tcp_push(sk, flags, mss_now, tp->nonagle); |
1201 | release_sock(sk); | 1252 | release_sock(sk); |
1202 | return copied; | 1253 | return copied + copied_syn; |
1203 | 1254 | ||
1204 | do_fault: | 1255 | do_fault: |
1205 | if (!skb->len) { | 1256 | if (!skb->len) { |
@@ -1212,7 +1263,7 @@ do_fault: | |||
1212 | } | 1263 | } |
1213 | 1264 | ||
1214 | do_error: | 1265 | do_error: |
1215 | if (copied) | 1266 | if (copied + copied_syn) |
1216 | goto out; | 1267 | goto out; |
1217 | out_err: | 1268 | out_err: |
1218 | err = sk_stream_error(sk, flags, err); | 1269 | err = sk_stream_error(sk, flags, err); |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 01aa77a97020..1d8b75a58981 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -1952,6 +1952,9 @@ void tcp_v4_destroy_sock(struct sock *sk) | |||
1952 | tp->cookie_values = NULL; | 1952 | tp->cookie_values = NULL; |
1953 | } | 1953 | } |
1954 | 1954 | ||
1955 | /* If socket is aborted during connect operation */ | ||
1956 | tcp_free_fastopen_req(tp); | ||
1957 | |||
1955 | sk_sockets_allocated_dec(sk); | 1958 | sk_sockets_allocated_dec(sk); |
1956 | sock_release_memcg(sk); | 1959 | sock_release_memcg(sk); |
1957 | } | 1960 | } |