aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/tcp.h3
-rw-r--r--include/net/inet_sock.h6
-rw-r--r--include/net/tcp.h1
-rw-r--r--include/uapi/linux/tcp.h1
-rw-r--r--net/ipv4/af_inet.c31
-rw-r--r--net/ipv4/tcp.c35
-rw-r--r--net/ipv4/tcp_fastopen.c33
-rw-r--r--net/ipv4/tcp_ipv4.c7
-rw-r--r--net/ipv6/tcp_ipv6.c5
9 files changed, 111 insertions, 11 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 5371b3d70cfe..f88f4649ba6f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -222,7 +222,8 @@ struct tcp_sock {
222 u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ 222 u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
223 u8 chrono_type:2, /* current chronograph type */ 223 u8 chrono_type:2, /* current chronograph type */
224 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ 224 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */
225 unused:5; 225 fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
226 unused:4;
226 u8 nonagle : 4,/* Disable Nagle algorithm? */ 227 u8 nonagle : 4,/* Disable Nagle algorithm? */
227 thin_lto : 1,/* Use linear timeouts for thin streams */ 228 thin_lto : 1,/* Use linear timeouts for thin streams */
228 unused1 : 1, 229 unused1 : 1,
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index c9cff977a7fb..aa95053dfc78 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -206,7 +206,11 @@ struct inet_sock {
206 transparent:1, 206 transparent:1,
207 mc_all:1, 207 mc_all:1,
208 nodefrag:1; 208 nodefrag:1;
209 __u8 bind_address_no_port:1; 209 __u8 bind_address_no_port:1,
210 defer_connect:1; /* Indicates that fastopen_connect is set
211 * and cookie exists so we defer connect
212 * until first data frame is written
213 */
210 __u8 rcv_tos; 214 __u8 rcv_tos;
211 __u8 convert_csum; 215 __u8 convert_csum;
212 int uc_index; 216 int uc_index;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index de67541d7adf..6ec4ea652f3f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1495,6 +1495,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
1495void tcp_fastopen_init_key_once(bool publish); 1495void tcp_fastopen_init_key_once(bool publish);
1496bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, 1496bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
1497 struct tcp_fastopen_cookie *cookie); 1497 struct tcp_fastopen_cookie *cookie);
1498bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
1498#define TCP_FASTOPEN_KEY_LENGTH 16 1499#define TCP_FASTOPEN_KEY_LENGTH 16
1499 1500
1500/* Fastopen key context */ 1501/* Fastopen key context */
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index c53de2691cec..6ff35eb48d10 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -116,6 +116,7 @@ enum {
116#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ 116#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */
117#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ 117#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */
118#define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */ 118#define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */
119#define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */
119 120
120struct tcp_repair_opt { 121struct tcp_repair_opt {
121 __u32 opt_code; 122 __u32 opt_code;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 28fe8da4e1ac..92e7f3e957fa 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -576,13 +576,24 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
576 int err; 576 int err;
577 long timeo; 577 long timeo;
578 578
579 if (addr_len < sizeof(uaddr->sa_family)) 579 /*
580 return -EINVAL; 580 * uaddr can be NULL and addr_len can be 0 if:
581 * sk is a TCP fastopen active socket and
582 * TCP_FASTOPEN_CONNECT sockopt is set and
583 * we already have a valid cookie for this socket.
584 * In this case, user can call write() after connect().
585 * write() will invoke tcp_sendmsg_fastopen() which calls
586 * __inet_stream_connect().
587 */
588 if (uaddr) {
589 if (addr_len < sizeof(uaddr->sa_family))
590 return -EINVAL;
581 591
582 if (uaddr->sa_family == AF_UNSPEC) { 592 if (uaddr->sa_family == AF_UNSPEC) {
583 err = sk->sk_prot->disconnect(sk, flags); 593 err = sk->sk_prot->disconnect(sk, flags);
584 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; 594 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
585 goto out; 595 goto out;
596 }
586 } 597 }
587 598
588 switch (sock->state) { 599 switch (sock->state) {
@@ -593,7 +604,10 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
593 err = -EISCONN; 604 err = -EISCONN;
594 goto out; 605 goto out;
595 case SS_CONNECTING: 606 case SS_CONNECTING:
596 err = -EALREADY; 607 if (inet_sk(sk)->defer_connect)
608 err = -EINPROGRESS;
609 else
610 err = -EALREADY;
597 /* Fall out of switch with err, set for this state */ 611 /* Fall out of switch with err, set for this state */
598 break; 612 break;
599 case SS_UNCONNECTED: 613 case SS_UNCONNECTED:
@@ -607,6 +621,9 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
607 621
608 sock->state = SS_CONNECTING; 622 sock->state = SS_CONNECTING;
609 623
624 if (!err && inet_sk(sk)->defer_connect)
625 goto out;
626
610 /* Just entered SS_CONNECTING state; the only 627 /* Just entered SS_CONNECTING state; the only
611 * difference is that return value in non-blocking 628 * difference is that return value in non-blocking
612 * case is EINPROGRESS, rather than EALREADY. 629 * case is EINPROGRESS, rather than EALREADY.
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c43eb1a831d7..d9735b76d073 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -533,6 +533,12 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
533 533
534 if (tp->urg_data & TCP_URG_VALID) 534 if (tp->urg_data & TCP_URG_VALID)
535 mask |= POLLPRI; 535 mask |= POLLPRI;
536 } else if (sk->sk_state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
537 /* Active TCP fastopen socket with defer_connect
538 * Return POLLOUT so application can call write()
539 * in order for kernel to generate SYN+data
540 */
541 mask |= POLLOUT | POLLWRNORM;
536 } 542 }
537 /* This barrier is coupled with smp_wmb() in tcp_reset() */ 543 /* This barrier is coupled with smp_wmb() in tcp_reset() */
538 smp_rmb(); 544 smp_rmb();
@@ -1071,6 +1077,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
1071 int *copied, size_t size) 1077 int *copied, size_t size)
1072{ 1078{
1073 struct tcp_sock *tp = tcp_sk(sk); 1079 struct tcp_sock *tp = tcp_sk(sk);
1080 struct inet_sock *inet = inet_sk(sk);
1074 int err, flags; 1081 int err, flags;
1075 1082
1076 if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) 1083 if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE))
@@ -1085,9 +1092,19 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
1085 tp->fastopen_req->data = msg; 1092 tp->fastopen_req->data = msg;
1086 tp->fastopen_req->size = size; 1093 tp->fastopen_req->size = size;
1087 1094
1095 if (inet->defer_connect) {
1096 err = tcp_connect(sk);
1097 /* Same failure procedure as in tcp_v4/6_connect */
1098 if (err) {
1099 tcp_set_state(sk, TCP_CLOSE);
1100 inet->inet_dport = 0;
1101 sk->sk_route_caps = 0;
1102 }
1103 }
1088 flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; 1104 flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
1089 err = __inet_stream_connect(sk->sk_socket, msg->msg_name, 1105 err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
1090 msg->msg_namelen, flags); 1106 msg->msg_namelen, flags);
1107 inet->defer_connect = 0;
1091 *copied = tp->fastopen_req->copied; 1108 *copied = tp->fastopen_req->copied;
1092 tcp_free_fastopen_req(tp); 1109 tcp_free_fastopen_req(tp);
1093 return err; 1110 return err;
@@ -1107,7 +1124,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
1107 lock_sock(sk); 1124 lock_sock(sk);
1108 1125
1109 flags = msg->msg_flags; 1126 flags = msg->msg_flags;
1110 if (flags & MSG_FASTOPEN) { 1127 if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) {
1111 err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size); 1128 err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
1112 if (err == -EINPROGRESS && copied_syn > 0) 1129 if (err == -EINPROGRESS && copied_syn > 0)
1113 goto out; 1130 goto out;
@@ -2656,6 +2673,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2656 err = -EINVAL; 2673 err = -EINVAL;
2657 } 2674 }
2658 break; 2675 break;
2676 case TCP_FASTOPEN_CONNECT:
2677 if (val > 1 || val < 0) {
2678 err = -EINVAL;
2679 } else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
2680 if (sk->sk_state == TCP_CLOSE)
2681 tp->fastopen_connect = val;
2682 else
2683 err = -EINVAL;
2684 } else {
2685 err = -EOPNOTSUPP;
2686 }
2687 break;
2659 case TCP_TIMESTAMP: 2688 case TCP_TIMESTAMP:
2660 if (!tp->repair) 2689 if (!tp->repair)
2661 err = -EPERM; 2690 err = -EPERM;
@@ -3016,6 +3045,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
3016 val = icsk->icsk_accept_queue.fastopenq.max_qlen; 3045 val = icsk->icsk_accept_queue.fastopenq.max_qlen;
3017 break; 3046 break;
3018 3047
3048 case TCP_FASTOPEN_CONNECT:
3049 val = tp->fastopen_connect;
3050 break;
3051
3019 case TCP_TIMESTAMP: 3052 case TCP_TIMESTAMP:
3020 val = tcp_time_stamp + tp->tsoffset; 3053 val = tcp_time_stamp + tp->tsoffset;
3021 break; 3054 break;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index f90e09e1ff4c..9674bec4a0f8 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -346,3 +346,36 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
346 } 346 }
347 return cookie->len > 0; 347 return cookie->len > 0;
348} 348}
349
350/* This function checks if we want to defer sending SYN until the first
351 * write(). We defer under the following conditions:
352 * 1. fastopen_connect sockopt is set
353 * 2. we have a valid cookie
354 * Return value: return true if we want to defer until application writes data
355 * return false if we want to send out SYN immediately
356 */
357bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
358{
359 struct tcp_fastopen_cookie cookie = { .len = 0 };
360 struct tcp_sock *tp = tcp_sk(sk);
361 u16 mss;
362
363 if (tp->fastopen_connect && !tp->fastopen_req) {
364 if (tcp_fastopen_cookie_check(sk, &mss, &cookie)) {
365 inet_sk(sk)->defer_connect = 1;
366 return true;
367 }
368
369 /* Alloc fastopen_req in order for FO option to be included
370 * in SYN
371 */
372 tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req),
373 sk->sk_allocation);
374 if (tp->fastopen_req)
375 tp->fastopen_req->cookie = cookie;
376 else
377 *err = -ENOBUFS;
378 }
379 return false;
380}
381EXPORT_SYMBOL(tcp_fastopen_defer_connect);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a90b4540c11e..8c9e9aa17d66 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -232,6 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
232 /* OK, now commit destination to socket. */ 232 /* OK, now commit destination to socket. */
233 sk->sk_gso_type = SKB_GSO_TCPV4; 233 sk->sk_gso_type = SKB_GSO_TCPV4;
234 sk_setup_caps(sk, &rt->dst); 234 sk_setup_caps(sk, &rt->dst);
235 rt = NULL;
235 236
236 if (!tp->write_seq && likely(!tp->repair)) 237 if (!tp->write_seq && likely(!tp->repair))
237 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, 238 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
@@ -242,9 +243,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
242 243
243 inet->inet_id = tp->write_seq ^ jiffies; 244 inet->inet_id = tp->write_seq ^ jiffies;
244 245
246 if (tcp_fastopen_defer_connect(sk, &err))
247 return err;
248 if (err)
249 goto failure;
250
245 err = tcp_connect(sk); 251 err = tcp_connect(sk);
246 252
247 rt = NULL;
248 if (err) 253 if (err)
249 goto failure; 254 goto failure;
250 255
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0b7cd3d009b6..95c05e5293b1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -287,6 +287,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
287 inet->inet_dport, 287 inet->inet_dport,
288 &tp->tsoffset); 288 &tp->tsoffset);
289 289
290 if (tcp_fastopen_defer_connect(sk, &err))
291 return err;
292 if (err)
293 goto late_failure;
294
290 err = tcp_connect(sk); 295 err = tcp_connect(sk);
291 if (err) 296 if (err)
292 goto late_failure; 297 goto late_failure;