diff options
-rw-r--r-- | include/linux/tcp.h | 3 | ||||
-rw-r--r-- | include/net/inet_sock.h | 6 | ||||
-rw-r--r-- | include/net/tcp.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/tcp.h | 1 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 31 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 35 | ||||
-rw-r--r-- | net/ipv4/tcp_fastopen.c | 33 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 7 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 5 |
9 files changed, 111 insertions, 11 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 5371b3d70cfe..f88f4649ba6f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -222,7 +222,8 @@ struct tcp_sock { | |||
222 | u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ | 222 | u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ |
223 | u8 chrono_type:2, /* current chronograph type */ | 223 | u8 chrono_type:2, /* current chronograph type */ |
224 | rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ | 224 | rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ |
225 | unused:5; | 225 | fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ |
226 | unused:4; | ||
226 | u8 nonagle : 4,/* Disable Nagle algorithm? */ | 227 | u8 nonagle : 4,/* Disable Nagle algorithm? */ |
227 | thin_lto : 1,/* Use linear timeouts for thin streams */ | 228 | thin_lto : 1,/* Use linear timeouts for thin streams */ |
228 | unused1 : 1, | 229 | unused1 : 1, |
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index c9cff977a7fb..aa95053dfc78 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -206,7 +206,11 @@ struct inet_sock { | |||
206 | transparent:1, | 206 | transparent:1, |
207 | mc_all:1, | 207 | mc_all:1, |
208 | nodefrag:1; | 208 | nodefrag:1; |
209 | __u8 bind_address_no_port:1; | 209 | __u8 bind_address_no_port:1, |
210 | defer_connect:1; /* Indicates that fastopen_connect is set | ||
211 | * and cookie exists so we defer connect | ||
212 | * until first data frame is written | ||
213 | */ | ||
210 | __u8 rcv_tos; | 214 | __u8 rcv_tos; |
211 | __u8 convert_csum; | 215 | __u8 convert_csum; |
212 | int uc_index; | 216 | int uc_index; |
diff --git a/include/net/tcp.h b/include/net/tcp.h
index de67541d7adf..6ec4ea652f3f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1495,6 +1495,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, | |||
1495 | void tcp_fastopen_init_key_once(bool publish); | 1495 | void tcp_fastopen_init_key_once(bool publish); |
1496 | bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, | 1496 | bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, |
1497 | struct tcp_fastopen_cookie *cookie); | 1497 | struct tcp_fastopen_cookie *cookie); |
1498 | bool tcp_fastopen_defer_connect(struct sock *sk, int *err); | ||
1498 | #define TCP_FASTOPEN_KEY_LENGTH 16 | 1499 | #define TCP_FASTOPEN_KEY_LENGTH 16 |
1499 | 1500 | ||
1500 | /* Fastopen key context */ | 1501 | /* Fastopen key context */ |
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index c53de2691cec..6ff35eb48d10 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -116,6 +116,7 @@ enum { | |||
116 | #define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ | 116 | #define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ |
117 | #define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ | 117 | #define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ |
118 | #define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */ | 118 | #define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */ |
119 | #define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */ | ||
119 | 120 | ||
120 | struct tcp_repair_opt { | 121 | struct tcp_repair_opt { |
121 | __u32 opt_code; | 122 | __u32 opt_code; |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 28fe8da4e1ac..92e7f3e957fa 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -576,13 +576,24 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, | |||
576 | int err; | 576 | int err; |
577 | long timeo; | 577 | long timeo; |
578 | 578 | ||
579 | if (addr_len < sizeof(uaddr->sa_family)) | 579 | /* |
580 | return -EINVAL; | 580 | * uaddr can be NULL and addr_len can be 0 if: |
581 | * sk is a TCP fastopen active socket and | ||
582 | * TCP_FASTOPEN_CONNECT sockopt is set and | ||
583 | * we already have a valid cookie for this socket. | ||
584 | * In this case, user can call write() after connect(). | ||
585 | * write() will invoke tcp_sendmsg_fastopen() which calls | ||
586 | * __inet_stream_connect(). | ||
587 | */ | ||
588 | if (uaddr) { | ||
589 | if (addr_len < sizeof(uaddr->sa_family)) | ||
590 | return -EINVAL; | ||
581 | 591 | ||
582 | if (uaddr->sa_family == AF_UNSPEC) { | 592 | if (uaddr->sa_family == AF_UNSPEC) { |
583 | err = sk->sk_prot->disconnect(sk, flags); | 593 | err = sk->sk_prot->disconnect(sk, flags); |
584 | sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; | 594 | sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; |
585 | goto out; | 595 | goto out; |
596 | } | ||
586 | } | 597 | } |
587 | 598 | ||
588 | switch (sock->state) { | 599 | switch (sock->state) { |
@@ -593,7 +604,10 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, | |||
593 | err = -EISCONN; | 604 | err = -EISCONN; |
594 | goto out; | 605 | goto out; |
595 | case SS_CONNECTING: | 606 | case SS_CONNECTING: |
596 | err = -EALREADY; | 607 | if (inet_sk(sk)->defer_connect) |
608 | err = -EINPROGRESS; | ||
609 | else | ||
610 | err = -EALREADY; | ||
597 | /* Fall out of switch with err, set for this state */ | 611 | /* Fall out of switch with err, set for this state */ |
598 | break; | 612 | break; |
599 | case SS_UNCONNECTED: | 613 | case SS_UNCONNECTED: |
@@ -607,6 +621,9 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, | |||
607 | 621 | ||
608 | sock->state = SS_CONNECTING; | 622 | sock->state = SS_CONNECTING; |
609 | 623 | ||
624 | if (!err && inet_sk(sk)->defer_connect) | ||
625 | goto out; | ||
626 | |||
610 | /* Just entered SS_CONNECTING state; the only | 627 | /* Just entered SS_CONNECTING state; the only |
611 | * difference is that return value in non-blocking | 628 | * difference is that return value in non-blocking |
612 | * case is EINPROGRESS, rather than EALREADY. | 629 | * case is EINPROGRESS, rather than EALREADY. |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c43eb1a831d7..d9735b76d073 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -533,6 +533,12 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
533 | 533 | ||
534 | if (tp->urg_data & TCP_URG_VALID) | 534 | if (tp->urg_data & TCP_URG_VALID) |
535 | mask |= POLLPRI; | 535 | mask |= POLLPRI; |
536 | } else if (sk->sk_state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { | ||
537 | /* Active TCP fastopen socket with defer_connect | ||
538 | * Return POLLOUT so application can call write() | ||
539 | * in order for kernel to generate SYN+data | ||
540 | */ | ||
541 | mask |= POLLOUT | POLLWRNORM; | ||
536 | } | 542 | } |
537 | /* This barrier is coupled with smp_wmb() in tcp_reset() */ | 543 | /* This barrier is coupled with smp_wmb() in tcp_reset() */ |
538 | smp_rmb(); | 544 | smp_rmb(); |
@@ -1071,6 +1077,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, | |||
1071 | int *copied, size_t size) | 1077 | int *copied, size_t size) |
1072 | { | 1078 | { |
1073 | struct tcp_sock *tp = tcp_sk(sk); | 1079 | struct tcp_sock *tp = tcp_sk(sk); |
1080 | struct inet_sock *inet = inet_sk(sk); | ||
1074 | int err, flags; | 1081 | int err, flags; |
1075 | 1082 | ||
1076 | if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) | 1083 | if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) |
@@ -1085,9 +1092,19 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, | |||
1085 | tp->fastopen_req->data = msg; | 1092 | tp->fastopen_req->data = msg; |
1086 | tp->fastopen_req->size = size; | 1093 | tp->fastopen_req->size = size; |
1087 | 1094 | ||
1095 | if (inet->defer_connect) { | ||
1096 | err = tcp_connect(sk); | ||
1097 | /* Same failure procedure as in tcp_v4/6_connect */ | ||
1098 | if (err) { | ||
1099 | tcp_set_state(sk, TCP_CLOSE); | ||
1100 | inet->inet_dport = 0; | ||
1101 | sk->sk_route_caps = 0; | ||
1102 | } | ||
1103 | } | ||
1088 | flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; | 1104 | flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; |
1089 | err = __inet_stream_connect(sk->sk_socket, msg->msg_name, | 1105 | err = __inet_stream_connect(sk->sk_socket, msg->msg_name, |
1090 | msg->msg_namelen, flags); | 1106 | msg->msg_namelen, flags); |
1107 | inet->defer_connect = 0; | ||
1091 | *copied = tp->fastopen_req->copied; | 1108 | *copied = tp->fastopen_req->copied; |
1092 | tcp_free_fastopen_req(tp); | 1109 | tcp_free_fastopen_req(tp); |
1093 | return err; | 1110 | return err; |
@@ -1107,7 +1124,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) | |||
1107 | lock_sock(sk); | 1124 | lock_sock(sk); |
1108 | 1125 | ||
1109 | flags = msg->msg_flags; | 1126 | flags = msg->msg_flags; |
1110 | if (flags & MSG_FASTOPEN) { | 1127 | if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) { |
1111 | err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size); | 1128 | err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size); |
1112 | if (err == -EINPROGRESS && copied_syn > 0) | 1129 | if (err == -EINPROGRESS && copied_syn > 0) |
1113 | goto out; | 1130 | goto out; |
@@ -2656,6 +2673,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2656 | err = -EINVAL; | 2673 | err = -EINVAL; |
2657 | } | 2674 | } |
2658 | break; | 2675 | break; |
2676 | case TCP_FASTOPEN_CONNECT: | ||
2677 | if (val > 1 || val < 0) { | ||
2678 | err = -EINVAL; | ||
2679 | } else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { | ||
2680 | if (sk->sk_state == TCP_CLOSE) | ||
2681 | tp->fastopen_connect = val; | ||
2682 | else | ||
2683 | err = -EINVAL; | ||
2684 | } else { | ||
2685 | err = -EOPNOTSUPP; | ||
2686 | } | ||
2687 | break; | ||
2659 | case TCP_TIMESTAMP: | 2688 | case TCP_TIMESTAMP: |
2660 | if (!tp->repair) | 2689 | if (!tp->repair) |
2661 | err = -EPERM; | 2690 | err = -EPERM; |
@@ -3016,6 +3045,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
3016 | val = icsk->icsk_accept_queue.fastopenq.max_qlen; | 3045 | val = icsk->icsk_accept_queue.fastopenq.max_qlen; |
3017 | break; | 3046 | break; |
3018 | 3047 | ||
3048 | case TCP_FASTOPEN_CONNECT: | ||
3049 | val = tp->fastopen_connect; | ||
3050 | break; | ||
3051 | |||
3019 | case TCP_TIMESTAMP: | 3052 | case TCP_TIMESTAMP: |
3020 | val = tcp_time_stamp + tp->tsoffset; | 3053 | val = tcp_time_stamp + tp->tsoffset; |
3021 | break; | 3054 | break; |
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index f90e09e1ff4c..9674bec4a0f8 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -346,3 +346,36 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, | |||
346 | } | 346 | } |
347 | return cookie->len > 0; | 347 | return cookie->len > 0; |
348 | } | 348 | } |
349 | |||
350 | /* This function checks if we want to defer sending SYN until the first | ||
351 | * write(). We defer under the following conditions: | ||
352 | * 1. fastopen_connect sockopt is set | ||
353 | * 2. we have a valid cookie | ||
354 | * Return value: return true if we want to defer until application writes data | ||
355 | * return false if we want to send out SYN immediately | ||
356 | */ | ||
357 | bool tcp_fastopen_defer_connect(struct sock *sk, int *err) | ||
358 | { | ||
359 | struct tcp_fastopen_cookie cookie = { .len = 0 }; | ||
360 | struct tcp_sock *tp = tcp_sk(sk); | ||
361 | u16 mss; | ||
362 | |||
363 | if (tp->fastopen_connect && !tp->fastopen_req) { | ||
364 | if (tcp_fastopen_cookie_check(sk, &mss, &cookie)) { | ||
365 | inet_sk(sk)->defer_connect = 1; | ||
366 | return true; | ||
367 | } | ||
368 | |||
369 | /* Alloc fastopen_req in order for FO option to be included | ||
370 | * in SYN | ||
371 | */ | ||
372 | tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req), | ||
373 | sk->sk_allocation); | ||
374 | if (tp->fastopen_req) | ||
375 | tp->fastopen_req->cookie = cookie; | ||
376 | else | ||
377 | *err = -ENOBUFS; | ||
378 | } | ||
379 | return false; | ||
380 | } | ||
381 | EXPORT_SYMBOL(tcp_fastopen_defer_connect); | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a90b4540c11e..8c9e9aa17d66 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -232,6 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
232 | /* OK, now commit destination to socket. */ | 232 | /* OK, now commit destination to socket. */ |
233 | sk->sk_gso_type = SKB_GSO_TCPV4; | 233 | sk->sk_gso_type = SKB_GSO_TCPV4; |
234 | sk_setup_caps(sk, &rt->dst); | 234 | sk_setup_caps(sk, &rt->dst); |
235 | rt = NULL; | ||
235 | 236 | ||
236 | if (!tp->write_seq && likely(!tp->repair)) | 237 | if (!tp->write_seq && likely(!tp->repair)) |
237 | tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, | 238 | tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, |
@@ -242,9 +243,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
242 | 243 | ||
243 | inet->inet_id = tp->write_seq ^ jiffies; | 244 | inet->inet_id = tp->write_seq ^ jiffies; |
244 | 245 | ||
246 | if (tcp_fastopen_defer_connect(sk, &err)) | ||
247 | return err; | ||
248 | if (err) | ||
249 | goto failure; | ||
250 | |||
245 | err = tcp_connect(sk); | 251 | err = tcp_connect(sk); |
246 | 252 | ||
247 | rt = NULL; | ||
248 | if (err) | 253 | if (err) |
249 | goto failure; | 254 | goto failure; |
250 | 255 | ||
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0b7cd3d009b6..95c05e5293b1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -287,6 +287,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
287 | inet->inet_dport, | 287 | inet->inet_dport, |
288 | &tp->tsoffset); | 288 | &tp->tsoffset); |
289 | 289 | ||
290 | if (tcp_fastopen_defer_connect(sk, &err)) | ||
291 | return err; | ||
292 | if (err) | ||
293 | goto late_failure; | ||
294 | |||
290 | err = tcp_connect(sk); | 295 | err = tcp_connect(sk); |
291 | if (err) | 296 | if (err) |
292 | goto late_failure; | 297 | goto late_failure; |