diff options
author | Jerry Chu <hkchu@google.com> | 2012-08-31 08:29:12 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-08-31 20:02:19 -0400 |
commit | 8336886f786fdacbc19b719c1f7ea91eb70706d4 (patch) | |
tree | c1fa912f7583ce0ffcb5ae673802da4a7dfb3b19 /net/ipv4/tcp.c | |
parent | 1046716368979dee857a2b8a91c4a8833f21b9cb (diff) |
tcp: TCP Fast Open Server - support TFO listeners
This patch builds on top of the previous patch to add the support
for TFO listeners. This includes -
1. allocating, properly initializing, and managing the per listener
fastopen_queue structure when TFO is enabled
2. changes to the inet_csk_accept code to support TFO. E.g., the
request_sock can no longer be freed upon accept(), not until 3WHS
finishes
3. allowing a TCP_SYN_RECV socket to properly poll() and sendmsg()
if it's a TFO socket
4. properly closing a TFO listener, and a TFO socket before 3WHS
finishes
5. supporting TCP_FASTOPEN socket option
6. modifying tcp_check_req() so that it can be used to check a TFO
socket as well as a request_sock
7. supporting TCP's TFO cookie option
8. adding a new SYN-ACK retransmit handler to use the timer directly
off the TFO socket rather than the listener socket. Note that the TFO
server side will not retransmit anything other than SYN-ACK until
the 3WHS is completed.
The patch also contains an important function
"reqsk_fastopen_remove()" to manage the somewhat complex relation
between a listener, its request_sock, and the corresponding child
socket. See the comment above the function for details.
Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 49 |
1 files changed, 42 insertions, 7 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2109ff4a1daf..df83d744e380 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -486,8 +486,9 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
486 | if (sk->sk_shutdown & RCV_SHUTDOWN) | 486 | if (sk->sk_shutdown & RCV_SHUTDOWN) |
487 | mask |= POLLIN | POLLRDNORM | POLLRDHUP; | 487 | mask |= POLLIN | POLLRDNORM | POLLRDHUP; |
488 | 488 | ||
489 | /* Connected? */ | 489 | /* Connected or passive Fast Open socket? */ |
490 | if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) { | 490 | if (sk->sk_state != TCP_SYN_SENT && |
491 | (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk != NULL)) { | ||
491 | int target = sock_rcvlowat(sk, 0, INT_MAX); | 492 | int target = sock_rcvlowat(sk, 0, INT_MAX); |
492 | 493 | ||
493 | if (tp->urg_seq == tp->copied_seq && | 494 | if (tp->urg_seq == tp->copied_seq && |
@@ -840,10 +841,15 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse | |||
840 | ssize_t copied; | 841 | ssize_t copied; |
841 | long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); | 842 | long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); |
842 | 843 | ||
843 | /* Wait for a connection to finish. */ | 844 | /* Wait for a connection to finish. One exception is TCP Fast Open |
844 | if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) | 845 | * (passive side) where data is allowed to be sent before a connection |
846 | * is fully established. | ||
847 | */ | ||
848 | if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && | ||
849 | !tcp_passive_fastopen(sk)) { | ||
845 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) | 850 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) |
846 | goto out_err; | 851 | goto out_err; |
852 | } | ||
847 | 853 | ||
848 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 854 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
849 | 855 | ||
@@ -1042,10 +1048,15 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1042 | 1048 | ||
1043 | timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); | 1049 | timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); |
1044 | 1050 | ||
1045 | /* Wait for a connection to finish. */ | 1051 | /* Wait for a connection to finish. One exception is TCP Fast Open |
1046 | if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) | 1052 | * (passive side) where data is allowed to be sent before a connection |
1053 | * is fully established. | ||
1054 | */ | ||
1055 | if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && | ||
1056 | !tcp_passive_fastopen(sk)) { | ||
1047 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) | 1057 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) |
1048 | goto do_error; | 1058 | goto do_error; |
1059 | } | ||
1049 | 1060 | ||
1050 | if (unlikely(tp->repair)) { | 1061 | if (unlikely(tp->repair)) { |
1051 | if (tp->repair_queue == TCP_RECV_QUEUE) { | 1062 | if (tp->repair_queue == TCP_RECV_QUEUE) { |
@@ -2144,6 +2155,10 @@ void tcp_close(struct sock *sk, long timeout) | |||
2144 | * they look as CLOSING or LAST_ACK for Linux) | 2155 | * they look as CLOSING or LAST_ACK for Linux) |
2145 | * Probably, I missed some more holelets. | 2156 | * Probably, I missed some more holelets. |
2146 | * --ANK | 2157 | * --ANK |
2158 | * XXX (TFO) - To start off we don't support SYN+ACK+FIN | ||
2159 | * in a single packet! (May consider it later but will | ||
2160 | * probably need API support or TCP_CORK SYN-ACK until | ||
2161 | * data is written and socket is closed.) | ||
2147 | */ | 2162 | */ |
2148 | tcp_send_fin(sk); | 2163 | tcp_send_fin(sk); |
2149 | } | 2164 | } |
@@ -2215,8 +2230,16 @@ adjudge_to_death: | |||
2215 | } | 2230 | } |
2216 | } | 2231 | } |
2217 | 2232 | ||
2218 | if (sk->sk_state == TCP_CLOSE) | 2233 | if (sk->sk_state == TCP_CLOSE) { |
2234 | struct request_sock *req = tcp_sk(sk)->fastopen_rsk; | ||
2235 | /* We could get here with a non-NULL req if the socket is | ||
2236 | * aborted (e.g., closed with unread data) before 3WHS | ||
2237 | * finishes. | ||
2238 | */ | ||
2239 | if (req != NULL) | ||
2240 | reqsk_fastopen_remove(sk, req, false); | ||
2219 | inet_csk_destroy_sock(sk); | 2241 | inet_csk_destroy_sock(sk); |
2242 | } | ||
2220 | /* Otherwise, socket is reprieved until protocol close. */ | 2243 | /* Otherwise, socket is reprieved until protocol close. */ |
2221 | 2244 | ||
2222 | out: | 2245 | out: |
@@ -2688,6 +2711,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2688 | else | 2711 | else |
2689 | icsk->icsk_user_timeout = msecs_to_jiffies(val); | 2712 | icsk->icsk_user_timeout = msecs_to_jiffies(val); |
2690 | break; | 2713 | break; |
2714 | |||
2715 | case TCP_FASTOPEN: | ||
2716 | if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | | ||
2717 | TCPF_LISTEN))) | ||
2718 | err = fastopen_init_queue(sk, val); | ||
2719 | else | ||
2720 | err = -EINVAL; | ||
2721 | break; | ||
2691 | default: | 2722 | default: |
2692 | err = -ENOPROTOOPT; | 2723 | err = -ENOPROTOOPT; |
2693 | break; | 2724 | break; |
@@ -3501,11 +3532,15 @@ EXPORT_SYMBOL(tcp_cookie_generator); | |||
3501 | 3532 | ||
3502 | void tcp_done(struct sock *sk) | 3533 | void tcp_done(struct sock *sk) |
3503 | { | 3534 | { |
3535 | struct request_sock *req = tcp_sk(sk)->fastopen_rsk; | ||
3536 | |||
3504 | if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) | 3537 | if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) |
3505 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); | 3538 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); |
3506 | 3539 | ||
3507 | tcp_set_state(sk, TCP_CLOSE); | 3540 | tcp_set_state(sk, TCP_CLOSE); |
3508 | tcp_clear_xmit_timers(sk); | 3541 | tcp_clear_xmit_timers(sk); |
3542 | if (req != NULL) | ||
3543 | reqsk_fastopen_remove(sk, req, false); | ||
3509 | 3544 | ||
3510 | sk->sk_shutdown = SHUTDOWN_MASK; | 3545 | sk->sk_shutdown = SHUTDOWN_MASK; |
3511 | 3546 | ||