author     Jerry Chu <hkchu@google.com>             2012-08-31 08:29:12 -0400
committer  David S. Miller <davem@davemloft.net>    2012-08-31 20:02:19 -0400
commit     8336886f786fdacbc19b719c1f7ea91eb70706d4 (patch)
tree       c1fa912f7583ce0ffcb5ae673802da4a7dfb3b19 /net/ipv4/inet_connection_sock.c
parent     1046716368979dee857a2b8a91c4a8833f21b9cb (diff)
tcp: TCP Fast Open Server - support TFO listeners
This patch builds on top of the previous patch to add support
for TFO listeners. This includes:
1. allocating, properly initializing, and managing the per listener
fastopen_queue structure when TFO is enabled
2. changes to the inet_csk_accept code to support TFO. E.g., the
request_sock can no longer be freed upon accept(); it must be kept
until the 3WHS finishes
3. allowing a TCP_SYN_RECV socket to properly poll() and sendmsg()
if it's a TFO socket
4. properly closing a TFO listener, and a TFO socket before 3WHS
finishes
5. supporting the TCP_FASTOPEN socket option (a usage sketch follows
this list)
6. modifying tcp_check_req() to check a TFO socket as well as a
request_sock
7. supporting TCP's TFO cookie option
8. adding a new SYN-ACK retransmit handler to use the timer directly
off the TFO socket rather than the listener socket. Note that TFO
server side will not retransmit anything other than SYN-ACK until
the 3WHS is completed.
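
For orientation only (this is not part of the patch), a minimal user-space
sketch of how a server could enable TFO once this series is in: the value
passed with TCP_FASTOPEN bounds the number of pending TFO requests the
listener will queue. The function name, port handling, and queue length
below are illustrative assumptions, not taken from the patch, and the
snippet assumes a libc whose <netinet/tcp.h> defines TCP_FASTOPEN.

/* Illustrative only: enable TCP Fast Open on a listening socket. */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

static int make_tfo_listener(unsigned short port)
{
	struct sockaddr_in addr;
	int qlen = 16;	/* max pending TFO requests; 16 is arbitrary */
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	addr.sin_port = htons(port);

	/* TCP_FASTOPEN may be set while the socket is closed or listening. */
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0 ||
	    listen(fd, 128) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}

Note that this only arms the listener; clients still have to request
data-in-SYN themselves (the client-side patches added MSG_FASTOPEN for
that), so ordinary clients are unaffected.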
The patch also contains an important function
"reqsk_fastopen_remove()" to manage the somewhat complex relation
between a listener, its request_sock, and the corresponding child
socket. See the comment above the function for details; a simplified
sketch of that relation follows below.
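
As a rough aid to reading the hunks below, here is a simplified sketch of
the hand-off that helper has to make. It is NOT the real body of
reqsk_fastopen_remove(): fastopen queue locking, qlen accounting and the
"listener already closed" case are omitted, and on an aborted handshake
the real helper parks the req instead of freeing it. Only the field names
match those visible in the diff.

/* Simplified sketch -- not the actual reqsk_fastopen_remove() body. */
#include <net/tcp.h>

static void fastopen_remove_sketch(struct sock *child,
				   struct request_sock *req, bool reset)
{
	struct sock *lsk = tcp_rsk(req)->listener;

	tcp_sk(child)->fastopen_rsk = NULL;	/* child no longer pins req */
	tcp_rsk(req)->listener = NULL;		/* req no longer pins listener */

	if (req->sk != NULL) {
		/* accept() has not picked up the child yet: leave req on the
		 * listener's accept queue; the accept() path frees it later
		 * (see the inet_csk_accept() hunk below).
		 */
		sock_put(lsk);
		return;
	}

	if (!reset) {
		/* 3WHS completed and the child was already accept()ed, so
		 * nothing references req any more.
		 */
		__reqsk_free(req);
	} else {
		/* Handshake aborted by RST: the real helper instead parks
		 * req on fastopenq->rskq_rst_head for a while as a spoofing
		 * defense; inet_csk_listen_stop() drains that list (last
		 * hunk below). Simplified here to an immediate free.
		 */
		__reqsk_free(req);
	}
	sock_put(lsk);
}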
Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/inet_connection_sock.c')
-rw-r--r--  net/ipv4/inet_connection_sock.c | 57
1 file changed, 51 insertions(+), 6 deletions(-)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7f75f21d7b83..8464b79c493f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -283,7 +283,9 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
 struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
 	struct sock *newsk;
+	struct request_sock *req;
 	int error;
 
 	lock_sock(sk);
@@ -296,7 +298,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 		goto out_err;
 
 	/* Find already established connection */
-	if (reqsk_queue_empty(&icsk->icsk_accept_queue)) {
+	if (reqsk_queue_empty(queue)) {
 		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
 
 		/* If this is a non blocking socket don't sleep */
@@ -308,14 +310,32 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 		if (error)
 			goto out_err;
 	}
-
-	newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
-	WARN_ON(newsk->sk_state == TCP_SYN_RECV);
+	req = reqsk_queue_remove(queue);
+	newsk = req->sk;
+
+	sk_acceptq_removed(sk);
+	if (sk->sk_type == SOCK_STREAM && queue->fastopenq != NULL) {
+		spin_lock_bh(&queue->fastopenq->lock);
+		if (tcp_rsk(req)->listener) {
+			/* We are still waiting for the final ACK from 3WHS
+			 * so can't free req now. Instead, we set req->sk to
+			 * NULL to signify that the child socket is taken
+			 * so reqsk_fastopen_remove() will free the req
+			 * when 3WHS finishes (or is aborted).
+			 */
+			req->sk = NULL;
+			req = NULL;
+		}
+		spin_unlock_bh(&queue->fastopenq->lock);
+	}
 out:
 	release_sock(sk);
+	if (req)
+		__reqsk_free(req);
 	return newsk;
 out_err:
 	newsk = NULL;
+	req = NULL;
 	*err = error;
 	goto out;
 }
@@ -720,13 +740,14 @@ EXPORT_SYMBOL_GPL(inet_csk_listen_start);
 void inet_csk_listen_stop(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
 	struct request_sock *acc_req;
 	struct request_sock *req;
 
 	inet_csk_delete_keepalive_timer(sk);
 
 	/* make all the listen_opt local to us */
-	acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
+	acc_req = reqsk_queue_yank_acceptq(queue);
 
 	/* Following specs, it would be better either to send FIN
 	 * (and enter FIN-WAIT-1, it is normal close)
@@ -736,7 +757,7 @@ void inet_csk_listen_stop(struct sock *sk)
 	 * To be honest, we are not able to make either
 	 * of the variants now.			--ANK
 	 */
-	reqsk_queue_destroy(&icsk->icsk_accept_queue);
+	reqsk_queue_destroy(queue);
 
 	while ((req = acc_req) != NULL) {
 		struct sock *child = req->sk;
@@ -754,6 +775,19 @@ void inet_csk_listen_stop(struct sock *sk)
 
 		percpu_counter_inc(sk->sk_prot->orphan_count);
 
+		if (sk->sk_type == SOCK_STREAM && tcp_rsk(req)->listener) {
+			BUG_ON(tcp_sk(child)->fastopen_rsk != req);
+			BUG_ON(sk != tcp_rsk(req)->listener);
+
+			/* Paranoid, to prevent race condition if
+			 * an inbound pkt destined for child is
+			 * blocked by sock lock in tcp_v4_rcv().
+			 * Also to satisfy an assertion in
+			 * tcp_v4_destroy_sock().
+			 */
+			tcp_sk(child)->fastopen_rsk = NULL;
+			sock_put(sk);
+		}
 		inet_csk_destroy_sock(child);
 
 		bh_unlock_sock(child);
@@ -763,6 +797,17 @@ void inet_csk_listen_stop(struct sock *sk)
 		sk_acceptq_removed(sk);
 		__reqsk_free(req);
 	}
+	if (queue->fastopenq != NULL) {
+		/* Free all the reqs queued in rskq_rst_head. */
+		spin_lock_bh(&queue->fastopenq->lock);
+		acc_req = queue->fastopenq->rskq_rst_head;
+		queue->fastopenq->rskq_rst_head = NULL;
+		spin_unlock_bh(&queue->fastopenq->lock);
+		while ((req = acc_req) != NULL) {
+			acc_req = req->dl_next;
+			__reqsk_free(req);
+		}
+	}
 	WARN_ON(sk->sk_ack_backlog);
 }
 EXPORT_SYMBOL_GPL(inet_csk_listen_stop);