aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/inet_connection_sock.c
diff options
context:
space:
mode:
authorJerry Chu <hkchu@google.com>2012-08-31 08:29:12 -0400
committerDavid S. Miller <davem@davemloft.net>2012-08-31 20:02:19 -0400
commit8336886f786fdacbc19b719c1f7ea91eb70706d4 (patch)
treec1fa912f7583ce0ffcb5ae673802da4a7dfb3b19 /net/ipv4/inet_connection_sock.c
parent1046716368979dee857a2b8a91c4a8833f21b9cb (diff)
tcp: TCP Fast Open Server - support TFO listeners
This patch builds on top of the previous patch to add the support for TFO listeners. This includes - 1. allocating, properly initializing, and managing the per listener fastopen_queue structure when TFO is enabled 2. changes to the inet_csk_accept code to support TFO. E.g., the request_sock can no longer be freed upon accept(), not until 3WHS finishes 3. allowing a TCP_SYN_RECV socket to properly poll() and sendmsg() if it's a TFO socket 4. properly closing a TFO listener, and a TFO socket before 3WHS finishes 5. supporting TCP_FASTOPEN socket option 6. modifying tcp_check_req() to use to check a TFO socket as well as request_sock 7. supporting TCP's TFO cookie option 8. adding a new SYN-ACK retransmit handler to use the timer directly off the TFO socket rather than the listener socket. Note that TFO server side will not retransmit anything other than SYN-ACK until the 3WHS is completed. The patch also contains an important function "reqsk_fastopen_remove()" to manage the somewhat complex relation between a listener, its request_sock, and the corresponding child socket. See the comment above the function for the detail. Signed-off-by: H.K. Jerry Chu <hkchu@google.com> Cc: Yuchung Cheng <ycheng@google.com> Cc: Neal Cardwell <ncardwell@google.com> Cc: Eric Dumazet <edumazet@google.com> Cc: Tom Herbert <therbert@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/inet_connection_sock.c')
-rw-r--r--net/ipv4/inet_connection_sock.c57
1 files changed, 51 insertions, 6 deletions
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7f75f21d7b83..8464b79c493f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -283,7 +283,9 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
283struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) 283struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
284{ 284{
285 struct inet_connection_sock *icsk = inet_csk(sk); 285 struct inet_connection_sock *icsk = inet_csk(sk);
286 struct request_sock_queue *queue = &icsk->icsk_accept_queue;
286 struct sock *newsk; 287 struct sock *newsk;
288 struct request_sock *req;
287 int error; 289 int error;
288 290
289 lock_sock(sk); 291 lock_sock(sk);
@@ -296,7 +298,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
296 goto out_err; 298 goto out_err;
297 299
298 /* Find already established connection */ 300 /* Find already established connection */
299 if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { 301 if (reqsk_queue_empty(queue)) {
300 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 302 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
301 303
302 /* If this is a non blocking socket don't sleep */ 304 /* If this is a non blocking socket don't sleep */
@@ -308,14 +310,32 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
308 if (error) 310 if (error)
309 goto out_err; 311 goto out_err;
310 } 312 }
311 313 req = reqsk_queue_remove(queue);
312 newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); 314 newsk = req->sk;
313 WARN_ON(newsk->sk_state == TCP_SYN_RECV); 315
316 sk_acceptq_removed(sk);
317 if (sk->sk_type == SOCK_STREAM && queue->fastopenq != NULL) {
318 spin_lock_bh(&queue->fastopenq->lock);
319 if (tcp_rsk(req)->listener) {
320 /* We are still waiting for the final ACK from 3WHS
321 * so can't free req now. Instead, we set req->sk to
322 * NULL to signify that the child socket is taken
323 * so reqsk_fastopen_remove() will free the req
324 * when 3WHS finishes (or is aborted).
325 */
326 req->sk = NULL;
327 req = NULL;
328 }
329 spin_unlock_bh(&queue->fastopenq->lock);
330 }
314out: 331out:
315 release_sock(sk); 332 release_sock(sk);
333 if (req)
334 __reqsk_free(req);
316 return newsk; 335 return newsk;
317out_err: 336out_err:
318 newsk = NULL; 337 newsk = NULL;
338 req = NULL;
319 *err = error; 339 *err = error;
320 goto out; 340 goto out;
321} 341}
@@ -720,13 +740,14 @@ EXPORT_SYMBOL_GPL(inet_csk_listen_start);
720void inet_csk_listen_stop(struct sock *sk) 740void inet_csk_listen_stop(struct sock *sk)
721{ 741{
722 struct inet_connection_sock *icsk = inet_csk(sk); 742 struct inet_connection_sock *icsk = inet_csk(sk);
743 struct request_sock_queue *queue = &icsk->icsk_accept_queue;
723 struct request_sock *acc_req; 744 struct request_sock *acc_req;
724 struct request_sock *req; 745 struct request_sock *req;
725 746
726 inet_csk_delete_keepalive_timer(sk); 747 inet_csk_delete_keepalive_timer(sk);
727 748
728 /* make all the listen_opt local to us */ 749 /* make all the listen_opt local to us */
729 acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue); 750 acc_req = reqsk_queue_yank_acceptq(queue);
730 751
731 /* Following specs, it would be better either to send FIN 752 /* Following specs, it would be better either to send FIN
732 * (and enter FIN-WAIT-1, it is normal close) 753 * (and enter FIN-WAIT-1, it is normal close)
@@ -736,7 +757,7 @@ void inet_csk_listen_stop(struct sock *sk)
736 * To be honest, we are not able to make either 757 * To be honest, we are not able to make either
737 * of the variants now. --ANK 758 * of the variants now. --ANK
738 */ 759 */
739 reqsk_queue_destroy(&icsk->icsk_accept_queue); 760 reqsk_queue_destroy(queue);
740 761
741 while ((req = acc_req) != NULL) { 762 while ((req = acc_req) != NULL) {
742 struct sock *child = req->sk; 763 struct sock *child = req->sk;
@@ -754,6 +775,19 @@ void inet_csk_listen_stop(struct sock *sk)
754 775
755 percpu_counter_inc(sk->sk_prot->orphan_count); 776 percpu_counter_inc(sk->sk_prot->orphan_count);
756 777
778 if (sk->sk_type == SOCK_STREAM && tcp_rsk(req)->listener) {
779 BUG_ON(tcp_sk(child)->fastopen_rsk != req);
780 BUG_ON(sk != tcp_rsk(req)->listener);
781
782 /* Paranoid, to prevent race condition if
783 * an inbound pkt destined for child is
784 * blocked by sock lock in tcp_v4_rcv().
785 * Also to satisfy an assertion in
786 * tcp_v4_destroy_sock().
787 */
788 tcp_sk(child)->fastopen_rsk = NULL;
789 sock_put(sk);
790 }
757 inet_csk_destroy_sock(child); 791 inet_csk_destroy_sock(child);
758 792
759 bh_unlock_sock(child); 793 bh_unlock_sock(child);
@@ -763,6 +797,17 @@ void inet_csk_listen_stop(struct sock *sk)
763 sk_acceptq_removed(sk); 797 sk_acceptq_removed(sk);
764 __reqsk_free(req); 798 __reqsk_free(req);
765 } 799 }
800 if (queue->fastopenq != NULL) {
801 /* Free all the reqs queued in rskq_rst_head. */
802 spin_lock_bh(&queue->fastopenq->lock);
803 acc_req = queue->fastopenq->rskq_rst_head;
804 queue->fastopenq->rskq_rst_head = NULL;
805 spin_unlock_bh(&queue->fastopenq->lock);
806 while ((req = acc_req) != NULL) {
807 acc_req = req->dl_next;
808 __reqsk_free(req);
809 }
810 }
766 WARN_ON(sk->sk_ack_backlog); 811 WARN_ON(sk->sk_ack_backlog);
767} 812}
768EXPORT_SYMBOL_GPL(inet_csk_listen_stop); 813EXPORT_SYMBOL_GPL(inet_csk_listen_stop);