author		David S. Miller <davem@davemloft.net>	2014-05-13 17:53:46 -0400
committer	David S. Miller <davem@davemloft.net>	2014-05-13 17:53:46 -0400
commit		ae8b42c6fc37ca1b7eb30898f5a65196bbb47291 (patch)
tree		a5e44c9427f0c6a5cb0e6252e3ccbd5ef4f7a50e
parent		4b9734e547aaa947e56480ecf6d509cf9cc307cc (diff)
parent		3a19ce0eec32667b835d8dc887002019fc6b3a02 (diff)
Merge branch 'tcp-fastopen-ipv6'
Yuchung Cheng says:

====================
tcp: IPv6 support for fastopen server

This patch series adds IPv6 support for the fastopen server. To minimize
code duplication between IPv4 and IPv6, the current v4-only code is
refactored and the common code is moved into net/ipv4/tcp_fastopen.c.

The current code also uses a function separate from tcp_v4_send_synack()
to send the first SYN-ACK in fastopen; the new code eliminates this
separate function by refactoring the child-socket and SYN-ACK creation
code. After this refactoring in the first four patches, fastopen support
for IPv6 can be added simply by changing the corresponding IPv6 functions.

Note that the Fast Open client already supports IPv6; this series covers
server-side (passive open) IPv6 support only.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
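As background for exercising the merged result, a minimal sketch of an IPv6 Fast Open echo listener follows. It is illustrative only, not part of the patch: it assumes a kernel with this series applied and the server bit (TFO_SERVER_ENABLE, 0x2) set in the net.ipv4.tcp_fastopen sysctl; the port and queue length are placeholders.

/* Minimal IPv6 TFO listener sketch (illustrative, not from the patch). */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef TCP_FASTOPEN
#define TCP_FASTOPEN 23			/* socket option number from the kernel ABI */
#endif

int main(void)
{
	struct sockaddr_in6 addr;
	int qlen = 16;			/* max pending SYN-with-data requests */
	int fd = socket(AF_INET6, SOCK_STREAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	memset(&addr, 0, sizeof(addr));
	addr.sin6_family = AF_INET6;
	addr.sin6_addr = in6addr_any;
	addr.sin6_port = htons(8080);	/* placeholder port */

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0 ||
	    listen(fd, 128) < 0) {
		perror("setup");
		close(fd);
		return 1;
	}
	/* accept() is unchanged: for a Fast Open connection the child is
	 * returned with the SYN's payload already queued for read().
	 */
	for (;;) {
		char buf[512];
		ssize_t n;
		int c = accept(fd, NULL, NULL);

		if (c < 0)
			continue;
		n = read(c, buf, sizeof(buf));
		if (n > 0)
			(void)write(c, buf, n);	/* echo the SYN data back */
		close(c);
	}
}

The TCP_FASTOPEN option value becomes the listener's fastopenq->max_qlen, the cap that tcp_fastopen_queue_check() enforces in the diff below.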
-rw-r--r--	include/linux/tcp.h	5
-rw-r--r--	include/net/tcp.h	15
-rw-r--r--	net/ipv4/tcp_fastopen.c	219
-rw-r--r--	net/ipv4/tcp_ipv4.c	277
-rw-r--r--	net/ipv4/tcp_minisocks.c	31
-rw-r--r--	net/ipv4/tcp_output.c	23
-rw-r--r--	net/ipv6/tcp_ipv6.c	62
7 files changed, 323 insertions(+), 309 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4e37c71ecd74..bc35e4709e8e 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -366,11 +366,6 @@ static inline bool tcp_passive_fastopen(const struct sock *sk)
 		tcp_sk(sk)->fastopen_rsk != NULL);
 }
 
-static inline bool fastopen_cookie_present(struct tcp_fastopen_cookie *foc)
-{
-	return foc->len != -1;
-}
-
 extern void tcp_sock_destruct(struct sock *sk);
 
 static inline int fastopen_init_queue(struct sock *sk, int backlog)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3c9418456640..f5d6ca4a9d28 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -220,8 +220,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TFO_SERVER_ENABLE	2
 #define TFO_CLIENT_NO_COOKIE	4	/* Data in SYN w/o cookie option */
 
-/* Process SYN data but skip cookie validation */
-#define TFO_SERVER_COOKIE_NOT_CHKED	0x100
 /* Accept SYN data w/o any cookie option */
 #define TFO_SERVER_COOKIE_NOT_REQD	0x200
 
@@ -230,10 +228,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
  */
 #define TFO_SERVER_WO_SOCKOPT1	0x400
 #define TFO_SERVER_WO_SOCKOPT2	0x800
-/* Always create TFO child sockets on a TFO listener even when
- * cookie/data not present. (For testing purpose!)
- */
-#define TFO_SERVER_ALWAYS	0x1000
 
 extern struct inet_timewait_death_row tcp_death_row;
 
@@ -1120,6 +1114,9 @@ static inline void tcp_openreq_init(struct request_sock *req,
 	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
 }
 
+extern void tcp_openreq_init_rwin(struct request_sock *req,
+				  struct sock *sk, struct dst_entry *dst);
+
 void tcp_enter_memory_pressure(struct sock *sk);
 
 static inline int keepalive_intvl_when(const struct tcp_sock *tp)
@@ -1329,8 +1326,10 @@ void tcp_free_fastopen_req(struct tcp_sock *tp);
 
 extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
 int tcp_fastopen_reset_cipher(void *key, unsigned int len);
-void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
-			     struct tcp_fastopen_cookie *foc);
+bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
+		      struct request_sock *req,
+		      struct tcp_fastopen_cookie *foc,
+		      struct dst_entry *dst);
 void tcp_fastopen_init_key_once(bool publish);
 #define TCP_FASTOPEN_KEY_LENGTH 16
 
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index f195d9316e55..62e48cf84e60 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -72,25 +72,224 @@ error: kfree(ctx);
 	return err;
 }
 
-/* Computes the fastopen cookie for the IP path.
- * The path is a 128 bits long (pad with zeros for IPv4).
- *
- * The caller must check foc->len to determine if a valid cookie
- * has been generated successfully.
-*/
-void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
-			     struct tcp_fastopen_cookie *foc)
+static bool __tcp_fastopen_cookie_gen(const void *path,
+				      struct tcp_fastopen_cookie *foc)
 {
-	__be32 path[4] = { src, dst, 0, 0 };
 	struct tcp_fastopen_context *ctx;
+	bool ok = false;
 
 	tcp_fastopen_init_key_once(true);
 
 	rcu_read_lock();
 	ctx = rcu_dereference(tcp_fastopen_ctx);
 	if (ctx) {
-		crypto_cipher_encrypt_one(ctx->tfm, foc->val, (__u8 *)path);
+		crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
 		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
+		ok = true;
 	}
 	rcu_read_unlock();
+	return ok;
+}
+
+/* Generate the fastopen cookie by doing aes128 encryption on both
+ * the source and destination addresses. Pad 0s for IPv4 or IPv4-mapped-IPv6
+ * addresses. For the longer IPv6 addresses use CBC-MAC.
+ *
+ * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
+ */
+static bool tcp_fastopen_cookie_gen(struct request_sock *req,
+				    struct sk_buff *syn,
+				    struct tcp_fastopen_cookie *foc)
+{
+	if (req->rsk_ops->family == AF_INET) {
+		const struct iphdr *iph = ip_hdr(syn);
+
+		__be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
+		return __tcp_fastopen_cookie_gen(path, foc);
+	}
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (req->rsk_ops->family == AF_INET6) {
+		const struct ipv6hdr *ip6h = ipv6_hdr(syn);
+		struct tcp_fastopen_cookie tmp;
+
+		if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) {
+			struct in6_addr *buf = (struct in6_addr *) tmp.val;
+			int i = 4;
+
+			for (i = 0; i < 4; i++)
+				buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
+			return __tcp_fastopen_cookie_gen(buf, foc);
+		}
+	}
+#endif
+	return false;
+}
+
+static bool tcp_fastopen_create_child(struct sock *sk,
+				      struct sk_buff *skb,
+				      struct dst_entry *dst,
+				      struct request_sock *req)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+	struct sock *child;
+
+	req->num_retrans = 0;
+	req->num_timeout = 0;
+	req->sk = NULL;
+
+	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
+	if (child == NULL)
+		return false;
+
+	spin_lock(&queue->fastopenq->lock);
+	queue->fastopenq->qlen++;
+	spin_unlock(&queue->fastopenq->lock);
+
+	/* Initialize the child socket. Have to fix some values to take
+	 * into account the child is a Fast Open socket and is created
+	 * only out of the bits carried in the SYN packet.
+	 */
+	tp = tcp_sk(child);
+
+	tp->fastopen_rsk = req;
+	/* Do a hold on the listner sk so that if the listener is being
+	 * closed, the child that has been accepted can live on and still
+	 * access listen_lock.
+	 */
+	sock_hold(sk);
+	tcp_rsk(req)->listener = sk;
+
+	/* RFC1323: The window in SYN & SYN/ACK segments is never
+	 * scaled. So correct it appropriately.
+	 */
+	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
+
+	/* Activate the retrans timer so that SYNACK can be retransmitted.
+	 * The request socket is not added to the SYN table of the parent
+	 * because it's been added to the accept queue directly.
+	 */
+	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
+				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);
+
+	/* Add the child socket directly into the accept queue */
+	inet_csk_reqsk_queue_add(sk, req, child);
+
+	/* Now finish processing the fastopen child socket. */
+	inet_csk(child)->icsk_af_ops->rebuild_header(child);
+	tcp_init_congestion_control(child);
+	tcp_mtup_init(child);
+	tcp_init_metrics(child);
+	tcp_init_buffer_space(child);
+
+	/* Queue the data carried in the SYN packet. We need to first
+	 * bump skb's refcnt because the caller will attempt to free it.
+	 *
+	 * XXX (TFO) - we honor a zero-payload TFO request for now,
+	 * (any reason not to?) but no need to queue the skb since
+	 * there is no data. How about SYN+FIN?
+	 */
+	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1) {
+		skb = skb_get(skb);
+		skb_dst_drop(skb);
+		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
+		skb_set_owner_r(skb, child);
+		__skb_queue_tail(&child->sk_receive_queue, skb);
+		tp->syn_data_acked = 1;
+	}
+	tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	sk->sk_data_ready(sk);
+	bh_unlock_sock(child);
+	sock_put(child);
+	WARN_ON(req->sk == NULL);
+	return true;
+}
+EXPORT_SYMBOL(tcp_fastopen_create_child);
+
+static bool tcp_fastopen_queue_check(struct sock *sk)
+{
+	struct fastopen_queue *fastopenq;
+
+	/* Make sure the listener has enabled fastopen, and we don't
+	 * exceed the max # of pending TFO requests allowed before trying
+	 * to validating the cookie in order to avoid burning CPU cycles
+	 * unnecessarily.
+	 *
+	 * XXX (TFO) - The implication of checking the max_qlen before
+	 * processing a cookie request is that clients can't differentiate
+	 * between qlen overflow causing Fast Open to be disabled
+	 * temporarily vs a server not supporting Fast Open at all.
+	 */
+	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
+	if (fastopenq == NULL || fastopenq->max_qlen == 0)
+		return false;
+
+	if (fastopenq->qlen >= fastopenq->max_qlen) {
+		struct request_sock *req1;
+		spin_lock(&fastopenq->lock);
+		req1 = fastopenq->rskq_rst_head;
+		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
+			spin_unlock(&fastopenq->lock);
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
+			return false;
+		}
+		fastopenq->rskq_rst_head = req1->dl_next;
+		fastopenq->qlen--;
+		spin_unlock(&fastopenq->lock);
+		reqsk_free(req1);
+	}
+	return true;
+}
+
+/* Returns true if we should perform Fast Open on the SYN. The cookie (foc)
+ * may be updated and return the client in the SYN-ACK later. E.g., Fast Open
+ * cookie request (foc->len == 0).
+ */
+bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
+		      struct request_sock *req,
+		      struct tcp_fastopen_cookie *foc,
+		      struct dst_entry *dst)
+{
+	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
+	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
+
+	if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+	      (syn_data || foc->len >= 0) &&
+	      tcp_fastopen_queue_check(sk))) {
+		foc->len = -1;
+		return false;
+	}
+
+	if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
+		goto fastopen;
+
+	if (tcp_fastopen_cookie_gen(req, skb, &valid_foc) &&
+	    foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
+	    foc->len == valid_foc.len &&
+	    !memcmp(foc->val, valid_foc.val, foc->len)) {
+		/* Cookie is valid. Create a (full) child socket to accept
+		 * the data in SYN before returning a SYN-ACK to ack the
+		 * data. If we fail to create the socket, fall back and
+		 * ack the ISN only but includes the same cookie.
+		 *
+		 * Note: Data-less SYN with valid cookie is allowed to send
+		 * data in SYN_RECV state.
+		 */
+fastopen:
+		if (tcp_fastopen_create_child(sk, skb, dst, req)) {
+			foc->len = -1;
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPFASTOPENPASSIVE);
+			return true;
+		}
+	}
+
+	NET_INC_STATS_BH(sock_net(sk), foc->len ?
+			 LINUX_MIB_TCPFASTOPENPASSIVEFAIL :
+			 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
+	*foc = valid_foc;
+	return false;
 }
+EXPORT_SYMBOL(tcp_try_fastopen);
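To make the cookie scheme above concrete, here is a rough userspace model of the derivation, assuming OpenSSL's single-block AES-128 primitive in place of the kernel's crypto_cipher_encrypt_one() (the key, addresses, and helper names are illustrative only, not from the patch; build with -lcrypto):

/* Userspace model of the TFO cookie derivation (not kernel code). */
#include <openssl/aes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void cookie_ipv4(const AES_KEY *k, uint32_t saddr, uint32_t daddr,
			uint8_t cookie[16])
{
	uint8_t path[16] = { 0 };	/* pad with zeros, as for IPv4 */

	memcpy(path, &saddr, 4);
	memcpy(path + 4, &daddr, 4);
	AES_encrypt(path, cookie, k);	/* one AES block is the cookie */
}

static void cookie_ipv6(const AES_KEY *k, const uint8_t saddr[16],
			const uint8_t daddr[16], uint8_t cookie[16])
{
	uint8_t buf[16];
	int i;

	AES_encrypt(saddr, buf, k);	/* first CBC-MAC block */
	for (i = 0; i < 16; i++)	/* XOR in the destination address */
		buf[i] ^= daddr[i];
	AES_encrypt(buf, cookie, k);	/* second block is the cookie */
}

int main(void)
{
	static const uint8_t key[16] = "0123456789abcdef";	/* dummy key */
	uint8_t saddr6[16] = { [15] = 1 }, daddr6[16] = { [15] = 2 };
	uint8_t cookie[16];
	AES_KEY k;
	int i;

	AES_set_encrypt_key(key, 128, &k);
	cookie_ipv6(&k, saddr6, daddr6, cookie);
	for (i = 0; i < 16; i++)
		printf("%02x", cookie[i]);
	putchar('\n');
	(void)cookie_ipv4;
	return 0;
}

Under this construction an IPv4 cookie costs one AES block and an IPv6 cookie two, which is why the XXX note above flags the assumption that TCP_FASTOPEN_COOKIE_SIZE equals AES_BLOCK_SIZE.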
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ad166dcc278f..a2780e5334c9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -336,8 +336,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	const int code = icmp_hdr(icmp_skb)->code;
 	struct sock *sk;
 	struct sk_buff *skb;
-	struct request_sock *req;
-	__u32 seq;
+	struct request_sock *fastopen;
+	__u32 seq, snd_una;
 	__u32 remaining;
 	int err;
 	struct net *net = dev_net(icmp_skb->dev);
@@ -378,12 +378,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
 	icsk = inet_csk(sk);
 	tp = tcp_sk(sk);
-	req = tp->fastopen_rsk;
 	seq = ntohl(th->seq);
+	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
+	fastopen = tp->fastopen_rsk;
+	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
 	if (sk->sk_state != TCP_LISTEN &&
-	    !between(seq, tp->snd_una, tp->snd_nxt) &&
-	    (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
-		/* For a Fast Open socket, allow seq to be snt_isn. */
+	    !between(seq, snd_una, tp->snd_nxt)) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 		goto out;
 	}
@@ -426,11 +426,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
 			break;
 		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
-		    !icsk->icsk_backoff)
+		    !icsk->icsk_backoff || fastopen)
 			break;
 
-		/* XXX (TFO) - revisit the following logic for TFO */
-
 		if (sock_owned_by_user(sk))
 			break;
 
@@ -462,14 +460,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		goto out;
 	}
 
-	/* XXX (TFO) - if it's a TFO socket and has been accepted, rather
-	 * than following the TCP_SYN_RECV case and closing the socket,
-	 * we ignore the ICMP error and keep trying like a fully established
-	 * socket. Is this the right thing to do?
-	 */
-	if (req && req->sk == NULL)
-		goto out;
-
 	switch (sk->sk_state) {
 		struct request_sock *req, **prev;
 	case TCP_LISTEN:
@@ -502,10 +492,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		goto out;
 
 	case TCP_SYN_SENT:
-	case TCP_SYN_RECV:  /* Cannot happen.
-			       It can f.e. if SYNs crossed,
-			       or Fast Open.
-			     */
+	case TCP_SYN_RECV:
+		/* Only in fast or simultaneous open. If a fast open socket is
+		 * is already accepted it is treated as a connected one below.
+		 */
+		if (fastopen && fastopen->sk == NULL)
+			break;
+
 		if (!sock_owned_by_user(sk)) {
 			sk->sk_err = err;
 
@@ -822,7 +815,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  */
 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct request_sock *req,
-			      u16 queue_mapping)
+			      u16 queue_mapping,
+			      struct tcp_fastopen_cookie *foc)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct flowi4 fl4;
@@ -833,7 +827,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		return -1;
 
-	skb = tcp_make_synack(sk, dst, req, NULL);
+	skb = tcp_make_synack(sk, dst, req, foc);
 
 	if (skb) {
 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
@@ -852,7 +846,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 
 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
 {
-	int res = tcp_v4_send_synack(sk, NULL, req, 0);
+	int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL);
 
 	if (!res) {
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
@@ -1260,187 +1254,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
 };
 #endif
 
-static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
-			       struct request_sock *req,
-			       struct tcp_fastopen_cookie *foc,
-			       struct tcp_fastopen_cookie *valid_foc)
-{
-	bool skip_cookie = false;
-	struct fastopen_queue *fastopenq;
-
-	if (likely(!fastopen_cookie_present(foc))) {
-		/* See include/net/tcp.h for the meaning of these knobs */
-		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
-		    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
-		    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
-			skip_cookie = true; /* no cookie to validate */
-		else
-			return false;
-	}
-	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
-	/* A FO option is present; bump the counter. */
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
-
-	/* Make sure the listener has enabled fastopen, and we don't
-	 * exceed the max # of pending TFO requests allowed before trying
-	 * to validating the cookie in order to avoid burning CPU cycles
-	 * unnecessarily.
-	 *
-	 * XXX (TFO) - The implication of checking the max_qlen before
-	 * processing a cookie request is that clients can't differentiate
-	 * between qlen overflow causing Fast Open to be disabled
-	 * temporarily vs a server not supporting Fast Open at all.
-	 */
-	if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
-	    fastopenq == NULL || fastopenq->max_qlen == 0)
-		return false;
-
-	if (fastopenq->qlen >= fastopenq->max_qlen) {
-		struct request_sock *req1;
-		spin_lock(&fastopenq->lock);
-		req1 = fastopenq->rskq_rst_head;
-		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
-			spin_unlock(&fastopenq->lock);
-			NET_INC_STATS_BH(sock_net(sk),
-					 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
-			/* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
-			foc->len = -1;
-			return false;
-		}
-		fastopenq->rskq_rst_head = req1->dl_next;
-		fastopenq->qlen--;
-		spin_unlock(&fastopenq->lock);
-		reqsk_free(req1);
-	}
-	if (skip_cookie) {
-		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		return true;
-	}
-
-	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
-		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
-			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-						ip_hdr(skb)->daddr, valid_foc);
-			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
-			    memcmp(&foc->val[0], &valid_foc->val[0],
-				   TCP_FASTOPEN_COOKIE_SIZE) != 0)
-				return false;
-			valid_foc->len = -1;
-		}
-		/* Acknowledge the data received from the peer. */
-		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		return true;
-	} else if (foc->len == 0) { /* Client requesting a cookie */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-					ip_hdr(skb)->daddr, valid_foc);
-		NET_INC_STATS_BH(sock_net(sk),
-				 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
-	} else {
-		/* Client sent a cookie with wrong size. Treat it
-		 * the same as invalid and return a valid one.
-		 */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-					ip_hdr(skb)->daddr, valid_foc);
-	}
-	return false;
-}
-
-static int tcp_v4_conn_req_fastopen(struct sock *sk,
-				    struct sk_buff *skb,
-				    struct sk_buff *skb_synack,
-				    struct request_sock *req)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
-	const struct inet_request_sock *ireq = inet_rsk(req);
-	struct sock *child;
-	int err;
-
-	req->num_retrans = 0;
-	req->num_timeout = 0;
-	req->sk = NULL;
-
-	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
-	if (child == NULL) {
-		NET_INC_STATS_BH(sock_net(sk),
-				 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-		kfree_skb(skb_synack);
-		return -1;
-	}
-	err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
-				    ireq->ir_rmt_addr, ireq->opt);
-	err = net_xmit_eval(err);
-	if (!err)
-		tcp_rsk(req)->snt_synack = tcp_time_stamp;
-	/* XXX (TFO) - is it ok to ignore error and continue? */
-
-	spin_lock(&queue->fastopenq->lock);
-	queue->fastopenq->qlen++;
-	spin_unlock(&queue->fastopenq->lock);
-
-	/* Initialize the child socket. Have to fix some values to take
-	 * into account the child is a Fast Open socket and is created
-	 * only out of the bits carried in the SYN packet.
-	 */
-	tp = tcp_sk(child);
-
-	tp->fastopen_rsk = req;
-	/* Do a hold on the listner sk so that if the listener is being
-	 * closed, the child that has been accepted can live on and still
-	 * access listen_lock.
-	 */
-	sock_hold(sk);
-	tcp_rsk(req)->listener = sk;
-
-	/* RFC1323: The window in SYN & SYN/ACK segments is never
-	 * scaled. So correct it appropriately.
-	 */
-	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
-
-	/* Activate the retrans timer so that SYNACK can be retransmitted.
-	 * The request socket is not added to the SYN table of the parent
-	 * because it's been added to the accept queue directly.
-	 */
-	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
-				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);
-
-	/* Add the child socket directly into the accept queue */
-	inet_csk_reqsk_queue_add(sk, req, child);
-
-	/* Now finish processing the fastopen child socket. */
-	inet_csk(child)->icsk_af_ops->rebuild_header(child);
-	tcp_init_congestion_control(child);
-	tcp_mtup_init(child);
-	tcp_init_metrics(child);
-	tcp_init_buffer_space(child);
-
-	/* Queue the data carried in the SYN packet. We need to first
-	 * bump skb's refcnt because the caller will attempt to free it.
-	 *
-	 * XXX (TFO) - we honor a zero-payload TFO request for now.
-	 * (Any reason not to?)
-	 */
-	if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
-		/* Don't queue the skb if there is no payload in SYN.
-		 * XXX (TFO) - How about SYN+FIN?
-		 */
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-	} else {
-		skb = skb_get(skb);
-		skb_dst_drop(skb);
-		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
-		skb_set_owner_r(skb, child);
-		__skb_queue_tail(&child->sk_receive_queue, skb);
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		tp->syn_data_acked = 1;
-	}
-	sk->sk_data_ready(sk);
-	bh_unlock_sock(child);
-	sock_put(child);
-	WARN_ON(req->sk == NULL);
-	return 0;
-}
-
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_options_received tmp_opt;
@@ -1451,12 +1264,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	__be32 saddr = ip_hdr(skb)->saddr;
 	__be32 daddr = ip_hdr(skb)->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
-	bool want_cookie = false;
+	bool want_cookie = false, fastopen;
 	struct flowi4 fl4;
 	struct tcp_fastopen_cookie foc = { .len = -1 };
-	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
-	struct sk_buff *skb_synack;
-	int do_fastopen;
+	int err;
 
 	/* Never answer to SYNs send to broadcast or multicast */
 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1555,52 +1366,24 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 
 		isn = tcp_v4_init_sequence(skb);
 	}
-	tcp_rsk(req)->snt_isn = isn;
-
-	if (dst == NULL) {
-		dst = inet_csk_route_req(sk, &fl4, req);
-		if (dst == NULL)
-			goto drop_and_free;
-	}
-	do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
-
-	/* We don't call tcp_v4_send_synack() directly because we need
-	 * to make sure a child socket can be created successfully before
-	 * sending back synack!
-	 *
-	 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
-	 * (or better yet, call tcp_send_synack() in the child context
-	 * directly, but will have to fix bunch of other code first)
-	 * after syn_recv_sock() except one will need to first fix the
-	 * latter to remove its dependency on the current implementation
-	 * of tcp_v4_send_synack()->tcp_select_initial_window().
-	 */
-	skb_synack = tcp_make_synack(sk, dst, req,
-	    fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
-
-	if (skb_synack) {
-		__tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr);
-		skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
-	} else
+	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		goto drop_and_free;
 
-	if (likely(!do_fastopen)) {
-		int err;
-		err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
-					    ireq->ir_rmt_addr, ireq->opt);
-		err = net_xmit_eval(err);
+	tcp_rsk(req)->snt_isn = isn;
+	tcp_rsk(req)->snt_synack = tcp_time_stamp;
+	tcp_openreq_init_rwin(req, sk, dst);
+	fastopen = !want_cookie &&
+		   tcp_try_fastopen(sk, skb, req, &foc, dst);
+	err = tcp_v4_send_synack(sk, dst, req,
+				 skb_get_queue_mapping(skb), &foc);
+	if (!fastopen) {
 		if (err || want_cookie)
 			goto drop_and_free;
 
 		tcp_rsk(req)->snt_synack = tcp_time_stamp;
 		tcp_rsk(req)->listener = NULL;
-		/* Add the request_sock to the SYN table */
 		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-		if (fastopen_cookie_present(&foc) && foc.len != 0)
-			NET_INC_STATS_BH(sock_net(sk),
-					 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-	} else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
-		goto drop_and_free;
+	}
 
 	return 0;
 
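The reworked tcp_v4_conn_request() path above can be driven end to end with a plain sendto(MSG_FASTOPEN) client; as the cover letter notes, the client side already supports IPv6. A minimal sketch follows, with placeholder address and port; MSG_FASTOPEN performs the implicit connect(), requesting a cookie on first contact and carrying the payload in the SYN once one is cached:

/* Minimal TFO client sketch over IPv6 (illustrative only). */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN 0x20000000		/* flag value from the kernel ABI */
#endif

int main(void)
{
	const char msg[] = "hello";	/* payload that may ride in the SYN */
	struct sockaddr_in6 dst;
	char buf[512];
	ssize_t n;
	int fd = socket(AF_INET6, SOCK_STREAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	memset(&dst, 0, sizeof(dst));
	dst.sin6_family = AF_INET6;
	dst.sin6_port = htons(8080);	/* placeholder server port */
	inet_pton(AF_INET6, "::1", &dst.sin6_addr);

	/* Implicit connect(): cookie request on the first exchange,
	 * data-in-SYN on later ones. */
	if (sendto(fd, msg, sizeof(msg) - 1, MSG_FASTOPEN,
		   (struct sockaddr *)&dst, sizeof(dst)) < 0) {
		perror("sendto");
		close(fd);
		return 1;
	}
	n = read(fd, buf, sizeof(buf));
	if (n > 0)
		printf("reply: %.*s\n", (int)n, buf);
	close(fd);
	return 0;
}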
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 05c1b155251d..e68e0d4af6c9 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -362,6 +362,37 @@ void tcp_twsk_destructor(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
 
+void tcp_openreq_init_rwin(struct request_sock *req,
+			   struct sock *sk, struct dst_entry *dst)
+{
+	struct inet_request_sock *ireq = inet_rsk(req);
+	struct tcp_sock *tp = tcp_sk(sk);
+	__u8 rcv_wscale;
+	int mss = dst_metric_advmss(dst);
+
+	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
+		mss = tp->rx_opt.user_mss;
+
+	/* Set this up on the first call only */
+	req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+
+	/* limit the window selection if the user enforce a smaller rx buffer */
+	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+	    (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
+		req->window_clamp = tcp_full_space(sk);
+
+	/* tcp_full_space because it is guaranteed to be the first packet */
+	tcp_select_initial_window(tcp_full_space(sk),
+				  mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+				  &req->rcv_wnd,
+				  &req->window_clamp,
+				  ireq->wscale_ok,
+				  &rcv_wscale,
+				  dst_metric(dst, RTAX_INITRWND));
+	ireq->rcv_wscale = rcv_wscale;
+}
+EXPORT_SYMBOL(tcp_openreq_init_rwin);
+
 static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
 					 struct request_sock *req)
 {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 694711a140d4..3d61c52bdf79 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -627,7 +627,7 @@ static unsigned int tcp_synack_options(struct sock *sk,
 		if (unlikely(!ireq->tstamp_ok))
 			remaining -= TCPOLEN_SACKPERM_ALIGNED;
 	}
-	if (foc != NULL) {
+	if (foc != NULL && foc->len >= 0) {
 		u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
 		need = (need + 3) & ~3U; /* Align to 32 bits */
 		if (remaining >= need) {
@@ -2803,27 +2803,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
 		mss = tp->rx_opt.user_mss;
 
-	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
-		__u8 rcv_wscale;
-		/* Set this up on the first call only */
-		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
-
-		/* limit the window selection if the user enforce a smaller rx buffer */
-		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
-		    (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
-			req->window_clamp = tcp_full_space(sk);
-
-		/* tcp_full_space because it is guaranteed to be the first packet */
-		tcp_select_initial_window(tcp_full_space(sk),
-			mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
-			&req->rcv_wnd,
-			&req->window_clamp,
-			ireq->wscale_ok,
-			&rcv_wscale,
-			dst_metric(dst, RTAX_INITRWND));
-		ireq->rcv_wscale = rcv_wscale;
-	}
-
 	memset(&opts, 0, sizeof(opts));
 #ifdef CONFIG_SYN_COOKIES
 	if (unlikely(req->cookie_ts))
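One consequence of the tcp_synack_options() change above: since the SYN-ACK path now always receives a cookie pointer, a length of -1 has to mean "no cookie to send", hence the new foc->len >= 0 guard. When a cookie is sent, the experimental option costs TCPOLEN_EXP_FASTOPEN_BASE (4 bytes of kind, length and magic) plus the cookie, rounded up to 32-bit alignment; e.g. an 8-byte cookie consumes (4 + 8 + 3) & ~3U = 12 bytes of the remaining option space.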
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7fa67439f4d6..3a267bf14f2f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -340,7 +340,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	struct sock *sk;
 	int err;
 	struct tcp_sock *tp;
-	__u32 seq;
+	struct request_sock *fastopen;
+	__u32 seq, snd_una;
 	struct net *net = dev_net(skb->dev);
 
 	sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
@@ -371,8 +372,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	tp = tcp_sk(sk);
 	seq = ntohl(th->seq);
+	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
+	fastopen = tp->fastopen_rsk;
+	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
 	if (sk->sk_state != TCP_LISTEN &&
-	    !between(seq, tp->snd_una, tp->snd_nxt)) {
+	    !between(seq, snd_una, tp->snd_nxt)) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 		goto out;
 	}
@@ -436,8 +440,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		goto out;
 
 	case TCP_SYN_SENT:
-	case TCP_SYN_RECV:  /* Cannot happen.
-			       It can, it SYNs are crossed. --ANK */
+	case TCP_SYN_RECV:
+		/* Only in fast or simultaneous open. If a fast open socket is
+		 * is already accepted it is treated as a connected one below.
+		 */
+		if (fastopen && fastopen->sk == NULL)
+			break;
+
 		if (!sock_owned_by_user(sk)) {
 			sk->sk_err = err;
 			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */
@@ -463,7 +472,8 @@ out:
 static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct flowi6 *fl6,
 			      struct request_sock *req,
-			      u16 queue_mapping)
+			      u16 queue_mapping,
+			      struct tcp_fastopen_cookie *foc)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -474,7 +484,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
 		goto done;
 
-	skb = tcp_make_synack(sk, dst, req, NULL);
+	skb = tcp_make_synack(sk, dst, req, foc);
 
 	if (skb) {
 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
@@ -498,7 +508,7 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
 	struct flowi6 fl6;
 	int res;
 
-	res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0);
+	res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0, NULL);
 	if (!res) {
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
@@ -917,7 +927,12 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 				  struct request_sock *req)
 {
-	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
+	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
+	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+	 */
+	tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
+			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+			tcp_rsk(req)->rcv_nxt,
 			req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
 			0, 0);
@@ -969,8 +984,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 isn = TCP_SKB_CB(skb)->when;
 	struct dst_entry *dst = NULL;
+	struct tcp_fastopen_cookie foc = { .len = -1 };
+	bool want_cookie = false, fastopen;
 	struct flowi6 fl6;
-	bool want_cookie = false;
+	int err;
 
 	if (skb->protocol == htons(ETH_P_IP))
 		return tcp_v4_conn_request(sk, skb);
@@ -1001,7 +1018,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 	tmp_opt.user_mss = tp->rx_opt.user_mss;
-	tcp_parse_options(skb, &tmp_opt, 0, NULL);
+	tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
 
 	if (want_cookie && !tmp_opt.saw_tstamp)
 		tcp_clear_options(&tmp_opt);
@@ -1074,19 +1091,27 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		isn = tcp_v6_init_sequence(skb);
 	}
 have_isn:
-	tcp_rsk(req)->snt_isn = isn;
 
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_release;
 
-	if (tcp_v6_send_synack(sk, dst, &fl6, req,
-			       skb_get_queue_mapping(skb)) ||
-	    want_cookie)
+	if (!dst && (dst = inet6_csk_route_req(sk, &fl6, req)) == NULL)
 		goto drop_and_free;
 
+	tcp_rsk(req)->snt_isn = isn;
 	tcp_rsk(req)->snt_synack = tcp_time_stamp;
-	tcp_rsk(req)->listener = NULL;
-	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+	tcp_openreq_init_rwin(req, sk, dst);
+	fastopen = !want_cookie &&
+		   tcp_try_fastopen(sk, skb, req, &foc, dst);
+	err = tcp_v6_send_synack(sk, dst, &fl6, req,
+				 skb_get_queue_mapping(skb), &foc);
+	if (!fastopen) {
+		if (err || want_cookie)
+			goto drop_and_free;
+
+		tcp_rsk(req)->listener = NULL;
+		inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+	}
 	return 0;
 
 drop_and_release:
@@ -1760,6 +1785,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 	const struct inet_sock *inet = inet_sk(sp);
 	const struct tcp_sock *tp = tcp_sk(sp);
 	const struct inet_connection_sock *icsk = inet_csk(sp);
+	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
 
 	dest = &sp->sk_v6_daddr;
 	src = &sp->sk_v6_rcv_saddr;
@@ -1802,7 +1828,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
 		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
 		   tp->snd_cwnd,
-		   tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
+		   sp->sk_state == TCP_LISTEN ?
+			(fastopenq ? fastopenq->max_qlen : 0) :
+			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
 		   );
 }
 