Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/af_inet.c | 110
-rw-r--r--  net/ipv4/datagram.c | 20
-rw-r--r--  net/ipv4/devinet.c | 9
-rw-r--r--  net/ipv4/gre_demux.c | 27
-rw-r--r--  net/ipv4/gre_offload.c | 16
-rw-r--r--  net/ipv4/icmp.c | 23
-rw-r--r--  net/ipv4/igmp.c | 16
-rw-r--r--  net/ipv4/inet_connection_sock.c | 11
-rw-r--r--  net/ipv4/inet_hashtables.c | 6
-rw-r--r--  net/ipv4/inetpeer.c | 20
-rw-r--r--  net/ipv4/ip_forward.c | 2
-rw-r--r--  net/ipv4/ip_gre.c | 7
-rw-r--r--  net/ipv4/ip_options.c | 6
-rw-r--r--  net/ipv4/ip_output.c | 22
-rw-r--r--  net/ipv4/ip_tunnel.c | 25
-rw-r--r--  net/ipv4/ip_tunnel_core.c | 10
-rw-r--r--  net/ipv4/ip_vti.c | 8
-rw-r--r--  net/ipv4/ipip.c | 5
-rw-r--r--  net/ipv4/ipmr.c | 4
-rw-r--r--  net/ipv4/netfilter/iptable_nat.c | 14
-rw-r--r--  net/ipv4/netfilter/nf_defrag_ipv4.c | 2
-rw-r--r--  net/ipv4/netfilter/nft_chain_nat_ipv4.c | 12
-rw-r--r--  net/ipv4/proc.c | 24
-rw-r--r--  net/ipv4/raw.c | 2
-rw-r--r--  net/ipv4/route.c | 52
-rw-r--r--  net/ipv4/syncookies.c | 3
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c | 45
-rw-r--r--  net/ipv4/tcp.c | 8
-rw-r--r--  net/ipv4/tcp_bic.c | 5
-rw-r--r--  net/ipv4/tcp_cong.c | 24
-rw-r--r--  net/ipv4/tcp_cubic.c | 5
-rw-r--r--  net/ipv4/tcp_fastopen.c | 219
-rw-r--r--  net/ipv4/tcp_highspeed.c | 4
-rw-r--r--  net/ipv4/tcp_htcp.c | 4
-rw-r--r--  net/ipv4/tcp_hybla.c | 7
-rw-r--r--  net/ipv4/tcp_illinois.c | 5
-rw-r--r--  net/ipv4/tcp_input.c | 36
-rw-r--r--  net/ipv4/tcp_ipv4.c | 303
-rw-r--r--  net/ipv4/tcp_lp.c | 5
-rw-r--r--  net/ipv4/tcp_metrics.c | 5
-rw-r--r--  net/ipv4/tcp_minisocks.c | 31
-rw-r--r--  net/ipv4/tcp_offload.c | 9
-rw-r--r--  net/ipv4/tcp_output.c | 126
-rw-r--r--  net/ipv4/tcp_scalable.c | 5
-rw-r--r--  net/ipv4/tcp_vegas.c | 7
-rw-r--r--  net/ipv4/tcp_veno.c | 9
-rw-r--r--  net/ipv4/tcp_yeah.c | 5
-rw-r--r--  net/ipv4/udp.c | 135
-rw-r--r--  net/ipv4/udp_offload.c | 8
-rw-r--r--  net/ipv4/udplite.c | 1
-rw-r--r--  net/ipv4/xfrm4_mode_tunnel.c | 2
-rw-r--r--  net/ipv4/xfrm4_output.c | 2
52 files changed, 715 insertions, 756 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6d6dd345bc4d..d5e6836cf772 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -254,7 +254,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
     struct inet_sock *inet;
     struct proto *answer_prot;
     unsigned char answer_flags;
-    char answer_no_check;
     int try_loading_module = 0;
     int err;
 
@@ -312,7 +311,6 @@ lookup_protocol:
 
     sock->ops = answer->ops;
     answer_prot = answer->prot;
-    answer_no_check = answer->no_check;
     answer_flags = answer->flags;
     rcu_read_unlock();
 
@@ -324,7 +322,6 @@ lookup_protocol:
        goto out;
 
     err = 0;
-    sk->sk_no_check = answer_no_check;
     if (INET_PROTOSW_REUSE & answer_flags)
        sk->sk_reuse = SK_CAN_REUSE;
 
@@ -1002,7 +999,6 @@ static struct inet_protosw inetsw_array[] =
        .protocol = IPPROTO_TCP,
        .prot = &tcp_prot,
        .ops = &inet_stream_ops,
-       .no_check = 0,
        .flags = INET_PROTOSW_PERMANENT |
                 INET_PROTOSW_ICSK,
     },
@@ -1012,7 +1008,6 @@ static struct inet_protosw inetsw_array[] =
        .protocol = IPPROTO_UDP,
        .prot = &udp_prot,
        .ops = &inet_dgram_ops,
-       .no_check = UDP_CSUM_DEFAULT,
        .flags = INET_PROTOSW_PERMANENT,
     },
 
@@ -1021,7 +1016,6 @@ static struct inet_protosw inetsw_array[] =
        .protocol = IPPROTO_ICMP,
        .prot = &ping_prot,
        .ops = &inet_dgram_ops,
-       .no_check = UDP_CSUM_DEFAULT,
        .flags = INET_PROTOSW_REUSE,
     },
 
@@ -1030,7 +1024,6 @@ static struct inet_protosw inetsw_array[] =
        .protocol = IPPROTO_IP, /* wild card */
        .prot = &raw_prot,
        .ops = &inet_sockraw_ops,
-       .no_check = UDP_CSUM_DEFAULT,
        .flags = INET_PROTOSW_REUSE,
     }
 };
@@ -1261,10 +1254,12 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
               SKB_GSO_DODGY |
               SKB_GSO_TCP_ECN |
               SKB_GSO_GRE |
+              SKB_GSO_GRE_CSUM |
               SKB_GSO_IPIP |
               SKB_GSO_SIT |
               SKB_GSO_TCPV6 |
               SKB_GSO_UDP_TUNNEL |
+              SKB_GSO_UDP_TUNNEL_CSUM |
               SKB_GSO_MPLS |
               0)))
        goto out;
@@ -1476,22 +1471,20 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
 }
 EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
 
-unsigned long snmp_fold_field(void __percpu *mib[], int offt)
+unsigned long snmp_fold_field(void __percpu *mib, int offt)
 {
     unsigned long res = 0;
-    int i, j;
+    int i;
 
-    for_each_possible_cpu(i) {
-       for (j = 0; j < SNMP_ARRAY_SZ; j++)
-           res += *(((unsigned long *) per_cpu_ptr(mib[j], i)) + offt);
-    }
+    for_each_possible_cpu(i)
+       res += *(((unsigned long *) per_cpu_ptr(mib, i)) + offt);
     return res;
 }
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
 #if BITS_PER_LONG==32
 
-u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
+u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
 {
     u64 res = 0;
     int cpu;
@@ -1502,7 +1495,7 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
        u64 v;
        unsigned int start;
 
-       bhptr = per_cpu_ptr(mib[0], cpu);
+       bhptr = per_cpu_ptr(mib, cpu);
        syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
        do {
            start = u64_stats_fetch_begin_irq(syncp);
@@ -1516,25 +1509,6 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
 EXPORT_SYMBOL_GPL(snmp_fold_field64);
 #endif
 
-int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
-{
-    BUG_ON(ptr == NULL);
-    ptr[0] = __alloc_percpu(mibsize, align);
-    if (!ptr[0])
-       return -ENOMEM;
-
-#if SNMP_ARRAY_SZ == 2
-    ptr[1] = __alloc_percpu(mibsize, align);
-    if (!ptr[1]) {
-       free_percpu(ptr[0]);
-       ptr[0] = NULL;
-       return -ENOMEM;
-    }
-#endif
-    return 0;
-}
-EXPORT_SYMBOL_GPL(snmp_mib_init);
-
 #ifdef CONFIG_IP_MULTICAST
 static const struct net_protocol igmp_protocol = {
     .handler = igmp_rcv,
@@ -1570,40 +1544,30 @@ static __net_init int ipv4_mib_init_net(struct net *net)
 {
     int i;
 
-    if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
-                      sizeof(struct tcp_mib),
-                      __alignof__(struct tcp_mib)) < 0)
+    net->mib.tcp_statistics = alloc_percpu(struct tcp_mib);
+    if (!net->mib.tcp_statistics)
        goto err_tcp_mib;
-    if (snmp_mib_init((void __percpu **)net->mib.ip_statistics,
-                      sizeof(struct ipstats_mib),
-                      __alignof__(struct ipstats_mib)) < 0)
+    net->mib.ip_statistics = alloc_percpu(struct ipstats_mib);
+    if (!net->mib.ip_statistics)
        goto err_ip_mib;
 
     for_each_possible_cpu(i) {
        struct ipstats_mib *af_inet_stats;
-       af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[0], i);
+       af_inet_stats = per_cpu_ptr(net->mib.ip_statistics, i);
        u64_stats_init(&af_inet_stats->syncp);
-#if SNMP_ARRAY_SZ == 2
-       af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[1], i);
-       u64_stats_init(&af_inet_stats->syncp);
-#endif
     }
 
-    if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
-                      sizeof(struct linux_mib),
-                      __alignof__(struct linux_mib)) < 0)
+    net->mib.net_statistics = alloc_percpu(struct linux_mib);
+    if (!net->mib.net_statistics)
        goto err_net_mib;
-    if (snmp_mib_init((void __percpu **)net->mib.udp_statistics,
-                      sizeof(struct udp_mib),
-                      __alignof__(struct udp_mib)) < 0)
+    net->mib.udp_statistics = alloc_percpu(struct udp_mib);
+    if (!net->mib.udp_statistics)
        goto err_udp_mib;
-    if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics,
-                      sizeof(struct udp_mib),
-                      __alignof__(struct udp_mib)) < 0)
+    net->mib.udplite_statistics = alloc_percpu(struct udp_mib);
+    if (!net->mib.udplite_statistics)
        goto err_udplite_mib;
-    if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics,
-                      sizeof(struct icmp_mib),
-                      __alignof__(struct icmp_mib)) < 0)
+    net->mib.icmp_statistics = alloc_percpu(struct icmp_mib);
+    if (!net->mib.icmp_statistics)
        goto err_icmp_mib;
     net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib),
                                           GFP_KERNEL);
@@ -1614,17 +1578,17 @@ static __net_init int ipv4_mib_init_net(struct net *net)
     return 0;
 
 err_icmpmsg_mib:
-    snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
+    free_percpu(net->mib.icmp_statistics);
 err_icmp_mib:
-    snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
+    free_percpu(net->mib.udplite_statistics);
 err_udplite_mib:
-    snmp_mib_free((void __percpu **)net->mib.udp_statistics);
+    free_percpu(net->mib.udp_statistics);
 err_udp_mib:
-    snmp_mib_free((void __percpu **)net->mib.net_statistics);
+    free_percpu(net->mib.net_statistics);
 err_net_mib:
-    snmp_mib_free((void __percpu **)net->mib.ip_statistics);
+    free_percpu(net->mib.ip_statistics);
 err_ip_mib:
-    snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
+    free_percpu(net->mib.tcp_statistics);
 err_tcp_mib:
     return -ENOMEM;
 }
@@ -1632,12 +1596,12 @@ err_tcp_mib:
 static __net_exit void ipv4_mib_exit_net(struct net *net)
 {
     kfree(net->mib.icmpmsg_statistics);
-    snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
-    snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
-    snmp_mib_free((void __percpu **)net->mib.udp_statistics);
-    snmp_mib_free((void __percpu **)net->mib.net_statistics);
-    snmp_mib_free((void __percpu **)net->mib.ip_statistics);
-    snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
+    free_percpu(net->mib.icmp_statistics);
+    free_percpu(net->mib.udplite_statistics);
+    free_percpu(net->mib.udp_statistics);
+    free_percpu(net->mib.net_statistics);
+    free_percpu(net->mib.ip_statistics);
+    free_percpu(net->mib.tcp_statistics);
 }
 
 static __net_initdata struct pernet_operations ipv4_mib_ops = {
@@ -1736,13 +1700,9 @@ static int __init inet_init(void)
 
     BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));
 
-    sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
-    if (!sysctl_local_reserved_ports)
-       goto out;
-
     rc = proto_register(&tcp_prot, 1);
     if (rc)
-       goto out_free_reserved_ports;
+       goto out;
 
     rc = proto_register(&udp_prot, 1);
     if (rc)
@@ -1852,8 +1812,6 @@ out_unregister_udp_proto:
     proto_unregister(&udp_prot);
 out_unregister_tcp_proto:
     proto_unregister(&tcp_prot);
-out_free_reserved_ports:
-    kfree(sysctl_local_reserved_ports);
     goto out;
 }
 
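The common thread in the af_inet.c hunks above: SNMP MIB counters move from an array of two percpu blocks (the old SNMP_ARRAY_SZ scheme) to a single percpu allocation, so snmp_mib_init()/snmp_mib_free() disappear in favour of plain alloc_percpu()/free_percpu(). A minimal self-contained sketch of the resulting lifecycle, using a hypothetical two-counter MIB rather than the kernel's real structs:

    #include <linux/percpu.h>

    struct demo_mib {
        unsigned long mibs[2];  /* hypothetical: in-packets, out-packets */
    };

    static struct demo_mib __percpu *demo;

    static int demo_init(void)
    {
        demo = alloc_percpu(struct demo_mib);   /* one block covers all CPUs */
        return demo ? 0 : -ENOMEM;
    }

    /* exactly the loop snmp_fold_field() is reduced to in the hunk above */
    static unsigned long demo_fold(int offt)
    {
        unsigned long res = 0;
        int i;

        for_each_possible_cpu(i)
            res += *((unsigned long *)per_cpu_ptr(demo, i) + offt);
        return res;
    }

    static void demo_exit(void)
    {
        free_percpu(demo);      /* replaces snmp_mib_free() */
    }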
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 8b5134c582f1..a3095fdefbed 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -86,18 +86,26 @@ out:
 }
 EXPORT_SYMBOL(ip4_datagram_connect);
 
+/* Because UDP xmit path can manipulate sk_dst_cache without holding
+ * socket lock, we need to use sk_dst_set() here,
+ * even if we own the socket lock.
+ */
 void ip4_datagram_release_cb(struct sock *sk)
 {
     const struct inet_sock *inet = inet_sk(sk);
     const struct ip_options_rcu *inet_opt;
     __be32 daddr = inet->inet_daddr;
+    struct dst_entry *dst;
     struct flowi4 fl4;
     struct rtable *rt;
 
-    if (! __sk_dst_get(sk) || __sk_dst_check(sk, 0))
-       return;
-
     rcu_read_lock();
+
+    dst = __sk_dst_get(sk);
+    if (!dst || !dst->obsolete || dst->ops->check(dst, 0)) {
+       rcu_read_unlock();
+       return;
+    }
     inet_opt = rcu_dereference(inet->inet_opt);
     if (inet_opt && inet_opt->opt.srr)
        daddr = inet_opt->opt.faddr;
@@ -105,8 +113,10 @@ void ip4_datagram_release_cb(struct sock *sk)
                inet->inet_saddr, inet->inet_dport,
                inet->inet_sport, sk->sk_protocol,
                RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
-    if (!IS_ERR(rt))
-       __sk_dst_set(sk, &rt->dst);
+
+    dst = !IS_ERR(rt) ? &rt->dst : NULL;
+    sk_dst_set(sk, dst);
+
     rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(ip4_datagram_release_cb);
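The functional fix here is the switch from __sk_dst_set() to sk_dst_set(): as the new comment says, the UDP transmit path can replace sk->sk_dst_cache without the socket lock, so even a locked caller must use the atomic variant. A simplified sketch of what sk_dst_set() does (condensed from include/net/sock.h of this era, not a verbatim copy):

    static void sketch_sk_dst_set(struct sock *sk, struct dst_entry *dst)
    {
        struct dst_entry *old;

        /* atomic exchange: safe against concurrent lockless setters */
        old = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst);
        dst_release(old);   /* drop the reference of the entry we displaced */
    }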
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index bdbf68bb2e2d..e9449376b58e 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -106,7 +106,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
 
 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
-static DEFINE_SPINLOCK(inet_addr_hash_lock);
 
 static u32 inet_addr_hash(struct net *net, __be32 addr)
 {
@@ -119,16 +118,14 @@ static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
 {
     u32 hash = inet_addr_hash(net, ifa->ifa_local);
 
-    spin_lock(&inet_addr_hash_lock);
+    ASSERT_RTNL();
     hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
-    spin_unlock(&inet_addr_hash_lock);
 }
 
 static void inet_hash_remove(struct in_ifaddr *ifa)
 {
-    spin_lock(&inet_addr_hash_lock);
+    ASSERT_RTNL();
     hlist_del_init_rcu(&ifa->hash);
-    spin_unlock(&inet_addr_hash_lock);
 }
 
 /**
@@ -830,7 +827,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
     ifa_existing = find_matching_ifa(ifa);
     if (!ifa_existing) {
        /* It would be best to check for !NLM_F_CREATE here but
-        * userspace alreay relies on not having to provide this.
+        * userspace already relies on not having to provide this.
         */
        set_ifa_lifetime(ifa, valid_lft, prefered_lft);
        return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
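Dropping inet_addr_hash_lock is safe because every writer of inet_addr_lst already runs under the RTNL mutex and readers walk the hash under RCU; the spinlock protected nothing RTNL did not. ASSERT_RTNL() turns that assumption into a runtime check; a conceptual stand-in (hypothetical helper name, not the kernel macro verbatim):

    static inline void sketch_assert_rtnl(void)
    {
        /* writers must hold rtnl_mutex or the RCU hash update races */
        WARN_ONCE(!rtnl_is_locked(), "assertion failed: RTNL not held\n");
    }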
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 250be7421ab3..4e9619bca732 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -84,7 +84,8 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
            ptr--;
        }
        if (tpi->flags&TUNNEL_CSUM &&
-           !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
+           !(skb_shinfo(skb)->gso_type &
+             (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) {
            *ptr = 0;
            *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
                                                     skb->len, 0));
@@ -93,28 +94,6 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 }
 EXPORT_SYMBOL_GPL(gre_build_header);
 
-static __sum16 check_checksum(struct sk_buff *skb)
-{
-    __sum16 csum = 0;
-
-    switch (skb->ip_summed) {
-    case CHECKSUM_COMPLETE:
-       csum = csum_fold(skb->csum);
-
-       if (!csum)
-           break;
-       /* Fall through. */
-
-    case CHECKSUM_NONE:
-       skb->csum = 0;
-       csum = __skb_checksum_complete(skb);
-       skb->ip_summed = CHECKSUM_COMPLETE;
-       break;
-    }
-
-    return csum;
-}
-
 static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
                             bool *csum_err)
 {
@@ -141,7 +120,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 
     options = (__be32 *)(greh + 1);
     if (greh->flags & GRE_CSUM) {
-       if (check_checksum(skb)) {
+       if (skb_checksum_simple_validate(skb)) {
            *csum_err = true;
            return -EINVAL;
        }
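The same conversion repeats in icmp.c and igmp.c below: the open-coded CHECKSUM_COMPLETE/CHECKSUM_NONE switch becomes a call to the new core helper skb_checksum_simple_validate(). In rough outline the helper does what the deleted check_checksum() did (simplified sketch; the real inline in linux/skbuff.h is built on __skb_checksum_validate() and also caches the computed sum):

    /* returns 0 when the full-packet checksum is valid, non-zero otherwise */
    static __sum16 sketch_checksum_simple_validate(struct sk_buff *skb)
    {
        /* a hardware CHECKSUM_COMPLETE sum that folds to zero is proof */
        if (skb->ip_summed == CHECKSUM_COMPLETE && !csum_fold(skb->csum))
            return 0;

        /* otherwise compute the sum in software over the whole packet */
        skb->csum = 0;
        return __skb_checksum_complete(skb);
    }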
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index f1d32280cb54..eb92deb12666 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -42,6 +42,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
                   SKB_GSO_DODGY |
                   SKB_GSO_TCP_ECN |
                   SKB_GSO_GRE |
+                  SKB_GSO_GRE_CSUM |
                   SKB_GSO_IPIP)))
        goto out;
 
@@ -55,6 +56,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
        goto out;
 
     csum = !!(greh->flags & GRE_CSUM);
+    if (csum)
+       skb->encap_hdr_csum = 1;
 
     if (unlikely(!pskb_may_pull(skb, ghl)))
        goto out;
@@ -94,10 +97,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
            }
        }
 
-       greh = (struct gre_base_hdr *)(skb->data);
+       skb_reset_transport_header(skb);
+
+       greh = (struct gre_base_hdr *)
+           skb_transport_header(skb);
        pcsum = (__be32 *)(greh + 1);
        *pcsum = 0;
-       *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0));
+       *(__sum16 *)pcsum = gso_make_checksum(skb, 0);
     }
     __skb_push(skb, tnl_hlen - ghl);
 
@@ -125,10 +131,12 @@ static __sum16 gro_skb_checksum(struct sk_buff *skb)
                    csum_partial(skb->data, skb_gro_offset(skb), 0));
     sum = csum_fold(NAPI_GRO_CB(skb)->csum);
     if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) {
-       if (unlikely(!sum))
+       if (unlikely(!sum) && !skb->csum_complete_sw)
            netdev_rx_csum_fault(skb->dev);
-    } else
+    } else {
        skb->ip_summed = CHECKSUM_COMPLETE;
+       skb->csum_complete_sw = 1;
+    }
 
     return sum;
 }
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 0134663fdbce..79c3d947a481 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
     struct sock *sk;
     struct inet_sock *inet;
     __be32 daddr, saddr;
+    u32 mark = IP4_REPLY_MARK(net, skb->mark);
 
     if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
        return;
@@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
     icmp_param->data.icmph.checksum = 0;
 
     inet->tos = ip_hdr(skb)->tos;
+    sk->sk_mark = mark;
     daddr = ipc.addr = ip_hdr(skb)->saddr;
     saddr = fib_compute_spec_dst(skb);
     ipc.opt = NULL;
@@ -364,6 +366,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
        memset(&fl4, 0, sizeof(fl4));
        fl4.daddr = daddr;
        fl4.saddr = saddr;
+       fl4.flowi4_mark = mark;
        fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
        fl4.flowi4_proto = IPPROTO_ICMP;
        security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
@@ -382,7 +385,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
                                        struct flowi4 *fl4,
                                        struct sk_buff *skb_in,
                                        const struct iphdr *iph,
-                                       __be32 saddr, u8 tos,
+                                       __be32 saddr, u8 tos, u32 mark,
                                        int type, int code,
                                        struct icmp_bxm *param)
 {
@@ -394,6 +397,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
     fl4->daddr = (param->replyopts.opt.opt.srr ?
                   param->replyopts.opt.opt.faddr : iph->saddr);
     fl4->saddr = saddr;
+    fl4->flowi4_mark = mark;
     fl4->flowi4_tos = RT_TOS(tos);
     fl4->flowi4_proto = IPPROTO_ICMP;
     fl4->fl4_icmp_type = type;
@@ -491,6 +495,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
     struct flowi4 fl4;
     __be32 saddr;
     u8 tos;
+    u32 mark;
     struct net *net;
     struct sock *sk;
 
@@ -592,6 +597,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
     tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
                                        IPTOS_PREC_INTERNETCONTROL) :
                                       iph->tos;
+    mark = IP4_REPLY_MARK(net, skb_in->mark);
 
     if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in))
        goto out_unlock;
@@ -608,13 +614,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
     icmp_param->skb = skb_in;
     icmp_param->offset = skb_network_offset(skb_in);
     inet_sk(sk)->tos = tos;
+    sk->sk_mark = mark;
     ipc.addr = iph->saddr;
     ipc.opt = &icmp_param->replyopts.opt;
     ipc.tx_flags = 0;
     ipc.ttl = 0;
     ipc.tos = -1;
 
-    rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos,
+    rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
                            type, code, icmp_param);
     if (IS_ERR(rt))
        goto out_unlock;
@@ -908,16 +915,8 @@ int icmp_rcv(struct sk_buff *skb)
 
     ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS);
 
-    switch (skb->ip_summed) {
-    case CHECKSUM_COMPLETE:
-       if (!csum_fold(skb->csum))
-           break;
-       /* fall through */
-    case CHECKSUM_NONE:
-       skb->csum = 0;
-       if (__skb_checksum_complete(skb))
-           goto csum_error;
-    }
+    if (skb_checksum_simple_validate(skb))
+       goto csum_error;
 
     if (!pskb_pull(skb, sizeof(*icmph)))
        goto error;
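The mark plumbing above implements fwmark reflection for ICMP: when the per-netns fwmark_reflect sysctl is enabled, replies and errors inherit the mark of the packet that triggered them, so mark-based routing policy applies in both directions. The IP4_REPLY_MARK() macro these hunks rely on amounts to (as added in this series, include/net/ip.h):

    #define IP4_REPLY_MARK(net, mark) \
        ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)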
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 97e4d1655d26..6748d420f714 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -369,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
     pip->saddr = fl4.saddr;
     pip->protocol = IPPROTO_IGMP;
     pip->tot_len = 0;   /* filled in later */
-    ip_select_ident(skb, &rt->dst, NULL);
+    ip_select_ident(skb, NULL);
     ((u8 *)&pip[1])[0] = IPOPT_RA;
     ((u8 *)&pip[1])[1] = 4;
     ((u8 *)&pip[1])[2] = 0;
@@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
     iph->daddr = dst;
     iph->saddr = fl4.saddr;
     iph->protocol = IPPROTO_IGMP;
-    ip_select_ident(skb, &rt->dst, NULL);
+    ip_select_ident(skb, NULL);
     ((u8 *)&iph[1])[0] = IPOPT_RA;
     ((u8 *)&iph[1])[1] = 4;
     ((u8 *)&iph[1])[2] = 0;
@@ -988,16 +988,8 @@ int igmp_rcv(struct sk_buff *skb)
     if (!pskb_may_pull(skb, sizeof(struct igmphdr)))
        goto drop;
 
-    switch (skb->ip_summed) {
-    case CHECKSUM_COMPLETE:
-       if (!csum_fold(skb->csum))
-           break;
-       /* fall through */
-    case CHECKSUM_NONE:
-       skb->csum = 0;
-       if (__skb_checksum_complete(skb))
-           goto drop;
-    }
+    if (skb_checksum_simple_validate(skb))
+       goto drop;
 
     ih = igmp_hdr(skb);
     switch (ih->type) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a56b8e6e866a..14d02ea905b6 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -29,9 +29,6 @@ const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
 EXPORT_SYMBOL(inet_csk_timer_bug_msg);
 #endif
 
-unsigned long *sysctl_local_reserved_ports;
-EXPORT_SYMBOL(sysctl_local_reserved_ports);
-
 void inet_get_local_port_range(struct net *net, int *low, int *high)
 {
     unsigned int seq;
@@ -113,7 +110,7 @@ again:
 
        smallest_size = -1;
        do {
-           if (inet_is_reserved_local_port(rover))
+           if (inet_is_local_reserved_port(net, rover))
                goto next_nolock;
            head = &hashinfo->bhash[inet_bhashfn(net, rover,
                    hashinfo->bhash_size)];
@@ -408,7 +405,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
     struct net *net = sock_net(sk);
     int flags = inet_sk_flowi_flags(sk);
 
-    flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+    flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark,
                        RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
                        sk->sk_protocol,
                        flags,
@@ -445,7 +442,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
 
     rcu_read_lock();
     opt = rcu_dereference(newinet->inet_opt);
-    flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+    flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark,
                        RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
                        sk->sk_protocol, inet_sk_flowi_flags(sk),
                        (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
@@ -680,6 +677,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
        inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
        newsk->sk_write_space = sk_stream_write_space;
 
+       newsk->sk_mark = inet_rsk(req)->ir_mark;
+
        newicsk->icsk_retransmits = 0;
        newicsk->icsk_backoff = 0;
        newicsk->icsk_probes_out = 0;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 8b9cf279450d..43116e8c8e13 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -274,7 +274,7 @@ struct sock *__inet_lookup_established(struct net *net,
                                   const __be32 daddr, const u16 hnum,
                                   const int dif)
 {
-    INET_ADDR_COOKIE(acookie, saddr, daddr)
+    INET_ADDR_COOKIE(acookie, saddr, daddr);
     const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
     struct sock *sk;
     const struct hlist_nulls_node *node;
@@ -327,7 +327,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
     __be32 daddr = inet->inet_rcv_saddr;
     __be32 saddr = inet->inet_daddr;
     int dif = sk->sk_bound_dev_if;
-    INET_ADDR_COOKIE(acookie, saddr, daddr)
+    INET_ADDR_COOKIE(acookie, saddr, daddr);
     const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
     struct net *net = sock_net(sk);
     unsigned int hash = inet_ehashfn(net, daddr, lport,
@@ -500,7 +500,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
        local_bh_disable();
        for (i = 1; i <= remaining; i++) {
            port = low + (i + offset) % remaining;
-           if (inet_is_reserved_local_port(port))
+           if (inet_is_local_reserved_port(net, port))
                continue;
            head = &hinfo->bhash[inet_bhashfn(net, port,
                    hinfo->bhash_size)];
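inet_is_reserved_local_port() becoming inet_is_local_reserved_port(net, port) pairs with the removal of the global sysctl_local_reserved_ports bitmap in af_inet.c and inet_connection_sock.c above: the reserved-ports bitmap moves into struct netns_ipv4 so each network namespace gets its own. A sketch of the per-netns test the call sites switch to (field name per this series; the bitmap may be NULL until the sysctl is first written):

    static inline bool sketch_is_local_reserved_port(struct net *net, int port)
    {
        if (!net->ipv4.sysctl_local_reserved_ports)
            return false;
        return test_bit(port, net->ipv4.sysctl_local_reserved_ports);
    }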
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 56cd458a1b8c..bd5f5928167d 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -26,20 +26,7 @@
  * Theory of operations.
  * We keep one entry for each peer IP address. The nodes contains long-living
  * information about the peer which doesn't depend on routes.
- * At this moment this information consists only of ID field for the next
- * outgoing IP packet. This field is incremented with each packet as encoded
- * in inet_getid() function (include/net/inetpeer.h).
- * At the moment of writing this notes identifier of IP packets is generated
- * to be unpredictable using this code only for packets subjected
- * (actually or potentially) to defragmentation. I.e. DF packets less than
- * PMTU in size when local fragmentation is disabled use a constant ID and do
- * not use this code (see ip_select_ident() in include/net/ip.h).
  *
- * Route cache entries hold references to our nodes.
- * New cache entries get references via lookup by destination IP address in
- * the avl tree. The reference is grabbed only when it's needed i.e. only
- * when we try to output IP packet which needs an unpredictable ID (see
- * __ip_select_ident() in net/ipv4/route.c).
  * Nodes are removed only when reference counter goes to 0.
  * When it's happened the node may be removed when a sufficient amount of
  * time has been passed since its last use. The less-recently-used entry can
@@ -62,7 +49,6 @@
  *     refcnt: atomically against modifications on other CPU;
  *        usually under some other lock to prevent node disappearing
  *     daddr: unchangeable
- *     ip_id_count: atomic value (no lock needed)
  */
 
 static struct kmem_cache *peer_cachep __read_mostly;
@@ -120,7 +106,7 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min
 static void inetpeer_gc_worker(struct work_struct *work)
 {
     struct inet_peer *p, *n, *c;
-    LIST_HEAD(list);
+    struct list_head list;
 
     spin_lock_bh(&gc_lock);
     list_replace_init(&gc_list, &list);
@@ -497,10 +483,6 @@ relookup:
        p->daddr = *daddr;
        atomic_set(&p->refcnt, 1);
        atomic_set(&p->rid, 0);
-       atomic_set(&p->ip_id_count,
-               (daddr->family == AF_INET) ?
-                   secure_ip_id(daddr->addr.a4) :
-                   secure_ipv6_id(daddr->addr.a6));
        p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
        p->rate_tokens = 0;
        /* 60*HZ is arbitrary, but chosen enough high so that the first
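These inetpeer deletions pair with the ip_select_ident() signature changes in igmp.c above and in ip_output.c, ipmr.c and ip_tunnel_core.c below: the per-destination ip_id_count kept in the peer cache is gone, and IP IDs are instead drawn from a hashed array of counters, with a GSO packet reserving gso_segs consecutive IDs in one step. A simplified model of that reservation (the real generator in this series lives in net/ipv4/route.c and also mixes in a random jump based on idle time):

    static atomic_t sketch_idents[2048];    /* bucket count is illustrative */

    /* reserve 'segs' consecutive IDs for one (GSO) packet; return the first */
    static u32 sketch_ident_reserve(u32 hash, int segs)
    {
        atomic_t *id = &sketch_idents[hash % 2048];

        return atomic_add_return(segs, id) - segs;
    }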
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 6f111e48e11c..3a83ce5efa80 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -42,7 +42,7 @@
 static bool ip_may_fragment(const struct sk_buff *skb)
 {
     return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
-       skb->local_df;
+       skb->ignore_df;
 }
 
 static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 94213c891565..9b842544aea3 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -410,7 +410,7 @@ static int ipgre_open(struct net_device *dev)
        struct flowi4 fl4;
        struct rtable *rt;
 
-       rt = ip_route_output_gre(dev_net(dev), &fl4,
+       rt = ip_route_output_gre(t->net, &fl4,
                                 t->parms.iph.daddr,
                                 t->parms.iph.saddr,
                                 t->parms.o_key,
@@ -434,7 +434,7 @@ static int ipgre_close(struct net_device *dev)
 
     if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
        struct in_device *in_dev;
-       in_dev = inetdev_by_index(dev_net(dev), t->mlink);
+       in_dev = inetdev_by_index(t->net, t->mlink);
        if (in_dev)
            ip_mc_dec_group(in_dev, t->parms.iph.daddr);
     }
@@ -478,7 +478,7 @@ static void __gre_tunnel_init(struct net_device *dev)
     dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
     dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
 
-    dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES;
+    dev->features |= GRE_FEATURES;
     dev->hw_features |= GRE_FEATURES;
 
     if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
@@ -649,6 +649,7 @@ static void ipgre_tap_setup(struct net_device *dev)
 {
     ether_setup(dev);
     dev->netdev_ops = &gre_tap_netdev_ops;
+    dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
     ip_tunnel_setup(dev, gre_tap_net_id);
 }
 
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index f4ab72e19af9..5e7aecea05cd 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -364,7 +364,7 @@ int ip_options_compile(struct net *net,
            }
            if (optptr[2] <= optlen) {
                unsigned char *timeptr = NULL;
-               if (optptr[2]+3 > optptr[1]) {
+               if (optptr[2]+3 > optlen) {
                    pp_ptr = optptr + 2;
                    goto error;
                }
@@ -376,7 +376,7 @@ int ip_options_compile(struct net *net,
                    optptr[2] += 4;
                    break;
                case IPOPT_TS_TSANDADDR:
-                   if (optptr[2]+7 > optptr[1]) {
+                   if (optptr[2]+7 > optlen) {
                        pp_ptr = optptr + 2;
                        goto error;
                    }
@@ -390,7 +390,7 @@ int ip_options_compile(struct net *net,
                    optptr[2] += 8;
                    break;
                case IPOPT_TS_PRESPEC:
-                   if (optptr[2]+7 > optptr[1]) {
+                   if (optptr[2]+7 > optlen) {
                        pp_ptr = optptr + 2;
                        goto error;
                    }
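All three timestamp sub-cases now bound the record pointer against optlen, the amount of option space actually validated to be present, instead of optptr[1], the length byte read straight from the packet and therefore attacker-controlled. A worked illustration with hypothetical values:

    /* crafted option: the length byte claims 44, but only 8 bytes exist */
    unsigned char optptr[8] = { IPOPT_TIMESTAMP, 44, 5, 1 /* TSANDADDR */ };
    int optlen = 8;         /* space actually present */

    /* old check: optptr[2] + 7 > optptr[1] is 12 > 44, false -> the code
     * would read and write past the 8 real bytes.
     * new check: optptr[2] + 7 > optlen is 12 > 8, true -> rejected.
     */
    if (optptr[2] + 7 > optlen)
        /* goto error */;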
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a52f50187b54..8d3b6b0e9857 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
     iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
     iph->saddr = saddr;
     iph->protocol = sk->sk_protocol;
-    ip_select_ident(skb, &rt->dst, sk);
+    ip_select_ident(skb, sk);
 
     if (opt && opt->opt.optlen) {
        iph->ihl += opt->opt.optlen>>2;
@@ -415,7 +415,7 @@ packet_routed:
     skb_reset_network_header(skb);
     iph = ip_hdr(skb);
     *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
-    if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df)
+    if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df)
        iph->frag_off = htons(IP_DF);
     else
        iph->frag_off = 0;
@@ -430,8 +430,7 @@ packet_routed:
        ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
     }
 
-    ip_select_ident_more(skb, &rt->dst, sk,
-                         (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+    ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1);
 
     /* TODO : should we use skb->sk here instead of sk ? */
     skb->priority = sk->sk_priority;
@@ -501,7 +500,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
     iph = ip_hdr(skb);
 
     mtu = ip_skb_dst_mtu(skb);
-    if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) ||
+    if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
                  (IPCB(skb)->frag_max_size &&
                   IPCB(skb)->frag_max_size > mtu))) {
        IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
@@ -866,7 +865,7 @@ static int __ip_append_data(struct sock *sk,
 
     fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
     maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
-    maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu;
+    maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu;
 
     if (cork->length + length > maxnonfragsize - fragheaderlen) {
        ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -1189,7 +1188,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
 
     fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
     maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
-    maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu;
+    maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu;
 
     if (cork->length + size > maxnonfragsize - fragheaderlen) {
        ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -1350,10 +1349,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
      * to fragment the frame generated here. No matter, what transforms
      * how transforms change size of the packet, it will come out.
      */
-    skb->local_df = ip_sk_local_df(sk);
+    skb->ignore_df = ip_sk_ignore_df(sk);
 
     /* DF bit is set when we want to see DF on outgoing frames.
-     * If local_df is set too, we still allow to fragment this frame
+     * If ignore_df is set too, we still allow to fragment this frame
      * locally. */
     if (inet->pmtudisc == IP_PMTUDISC_DO ||
         inet->pmtudisc == IP_PMTUDISC_PROBE ||
@@ -1379,7 +1378,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
     iph->ttl = ttl;
     iph->protocol = sk->sk_protocol;
     ip_copy_addrs(iph, fl4);
-    ip_select_ident(skb, &rt->dst, sk);
+    ip_select_ident(skb, sk);
 
     if (opt) {
        iph->ihl += opt->optlen>>2;
@@ -1546,7 +1545,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
        daddr = replyopts.opt.opt.faddr;
     }
 
-    flowi4_init_output(&fl4, arg->bound_dev_if, 0,
+    flowi4_init_output(&fl4, arg->bound_dev_if,
+                       IP4_REPLY_MARK(net, skb->mark),
                        RT_TOS(arg->tos),
                        RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
                        ip_reply_arg_flowi_flags(arg),
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 2acc2337d38b..097b3e7c1e8f 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -268,6 +268,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
     __be32 remote = parms->iph.daddr;
     __be32 local = parms->iph.saddr;
     __be32 key = parms->i_key;
+    __be16 flags = parms->i_flags;
     int link = parms->link;
     struct ip_tunnel *t = NULL;
     struct hlist_head *head = ip_bucket(itn, parms);
@@ -275,9 +276,9 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
     hlist_for_each_entry_rcu(t, head, hash_node) {
        if (local == t->parms.iph.saddr &&
            remote == t->parms.iph.daddr &&
-           key == t->parms.i_key &&
            link == t->parms.link &&
-           type == t->dev->type)
+           type == t->dev->type &&
+           ip_tunnel_key_match(&t->parms, flags, key))
            break;
     }
     return t;
@@ -395,11 +396,10 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
                                           struct ip_tunnel_net *itn,
                                           struct ip_tunnel_parm *parms)
 {
-    struct ip_tunnel *nt, *fbt;
+    struct ip_tunnel *nt;
     struct net_device *dev;
 
     BUG_ON(!itn->fb_tunnel_dev);
-    fbt = netdev_priv(itn->fb_tunnel_dev);
     dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
     if (IS_ERR(dev))
        return ERR_CAST(dev);
@@ -668,6 +668,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
        dev->needed_headroom = max_headroom;
 
     if (skb_cow_head(skb, dev->needed_headroom)) {
+       ip_rt_put(rt);
        dev->stats.tx_dropped++;
        kfree_skb(skb);
        return;
@@ -747,19 +748,19 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
            goto done;
        if (p->iph.ttl)
            p->iph.frag_off |= htons(IP_DF);
-       if (!(p->i_flags&TUNNEL_KEY))
-           p->i_key = 0;
-       if (!(p->o_flags&TUNNEL_KEY))
-           p->o_key = 0;
+       if (!(p->i_flags & VTI_ISVTI)) {
+           if (!(p->i_flags & TUNNEL_KEY))
+               p->i_key = 0;
+           if (!(p->o_flags & TUNNEL_KEY))
+               p->o_key = 0;
+       }
 
        t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
 
        if (!t && (cmd == SIOCADDTUNNEL)) {
            t = ip_tunnel_create(net, itn, p);
-           if (IS_ERR(t)) {
-               err = PTR_ERR(t);
-               break;
-           }
+           err = PTR_ERR_OR_ZERO(t);
+           break;
        }
        if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
            if (t != NULL) {
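Two details worth noting above: the ip_rt_put() added on the skb_cow_head() failure path releases the route that would otherwise leak on every dropped packet, and the ioctl path's IS_ERR()/PTR_ERR() dance collapses into PTR_ERR_OR_ZERO(), which is essentially (sketch of the include/linux/err.h helper):

    /* 0 for a valid pointer, the encoded errno for an ERR_PTR() value */
    static inline int sketch_ptr_err_or_zero(const void *ptr)
    {
        if (IS_ERR(ptr))
            return PTR_ERR(ptr);
        return 0;
    }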
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index bcf206c79005..f4c987bb7e94 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -74,7 +74,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
     iph->daddr = dst;
     iph->saddr = src;
     iph->ttl = ttl;
-    __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+    __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
 
     err = ip_local_out_sk(sk, skb);
     if (unlikely(net_xmit_eval(err)))
@@ -135,6 +135,14 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
        return skb;
     }
 
+    /* If packet is not gso and we are resolving any partial checksum,
+     * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL
+     * on the outer header without confusing devices that implement
+     * NETIF_F_IP_CSUM with encapsulation.
+     */
+    if (csum_help)
+       skb->encapsulation = 0;
+
     if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) {
        err = skb_checksum_help(skb);
        if (unlikely(err))
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 13ef00f1e17b..b8960f3527f3 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -313,7 +313,13 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
            return -EINVAL;
     }
 
-    p.i_flags |= VTI_ISVTI;
+    if (!(p.i_flags & GRE_KEY))
+       p.i_key = 0;
+    if (!(p.o_flags & GRE_KEY))
+       p.o_key = 0;
+
+    p.i_flags = VTI_ISVTI;
+
     err = ip_tunnel_ioctl(dev, &p, cmd);
     if (err)
        return err;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 812b18351462..62eaa005e146 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -149,13 +149,13 @@ static int ipip_err(struct sk_buff *skb, u32 info)
 
     if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
        ipv4_update_pmtu(skb, dev_net(skb->dev), info,
-                        t->dev->ifindex, 0, IPPROTO_IPIP, 0);
+                        t->parms.link, 0, IPPROTO_IPIP, 0);
        err = 0;
        goto out;
     }
 
     if (type == ICMP_REDIRECT) {
-       ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
+       ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
                      IPPROTO_IPIP, 0);
        err = 0;
        goto out;
@@ -486,4 +486,5 @@ static void __exit ipip_fini(void)
 module_init(ipip_init);
 module_exit(ipip_fini);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("ipip");
 MODULE_ALIAS_NETDEV("tunl0");
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index d84dc8d4c916..65bcaa789043 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -484,7 +484,7 @@ static void reg_vif_setup(struct net_device *dev)
     dev->type = ARPHRD_PIMREG;
     dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
     dev->flags = IFF_NOARP;
-    dev->netdev_ops = &reg_vif_netdev_ops,
+    dev->netdev_ops = &reg_vif_netdev_ops;
     dev->destructor = free_netdev;
     dev->features |= NETIF_F_NETNS_LOCAL;
 }
@@ -1663,7 +1663,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
     iph->protocol = IPPROTO_IPIP;
     iph->ihl = 5;
     iph->tot_len = htons(skb->len);
-    ip_select_ident(skb, skb_dst(skb), NULL);
+    ip_select_ident(skb, NULL);
     ip_send_check(iph);
 
     memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index ee2886126e3d..f1787c04a4dd 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -91,17 +91,9 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops,
     if (nf_ct_is_untracked(ct))
        return NF_ACCEPT;
 
-    nat = nfct_nat(ct);
-    if (!nat) {
-       /* NAT module was loaded late. */
-       if (nf_ct_is_confirmed(ct))
-           return NF_ACCEPT;
-       nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
-       if (nat == NULL) {
-           pr_debug("failed to add NAT extension\n");
-           return NF_ACCEPT;
-       }
-    }
+    nat = nf_ct_nat_ext_add(ct);
+    if (nat == NULL)
+       return NF_ACCEPT;
 
     switch (ctinfo) {
     case IP_CT_RELATED:
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index f40f321b41fc..b8f6381c7d0b 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -34,7 +34,7 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
 
     if (!err) {
        ip_send_check(ip_hdr(skb));
-       skb->local_df = 1;
+       skb->ignore_df = 1;
     }
 
     return err;
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index b5b256d45e67..3964157d826c 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -48,15 +48,9 @@ static unsigned int nf_nat_fn(const struct nf_hook_ops *ops,
 
     NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
 
-    nat = nfct_nat(ct);
-    if (nat == NULL) {
-       /* Conntrack module was loaded late, can't add extension. */
-       if (nf_ct_is_confirmed(ct))
-           return NF_ACCEPT;
-       nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
-       if (nat == NULL)
-           return NF_ACCEPT;
-    }
+    nat = nf_ct_nat_ext_add(ct);
+    if (nat == NULL)
+       return NF_ACCEPT;
 
     switch (ctinfo) {
     case IP_CT_RELATED:
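Both NAT hooks (iptable_nat.c above and this nftables chain) delete the same open-coded "NAT extension loaded late" dance in favour of nf_ct_nat_ext_add(). The helper factors out exactly the removed logic (sketch mirroring the deleted lines; the real function is added elsewhere in this series):

    static struct nf_conn_nat *sketch_nat_ext_add(struct nf_conn *ct)
    {
        struct nf_conn_nat *nat = nfct_nat(ct);

        if (nat)
            return nat;
        if (nf_ct_is_confirmed(ct))    /* too late to add an extension */
            return NULL;
        return nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
    }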
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ad737fad6d8b..ae0af9386f7c 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -345,15 +345,15 @@ static void icmp_put(struct seq_file *seq)
     for (i = 0; icmpmibmap[i].name != NULL; i++)
        seq_printf(seq, " Out%s", icmpmibmap[i].name);
     seq_printf(seq, "\nIcmp: %lu %lu %lu",
-               snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS),
-               snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS),
-               snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
+               snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS),
+               snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS),
+               snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
     for (i = 0; icmpmibmap[i].name != NULL; i++)
        seq_printf(seq, " %lu",
                   atomic_long_read(ptr + icmpmibmap[i].index));
     seq_printf(seq, " %lu %lu",
-               snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
-               snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
+               snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
+               snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
     for (i = 0; icmpmibmap[i].name != NULL; i++)
        seq_printf(seq, " %lu",
                   atomic_long_read(ptr + (icmpmibmap[i].index | 0x100)));
@@ -379,7 +379,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
     BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
     for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
        seq_printf(seq, " %llu",
-                  snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
+                  snmp_fold_field64(net->mib.ip_statistics,
                                     snmp4_ipstats_list[i].entry,
                                     offsetof(struct ipstats_mib, syncp)));
 
@@ -395,11 +395,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
        /* MaxConn field is signed, RFC 2012 */
        if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
            seq_printf(seq, " %ld",
-                      snmp_fold_field((void __percpu **)net->mib.tcp_statistics,
+                      snmp_fold_field(net->mib.tcp_statistics,
                                       snmp4_tcp_list[i].entry));
        else
            seq_printf(seq, " %lu",
-                      snmp_fold_field((void __percpu **)net->mib.tcp_statistics,
+                      snmp_fold_field(net->mib.tcp_statistics,
                                       snmp4_tcp_list[i].entry));
     }
 
@@ -410,7 +410,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
     seq_puts(seq, "\nUdp:");
     for (i = 0; snmp4_udp_list[i].name != NULL; i++)
        seq_printf(seq, " %lu",
-                  snmp_fold_field((void __percpu **)net->mib.udp_statistics,
+                  snmp_fold_field(net->mib.udp_statistics,
                                   snmp4_udp_list[i].entry));
 
     /* the UDP and UDP-Lite MIBs are the same */
@@ -421,7 +421,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
     seq_puts(seq, "\nUdpLite:");
     for (i = 0; snmp4_udp_list[i].name != NULL; i++)
        seq_printf(seq, " %lu",
-                  snmp_fold_field((void __percpu **)net->mib.udplite_statistics,
+                  snmp_fold_field(net->mib.udplite_statistics,
                                   snmp4_udp_list[i].entry));
 
     seq_putc(seq, '\n');
@@ -458,7 +458,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
     seq_puts(seq, "\nTcpExt:");
     for (i = 0; snmp4_net_list[i].name != NULL; i++)
460 seq_printf(seq, " %lu", 460 seq_printf(seq, " %lu",
461 snmp_fold_field((void __percpu **)net->mib.net_statistics, 461 snmp_fold_field(net->mib.net_statistics,
462 snmp4_net_list[i].entry)); 462 snmp4_net_list[i].entry));
463 463
464 seq_puts(seq, "\nIpExt:"); 464 seq_puts(seq, "\nIpExt:");
@@ -468,7 +468,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
468 seq_puts(seq, "\nIpExt:"); 468 seq_puts(seq, "\nIpExt:");
469 for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) 469 for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
470 seq_printf(seq, " %llu", 470 seq_printf(seq, " %llu",
471 snmp_fold_field64((void __percpu **)net->mib.ip_statistics, 471 snmp_fold_field64(net->mib.ip_statistics,
472 snmp4_ipextstats_list[i].entry, 472 snmp4_ipextstats_list[i].entry,
473 offsetof(struct ipstats_mib, syncp))); 473 offsetof(struct ipstats_mib, syncp)));
474 474
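
The dropped (void __percpu **) casts reflect the reworked SNMP API: the per-net MIBs are now plain percpu pointers and snmp_fold_field() takes a void __percpu * directly. A sketch of the folding loop under that assumption:

unsigned long snmp_fold_field(void __percpu *mib, int offt)
{
	unsigned long res = 0;
	int i;

	/* sum one counter slot across all possible CPUs */
	for_each_possible_cpu(i)
		res += *(((unsigned long *)per_cpu_ptr(mib, i)) + offt);
	return res;
}
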
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a9dbe58bdfe7..2c65160565e1 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -389,7 +389,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
389 iph->check = 0; 389 iph->check = 0;
390 iph->tot_len = htons(length); 390 iph->tot_len = htons(length);
391 if (!iph->id) 391 if (!iph->id)
392 ip_select_ident(skb, &rt->dst, NULL); 392 ip_select_ident(skb, NULL);
393 393
394 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 394 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
395 } 395 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5e676be3daeb..082239ffe34a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -89,6 +89,7 @@
89#include <linux/rcupdate.h> 89#include <linux/rcupdate.h>
90#include <linux/times.h> 90#include <linux/times.h>
91#include <linux/slab.h> 91#include <linux/slab.h>
92#include <linux/jhash.h>
92#include <net/dst.h> 93#include <net/dst.h>
93#include <net/net_namespace.h> 94#include <net/net_namespace.h>
94#include <net/protocol.h> 95#include <net/protocol.h>
@@ -456,39 +457,19 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
456 return neigh_create(&arp_tbl, pkey, dev); 457 return neigh_create(&arp_tbl, pkey, dev);
457} 458}
458 459
459/* 460atomic_t *ip_idents __read_mostly;
460 * Peer allocation may fail only in serious out-of-memory conditions. However 461EXPORT_SYMBOL(ip_idents);
461 * we still can generate some output.
462 * Random ID selection looks a bit dangerous because we have no chances to
463 * select ID being unique in a reasonable period of time.
464 * But broken packet identifier may be better than no packet at all.
465 */
466static void ip_select_fb_ident(struct iphdr *iph)
467{
468 static DEFINE_SPINLOCK(ip_fb_id_lock);
469 static u32 ip_fallback_id;
470 u32 salt;
471
472 spin_lock_bh(&ip_fb_id_lock);
473 salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
474 iph->id = htons(salt & 0xFFFF);
475 ip_fallback_id = salt;
476 spin_unlock_bh(&ip_fb_id_lock);
477}
478 462
479void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) 463void __ip_select_ident(struct iphdr *iph, int segs)
480{ 464{
481 struct net *net = dev_net(dst->dev); 465 static u32 ip_idents_hashrnd __read_mostly;
482 struct inet_peer *peer; 466 u32 hash, id;
483 467
484 peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1); 468 net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
485 if (peer) {
486 iph->id = htons(inet_getid(peer, more));
487 inet_putpeer(peer);
488 return;
489 }
490 469
491 ip_select_fb_ident(iph); 470 hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd);
471 id = ip_idents_reserve(hash, segs);
472 iph->id = htons(id);
492} 473}
493EXPORT_SYMBOL(__ip_select_ident); 474EXPORT_SYMBOL(__ip_select_ident);
494 475
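
__ip_select_ident() now hashes the destination into a shared array of ID generators and reserves one identifier per GSO segment in a single step. ip_idents_reserve() lives outside this hunk; a minimal sketch, assuming an IP_IDENTS_SZ-bucket array and a plain atomic reservation:

#define IP_IDENTS_SZ 2048u	/* assumed bucket count */

static inline u32 ip_idents_reserve(u32 hash, int segs)
{
	atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ;

	/* reserve 'segs' consecutive IDs and return the first one */
	return atomic_add_return(segs, id_ptr) - segs;
}
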
@@ -993,6 +974,9 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
993 struct flowi4 fl4; 974 struct flowi4 fl4;
994 struct rtable *rt; 975 struct rtable *rt;
995 976
977 if (!mark)
978 mark = IP4_REPLY_MARK(net, skb->mark);
979
996 __build_flow_key(&fl4, NULL, iph, oif, 980 __build_flow_key(&fl4, NULL, iph, oif,
997 RT_TOS(iph->tos), protocol, mark, flow_flags); 981 RT_TOS(iph->tos), protocol, mark, flow_flags);
998 rt = __ip_route_output_key(net, &fl4); 982 rt = __ip_route_output_key(net, &fl4);
@@ -1010,6 +994,10 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1010 struct rtable *rt; 994 struct rtable *rt;
1011 995
1012 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); 996 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
997
998 if (!fl4.flowi4_mark)
999 fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
1000
1013 rt = __ip_route_output_key(sock_net(sk), &fl4); 1001 rt = __ip_route_output_key(sock_net(sk), &fl4);
1014 if (!IS_ERR(rt)) { 1002 if (!IS_ERR(rt)) {
1015 __ip_rt_update_pmtu(rt, &fl4, mtu); 1003 __ip_rt_update_pmtu(rt, &fl4, mtu);
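
Both PMTU paths fall back to a reflected mark when the flow carries none. IP4_REPLY_MARK() is presumably gated on the fwmark_reflect knob added to the sysctl table below:

/* assumed definition: reflect the mark only when the netns opts in */
#define IP4_REPLY_MARK(net, mark) \
	((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
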
@@ -2704,6 +2692,12 @@ int __init ip_rt_init(void)
2704{ 2692{
2705 int rc = 0; 2693 int rc = 0;
2706 2694
2695 ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
2696 if (!ip_idents)
2697 panic("IP: failed to allocate ip_idents\n");
2698
2699 prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
2700
2707#ifdef CONFIG_IP_ROUTE_CLASSID 2701#ifdef CONFIG_IP_ROUTE_CLASSID
2708 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); 2702 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
2709 if (!ip_rt_acct) 2703 if (!ip_rt_acct)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f2ed13c2125f..c86624b36a62 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -303,6 +303,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
303 ireq->ir_rmt_port = th->source; 303 ireq->ir_rmt_port = th->source;
304 ireq->ir_loc_addr = ip_hdr(skb)->daddr; 304 ireq->ir_loc_addr = ip_hdr(skb)->daddr;
305 ireq->ir_rmt_addr = ip_hdr(skb)->saddr; 305 ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
306 ireq->ir_mark = inet_request_mark(sk, skb);
306 ireq->ecn_ok = ecn_ok; 307 ireq->ecn_ok = ecn_ok;
307 ireq->snd_wscale = tcp_opt.snd_wscale; 308 ireq->snd_wscale = tcp_opt.snd_wscale;
308 ireq->sack_ok = tcp_opt.sack_ok; 309 ireq->sack_ok = tcp_opt.sack_ok;
@@ -339,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
339 * hasn't changed since we received the original syn, but I see 340 * hasn't changed since we received the original syn, but I see
340 * no easy way to do this. 341 * no easy way to do this.
341 */ 342 */
342 flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, 343 flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark,
343 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, 344 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
344 inet_sk_flowi_flags(sk), 345 inet_sk_flowi_flags(sk),
345 (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr, 346 (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr,
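
inet_request_mark() picks the mark a passively-opened connection inherits. A sketch, assuming it prefers the listener's own mark and falls back to the SYN's mark only when tcp_fwmark_accept is enabled:

static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb)
{
	if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
		return skb->mark;	/* inherit the SYN's fwmark */

	return sk->sk_mark;
}
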
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5cde8f263d40..79a007c52558 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -437,13 +437,6 @@ static struct ctl_table ipv4_table[] = {
437 .proc_handler = proc_dointvec 437 .proc_handler = proc_dointvec
438 }, 438 },
439 { 439 {
440 .procname = "ip_local_reserved_ports",
441 .data = NULL, /* initialized in sysctl_ipv4_init */
442 .maxlen = 65536,
443 .mode = 0644,
444 .proc_handler = proc_do_large_bitmap,
445 },
446 {
447 .procname = "igmp_max_memberships", 440 .procname = "igmp_max_memberships",
448 .data = &sysctl_igmp_max_memberships, 441 .data = &sysctl_igmp_max_memberships,
449 .maxlen = sizeof(int), 442 .maxlen = sizeof(int),
@@ -825,6 +818,13 @@ static struct ctl_table ipv4_net_table[] = {
825 .proc_handler = ipv4_local_port_range, 818 .proc_handler = ipv4_local_port_range,
826 }, 819 },
827 { 820 {
821 .procname = "ip_local_reserved_ports",
822 .data = &init_net.ipv4.sysctl_local_reserved_ports,
823 .maxlen = 65536,
824 .mode = 0644,
825 .proc_handler = proc_do_large_bitmap,
826 },
827 {
828 .procname = "ip_no_pmtu_disc", 828 .procname = "ip_no_pmtu_disc",
829 .data = &init_net.ipv4.sysctl_ip_no_pmtu_disc, 829 .data = &init_net.ipv4.sysctl_ip_no_pmtu_disc,
830 .maxlen = sizeof(int), 830 .maxlen = sizeof(int),
@@ -838,6 +838,20 @@ static struct ctl_table ipv4_net_table[] = {
838 .mode = 0644, 838 .mode = 0644,
839 .proc_handler = proc_dointvec, 839 .proc_handler = proc_dointvec,
840 }, 840 },
841 {
842 .procname = "fwmark_reflect",
843 .data = &init_net.ipv4.sysctl_fwmark_reflect,
844 .maxlen = sizeof(int),
845 .mode = 0644,
846 .proc_handler = proc_dointvec,
847 },
848 {
849 .procname = "tcp_fwmark_accept",
850 .data = &init_net.ipv4.sysctl_tcp_fwmark_accept,
851 .maxlen = sizeof(int),
852 .mode = 0644,
853 .proc_handler = proc_dointvec,
854 },
841 { } 855 { }
842}; 856};
843 857
@@ -862,8 +876,14 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
862 if (net->ipv4.ipv4_hdr == NULL) 876 if (net->ipv4.ipv4_hdr == NULL)
863 goto err_reg; 877 goto err_reg;
864 878
879 net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
880 if (!net->ipv4.sysctl_local_reserved_ports)
881 goto err_ports;
882
865 return 0; 883 return 0;
866 884
885err_ports:
886 unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
867err_reg: 887err_reg:
868 if (!net_eq(net, &init_net)) 888 if (!net_eq(net, &init_net))
869 kfree(table); 889 kfree(table);
@@ -875,6 +895,7 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net)
875{ 895{
876 struct ctl_table *table; 896 struct ctl_table *table;
877 897
898 kfree(net->ipv4.sysctl_local_reserved_ports);
878 table = net->ipv4.ipv4_hdr->ctl_table_arg; 899 table = net->ipv4.ipv4_hdr->ctl_table_arg;
879 unregister_net_sysctl_table(net->ipv4.ipv4_hdr); 900 unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
880 kfree(table); 901 kfree(table);
@@ -888,16 +909,6 @@ static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
888static __init int sysctl_ipv4_init(void) 909static __init int sysctl_ipv4_init(void)
889{ 910{
890 struct ctl_table_header *hdr; 911 struct ctl_table_header *hdr;
891 struct ctl_table *i;
892
893 for (i = ipv4_table; i->procname; i++) {
894 if (strcmp(i->procname, "ip_local_reserved_ports") == 0) {
895 i->data = sysctl_local_reserved_ports;
896 break;
897 }
898 }
899 if (!i->procname)
900 return -EINVAL;
901 912
902 hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table); 913 hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
903 if (hdr == NULL) 914 if (hdr == NULL)
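
With ip_local_reserved_ports now per network namespace, each netns carries its own 64 KiB bitmap. proc_do_large_bitmap accepts comma-separated values and ranges, so a hypothetical userspace writer looks like:

#include <stdio.h>

/* keep the ephemeral allocator away from 8080 and 9000-9100
 * in the current namespace (hypothetical example)
 */
int reserve_local_ports(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/ip_local_reserved_ports", "w");

	if (!f)
		return -1;
	fputs("8080,9000-9100", f);
	return fclose(f);
}
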
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4bd6d52eeffb..eb1dde37e678 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2916,6 +2916,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2916 case TCP_USER_TIMEOUT: 2916 case TCP_USER_TIMEOUT:
2917 val = jiffies_to_msecs(icsk->icsk_user_timeout); 2917 val = jiffies_to_msecs(icsk->icsk_user_timeout);
2918 break; 2918 break;
2919
2920 case TCP_FASTOPEN:
2921 if (icsk->icsk_accept_queue.fastopenq != NULL)
2922 val = icsk->icsk_accept_queue.fastopenq->max_qlen;
2923 else
2924 val = 0;
2925 break;
2926
2919 case TCP_TIMESTAMP: 2927 case TCP_TIMESTAMP:
2920 val = tcp_time_stamp + tp->tsoffset; 2928 val = tcp_time_stamp + tp->tsoffset;
2921 break; 2929 break;
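
The new branch makes the TFO server queue length readable, pairing with the existing setsockopt(TCP_FASTOPEN). A hypothetical caller:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* returns the listener's max pending-TFO queue length, 0 if unset */
static int tfo_max_qlen(int fd)
{
	int qlen = 0;
	socklen_t len = sizeof(qlen);

	if (getsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, &len) < 0)
		return -1;
	return qlen;
}
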
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 821846fb0a7e..d5de69bc04f5 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -140,13 +140,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
140 ca->cnt = 1; 140 ca->cnt = 1;
141} 141}
142 142
143static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, 143static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
144 u32 in_flight)
145{ 144{
146 struct tcp_sock *tp = tcp_sk(sk); 145 struct tcp_sock *tp = tcp_sk(sk);
147 struct bictcp *ca = inet_csk_ca(sk); 146 struct bictcp *ca = inet_csk_ca(sk);
148 147
149 if (!tcp_is_cwnd_limited(sk, in_flight)) 148 if (!tcp_is_cwnd_limited(sk))
150 return; 149 return;
151 150
152 if (tp->snd_cwnd <= tp->snd_ssthresh) 151 if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 2b9464c93b88..7b09d8b49fa5 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -276,26 +276,6 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
276 return err; 276 return err;
277} 277}
278 278
279/* RFC2861 Check whether we are limited by application or congestion window
280 * This is the inverse of cwnd check in tcp_tso_should_defer
281 */
282bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
283{
284 const struct tcp_sock *tp = tcp_sk(sk);
285 u32 left;
286
287 if (in_flight >= tp->snd_cwnd)
288 return true;
289
290 left = tp->snd_cwnd - in_flight;
291 if (sk_can_gso(sk) &&
292 left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
293 left < tp->xmit_size_goal_segs)
294 return true;
295 return left <= tcp_max_tso_deferred_mss(tp);
296}
297EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
298
299/* Slow start is used when congestion window is no greater than the slow start 279/* Slow start is used when congestion window is no greater than the slow start
300 * threshold. We base on RFC2581 and also handle stretch ACKs properly. 280 * threshold. We base on RFC2581 and also handle stretch ACKs properly.
301 * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but 281 * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but
@@ -337,11 +317,11 @@ EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
337/* This is Jacobson's slow start and congestion avoidance. 317/* This is Jacobson's slow start and congestion avoidance.
338 * SIGCOMM '88, p. 328. 318 * SIGCOMM '88, p. 328.
339 */ 319 */
340void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) 320void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
341{ 321{
342 struct tcp_sock *tp = tcp_sk(sk); 322 struct tcp_sock *tp = tcp_sk(sk);
343 323
344 if (!tcp_is_cwnd_limited(sk, in_flight)) 324 if (!tcp_is_cwnd_limited(sk))
345 return; 325 return;
346 326
347 /* In "safe" area, increase. */ 327 /* In "safe" area, increase. */
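
With the RFC2861 window arithmetic gone, congestion modules call a parameterless tcp_is_cwnd_limited() that trusts state recorded at transmit time. A sketch of the replacement predicate (field names assumed from this series):

static inline bool tcp_is_cwnd_limited(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	/* in slow start, keep growing until cwnd covers twice
	 * what the last window actually had in flight
	 */
	if (tp->snd_cwnd <= tp->snd_ssthresh)
		return tp->snd_cwnd < 2 * tp->max_packets_out;

	/* otherwise trust the flag set by the output path */
	return tp->is_cwnd_limited;
}
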
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index b4f1b29b08bd..a9bd8a4828a9 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -304,13 +304,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
304 ca->cnt = 1; 304 ca->cnt = 1;
305} 305}
306 306
307static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, 307static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
308 u32 in_flight)
309{ 308{
310 struct tcp_sock *tp = tcp_sk(sk); 309 struct tcp_sock *tp = tcp_sk(sk);
311 struct bictcp *ca = inet_csk_ca(sk); 310 struct bictcp *ca = inet_csk_ca(sk);
312 311
313 if (!tcp_is_cwnd_limited(sk, in_flight)) 312 if (!tcp_is_cwnd_limited(sk))
314 return; 313 return;
315 314
316 if (tp->snd_cwnd <= tp->snd_ssthresh) { 315 if (tp->snd_cwnd <= tp->snd_ssthresh) {
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index f195d9316e55..62e48cf84e60 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -72,25 +72,224 @@ error: kfree(ctx);
72 return err; 72 return err;
73} 73}
74 74
75/* Computes the fastopen cookie for the IP path. 75static bool __tcp_fastopen_cookie_gen(const void *path,
76 * The path is a 128 bits long (pad with zeros for IPv4). 76 struct tcp_fastopen_cookie *foc)
77 *
78 * The caller must check foc->len to determine if a valid cookie
79 * has been generated successfully.
80*/
81void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
82 struct tcp_fastopen_cookie *foc)
83{ 77{
84 __be32 path[4] = { src, dst, 0, 0 };
85 struct tcp_fastopen_context *ctx; 78 struct tcp_fastopen_context *ctx;
79 bool ok = false;
86 80
87 tcp_fastopen_init_key_once(true); 81 tcp_fastopen_init_key_once(true);
88 82
89 rcu_read_lock(); 83 rcu_read_lock();
90 ctx = rcu_dereference(tcp_fastopen_ctx); 84 ctx = rcu_dereference(tcp_fastopen_ctx);
91 if (ctx) { 85 if (ctx) {
92 crypto_cipher_encrypt_one(ctx->tfm, foc->val, (__u8 *)path); 86 crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
93 foc->len = TCP_FASTOPEN_COOKIE_SIZE; 87 foc->len = TCP_FASTOPEN_COOKIE_SIZE;
88 ok = true;
94 } 89 }
95 rcu_read_unlock(); 90 rcu_read_unlock();
91 return ok;
92}
93
94/* Generate the fastopen cookie by doing aes128 encryption on both
95 * the source and destination addresses. Pad 0s for IPv4 or IPv4-mapped-IPv6
96 * addresses. For the longer IPv6 addresses use CBC-MAC.
97 *
98 * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
99 */
100static bool tcp_fastopen_cookie_gen(struct request_sock *req,
101 struct sk_buff *syn,
102 struct tcp_fastopen_cookie *foc)
103{
104 if (req->rsk_ops->family == AF_INET) {
105 const struct iphdr *iph = ip_hdr(syn);
106
107 __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
108 return __tcp_fastopen_cookie_gen(path, foc);
109 }
110
111#if IS_ENABLED(CONFIG_IPV6)
112 if (req->rsk_ops->family == AF_INET6) {
113 const struct ipv6hdr *ip6h = ipv6_hdr(syn);
114 struct tcp_fastopen_cookie tmp;
115
116 if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) {
117 struct in6_addr *buf = (struct in6_addr *) tmp.val;
118 int i;
119
120 for (i = 0; i < 4; i++)
121 buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
122 return __tcp_fastopen_cookie_gen(buf, foc);
123 }
124 }
125#endif
126 return false;
127}
128
129static bool tcp_fastopen_create_child(struct sock *sk,
130 struct sk_buff *skb,
131 struct dst_entry *dst,
132 struct request_sock *req)
133{
134 struct tcp_sock *tp = tcp_sk(sk);
135 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
136 struct sock *child;
137
138 req->num_retrans = 0;
139 req->num_timeout = 0;
140 req->sk = NULL;
141
142 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
143 if (child == NULL)
144 return false;
145
146 spin_lock(&queue->fastopenq->lock);
147 queue->fastopenq->qlen++;
148 spin_unlock(&queue->fastopenq->lock);
149
150 /* Initialize the child socket. Have to fix some values to take
151 * into account that the child is a Fast Open socket and is created
152 * only out of the bits carried in the SYN packet.
153 */
154 tp = tcp_sk(child);
155
156 tp->fastopen_rsk = req;
157 /* Do a hold on the listener sk so that if the listener is being
158 * closed, the child that has been accepted can live on and still
159 * access listen_lock.
160 */
161 sock_hold(sk);
162 tcp_rsk(req)->listener = sk;
163
164 /* RFC1323: The window in SYN & SYN/ACK segments is never
165 * scaled. So correct it appropriately.
166 */
167 tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
168
169 /* Activate the retrans timer so that SYNACK can be retransmitted.
170 * The request socket is not added to the SYN table of the parent
171 * because it's been added to the accept queue directly.
172 */
173 inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
174 TCP_TIMEOUT_INIT, TCP_RTO_MAX);
175
176 /* Add the child socket directly into the accept queue */
177 inet_csk_reqsk_queue_add(sk, req, child);
178
179 /* Now finish processing the fastopen child socket. */
180 inet_csk(child)->icsk_af_ops->rebuild_header(child);
181 tcp_init_congestion_control(child);
182 tcp_mtup_init(child);
183 tcp_init_metrics(child);
184 tcp_init_buffer_space(child);
185
186 /* Queue the data carried in the SYN packet. We need to first
187 * bump skb's refcnt because the caller will attempt to free it.
188 *
189 * XXX (TFO) - we honor a zero-payload TFO request for now
190 * (any reason not to?), but there is no need to queue the skb
191 * since there is no data. How about SYN+FIN?
192 */
193 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1) {
194 skb = skb_get(skb);
195 skb_dst_drop(skb);
196 __skb_pull(skb, tcp_hdr(skb)->doff * 4);
197 skb_set_owner_r(skb, child);
198 __skb_queue_tail(&child->sk_receive_queue, skb);
199 tp->syn_data_acked = 1;
200 }
201 tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
202 sk->sk_data_ready(sk);
203 bh_unlock_sock(child);
204 sock_put(child);
205 WARN_ON(req->sk == NULL);
206 return true;
207}
208EXPORT_SYMBOL(tcp_fastopen_create_child);
209
210static bool tcp_fastopen_queue_check(struct sock *sk)
211{
212 struct fastopen_queue *fastopenq;
213
214 /* Make sure the listener has enabled fastopen, and we don't
215 * exceed the max # of pending TFO requests allowed before trying
216 * to validate the cookie, in order to avoid burning CPU cycles
217 * unnecessarily.
218 *
219 * XXX (TFO) - The implication of checking the max_qlen before
220 * processing a cookie request is that clients can't differentiate
221 * between qlen overflow causing Fast Open to be disabled
222 * temporarily vs a server not supporting Fast Open at all.
223 */
224 fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
225 if (fastopenq == NULL || fastopenq->max_qlen == 0)
226 return false;
227
228 if (fastopenq->qlen >= fastopenq->max_qlen) {
229 struct request_sock *req1;
230 spin_lock(&fastopenq->lock);
231 req1 = fastopenq->rskq_rst_head;
232 if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
233 spin_unlock(&fastopenq->lock);
234 NET_INC_STATS_BH(sock_net(sk),
235 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
236 return false;
237 }
238 fastopenq->rskq_rst_head = req1->dl_next;
239 fastopenq->qlen--;
240 spin_unlock(&fastopenq->lock);
241 reqsk_free(req1);
242 }
243 return true;
244}
245
246/* Returns true if we should perform Fast Open on the SYN. The cookie (foc)
247 * may be updated and returned to the client in the SYN-ACK later, e.g. for a
248 * Fast Open cookie request (foc->len == 0).
249 */
250bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
251 struct request_sock *req,
252 struct tcp_fastopen_cookie *foc,
253 struct dst_entry *dst)
254{
255 struct tcp_fastopen_cookie valid_foc = { .len = -1 };
256 bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
257
258 if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
259 (syn_data || foc->len >= 0) &&
260 tcp_fastopen_queue_check(sk))) {
261 foc->len = -1;
262 return false;
263 }
264
265 if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
266 goto fastopen;
267
268 if (tcp_fastopen_cookie_gen(req, skb, &valid_foc) &&
269 foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
270 foc->len == valid_foc.len &&
271 !memcmp(foc->val, valid_foc.val, foc->len)) {
272 /* Cookie is valid. Create a (full) child socket to accept
273 * the data in SYN before returning a SYN-ACK to ack the
274 * data. If we fail to create the socket, fall back and
275 * ack the ISN only but include the same cookie.
276 *
277 * Note: Data-less SYN with valid cookie is allowed to send
278 * data in SYN_RECV state.
279 */
280fastopen:
281 if (tcp_fastopen_create_child(sk, skb, dst, req)) {
282 foc->len = -1;
283 NET_INC_STATS_BH(sock_net(sk),
284 LINUX_MIB_TCPFASTOPENPASSIVE);
285 return true;
286 }
287 }
288
289 NET_INC_STATS_BH(sock_net(sk), foc->len ?
290 LINUX_MIB_TCPFASTOPENPASSIVEFAIL :
291 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
292 *foc = valid_foc;
293 return false;
96} 294}
295EXPORT_SYMBOL(tcp_try_fastopen);
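
The server path above pairs with the client-side MSG_FASTOPEN API, which carries data in the SYN and falls back to a normal handshake when no valid cookie is cached. A minimal client sketch:

#include <netinet/in.h>
#include <sys/socket.h>

#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN 0x20000000
#endif

/* connect and send in one call; the kernel adds the TFO option */
static ssize_t tfo_send(int fd, const struct sockaddr_in *dst,
			const void *buf, size_t len)
{
	return sendto(fd, buf, len, MSG_FASTOPEN,
		      (const struct sockaddr *)dst, sizeof(*dst));
}
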
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 8b9e7bad77c0..1c4908280d92 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -109,12 +109,12 @@ static void hstcp_init(struct sock *sk)
109 tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); 109 tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
110} 110}
111 111
112static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) 112static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
113{ 113{
114 struct tcp_sock *tp = tcp_sk(sk); 114 struct tcp_sock *tp = tcp_sk(sk);
115 struct hstcp *ca = inet_csk_ca(sk); 115 struct hstcp *ca = inet_csk_ca(sk);
116 116
117 if (!tcp_is_cwnd_limited(sk, in_flight)) 117 if (!tcp_is_cwnd_limited(sk))
118 return; 118 return;
119 119
120 if (tp->snd_cwnd <= tp->snd_ssthresh) 120 if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 4a194acfd923..031361311a8b 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -227,12 +227,12 @@ static u32 htcp_recalc_ssthresh(struct sock *sk)
227 return max((tp->snd_cwnd * ca->beta) >> 7, 2U); 227 return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
228} 228}
229 229
230static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) 230static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
231{ 231{
232 struct tcp_sock *tp = tcp_sk(sk); 232 struct tcp_sock *tp = tcp_sk(sk);
233 struct htcp *ca = inet_csk_ca(sk); 233 struct htcp *ca = inet_csk_ca(sk);
234 234
235 if (!tcp_is_cwnd_limited(sk, in_flight)) 235 if (!tcp_is_cwnd_limited(sk))
236 return; 236 return;
237 237
238 if (tp->snd_cwnd <= tp->snd_ssthresh) 238 if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index a15a799bf768..d8f8f05a4951 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -87,8 +87,7 @@ static inline u32 hybla_fraction(u32 odds)
87 * o Give cwnd a new value based on the model proposed 87 * o Give cwnd a new value based on the model proposed
88 * o remember increments <1 88 * o remember increments <1
89 */ 89 */
90static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked, 90static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
91 u32 in_flight)
92{ 91{
93 struct tcp_sock *tp = tcp_sk(sk); 92 struct tcp_sock *tp = tcp_sk(sk);
94 struct hybla *ca = inet_csk_ca(sk); 93 struct hybla *ca = inet_csk_ca(sk);
@@ -101,11 +100,11 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
101 ca->minrtt_us = tp->srtt_us; 100 ca->minrtt_us = tp->srtt_us;
102 } 101 }
103 102
104 if (!tcp_is_cwnd_limited(sk, in_flight)) 103 if (!tcp_is_cwnd_limited(sk))
105 return; 104 return;
106 105
107 if (!ca->hybla_en) { 106 if (!ca->hybla_en) {
108 tcp_reno_cong_avoid(sk, ack, acked, in_flight); 107 tcp_reno_cong_avoid(sk, ack, acked);
109 return; 108 return;
110 } 109 }
111 110
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 863d105e3015..5999b3972e64 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -255,8 +255,7 @@ static void tcp_illinois_state(struct sock *sk, u8 new_state)
255/* 255/*
256 * Increase window in response to successful acknowledgment. 256 * Increase window in response to successful acknowledgment.
257 */ 257 */
258static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked, 258static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
259 u32 in_flight)
260{ 259{
261 struct tcp_sock *tp = tcp_sk(sk); 260 struct tcp_sock *tp = tcp_sk(sk);
262 struct illinois *ca = inet_csk_ca(sk); 261 struct illinois *ca = inet_csk_ca(sk);
@@ -265,7 +264,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked,
265 update_params(sk); 264 update_params(sk);
266 265
267 /* RFC2861 only increase cwnd if fully utilized */ 266 /* RFC2861 only increase cwnd if fully utilized */
268 if (!tcp_is_cwnd_limited(sk, in_flight)) 267 if (!tcp_is_cwnd_limited(sk))
269 return; 268 return;
270 269
271 /* In slow start */ 270 /* In slow start */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3a26b3b23f16..40661fc1e233 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1167,7 +1167,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1167 } 1167 }
1168 pkt_len = new_len; 1168 pkt_len = new_len;
1169 } 1169 }
1170 err = tcp_fragment(sk, skb, pkt_len, mss); 1170 err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
1171 if (err < 0) 1171 if (err < 0)
1172 return err; 1172 return err;
1173 } 1173 }
@@ -2241,7 +2241,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2241 break; 2241 break;
2242 2242
2243 mss = skb_shinfo(skb)->gso_size; 2243 mss = skb_shinfo(skb)->gso_size;
2244 err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss); 2244 err = tcp_fragment(sk, skb, (packets - oldcnt) * mss,
2245 mss, GFP_ATOMIC);
2245 if (err < 0) 2246 if (err < 0)
2246 break; 2247 break;
2247 cnt = packets; 2248 cnt = packets;
@@ -2937,10 +2938,11 @@ static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
2937 tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L); 2938 tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L);
2938} 2939}
2939 2940
2940static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight) 2941static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
2941{ 2942{
2942 const struct inet_connection_sock *icsk = inet_csk(sk); 2943 const struct inet_connection_sock *icsk = inet_csk(sk);
2943 icsk->icsk_ca_ops->cong_avoid(sk, ack, acked, in_flight); 2944
2945 icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
2944 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; 2946 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
2945} 2947}
2946 2948
@@ -3363,7 +3365,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3363 u32 ack_seq = TCP_SKB_CB(skb)->seq; 3365 u32 ack_seq = TCP_SKB_CB(skb)->seq;
3364 u32 ack = TCP_SKB_CB(skb)->ack_seq; 3366 u32 ack = TCP_SKB_CB(skb)->ack_seq;
3365 bool is_dupack = false; 3367 bool is_dupack = false;
3366 u32 prior_in_flight;
3367 u32 prior_fackets; 3368 u32 prior_fackets;
3368 int prior_packets = tp->packets_out; 3369 int prior_packets = tp->packets_out;
3369 const int prior_unsacked = tp->packets_out - tp->sacked_out; 3370 const int prior_unsacked = tp->packets_out - tp->sacked_out;
@@ -3396,7 +3397,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3396 flag |= FLAG_SND_UNA_ADVANCED; 3397 flag |= FLAG_SND_UNA_ADVANCED;
3397 3398
3398 prior_fackets = tp->fackets_out; 3399 prior_fackets = tp->fackets_out;
3399 prior_in_flight = tcp_packets_in_flight(tp);
3400 3400
3401 /* ts_recent update must be made after we are sure that the packet 3401 /* ts_recent update must be made after we are sure that the packet
3402 * is in window. 3402 * is in window.
@@ -3451,7 +3451,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3451 3451
3452 /* Advance cwnd if state allows */ 3452 /* Advance cwnd if state allows */
3453 if (tcp_may_raise_cwnd(sk, flag)) 3453 if (tcp_may_raise_cwnd(sk, flag))
3454 tcp_cong_avoid(sk, ack, acked, prior_in_flight); 3454 tcp_cong_avoid(sk, ack, acked);
3455 3455
3456 if (tcp_ack_is_dubious(sk, flag)) { 3456 if (tcp_ack_is_dubious(sk, flag)) {
3457 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); 3457 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
@@ -4702,28 +4702,6 @@ static int tcp_prune_queue(struct sock *sk)
4702 return -1; 4702 return -1;
4703} 4703}
4704 4704
4705/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
4706 * As additional protections, we do not touch cwnd in retransmission phases,
4707 * and if application hit its sndbuf limit recently.
4708 */
4709void tcp_cwnd_application_limited(struct sock *sk)
4710{
4711 struct tcp_sock *tp = tcp_sk(sk);
4712
4713 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
4714 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
4715 /* Limited by application or receiver window. */
4716 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
4717 u32 win_used = max(tp->snd_cwnd_used, init_win);
4718 if (win_used < tp->snd_cwnd) {
4719 tp->snd_ssthresh = tcp_current_ssthresh(sk);
4720 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
4721 }
4722 tp->snd_cwnd_used = 0;
4723 }
4724 tp->snd_cwnd_stamp = tcp_time_stamp;
4725}
4726
4727static bool tcp_should_expand_sndbuf(const struct sock *sk) 4705static bool tcp_should_expand_sndbuf(const struct sock *sk)
4728{ 4706{
4729 const struct tcp_sock *tp = tcp_sk(sk); 4707 const struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 438f3b95143d..77cccda1ad0c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -336,8 +336,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
336 const int code = icmp_hdr(icmp_skb)->code; 336 const int code = icmp_hdr(icmp_skb)->code;
337 struct sock *sk; 337 struct sock *sk;
338 struct sk_buff *skb; 338 struct sk_buff *skb;
339 struct request_sock *req; 339 struct request_sock *fastopen;
340 __u32 seq; 340 __u32 seq, snd_una;
341 __u32 remaining; 341 __u32 remaining;
342 int err; 342 int err;
343 struct net *net = dev_net(icmp_skb->dev); 343 struct net *net = dev_net(icmp_skb->dev);
@@ -378,12 +378,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
378 378
379 icsk = inet_csk(sk); 379 icsk = inet_csk(sk);
380 tp = tcp_sk(sk); 380 tp = tcp_sk(sk);
381 req = tp->fastopen_rsk;
382 seq = ntohl(th->seq); 381 seq = ntohl(th->seq);
382 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
383 fastopen = tp->fastopen_rsk;
384 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
383 if (sk->sk_state != TCP_LISTEN && 385 if (sk->sk_state != TCP_LISTEN &&
384 !between(seq, tp->snd_una, tp->snd_nxt) && 386 !between(seq, snd_una, tp->snd_nxt)) {
385 (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
386 /* For a Fast Open socket, allow seq to be snt_isn. */
387 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 387 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
388 goto out; 388 goto out;
389 } 389 }
@@ -426,11 +426,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
426 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) 426 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
427 break; 427 break;
428 if (seq != tp->snd_una || !icsk->icsk_retransmits || 428 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
429 !icsk->icsk_backoff) 429 !icsk->icsk_backoff || fastopen)
430 break; 430 break;
431 431
432 /* XXX (TFO) - revisit the following logic for TFO */
433
434 if (sock_owned_by_user(sk)) 432 if (sock_owned_by_user(sk))
435 break; 433 break;
436 434
@@ -462,14 +460,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
462 goto out; 460 goto out;
463 } 461 }
464 462
465 /* XXX (TFO) - if it's a TFO socket and has been accepted, rather
466 * than following the TCP_SYN_RECV case and closing the socket,
467 * we ignore the ICMP error and keep trying like a fully established
468 * socket. Is this the right thing to do?
469 */
470 if (req && req->sk == NULL)
471 goto out;
472
473 switch (sk->sk_state) { 463 switch (sk->sk_state) {
474 struct request_sock *req, **prev; 464 struct request_sock *req, **prev;
475 case TCP_LISTEN: 465 case TCP_LISTEN:
@@ -502,10 +492,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
502 goto out; 492 goto out;
503 493
504 case TCP_SYN_SENT: 494 case TCP_SYN_SENT:
505 case TCP_SYN_RECV: /* Cannot happen. 495 case TCP_SYN_RECV:
506 It can f.e. if SYNs crossed, 496 /* Only in fast or simultaneous open. If a fast open socket is
507 or Fast Open. 497 * already accepted it is treated as a connected one below.
508 */ 498 */
499 if (fastopen && fastopen->sk == NULL)
500 break;
501
509 if (!sock_owned_by_user(sk)) { 502 if (!sock_owned_by_user(sk)) {
510 sk->sk_err = err; 503 sk->sk_err = err;
511 504
@@ -822,7 +815,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
822 */ 815 */
823static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, 816static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
824 struct request_sock *req, 817 struct request_sock *req,
825 u16 queue_mapping) 818 u16 queue_mapping,
819 struct tcp_fastopen_cookie *foc)
826{ 820{
827 const struct inet_request_sock *ireq = inet_rsk(req); 821 const struct inet_request_sock *ireq = inet_rsk(req);
828 struct flowi4 fl4; 822 struct flowi4 fl4;
@@ -833,7 +827,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
833 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) 827 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
834 return -1; 828 return -1;
835 829
836 skb = tcp_make_synack(sk, dst, req, NULL); 830 skb = tcp_make_synack(sk, dst, req, foc);
837 831
838 if (skb) { 832 if (skb) {
839 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); 833 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
@@ -852,7 +846,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
852 846
853static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) 847static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
854{ 848{
855 int res = tcp_v4_send_synack(sk, NULL, req, 0); 849 int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL);
856 850
857 if (!res) { 851 if (!res) {
858 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); 852 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
@@ -1260,187 +1254,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1260}; 1254};
1261#endif 1255#endif
1262 1256
1263static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
1264 struct request_sock *req,
1265 struct tcp_fastopen_cookie *foc,
1266 struct tcp_fastopen_cookie *valid_foc)
1267{
1268 bool skip_cookie = false;
1269 struct fastopen_queue *fastopenq;
1270
1271 if (likely(!fastopen_cookie_present(foc))) {
1272 /* See include/net/tcp.h for the meaning of these knobs */
1273 if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
1274 ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
1275 (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
1276 skip_cookie = true; /* no cookie to validate */
1277 else
1278 return false;
1279 }
1280 fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
1281 /* A FO option is present; bump the counter. */
1282 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
1283
1284 /* Make sure the listener has enabled fastopen, and we don't
1285 * exceed the max # of pending TFO requests allowed before trying
1286 * to validating the cookie in order to avoid burning CPU cycles
1287 * unnecessarily.
1288 *
1289 * XXX (TFO) - The implication of checking the max_qlen before
1290 * processing a cookie request is that clients can't differentiate
1291 * between qlen overflow causing Fast Open to be disabled
1292 * temporarily vs a server not supporting Fast Open at all.
1293 */
1294 if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
1295 fastopenq == NULL || fastopenq->max_qlen == 0)
1296 return false;
1297
1298 if (fastopenq->qlen >= fastopenq->max_qlen) {
1299 struct request_sock *req1;
1300 spin_lock(&fastopenq->lock);
1301 req1 = fastopenq->rskq_rst_head;
1302 if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
1303 spin_unlock(&fastopenq->lock);
1304 NET_INC_STATS_BH(sock_net(sk),
1305 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
1306 /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
1307 foc->len = -1;
1308 return false;
1309 }
1310 fastopenq->rskq_rst_head = req1->dl_next;
1311 fastopenq->qlen--;
1312 spin_unlock(&fastopenq->lock);
1313 reqsk_free(req1);
1314 }
1315 if (skip_cookie) {
1316 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1317 return true;
1318 }
1319
1320 if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
1321 if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
1322 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
1323 ip_hdr(skb)->daddr, valid_foc);
1324 if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
1325 memcmp(&foc->val[0], &valid_foc->val[0],
1326 TCP_FASTOPEN_COOKIE_SIZE) != 0)
1327 return false;
1328 valid_foc->len = -1;
1329 }
1330 /* Acknowledge the data received from the peer. */
1331 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1332 return true;
1333 } else if (foc->len == 0) { /* Client requesting a cookie */
1334 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
1335 ip_hdr(skb)->daddr, valid_foc);
1336 NET_INC_STATS_BH(sock_net(sk),
1337 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
1338 } else {
1339 /* Client sent a cookie with wrong size. Treat it
1340 * the same as invalid and return a valid one.
1341 */
1342 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
1343 ip_hdr(skb)->daddr, valid_foc);
1344 }
1345 return false;
1346}
1347
1348static int tcp_v4_conn_req_fastopen(struct sock *sk,
1349 struct sk_buff *skb,
1350 struct sk_buff *skb_synack,
1351 struct request_sock *req)
1352{
1353 struct tcp_sock *tp = tcp_sk(sk);
1354 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
1355 const struct inet_request_sock *ireq = inet_rsk(req);
1356 struct sock *child;
1357 int err;
1358
1359 req->num_retrans = 0;
1360 req->num_timeout = 0;
1361 req->sk = NULL;
1362
1363 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
1364 if (child == NULL) {
1365 NET_INC_STATS_BH(sock_net(sk),
1366 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
1367 kfree_skb(skb_synack);
1368 return -1;
1369 }
1370 err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
1371 ireq->ir_rmt_addr, ireq->opt);
1372 err = net_xmit_eval(err);
1373 if (!err)
1374 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1375 /* XXX (TFO) - is it ok to ignore error and continue? */
1376
1377 spin_lock(&queue->fastopenq->lock);
1378 queue->fastopenq->qlen++;
1379 spin_unlock(&queue->fastopenq->lock);
1380
1381 /* Initialize the child socket. Have to fix some values to take
1382 * into account the child is a Fast Open socket and is created
1383 * only out of the bits carried in the SYN packet.
1384 */
1385 tp = tcp_sk(child);
1386
1387 tp->fastopen_rsk = req;
1388 /* Do a hold on the listner sk so that if the listener is being
1389 * closed, the child that has been accepted can live on and still
1390 * access listen_lock.
1391 */
1392 sock_hold(sk);
1393 tcp_rsk(req)->listener = sk;
1394
1395 /* RFC1323: The window in SYN & SYN/ACK segments is never
1396 * scaled. So correct it appropriately.
1397 */
1398 tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
1399
1400 /* Activate the retrans timer so that SYNACK can be retransmitted.
1401 * The request socket is not added to the SYN table of the parent
1402 * because it's been added to the accept queue directly.
1403 */
1404 inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
1405 TCP_TIMEOUT_INIT, TCP_RTO_MAX);
1406
1407 /* Add the child socket directly into the accept queue */
1408 inet_csk_reqsk_queue_add(sk, req, child);
1409
1410 /* Now finish processing the fastopen child socket. */
1411 inet_csk(child)->icsk_af_ops->rebuild_header(child);
1412 tcp_init_congestion_control(child);
1413 tcp_mtup_init(child);
1414 tcp_init_metrics(child);
1415 tcp_init_buffer_space(child);
1416
1417 /* Queue the data carried in the SYN packet. We need to first
1418 * bump skb's refcnt because the caller will attempt to free it.
1419 *
1420 * XXX (TFO) - we honor a zero-payload TFO request for now.
1421 * (Any reason not to?)
1422 */
1423 if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
1424 /* Don't queue the skb if there is no payload in SYN.
1425 * XXX (TFO) - How about SYN+FIN?
1426 */
1427 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1428 } else {
1429 skb = skb_get(skb);
1430 skb_dst_drop(skb);
1431 __skb_pull(skb, tcp_hdr(skb)->doff * 4);
1432 skb_set_owner_r(skb, child);
1433 __skb_queue_tail(&child->sk_receive_queue, skb);
1434 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1435 tp->syn_data_acked = 1;
1436 }
1437 sk->sk_data_ready(sk);
1438 bh_unlock_sock(child);
1439 sock_put(child);
1440 WARN_ON(req->sk == NULL);
1441 return 0;
1442}
1443
1444int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 1257int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1445{ 1258{
1446 struct tcp_options_received tmp_opt; 1259 struct tcp_options_received tmp_opt;
@@ -1451,12 +1264,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1451 __be32 saddr = ip_hdr(skb)->saddr; 1264 __be32 saddr = ip_hdr(skb)->saddr;
1452 __be32 daddr = ip_hdr(skb)->daddr; 1265 __be32 daddr = ip_hdr(skb)->daddr;
1453 __u32 isn = TCP_SKB_CB(skb)->when; 1266 __u32 isn = TCP_SKB_CB(skb)->when;
1454 bool want_cookie = false; 1267 bool want_cookie = false, fastopen;
1455 struct flowi4 fl4; 1268 struct flowi4 fl4;
1456 struct tcp_fastopen_cookie foc = { .len = -1 }; 1269 struct tcp_fastopen_cookie foc = { .len = -1 };
1457 struct tcp_fastopen_cookie valid_foc = { .len = -1 }; 1270 int err;
1458 struct sk_buff *skb_synack;
1459 int do_fastopen;
1460 1271
1461 /* Never answer to SYNs sent to broadcast or multicast */ 1272 /* Never answer to SYNs sent to broadcast or multicast */
1462 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) 1273 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1507,6 +1318,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1507 ireq->ir_rmt_addr = saddr; 1318 ireq->ir_rmt_addr = saddr;
1508 ireq->no_srccheck = inet_sk(sk)->transparent; 1319 ireq->no_srccheck = inet_sk(sk)->transparent;
1509 ireq->opt = tcp_v4_save_options(skb); 1320 ireq->opt = tcp_v4_save_options(skb);
1321 ireq->ir_mark = inet_request_mark(sk, skb);
1510 1322
1511 if (security_inet_conn_request(sk, skb, req)) 1323 if (security_inet_conn_request(sk, skb, req))
1512 goto drop_and_free; 1324 goto drop_and_free;
@@ -1555,52 +1367,24 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1555 1367
1556 isn = tcp_v4_init_sequence(skb); 1368 isn = tcp_v4_init_sequence(skb);
1557 } 1369 }
1558 tcp_rsk(req)->snt_isn = isn; 1370 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
1559
1560 if (dst == NULL) {
1561 dst = inet_csk_route_req(sk, &fl4, req);
1562 if (dst == NULL)
1563 goto drop_and_free;
1564 }
1565 do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
1566
1567 /* We don't call tcp_v4_send_synack() directly because we need
1568 * to make sure a child socket can be created successfully before
1569 * sending back synack!
1570 *
1571 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
1572 * (or better yet, call tcp_send_synack() in the child context
1573 * directly, but will have to fix bunch of other code first)
1574 * after syn_recv_sock() except one will need to first fix the
1575 * latter to remove its dependency on the current implementation
1576 * of tcp_v4_send_synack()->tcp_select_initial_window().
1577 */
1578 skb_synack = tcp_make_synack(sk, dst, req,
1579 fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
1580
1581 if (skb_synack) {
1582 __tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr);
1583 skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
1584 } else
1585 goto drop_and_free; 1371 goto drop_and_free;
1586 1372
1587 if (likely(!do_fastopen)) { 1373 tcp_rsk(req)->snt_isn = isn;
1588 int err; 1374 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1589 err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr, 1375 tcp_openreq_init_rwin(req, sk, dst);
1590 ireq->ir_rmt_addr, ireq->opt); 1376 fastopen = !want_cookie &&
1591 err = net_xmit_eval(err); 1377 tcp_try_fastopen(sk, skb, req, &foc, dst);
1378 err = tcp_v4_send_synack(sk, dst, req,
1379 skb_get_queue_mapping(skb), &foc);
1380 if (!fastopen) {
1592 if (err || want_cookie) 1381 if (err || want_cookie)
1593 goto drop_and_free; 1382 goto drop_and_free;
1594 1383
1595 tcp_rsk(req)->snt_synack = tcp_time_stamp; 1384 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1596 tcp_rsk(req)->listener = NULL; 1385 tcp_rsk(req)->listener = NULL;
1597 /* Add the request_sock to the SYN table */
1598 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); 1386 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1599 if (fastopen_cookie_present(&foc) && foc.len != 0) 1387 }
1600 NET_INC_STATS_BH(sock_net(sk),
1601 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
1602 } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
1603 goto drop_and_free;
1604 1388
1605 return 0; 1389 return 0;
1606 1390
@@ -1744,28 +1528,6 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1744 return sk; 1528 return sk;
1745} 1529}
1746 1530
1747static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1748{
1749 const struct iphdr *iph = ip_hdr(skb);
1750
1751 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1752 if (!tcp_v4_check(skb->len, iph->saddr,
1753 iph->daddr, skb->csum)) {
1754 skb->ip_summed = CHECKSUM_UNNECESSARY;
1755 return 0;
1756 }
1757 }
1758
1759 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1760 skb->len, IPPROTO_TCP, 0);
1761
1762 if (skb->len <= 76) {
1763 return __skb_checksum_complete(skb);
1764 }
1765 return 0;
1766}
1767
1768
1769/* The socket must have its spinlock held when we get 1531/* The socket must have its spinlock held when we get
1770 * here. 1532 * here.
1771 * 1533 *
@@ -1960,7 +1722,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
1960 * Packet length and doff are validated by header prediction, 1722 * Packet length and doff are validated by header prediction,
1961 * provided case of th->doff==0 is eliminated. 1723 * provided case of th->doff==0 is eliminated.
1962 * So, we defer the checks. */ 1724 * So, we defer the checks. */
1963 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb)) 1725
1726 if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
1964 goto csum_error; 1727 goto csum_error;
1965 1728
1966 th = tcp_hdr(skb); 1729 th = tcp_hdr(skb);
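
The open-coded tcp_v4_checksum_init() gives way to the generic skb_checksum_init(), parameterized by a pseudo-header routine. inet_compute_pseudo presumably wraps the same csum_tcpudp_nofold() call the removed code used:

static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto)
{
	return csum_tcpudp_nofold(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
				  skb->len, proto, 0);
}
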
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index c9aecae31327..1e70fa8fa793 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -115,13 +115,12 @@ static void tcp_lp_init(struct sock *sk)
115 * Will only call newReno CA when away from inference. 115 * Will only call newReno CA when away from inference.
116 * From TCP-LP's paper, this will be handled in additive increasement. 116 * From TCP-LP's paper, this will be handled in additive increasement.
117 */ 117 */
118static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked, 118static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
119 u32 in_flight)
120{ 119{
121 struct lp *lp = inet_csk_ca(sk); 120 struct lp *lp = inet_csk_ca(sk);
122 121
123 if (!(lp->flag & LP_WITHIN_INF)) 122 if (!(lp->flag & LP_WITHIN_INF))
124 tcp_reno_cong_avoid(sk, ack, acked, in_flight); 123 tcp_reno_cong_avoid(sk, ack, acked);
125} 124}
126 125
127/** 126/**
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index dcaf72f10216..4fe041805989 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1159,10 +1159,7 @@ static void __net_exit tcp_net_metrics_exit(struct net *net)
1159 tm = next; 1159 tm = next;
1160 } 1160 }
1161 } 1161 }
1162 if (is_vmalloc_addr(net->ipv4.tcp_metrics_hash)) 1162 kvfree(net->ipv4.tcp_metrics_hash);
1163 vfree(net->ipv4.tcp_metrics_hash);
1164 else
1165 kfree(net->ipv4.tcp_metrics_hash);
1166} 1163}
1167 1164
1168static __net_initdata struct pernet_operations tcp_net_metrics_ops = { 1165static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
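
kvfree() collapses the vmalloc-versus-kmalloc branch into one call; the helper amounts to:

void kvfree(const void *addr)
{
	if (is_vmalloc_addr(addr))
		vfree(addr);
	else
		kfree(addr);
}
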
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 05c1b155251d..e68e0d4af6c9 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -362,6 +362,37 @@ void tcp_twsk_destructor(struct sock *sk)
362} 362}
363EXPORT_SYMBOL_GPL(tcp_twsk_destructor); 363EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
364 364
365void tcp_openreq_init_rwin(struct request_sock *req,
366 struct sock *sk, struct dst_entry *dst)
367{
368 struct inet_request_sock *ireq = inet_rsk(req);
369 struct tcp_sock *tp = tcp_sk(sk);
370 __u8 rcv_wscale;
371 int mss = dst_metric_advmss(dst);
372
373 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
374 mss = tp->rx_opt.user_mss;
375
376 /* Set this up on the first call only */
377 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
378
379 /* limit the window selection if the user enforces a smaller rx buffer */
380 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
381 (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
382 req->window_clamp = tcp_full_space(sk);
383
384 /* tcp_full_space because it is guaranteed to be the first packet */
385 tcp_select_initial_window(tcp_full_space(sk),
386 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
387 &req->rcv_wnd,
388 &req->window_clamp,
389 ireq->wscale_ok,
390 &rcv_wscale,
391 dst_metric(dst, RTAX_INITRWND));
392 ireq->rcv_wscale = rcv_wscale;
393}
394EXPORT_SYMBOL(tcp_openreq_init_rwin);
395
365static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, 396static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
366 struct request_sock *req) 397 struct request_sock *req)
367{ 398{
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index b92b81718ca4..4e86c59ec7f7 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -57,10 +57,12 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
57 SKB_GSO_TCP_ECN | 57 SKB_GSO_TCP_ECN |
58 SKB_GSO_TCPV6 | 58 SKB_GSO_TCPV6 |
59 SKB_GSO_GRE | 59 SKB_GSO_GRE |
60 SKB_GSO_GRE_CSUM |
60 SKB_GSO_IPIP | 61 SKB_GSO_IPIP |
61 SKB_GSO_SIT | 62 SKB_GSO_SIT |
62 SKB_GSO_MPLS | 63 SKB_GSO_MPLS |
63 SKB_GSO_UDP_TUNNEL | 64 SKB_GSO_UDP_TUNNEL |
65 SKB_GSO_UDP_TUNNEL_CSUM |
64 0) || 66 0) ||
65 !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) 67 !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
66 goto out; 68 goto out;
@@ -97,9 +99,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
97 th->check = newcheck; 99 th->check = newcheck;
98 100
99 if (skb->ip_summed != CHECKSUM_PARTIAL) 101 if (skb->ip_summed != CHECKSUM_PARTIAL)
100 th->check = 102 th->check = gso_make_checksum(skb, ~th->check);
101 csum_fold(csum_partial(skb_transport_header(skb),
102 thlen, skb->csum));
103 103
104 seq += mss; 104 seq += mss;
105 if (copy_destructor) { 105 if (copy_destructor) {
@@ -133,8 +133,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
133 th->check = ~csum_fold((__force __wsum)((__force u32)th->check + 133 th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
134 (__force u32)delta)); 134 (__force u32)delta));
135 if (skb->ip_summed != CHECKSUM_PARTIAL) 135 if (skb->ip_summed != CHECKSUM_PARTIAL)
136 th->check = csum_fold(csum_partial(skb_transport_header(skb), 136 th->check = gso_make_checksum(skb, ~th->check);
137 thlen, skb->csum));
138out: 137out:
139 return segs; 138 return segs;
140} 139}
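gso_make_checksum() computes the segment checksum from the offset recorded by the GSO layer, taking ~th->check as its seed so the pseudo-header value already written into the header is folded in; this works because ones'-complement sums are associative. A standalone illustration of the fold step, not the kernel implementation:

#include <stdint.h>

static uint16_t csum_fold32(uint32_t sum)
{
	sum = (sum & 0xffff) + (sum >> 16);	/* fold carry into low 16 bits */
	sum = (sum & 0xffff) + (sum >> 16);	/* second fold catches the last carry */
	return (uint16_t)~sum;
}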
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2d340bd2cd3d..d92bce0ea24e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -627,7 +627,7 @@ static unsigned int tcp_synack_options(struct sock *sk,
627 if (unlikely(!ireq->tstamp_ok)) 627 if (unlikely(!ireq->tstamp_ok))
628 remaining -= TCPOLEN_SACKPERM_ALIGNED; 628 remaining -= TCPOLEN_SACKPERM_ALIGNED;
629 } 629 }
630 if (foc != NULL) { 630 if (foc != NULL && foc->len >= 0) {
631 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; 631 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
632 need = (need + 3) & ~3U; /* Align to 32 bits */ 632 need = (need + 3) & ~3U; /* Align to 32 bits */
633 if (remaining >= need) { 633 if (remaining >= need) {
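The alignment step above rounds the Fast Open option up to a 32-bit boundary because TCP options occupy whole words in the header. Assuming the usual 4-byte experimental-option base (kind, length, 2-byte magic), an 8-byte cookie needs 4 + 8 = 12 bytes, already aligned, while a 7-byte cookie would round 11 up to 12 and carry one byte of padding.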
@@ -878,15 +878,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
878 BUG_ON(!skb || !tcp_skb_pcount(skb)); 878 BUG_ON(!skb || !tcp_skb_pcount(skb));
879 879
880 if (clone_it) { 880 if (clone_it) {
881 const struct sk_buff *fclone = skb + 1;
882
883 skb_mstamp_get(&skb->skb_mstamp); 881 skb_mstamp_get(&skb->skb_mstamp);
884 882
885 if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
886 fclone->fclone == SKB_FCLONE_CLONE))
887 NET_INC_STATS(sock_net(sk),
888 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
889
890 if (unlikely(skb_cloned(skb))) 883 if (unlikely(skb_cloned(skb)))
891 skb = pskb_copy(skb, gfp_mask); 884 skb = pskb_copy(skb, gfp_mask);
892 else 885 else
@@ -1081,7 +1074,7 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
1081 * Remember, these are still headerless SKBs at this point. 1074 * Remember, these are still headerless SKBs at this point.
1082 */ 1075 */
1083int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, 1076int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1084 unsigned int mss_now) 1077 unsigned int mss_now, gfp_t gfp)
1085{ 1078{
1086 struct tcp_sock *tp = tcp_sk(sk); 1079 struct tcp_sock *tp = tcp_sk(sk);
1087 struct sk_buff *buff; 1080 struct sk_buff *buff;
@@ -1096,11 +1089,11 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1096 if (nsize < 0) 1089 if (nsize < 0)
1097 nsize = 0; 1090 nsize = 0;
1098 1091
1099 if (skb_unclone(skb, GFP_ATOMIC)) 1092 if (skb_unclone(skb, gfp))
1100 return -ENOMEM; 1093 return -ENOMEM;
1101 1094
1102 /* Get a new skb... force flag on. */ 1095 /* Get a new skb... force flag on. */
1103 buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC); 1096 buff = sk_stream_alloc_skb(sk, nsize, gfp);
1104 if (buff == NULL) 1097 if (buff == NULL)
1105 return -ENOMEM; /* We'll just try again later. */ 1098 return -ENOMEM; /* We'll just try again later. */
1106 1099
@@ -1387,12 +1380,43 @@ unsigned int tcp_current_mss(struct sock *sk)
1387 return mss_now; 1380 return mss_now;
1388} 1381}
1389 1382
1390/* Congestion window validation. (RFC2861) */ 1383/* RFC2861, slow part. Adjust cwnd after it has not been full for one RTO.
1391static void tcp_cwnd_validate(struct sock *sk) 1384 * As additional protections, we do not touch cwnd in retransmission phases,
1385 * or if the application has hit its sndbuf limit recently.
1386 */
1387static void tcp_cwnd_application_limited(struct sock *sk)
1388{
1389 struct tcp_sock *tp = tcp_sk(sk);
1390
1391 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
1392 sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
1393 /* Limited by application or receiver window. */
1394 u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
1395 u32 win_used = max(tp->snd_cwnd_used, init_win);
1396 if (win_used < tp->snd_cwnd) {
1397 tp->snd_ssthresh = tcp_current_ssthresh(sk);
1398 tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
1399 }
1400 tp->snd_cwnd_used = 0;
1401 }
1402 tp->snd_cwnd_stamp = tcp_time_stamp;
1403}
1404
1405static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
1392{ 1406{
1393 struct tcp_sock *tp = tcp_sk(sk); 1407 struct tcp_sock *tp = tcp_sk(sk);
1394 1408
1395 if (tp->packets_out >= tp->snd_cwnd) { 1409 /* Track the maximum number of outstanding packets in each
1410 * window, and remember whether we were cwnd-limited then.
1411 */
1412 if (!before(tp->snd_una, tp->max_packets_seq) ||
1413 tp->packets_out > tp->max_packets_out) {
1414 tp->max_packets_out = tp->packets_out;
1415 tp->max_packets_seq = tp->snd_nxt;
1416 tp->is_cwnd_limited = is_cwnd_limited;
1417 }
1418
1419 if (tcp_is_cwnd_limited(sk)) {
1396 /* Network is fed fully. */ 1420 /* Network is fed fully. */
1397 tp->snd_cwnd_used = 0; 1421 tp->snd_cwnd_used = 0;
1398 tp->snd_cwnd_stamp = tcp_time_stamp; 1422 tp->snd_cwnd_stamp = tcp_time_stamp;
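A quick worked example of the averaging in tcp_cwnd_application_limited(): with snd_cwnd = 100 but only win_used = 40 segments ever in flight, ssthresh is saved first and cwnd shrinks to (100 + 40) >> 1 = 70, decaying the unused headroom by half per RTO rather than collapsing it outright.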
@@ -1601,7 +1625,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1601 1625
1602 /* All of a TSO frame must be composed of paged data. */ 1626 /* All of a TSO frame must be composed of paged data. */
1603 if (skb->len != skb->data_len) 1627 if (skb->len != skb->data_len)
1604 return tcp_fragment(sk, skb, len, mss_now); 1628 return tcp_fragment(sk, skb, len, mss_now, gfp);
1605 1629
1606 buff = sk_stream_alloc_skb(sk, 0, gfp); 1630 buff = sk_stream_alloc_skb(sk, 0, gfp);
1607 if (unlikely(buff == NULL)) 1631 if (unlikely(buff == NULL))
@@ -1644,7 +1668,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1644 * 1668 *
1645 * This algorithm is from John Heffner. 1669 * This algorithm is from John Heffner.
1646 */ 1670 */
1647static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) 1671static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
1672 bool *is_cwnd_limited)
1648{ 1673{
1649 struct tcp_sock *tp = tcp_sk(sk); 1674 struct tcp_sock *tp = tcp_sk(sk);
1650 const struct inet_connection_sock *icsk = inet_csk(sk); 1675 const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1708,6 +1733,9 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1708 if (!tp->tso_deferred) 1733 if (!tp->tso_deferred)
1709 tp->tso_deferred = 1 | (jiffies << 1); 1734 tp->tso_deferred = 1 | (jiffies << 1);
1710 1735
1736 if (cong_win < send_win && cong_win < skb->len)
1737 *is_cwnd_limited = true;
1738
1711 return true; 1739 return true;
1712 1740
1713send_now: 1741send_now:
@@ -1868,6 +1896,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1868 unsigned int tso_segs, sent_pkts; 1896 unsigned int tso_segs, sent_pkts;
1869 int cwnd_quota; 1897 int cwnd_quota;
1870 int result; 1898 int result;
1899 bool is_cwnd_limited = false;
1871 1900
1872 sent_pkts = 0; 1901 sent_pkts = 0;
1873 1902
@@ -1892,6 +1921,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1892 1921
1893 cwnd_quota = tcp_cwnd_test(tp, skb); 1922 cwnd_quota = tcp_cwnd_test(tp, skb);
1894 if (!cwnd_quota) { 1923 if (!cwnd_quota) {
1924 is_cwnd_limited = true;
1895 if (push_one == 2) 1925 if (push_one == 2)
1896 /* Force out a loss probe pkt. */ 1926 /* Force out a loss probe pkt. */
1897 cwnd_quota = 1; 1927 cwnd_quota = 1;
@@ -1908,7 +1938,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1908 nonagle : TCP_NAGLE_PUSH)))) 1938 nonagle : TCP_NAGLE_PUSH))))
1909 break; 1939 break;
1910 } else { 1940 } else {
1911 if (!push_one && tcp_tso_should_defer(sk, skb)) 1941 if (!push_one &&
1942 tcp_tso_should_defer(sk, skb, &is_cwnd_limited))
1912 break; 1943 break;
1913 } 1944 }
1914 1945
@@ -1973,7 +2004,7 @@ repair:
1973 /* Send one loss probe per tail loss episode. */ 2004 /* Send one loss probe per tail loss episode. */
1974 if (push_one != 2) 2005 if (push_one != 2)
1975 tcp_schedule_loss_probe(sk); 2006 tcp_schedule_loss_probe(sk);
1976 tcp_cwnd_validate(sk); 2007 tcp_cwnd_validate(sk, is_cwnd_limited);
1977 return false; 2008 return false;
1978 } 2009 }
1979 return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); 2010 return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
@@ -2037,6 +2068,25 @@ bool tcp_schedule_loss_probe(struct sock *sk)
2037 return true; 2068 return true;
2038} 2069}
2039 2070
2071/* Thanks to skb fast clones, we can detect if a prior transmit of
2072 * a packet is still in a qdisc or driver queue.
2073 * In this case, there is very little point in doing a retransmit!
2074 * Note: This is called from BH context only.
2075 */
2076static bool skb_still_in_host_queue(const struct sock *sk,
2077 const struct sk_buff *skb)
2078{
2079 const struct sk_buff *fclone = skb + 1;
2080
2081 if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
2082 fclone->fclone == SKB_FCLONE_CLONE)) {
2083 NET_INC_STATS_BH(sock_net(sk),
2084 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
2085 return true;
2086 }
2087 return false;
2088}
2089
2040/* When probe timeout (PTO) fires, send a new segment if one exists, else 2090/* When probe timeout (PTO) fires, send a new segment if one exists, else
2041 * retransmit the last segment. 2091 * retransmit the last segment.
2042 */ 2092 */
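skb_still_in_host_queue() leans on the fast-clone allocator's layout: the original skb and its clone come from one back-to-back allocation, so skb + 1 addresses the companion. A conceptual view of that pairing, not the kernel's exact struct definition:

struct example_fclone_pair {	/* illustrative only */
	struct sk_buff skb1;	/* SKB_FCLONE_ORIG: owned by TCP */
	struct sk_buff skb2;	/* SKB_FCLONE_CLONE while sitting in a qdisc */
};

While skb2 still carries SKB_FCLONE_CLONE, the prior transmit has not left the host, so retransmitting now would only queue a duplicate behind it.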
@@ -2062,12 +2112,16 @@ void tcp_send_loss_probe(struct sock *sk)
2062 if (WARN_ON(!skb)) 2112 if (WARN_ON(!skb))
2063 goto rearm_timer; 2113 goto rearm_timer;
2064 2114
2115 if (skb_still_in_host_queue(sk, skb))
2116 goto rearm_timer;
2117
2065 pcount = tcp_skb_pcount(skb); 2118 pcount = tcp_skb_pcount(skb);
2066 if (WARN_ON(!pcount)) 2119 if (WARN_ON(!pcount))
2067 goto rearm_timer; 2120 goto rearm_timer;
2068 2121
2069 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { 2122 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
2070 if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss))) 2123 if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
2124 GFP_ATOMIC)))
2071 goto rearm_timer; 2125 goto rearm_timer;
2072 skb = tcp_write_queue_tail(sk); 2126 skb = tcp_write_queue_tail(sk);
2073 } 2127 }
@@ -2075,9 +2129,7 @@ void tcp_send_loss_probe(struct sock *sk)
2075 if (WARN_ON(!skb || !tcp_skb_pcount(skb))) 2129 if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
2076 goto rearm_timer; 2130 goto rearm_timer;
2077 2131
2078 /* Probe with zero data doesn't trigger fast recovery. */ 2132 err = __tcp_retransmit_skb(sk, skb);
2079 if (skb->len > 0)
2080 err = __tcp_retransmit_skb(sk, skb);
2081 2133
2082 /* Record snd_nxt for loss detection. */ 2134 /* Record snd_nxt for loss detection. */
2083 if (likely(!err)) 2135 if (likely(!err))
@@ -2383,6 +2435,9 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2383 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) 2435 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
2384 return -EAGAIN; 2436 return -EAGAIN;
2385 2437
2438 if (skb_still_in_host_queue(sk, skb))
2439 return -EBUSY;
2440
2386 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { 2441 if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
2387 if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) 2442 if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
2388 BUG(); 2443 BUG();
@@ -2405,7 +2460,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2405 return -EAGAIN; 2460 return -EAGAIN;
2406 2461
2407 if (skb->len > cur_mss) { 2462 if (skb->len > cur_mss) {
2408 if (tcp_fragment(sk, skb, cur_mss, cur_mss)) 2463 if (tcp_fragment(sk, skb, cur_mss, cur_mss, GFP_ATOMIC))
2409 return -ENOMEM; /* We'll try again later. */ 2464 return -ENOMEM; /* We'll try again later. */
2410 } else { 2465 } else {
2411 int oldpcount = tcp_skb_pcount(skb); 2466 int oldpcount = tcp_skb_pcount(skb);
@@ -2476,7 +2531,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2476 * see tcp_input.c tcp_sacktag_write_queue(). 2531 * see tcp_input.c tcp_sacktag_write_queue().
2477 */ 2532 */
2478 TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt; 2533 TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
2479 } else { 2534 } else if (err != -EBUSY) {
2480 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); 2535 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
2481 } 2536 }
2482 return err; 2537 return err;
@@ -2754,27 +2809,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2754 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) 2809 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
2755 mss = tp->rx_opt.user_mss; 2810 mss = tp->rx_opt.user_mss;
2756 2811
2757 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
2758 __u8 rcv_wscale;
2759 /* Set this up on the first call only */
2760 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
2761
2762 /* limit the window selection if the user enforce a smaller rx buffer */
2763 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
2764 (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
2765 req->window_clamp = tcp_full_space(sk);
2766
2767 /* tcp_full_space because it is guaranteed to be the first packet */
2768 tcp_select_initial_window(tcp_full_space(sk),
2769 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
2770 &req->rcv_wnd,
2771 &req->window_clamp,
2772 ireq->wscale_ok,
2773 &rcv_wscale,
2774 dst_metric(dst, RTAX_INITRWND));
2775 ireq->rcv_wscale = rcv_wscale;
2776 }
2777
2778 memset(&opts, 0, sizeof(opts)); 2812 memset(&opts, 0, sizeof(opts));
2779#ifdef CONFIG_SYN_COOKIES 2813#ifdef CONFIG_SYN_COOKIES
2780 if (unlikely(req->cookie_ts)) 2814 if (unlikely(req->cookie_ts))
@@ -3207,7 +3241,7 @@ int tcp_write_wakeup(struct sock *sk)
3207 skb->len > mss) { 3241 skb->len > mss) {
3208 seg_size = min(seg_size, mss); 3242 seg_size = min(seg_size, mss);
3209 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; 3243 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
3210 if (tcp_fragment(sk, skb, seg_size, mss)) 3244 if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
3211 return -1; 3245 return -1;
3212 } else if (!tcp_skb_pcount(skb)) 3246 } else if (!tcp_skb_pcount(skb))
3213 tcp_set_skb_tso_segs(sk, skb, mss); 3247 tcp_set_skb_tso_segs(sk, skb, mss);
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 0ac50836da4d..8250949b8853 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -15,12 +15,11 @@
15#define TCP_SCALABLE_AI_CNT 50U 15#define TCP_SCALABLE_AI_CNT 50U
16#define TCP_SCALABLE_MD_SCALE 3 16#define TCP_SCALABLE_MD_SCALE 3
17 17
18static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked, 18static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
19 u32 in_flight)
20{ 19{
21 struct tcp_sock *tp = tcp_sk(sk); 20 struct tcp_sock *tp = tcp_sk(sk);
22 21
23 if (!tcp_is_cwnd_limited(sk, in_flight)) 22 if (!tcp_is_cwnd_limited(sk))
24 return; 23 return;
25 24
26 if (tp->snd_cwnd <= tp->snd_ssthresh) 25 if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 48539fff6357..9a5e05f27f4f 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -163,14 +163,13 @@ static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
163 return min(tp->snd_ssthresh, tp->snd_cwnd-1); 163 return min(tp->snd_ssthresh, tp->snd_cwnd-1);
164} 164}
165 165
166static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked, 166static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
167 u32 in_flight)
168{ 167{
169 struct tcp_sock *tp = tcp_sk(sk); 168 struct tcp_sock *tp = tcp_sk(sk);
170 struct vegas *vegas = inet_csk_ca(sk); 169 struct vegas *vegas = inet_csk_ca(sk);
171 170
172 if (!vegas->doing_vegas_now) { 171 if (!vegas->doing_vegas_now) {
173 tcp_reno_cong_avoid(sk, ack, acked, in_flight); 172 tcp_reno_cong_avoid(sk, ack, acked);
174 return; 173 return;
175 } 174 }
176 175
@@ -195,7 +194,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked,
195 /* We don't have enough RTT samples to do the Vegas 194 /* We don't have enough RTT samples to do the Vegas
196 * calculation, so we'll behave like Reno. 195 * calculation, so we'll behave like Reno.
197 */ 196 */
198 tcp_reno_cong_avoid(sk, ack, acked, in_flight); 197 tcp_reno_cong_avoid(sk, ack, acked);
199 } else { 198 } else {
200 u32 rtt, diff; 199 u32 rtt, diff;
201 u64 target_cwnd; 200 u64 target_cwnd;
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 1b8e28fcd7e1..27b9825753d1 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -114,19 +114,18 @@ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event)
114 tcp_veno_init(sk); 114 tcp_veno_init(sk);
115} 115}
116 116
117static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked, 117static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
118 u32 in_flight)
119{ 118{
120 struct tcp_sock *tp = tcp_sk(sk); 119 struct tcp_sock *tp = tcp_sk(sk);
121 struct veno *veno = inet_csk_ca(sk); 120 struct veno *veno = inet_csk_ca(sk);
122 121
123 if (!veno->doing_veno_now) { 122 if (!veno->doing_veno_now) {
124 tcp_reno_cong_avoid(sk, ack, acked, in_flight); 123 tcp_reno_cong_avoid(sk, ack, acked);
125 return; 124 return;
126 } 125 }
127 126
128 /* limited by applications */ 127 /* limited by applications */
129 if (!tcp_is_cwnd_limited(sk, in_flight)) 128 if (!tcp_is_cwnd_limited(sk))
130 return; 129 return;
131 130
132 /* We do the Veno calculations only if we got enough rtt samples */ 131 /* We do the Veno calculations only if we got enough rtt samples */
@@ -134,7 +133,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked,
134 /* We don't have enough rtt samples to do the Veno 133 /* We don't have enough rtt samples to do the Veno
135 * calculation, so we'll behave like Reno. 134 * calculation, so we'll behave like Reno.
136 */ 135 */
137 tcp_reno_cong_avoid(sk, ack, acked, in_flight); 136 tcp_reno_cong_avoid(sk, ack, acked);
138 } else { 137 } else {
139 u64 target_cwnd; 138 u64 target_cwnd;
140 u32 rtt; 139 u32 rtt;
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 5ede0e727945..599b79b8eac0 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -69,13 +69,12 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us)
69 tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us); 69 tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us);
70} 70}
71 71
72static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked, 72static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
73 u32 in_flight)
74{ 73{
75 struct tcp_sock *tp = tcp_sk(sk); 74 struct tcp_sock *tp = tcp_sk(sk);
76 struct yeah *yeah = inet_csk_ca(sk); 75 struct yeah *yeah = inet_csk_ca(sk);
77 76
78 if (!tcp_is_cwnd_limited(sk, in_flight)) 77 if (!tcp_is_cwnd_limited(sk))
79 return; 78 return;
80 79
81 if (tp->snd_cwnd <= tp->snd_ssthresh) 80 if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4468e1adc094..185ed3e59802 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -246,7 +246,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
246 do { 246 do {
247 if (low <= snum && snum <= high && 247 if (low <= snum && snum <= high &&
248 !test_bit(snum >> udptable->log, bitmap) && 248 !test_bit(snum >> udptable->log, bitmap) &&
249 !inet_is_reserved_local_port(snum)) 249 !inet_is_local_reserved_port(net, snum))
250 goto found; 250 goto found;
251 snum += rand; 251 snum += rand;
252 } while (snum != first); 252 } while (snum != first);
@@ -727,13 +727,12 @@ EXPORT_SYMBOL(udp_flush_pending_frames);
727void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) 727void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
728{ 728{
729 struct udphdr *uh = udp_hdr(skb); 729 struct udphdr *uh = udp_hdr(skb);
730 struct sk_buff *frags = skb_shinfo(skb)->frag_list;
731 int offset = skb_transport_offset(skb); 730 int offset = skb_transport_offset(skb);
732 int len = skb->len - offset; 731 int len = skb->len - offset;
733 int hlen = len; 732 int hlen = len;
734 __wsum csum = 0; 733 __wsum csum = 0;
735 734
736 if (!frags) { 735 if (!skb_has_frag_list(skb)) {
737 /* 736 /*
738 * Only one fragment on the socket. 737 * Only one fragment on the socket.
739 */ 738 */
@@ -742,15 +741,17 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
742 uh->check = ~csum_tcpudp_magic(src, dst, len, 741 uh->check = ~csum_tcpudp_magic(src, dst, len,
743 IPPROTO_UDP, 0); 742 IPPROTO_UDP, 0);
744 } else { 743 } else {
744 struct sk_buff *frags;
745
745 /* 746 /*
746 * HW-checksum won't work as there are two or more 747 * HW-checksum won't work as there are two or more
747 * fragments on the socket so that all csums of sk_buffs 748 * fragments on the socket so that all csums of sk_buffs
748 * should be together 749 * should be together
749 */ 750 */
750 do { 751 skb_walk_frags(skb, frags) {
751 csum = csum_add(csum, frags->csum); 752 csum = csum_add(csum, frags->csum);
752 hlen -= frags->len; 753 hlen -= frags->len;
753 } while ((frags = frags->next)); 754 }
754 755
755 csum = skb_checksum(skb, offset, hlen, csum); 756 csum = skb_checksum(skb, offset, hlen, csum);
756 skb->ip_summed = CHECKSUM_NONE; 757 skb->ip_summed = CHECKSUM_NONE;
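skb_walk_frags() replaces the open-coded do/while over skb_shinfo(skb)->frag_list. Its expansion is essentially the loop below (paraphrased from skbuff.h; check the tree you build against):

#define skb_walk_frags(skb, iter) \
	for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)

Because the macro owns the iteration, the function-scope frags variable the old loop needed can move into the else branch, which is the other half of this hunk.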
@@ -762,6 +763,43 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
762} 763}
763EXPORT_SYMBOL_GPL(udp4_hwcsum); 764EXPORT_SYMBOL_GPL(udp4_hwcsum);
764 765
766/* Function to set the UDP checksum for an IPv4 UDP packet. This is intended
767 * for simple cases, such as setting the checksum for a UDP tunnel.
768 */
769void udp_set_csum(bool nocheck, struct sk_buff *skb,
770 __be32 saddr, __be32 daddr, int len)
771{
772 struct udphdr *uh = udp_hdr(skb);
773
774 if (nocheck)
775 uh->check = 0;
776 else if (skb_is_gso(skb))
777 uh->check = ~udp_v4_check(len, saddr, daddr, 0);
778 else if (skb_dst(skb) && skb_dst(skb)->dev &&
779 (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
780
781 BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
782
783 skb->ip_summed = CHECKSUM_PARTIAL;
784 skb->csum_start = skb_transport_header(skb) - skb->head;
785 skb->csum_offset = offsetof(struct udphdr, check);
786 uh->check = ~udp_v4_check(len, saddr, daddr, 0);
787 } else {
788 __wsum csum;
789
790 BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
791
792 uh->check = 0;
793 csum = skb_checksum(skb, 0, len, 0);
794 uh->check = udp_v4_check(len, saddr, daddr, csum);
795 if (uh->check == 0)
796 uh->check = CSUM_MANGLED_0;
797
798 skb->ip_summed = CHECKSUM_UNNECESSARY;
799 }
800}
801EXPORT_SYMBOL(udp_set_csum);
802
765static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) 803static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
766{ 804{
767 struct sock *sk = skb->sk; 805 struct sock *sk = skb->sk;
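A hedged sketch of how a UDP tunnel transmit path might use the new helper; the function and variable names here are hypothetical:

static void example_tunnel_xmit_csum(struct sk_buff *skb, __be32 saddr,
				     __be32 daddr, bool nocheck)
{
	int len = skb->len - skb_transport_offset(skb);

	udp_hdr(skb)->len = htons(len);
	/* Picks zero checksum, hardware offload, or a software sum. */
	udp_set_csum(nocheck, skb, saddr, daddr, len);
}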
@@ -785,7 +823,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
785 if (is_udplite) /* UDP-Lite */ 823 if (is_udplite) /* UDP-Lite */
786 csum = udplite_csum(skb); 824 csum = udplite_csum(skb);
787 825
788 else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ 826 else if (sk->sk_no_check_tx) { /* UDP csum disabled */
789 827
790 skb->ip_summed = CHECKSUM_NONE; 828 skb->ip_summed = CHECKSUM_NONE;
791 goto send; 829 goto send;
@@ -1495,6 +1533,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1495 if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { 1533 if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) {
1496 int ret; 1534 int ret;
1497 1535
1536 /* Verify checksum before giving to encap */
1537 if (udp_lib_checksum_complete(skb))
1538 goto csum_error;
1539
1498 ret = encap_rcv(sk, skb); 1540 ret = encap_rcv(sk, skb);
1499 if (ret <= 0) { 1541 if (ret <= 0) {
1500 UDP_INC_STATS_BH(sock_net(sk), 1542 UDP_INC_STATS_BH(sock_net(sk),
@@ -1672,7 +1714,6 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
1672static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, 1714static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
1673 int proto) 1715 int proto)
1674{ 1716{
1675 const struct iphdr *iph;
1676 int err; 1717 int err;
1677 1718
1678 UDP_SKB_CB(skb)->partial_cov = 0; 1719 UDP_SKB_CB(skb)->partial_cov = 0;
@@ -1684,22 +1725,8 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
1684 return err; 1725 return err;
1685 } 1726 }
1686 1727
1687 iph = ip_hdr(skb); 1728 return skb_checksum_init_zero_check(skb, proto, uh->check,
1688 if (uh->check == 0) { 1729 inet_compute_pseudo);
1689 skb->ip_summed = CHECKSUM_UNNECESSARY;
1690 } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
1691 if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
1692 proto, skb->csum))
1693 skb->ip_summed = CHECKSUM_UNNECESSARY;
1694 }
1695 if (!skb_csum_unnecessary(skb))
1696 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1697 skb->len, proto, 0);
1698 /* Probably, we should checksum udp header (it should be in cache
1699 * in any case) and data in tiny packets (< rx copybreak).
1700 */
1701
1702 return 0;
1703} 1730}
1704 1731
1705/* 1732/*
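skb_checksum_init_zero_check() centralizes what the removed block did by hand: treat uh->check == 0 as "no checksum" for UDP over IPv4, accept an already-verified CHECKSUM_COMPLETE value against the pseudo-header, and otherwise seed skb->csum from inet_compute_pseudo() so the later full verification can finish the job.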
@@ -1886,7 +1913,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
1886 unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); 1913 unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
1887 unsigned int slot2 = hash2 & udp_table.mask; 1914 unsigned int slot2 = hash2 & udp_table.mask;
1888 struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; 1915 struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
1889 INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr) 1916 INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
1890 const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); 1917 const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
1891 1918
1892 rcu_read_lock(); 1919 rcu_read_lock();
@@ -1979,7 +2006,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1979 int (*push_pending_frames)(struct sock *)) 2006 int (*push_pending_frames)(struct sock *))
1980{ 2007{
1981 struct udp_sock *up = udp_sk(sk); 2008 struct udp_sock *up = udp_sk(sk);
1982 int val; 2009 int val, valbool;
1983 int err = 0; 2010 int err = 0;
1984 int is_udplite = IS_UDPLITE(sk); 2011 int is_udplite = IS_UDPLITE(sk);
1985 2012
@@ -1989,6 +2016,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1989 if (get_user(val, (int __user *)optval)) 2016 if (get_user(val, (int __user *)optval))
1990 return -EFAULT; 2017 return -EFAULT;
1991 2018
2019 valbool = val ? 1 : 0;
2020
1992 switch (optname) { 2021 switch (optname) {
1993 case UDP_CORK: 2022 case UDP_CORK:
1994 if (val != 0) { 2023 if (val != 0) {
@@ -2018,6 +2047,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
2018 } 2047 }
2019 break; 2048 break;
2020 2049
2050 case UDP_NO_CHECK6_TX:
2051 up->no_check6_tx = valbool;
2052 break;
2053
2054 case UDP_NO_CHECK6_RX:
2055 up->no_check6_rx = valbool;
2056 break;
2057
2021 /* 2058 /*
2022 * UDP-Lite's partial checksum coverage (RFC 3828). 2059 * UDP-Lite's partial checksum coverage (RFC 3828).
2023 */ 2060 */
@@ -2100,6 +2137,14 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
2100 val = up->encap_type; 2137 val = up->encap_type;
2101 break; 2138 break;
2102 2139
2140 case UDP_NO_CHECK6_TX:
2141 val = up->no_check6_tx;
2142 break;
2143
2144 case UDP_NO_CHECK6_RX:
2145 val = up->no_check6_rx;
2146 break;
2147
2103 /* The following two cannot be changed on UDP sockets, the return is 2148 /* The following two cannot be changed on UDP sockets, the return is
2104 * always 0 (which corresponds to the full checksum coverage of UDP). */ 2149 * always 0 (which corresponds to the full checksum coverage of UDP). */
2105 case UDPLITE_SEND_CSCOV: 2150 case UDPLITE_SEND_CSCOV:
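The new options are readable back via getsockopt(), mirroring the setsockopt handler above. A hedged userspace sketch of switching transmit checksums off on a UDP tunnel socket; the fallback #define mirrors include/uapi/linux/udp.h as of this series:

#include <sys/socket.h>
#include <netinet/in.h>

#ifndef UDP_NO_CHECK6_TX
#define UDP_NO_CHECK6_TX 101	/* per include/uapi/linux/udp.h */
#endif

int disable_udp6_tx_csum(int fd)
{
	int one = 1;

	/* Any nonzero value sets the flag (valbool in the handler). */
	return setsockopt(fd, IPPROTO_UDP, UDP_NO_CHECK6_TX,
			  &one, sizeof(one));
}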
@@ -2484,7 +2529,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
2484 int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); 2529 int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
2485 __be16 protocol = skb->protocol; 2530 __be16 protocol = skb->protocol;
2486 netdev_features_t enc_features; 2531 netdev_features_t enc_features;
2487 int outer_hlen; 2532 int udp_offset, outer_hlen;
2533 unsigned int oldlen;
2534 bool need_csum;
2535
2536 oldlen = (u16)~skb->len;
2488 2537
2489 if (unlikely(!pskb_may_pull(skb, tnl_hlen))) 2538 if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
2490 goto out; 2539 goto out;
@@ -2496,6 +2545,10 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
2496 skb->mac_len = skb_inner_network_offset(skb); 2545 skb->mac_len = skb_inner_network_offset(skb);
2497 skb->protocol = htons(ETH_P_TEB); 2546 skb->protocol = htons(ETH_P_TEB);
2498 2547
2548 need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
2549 if (need_csum)
2550 skb->encap_hdr_csum = 1;
2551
2499 /* segment inner packet. */ 2552 /* segment inner packet. */
2500 enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); 2553 enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
2501 segs = skb_mac_gso_segment(skb, enc_features); 2554 segs = skb_mac_gso_segment(skb, enc_features);
@@ -2506,10 +2559,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
2506 } 2559 }
2507 2560
2508 outer_hlen = skb_tnl_header_len(skb); 2561 outer_hlen = skb_tnl_header_len(skb);
2562 udp_offset = outer_hlen - tnl_hlen;
2509 skb = segs; 2563 skb = segs;
2510 do { 2564 do {
2511 struct udphdr *uh; 2565 struct udphdr *uh;
2512 int udp_offset = outer_hlen - tnl_hlen; 2566 int len;
2513 2567
2514 skb_reset_inner_headers(skb); 2568 skb_reset_inner_headers(skb);
2515 skb->encapsulation = 1; 2569 skb->encapsulation = 1;
@@ -2520,31 +2574,20 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
2520 skb_reset_mac_header(skb); 2574 skb_reset_mac_header(skb);
2521 skb_set_network_header(skb, mac_len); 2575 skb_set_network_header(skb, mac_len);
2522 skb_set_transport_header(skb, udp_offset); 2576 skb_set_transport_header(skb, udp_offset);
2577 len = skb->len - udp_offset;
2523 uh = udp_hdr(skb); 2578 uh = udp_hdr(skb);
2524 uh->len = htons(skb->len - udp_offset); 2579 uh->len = htons(len);
2525
2526 /* csum segment if tunnel sets skb with csum. */
2527 if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) {
2528 struct iphdr *iph = ip_hdr(skb);
2529 2580
2530 uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 2581 if (need_csum) {
2531 skb->len - udp_offset, 2582 __be32 delta = htonl(oldlen + len);
2532 IPPROTO_UDP, 0);
2533 uh->check = csum_fold(skb_checksum(skb, udp_offset,
2534 skb->len - udp_offset, 0));
2535 if (uh->check == 0)
2536 uh->check = CSUM_MANGLED_0;
2537 2583
2538 } else if (protocol == htons(ETH_P_IPV6)) { 2584 uh->check = ~csum_fold((__force __wsum)
2539 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 2585 ((__force u32)uh->check +
2540 u32 len = skb->len - udp_offset; 2586 (__force u32)delta));
2587 uh->check = gso_make_checksum(skb, ~uh->check);
2541 2588
2542 uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
2543 len, IPPROTO_UDP, 0);
2544 uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0));
2545 if (uh->check == 0) 2589 if (uh->check == 0)
2546 uh->check = CSUM_MANGLED_0; 2590 uh->check = CSUM_MANGLED_0;
2547 skb->ip_summed = CHECKSUM_NONE;
2548 } 2591 }
2549 2592
2550 skb->protocol = protocol; 2593 skb->protocol = protocol;
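The delta update above avoids re-summing each segment's payload: oldlen is the ones'-complement of the pre-segmentation length, so adding oldlen + len to the checksum swaps the old length contribution for the new one. The same incremental-update identity (RFC 1624) in standalone C, for illustration only:

#include <stdint.h>

static uint16_t csum_update16(uint16_t check, uint16_t old16, uint16_t new16)
{
	/* HC' = ~(~HC + ~m + m'), computed with end-around carry folds. */
	uint32_t sum = (uint16_t)~check;

	sum += (uint16_t)~old16 + new16;
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}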
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 88b4023ecfcf..546d2d439dda 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -56,7 +56,8 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
56 __wsum csum; 56 __wsum csum;
57 57
58 if (skb->encapsulation && 58 if (skb->encapsulation &&
59 skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) { 59 (skb_shinfo(skb)->gso_type &
60 (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
60 segs = skb_udp_tunnel_segment(skb, features); 61 segs = skb_udp_tunnel_segment(skb, features);
61 goto out; 62 goto out;
62 } 63 }
@@ -71,8 +72,10 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
71 72
72 if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | 73 if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
73 SKB_GSO_UDP_TUNNEL | 74 SKB_GSO_UDP_TUNNEL |
75 SKB_GSO_UDP_TUNNEL_CSUM |
74 SKB_GSO_IPIP | 76 SKB_GSO_IPIP |
75 SKB_GSO_GRE | SKB_GSO_MPLS) || 77 SKB_GSO_GRE | SKB_GSO_GRE_CSUM |
78 SKB_GSO_MPLS) ||
76 !(type & (SKB_GSO_UDP)))) 79 !(type & (SKB_GSO_UDP))))
77 goto out; 80 goto out;
78 81
@@ -197,6 +200,7 @@ unflush:
197 } 200 }
198 201
199 skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ 202 skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
203 skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
200 pp = uo_priv->offload->callbacks.gro_receive(head, skb); 204 pp = uo_priv->offload->callbacks.gro_receive(head, skb);
201 205
202out_unlock: 206out_unlock:
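For CHECKSUM_COMPLETE packets, skb_gro_postpull_rcsum() subtracts the pulled UDP header bytes from skb->csum, so the inner protocol's gro_receive callback still sees a running checksum that covers exactly the data it is handed.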
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 2c46acd4cc36..3b3efbda48e1 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -70,7 +70,6 @@ static struct inet_protosw udplite4_protosw = {
70 .protocol = IPPROTO_UDPLITE, 70 .protocol = IPPROTO_UDPLITE,
71 .prot = &udplite_prot, 71 .prot = &udplite_prot,
72 .ops = &inet_dgram_ops, 72 .ops = &inet_dgram_ops,
73 .no_check = 0, /* must checksum (RFC 3828) */
74 .flags = INET_PROTOSW_PERMANENT, 73 .flags = INET_PROTOSW_PERMANENT,
75}; 74};
76 75
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 05f2b484954f..91771a7c802f 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -58,12 +58,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
58 58
59 top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 59 top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
60 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); 60 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
61 ip_select_ident(skb, dst->child, NULL);
62 61
63 top_iph->ttl = ip4_dst_hoplimit(dst->child); 62 top_iph->ttl = ip4_dst_hoplimit(dst->child);
64 63
65 top_iph->saddr = x->props.saddr.a4; 64 top_iph->saddr = x->props.saddr.a4;
66 top_iph->daddr = x->id.daddr.a4; 65 top_iph->daddr = x->id.daddr.a4;
66 ip_select_ident(skb, NULL);
67 67
68 return 0; 68 return 0;
69} 69}
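Moving ip_select_ident() below the address assignments is not just the signature change: the new IP ID generator derives its slot from fields of the finished header (notably the destination address), so it must run after top_iph->saddr/daddr are written, and the dst argument is gone because the helper now takes what it needs from the skb itself.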
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 186a8ecf92fa..d5f6bd9a210a 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -25,7 +25,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
25 if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) 25 if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
26 goto out; 26 goto out;
27 27
28 if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df) 28 if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
29 goto out; 29 goto out;
30 30
31 mtu = dst_mtu(skb_dst(skb)); 31 mtu = dst_mtu(skb_dst(skb));