diff options
Diffstat (limited to 'net/ipv4')
53 files changed, 1323 insertions, 1529 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index d183262943d9..20f1cb5c8aba 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
@@ -262,8 +262,8 @@ config ARPD | |||
262 | bool "IP: ARP daemon support" | 262 | bool "IP: ARP daemon support" |
263 | ---help--- | 263 | ---help--- |
264 | The kernel maintains an internal cache which maps IP addresses to | 264 | The kernel maintains an internal cache which maps IP addresses to |
265 | hardware addresses on the local network, so that Ethernet/Token Ring/ | 265 | hardware addresses on the local network, so that Ethernet |
266 | etc. frames are sent to the proper address on the physical networking | 266 | frames are sent to the proper address on the physical networking |
267 | layer. Normally, kernel uses the ARP protocol to resolve these | 267 | layer. Normally, kernel uses the ARP protocol to resolve these |
268 | mappings. | 268 | mappings. |
269 | 269 | ||
@@ -312,7 +312,7 @@ config SYN_COOKIES | |||
312 | 312 | ||
313 | config INET_AH | 313 | config INET_AH |
314 | tristate "IP: AH transformation" | 314 | tristate "IP: AH transformation" |
315 | select XFRM | 315 | select XFRM_ALGO |
316 | select CRYPTO | 316 | select CRYPTO |
317 | select CRYPTO_HMAC | 317 | select CRYPTO_HMAC |
318 | select CRYPTO_MD5 | 318 | select CRYPTO_MD5 |
@@ -324,7 +324,7 @@ config INET_AH | |||
324 | 324 | ||
325 | config INET_ESP | 325 | config INET_ESP |
326 | tristate "IP: ESP transformation" | 326 | tristate "IP: ESP transformation" |
327 | select XFRM | 327 | select XFRM_ALGO |
328 | select CRYPTO | 328 | select CRYPTO |
329 | select CRYPTO_AUTHENC | 329 | select CRYPTO_AUTHENC |
330 | select CRYPTO_HMAC | 330 | select CRYPTO_HMAC |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 10e3751466b5..c8f7aee587d1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -350,7 +350,7 @@ lookup_protocol: | |||
350 | err = 0; | 350 | err = 0; |
351 | sk->sk_no_check = answer_no_check; | 351 | sk->sk_no_check = answer_no_check; |
352 | if (INET_PROTOSW_REUSE & answer_flags) | 352 | if (INET_PROTOSW_REUSE & answer_flags) |
353 | sk->sk_reuse = 1; | 353 | sk->sk_reuse = SK_CAN_REUSE; |
354 | 354 | ||
355 | inet = inet_sk(sk); | 355 | inet = inet_sk(sk); |
356 | inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; | 356 | inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; |
@@ -541,7 +541,7 @@ out: | |||
541 | } | 541 | } |
542 | EXPORT_SYMBOL(inet_bind); | 542 | EXPORT_SYMBOL(inet_bind); |
543 | 543 | ||
544 | int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, | 544 | int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, |
545 | int addr_len, int flags) | 545 | int addr_len, int flags) |
546 | { | 546 | { |
547 | struct sock *sk = sock->sk; | 547 | struct sock *sk = sock->sk; |
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index fd508b526014..e8f2617ecd47 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c | |||
@@ -77,7 +77,7 @@ static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash, | |||
77 | 77 | ||
78 | static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr) | 78 | static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr) |
79 | { | 79 | { |
80 | unsigned char * optptr = (unsigned char*)(iph+1); | 80 | unsigned char *optptr = (unsigned char *)(iph+1); |
81 | int l = iph->ihl*4 - sizeof(struct iphdr); | 81 | int l = iph->ihl*4 - sizeof(struct iphdr); |
82 | int optlen; | 82 | int optlen; |
83 | 83 | ||
@@ -406,8 +406,8 @@ static void ah4_err(struct sk_buff *skb, u32 info) | |||
406 | ah->spi, IPPROTO_AH, AF_INET); | 406 | ah->spi, IPPROTO_AH, AF_INET); |
407 | if (!x) | 407 | if (!x) |
408 | return; | 408 | return; |
409 | printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", | 409 | pr_debug("pmtu discovery on SA AH/%08x/%08x\n", |
410 | ntohl(ah->spi), ntohl(iph->daddr)); | 410 | ntohl(ah->spi), ntohl(iph->daddr)); |
411 | xfrm_state_put(x); | 411 | xfrm_state_put(x); |
412 | } | 412 | } |
413 | 413 | ||
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 18d9b81ecb1a..cda37be02f8d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c | |||
@@ -73,6 +73,8 @@ | |||
73 | * Jesper D. Brouer: Proxy ARP PVLAN RFC 3069 support. | 73 | * Jesper D. Brouer: Proxy ARP PVLAN RFC 3069 support. |
74 | */ | 74 | */ |
75 | 75 | ||
76 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
77 | |||
76 | #include <linux/module.h> | 78 | #include <linux/module.h> |
77 | #include <linux/types.h> | 79 | #include <linux/types.h> |
78 | #include <linux/string.h> | 80 | #include <linux/string.h> |
@@ -89,7 +91,6 @@ | |||
89 | #include <linux/etherdevice.h> | 91 | #include <linux/etherdevice.h> |
90 | #include <linux/fddidevice.h> | 92 | #include <linux/fddidevice.h> |
91 | #include <linux/if_arp.h> | 93 | #include <linux/if_arp.h> |
92 | #include <linux/trdevice.h> | ||
93 | #include <linux/skbuff.h> | 94 | #include <linux/skbuff.h> |
94 | #include <linux/proc_fs.h> | 95 | #include <linux/proc_fs.h> |
95 | #include <linux/seq_file.h> | 96 | #include <linux/seq_file.h> |
@@ -193,9 +194,6 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir) | |||
193 | case ARPHRD_IEEE802: | 194 | case ARPHRD_IEEE802: |
194 | ip_eth_mc_map(addr, haddr); | 195 | ip_eth_mc_map(addr, haddr); |
195 | return 0; | 196 | return 0; |
196 | case ARPHRD_IEEE802_TR: | ||
197 | ip_tr_mc_map(addr, haddr); | ||
198 | return 0; | ||
199 | case ARPHRD_INFINIBAND: | 197 | case ARPHRD_INFINIBAND: |
200 | ip_ib_mc_map(addr, dev->broadcast, haddr); | 198 | ip_ib_mc_map(addr, dev->broadcast, haddr); |
201 | return 0; | 199 | return 0; |
@@ -364,8 +362,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) | |||
364 | probes -= neigh->parms->ucast_probes; | 362 | probes -= neigh->parms->ucast_probes; |
365 | if (probes < 0) { | 363 | if (probes < 0) { |
366 | if (!(neigh->nud_state & NUD_VALID)) | 364 | if (!(neigh->nud_state & NUD_VALID)) |
367 | printk(KERN_DEBUG | 365 | pr_debug("trying to ucast probe in NUD_INVALID\n"); |
368 | "trying to ucast probe in NUD_INVALID\n"); | ||
369 | dst_ha = neigh->ha; | 366 | dst_ha = neigh->ha; |
370 | read_lock_bh(&neigh->lock); | 367 | read_lock_bh(&neigh->lock); |
371 | } else { | 368 | } else { |
@@ -452,7 +449,7 @@ static int arp_set_predefined(int addr_hint, unsigned char *haddr, | |||
452 | { | 449 | { |
453 | switch (addr_hint) { | 450 | switch (addr_hint) { |
454 | case RTN_LOCAL: | 451 | case RTN_LOCAL: |
455 | printk(KERN_DEBUG "ARP: arp called for own IP address\n"); | 452 | pr_debug("arp called for own IP address\n"); |
456 | memcpy(haddr, dev->dev_addr, dev->addr_len); | 453 | memcpy(haddr, dev->dev_addr, dev->addr_len); |
457 | return 1; | 454 | return 1; |
458 | case RTN_MULTICAST: | 455 | case RTN_MULTICAST: |
@@ -473,7 +470,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) | |||
473 | struct neighbour *n; | 470 | struct neighbour *n; |
474 | 471 | ||
475 | if (!skb_dst(skb)) { | 472 | if (!skb_dst(skb)) { |
476 | printk(KERN_DEBUG "arp_find is called with dst==NULL\n"); | 473 | pr_debug("arp_find is called with dst==NULL\n"); |
477 | kfree_skb(skb); | 474 | kfree_skb(skb); |
478 | return 1; | 475 | return 1; |
479 | } | 476 | } |
@@ -648,12 +645,6 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, | |||
648 | arp->ar_pro = htons(ETH_P_IP); | 645 | arp->ar_pro = htons(ETH_P_IP); |
649 | break; | 646 | break; |
650 | #endif | 647 | #endif |
651 | #if IS_ENABLED(CONFIG_TR) | ||
652 | case ARPHRD_IEEE802_TR: | ||
653 | arp->ar_hrd = htons(ARPHRD_IEEE802); | ||
654 | arp->ar_pro = htons(ETH_P_IP); | ||
655 | break; | ||
656 | #endif | ||
657 | } | 648 | } |
658 | 649 | ||
659 | arp->ar_hln = dev->addr_len; | 650 | arp->ar_hln = dev->addr_len; |
@@ -751,11 +742,10 @@ static int arp_process(struct sk_buff *skb) | |||
751 | goto out; | 742 | goto out; |
752 | break; | 743 | break; |
753 | case ARPHRD_ETHER: | 744 | case ARPHRD_ETHER: |
754 | case ARPHRD_IEEE802_TR: | ||
755 | case ARPHRD_FDDI: | 745 | case ARPHRD_FDDI: |
756 | case ARPHRD_IEEE802: | 746 | case ARPHRD_IEEE802: |
757 | /* | 747 | /* |
758 | * ETHERNET, Token Ring and Fibre Channel (which are IEEE 802 | 748 | * ETHERNET, and Fibre Channel (which are IEEE 802 |
759 | * devices, according to RFC 2625) devices will accept ARP | 749 | * devices, according to RFC 2625) devices will accept ARP |
760 | * hardware types of either 1 (Ethernet) or 6 (IEEE 802.2). | 750 | * hardware types of either 1 (Ethernet) or 6 (IEEE 802.2). |
761 | * This is the case also of FDDI, where the RFC 1390 says that | 751 | * This is the case also of FDDI, where the RFC 1390 says that |
@@ -1059,7 +1049,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, | |||
1059 | neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev); | 1049 | neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev); |
1060 | err = PTR_ERR(neigh); | 1050 | err = PTR_ERR(neigh); |
1061 | if (!IS_ERR(neigh)) { | 1051 | if (!IS_ERR(neigh)) { |
1062 | unsigned state = NUD_STALE; | 1052 | unsigned int state = NUD_STALE; |
1063 | if (r->arp_flags & ATF_PERM) | 1053 | if (r->arp_flags & ATF_PERM) |
1064 | state = NUD_PERMANENT; | 1054 | state = NUD_PERMANENT; |
1065 | err = neigh_update(neigh, (r->arp_flags & ATF_COM) ? | 1055 | err = neigh_update(neigh, (r->arp_flags & ATF_COM) ? |
@@ -1071,7 +1061,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, | |||
1071 | return err; | 1061 | return err; |
1072 | } | 1062 | } |
1073 | 1063 | ||
1074 | static unsigned arp_state_to_flags(struct neighbour *neigh) | 1064 | static unsigned int arp_state_to_flags(struct neighbour *neigh) |
1075 | { | 1065 | { |
1076 | if (neigh->nud_state&NUD_PERMANENT) | 1066 | if (neigh->nud_state&NUD_PERMANENT) |
1077 | return ATF_PERM | ATF_COM; | 1067 | return ATF_PERM | ATF_COM; |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 6e447ff94dfa..10e15a144e95 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -217,8 +217,7 @@ void in_dev_finish_destroy(struct in_device *idev) | |||
217 | WARN_ON(idev->ifa_list); | 217 | WARN_ON(idev->ifa_list); |
218 | WARN_ON(idev->mc_list); | 218 | WARN_ON(idev->mc_list); |
219 | #ifdef NET_REFCNT_DEBUG | 219 | #ifdef NET_REFCNT_DEBUG |
220 | printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n", | 220 | pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); |
221 | idev, dev ? dev->name : "NIL"); | ||
222 | #endif | 221 | #endif |
223 | dev_put(dev); | 222 | dev_put(dev); |
224 | if (!idev->dead) | 223 | if (!idev->dead) |
@@ -1125,7 +1124,7 @@ skip: | |||
1125 | } | 1124 | } |
1126 | } | 1125 | } |
1127 | 1126 | ||
1128 | static inline bool inetdev_valid_mtu(unsigned mtu) | 1127 | static inline bool inetdev_valid_mtu(unsigned int mtu) |
1129 | { | 1128 | { |
1130 | return mtu >= 68; | 1129 | return mtu >= 68; |
1131 | } | 1130 | } |
@@ -1174,7 +1173,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, | |||
1174 | 1173 | ||
1175 | switch (event) { | 1174 | switch (event) { |
1176 | case NETDEV_REGISTER: | 1175 | case NETDEV_REGISTER: |
1177 | printk(KERN_DEBUG "inetdev_event: bug\n"); | 1176 | pr_debug("%s: bug\n", __func__); |
1178 | RCU_INIT_POINTER(dev->ip_ptr, NULL); | 1177 | RCU_INIT_POINTER(dev->ip_ptr, NULL); |
1179 | break; | 1178 | break; |
1180 | case NETDEV_UP: | 1179 | case NETDEV_UP: |
@@ -1266,17 +1265,15 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, | |||
1266 | ifm->ifa_scope = ifa->ifa_scope; | 1265 | ifm->ifa_scope = ifa->ifa_scope; |
1267 | ifm->ifa_index = ifa->ifa_dev->dev->ifindex; | 1266 | ifm->ifa_index = ifa->ifa_dev->dev->ifindex; |
1268 | 1267 | ||
1269 | if (ifa->ifa_address) | 1268 | if ((ifa->ifa_address && |
1270 | NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address); | 1269 | nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) || |
1271 | 1270 | (ifa->ifa_local && | |
1272 | if (ifa->ifa_local) | 1271 | nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) || |
1273 | NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local); | 1272 | (ifa->ifa_broadcast && |
1274 | 1273 | nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || | |
1275 | if (ifa->ifa_broadcast) | 1274 | (ifa->ifa_label[0] && |
1276 | NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast); | 1275 | nla_put_string(skb, IFA_LABEL, ifa->ifa_label))) |
1277 | 1276 | goto nla_put_failure; | |
1278 | if (ifa->ifa_label[0]) | ||
1279 | NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); | ||
1280 | 1277 | ||
1281 | return nlmsg_end(skb, nlh); | 1278 | return nlmsg_end(skb, nlh); |
1282 | 1279 | ||
@@ -1587,7 +1584,6 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write, | |||
1587 | static struct devinet_sysctl_table { | 1584 | static struct devinet_sysctl_table { |
1588 | struct ctl_table_header *sysctl_header; | 1585 | struct ctl_table_header *sysctl_header; |
1589 | struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; | 1586 | struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; |
1590 | char *dev_name; | ||
1591 | } devinet_sysctl = { | 1587 | } devinet_sysctl = { |
1592 | .devinet_vars = { | 1588 | .devinet_vars = { |
1593 | DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", | 1589 | DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", |
@@ -1629,16 +1625,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, | |||
1629 | { | 1625 | { |
1630 | int i; | 1626 | int i; |
1631 | struct devinet_sysctl_table *t; | 1627 | struct devinet_sysctl_table *t; |
1632 | 1628 | char path[sizeof("net/ipv4/conf/") + IFNAMSIZ]; | |
1633 | #define DEVINET_CTL_PATH_DEV 3 | ||
1634 | |||
1635 | struct ctl_path devinet_ctl_path[] = { | ||
1636 | { .procname = "net", }, | ||
1637 | { .procname = "ipv4", }, | ||
1638 | { .procname = "conf", }, | ||
1639 | { /* to be set */ }, | ||
1640 | { }, | ||
1641 | }; | ||
1642 | 1629 | ||
1643 | t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); | 1630 | t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); |
1644 | if (!t) | 1631 | if (!t) |
@@ -1650,27 +1637,15 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, | |||
1650 | t->devinet_vars[i].extra2 = net; | 1637 | t->devinet_vars[i].extra2 = net; |
1651 | } | 1638 | } |
1652 | 1639 | ||
1653 | /* | 1640 | snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name); |
1654 | * Make a copy of dev_name, because '.procname' is regarded as const | ||
1655 | * by sysctl and we wouldn't want anyone to change it under our feet | ||
1656 | * (see SIOCSIFNAME). | ||
1657 | */ | ||
1658 | t->dev_name = kstrdup(dev_name, GFP_KERNEL); | ||
1659 | if (!t->dev_name) | ||
1660 | goto free; | ||
1661 | |||
1662 | devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name; | ||
1663 | 1641 | ||
1664 | t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path, | 1642 | t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars); |
1665 | t->devinet_vars); | ||
1666 | if (!t->sysctl_header) | 1643 | if (!t->sysctl_header) |
1667 | goto free_procname; | 1644 | goto free; |
1668 | 1645 | ||
1669 | p->sysctl = t; | 1646 | p->sysctl = t; |
1670 | return 0; | 1647 | return 0; |
1671 | 1648 | ||
1672 | free_procname: | ||
1673 | kfree(t->dev_name); | ||
1674 | free: | 1649 | free: |
1675 | kfree(t); | 1650 | kfree(t); |
1676 | out: | 1651 | out: |
@@ -1686,7 +1661,6 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) | |||
1686 | 1661 | ||
1687 | cnf->sysctl = NULL; | 1662 | cnf->sysctl = NULL; |
1688 | unregister_net_sysctl_table(t->sysctl_header); | 1663 | unregister_net_sysctl_table(t->sysctl_header); |
1689 | kfree(t->dev_name); | ||
1690 | kfree(t); | 1664 | kfree(t); |
1691 | } | 1665 | } |
1692 | 1666 | ||
@@ -1716,12 +1690,6 @@ static struct ctl_table ctl_forward_entry[] = { | |||
1716 | }, | 1690 | }, |
1717 | { }, | 1691 | { }, |
1718 | }; | 1692 | }; |
1719 | |||
1720 | static __net_initdata struct ctl_path net_ipv4_path[] = { | ||
1721 | { .procname = "net", }, | ||
1722 | { .procname = "ipv4", }, | ||
1723 | { }, | ||
1724 | }; | ||
1725 | #endif | 1693 | #endif |
1726 | 1694 | ||
1727 | static __net_init int devinet_init_net(struct net *net) | 1695 | static __net_init int devinet_init_net(struct net *net) |
@@ -1767,7 +1735,7 @@ static __net_init int devinet_init_net(struct net *net) | |||
1767 | goto err_reg_dflt; | 1735 | goto err_reg_dflt; |
1768 | 1736 | ||
1769 | err = -ENOMEM; | 1737 | err = -ENOMEM; |
1770 | forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl); | 1738 | forw_hdr = register_net_sysctl(net, "net/ipv4", tbl); |
1771 | if (forw_hdr == NULL) | 1739 | if (forw_hdr == NULL) |
1772 | goto err_reg_ctl; | 1740 | goto err_reg_ctl; |
1773 | net->ipv4.forw_hdr = forw_hdr; | 1741 | net->ipv4.forw_hdr = forw_hdr; |
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index cbe3a68507cf..3854411fa37c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -136,13 +136,13 @@ static void fib_flush(struct net *net) | |||
136 | * Find address type as if only "dev" was present in the system. If | 136 | * Find address type as if only "dev" was present in the system. If |
137 | * on_dev is NULL then all interfaces are taken into consideration. | 137 | * on_dev is NULL then all interfaces are taken into consideration. |
138 | */ | 138 | */ |
139 | static inline unsigned __inet_dev_addr_type(struct net *net, | 139 | static inline unsigned int __inet_dev_addr_type(struct net *net, |
140 | const struct net_device *dev, | 140 | const struct net_device *dev, |
141 | __be32 addr) | 141 | __be32 addr) |
142 | { | 142 | { |
143 | struct flowi4 fl4 = { .daddr = addr }; | 143 | struct flowi4 fl4 = { .daddr = addr }; |
144 | struct fib_result res; | 144 | struct fib_result res; |
145 | unsigned ret = RTN_BROADCAST; | 145 | unsigned int ret = RTN_BROADCAST; |
146 | struct fib_table *local_table; | 146 | struct fib_table *local_table; |
147 | 147 | ||
148 | if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr)) | 148 | if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr)) |
@@ -740,7 +740,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) | |||
740 | #define BRD_OK 2 | 740 | #define BRD_OK 2 |
741 | #define BRD0_OK 4 | 741 | #define BRD0_OK 4 |
742 | #define BRD1_OK 8 | 742 | #define BRD1_OK 8 |
743 | unsigned ok = 0; | 743 | unsigned int ok = 0; |
744 | int subnet = 0; /* Primary network */ | 744 | int subnet = 0; /* Primary network */ |
745 | int gone = 1; /* Address is missing */ | 745 | int gone = 1; /* Address is missing */ |
746 | int same_prefsrc = 0; /* Another primary with same IP */ | 746 | int same_prefsrc = 0; /* Another primary with same IP */ |
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 799fc790b3cf..2d043f71ef70 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c | |||
@@ -221,15 +221,15 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, | |||
221 | frh->src_len = rule4->src_len; | 221 | frh->src_len = rule4->src_len; |
222 | frh->tos = rule4->tos; | 222 | frh->tos = rule4->tos; |
223 | 223 | ||
224 | if (rule4->dst_len) | 224 | if ((rule4->dst_len && |
225 | NLA_PUT_BE32(skb, FRA_DST, rule4->dst); | 225 | nla_put_be32(skb, FRA_DST, rule4->dst)) || |
226 | 226 | (rule4->src_len && | |
227 | if (rule4->src_len) | 227 | nla_put_be32(skb, FRA_SRC, rule4->src))) |
228 | NLA_PUT_BE32(skb, FRA_SRC, rule4->src); | 228 | goto nla_put_failure; |
229 | |||
230 | #ifdef CONFIG_IP_ROUTE_CLASSID | 229 | #ifdef CONFIG_IP_ROUTE_CLASSID |
231 | if (rule4->tclassid) | 230 | if (rule4->tclassid && |
232 | NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); | 231 | nla_put_u32(skb, FRA_FLOW, rule4->tclassid)) |
232 | goto nla_put_failure; | ||
233 | #endif | 233 | #endif |
234 | return 0; | 234 | return 0; |
235 | 235 | ||
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 5063fa38ac7b..a8bdf7405433 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -931,33 +931,36 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
931 | rtm->rtm_table = tb_id; | 931 | rtm->rtm_table = tb_id; |
932 | else | 932 | else |
933 | rtm->rtm_table = RT_TABLE_COMPAT; | 933 | rtm->rtm_table = RT_TABLE_COMPAT; |
934 | NLA_PUT_U32(skb, RTA_TABLE, tb_id); | 934 | if (nla_put_u32(skb, RTA_TABLE, tb_id)) |
935 | goto nla_put_failure; | ||
935 | rtm->rtm_type = type; | 936 | rtm->rtm_type = type; |
936 | rtm->rtm_flags = fi->fib_flags; | 937 | rtm->rtm_flags = fi->fib_flags; |
937 | rtm->rtm_scope = fi->fib_scope; | 938 | rtm->rtm_scope = fi->fib_scope; |
938 | rtm->rtm_protocol = fi->fib_protocol; | 939 | rtm->rtm_protocol = fi->fib_protocol; |
939 | 940 | ||
940 | if (rtm->rtm_dst_len) | 941 | if (rtm->rtm_dst_len && |
941 | NLA_PUT_BE32(skb, RTA_DST, dst); | 942 | nla_put_be32(skb, RTA_DST, dst)) |
942 | 943 | goto nla_put_failure; | |
943 | if (fi->fib_priority) | 944 | if (fi->fib_priority && |
944 | NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority); | 945 | nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) |
945 | 946 | goto nla_put_failure; | |
946 | if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) | 947 | if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) |
947 | goto nla_put_failure; | 948 | goto nla_put_failure; |
948 | 949 | ||
949 | if (fi->fib_prefsrc) | 950 | if (fi->fib_prefsrc && |
950 | NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc); | 951 | nla_put_be32(skb, RTA_PREFSRC, fi->fib_prefsrc)) |
951 | 952 | goto nla_put_failure; | |
952 | if (fi->fib_nhs == 1) { | 953 | if (fi->fib_nhs == 1) { |
953 | if (fi->fib_nh->nh_gw) | 954 | if (fi->fib_nh->nh_gw && |
954 | NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw); | 955 | nla_put_be32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) |
955 | 956 | goto nla_put_failure; | |
956 | if (fi->fib_nh->nh_oif) | 957 | if (fi->fib_nh->nh_oif && |
957 | NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); | 958 | nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) |
959 | goto nla_put_failure; | ||
958 | #ifdef CONFIG_IP_ROUTE_CLASSID | 960 | #ifdef CONFIG_IP_ROUTE_CLASSID |
959 | if (fi->fib_nh[0].nh_tclassid) | 961 | if (fi->fib_nh[0].nh_tclassid && |
960 | NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); | 962 | nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) |
963 | goto nla_put_failure; | ||
961 | #endif | 964 | #endif |
962 | } | 965 | } |
963 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 966 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
@@ -978,11 +981,13 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
978 | rtnh->rtnh_hops = nh->nh_weight - 1; | 981 | rtnh->rtnh_hops = nh->nh_weight - 1; |
979 | rtnh->rtnh_ifindex = nh->nh_oif; | 982 | rtnh->rtnh_ifindex = nh->nh_oif; |
980 | 983 | ||
981 | if (nh->nh_gw) | 984 | if (nh->nh_gw && |
982 | NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); | 985 | nla_put_be32(skb, RTA_GATEWAY, nh->nh_gw)) |
986 | goto nla_put_failure; | ||
983 | #ifdef CONFIG_IP_ROUTE_CLASSID | 987 | #ifdef CONFIG_IP_ROUTE_CLASSID |
984 | if (nh->nh_tclassid) | 988 | if (nh->nh_tclassid && |
985 | NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); | 989 | nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) |
990 | goto nla_put_failure; | ||
986 | #endif | 991 | #endif |
987 | /* length of rtnetlink header + attributes */ | 992 | /* length of rtnetlink header + attributes */ |
988 | rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; | 993 | rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index bce36f1a37b4..30b88d7b4bd6 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -1370,6 +1370,8 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, | |||
1370 | 1370 | ||
1371 | if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) | 1371 | if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) |
1372 | continue; | 1372 | continue; |
1373 | if (fi->fib_dead) | ||
1374 | continue; | ||
1373 | if (fa->fa_info->fib_scope < flp->flowi4_scope) | 1375 | if (fa->fa_info->fib_scope < flp->flowi4_scope) |
1374 | continue; | 1376 | continue; |
1375 | fib_alias_accessed(fa); | 1377 | fib_alias_accessed(fa); |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 2cb2bf845641..c75efbdc71cb 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -713,11 +713,10 @@ static void icmp_unreach(struct sk_buff *skb) | |||
713 | 713 | ||
714 | if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && | 714 | if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && |
715 | inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { | 715 | inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { |
716 | if (net_ratelimit()) | 716 | net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n", |
717 | pr_warn("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n", | 717 | &ip_hdr(skb)->saddr, |
718 | &ip_hdr(skb)->saddr, | 718 | icmph->type, icmph->code, |
719 | icmph->type, icmph->code, | 719 | &iph->daddr, skb->dev->name); |
720 | &iph->daddr, skb->dev->name); | ||
721 | goto out; | 720 | goto out; |
722 | } | 721 | } |
723 | 722 | ||
@@ -906,8 +905,7 @@ out_err: | |||
906 | static void icmp_address(struct sk_buff *skb) | 905 | static void icmp_address(struct sk_buff *skb) |
907 | { | 906 | { |
908 | #if 0 | 907 | #if 0 |
909 | if (net_ratelimit()) | 908 | net_dbg_ratelimited("a guy asks for address mask. Who is it?\n"); |
910 | printk(KERN_DEBUG "a guy asks for address mask. Who is it?\n"); | ||
911 | #endif | 909 | #endif |
912 | } | 910 | } |
913 | 911 | ||
@@ -943,10 +941,10 @@ static void icmp_address_reply(struct sk_buff *skb) | |||
943 | inet_ifa_match(ip_hdr(skb)->saddr, ifa)) | 941 | inet_ifa_match(ip_hdr(skb)->saddr, ifa)) |
944 | break; | 942 | break; |
945 | } | 943 | } |
946 | if (!ifa && net_ratelimit()) { | 944 | if (!ifa) |
947 | pr_info("Wrong address mask %pI4 from %s/%pI4\n", | 945 | net_info_ratelimited("Wrong address mask %pI4 from %s/%pI4\n", |
948 | mp, dev->name, &ip_hdr(skb)->saddr); | 946 | mp, |
949 | } | 947 | dev->name, &ip_hdr(skb)->saddr); |
950 | } | 948 | } |
951 | } | 949 | } |
952 | 950 | ||
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 5dfecfd7d5e9..6699f23e6f55 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -344,10 +344,10 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) | |||
344 | pip->protocol = IPPROTO_IGMP; | 344 | pip->protocol = IPPROTO_IGMP; |
345 | pip->tot_len = 0; /* filled in later */ | 345 | pip->tot_len = 0; /* filled in later */ |
346 | ip_select_ident(pip, &rt->dst, NULL); | 346 | ip_select_ident(pip, &rt->dst, NULL); |
347 | ((u8*)&pip[1])[0] = IPOPT_RA; | 347 | ((u8 *)&pip[1])[0] = IPOPT_RA; |
348 | ((u8*)&pip[1])[1] = 4; | 348 | ((u8 *)&pip[1])[1] = 4; |
349 | ((u8*)&pip[1])[2] = 0; | 349 | ((u8 *)&pip[1])[2] = 0; |
350 | ((u8*)&pip[1])[3] = 0; | 350 | ((u8 *)&pip[1])[3] = 0; |
351 | 351 | ||
352 | skb->transport_header = skb->network_header + sizeof(struct iphdr) + 4; | 352 | skb->transport_header = skb->network_header + sizeof(struct iphdr) + 4; |
353 | skb_put(skb, sizeof(*pig)); | 353 | skb_put(skb, sizeof(*pig)); |
@@ -688,10 +688,10 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, | |||
688 | iph->saddr = fl4.saddr; | 688 | iph->saddr = fl4.saddr; |
689 | iph->protocol = IPPROTO_IGMP; | 689 | iph->protocol = IPPROTO_IGMP; |
690 | ip_select_ident(iph, &rt->dst, NULL); | 690 | ip_select_ident(iph, &rt->dst, NULL); |
691 | ((u8*)&iph[1])[0] = IPOPT_RA; | 691 | ((u8 *)&iph[1])[0] = IPOPT_RA; |
692 | ((u8*)&iph[1])[1] = 4; | 692 | ((u8 *)&iph[1])[1] = 4; |
693 | ((u8*)&iph[1])[2] = 0; | 693 | ((u8 *)&iph[1])[2] = 0; |
694 | ((u8*)&iph[1])[3] = 0; | 694 | ((u8 *)&iph[1])[3] = 0; |
695 | 695 | ||
696 | ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); | 696 | ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); |
697 | ih->type = type; | 697 | ih->type = type; |
@@ -774,7 +774,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) | |||
774 | if (psf->sf_count[MCAST_INCLUDE] || | 774 | if (psf->sf_count[MCAST_INCLUDE] || |
775 | pmc->sfcount[MCAST_EXCLUDE] != | 775 | pmc->sfcount[MCAST_EXCLUDE] != |
776 | psf->sf_count[MCAST_EXCLUDE]) | 776 | psf->sf_count[MCAST_EXCLUDE]) |
777 | continue; | 777 | break; |
778 | if (srcs[i] == psf->sf_inaddr) { | 778 | if (srcs[i] == psf->sf_inaddr) { |
779 | scount++; | 779 | scount++; |
780 | break; | 780 | break; |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 19d66cefd7d3..95e61596e605 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -42,7 +42,8 @@ EXPORT_SYMBOL(sysctl_local_reserved_ports); | |||
42 | 42 | ||
43 | void inet_get_local_port_range(int *low, int *high) | 43 | void inet_get_local_port_range(int *low, int *high) |
44 | { | 44 | { |
45 | unsigned seq; | 45 | unsigned int seq; |
46 | |||
46 | do { | 47 | do { |
47 | seq = read_seqbegin(&sysctl_local_ports.lock); | 48 | seq = read_seqbegin(&sysctl_local_ports.lock); |
48 | 49 | ||
@@ -53,7 +54,7 @@ void inet_get_local_port_range(int *low, int *high) | |||
53 | EXPORT_SYMBOL(inet_get_local_port_range); | 54 | EXPORT_SYMBOL(inet_get_local_port_range); |
54 | 55 | ||
55 | int inet_csk_bind_conflict(const struct sock *sk, | 56 | int inet_csk_bind_conflict(const struct sock *sk, |
56 | const struct inet_bind_bucket *tb) | 57 | const struct inet_bind_bucket *tb, bool relax) |
57 | { | 58 | { |
58 | struct sock *sk2; | 59 | struct sock *sk2; |
59 | struct hlist_node *node; | 60 | struct hlist_node *node; |
@@ -79,6 +80,14 @@ int inet_csk_bind_conflict(const struct sock *sk, | |||
79 | sk2_rcv_saddr == sk_rcv_saddr(sk)) | 80 | sk2_rcv_saddr == sk_rcv_saddr(sk)) |
80 | break; | 81 | break; |
81 | } | 82 | } |
83 | if (!relax && reuse && sk2->sk_reuse && | ||
84 | sk2->sk_state != TCP_LISTEN) { | ||
85 | const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); | ||
86 | |||
87 | if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || | ||
88 | sk2_rcv_saddr == sk_rcv_saddr(sk)) | ||
89 | break; | ||
90 | } | ||
82 | } | 91 | } |
83 | } | 92 | } |
84 | return node != NULL; | 93 | return node != NULL; |
@@ -122,12 +131,13 @@ again: | |||
122 | (tb->num_owners < smallest_size || smallest_size == -1)) { | 131 | (tb->num_owners < smallest_size || smallest_size == -1)) { |
123 | smallest_size = tb->num_owners; | 132 | smallest_size = tb->num_owners; |
124 | smallest_rover = rover; | 133 | smallest_rover = rover; |
125 | if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { | 134 | if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 && |
135 | !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { | ||
126 | snum = smallest_rover; | 136 | snum = smallest_rover; |
127 | goto tb_found; | 137 | goto tb_found; |
128 | } | 138 | } |
129 | } | 139 | } |
130 | if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { | 140 | if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { |
131 | snum = rover; | 141 | snum = rover; |
132 | goto tb_found; | 142 | goto tb_found; |
133 | } | 143 | } |
@@ -172,18 +182,22 @@ have_snum: | |||
172 | goto tb_not_found; | 182 | goto tb_not_found; |
173 | tb_found: | 183 | tb_found: |
174 | if (!hlist_empty(&tb->owners)) { | 184 | if (!hlist_empty(&tb->owners)) { |
185 | if (sk->sk_reuse == SK_FORCE_REUSE) | ||
186 | goto success; | ||
187 | |||
175 | if (tb->fastreuse > 0 && | 188 | if (tb->fastreuse > 0 && |
176 | sk->sk_reuse && sk->sk_state != TCP_LISTEN && | 189 | sk->sk_reuse && sk->sk_state != TCP_LISTEN && |
177 | smallest_size == -1) { | 190 | smallest_size == -1) { |
178 | goto success; | 191 | goto success; |
179 | } else { | 192 | } else { |
180 | ret = 1; | 193 | ret = 1; |
181 | if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { | 194 | if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { |
182 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && | 195 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && |
183 | smallest_size != -1 && --attempts >= 0) { | 196 | smallest_size != -1 && --attempts >= 0) { |
184 | spin_unlock(&head->lock); | 197 | spin_unlock(&head->lock); |
185 | goto again; | 198 | goto again; |
186 | } | 199 | } |
200 | |||
187 | goto fail_unlock; | 201 | goto fail_unlock; |
188 | } | 202 | } |
189 | } | 203 | } |
@@ -514,7 +528,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, | |||
514 | 528 | ||
515 | /* Normally all the openreqs are young and become mature | 529 | /* Normally all the openreqs are young and become mature |
516 | * (i.e. converted to established socket) for first timeout. | 530 | * (i.e. converted to established socket) for first timeout. |
517 | * If synack was not acknowledged for 3 seconds, it means | 531 | * If synack was not acknowledged for 1 second, it means |
518 | * one of the following things: synack was lost, ack was lost, | 532 | * one of the following things: synack was lost, ack was lost, |
519 | * rtt is high or nobody planned to ack (i.e. synflood). | 533 | * rtt is high or nobody planned to ack (i.e. synflood). |
520 | * When server is a bit loaded, queue is populated with old | 534 | * When server is a bit loaded, queue is populated with old |
@@ -555,8 +569,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, | |||
555 | syn_ack_recalc(req, thresh, max_retries, | 569 | syn_ack_recalc(req, thresh, max_retries, |
556 | queue->rskq_defer_accept, | 570 | queue->rskq_defer_accept, |
557 | &expire, &resend); | 571 | &expire, &resend); |
558 | if (req->rsk_ops->syn_ack_timeout) | 572 | req->rsk_ops->syn_ack_timeout(parent, req); |
559 | req->rsk_ops->syn_ack_timeout(parent, req); | ||
560 | if (!expire && | 573 | if (!expire && |
561 | (!resend || | 574 | (!resend || |
562 | !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || | 575 | !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || |
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 8d25a1c557eb..46d1e7199a8c 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
@@ -141,7 +141,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, | |||
141 | goto rtattr_failure; | 141 | goto rtattr_failure; |
142 | 142 | ||
143 | if (icsk == NULL) { | 143 | if (icsk == NULL) { |
144 | r->idiag_rqueue = r->idiag_wqueue = 0; | 144 | handler->idiag_get_info(sk, r, NULL); |
145 | goto out; | 145 | goto out; |
146 | } | 146 | } |
147 | 147 | ||
@@ -999,12 +999,12 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) | |||
999 | return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h)); | 999 | return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h)); |
1000 | } | 1000 | } |
1001 | 1001 | ||
1002 | static struct sock_diag_handler inet_diag_handler = { | 1002 | static const struct sock_diag_handler inet_diag_handler = { |
1003 | .family = AF_INET, | 1003 | .family = AF_INET, |
1004 | .dump = inet_diag_handler_dump, | 1004 | .dump = inet_diag_handler_dump, |
1005 | }; | 1005 | }; |
1006 | 1006 | ||
1007 | static struct sock_diag_handler inet6_diag_handler = { | 1007 | static const struct sock_diag_handler inet6_diag_handler = { |
1008 | .family = AF_INET6, | 1008 | .family = AF_INET6, |
1009 | .dump = inet_diag_handler_dump, | 1009 | .dump = inet_diag_handler_dump, |
1010 | }; | 1010 | }; |
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 984ec656b03b..7880af970208 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -217,7 +217,7 @@ begin: | |||
217 | } | 217 | } |
218 | EXPORT_SYMBOL_GPL(__inet_lookup_listener); | 218 | EXPORT_SYMBOL_GPL(__inet_lookup_listener); |
219 | 219 | ||
220 | struct sock * __inet_lookup_established(struct net *net, | 220 | struct sock *__inet_lookup_established(struct net *net, |
221 | struct inet_hashinfo *hashinfo, | 221 | struct inet_hashinfo *hashinfo, |
222 | const __be32 saddr, const __be16 sport, | 222 | const __be32 saddr, const __be16 sport, |
223 | const __be32 daddr, const u16 hnum, | 223 | const __be32 daddr, const u16 hnum, |
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 89168c6351ff..2784db3155fb 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c | |||
@@ -89,8 +89,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, | |||
89 | 89 | ||
90 | #ifdef SOCK_REFCNT_DEBUG | 90 | #ifdef SOCK_REFCNT_DEBUG |
91 | if (atomic_read(&tw->tw_refcnt) != 1) { | 91 | if (atomic_read(&tw->tw_refcnt) != 1) { |
92 | printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", | 92 | pr_debug("%s timewait_sock %p refcnt=%d\n", |
93 | tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); | 93 | tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); |
94 | } | 94 | } |
95 | #endif | 95 | #endif |
96 | while (refcnt) { | 96 | while (refcnt) { |
@@ -263,7 +263,7 @@ rescan: | |||
263 | void inet_twdr_hangman(unsigned long data) | 263 | void inet_twdr_hangman(unsigned long data) |
264 | { | 264 | { |
265 | struct inet_timewait_death_row *twdr; | 265 | struct inet_timewait_death_row *twdr; |
266 | int unsigned need_timer; | 266 | unsigned int need_timer; |
267 | 267 | ||
268 | twdr = (struct inet_timewait_death_row *)data; | 268 | twdr = (struct inet_timewait_death_row *)data; |
269 | spin_lock(&twdr->death_lock); | 269 | spin_lock(&twdr->death_lock); |
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 29a07b6c7168..e5c44fc586ab 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c | |||
@@ -41,7 +41,7 @@ | |||
41 | 41 | ||
42 | static int ip_forward_finish(struct sk_buff *skb) | 42 | static int ip_forward_finish(struct sk_buff *skb) |
43 | { | 43 | { |
44 | struct ip_options * opt = &(IPCB(skb)->opt); | 44 | struct ip_options *opt = &(IPCB(skb)->opt); |
45 | 45 | ||
46 | IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); | 46 | IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); |
47 | 47 | ||
@@ -55,7 +55,7 @@ int ip_forward(struct sk_buff *skb) | |||
55 | { | 55 | { |
56 | struct iphdr *iph; /* Our header */ | 56 | struct iphdr *iph; /* Our header */ |
57 | struct rtable *rt; /* Route we use */ | 57 | struct rtable *rt; /* Route we use */ |
58 | struct ip_options * opt = &(IPCB(skb)->opt); | 58 | struct ip_options *opt = &(IPCB(skb)->opt); |
59 | 59 | ||
60 | if (skb_warn_if_lro(skb)) | 60 | if (skb_warn_if_lro(skb)) |
61 | goto drop; | 61 | goto drop; |
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 3727e234c884..9dbd3dd6022d 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -148,17 +148,17 @@ static unsigned int ip4_hashfn(struct inet_frag_queue *q) | |||
148 | return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); | 148 | return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); |
149 | } | 149 | } |
150 | 150 | ||
151 | static int ip4_frag_match(struct inet_frag_queue *q, void *a) | 151 | static bool ip4_frag_match(struct inet_frag_queue *q, void *a) |
152 | { | 152 | { |
153 | struct ipq *qp; | 153 | struct ipq *qp; |
154 | struct ip4_create_arg *arg = a; | 154 | struct ip4_create_arg *arg = a; |
155 | 155 | ||
156 | qp = container_of(q, struct ipq, q); | 156 | qp = container_of(q, struct ipq, q); |
157 | return qp->id == arg->iph->id && | 157 | return qp->id == arg->iph->id && |
158 | qp->saddr == arg->iph->saddr && | 158 | qp->saddr == arg->iph->saddr && |
159 | qp->daddr == arg->iph->daddr && | 159 | qp->daddr == arg->iph->daddr && |
160 | qp->protocol == arg->iph->protocol && | 160 | qp->protocol == arg->iph->protocol && |
161 | qp->user == arg->user; | 161 | qp->user == arg->user; |
162 | } | 162 | } |
163 | 163 | ||
164 | /* Memory Tracking Functions. */ | 164 | /* Memory Tracking Functions. */ |
@@ -545,6 +545,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
545 | int len; | 545 | int len; |
546 | int ihlen; | 546 | int ihlen; |
547 | int err; | 547 | int err; |
548 | int sum_truesize; | ||
548 | u8 ecn; | 549 | u8 ecn; |
549 | 550 | ||
550 | ipq_kill(qp); | 551 | ipq_kill(qp); |
@@ -569,7 +570,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
569 | skb_morph(head, qp->q.fragments); | 570 | skb_morph(head, qp->q.fragments); |
570 | head->next = qp->q.fragments->next; | 571 | head->next = qp->q.fragments->next; |
571 | 572 | ||
572 | kfree_skb(qp->q.fragments); | 573 | consume_skb(qp->q.fragments); |
573 | qp->q.fragments = head; | 574 | qp->q.fragments = head; |
574 | } | 575 | } |
575 | 576 | ||
@@ -611,19 +612,32 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
611 | atomic_add(clone->truesize, &qp->q.net->mem); | 612 | atomic_add(clone->truesize, &qp->q.net->mem); |
612 | } | 613 | } |
613 | 614 | ||
614 | skb_shinfo(head)->frag_list = head->next; | ||
615 | skb_push(head, head->data - skb_network_header(head)); | 615 | skb_push(head, head->data - skb_network_header(head)); |
616 | 616 | ||
617 | for (fp=head->next; fp; fp = fp->next) { | 617 | sum_truesize = head->truesize; |
618 | head->data_len += fp->len; | 618 | for (fp = head->next; fp;) { |
619 | head->len += fp->len; | 619 | bool headstolen; |
620 | int delta; | ||
621 | struct sk_buff *next = fp->next; | ||
622 | |||
623 | sum_truesize += fp->truesize; | ||
620 | if (head->ip_summed != fp->ip_summed) | 624 | if (head->ip_summed != fp->ip_summed) |
621 | head->ip_summed = CHECKSUM_NONE; | 625 | head->ip_summed = CHECKSUM_NONE; |
622 | else if (head->ip_summed == CHECKSUM_COMPLETE) | 626 | else if (head->ip_summed == CHECKSUM_COMPLETE) |
623 | head->csum = csum_add(head->csum, fp->csum); | 627 | head->csum = csum_add(head->csum, fp->csum); |
624 | head->truesize += fp->truesize; | 628 | |
629 | if (skb_try_coalesce(head, fp, &headstolen, &delta)) { | ||
630 | kfree_skb_partial(fp, headstolen); | ||
631 | } else { | ||
632 | if (!skb_shinfo(head)->frag_list) | ||
633 | skb_shinfo(head)->frag_list = fp; | ||
634 | head->data_len += fp->len; | ||
635 | head->len += fp->len; | ||
636 | head->truesize += fp->truesize; | ||
637 | } | ||
638 | fp = next; | ||
625 | } | 639 | } |
626 | atomic_sub(head->truesize, &qp->q.net->mem); | 640 | atomic_sub(sum_truesize, &qp->q.net->mem); |
627 | 641 | ||
628 | head->next = NULL; | 642 | head->next = NULL; |
629 | head->dev = dev; | 643 | head->dev = dev; |
@@ -644,8 +658,7 @@ out_nomem: | |||
644 | err = -ENOMEM; | 658 | err = -ENOMEM; |
645 | goto out_fail; | 659 | goto out_fail; |
646 | out_oversize: | 660 | out_oversize: |
647 | if (net_ratelimit()) | 661 | net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr); |
648 | pr_info("Oversized IP packet from %pI4\n", &qp->saddr); | ||
649 | out_fail: | 662 | out_fail: |
650 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); | 663 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); |
651 | return err; | 664 | return err; |
@@ -782,7 +795,7 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net) | |||
782 | table[2].data = &net->ipv4.frags.timeout; | 795 | table[2].data = &net->ipv4.frags.timeout; |
783 | } | 796 | } |
784 | 797 | ||
785 | hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table); | 798 | hdr = register_net_sysctl(net, "net/ipv4", table); |
786 | if (hdr == NULL) | 799 | if (hdr == NULL) |
787 | goto err_reg; | 800 | goto err_reg; |
788 | 801 | ||
@@ -807,7 +820,7 @@ static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net) | |||
807 | 820 | ||
808 | static void ip4_frags_ctl_register(void) | 821 | static void ip4_frags_ctl_register(void) |
809 | { | 822 | { |
810 | register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table); | 823 | register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table); |
811 | } | 824 | } |
812 | #else | 825 | #else |
813 | static inline int ip4_frags_ns_ctl_register(struct net *net) | 826 | static inline int ip4_frags_ns_ctl_register(struct net *net) |
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index b57532d4742c..f49047b79609 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -169,37 +169,56 @@ struct ipgre_net { | |||
169 | 169 | ||
170 | /* often modified stats are per cpu, other are shared (netdev->stats) */ | 170 | /* often modified stats are per cpu, other are shared (netdev->stats) */ |
171 | struct pcpu_tstats { | 171 | struct pcpu_tstats { |
172 | unsigned long rx_packets; | 172 | u64 rx_packets; |
173 | unsigned long rx_bytes; | 173 | u64 rx_bytes; |
174 | unsigned long tx_packets; | 174 | u64 tx_packets; |
175 | unsigned long tx_bytes; | 175 | u64 tx_bytes; |
176 | } __attribute__((aligned(4*sizeof(unsigned long)))); | 176 | struct u64_stats_sync syncp; |
177 | }; | ||
177 | 178 | ||
178 | static struct net_device_stats *ipgre_get_stats(struct net_device *dev) | 179 | static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, |
180 | struct rtnl_link_stats64 *tot) | ||
179 | { | 181 | { |
180 | struct pcpu_tstats sum = { 0 }; | ||
181 | int i; | 182 | int i; |
182 | 183 | ||
183 | for_each_possible_cpu(i) { | 184 | for_each_possible_cpu(i) { |
184 | const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); | 185 | const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); |
185 | 186 | u64 rx_packets, rx_bytes, tx_packets, tx_bytes; | |
186 | sum.rx_packets += tstats->rx_packets; | 187 | unsigned int start; |
187 | sum.rx_bytes += tstats->rx_bytes; | 188 | |
188 | sum.tx_packets += tstats->tx_packets; | 189 | do { |
189 | sum.tx_bytes += tstats->tx_bytes; | 190 | start = u64_stats_fetch_begin_bh(&tstats->syncp); |
191 | rx_packets = tstats->rx_packets; | ||
192 | tx_packets = tstats->tx_packets; | ||
193 | rx_bytes = tstats->rx_bytes; | ||
194 | tx_bytes = tstats->tx_bytes; | ||
195 | } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); | ||
196 | |||
197 | tot->rx_packets += rx_packets; | ||
198 | tot->tx_packets += tx_packets; | ||
199 | tot->rx_bytes += rx_bytes; | ||
200 | tot->tx_bytes += tx_bytes; | ||
190 | } | 201 | } |
191 | dev->stats.rx_packets = sum.rx_packets; | 202 | |
192 | dev->stats.rx_bytes = sum.rx_bytes; | 203 | tot->multicast = dev->stats.multicast; |
193 | dev->stats.tx_packets = sum.tx_packets; | 204 | tot->rx_crc_errors = dev->stats.rx_crc_errors; |
194 | dev->stats.tx_bytes = sum.tx_bytes; | 205 | tot->rx_fifo_errors = dev->stats.rx_fifo_errors; |
195 | return &dev->stats; | 206 | tot->rx_length_errors = dev->stats.rx_length_errors; |
207 | tot->rx_errors = dev->stats.rx_errors; | ||
208 | tot->tx_fifo_errors = dev->stats.tx_fifo_errors; | ||
209 | tot->tx_carrier_errors = dev->stats.tx_carrier_errors; | ||
210 | tot->tx_dropped = dev->stats.tx_dropped; | ||
211 | tot->tx_aborted_errors = dev->stats.tx_aborted_errors; | ||
212 | tot->tx_errors = dev->stats.tx_errors; | ||
213 | |||
214 | return tot; | ||
196 | } | 215 | } |
197 | 216 | ||
198 | /* Given src, dst and key, find appropriate for input tunnel. */ | 217 | /* Given src, dst and key, find appropriate for input tunnel. */ |
199 | 218 | ||
200 | static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, | 219 | static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, |
201 | __be32 remote, __be32 local, | 220 | __be32 remote, __be32 local, |
202 | __be32 key, __be16 gre_proto) | 221 | __be32 key, __be16 gre_proto) |
203 | { | 222 | { |
204 | struct net *net = dev_net(dev); | 223 | struct net *net = dev_net(dev); |
205 | int link = dev->ifindex; | 224 | int link = dev->ifindex; |
@@ -464,7 +483,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
464 | */ | 483 | */ |
465 | 484 | ||
466 | const struct iphdr *iph = (const struct iphdr *)skb->data; | 485 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
467 | __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); | 486 | __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2)); |
468 | int grehlen = (iph->ihl<<2) + 4; | 487 | int grehlen = (iph->ihl<<2) + 4; |
469 | const int type = icmp_hdr(skb)->type; | 488 | const int type = icmp_hdr(skb)->type; |
470 | const int code = icmp_hdr(skb)->code; | 489 | const int code = icmp_hdr(skb)->code; |
@@ -574,7 +593,7 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
574 | 593 | ||
575 | iph = ip_hdr(skb); | 594 | iph = ip_hdr(skb); |
576 | h = skb->data; | 595 | h = skb->data; |
577 | flags = *(__be16*)h; | 596 | flags = *(__be16 *)h; |
578 | 597 | ||
579 | if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { | 598 | if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { |
580 | /* - Version must be 0. | 599 | /* - Version must be 0. |
@@ -598,11 +617,11 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
598 | offset += 4; | 617 | offset += 4; |
599 | } | 618 | } |
600 | if (flags&GRE_KEY) { | 619 | if (flags&GRE_KEY) { |
601 | key = *(__be32*)(h + offset); | 620 | key = *(__be32 *)(h + offset); |
602 | offset += 4; | 621 | offset += 4; |
603 | } | 622 | } |
604 | if (flags&GRE_SEQ) { | 623 | if (flags&GRE_SEQ) { |
605 | seqno = ntohl(*(__be32*)(h + offset)); | 624 | seqno = ntohl(*(__be32 *)(h + offset)); |
606 | offset += 4; | 625 | offset += 4; |
607 | } | 626 | } |
608 | } | 627 | } |
@@ -672,8 +691,10 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
672 | } | 691 | } |
673 | 692 | ||
674 | tstats = this_cpu_ptr(tunnel->dev->tstats); | 693 | tstats = this_cpu_ptr(tunnel->dev->tstats); |
694 | u64_stats_update_begin(&tstats->syncp); | ||
675 | tstats->rx_packets++; | 695 | tstats->rx_packets++; |
676 | tstats->rx_bytes += skb->len; | 696 | tstats->rx_bytes += skb->len; |
697 | u64_stats_update_end(&tstats->syncp); | ||
677 | 698 | ||
678 | __skb_tunnel_rx(skb, tunnel->dev); | 699 | __skb_tunnel_rx(skb, tunnel->dev); |
679 | 700 | ||
@@ -900,7 +921,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
900 | htons(ETH_P_TEB) : skb->protocol; | 921 | htons(ETH_P_TEB) : skb->protocol; |
901 | 922 | ||
902 | if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { | 923 | if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { |
903 | __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4); | 924 | __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4); |
904 | 925 | ||
905 | if (tunnel->parms.o_flags&GRE_SEQ) { | 926 | if (tunnel->parms.o_flags&GRE_SEQ) { |
906 | ++tunnel->o_seqno; | 927 | ++tunnel->o_seqno; |
@@ -913,7 +934,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
913 | } | 934 | } |
914 | if (tunnel->parms.o_flags&GRE_CSUM) { | 935 | if (tunnel->parms.o_flags&GRE_CSUM) { |
915 | *ptr = 0; | 936 | *ptr = 0; |
916 | *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); | 937 | *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr)); |
917 | } | 938 | } |
918 | } | 939 | } |
919 | 940 | ||
@@ -1169,7 +1190,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, | |||
1169 | { | 1190 | { |
1170 | struct ip_tunnel *t = netdev_priv(dev); | 1191 | struct ip_tunnel *t = netdev_priv(dev); |
1171 | struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); | 1192 | struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); |
1172 | __be16 *p = (__be16*)(iph+1); | 1193 | __be16 *p = (__be16 *)(iph+1); |
1173 | 1194 | ||
1174 | memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); | 1195 | memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); |
1175 | p[0] = t->parms.o_flags; | 1196 | p[0] = t->parms.o_flags; |
@@ -1253,7 +1274,7 @@ static const struct net_device_ops ipgre_netdev_ops = { | |||
1253 | .ndo_start_xmit = ipgre_tunnel_xmit, | 1274 | .ndo_start_xmit = ipgre_tunnel_xmit, |
1254 | .ndo_do_ioctl = ipgre_tunnel_ioctl, | 1275 | .ndo_do_ioctl = ipgre_tunnel_ioctl, |
1255 | .ndo_change_mtu = ipgre_tunnel_change_mtu, | 1276 | .ndo_change_mtu = ipgre_tunnel_change_mtu, |
1256 | .ndo_get_stats = ipgre_get_stats, | 1277 | .ndo_get_stats64 = ipgre_get_stats64, |
1257 | }; | 1278 | }; |
1258 | 1279 | ||
1259 | static void ipgre_dev_free(struct net_device *dev) | 1280 | static void ipgre_dev_free(struct net_device *dev) |
@@ -1507,7 +1528,7 @@ static const struct net_device_ops ipgre_tap_netdev_ops = { | |||
1507 | .ndo_set_mac_address = eth_mac_addr, | 1528 | .ndo_set_mac_address = eth_mac_addr, |
1508 | .ndo_validate_addr = eth_validate_addr, | 1529 | .ndo_validate_addr = eth_validate_addr, |
1509 | .ndo_change_mtu = ipgre_tunnel_change_mtu, | 1530 | .ndo_change_mtu = ipgre_tunnel_change_mtu, |
1510 | .ndo_get_stats = ipgre_get_stats, | 1531 | .ndo_get_stats64 = ipgre_get_stats64, |
1511 | }; | 1532 | }; |
1512 | 1533 | ||
1513 | static void ipgre_tap_setup(struct net_device *dev) | 1534 | static void ipgre_tap_setup(struct net_device *dev) |
@@ -1654,17 +1675,18 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) | |||
1654 | struct ip_tunnel *t = netdev_priv(dev); | 1675 | struct ip_tunnel *t = netdev_priv(dev); |
1655 | struct ip_tunnel_parm *p = &t->parms; | 1676 | struct ip_tunnel_parm *p = &t->parms; |
1656 | 1677 | ||
1657 | NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link); | 1678 | if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || |
1658 | NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags); | 1679 | nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || |
1659 | NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags); | 1680 | nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || |
1660 | NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key); | 1681 | nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || |
1661 | NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key); | 1682 | nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || |
1662 | NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr); | 1683 | nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) || |
1663 | NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr); | 1684 | nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) || |
1664 | NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl); | 1685 | nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) || |
1665 | NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos); | 1686 | nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) || |
1666 | NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF))); | 1687 | nla_put_u8(skb, IFLA_GRE_PMTUDISC, |
1667 | 1688 | !!(p->iph.frag_off & htons(IP_DF)))) | |
1689 | goto nla_put_failure; | ||
1668 | return 0; | 1690 | return 0; |
1669 | 1691 | ||
1670 | nla_put_failure: | 1692 | nla_put_failure: |
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 26eccc5bab1c..8590144ca330 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -210,9 +210,8 @@ static int ip_local_deliver_finish(struct sk_buff *skb) | |||
210 | int ret; | 210 | int ret; |
211 | 211 | ||
212 | if (!net_eq(net, &init_net) && !ipprot->netns_ok) { | 212 | if (!net_eq(net, &init_net) && !ipprot->netns_ok) { |
213 | if (net_ratelimit()) | 213 | net_info_ratelimited("%s: proto %d isn't netns-ready\n", |
214 | printk("%s: proto %d isn't netns-ready\n", | 214 | __func__, protocol); |
215 | __func__, protocol); | ||
216 | kfree_skb(skb); | 215 | kfree_skb(skb); |
217 | goto out; | 216 | goto out; |
218 | } | 217 | } |
@@ -298,10 +297,10 @@ static inline bool ip_rcv_options(struct sk_buff *skb) | |||
298 | 297 | ||
299 | if (in_dev) { | 298 | if (in_dev) { |
300 | if (!IN_DEV_SOURCE_ROUTE(in_dev)) { | 299 | if (!IN_DEV_SOURCE_ROUTE(in_dev)) { |
301 | if (IN_DEV_LOG_MARTIANS(in_dev) && | 300 | if (IN_DEV_LOG_MARTIANS(in_dev)) |
302 | net_ratelimit()) | 301 | net_info_ratelimited("source route option %pI4 -> %pI4\n", |
303 | pr_info("source route option %pI4 -> %pI4\n", | 302 | &iph->saddr, |
304 | &iph->saddr, &iph->daddr); | 303 | &iph->daddr); |
305 | goto drop; | 304 | goto drop; |
306 | } | 305 | } |
307 | } | 306 | } |
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index a0d0d9d9b870..708b99494e23 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
@@ -210,10 +210,10 @@ int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) | |||
210 | * Simple and stupid 8), but the most efficient way. | 210 | * Simple and stupid 8), but the most efficient way. |
211 | */ | 211 | */ |
212 | 212 | ||
213 | void ip_options_fragment(struct sk_buff * skb) | 213 | void ip_options_fragment(struct sk_buff *skb) |
214 | { | 214 | { |
215 | unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr); | 215 | unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr); |
216 | struct ip_options * opt = &(IPCB(skb)->opt); | 216 | struct ip_options *opt = &(IPCB(skb)->opt); |
217 | int l = opt->optlen; | 217 | int l = opt->optlen; |
218 | int optlen; | 218 | int optlen; |
219 | 219 | ||
@@ -248,13 +248,13 @@ void ip_options_fragment(struct sk_buff * skb) | |||
248 | */ | 248 | */ |
249 | 249 | ||
250 | int ip_options_compile(struct net *net, | 250 | int ip_options_compile(struct net *net, |
251 | struct ip_options * opt, struct sk_buff * skb) | 251 | struct ip_options *opt, struct sk_buff *skb) |
252 | { | 252 | { |
253 | int l; | 253 | int l; |
254 | unsigned char * iph; | 254 | unsigned char *iph; |
255 | unsigned char * optptr; | 255 | unsigned char *optptr; |
256 | int optlen; | 256 | int optlen; |
257 | unsigned char * pp_ptr = NULL; | 257 | unsigned char *pp_ptr = NULL; |
258 | struct rtable *rt = NULL; | 258 | struct rtable *rt = NULL; |
259 | 259 | ||
260 | if (skb != NULL) { | 260 | if (skb != NULL) { |
@@ -413,7 +413,7 @@ int ip_options_compile(struct net *net, | |||
413 | opt->is_changed = 1; | 413 | opt->is_changed = 1; |
414 | } | 414 | } |
415 | } else { | 415 | } else { |
416 | unsigned overflow = optptr[3]>>4; | 416 | unsigned int overflow = optptr[3]>>4; |
417 | if (overflow == 15) { | 417 | if (overflow == 15) { |
418 | pp_ptr = optptr + 3; | 418 | pp_ptr = optptr + 3; |
419 | goto error; | 419 | goto error; |
@@ -473,20 +473,20 @@ EXPORT_SYMBOL(ip_options_compile); | |||
473 | * Undo all the changes done by ip_options_compile(). | 473 | * Undo all the changes done by ip_options_compile(). |
474 | */ | 474 | */ |
475 | 475 | ||
476 | void ip_options_undo(struct ip_options * opt) | 476 | void ip_options_undo(struct ip_options *opt) |
477 | { | 477 | { |
478 | if (opt->srr) { | 478 | if (opt->srr) { |
479 | unsigned char * optptr = opt->__data+opt->srr-sizeof(struct iphdr); | 479 | unsigned char *optptr = opt->__data+opt->srr-sizeof(struct iphdr); |
480 | memmove(optptr+7, optptr+3, optptr[1]-7); | 480 | memmove(optptr+7, optptr+3, optptr[1]-7); |
481 | memcpy(optptr+3, &opt->faddr, 4); | 481 | memcpy(optptr+3, &opt->faddr, 4); |
482 | } | 482 | } |
483 | if (opt->rr_needaddr) { | 483 | if (opt->rr_needaddr) { |
484 | unsigned char * optptr = opt->__data+opt->rr-sizeof(struct iphdr); | 484 | unsigned char *optptr = opt->__data+opt->rr-sizeof(struct iphdr); |
485 | optptr[2] -= 4; | 485 | optptr[2] -= 4; |
486 | memset(&optptr[optptr[2]-1], 0, 4); | 486 | memset(&optptr[optptr[2]-1], 0, 4); |
487 | } | 487 | } |
488 | if (opt->ts) { | 488 | if (opt->ts) { |
489 | unsigned char * optptr = opt->__data+opt->ts-sizeof(struct iphdr); | 489 | unsigned char *optptr = opt->__data+opt->ts-sizeof(struct iphdr); |
490 | if (opt->ts_needtime) { | 490 | if (opt->ts_needtime) { |
491 | optptr[2] -= 4; | 491 | optptr[2] -= 4; |
492 | memset(&optptr[optptr[2]-1], 0, 4); | 492 | memset(&optptr[optptr[2]-1], 0, 4); |
@@ -549,8 +549,8 @@ int ip_options_get(struct net *net, struct ip_options_rcu **optp, | |||
549 | 549 | ||
550 | void ip_forward_options(struct sk_buff *skb) | 550 | void ip_forward_options(struct sk_buff *skb) |
551 | { | 551 | { |
552 | struct ip_options * opt = &(IPCB(skb)->opt); | 552 | struct ip_options *opt = &(IPCB(skb)->opt); |
553 | unsigned char * optptr; | 553 | unsigned char *optptr; |
554 | struct rtable *rt = skb_rtable(skb); | 554 | struct rtable *rt = skb_rtable(skb); |
555 | unsigned char *raw = skb_network_header(skb); | 555 | unsigned char *raw = skb_network_header(skb); |
556 | 556 | ||
@@ -578,8 +578,10 @@ void ip_forward_options(struct sk_buff *skb) | |||
578 | ip_hdr(skb)->daddr = opt->nexthop; | 578 | ip_hdr(skb)->daddr = opt->nexthop; |
579 | ip_rt_get_source(&optptr[srrptr-1], skb, rt); | 579 | ip_rt_get_source(&optptr[srrptr-1], skb, rt); |
580 | optptr[2] = srrptr+4; | 580 | optptr[2] = srrptr+4; |
581 | } else if (net_ratelimit()) | 581 | } else { |
582 | pr_crit("%s(): Argh! Destination lost!\n", __func__); | 582 | net_crit_ratelimited("%s(): Argh! Destination lost!\n", |
583 | __func__); | ||
584 | } | ||
583 | if (opt->ts_needaddr) { | 585 | if (opt->ts_needaddr) { |
584 | optptr = raw + opt->ts; | 586 | optptr = raw + opt->ts; |
585 | ip_rt_get_source(&optptr[optptr[2]-9], skb, rt); | 587 | ip_rt_get_source(&optptr[optptr[2]-9], skb, rt); |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4910176d24ed..451f97c42eb4 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -214,8 +214,8 @@ static inline int ip_finish_output2(struct sk_buff *skb) | |||
214 | } | 214 | } |
215 | rcu_read_unlock(); | 215 | rcu_read_unlock(); |
216 | 216 | ||
217 | if (net_ratelimit()) | 217 | net_dbg_ratelimited("%s: No header cache and no neighbour!\n", |
218 | printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); | 218 | __func__); |
219 | kfree_skb(skb); | 219 | kfree_skb(skb); |
220 | return -EINVAL; | 220 | return -EINVAL; |
221 | } | 221 | } |
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 2fd0fba77124..0d11f234d615 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
@@ -90,7 +90,7 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) | |||
90 | static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) | 90 | static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) |
91 | { | 91 | { |
92 | unsigned char optbuf[sizeof(struct ip_options) + 40]; | 92 | unsigned char optbuf[sizeof(struct ip_options) + 40]; |
93 | struct ip_options * opt = (struct ip_options *)optbuf; | 93 | struct ip_options *opt = (struct ip_options *)optbuf; |
94 | 94 | ||
95 | if (IPCB(skb)->opt.optlen == 0) | 95 | if (IPCB(skb)->opt.optlen == 0) |
96 | return; | 96 | return; |
@@ -147,7 +147,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) | |||
147 | void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) | 147 | void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) |
148 | { | 148 | { |
149 | struct inet_sock *inet = inet_sk(skb->sk); | 149 | struct inet_sock *inet = inet_sk(skb->sk); |
150 | unsigned flags = inet->cmsg_flags; | 150 | unsigned int flags = inet->cmsg_flags; |
151 | 151 | ||
152 | /* Ordered by supposed usage frequency */ | 152 | /* Ordered by supposed usage frequency */ |
153 | if (flags & 1) | 153 | if (flags & 1) |
@@ -673,10 +673,15 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
673 | break; | 673 | break; |
674 | } else { | 674 | } else { |
675 | memset(&mreq, 0, sizeof(mreq)); | 675 | memset(&mreq, 0, sizeof(mreq)); |
676 | if (optlen >= sizeof(struct in_addr) && | 676 | if (optlen >= sizeof(struct ip_mreq)) { |
677 | copy_from_user(&mreq.imr_address, optval, | 677 | if (copy_from_user(&mreq, optval, |
678 | sizeof(struct in_addr))) | 678 | sizeof(struct ip_mreq))) |
679 | break; | 679 | break; |
680 | } else if (optlen >= sizeof(struct in_addr)) { | ||
681 | if (copy_from_user(&mreq.imr_address, optval, | ||
682 | sizeof(struct in_addr))) | ||
683 | break; | ||
684 | } | ||
680 | } | 685 | } |
681 | 686 | ||
682 | if (!mreq.imr_ifindex) { | 687 | if (!mreq.imr_ifindex) { |
@@ -1094,7 +1099,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt); | |||
1094 | */ | 1099 | */ |
1095 | 1100 | ||
1096 | static int do_ip_getsockopt(struct sock *sk, int level, int optname, | 1101 | static int do_ip_getsockopt(struct sock *sk, int level, int optname, |
1097 | char __user *optval, int __user *optlen, unsigned flags) | 1102 | char __user *optval, int __user *optlen, unsigned int flags) |
1098 | { | 1103 | { |
1099 | struct inet_sock *inet = inet_sk(sk); | 1104 | struct inet_sock *inet = inet_sk(sk); |
1100 | int val; | 1105 | int val; |
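The do_ip_setsockopt() hunk above changes IP_MULTICAST_IF handling: when the caller passes at least sizeof(struct ip_mreq), the whole structure is now copied, so (as we read the hunk) the second field, imr_interface, lands in mreq.imr_address instead of the first four bytes being misread as the interface address. A hedged userspace sketch of the kind of caller this fixes (error handling trimmed):

	#include <arpa/inet.h>
	#include <netinet/in.h>
	#include <string.h>
	#include <sys/socket.h>

	/* Select the egress interface for multicast by address, using the
	 * struct ip_mreq form of IP_MULTICAST_IF. Illustrative only. */
	int set_mcast_if(int sock, const char *ifaddr)
	{
		struct ip_mreq mreq;

		memset(&mreq, 0, sizeof(mreq));
		if (inet_pton(AF_INET, ifaddr, &mreq.imr_interface) != 1)
			return -1;
		return setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF,
				  &mreq, sizeof(mreq));
	}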
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 92ac7e7363a0..67e8a6b086ea 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -808,8 +808,6 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d | |||
808 | b->op = BOOTP_REQUEST; | 808 | b->op = BOOTP_REQUEST; |
809 | if (dev->type < 256) /* check for false types */ | 809 | if (dev->type < 256) /* check for false types */ |
810 | b->htype = dev->type; | 810 | b->htype = dev->type; |
811 | else if (dev->type == ARPHRD_IEEE802_TR) /* fix for token ring */ | ||
812 | b->htype = ARPHRD_IEEE802; | ||
813 | else if (dev->type == ARPHRD_FDDI) | 811 | else if (dev->type == ARPHRD_FDDI) |
814 | b->htype = ARPHRD_ETHER; | 812 | b->htype = ARPHRD_ETHER; |
815 | else { | 813 | else { |
@@ -955,8 +953,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
955 | 953 | ||
956 | /* Fragments are not supported */ | 954 | /* Fragments are not supported */ |
957 | if (ip_is_fragment(h)) { | 955 | if (ip_is_fragment(h)) { |
958 | if (net_ratelimit()) | 956 | net_err_ratelimited("DHCP/BOOTP: Ignoring fragmented reply\n"); |
959 | pr_err("DHCP/BOOTP: Ignoring fragmented reply\n"); | ||
960 | goto drop; | 957 | goto drop; |
961 | } | 958 | } |
962 | 959 | ||
@@ -1004,16 +1001,14 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
1004 | /* Is it a reply to our BOOTP request? */ | 1001 | /* Is it a reply to our BOOTP request? */ |
1005 | if (b->op != BOOTP_REPLY || | 1002 | if (b->op != BOOTP_REPLY || |
1006 | b->xid != d->xid) { | 1003 | b->xid != d->xid) { |
1007 | if (net_ratelimit()) | 1004 | net_err_ratelimited("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n", |
1008 | pr_err("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n", | 1005 | b->op, b->xid); |
1009 | b->op, b->xid); | ||
1010 | goto drop_unlock; | 1006 | goto drop_unlock; |
1011 | } | 1007 | } |
1012 | 1008 | ||
1013 | /* Is it a reply for the device we are configuring? */ | 1009 | /* Is it a reply for the device we are configuring? */ |
1014 | if (b->xid != ic_dev_xid) { | 1010 | if (b->xid != ic_dev_xid) { |
1015 | if (net_ratelimit()) | 1011 | net_err_ratelimited("DHCP/BOOTP: Ignoring delayed packet\n"); |
1016 | pr_err("DHCP/BOOTP: Ignoring delayed packet\n"); | ||
1017 | goto drop_unlock; | 1012 | goto drop_unlock; |
1018 | } | 1013 | } |
1019 | 1014 | ||
@@ -1198,7 +1193,7 @@ static int __init ic_dynamic(void) | |||
1198 | d = ic_first_dev; | 1193 | d = ic_first_dev; |
1199 | retries = CONF_SEND_RETRIES; | 1194 | retries = CONF_SEND_RETRIES; |
1200 | get_random_bytes(&timeout, sizeof(timeout)); | 1195 | get_random_bytes(&timeout, sizeof(timeout)); |
1201 | timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); | 1196 | timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM); |
1202 | for (;;) { | 1197 | for (;;) { |
1203 | /* Track the device we are configuring */ | 1198 | /* Track the device we are configuring */ |
1204 | ic_dev_xid = d->xid; | 1199 | ic_dev_xid = d->xid; |
@@ -1626,11 +1621,13 @@ static int __init ip_auto_config_setup(char *addrs) | |||
1626 | 1621 | ||
1627 | return 1; | 1622 | return 1; |
1628 | } | 1623 | } |
1624 | __setup("ip=", ip_auto_config_setup); | ||
1629 | 1625 | ||
1630 | static int __init nfsaddrs_config_setup(char *addrs) | 1626 | static int __init nfsaddrs_config_setup(char *addrs) |
1631 | { | 1627 | { |
1632 | return ip_auto_config_setup(addrs); | 1628 | return ip_auto_config_setup(addrs); |
1633 | } | 1629 | } |
1630 | __setup("nfsaddrs=", nfsaddrs_config_setup); | ||
1634 | 1631 | ||
1635 | static int __init vendor_class_identifier_setup(char *addrs) | 1632 | static int __init vendor_class_identifier_setup(char *addrs) |
1636 | { | 1633 | { |
@@ -1641,7 +1638,4 @@ static int __init vendor_class_identifier_setup(char *addrs) | |||
1641 | vendor_class_identifier); | 1638 | vendor_class_identifier); |
1642 | return 1; | 1639 | return 1; |
1643 | } | 1640 | } |
1644 | |||
1645 | __setup("ip=", ip_auto_config_setup); | ||
1646 | __setup("nfsaddrs=", nfsaddrs_config_setup); | ||
1647 | __setup("dhcpclass=", vendor_class_identifier_setup); | 1641 | __setup("dhcpclass=", vendor_class_identifier_setup); |
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index ae1413e3f2f8..2d0f99bf61b3 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -144,33 +144,48 @@ static void ipip_dev_free(struct net_device *dev); | |||
144 | 144 | ||
145 | /* often modified stats are per cpu, other are shared (netdev->stats) */ | 145 | /* often modified stats are per cpu, other are shared (netdev->stats) */ |
146 | struct pcpu_tstats { | 146 | struct pcpu_tstats { |
147 | unsigned long rx_packets; | 147 | u64 rx_packets; |
148 | unsigned long rx_bytes; | 148 | u64 rx_bytes; |
149 | unsigned long tx_packets; | 149 | u64 tx_packets; |
150 | unsigned long tx_bytes; | 150 | u64 tx_bytes; |
151 | } __attribute__((aligned(4*sizeof(unsigned long)))); | 151 | struct u64_stats_sync syncp; |
152 | }; | ||
152 | 153 | ||
153 | static struct net_device_stats *ipip_get_stats(struct net_device *dev) | 154 | static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev, |
155 | struct rtnl_link_stats64 *tot) | ||
154 | { | 156 | { |
155 | struct pcpu_tstats sum = { 0 }; | ||
156 | int i; | 157 | int i; |
157 | 158 | ||
158 | for_each_possible_cpu(i) { | 159 | for_each_possible_cpu(i) { |
159 | const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); | 160 | const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); |
160 | 161 | u64 rx_packets, rx_bytes, tx_packets, tx_bytes; | |
161 | sum.rx_packets += tstats->rx_packets; | 162 | unsigned int start; |
162 | sum.rx_bytes += tstats->rx_bytes; | 163 | |
163 | sum.tx_packets += tstats->tx_packets; | 164 | do { |
164 | sum.tx_bytes += tstats->tx_bytes; | 165 | start = u64_stats_fetch_begin_bh(&tstats->syncp); |
166 | rx_packets = tstats->rx_packets; | ||
167 | tx_packets = tstats->tx_packets; | ||
168 | rx_bytes = tstats->rx_bytes; | ||
169 | tx_bytes = tstats->tx_bytes; | ||
170 | } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); | ||
171 | |||
172 | tot->rx_packets += rx_packets; | ||
173 | tot->tx_packets += tx_packets; | ||
174 | tot->rx_bytes += rx_bytes; | ||
175 | tot->tx_bytes += tx_bytes; | ||
165 | } | 176 | } |
166 | dev->stats.rx_packets = sum.rx_packets; | 177 | |
167 | dev->stats.rx_bytes = sum.rx_bytes; | 178 | tot->tx_fifo_errors = dev->stats.tx_fifo_errors; |
168 | dev->stats.tx_packets = sum.tx_packets; | 179 | tot->tx_carrier_errors = dev->stats.tx_carrier_errors; |
169 | dev->stats.tx_bytes = sum.tx_bytes; | 180 | tot->tx_dropped = dev->stats.tx_dropped; |
170 | return &dev->stats; | 181 | tot->tx_aborted_errors = dev->stats.tx_aborted_errors; |
182 | tot->tx_errors = dev->stats.tx_errors; | ||
183 | tot->collisions = dev->stats.collisions; | ||
184 | |||
185 | return tot; | ||
171 | } | 186 | } |
172 | 187 | ||
173 | static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, | 188 | static struct ip_tunnel *ipip_tunnel_lookup(struct net *net, |
174 | __be32 remote, __be32 local) | 189 | __be32 remote, __be32 local) |
175 | { | 190 | { |
176 | unsigned int h0 = HASH(remote); | 191 | unsigned int h0 = HASH(remote); |
@@ -245,7 +260,7 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) | |||
245 | rcu_assign_pointer(*tp, t); | 260 | rcu_assign_pointer(*tp, t); |
246 | } | 261 | } |
247 | 262 | ||
248 | static struct ip_tunnel * ipip_tunnel_locate(struct net *net, | 263 | static struct ip_tunnel *ipip_tunnel_locate(struct net *net, |
249 | struct ip_tunnel_parm *parms, int create) | 264 | struct ip_tunnel_parm *parms, int create) |
250 | { | 265 | { |
251 | __be32 remote = parms->iph.daddr; | 266 | __be32 remote = parms->iph.daddr; |
@@ -404,8 +419,10 @@ static int ipip_rcv(struct sk_buff *skb) | |||
404 | skb->pkt_type = PACKET_HOST; | 419 | skb->pkt_type = PACKET_HOST; |
405 | 420 | ||
406 | tstats = this_cpu_ptr(tunnel->dev->tstats); | 421 | tstats = this_cpu_ptr(tunnel->dev->tstats); |
422 | u64_stats_update_begin(&tstats->syncp); | ||
407 | tstats->rx_packets++; | 423 | tstats->rx_packets++; |
408 | tstats->rx_bytes += skb->len; | 424 | tstats->rx_bytes += skb->len; |
425 | u64_stats_update_end(&tstats->syncp); | ||
409 | 426 | ||
410 | __skb_tunnel_rx(skb, tunnel->dev); | 427 | __skb_tunnel_rx(skb, tunnel->dev); |
411 | 428 | ||
@@ -730,7 +747,7 @@ static const struct net_device_ops ipip_netdev_ops = { | |||
730 | .ndo_start_xmit = ipip_tunnel_xmit, | 747 | .ndo_start_xmit = ipip_tunnel_xmit, |
731 | .ndo_do_ioctl = ipip_tunnel_ioctl, | 748 | .ndo_do_ioctl = ipip_tunnel_ioctl, |
732 | .ndo_change_mtu = ipip_tunnel_change_mtu, | 749 | .ndo_change_mtu = ipip_tunnel_change_mtu, |
733 | .ndo_get_stats = ipip_get_stats, | 750 | .ndo_get_stats64 = ipip_get_stats64, |
734 | }; | 751 | }; |
735 | 752 | ||
736 | static void ipip_dev_free(struct net_device *dev) | 753 | static void ipip_dev_free(struct net_device *dev) |
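The ipip.c conversion above widens the per-cpu tunnel counters to u64, guards them with a u64_stats_sync so 64-bit counters can be read consistently on 32-bit hosts, and moves the driver from ndo_get_stats to ndo_get_stats64. The writer/reader pairing, reduced to its core:

	/* Writers bracket updates; readers retry until the sequence count
	 * is stable, yielding a consistent 64-bit snapshot. */
	struct pcpu_tstats {
		u64 rx_packets;
		u64 rx_bytes;
		struct u64_stats_sync syncp;
	};

	static void rx_account(struct pcpu_tstats *tstats, unsigned int len)
	{
		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += len;
		u64_stats_update_end(&tstats->syncp);
	}

	static void rx_snapshot(const struct pcpu_tstats *tstats,
				u64 *packets, u64 *bytes)
	{
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			*packets = tstats->rx_packets;
			*bytes = tstats->rx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
	}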
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 960fbfc3e976..a9e519ad6db5 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -949,8 +949,7 @@ static int ipmr_cache_report(struct mr_table *mrt, | |||
949 | ret = sock_queue_rcv_skb(mroute_sk, skb); | 949 | ret = sock_queue_rcv_skb(mroute_sk, skb); |
950 | rcu_read_unlock(); | 950 | rcu_read_unlock(); |
951 | if (ret < 0) { | 951 | if (ret < 0) { |
952 | if (net_ratelimit()) | 952 | net_warn_ratelimited("mroute: pending queue full, dropping entries\n"); |
953 | pr_warn("mroute: pending queue full, dropping entries\n"); | ||
954 | kfree_skb(skb); | 953 | kfree_skb(skb); |
955 | } | 954 | } |
956 | 955 | ||
@@ -2119,15 +2118,16 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, | |||
2119 | rtm->rtm_src_len = 32; | 2118 | rtm->rtm_src_len = 32; |
2120 | rtm->rtm_tos = 0; | 2119 | rtm->rtm_tos = 0; |
2121 | rtm->rtm_table = mrt->id; | 2120 | rtm->rtm_table = mrt->id; |
2122 | NLA_PUT_U32(skb, RTA_TABLE, mrt->id); | 2121 | if (nla_put_u32(skb, RTA_TABLE, mrt->id)) |
2122 | goto nla_put_failure; | ||
2123 | rtm->rtm_type = RTN_MULTICAST; | 2123 | rtm->rtm_type = RTN_MULTICAST; |
2124 | rtm->rtm_scope = RT_SCOPE_UNIVERSE; | 2124 | rtm->rtm_scope = RT_SCOPE_UNIVERSE; |
2125 | rtm->rtm_protocol = RTPROT_UNSPEC; | 2125 | rtm->rtm_protocol = RTPROT_UNSPEC; |
2126 | rtm->rtm_flags = 0; | 2126 | rtm->rtm_flags = 0; |
2127 | 2127 | ||
2128 | NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin); | 2128 | if (nla_put_be32(skb, RTA_SRC, c->mfc_origin) || |
2129 | NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp); | 2129 | nla_put_be32(skb, RTA_DST, c->mfc_mcastgrp)) |
2130 | 2130 | goto nla_put_failure; | |
2131 | if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) | 2131 | if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) |
2132 | goto nla_put_failure; | 2132 | goto nla_put_failure; |
2133 | 2133 | ||
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 4f47e064e262..ed1b36783192 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
@@ -12,7 +12,7 @@ | |||
12 | #include <net/netfilter/nf_queue.h> | 12 | #include <net/netfilter/nf_queue.h> |
13 | 13 | ||
14 | /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ | 14 | /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ |
15 | int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | 15 | int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) |
16 | { | 16 | { |
17 | struct net *net = dev_net(skb_dst(skb)->dev); | 17 | struct net *net = dev_net(skb_dst(skb)->dev); |
18 | const struct iphdr *iph = ip_hdr(skb); | 18 | const struct iphdr *iph = ip_hdr(skb); |
@@ -237,13 +237,3 @@ static void ipv4_netfilter_fini(void) | |||
237 | 237 | ||
238 | module_init(ipv4_netfilter_init); | 238 | module_init(ipv4_netfilter_init); |
239 | module_exit(ipv4_netfilter_fini); | 239 | module_exit(ipv4_netfilter_fini); |
240 | |||
241 | #ifdef CONFIG_SYSCTL | ||
242 | struct ctl_path nf_net_ipv4_netfilter_sysctl_path[] = { | ||
243 | { .procname = "net", }, | ||
244 | { .procname = "ipv4", }, | ||
245 | { .procname = "netfilter", }, | ||
246 | { } | ||
247 | }; | ||
248 | EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path); | ||
249 | #endif /* CONFIG_SYSCTL */ | ||
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 240b68469a7a..c20674dc9452 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
@@ -66,6 +66,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o | |||
66 | 66 | ||
67 | # just filtering instance of ARP tables for now | 67 | # just filtering instance of ARP tables for now |
68 | obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o | 68 | obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o |
69 | |||
70 | obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o | ||
71 | |||
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index fd7a3f68917f..97e61eadf580 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -221,9 +221,8 @@ static inline int arp_checkentry(const struct arpt_arp *arp) | |||
221 | static unsigned int | 221 | static unsigned int |
222 | arpt_error(struct sk_buff *skb, const struct xt_action_param *par) | 222 | arpt_error(struct sk_buff *skb, const struct xt_action_param *par) |
223 | { | 223 | { |
224 | if (net_ratelimit()) | 224 | net_err_ratelimited("arp_tables: error: '%s'\n", |
225 | pr_err("arp_tables: error: '%s'\n", | 225 | (const char *)par->targinfo); |
226 | (const char *)par->targinfo); | ||
227 | 226 | ||
228 | return NF_DROP; | 227 | return NF_DROP; |
229 | } | 228 | } |
@@ -303,7 +302,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
303 | if (v < 0) { | 302 | if (v < 0) { |
304 | /* Pop from stack? */ | 303 | /* Pop from stack? */ |
305 | if (v != XT_RETURN) { | 304 | if (v != XT_RETURN) { |
306 | verdict = (unsigned)(-v) - 1; | 305 | verdict = (unsigned int)(-v) - 1; |
307 | break; | 306 | break; |
308 | } | 307 | } |
309 | e = back; | 308 | e = back; |
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c deleted file mode 100644 index 94d45e1f8882..000000000000 --- a/net/ipv4/netfilter/ip_queue.c +++ /dev/null | |||
@@ -1,639 +0,0 @@ | |||
1 | /* | ||
2 | * This is a module which is used for queueing IPv4 packets and | ||
3 | * communicating with userspace via netlink. | ||
4 | * | ||
5 | * (C) 2000-2002 James Morris <jmorris@intercode.com.au> | ||
6 | * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/skbuff.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/ip.h> | ||
16 | #include <linux/notifier.h> | ||
17 | #include <linux/netdevice.h> | ||
18 | #include <linux/netfilter.h> | ||
19 | #include <linux/netfilter_ipv4/ip_queue.h> | ||
20 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
21 | #include <linux/netlink.h> | ||
22 | #include <linux/spinlock.h> | ||
23 | #include <linux/sysctl.h> | ||
24 | #include <linux/proc_fs.h> | ||
25 | #include <linux/seq_file.h> | ||
26 | #include <linux/security.h> | ||
27 | #include <linux/net.h> | ||
28 | #include <linux/mutex.h> | ||
29 | #include <linux/slab.h> | ||
30 | #include <net/net_namespace.h> | ||
31 | #include <net/sock.h> | ||
32 | #include <net/route.h> | ||
33 | #include <net/netfilter/nf_queue.h> | ||
34 | #include <net/ip.h> | ||
35 | |||
36 | #define IPQ_QMAX_DEFAULT 1024 | ||
37 | #define IPQ_PROC_FS_NAME "ip_queue" | ||
38 | #define NET_IPQ_QMAX 2088 | ||
39 | #define NET_IPQ_QMAX_NAME "ip_queue_maxlen" | ||
40 | |||
41 | typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); | ||
42 | |||
43 | static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; | ||
44 | static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; | ||
45 | static DEFINE_SPINLOCK(queue_lock); | ||
46 | static int peer_pid __read_mostly; | ||
47 | static unsigned int copy_range __read_mostly; | ||
48 | static unsigned int queue_total; | ||
49 | static unsigned int queue_dropped = 0; | ||
50 | static unsigned int queue_user_dropped = 0; | ||
51 | static struct sock *ipqnl __read_mostly; | ||
52 | static LIST_HEAD(queue_list); | ||
53 | static DEFINE_MUTEX(ipqnl_mutex); | ||
54 | |||
55 | static inline void | ||
56 | __ipq_enqueue_entry(struct nf_queue_entry *entry) | ||
57 | { | ||
58 | list_add_tail(&entry->list, &queue_list); | ||
59 | queue_total++; | ||
60 | } | ||
61 | |||
62 | static inline int | ||
63 | __ipq_set_mode(unsigned char mode, unsigned int range) | ||
64 | { | ||
65 | int status = 0; | ||
66 | |||
67 | switch(mode) { | ||
68 | case IPQ_COPY_NONE: | ||
69 | case IPQ_COPY_META: | ||
70 | copy_mode = mode; | ||
71 | copy_range = 0; | ||
72 | break; | ||
73 | |||
74 | case IPQ_COPY_PACKET: | ||
75 | if (range > 0xFFFF) | ||
76 | range = 0xFFFF; | ||
77 | copy_range = range; | ||
78 | copy_mode = mode; | ||
79 | break; | ||
80 | |||
81 | default: | ||
82 | status = -EINVAL; | ||
83 | |||
84 | } | ||
85 | return status; | ||
86 | } | ||
87 | |||
88 | static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data); | ||
89 | |||
90 | static inline void | ||
91 | __ipq_reset(void) | ||
92 | { | ||
93 | peer_pid = 0; | ||
94 | net_disable_timestamp(); | ||
95 | __ipq_set_mode(IPQ_COPY_NONE, 0); | ||
96 | __ipq_flush(NULL, 0); | ||
97 | } | ||
98 | |||
99 | static struct nf_queue_entry * | ||
100 | ipq_find_dequeue_entry(unsigned long id) | ||
101 | { | ||
102 | struct nf_queue_entry *entry = NULL, *i; | ||
103 | |||
104 | spin_lock_bh(&queue_lock); | ||
105 | |||
106 | list_for_each_entry(i, &queue_list, list) { | ||
107 | if ((unsigned long)i == id) { | ||
108 | entry = i; | ||
109 | break; | ||
110 | } | ||
111 | } | ||
112 | |||
113 | if (entry) { | ||
114 | list_del(&entry->list); | ||
115 | queue_total--; | ||
116 | } | ||
117 | |||
118 | spin_unlock_bh(&queue_lock); | ||
119 | return entry; | ||
120 | } | ||
121 | |||
122 | static void | ||
123 | __ipq_flush(ipq_cmpfn cmpfn, unsigned long data) | ||
124 | { | ||
125 | struct nf_queue_entry *entry, *next; | ||
126 | |||
127 | list_for_each_entry_safe(entry, next, &queue_list, list) { | ||
128 | if (!cmpfn || cmpfn(entry, data)) { | ||
129 | list_del(&entry->list); | ||
130 | queue_total--; | ||
131 | nf_reinject(entry, NF_DROP); | ||
132 | } | ||
133 | } | ||
134 | } | ||
135 | |||
136 | static void | ||
137 | ipq_flush(ipq_cmpfn cmpfn, unsigned long data) | ||
138 | { | ||
139 | spin_lock_bh(&queue_lock); | ||
140 | __ipq_flush(cmpfn, data); | ||
141 | spin_unlock_bh(&queue_lock); | ||
142 | } | ||
143 | |||
144 | static struct sk_buff * | ||
145 | ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) | ||
146 | { | ||
147 | sk_buff_data_t old_tail; | ||
148 | size_t size = 0; | ||
149 | size_t data_len = 0; | ||
150 | struct sk_buff *skb; | ||
151 | struct ipq_packet_msg *pmsg; | ||
152 | struct nlmsghdr *nlh; | ||
153 | struct timeval tv; | ||
154 | |||
155 | switch (ACCESS_ONCE(copy_mode)) { | ||
156 | case IPQ_COPY_META: | ||
157 | case IPQ_COPY_NONE: | ||
158 | size = NLMSG_SPACE(sizeof(*pmsg)); | ||
159 | break; | ||
160 | |||
161 | case IPQ_COPY_PACKET: | ||
162 | if (entry->skb->ip_summed == CHECKSUM_PARTIAL && | ||
163 | (*errp = skb_checksum_help(entry->skb))) | ||
164 | return NULL; | ||
165 | |||
166 | data_len = ACCESS_ONCE(copy_range); | ||
167 | if (data_len == 0 || data_len > entry->skb->len) | ||
168 | data_len = entry->skb->len; | ||
169 | |||
170 | size = NLMSG_SPACE(sizeof(*pmsg) + data_len); | ||
171 | break; | ||
172 | |||
173 | default: | ||
174 | *errp = -EINVAL; | ||
175 | return NULL; | ||
176 | } | ||
177 | |||
178 | skb = alloc_skb(size, GFP_ATOMIC); | ||
179 | if (!skb) | ||
180 | goto nlmsg_failure; | ||
181 | |||
182 | old_tail = skb->tail; | ||
183 | nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); | ||
184 | pmsg = NLMSG_DATA(nlh); | ||
185 | memset(pmsg, 0, sizeof(*pmsg)); | ||
186 | |||
187 | pmsg->packet_id = (unsigned long )entry; | ||
188 | pmsg->data_len = data_len; | ||
189 | tv = ktime_to_timeval(entry->skb->tstamp); | ||
190 | pmsg->timestamp_sec = tv.tv_sec; | ||
191 | pmsg->timestamp_usec = tv.tv_usec; | ||
192 | pmsg->mark = entry->skb->mark; | ||
193 | pmsg->hook = entry->hook; | ||
194 | pmsg->hw_protocol = entry->skb->protocol; | ||
195 | |||
196 | if (entry->indev) | ||
197 | strcpy(pmsg->indev_name, entry->indev->name); | ||
198 | else | ||
199 | pmsg->indev_name[0] = '\0'; | ||
200 | |||
201 | if (entry->outdev) | ||
202 | strcpy(pmsg->outdev_name, entry->outdev->name); | ||
203 | else | ||
204 | pmsg->outdev_name[0] = '\0'; | ||
205 | |||
206 | if (entry->indev && entry->skb->dev && | ||
207 | entry->skb->mac_header != entry->skb->network_header) { | ||
208 | pmsg->hw_type = entry->skb->dev->type; | ||
209 | pmsg->hw_addrlen = dev_parse_header(entry->skb, | ||
210 | pmsg->hw_addr); | ||
211 | } | ||
212 | |||
213 | if (data_len) | ||
214 | if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len)) | ||
215 | BUG(); | ||
216 | |||
217 | nlh->nlmsg_len = skb->tail - old_tail; | ||
218 | return skb; | ||
219 | |||
220 | nlmsg_failure: | ||
221 | kfree_skb(skb); | ||
222 | *errp = -EINVAL; | ||
223 | printk(KERN_ERR "ip_queue: error creating packet message\n"); | ||
224 | return NULL; | ||
225 | } | ||
226 | |||
227 | static int | ||
228 | ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) | ||
229 | { | ||
230 | int status = -EINVAL; | ||
231 | struct sk_buff *nskb; | ||
232 | |||
233 | if (copy_mode == IPQ_COPY_NONE) | ||
234 | return -EAGAIN; | ||
235 | |||
236 | nskb = ipq_build_packet_message(entry, &status); | ||
237 | if (nskb == NULL) | ||
238 | return status; | ||
239 | |||
240 | spin_lock_bh(&queue_lock); | ||
241 | |||
242 | if (!peer_pid) | ||
243 | goto err_out_free_nskb; | ||
244 | |||
245 | if (queue_total >= queue_maxlen) { | ||
246 | queue_dropped++; | ||
247 | status = -ENOSPC; | ||
248 | if (net_ratelimit()) | ||
249 | printk (KERN_WARNING "ip_queue: full at %d entries, " | ||
250 | "dropping packets(s). Dropped: %d\n", queue_total, | ||
251 | queue_dropped); | ||
252 | goto err_out_free_nskb; | ||
253 | } | ||
254 | |||
255 | /* netlink_unicast will either free the nskb or attach it to a socket */ | ||
256 | status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT); | ||
257 | if (status < 0) { | ||
258 | queue_user_dropped++; | ||
259 | goto err_out_unlock; | ||
260 | } | ||
261 | |||
262 | __ipq_enqueue_entry(entry); | ||
263 | |||
264 | spin_unlock_bh(&queue_lock); | ||
265 | return status; | ||
266 | |||
267 | err_out_free_nskb: | ||
268 | kfree_skb(nskb); | ||
269 | |||
270 | err_out_unlock: | ||
271 | spin_unlock_bh(&queue_lock); | ||
272 | return status; | ||
273 | } | ||
274 | |||
275 | static int | ||
276 | ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e) | ||
277 | { | ||
278 | int diff; | ||
279 | struct iphdr *user_iph = (struct iphdr *)v->payload; | ||
280 | struct sk_buff *nskb; | ||
281 | |||
282 | if (v->data_len < sizeof(*user_iph)) | ||
283 | return 0; | ||
284 | diff = v->data_len - e->skb->len; | ||
285 | if (diff < 0) { | ||
286 | if (pskb_trim(e->skb, v->data_len)) | ||
287 | return -ENOMEM; | ||
288 | } else if (diff > 0) { | ||
289 | if (v->data_len > 0xFFFF) | ||
290 | return -EINVAL; | ||
291 | if (diff > skb_tailroom(e->skb)) { | ||
292 | nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), | ||
293 | diff, GFP_ATOMIC); | ||
294 | if (!nskb) { | ||
295 | printk(KERN_WARNING "ip_queue: error " | ||
296 | "in mangle, dropping packet\n"); | ||
297 | return -ENOMEM; | ||
298 | } | ||
299 | kfree_skb(e->skb); | ||
300 | e->skb = nskb; | ||
301 | } | ||
302 | skb_put(e->skb, diff); | ||
303 | } | ||
304 | if (!skb_make_writable(e->skb, v->data_len)) | ||
305 | return -ENOMEM; | ||
306 | skb_copy_to_linear_data(e->skb, v->payload, v->data_len); | ||
307 | e->skb->ip_summed = CHECKSUM_NONE; | ||
308 | |||
309 | return 0; | ||
310 | } | ||
311 | |||
312 | static int | ||
313 | ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) | ||
314 | { | ||
315 | struct nf_queue_entry *entry; | ||
316 | |||
317 | if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN) | ||
318 | return -EINVAL; | ||
319 | |||
320 | entry = ipq_find_dequeue_entry(vmsg->id); | ||
321 | if (entry == NULL) | ||
322 | return -ENOENT; | ||
323 | else { | ||
324 | int verdict = vmsg->value; | ||
325 | |||
326 | if (vmsg->data_len && vmsg->data_len == len) | ||
327 | if (ipq_mangle_ipv4(vmsg, entry) < 0) | ||
328 | verdict = NF_DROP; | ||
329 | |||
330 | nf_reinject(entry, verdict); | ||
331 | return 0; | ||
332 | } | ||
333 | } | ||
334 | |||
335 | static int | ||
336 | ipq_set_mode(unsigned char mode, unsigned int range) | ||
337 | { | ||
338 | int status; | ||
339 | |||
340 | spin_lock_bh(&queue_lock); | ||
341 | status = __ipq_set_mode(mode, range); | ||
342 | spin_unlock_bh(&queue_lock); | ||
343 | return status; | ||
344 | } | ||
345 | |||
346 | static int | ||
347 | ipq_receive_peer(struct ipq_peer_msg *pmsg, | ||
348 | unsigned char type, unsigned int len) | ||
349 | { | ||
350 | int status = 0; | ||
351 | |||
352 | if (len < sizeof(*pmsg)) | ||
353 | return -EINVAL; | ||
354 | |||
355 | switch (type) { | ||
356 | case IPQM_MODE: | ||
357 | status = ipq_set_mode(pmsg->msg.mode.value, | ||
358 | pmsg->msg.mode.range); | ||
359 | break; | ||
360 | |||
361 | case IPQM_VERDICT: | ||
362 | status = ipq_set_verdict(&pmsg->msg.verdict, | ||
363 | len - sizeof(*pmsg)); | ||
364 | break; | ||
365 | default: | ||
366 | status = -EINVAL; | ||
367 | } | ||
368 | return status; | ||
369 | } | ||
370 | |||
371 | static int | ||
372 | dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) | ||
373 | { | ||
374 | if (entry->indev) | ||
375 | if (entry->indev->ifindex == ifindex) | ||
376 | return 1; | ||
377 | if (entry->outdev) | ||
378 | if (entry->outdev->ifindex == ifindex) | ||
379 | return 1; | ||
380 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
381 | if (entry->skb->nf_bridge) { | ||
382 | if (entry->skb->nf_bridge->physindev && | ||
383 | entry->skb->nf_bridge->physindev->ifindex == ifindex) | ||
384 | return 1; | ||
385 | if (entry->skb->nf_bridge->physoutdev && | ||
386 | entry->skb->nf_bridge->physoutdev->ifindex == ifindex) | ||
387 | return 1; | ||
388 | } | ||
389 | #endif | ||
390 | return 0; | ||
391 | } | ||
392 | |||
393 | static void | ||
394 | ipq_dev_drop(int ifindex) | ||
395 | { | ||
396 | ipq_flush(dev_cmp, ifindex); | ||
397 | } | ||
398 | |||
399 | #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) | ||
400 | |||
401 | static inline void | ||
402 | __ipq_rcv_skb(struct sk_buff *skb) | ||
403 | { | ||
404 | int status, type, pid, flags; | ||
405 | unsigned int nlmsglen, skblen; | ||
406 | struct nlmsghdr *nlh; | ||
407 | bool enable_timestamp = false; | ||
408 | |||
409 | skblen = skb->len; | ||
410 | if (skblen < sizeof(*nlh)) | ||
411 | return; | ||
412 | |||
413 | nlh = nlmsg_hdr(skb); | ||
414 | nlmsglen = nlh->nlmsg_len; | ||
415 | if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) | ||
416 | return; | ||
417 | |||
418 | pid = nlh->nlmsg_pid; | ||
419 | flags = nlh->nlmsg_flags; | ||
420 | |||
421 | if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI) | ||
422 | RCV_SKB_FAIL(-EINVAL); | ||
423 | |||
424 | if (flags & MSG_TRUNC) | ||
425 | RCV_SKB_FAIL(-ECOMM); | ||
426 | |||
427 | type = nlh->nlmsg_type; | ||
428 | if (type < NLMSG_NOOP || type >= IPQM_MAX) | ||
429 | RCV_SKB_FAIL(-EINVAL); | ||
430 | |||
431 | if (type <= IPQM_BASE) | ||
432 | return; | ||
433 | |||
434 | if (!capable(CAP_NET_ADMIN)) | ||
435 | RCV_SKB_FAIL(-EPERM); | ||
436 | |||
437 | spin_lock_bh(&queue_lock); | ||
438 | |||
439 | if (peer_pid) { | ||
440 | if (peer_pid != pid) { | ||
441 | spin_unlock_bh(&queue_lock); | ||
442 | RCV_SKB_FAIL(-EBUSY); | ||
443 | } | ||
444 | } else { | ||
445 | enable_timestamp = true; | ||
446 | peer_pid = pid; | ||
447 | } | ||
448 | |||
449 | spin_unlock_bh(&queue_lock); | ||
450 | if (enable_timestamp) | ||
451 | net_enable_timestamp(); | ||
452 | status = ipq_receive_peer(NLMSG_DATA(nlh), type, | ||
453 | nlmsglen - NLMSG_LENGTH(0)); | ||
454 | if (status < 0) | ||
455 | RCV_SKB_FAIL(status); | ||
456 | |||
457 | if (flags & NLM_F_ACK) | ||
458 | netlink_ack(skb, nlh, 0); | ||
459 | } | ||
460 | |||
461 | static void | ||
462 | ipq_rcv_skb(struct sk_buff *skb) | ||
463 | { | ||
464 | mutex_lock(&ipqnl_mutex); | ||
465 | __ipq_rcv_skb(skb); | ||
466 | mutex_unlock(&ipqnl_mutex); | ||
467 | } | ||
468 | |||
469 | static int | ||
470 | ipq_rcv_dev_event(struct notifier_block *this, | ||
471 | unsigned long event, void *ptr) | ||
472 | { | ||
473 | struct net_device *dev = ptr; | ||
474 | |||
475 | if (!net_eq(dev_net(dev), &init_net)) | ||
476 | return NOTIFY_DONE; | ||
477 | |||
478 | /* Drop any packets associated with the downed device */ | ||
479 | if (event == NETDEV_DOWN) | ||
480 | ipq_dev_drop(dev->ifindex); | ||
481 | return NOTIFY_DONE; | ||
482 | } | ||
483 | |||
484 | static struct notifier_block ipq_dev_notifier = { | ||
485 | .notifier_call = ipq_rcv_dev_event, | ||
486 | }; | ||
487 | |||
488 | static int | ||
489 | ipq_rcv_nl_event(struct notifier_block *this, | ||
490 | unsigned long event, void *ptr) | ||
491 | { | ||
492 | struct netlink_notify *n = ptr; | ||
493 | |||
494 | if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { | ||
495 | spin_lock_bh(&queue_lock); | ||
496 | if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) | ||
497 | __ipq_reset(); | ||
498 | spin_unlock_bh(&queue_lock); | ||
499 | } | ||
500 | return NOTIFY_DONE; | ||
501 | } | ||
502 | |||
503 | static struct notifier_block ipq_nl_notifier = { | ||
504 | .notifier_call = ipq_rcv_nl_event, | ||
505 | }; | ||
506 | |||
507 | #ifdef CONFIG_SYSCTL | ||
508 | static struct ctl_table_header *ipq_sysctl_header; | ||
509 | |||
510 | static ctl_table ipq_table[] = { | ||
511 | { | ||
512 | .procname = NET_IPQ_QMAX_NAME, | ||
513 | .data = &queue_maxlen, | ||
514 | .maxlen = sizeof(queue_maxlen), | ||
515 | .mode = 0644, | ||
516 | .proc_handler = proc_dointvec | ||
517 | }, | ||
518 | { } | ||
519 | }; | ||
520 | #endif | ||
521 | |||
522 | #ifdef CONFIG_PROC_FS | ||
523 | static int ip_queue_show(struct seq_file *m, void *v) | ||
524 | { | ||
525 | spin_lock_bh(&queue_lock); | ||
526 | |||
527 | seq_printf(m, | ||
528 | "Peer PID : %d\n" | ||
529 | "Copy mode : %hu\n" | ||
530 | "Copy range : %u\n" | ||
531 | "Queue length : %u\n" | ||
532 | "Queue max. length : %u\n" | ||
533 | "Queue dropped : %u\n" | ||
534 | "Netlink dropped : %u\n", | ||
535 | peer_pid, | ||
536 | copy_mode, | ||
537 | copy_range, | ||
538 | queue_total, | ||
539 | queue_maxlen, | ||
540 | queue_dropped, | ||
541 | queue_user_dropped); | ||
542 | |||
543 | spin_unlock_bh(&queue_lock); | ||
544 | return 0; | ||
545 | } | ||
546 | |||
547 | static int ip_queue_open(struct inode *inode, struct file *file) | ||
548 | { | ||
549 | return single_open(file, ip_queue_show, NULL); | ||
550 | } | ||
551 | |||
552 | static const struct file_operations ip_queue_proc_fops = { | ||
553 | .open = ip_queue_open, | ||
554 | .read = seq_read, | ||
555 | .llseek = seq_lseek, | ||
556 | .release = single_release, | ||
557 | .owner = THIS_MODULE, | ||
558 | }; | ||
559 | #endif | ||
560 | |||
561 | static const struct nf_queue_handler nfqh = { | ||
562 | .name = "ip_queue", | ||
563 | .outfn = &ipq_enqueue_packet, | ||
564 | }; | ||
565 | |||
566 | static int __init ip_queue_init(void) | ||
567 | { | ||
568 | int status = -ENOMEM; | ||
569 | struct proc_dir_entry *proc __maybe_unused; | ||
570 | |||
571 | netlink_register_notifier(&ipq_nl_notifier); | ||
572 | ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, | ||
573 | ipq_rcv_skb, NULL, THIS_MODULE); | ||
574 | if (ipqnl == NULL) { | ||
575 | printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); | ||
576 | goto cleanup_netlink_notifier; | ||
577 | } | ||
578 | |||
579 | #ifdef CONFIG_PROC_FS | ||
580 | proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net, | ||
581 | &ip_queue_proc_fops); | ||
582 | if (!proc) { | ||
583 | printk(KERN_ERR "ip_queue: failed to create proc entry\n"); | ||
584 | goto cleanup_ipqnl; | ||
585 | } | ||
586 | #endif | ||
587 | register_netdevice_notifier(&ipq_dev_notifier); | ||
588 | #ifdef CONFIG_SYSCTL | ||
589 | ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table); | ||
590 | #endif | ||
591 | status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh); | ||
592 | if (status < 0) { | ||
593 | printk(KERN_ERR "ip_queue: failed to register queue handler\n"); | ||
594 | goto cleanup_sysctl; | ||
595 | } | ||
596 | return status; | ||
597 | |||
598 | cleanup_sysctl: | ||
599 | #ifdef CONFIG_SYSCTL | ||
600 | unregister_sysctl_table(ipq_sysctl_header); | ||
601 | #endif | ||
602 | unregister_netdevice_notifier(&ipq_dev_notifier); | ||
603 | proc_net_remove(&init_net, IPQ_PROC_FS_NAME); | ||
604 | cleanup_ipqnl: __maybe_unused | ||
605 | netlink_kernel_release(ipqnl); | ||
606 | mutex_lock(&ipqnl_mutex); | ||
607 | mutex_unlock(&ipqnl_mutex); | ||
608 | |||
609 | cleanup_netlink_notifier: | ||
610 | netlink_unregister_notifier(&ipq_nl_notifier); | ||
611 | return status; | ||
612 | } | ||
613 | |||
614 | static void __exit ip_queue_fini(void) | ||
615 | { | ||
616 | nf_unregister_queue_handlers(&nfqh); | ||
617 | |||
618 | ipq_flush(NULL, 0); | ||
619 | |||
620 | #ifdef CONFIG_SYSCTL | ||
621 | unregister_sysctl_table(ipq_sysctl_header); | ||
622 | #endif | ||
623 | unregister_netdevice_notifier(&ipq_dev_notifier); | ||
624 | proc_net_remove(&init_net, IPQ_PROC_FS_NAME); | ||
625 | |||
626 | netlink_kernel_release(ipqnl); | ||
627 | mutex_lock(&ipqnl_mutex); | ||
628 | mutex_unlock(&ipqnl_mutex); | ||
629 | |||
630 | netlink_unregister_notifier(&ipq_nl_notifier); | ||
631 | } | ||
632 | |||
633 | MODULE_DESCRIPTION("IPv4 packet queue handler"); | ||
634 | MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); | ||
635 | MODULE_LICENSE("GPL"); | ||
636 | MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL); | ||
637 | |||
638 | module_init(ip_queue_init); | ||
639 | module_exit(ip_queue_fini); | ||
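ip_queue, the old NETLINK_FIREWALL backend for the QUEUE target, is deleted outright here; it had long been superseded by nfnetlink_queue. For anyone porting userspace off the removed interface, a hedged sketch of the equivalent via libnetfilter_queue (check the library headers for exact signatures; error handling mostly omitted; link with -lnetfilter_queue):

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <sys/socket.h>
	#include <unistd.h>
	#include <linux/netfilter.h>
	#include <libnetfilter_queue/libnetfilter_queue.h>

	/* Accept every packet arriving on NFQUEUE queue 0. */
	static int cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
		      struct nfq_data *nfa, void *data)
	{
		struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr(nfa);
		uint32_t id = ph ? ntohl(ph->packet_id) : 0;

		return nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL);
	}

	int main(void)
	{
		struct nfq_handle *h = nfq_open();
		struct nfq_q_handle *qh;
		char buf[4096];
		int rv;

		nfq_unbind_pf(h, AF_INET);	/* older kernels need the */
		nfq_bind_pf(h, AF_INET);	/* explicit (re)bind      */
		qh = nfq_create_queue(h, 0, &cb, NULL);
		nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff);
		while ((rv = recv(nfq_fd(h), buf, sizeof(buf), 0)) >= 0)
			nfq_handle_packet(h, buf, rv);
		nfq_destroy_queue(qh);
		nfq_close(h);
		return 0;
	}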
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 24e556e83a3b..170b1fdd6b72 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -153,8 +153,7 @@ ip_checkentry(const struct ipt_ip *ip) | |||
153 | static unsigned int | 153 | static unsigned int |
154 | ipt_error(struct sk_buff *skb, const struct xt_action_param *par) | 154 | ipt_error(struct sk_buff *skb, const struct xt_action_param *par) |
155 | { | 155 | { |
156 | if (net_ratelimit()) | 156 | net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo); |
157 | pr_info("error: `%s'\n", (const char *)par->targinfo); | ||
158 | 157 | ||
159 | return NF_DROP; | 158 | return NF_DROP; |
160 | } | 159 | } |
@@ -377,7 +376,7 @@ ipt_do_table(struct sk_buff *skb, | |||
377 | if (v < 0) { | 376 | if (v < 0) { |
378 | /* Pop from stack? */ | 377 | /* Pop from stack? */ |
379 | if (v != XT_RETURN) { | 378 | if (v != XT_RETURN) { |
380 | verdict = (unsigned)(-v) - 1; | 379 | verdict = (unsigned int)(-v) - 1; |
381 | break; | 380 | break; |
382 | } | 381 | } |
383 | if (*stackptr <= origptr) { | 382 | if (*stackptr <= origptr) { |
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index a639967eb727..fe5daea5214d 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -246,8 +246,7 @@ clusterip_hashfn(const struct sk_buff *skb, | |||
246 | dport = ports[1]; | 246 | dport = ports[1]; |
247 | } | 247 | } |
248 | } else { | 248 | } else { |
249 | if (net_ratelimit()) | 249 | net_info_ratelimited("unknown protocol %u\n", iph->protocol); |
250 | pr_info("unknown protocol %u\n", iph->protocol); | ||
251 | } | 250 | } |
252 | 251 | ||
253 | switch (config->hash_mode) { | 252 | switch (config->hash_mode) { |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index de9da21113a1..91747d4ebc26 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |||
@@ -74,16 +74,24 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, | |||
74 | 74 | ||
75 | iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); | 75 | iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); |
76 | if (iph == NULL) | 76 | if (iph == NULL) |
77 | return -NF_DROP; | 77 | return -NF_ACCEPT; |
78 | 78 | ||
79 | /* Conntrack defragments packets, we might still see fragments | 79 | /* Conntrack defragments packets, we might still see fragments |
80 | * inside ICMP packets though. */ | 80 | * inside ICMP packets though. */ |
81 | if (iph->frag_off & htons(IP_OFFSET)) | 81 | if (iph->frag_off & htons(IP_OFFSET)) |
82 | return -NF_DROP; | 82 | return -NF_ACCEPT; |
83 | 83 | ||
84 | *dataoff = nhoff + (iph->ihl << 2); | 84 | *dataoff = nhoff + (iph->ihl << 2); |
85 | *protonum = iph->protocol; | 85 | *protonum = iph->protocol; |
86 | 86 | ||
87 | /* Check bogus IP headers */ | ||
88 | if (*dataoff > skb->len) { | ||
89 | pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: " | ||
90 | "nhoff %u, ihl %u, skblen %u\n", | ||
91 | nhoff, iph->ihl << 2, skb->len); | ||
92 | return -NF_ACCEPT; | ||
93 | } | ||
94 | |||
87 | return NF_ACCEPT; | 95 | return NF_ACCEPT; |
88 | } | 96 | } |
89 | 97 | ||
@@ -303,8 +311,9 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) | |||
303 | static int ipv4_tuple_to_nlattr(struct sk_buff *skb, | 311 | static int ipv4_tuple_to_nlattr(struct sk_buff *skb, |
304 | const struct nf_conntrack_tuple *tuple) | 312 | const struct nf_conntrack_tuple *tuple) |
305 | { | 313 | { |
306 | NLA_PUT_BE32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip); | 314 | if (nla_put_be32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) || |
307 | NLA_PUT_BE32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip); | 315 | nla_put_be32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip)) |
316 | goto nla_put_failure; | ||
308 | return 0; | 317 | return 0; |
309 | 318 | ||
310 | nla_put_failure: | 319 | nla_put_failure: |
@@ -356,7 +365,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { | |||
356 | .nla_policy = ipv4_nla_policy, | 365 | .nla_policy = ipv4_nla_policy, |
357 | #endif | 366 | #endif |
358 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) | 367 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) |
359 | .ctl_table_path = nf_net_ipv4_netfilter_sysctl_path, | 368 | .ctl_table_path = "net/ipv4/netfilter", |
360 | .ctl_table = ip_ct_sysctl_table, | 369 | .ctl_table = ip_ct_sysctl_table, |
361 | #endif | 370 | #endif |
362 | .me = THIS_MODULE, | 371 | .me = THIS_MODULE, |
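Two things happen in the nf_conntrack_l3proto_ipv4.c hunks: malformed or unfetchable IPv4 headers now make conntrack stand aside (-NF_ACCEPT) rather than drop the packet, and a new sanity check skips headers whose claimed length runs past the skb. Both lean on the skb_header_pointer() idiom, sketched here:

	/* skb_header_pointer() returns a pointer into the skb when the
	 * requested bytes are linear, otherwise copies them into the
	 * caller's buffer; NULL means the packet is too short. */
	static int peek_ip_protocol(const struct sk_buff *skb,
				    unsigned int nhoff)
	{
		struct iphdr _iph;
		const struct iphdr *iph;

		iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
		if (iph == NULL)
			return -1;	/* truncated: let it pass untracked */

		return iph->protocol;
	}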
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 7cbe9cb261c2..0847e373d33c 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c | |||
@@ -228,10 +228,10 @@ icmp_error(struct net *net, struct nf_conn *tmpl, | |||
228 | static int icmp_tuple_to_nlattr(struct sk_buff *skb, | 228 | static int icmp_tuple_to_nlattr(struct sk_buff *skb, |
229 | const struct nf_conntrack_tuple *t) | 229 | const struct nf_conntrack_tuple *t) |
230 | { | 230 | { |
231 | NLA_PUT_BE16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id); | 231 | if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) || |
232 | NLA_PUT_U8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type); | 232 | nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) || |
233 | NLA_PUT_U8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code); | 233 | nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code)) |
234 | 234 | goto nla_put_failure; | |
235 | return 0; | 235 | return 0; |
236 | 236 | ||
237 | nla_put_failure: | 237 | nla_put_failure: |
@@ -293,8 +293,8 @@ icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) | |||
293 | { | 293 | { |
294 | const unsigned int *timeout = data; | 294 | const unsigned int *timeout = data; |
295 | 295 | ||
296 | NLA_PUT_BE32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)); | 296 | if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ))) |
297 | 297 | goto nla_put_failure; | |
298 | return 0; | 298 | return 0; |
299 | 299 | ||
300 | nla_put_failure: | 300 | nla_put_failure: |
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 82536701e3a3..cad29c121318 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c | |||
@@ -42,9 +42,7 @@ static int set_addr(struct sk_buff *skb, | |||
42 | if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, | 42 | if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, |
43 | addroff, sizeof(buf), | 43 | addroff, sizeof(buf), |
44 | (char *) &buf, sizeof(buf))) { | 44 | (char *) &buf, sizeof(buf))) { |
45 | if (net_ratelimit()) | 45 | net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_tcp_packet error\n"); |
46 | pr_notice("nf_nat_h323: nf_nat_mangle_tcp_packet" | ||
47 | " error\n"); | ||
48 | return -1; | 46 | return -1; |
49 | } | 47 | } |
50 | 48 | ||
@@ -58,9 +56,7 @@ static int set_addr(struct sk_buff *skb, | |||
58 | if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, | 56 | if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, |
59 | addroff, sizeof(buf), | 57 | addroff, sizeof(buf), |
60 | (char *) &buf, sizeof(buf))) { | 58 | (char *) &buf, sizeof(buf))) { |
61 | if (net_ratelimit()) | 59 | net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n"); |
62 | pr_notice("nf_nat_h323: nf_nat_mangle_udp_packet" | ||
63 | " error\n"); | ||
64 | return -1; | 60 | return -1; |
65 | } | 61 | } |
66 | /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy | 62 | /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy |
@@ -214,8 +210,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, | |||
214 | 210 | ||
215 | /* Run out of expectations */ | 211 | /* Run out of expectations */ |
216 | if (i >= H323_RTP_CHANNEL_MAX) { | 212 | if (i >= H323_RTP_CHANNEL_MAX) { |
217 | if (net_ratelimit()) | 213 | net_notice_ratelimited("nf_nat_h323: out of expectations\n"); |
218 | pr_notice("nf_nat_h323: out of expectations\n"); | ||
219 | return 0; | 214 | return 0; |
220 | } | 215 | } |
221 | 216 | ||
@@ -244,8 +239,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, | |||
244 | } | 239 | } |
245 | 240 | ||
246 | if (nated_port == 0) { /* No port available */ | 241 | if (nated_port == 0) { /* No port available */ |
247 | if (net_ratelimit()) | 242 | net_notice_ratelimited("nf_nat_h323: out of RTP ports\n"); |
248 | pr_notice("nf_nat_h323: out of RTP ports\n"); | ||
249 | return 0; | 243 | return 0; |
250 | } | 244 | } |
251 | 245 | ||
@@ -308,8 +302,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, | |||
308 | } | 302 | } |
309 | 303 | ||
310 | if (nated_port == 0) { /* No port available */ | 304 | if (nated_port == 0) { /* No port available */ |
311 | if (net_ratelimit()) | 305 | net_notice_ratelimited("nf_nat_h323: out of TCP ports\n"); |
312 | pr_notice("nf_nat_h323: out of TCP ports\n"); | ||
313 | return 0; | 306 | return 0; |
314 | } | 307 | } |
315 | 308 | ||
@@ -365,8 +358,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, | |||
365 | } | 358 | } |
366 | 359 | ||
367 | if (nated_port == 0) { /* No port available */ | 360 | if (nated_port == 0) { /* No port available */ |
368 | if (net_ratelimit()) | 361 | net_notice_ratelimited("nf_nat_q931: out of TCP ports\n"); |
369 | pr_notice("nf_nat_q931: out of TCP ports\n"); | ||
370 | return 0; | 362 | return 0; |
371 | } | 363 | } |
372 | 364 | ||
@@ -456,8 +448,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, | |||
456 | } | 448 | } |
457 | 449 | ||
458 | if (nated_port == 0) { /* No port available */ | 450 | if (nated_port == 0) { /* No port available */ |
459 | if (net_ratelimit()) | 451 | net_notice_ratelimited("nf_nat_ras: out of TCP ports\n"); |
460 | pr_notice("nf_nat_ras: out of TCP ports\n"); | ||
461 | return 0; | 452 | return 0; |
462 | } | 453 | } |
463 | 454 | ||
@@ -545,8 +536,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, | |||
545 | } | 536 | } |
546 | 537 | ||
547 | if (nated_port == 0) { /* No port available */ | 538 | if (nated_port == 0) { /* No port available */ |
548 | if (net_ratelimit()) | 539 | net_notice_ratelimited("nf_nat_q931: out of TCP ports\n"); |
549 | pr_notice("nf_nat_q931: out of TCP ports\n"); | ||
550 | return 0; | 540 | return 0; |
551 | } | 541 | } |
552 | 542 | ||
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 57932c43960e..ea4a23813d26 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c | |||
@@ -283,7 +283,7 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, | |||
283 | __be32 newip; | 283 | __be32 newip; |
284 | u_int16_t port; | 284 | u_int16_t port; |
285 | char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; | 285 | char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; |
286 | unsigned buflen; | 286 | unsigned int buflen; |
287 | 287 | ||
288 | /* Connection will come from reply */ | 288 | /* Connection will come from reply */ |
289 | if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip) | 289 | if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip) |
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 2133c30a4a5f..746edec8b86e 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
@@ -1206,8 +1206,7 @@ static int snmp_translate(struct nf_conn *ct, | |||
1206 | 1206 | ||
1207 | if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr), | 1207 | if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr), |
1208 | paylen, &map, &udph->check)) { | 1208 | paylen, &map, &udph->check)) { |
1209 | if (net_ratelimit()) | 1209 | net_warn_ratelimited("bsalg: parser failed\n"); |
1210 | printk(KERN_WARNING "bsalg: parser failed\n"); | ||
1211 | return NF_DROP; | 1210 | return NF_DROP; |
1212 | } | 1211 | } |
1213 | return NF_ACCEPT; | 1212 | return NF_ACCEPT; |
@@ -1241,9 +1240,8 @@ static int help(struct sk_buff *skb, unsigned int protoff, | |||
1241 | * can mess around with the payload. | 1240 | * can mess around with the payload. |
1242 | */ | 1241 | */ |
1243 | if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { | 1242 | if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { |
1244 | if (net_ratelimit()) | 1243 | net_warn_ratelimited("SNMP: dropping malformed packet src=%pI4 dst=%pI4\n", |
1245 | printk(KERN_WARNING "SNMP: dropping malformed packet src=%pI4 dst=%pI4\n", | 1244 | &iph->saddr, &iph->daddr); |
1246 | &iph->saddr, &iph->daddr); | ||
1247 | return NF_DROP; | 1245 | return NF_DROP; |
1248 | } | 1246 | } |
1249 | 1247 | ||
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 50009c787bcd..6e930c7174dd 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c | |||
@@ -51,15 +51,16 @@ static struct ping_table ping_table; | |||
51 | 51 | ||
52 | static u16 ping_port_rover; | 52 | static u16 ping_port_rover; |
53 | 53 | ||
54 | static inline int ping_hashfn(struct net *net, unsigned num, unsigned mask) | 54 | static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int mask) |
55 | { | 55 | { |
56 | int res = (num + net_hash_mix(net)) & mask; | 56 | int res = (num + net_hash_mix(net)) & mask; |
57 | |||
57 | pr_debug("hash(%d) = %d\n", num, res); | 58 | pr_debug("hash(%d) = %d\n", num, res); |
58 | return res; | 59 | return res; |
59 | } | 60 | } |
60 | 61 | ||
61 | static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, | 62 | static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, |
62 | struct net *net, unsigned num) | 63 | struct net *net, unsigned int num) |
63 | { | 64 | { |
64 | return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; | 65 | return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; |
65 | } | 66 | } |
@@ -188,7 +189,8 @@ static void inet_get_ping_group_range_net(struct net *net, gid_t *low, | |||
188 | gid_t *high) | 189 | gid_t *high) |
189 | { | 190 | { |
190 | gid_t *data = net->ipv4.sysctl_ping_group_range; | 191 | gid_t *data = net->ipv4.sysctl_ping_group_range; |
191 | unsigned seq; | 192 | unsigned int seq; |
193 | |||
192 | do { | 194 | do { |
193 | seq = read_seqbegin(&sysctl_local_ports.lock); | 195 | seq = read_seqbegin(&sysctl_local_ports.lock); |
194 | 196 | ||
@@ -410,7 +412,7 @@ struct pingfakehdr { | |||
410 | __wsum wcheck; | 412 | __wsum wcheck; |
411 | }; | 413 | }; |
412 | 414 | ||
413 | static int ping_getfrag(void *from, char * to, | 415 | static int ping_getfrag(void *from, char *to, |
414 | int offset, int fraglen, int odd, struct sk_buff *skb) | 416 | int offset, int fraglen, int odd, struct sk_buff *skb) |
415 | { | 417 | { |
416 | struct pingfakehdr *pfh = (struct pingfakehdr *)from; | 418 | struct pingfakehdr *pfh = (struct pingfakehdr *)from; |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bbd604c68e68..4032b818f3e4 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -288,7 +288,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) | |||
288 | read_unlock(&raw_v4_hashinfo.lock); | 288 | read_unlock(&raw_v4_hashinfo.lock); |
289 | } | 289 | } |
290 | 290 | ||
291 | static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) | 291 | static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) |
292 | { | 292 | { |
293 | /* Charge it to the socket. */ | 293 | /* Charge it to the socket. */ |
294 | 294 | ||
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4dc1c104c942..ffcb3b016843 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -109,6 +109,7 @@ | |||
109 | #include <net/rtnetlink.h> | 109 | #include <net/rtnetlink.h> |
110 | #ifdef CONFIG_SYSCTL | 110 | #ifdef CONFIG_SYSCTL |
111 | #include <linux/sysctl.h> | 111 | #include <linux/sysctl.h> |
112 | #include <linux/kmemleak.h> | ||
112 | #endif | 113 | #endif |
113 | #include <net/secure_seq.h> | 114 | #include <net/secure_seq.h> |
114 | 115 | ||
@@ -229,7 +230,7 @@ const __u8 ip_tos2prio[16] = { | |||
229 | TC_PRIO_INTERACTIVE_BULK, | 230 | TC_PRIO_INTERACTIVE_BULK, |
230 | ECN_OR_COST(INTERACTIVE_BULK) | 231 | ECN_OR_COST(INTERACTIVE_BULK) |
231 | }; | 232 | }; |
232 | 233 | EXPORT_SYMBOL(ip_tos2prio); | |
233 | 234 | ||
234 | /* | 235 | /* |
235 | * Route cache. | 236 | * Route cache. |
@@ -296,7 +297,7 @@ static inline void rt_hash_lock_init(void) | |||
296 | #endif | 297 | #endif |
297 | 298 | ||
298 | static struct rt_hash_bucket *rt_hash_table __read_mostly; | 299 | static struct rt_hash_bucket *rt_hash_table __read_mostly; |
299 | static unsigned rt_hash_mask __read_mostly; | 300 | static unsigned int rt_hash_mask __read_mostly; |
300 | static unsigned int rt_hash_log __read_mostly; | 301 | static unsigned int rt_hash_log __read_mostly; |
301 | 302 | ||
302 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); | 303 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); |
@@ -959,8 +960,7 @@ void rt_cache_flush_batch(struct net *net) | |||
959 | 960 | ||
960 | static void rt_emergency_hash_rebuild(struct net *net) | 961 | static void rt_emergency_hash_rebuild(struct net *net) |
961 | { | 962 | { |
962 | if (net_ratelimit()) | 963 | net_warn_ratelimited("Route hash chain too long!\n"); |
963 | pr_warn("Route hash chain too long!\n"); | ||
964 | rt_cache_invalidate(net); | 964 | rt_cache_invalidate(net); |
965 | } | 965 | } |
966 | 966 | ||
@@ -1083,8 +1083,7 @@ static int rt_garbage_collect(struct dst_ops *ops) | |||
1083 | goto out; | 1083 | goto out; |
1084 | if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size) | 1084 | if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size) |
1085 | goto out; | 1085 | goto out; |
1086 | if (net_ratelimit()) | 1086 | net_warn_ratelimited("dst cache overflow\n"); |
1087 | pr_warn("dst cache overflow\n"); | ||
1088 | RT_CACHE_STAT_INC(gc_dst_overflow); | 1087 | RT_CACHE_STAT_INC(gc_dst_overflow); |
1089 | return 1; | 1088 | return 1; |
1090 | 1089 | ||
@@ -1143,7 +1142,7 @@ static int rt_bind_neighbour(struct rtable *rt) | |||
1143 | return 0; | 1142 | return 0; |
1144 | } | 1143 | } |
1145 | 1144 | ||
1146 | static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt, | 1145 | static struct rtable *rt_intern_hash(unsigned int hash, struct rtable *rt, |
1147 | struct sk_buff *skb, int ifindex) | 1146 | struct sk_buff *skb, int ifindex) |
1148 | { | 1147 | { |
1149 | struct rtable *rth, *cand; | 1148 | struct rtable *rth, *cand; |
@@ -1181,8 +1180,7 @@ restart: | |||
1181 | if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { | 1180 | if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { |
1182 | int err = rt_bind_neighbour(rt); | 1181 | int err = rt_bind_neighbour(rt); |
1183 | if (err) { | 1182 | if (err) { |
1184 | if (net_ratelimit()) | 1183 | net_warn_ratelimited("Neighbour table failure & not caching routes\n"); |
1185 | pr_warn("Neighbour table failure & not caching routes\n"); | ||
1186 | ip_rt_put(rt); | 1184 | ip_rt_put(rt); |
1187 | return ERR_PTR(err); | 1185 | return ERR_PTR(err); |
1188 | } | 1186 | } |
@@ -1298,8 +1296,7 @@ restart: | |||
1298 | goto restart; | 1296 | goto restart; |
1299 | } | 1297 | } |
1300 | 1298 | ||
1301 | if (net_ratelimit()) | 1299 | net_warn_ratelimited("Neighbour table overflow\n"); |
1302 | pr_warn("Neighbour table overflow\n"); | ||
1303 | rt_drop(rt); | 1300 | rt_drop(rt); |
1304 | return ERR_PTR(-ENOBUFS); | 1301 | return ERR_PTR(-ENOBUFS); |
1305 | } | 1302 | } |
@@ -1377,14 +1374,13 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) | |||
1377 | return; | 1374 | return; |
1378 | } | 1375 | } |
1379 | } else if (!rt) | 1376 | } else if (!rt) |
1380 | printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", | 1377 | pr_debug("rt_bind_peer(0) @%p\n", __builtin_return_address(0)); |
1381 | __builtin_return_address(0)); | ||
1382 | 1378 | ||
1383 | ip_select_fb_ident(iph); | 1379 | ip_select_fb_ident(iph); |
1384 | } | 1380 | } |
1385 | EXPORT_SYMBOL(__ip_select_ident); | 1381 | EXPORT_SYMBOL(__ip_select_ident); |
1386 | 1382 | ||
1387 | static void rt_del(unsigned hash, struct rtable *rt) | 1383 | static void rt_del(unsigned int hash, struct rtable *rt) |
1388 | { | 1384 | { |
1389 | struct rtable __rcu **rthp; | 1385 | struct rtable __rcu **rthp; |
1390 | struct rtable *aux; | 1386 | struct rtable *aux; |
@@ -1502,11 +1498,11 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1502 | 1498 | ||
1503 | reject_redirect: | 1499 | reject_redirect: |
1504 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1500 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
1505 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) | 1501 | if (IN_DEV_LOG_MARTIANS(in_dev)) |
1506 | pr_info("Redirect from %pI4 on %s about %pI4 ignored\n" | 1502 | net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n" |
1507 | " Advised path = %pI4 -> %pI4\n", | 1503 | " Advised path = %pI4 -> %pI4\n", |
1508 | &old_gw, dev->name, &new_gw, | 1504 | &old_gw, dev->name, &new_gw, |
1509 | &saddr, &daddr); | 1505 | &saddr, &daddr); |
1510 | #endif | 1506 | #endif |
1511 | ; | 1507 | ; |
1512 | } | 1508 | } |
@@ -1538,7 +1534,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
1538 | ip_rt_put(rt); | 1534 | ip_rt_put(rt); |
1539 | ret = NULL; | 1535 | ret = NULL; |
1540 | } else if (rt->rt_flags & RTCF_REDIRECTED) { | 1536 | } else if (rt->rt_flags & RTCF_REDIRECTED) { |
1541 | unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, | 1537 | unsigned int hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, |
1542 | rt->rt_oif, | 1538 | rt->rt_oif, |
1543 | rt_genid(dev_net(dst->dev))); | 1539 | rt_genid(dev_net(dst->dev))); |
1544 | rt_del(hash, rt); | 1540 | rt_del(hash, rt); |
@@ -1616,11 +1612,10 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1616 | ++peer->rate_tokens; | 1612 | ++peer->rate_tokens; |
1617 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1613 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
1618 | if (log_martians && | 1614 | if (log_martians && |
1619 | peer->rate_tokens == ip_rt_redirect_number && | 1615 | peer->rate_tokens == ip_rt_redirect_number) |
1620 | net_ratelimit()) | 1616 | net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", |
1621 | pr_warn("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", | 1617 | &ip_hdr(skb)->saddr, rt->rt_iif, |
1622 | &ip_hdr(skb)->saddr, rt->rt_iif, | 1618 | &rt->rt_dst, &rt->rt_gateway); |
1623 | &rt->rt_dst, &rt->rt_gateway); | ||
1624 | #endif | 1619 | #endif |
1625 | } | 1620 | } |
1626 | } | 1621 | } |
@@ -1843,9 +1838,9 @@ static void ipv4_link_failure(struct sk_buff *skb) | |||
1843 | 1838 | ||
1844 | static int ip_rt_bug(struct sk_buff *skb) | 1839 | static int ip_rt_bug(struct sk_buff *skb) |
1845 | { | 1840 | { |
1846 | printk(KERN_DEBUG "ip_rt_bug: %pI4 -> %pI4, %s\n", | 1841 | pr_debug("%s: %pI4 -> %pI4, %s\n", |
1847 | &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, | 1842 | __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, |
1848 | skb->dev ? skb->dev->name : "?"); | 1843 | skb->dev ? skb->dev->name : "?"); |
1849 | kfree_skb(skb); | 1844 | kfree_skb(skb); |
1850 | WARN_ON(1); | 1845 | WARN_ON(1); |
1851 | return 0; | 1846 | return 0; |
@@ -2041,7 +2036,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2041 | if (err < 0) | 2036 | if (err < 0) |
2042 | goto e_err; | 2037 | goto e_err; |
2043 | } | 2038 | } |
2044 | rth = rt_dst_alloc(init_net.loopback_dev, | 2039 | rth = rt_dst_alloc(dev_net(dev)->loopback_dev, |
2045 | IN_DEV_CONF_GET(in_dev, NOPOLICY), false); | 2040 | IN_DEV_CONF_GET(in_dev, NOPOLICY), false); |
2046 | if (!rth) | 2041 | if (!rth) |
2047 | goto e_nobufs; | 2042 | goto e_nobufs; |
@@ -2134,8 +2129,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2134 | /* get a working reference to the output device */ | 2129 | /* get a working reference to the output device */ |
2135 | out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); | 2130 | out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); |
2136 | if (out_dev == NULL) { | 2131 | if (out_dev == NULL) { |
2137 | if (net_ratelimit()) | 2132 | net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n"); |
2138 | pr_crit("Bug in ip_route_input_slow(). Please report.\n"); | ||
2139 | return -EINVAL; | 2133 | return -EINVAL; |
2140 | } | 2134 | } |
2141 | 2135 | ||
@@ -2215,9 +2209,9 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
2215 | struct in_device *in_dev, | 2209 | struct in_device *in_dev, |
2216 | __be32 daddr, __be32 saddr, u32 tos) | 2210 | __be32 daddr, __be32 saddr, u32 tos) |
2217 | { | 2211 | { |
2218 | struct rtable* rth = NULL; | 2212 | struct rtable *rth = NULL; |
2219 | int err; | 2213 | int err; |
2220 | unsigned hash; | 2214 | unsigned int hash; |
2221 | 2215 | ||
2222 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 2216 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
2223 | if (res->fi && res->fi->fib_nhs > 1) | 2217 | if (res->fi && res->fi->fib_nhs > 1) |
@@ -2255,13 +2249,13 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2255 | struct fib_result res; | 2249 | struct fib_result res; |
2256 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 2250 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
2257 | struct flowi4 fl4; | 2251 | struct flowi4 fl4; |
2258 | unsigned flags = 0; | 2252 | unsigned int flags = 0; |
2259 | u32 itag = 0; | 2253 | u32 itag = 0; |
2260 | struct rtable * rth; | 2254 | struct rtable *rth; |
2261 | unsigned hash; | 2255 | unsigned int hash; |
2262 | __be32 spec_dst; | 2256 | __be32 spec_dst; |
2263 | int err = -EINVAL; | 2257 | int err = -EINVAL; |
2264 | struct net * net = dev_net(dev); | 2258 | struct net *net = dev_net(dev); |
2265 | 2259 | ||
2266 | /* IP on this device is disabled. */ | 2260 | /* IP on this device is disabled. */ |
2267 | 2261 | ||
@@ -2406,9 +2400,9 @@ no_route: | |||
2406 | martian_destination: | 2400 | martian_destination: |
2407 | RT_CACHE_STAT_INC(in_martian_dst); | 2401 | RT_CACHE_STAT_INC(in_martian_dst); |
2408 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 2402 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
2409 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) | 2403 | if (IN_DEV_LOG_MARTIANS(in_dev)) |
2410 | pr_warn("martian destination %pI4 from %pI4, dev %s\n", | 2404 | net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n", |
2411 | &daddr, &saddr, dev->name); | 2405 | &daddr, &saddr, dev->name); |
2412 | #endif | 2406 | #endif |
2413 | 2407 | ||
2414 | e_hostunreach: | 2408 | e_hostunreach: |
@@ -2433,8 +2427,8 @@ martian_source_keep_err: | |||
2433 | int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 2427 | int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
2434 | u8 tos, struct net_device *dev, bool noref) | 2428 | u8 tos, struct net_device *dev, bool noref) |
2435 | { | 2429 | { |
2436 | struct rtable * rth; | 2430 | struct rtable *rth; |
2437 | unsigned hash; | 2431 | unsigned int hash; |
2438 | int iif = dev->ifindex; | 2432 | int iif = dev->ifindex; |
2439 | struct net *net; | 2433 | struct net *net; |
2440 | int res; | 2434 | int res; |
@@ -2972,7 +2966,8 @@ static int rt_fill_info(struct net *net, | |||
2972 | r->rtm_src_len = 0; | 2966 | r->rtm_src_len = 0; |
2973 | r->rtm_tos = rt->rt_key_tos; | 2967 | r->rtm_tos = rt->rt_key_tos; |
2974 | r->rtm_table = RT_TABLE_MAIN; | 2968 | r->rtm_table = RT_TABLE_MAIN; |
2975 | NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); | 2969 | if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN)) |
2970 | goto nla_put_failure; | ||
2976 | r->rtm_type = rt->rt_type; | 2971 | r->rtm_type = rt->rt_type; |
2977 | r->rtm_scope = RT_SCOPE_UNIVERSE; | 2972 | r->rtm_scope = RT_SCOPE_UNIVERSE; |
2978 | r->rtm_protocol = RTPROT_UNSPEC; | 2973 | r->rtm_protocol = RTPROT_UNSPEC; |
@@ -2980,31 +2975,38 @@ static int rt_fill_info(struct net *net, | |||
2980 | if (rt->rt_flags & RTCF_NOTIFY) | 2975 | if (rt->rt_flags & RTCF_NOTIFY) |
2981 | r->rtm_flags |= RTM_F_NOTIFY; | 2976 | r->rtm_flags |= RTM_F_NOTIFY; |
2982 | 2977 | ||
2983 | NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); | 2978 | if (nla_put_be32(skb, RTA_DST, rt->rt_dst)) |
2984 | 2979 | goto nla_put_failure; | |
2985 | if (rt->rt_key_src) { | 2980 | if (rt->rt_key_src) { |
2986 | r->rtm_src_len = 32; | 2981 | r->rtm_src_len = 32; |
2987 | NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src); | 2982 | if (nla_put_be32(skb, RTA_SRC, rt->rt_key_src)) |
2983 | goto nla_put_failure; | ||
2988 | } | 2984 | } |
2989 | if (rt->dst.dev) | 2985 | if (rt->dst.dev && |
2990 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); | 2986 | nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) |
2987 | goto nla_put_failure; | ||
2991 | #ifdef CONFIG_IP_ROUTE_CLASSID | 2988 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2992 | if (rt->dst.tclassid) | 2989 | if (rt->dst.tclassid && |
2993 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); | 2990 | nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) |
2991 | goto nla_put_failure; | ||
2994 | #endif | 2992 | #endif |
2995 | if (rt_is_input_route(rt)) | 2993 | if (rt_is_input_route(rt)) { |
2996 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); | 2994 | if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_spec_dst)) |
2997 | else if (rt->rt_src != rt->rt_key_src) | 2995 | goto nla_put_failure; |
2998 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); | 2996 | } else if (rt->rt_src != rt->rt_key_src) { |
2999 | 2997 | if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src)) | |
3000 | if (rt->rt_dst != rt->rt_gateway) | 2998 | goto nla_put_failure; |
3001 | NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); | 2999 | } |
3000 | if (rt->rt_dst != rt->rt_gateway && | ||
3001 | nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) | ||
3002 | goto nla_put_failure; | ||
3002 | 3003 | ||
3003 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) | 3004 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) |
3004 | goto nla_put_failure; | 3005 | goto nla_put_failure; |
3005 | 3006 | ||
3006 | if (rt->rt_mark) | 3007 | if (rt->rt_mark && |
3007 | NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); | 3008 | nla_put_be32(skb, RTA_MARK, rt->rt_mark)) |
3009 | goto nla_put_failure; | ||
3008 | 3010 | ||
3009 | error = rt->dst.error; | 3011 | error = rt->dst.error; |
3010 | if (peer) { | 3012 | if (peer) { |
@@ -3045,7 +3047,8 @@ static int rt_fill_info(struct net *net, | |||
3045 | } | 3047 | } |
3046 | } else | 3048 | } else |
3047 | #endif | 3049 | #endif |
3048 | NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif); | 3050 | if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) |
3051 | goto nla_put_failure; | ||
3049 | } | 3052 | } |
3050 | 3053 | ||
3051 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, | 3054 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, |
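The rt_fill_info() hunks above convert the NLA_PUT*() macros to explicit nla_put_*() calls. The old macros hid a jump in their expansion, roughly the following (reconstructed from the historic include/net/netlink.h, so take the exact shape with a grain of salt):

	#define NLA_PUT(skb, attrtype, attrlen, data)				\
	do {									\
		if (unlikely(nla_put(skb, attrtype, attrlen, data) < 0))	\
			goto nla_put_failure;					\
	} while (0)

Making the goto visible lets conditional attributes collapse into a single if that combines the predicate with the put, as in the RTA_OIF, RTA_FLOW and RTA_MARK cases above.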
@@ -3059,7 +3062,7 @@ nla_put_failure: | |||
3059 | return -EMSGSIZE; | 3062 | return -EMSGSIZE; |
3060 | } | 3063 | } |
3061 | 3064 | ||
3062 | static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) | 3065 | static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) |
3063 | { | 3066 | { |
3064 | struct net *net = sock_net(in_skb->sk); | 3067 | struct net *net = sock_net(in_skb->sk); |
3065 | struct rtmsg *rtm; | 3068 | struct rtmsg *rtm; |
@@ -3334,23 +3337,6 @@ static ctl_table ipv4_route_table[] = { | |||
3334 | { } | 3337 | { } |
3335 | }; | 3338 | }; |
3336 | 3339 | ||
3337 | static struct ctl_table empty[1]; | ||
3338 | |||
3339 | static struct ctl_table ipv4_skeleton[] = | ||
3340 | { | ||
3341 | { .procname = "route", | ||
3342 | .mode = 0555, .child = ipv4_route_table}, | ||
3343 | { .procname = "neigh", | ||
3344 | .mode = 0555, .child = empty}, | ||
3345 | { } | ||
3346 | }; | ||
3347 | |||
3348 | static __net_initdata struct ctl_path ipv4_path[] = { | ||
3349 | { .procname = "net", }, | ||
3350 | { .procname = "ipv4", }, | ||
3351 | { }, | ||
3352 | }; | ||
3353 | |||
3354 | static struct ctl_table ipv4_route_flush_table[] = { | 3340 | static struct ctl_table ipv4_route_flush_table[] = { |
3355 | { | 3341 | { |
3356 | .procname = "flush", | 3342 | .procname = "flush", |
@@ -3361,13 +3347,6 @@ static struct ctl_table ipv4_route_flush_table[] = { | |||
3361 | { }, | 3347 | { }, |
3362 | }; | 3348 | }; |
3363 | 3349 | ||
3364 | static __net_initdata struct ctl_path ipv4_route_path[] = { | ||
3365 | { .procname = "net", }, | ||
3366 | { .procname = "ipv4", }, | ||
3367 | { .procname = "route", }, | ||
3368 | { }, | ||
3369 | }; | ||
3370 | |||
3371 | static __net_init int sysctl_route_net_init(struct net *net) | 3350 | static __net_init int sysctl_route_net_init(struct net *net) |
3372 | { | 3351 | { |
3373 | struct ctl_table *tbl; | 3352 | struct ctl_table *tbl; |
@@ -3380,8 +3359,7 @@ static __net_init int sysctl_route_net_init(struct net *net) | |||
3380 | } | 3359 | } |
3381 | tbl[0].extra1 = net; | 3360 | tbl[0].extra1 = net; |
3382 | 3361 | ||
3383 | net->ipv4.route_hdr = | 3362 | net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl); |
3384 | register_net_sysctl_table(net, ipv4_route_path, tbl); | ||
3385 | if (net->ipv4.route_hdr == NULL) | 3363 | if (net->ipv4.route_hdr == NULL) |
3386 | goto err_reg; | 3364 | goto err_reg; |
3387 | return 0; | 3365 | return 0; |
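This is the consumer side of the new register_net_sysctl() interface: the ctl_path arrays ("net", "ipv4", "route", ...) deleted above are replaced by a plain path string. The helper itself plausibly reduces to a one-line wrapper (a sketch, assuming the per-net sysctl set is reachable from struct net):

	struct ctl_table_header *register_net_sysctl(struct net *net,
						     const char *path,
						     struct ctl_table *table)
	{
		return __register_sysctl_table(&net->sysctls, path, table);
	}

The same conversion is applied to "net/ipv4" in sysctl_net_ipv4.c below, which lets the exported net_ipv4_ctl_path array disappear entirely.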
@@ -3430,9 +3408,15 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; | |||
3430 | static __initdata unsigned long rhash_entries; | 3408 | static __initdata unsigned long rhash_entries; |
3431 | static int __init set_rhash_entries(char *str) | 3409 | static int __init set_rhash_entries(char *str) |
3432 | { | 3410 | { |
3411 | ssize_t ret; | ||
3412 | |||
3433 | if (!str) | 3413 | if (!str) |
3434 | return 0; | 3414 | return 0; |
3435 | rhash_entries = simple_strtoul(str, &str, 0); | 3415 | |
3416 | ret = kstrtoul(str, 0, &rhash_entries); | ||
3417 | if (ret) | ||
3418 | return 0; | ||
3419 | |||
3436 | return 1; | 3420 | return 1; |
3437 | } | 3421 | } |
3438 | __setup("rhash_entries=", set_rhash_entries); | 3422 | __setup("rhash_entries=", set_rhash_entries); |
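simple_strtoul() silently stops at the first non-digit and has no way to report overflow; kstrtoul() parses the whole string and returns an error code. A small illustration with made-up inputs:

	unsigned long v;

	simple_strtoul("123abc", NULL, 0);	/* yields 123, "abc" ignored */
	kstrtoul("123abc", 0, &v);		/* returns -EINVAL, v untouched */
	kstrtoul("123", 0, &v);			/* returns 0, v == 123 */

(kstrtoul() returns int; storing it in the ssize_t ret above is harmless, just wider than needed.) The identical conversion is applied to thash_entries in tcp.c further down.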
@@ -3505,6 +3489,6 @@ int __init ip_rt_init(void) | |||
3505 | */ | 3489 | */ |
3506 | void __init ip_static_sysctl_init(void) | 3490 | void __init ip_static_sysctl_init(void) |
3507 | { | 3491 | { |
3508 | register_sysctl_paths(ipv4_path, ipv4_skeleton); | 3492 | register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table); |
3509 | } | 3493 | } |
3510 | #endif | 3494 | #endif |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7a7724da9bff..ef32956ed655 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <net/tcp_memcontrol.h> | 27 | #include <net/tcp_memcontrol.h> |
28 | 28 | ||
29 | static int zero; | 29 | static int zero; |
30 | static int two = 2; | ||
30 | static int tcp_retr1_max = 255; | 31 | static int tcp_retr1_max = 255; |
31 | static int ip_local_port_range_min[] = { 1, 1 }; | 32 | static int ip_local_port_range_min[] = { 1, 1 }; |
32 | static int ip_local_port_range_max[] = { 65535, 65535 }; | 33 | static int ip_local_port_range_max[] = { 65535, 65535 }; |
@@ -78,7 +79,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, | |||
78 | static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) | 79 | static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high) |
79 | { | 80 | { |
80 | gid_t *data = table->data; | 81 | gid_t *data = table->data; |
81 | unsigned seq; | 82 | unsigned int seq; |
82 | do { | 83 | do { |
83 | seq = read_seqbegin(&sysctl_local_ports.lock); | 84 | seq = read_seqbegin(&sysctl_local_ports.lock); |
84 | 85 | ||
@@ -677,6 +678,15 @@ static struct ctl_table ipv4_table[] = { | |||
677 | .proc_handler = proc_dointvec | 678 | .proc_handler = proc_dointvec |
678 | }, | 679 | }, |
679 | { | 680 | { |
681 | .procname = "tcp_early_retrans", | ||
682 | .data = &sysctl_tcp_early_retrans, | ||
683 | .maxlen = sizeof(int), | ||
684 | .mode = 0644, | ||
685 | .proc_handler = proc_dointvec_minmax, | ||
686 | .extra1 = &zero, | ||
687 | .extra2 = &two, | ||
688 | }, | ||
689 | { | ||
680 | .procname = "udp_mem", | 690 | .procname = "udp_mem", |
681 | .data = &sysctl_udp_mem, | 691 | .data = &sysctl_udp_mem, |
682 | .maxlen = sizeof(sysctl_udp_mem), | 692 | .maxlen = sizeof(sysctl_udp_mem), |
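The new tcp_early_retrans entry uses proc_dointvec_minmax with extra1/extra2 pointing at zero and the new two, so only the values 0, 1 and 2 can ever be written through /proc. The core of that validation behaves roughly like this (an illustrative sketch, not the verbatim handler):

	static int minmax_check(int *val, int new, const int *min, const int *max)
	{
		if ((min && new < *min) || (max && new > *max))
			return -EINVAL;	/* write rejected, *val unchanged */
		*val = new;
		return 0;
	}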
@@ -768,13 +778,6 @@ static struct ctl_table ipv4_net_table[] = { | |||
768 | { } | 778 | { } |
769 | }; | 779 | }; |
770 | 780 | ||
771 | struct ctl_path net_ipv4_ctl_path[] = { | ||
772 | { .procname = "net", }, | ||
773 | { .procname = "ipv4", }, | ||
774 | { }, | ||
775 | }; | ||
776 | EXPORT_SYMBOL_GPL(net_ipv4_ctl_path); | ||
777 | |||
778 | static __net_init int ipv4_sysctl_init_net(struct net *net) | 781 | static __net_init int ipv4_sysctl_init_net(struct net *net) |
779 | { | 782 | { |
780 | struct ctl_table *table; | 783 | struct ctl_table *table; |
@@ -815,8 +818,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) | |||
815 | 818 | ||
816 | tcp_init_mem(net); | 819 | tcp_init_mem(net); |
817 | 820 | ||
818 | net->ipv4.ipv4_hdr = register_net_sysctl_table(net, | 821 | net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); |
819 | net_ipv4_ctl_path, table); | ||
820 | if (net->ipv4.ipv4_hdr == NULL) | 822 | if (net->ipv4.ipv4_hdr == NULL) |
821 | goto err_reg; | 823 | goto err_reg; |
822 | 824 | ||
@@ -857,12 +859,12 @@ static __init int sysctl_ipv4_init(void) | |||
857 | if (!i->procname) | 859 | if (!i->procname) |
858 | return -EINVAL; | 860 | return -EINVAL; |
859 | 861 | ||
860 | hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table); | 862 | hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table); |
861 | if (hdr == NULL) | 863 | if (hdr == NULL) |
862 | return -ENOMEM; | 864 | return -ENOMEM; |
863 | 865 | ||
864 | if (register_pernet_subsys(&ipv4_sysctl_ops)) { | 866 | if (register_pernet_subsys(&ipv4_sysctl_ops)) { |
865 | unregister_sysctl_table(hdr); | 867 | unregister_net_sysctl_table(hdr); |
866 | return -ENOMEM; | 868 | return -ENOMEM; |
867 | } | 869 | } |
868 | 870 | ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cfd7edda0a8e..bb485fcb077e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -363,6 +363,71 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max) | |||
363 | return period; | 363 | return period; |
364 | } | 364 | } |
365 | 365 | ||
366 | /* Address-family independent initialization for a tcp_sock. | ||
367 | * | ||
368 | * NOTE: A lot of things set to zero explicitly by call to | ||
369 | * sk_alloc() so need not be done here. | ||
370 | */ | ||
371 | void tcp_init_sock(struct sock *sk) | ||
372 | { | ||
373 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
374 | struct tcp_sock *tp = tcp_sk(sk); | ||
375 | |||
376 | skb_queue_head_init(&tp->out_of_order_queue); | ||
377 | tcp_init_xmit_timers(sk); | ||
378 | tcp_prequeue_init(tp); | ||
379 | |||
380 | icsk->icsk_rto = TCP_TIMEOUT_INIT; | ||
381 | tp->mdev = TCP_TIMEOUT_INIT; | ||
382 | |||
383 | /* So many TCP implementations out there (incorrectly) count the | ||
384 | * initial SYN frame in their delayed-ACK and congestion control | ||
385 | * algorithms that we must have the following bandaid to talk | ||
386 | * efficiently to them. -DaveM | ||
387 | */ | ||
388 | tp->snd_cwnd = TCP_INIT_CWND; | ||
389 | |||
390 | /* See draft-stevens-tcpca-spec-01 for discussion of the | ||
391 | * initialization of these values. | ||
392 | */ | ||
393 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | ||
394 | tp->snd_cwnd_clamp = ~0; | ||
395 | tp->mss_cache = TCP_MSS_DEFAULT; | ||
396 | |||
397 | tp->reordering = sysctl_tcp_reordering; | ||
398 | tcp_enable_early_retrans(tp); | ||
399 | icsk->icsk_ca_ops = &tcp_init_congestion_ops; | ||
400 | |||
401 | sk->sk_state = TCP_CLOSE; | ||
402 | |||
403 | sk->sk_write_space = sk_stream_write_space; | ||
404 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); | ||
405 | |||
406 | icsk->icsk_sync_mss = tcp_sync_mss; | ||
407 | |||
408 | /* TCP Cookie Transactions */ | ||
409 | if (sysctl_tcp_cookie_size > 0) { | ||
410 | /* Default, cookies without s_data_payload. */ | ||
411 | tp->cookie_values = | ||
412 | kzalloc(sizeof(*tp->cookie_values), | ||
413 | sk->sk_allocation); | ||
414 | if (tp->cookie_values != NULL) | ||
415 | kref_init(&tp->cookie_values->kref); | ||
416 | } | ||
417 | /* Presumed zeroed, in order of appearance: | ||
418 | * cookie_in_always, cookie_out_never, | ||
419 | * s_data_constant, s_data_in, s_data_out | ||
420 | */ | ||
421 | sk->sk_sndbuf = sysctl_tcp_wmem[1]; | ||
422 | sk->sk_rcvbuf = sysctl_tcp_rmem[1]; | ||
423 | |||
424 | local_bh_disable(); | ||
425 | sock_update_memcg(sk); | ||
426 | sk_sockets_allocated_inc(sk); | ||
427 | local_bh_enable(); | ||
428 | } | ||
429 | EXPORT_SYMBOL(tcp_init_sock); | ||
430 | |||
366 | /* | 431 | /* |
367 | * Wait for a TCP event. | 432 | * Wait for a TCP event. |
368 | * | 433 | * |
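tcp_init_sock() hoists the address-family independent setup that was previously duplicated between the IPv4 and IPv6 socket init paths. A sketch of what a per-family caller is left with afterwards (shape assumed from this series; only the af_ops and MD5 hooks stay family-specific):

	static int tcp_v4_init_sock(struct sock *sk)
	{
		struct inet_connection_sock *icsk = inet_csk(sk);

		tcp_init_sock(sk);			/* shared TCP setup */

		icsk->icsk_af_ops = &ipv4_specific;	/* v4-specific remainder */
	#ifdef CONFIG_TCP_MD5SIG
		tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
	#endif
		return 0;
	}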
@@ -528,7 +593,7 @@ static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) | |||
528 | tp->pushed_seq = tp->write_seq; | 593 | tp->pushed_seq = tp->write_seq; |
529 | } | 594 | } |
530 | 595 | ||
531 | static inline int forced_push(const struct tcp_sock *tp) | 596 | static inline bool forced_push(const struct tcp_sock *tp) |
532 | { | 597 | { |
533 | return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); | 598 | return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); |
534 | } | 599 | } |
@@ -701,11 +766,12 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) | |||
701 | skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); | 766 | skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); |
702 | if (skb) { | 767 | if (skb) { |
703 | if (sk_wmem_schedule(sk, skb->truesize)) { | 768 | if (sk_wmem_schedule(sk, skb->truesize)) { |
769 | skb_reserve(skb, sk->sk_prot->max_header); | ||
704 | /* | 770 | /* |
705 | * Make sure that we have exactly size bytes | 771 | * Make sure that we have exactly size bytes |
706 | * available to the caller, no more, no less. | 772 | * available to the caller, no more, no less. |
707 | */ | 773 | */ |
708 | skb_reserve(skb, skb_tailroom(skb) - size); | 774 | skb->avail_size = size; |
709 | return skb; | 775 | return skb; |
710 | } | 776 | } |
711 | __kfree_skb(skb); | 777 | __kfree_skb(skb); |
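sk_stream_alloc_skb() now reserves max_header immediately and records the space the caller asked for in skb->avail_size, instead of shrinking the tailroom to fit. The tcp_sendmsg() hunk below reads that back through skb_availroom(); a sketch of the helper as this series plausibly defines it:

	static inline int skb_availroom(const struct sk_buff *skb)
	{
		return skb_is_nonlinear(skb) ? 0 : skb->avail_size - skb->len;
	}

The effect is that TCP stops treating the slack added by kmalloc() size rounding as usable payload room, so the skb head keeps the size that was actually requested.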
@@ -783,9 +849,10 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse | |||
783 | while (psize > 0) { | 849 | while (psize > 0) { |
784 | struct sk_buff *skb = tcp_write_queue_tail(sk); | 850 | struct sk_buff *skb = tcp_write_queue_tail(sk); |
785 | struct page *page = pages[poffset / PAGE_SIZE]; | 851 | struct page *page = pages[poffset / PAGE_SIZE]; |
786 | int copy, i, can_coalesce; | 852 | int copy, i; |
787 | int offset = poffset % PAGE_SIZE; | 853 | int offset = poffset % PAGE_SIZE; |
788 | int size = min_t(size_t, psize, PAGE_SIZE - offset); | 854 | int size = min_t(size_t, psize, PAGE_SIZE - offset); |
855 | bool can_coalesce; | ||
789 | 856 | ||
790 | if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { | 857 | if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { |
791 | new_segment: | 858 | new_segment: |
@@ -850,8 +917,7 @@ new_segment: | |||
850 | wait_for_sndbuf: | 917 | wait_for_sndbuf: |
851 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 918 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
852 | wait_for_memory: | 919 | wait_for_memory: |
853 | if (copied) | 920 | tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); |
854 | tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); | ||
855 | 921 | ||
856 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) | 922 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) |
857 | goto do_error; | 923 | goto do_error; |
@@ -860,7 +926,7 @@ wait_for_memory: | |||
860 | } | 926 | } |
861 | 927 | ||
862 | out: | 928 | out: |
863 | if (copied) | 929 | if (copied && !(flags & MSG_SENDPAGE_NOTLAST)) |
864 | tcp_push(sk, flags, mss_now, tp->nonagle); | 930 | tcp_push(sk, flags, mss_now, tp->nonagle); |
865 | return copied; | 931 | return copied; |
866 | 932 | ||
@@ -918,7 +984,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
918 | struct tcp_sock *tp = tcp_sk(sk); | 984 | struct tcp_sock *tp = tcp_sk(sk); |
919 | struct sk_buff *skb; | 985 | struct sk_buff *skb; |
920 | int iovlen, flags, err, copied; | 986 | int iovlen, flags, err, copied; |
921 | int mss_now, size_goal; | 987 | int mss_now = 0, size_goal; |
922 | bool sg; | 988 | bool sg; |
923 | long timeo; | 989 | long timeo; |
924 | 990 | ||
@@ -932,6 +998,19 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
932 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) | 998 | if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) |
933 | goto out_err; | 999 | goto out_err; |
934 | 1000 | ||
1001 | if (unlikely(tp->repair)) { | ||
1002 | if (tp->repair_queue == TCP_RECV_QUEUE) { | ||
1003 | copied = tcp_send_rcvq(sk, msg, size); | ||
1004 | goto out; | ||
1005 | } | ||
1006 | |||
1007 | err = -EINVAL; | ||
1008 | if (tp->repair_queue == TCP_NO_QUEUE) | ||
1009 | goto out_err; | ||
1010 | |||
1011 | /* 'common' sending to sendq */ | ||
1012 | } | ||
1013 | |||
935 | /* This should be in poll */ | 1014 | /* This should be in poll */ |
936 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 1015 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
937 | 1016 | ||
@@ -995,15 +1074,14 @@ new_segment: | |||
995 | copy = seglen; | 1074 | copy = seglen; |
996 | 1075 | ||
997 | /* Where to copy to? */ | 1076 | /* Where to copy to? */ |
998 | if (skb_tailroom(skb) > 0) { | 1077 | if (skb_availroom(skb) > 0) { |
999 | /* We have some space in skb head. Superb! */ | 1078 | /* We have some space in skb head. Superb! */ |
1000 | if (copy > skb_tailroom(skb)) | 1079 | copy = min_t(int, copy, skb_availroom(skb)); |
1001 | copy = skb_tailroom(skb); | ||
1002 | err = skb_add_data_nocache(sk, skb, from, copy); | 1080 | err = skb_add_data_nocache(sk, skb, from, copy); |
1003 | if (err) | 1081 | if (err) |
1004 | goto do_fault; | 1082 | goto do_fault; |
1005 | } else { | 1083 | } else { |
1006 | int merge = 0; | 1084 | bool merge = false; |
1007 | int i = skb_shinfo(skb)->nr_frags; | 1085 | int i = skb_shinfo(skb)->nr_frags; |
1008 | struct page *page = sk->sk_sndmsg_page; | 1086 | struct page *page = sk->sk_sndmsg_page; |
1009 | int off; | 1087 | int off; |
@@ -1017,7 +1095,7 @@ new_segment: | |||
1017 | off != PAGE_SIZE) { | 1095 | off != PAGE_SIZE) { |
1018 | /* We can extend the last page | 1096 | /* We can extend the last page |
1019 | * fragment. */ | 1097 | * fragment. */ |
1020 | merge = 1; | 1098 | merge = true; |
1021 | } else if (i == MAX_SKB_FRAGS || !sg) { | 1099 | } else if (i == MAX_SKB_FRAGS || !sg) { |
1022 | /* Need to add new fragment and cannot | 1100 | /* Need to add new fragment and cannot |
1023 | * do this because interface is non-SG, | 1101 | * do this because interface is non-SG, |
@@ -1089,7 +1167,7 @@ new_segment: | |||
1089 | if ((seglen -= copy) == 0 && iovlen == 0) | 1167 | if ((seglen -= copy) == 0 && iovlen == 0) |
1090 | goto out; | 1168 | goto out; |
1091 | 1169 | ||
1092 | if (skb->len < max || (flags & MSG_OOB)) | 1170 | if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) |
1093 | continue; | 1171 | continue; |
1094 | 1172 | ||
1095 | if (forced_push(tp)) { | 1173 | if (forced_push(tp)) { |
@@ -1102,7 +1180,7 @@ new_segment: | |||
1102 | wait_for_sndbuf: | 1180 | wait_for_sndbuf: |
1103 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 1181 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
1104 | wait_for_memory: | 1182 | wait_for_memory: |
1105 | if (copied) | 1183 | if (copied && likely(!tp->repair)) |
1106 | tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); | 1184 | tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); |
1107 | 1185 | ||
1108 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) | 1186 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) |
@@ -1113,7 +1191,7 @@ wait_for_memory: | |||
1113 | } | 1191 | } |
1114 | 1192 | ||
1115 | out: | 1193 | out: |
1116 | if (copied) | 1194 | if (copied && likely(!tp->repair)) |
1117 | tcp_push(sk, flags, mss_now, tp->nonagle); | 1195 | tcp_push(sk, flags, mss_now, tp->nonagle); |
1118 | release_sock(sk); | 1196 | release_sock(sk); |
1119 | return copied; | 1197 | return copied; |
@@ -1187,6 +1265,24 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags) | |||
1187 | return -EAGAIN; | 1265 | return -EAGAIN; |
1188 | } | 1266 | } |
1189 | 1267 | ||
1268 | static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len) | ||
1269 | { | ||
1270 | struct sk_buff *skb; | ||
1271 | int copied = 0, err = 0; | ||
1272 | |||
1273 | /* XXX -- need to support SO_PEEK_OFF */ | ||
1274 | |||
1275 | skb_queue_walk(&sk->sk_write_queue, skb) { | ||
1276 | err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, skb->len); | ||
1277 | if (err) | ||
1278 | break; | ||
1279 | |||
1280 | copied += skb->len; | ||
1281 | } | ||
1282 | |||
1283 | return err ?: copied; | ||
1284 | } | ||
1285 | |||
1190 | /* Clean up the receive buffer for full frames taken by the user, | 1286 | /* Clean up the receive buffer for full frames taken by the user, |
1191 | * then send an ACK if necessary. COPIED is the number of bytes | 1287 | * then send an ACK if necessary. COPIED is the number of bytes |
1192 | * tcp_recvmsg has given to the user so far, it speeds up the | 1288 | * tcp_recvmsg has given to the user so far, it speeds up the |
@@ -1196,7 +1292,7 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags) | |||
1196 | void tcp_cleanup_rbuf(struct sock *sk, int copied) | 1292 | void tcp_cleanup_rbuf(struct sock *sk, int copied) |
1197 | { | 1293 | { |
1198 | struct tcp_sock *tp = tcp_sk(sk); | 1294 | struct tcp_sock *tp = tcp_sk(sk); |
1199 | int time_to_ack = 0; | 1295 | bool time_to_ack = false; |
1200 | 1296 | ||
1201 | struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); | 1297 | struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); |
1202 | 1298 | ||
@@ -1222,7 +1318,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) | |||
1222 | ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && | 1318 | ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && |
1223 | !icsk->icsk_ack.pingpong)) && | 1319 | !icsk->icsk_ack.pingpong)) && |
1224 | !atomic_read(&sk->sk_rmem_alloc))) | 1320 | !atomic_read(&sk->sk_rmem_alloc))) |
1225 | time_to_ack = 1; | 1321 | time_to_ack = true; |
1226 | } | 1322 | } |
1227 | 1323 | ||
1228 | /* We send an ACK if we can now advertise a non-zero window | 1324 | /* We send an ACK if we can now advertise a non-zero window |
@@ -1244,7 +1340,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) | |||
1244 | * "Lots" means "at least twice" here. | 1340 | * "Lots" means "at least twice" here. |
1245 | */ | 1341 | */ |
1246 | if (new_window && new_window >= 2 * rcv_window_now) | 1342 | if (new_window && new_window >= 2 * rcv_window_now) |
1247 | time_to_ack = 1; | 1343 | time_to_ack = true; |
1248 | } | 1344 | } |
1249 | } | 1345 | } |
1250 | if (time_to_ack) | 1346 | if (time_to_ack) |
@@ -1376,11 +1472,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | |||
1376 | break; | 1472 | break; |
1377 | } | 1473 | } |
1378 | if (tcp_hdr(skb)->fin) { | 1474 | if (tcp_hdr(skb)->fin) { |
1379 | sk_eat_skb(sk, skb, 0); | 1475 | sk_eat_skb(sk, skb, false); |
1380 | ++seq; | 1476 | ++seq; |
1381 | break; | 1477 | break; |
1382 | } | 1478 | } |
1383 | sk_eat_skb(sk, skb, 0); | 1479 | sk_eat_skb(sk, skb, false); |
1384 | if (!desc->count) | 1480 | if (!desc->count) |
1385 | break; | 1481 | break; |
1386 | tp->copied_seq = seq; | 1482 | tp->copied_seq = seq; |
@@ -1416,7 +1512,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1416 | int target; /* Read at least this many bytes */ | 1512 | int target; /* Read at least this many bytes */ |
1417 | long timeo; | 1513 | long timeo; |
1418 | struct task_struct *user_recv = NULL; | 1514 | struct task_struct *user_recv = NULL; |
1419 | int copied_early = 0; | 1515 | bool copied_early = false; |
1420 | struct sk_buff *skb; | 1516 | struct sk_buff *skb; |
1421 | u32 urg_hole = 0; | 1517 | u32 urg_hole = 0; |
1422 | 1518 | ||
@@ -1432,6 +1528,21 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1432 | if (flags & MSG_OOB) | 1528 | if (flags & MSG_OOB) |
1433 | goto recv_urg; | 1529 | goto recv_urg; |
1434 | 1530 | ||
1531 | if (unlikely(tp->repair)) { | ||
1532 | err = -EPERM; | ||
1533 | if (!(flags & MSG_PEEK)) | ||
1534 | goto out; | ||
1535 | |||
1536 | if (tp->repair_queue == TCP_SEND_QUEUE) | ||
1537 | goto recv_sndq; | ||
1538 | |||
1539 | err = -EINVAL; | ||
1540 | if (tp->repair_queue == TCP_NO_QUEUE) | ||
1541 | goto out; | ||
1542 | |||
1543 | /* 'common' recv queue MSG_PEEK-ing */ | ||
1544 | } | ||
1545 | |||
1435 | seq = &tp->copied_seq; | 1546 | seq = &tp->copied_seq; |
1436 | if (flags & MSG_PEEK) { | 1547 | if (flags & MSG_PEEK) { |
1437 | peek_seq = tp->copied_seq; | 1548 | peek_seq = tp->copied_seq; |
@@ -1452,7 +1563,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
1452 | if ((available < target) && | 1563 | if ((available < target) && |
1453 | (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && | 1564 | (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && |
1454 | !sysctl_tcp_low_latency && | 1565 | !sysctl_tcp_low_latency && |
1455 | dma_find_channel(DMA_MEMCPY)) { | 1566 | net_dma_find_channel()) { |
1456 | preempt_enable_no_resched(); | 1567 | preempt_enable_no_resched(); |
1457 | tp->ucopy.pinned_list = | 1568 | tp->ucopy.pinned_list = |
1458 | dma_pin_iovec_pages(msg->msg_iov, len); | 1569 | dma_pin_iovec_pages(msg->msg_iov, len); |
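Both DMA offload call sites (here and in the receive copy loop below) move from dma_find_channel(DMA_MEMCPY) to a networking-specific wrapper. The wrapper presumably exists so net code has one place to attach channel policy, reportedly an alignment capability check in this series; the minimal sketch below ignores that and shows only the simplest possible shape, so treat it as an assumption:

	static inline struct dma_chan *net_dma_find_channel(void)
	{
	#ifdef CONFIG_NET_DMA
		return dma_find_channel(DMA_MEMCPY);
	#else
		return NULL;
	#endif
	}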
@@ -1633,9 +1744,9 @@ do_prequeue: | |||
1633 | } | 1744 | } |
1634 | if ((flags & MSG_PEEK) && | 1745 | if ((flags & MSG_PEEK) && |
1635 | (peek_seq - copied - urg_hole != tp->copied_seq)) { | 1746 | (peek_seq - copied - urg_hole != tp->copied_seq)) { |
1636 | if (net_ratelimit()) | 1747 | net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n", |
1637 | printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", | 1748 | current->comm, |
1638 | current->comm, task_pid_nr(current)); | 1749 | task_pid_nr(current)); |
1639 | peek_seq = tp->copied_seq; | 1750 | peek_seq = tp->copied_seq; |
1640 | } | 1751 | } |
1641 | continue; | 1752 | continue; |
@@ -1667,7 +1778,7 @@ do_prequeue: | |||
1667 | if (!(flags & MSG_TRUNC)) { | 1778 | if (!(flags & MSG_TRUNC)) { |
1668 | #ifdef CONFIG_NET_DMA | 1779 | #ifdef CONFIG_NET_DMA |
1669 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) | 1780 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) |
1670 | tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY); | 1781 | tp->ucopy.dma_chan = net_dma_find_channel(); |
1671 | 1782 | ||
1672 | if (tp->ucopy.dma_chan) { | 1783 | if (tp->ucopy.dma_chan) { |
1673 | tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec( | 1784 | tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec( |
@@ -1689,7 +1800,7 @@ do_prequeue: | |||
1689 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); | 1800 | dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); |
1690 | 1801 | ||
1691 | if ((offset + used) == skb->len) | 1802 | if ((offset + used) == skb->len) |
1692 | copied_early = 1; | 1803 | copied_early = true; |
1693 | 1804 | ||
1694 | } else | 1805 | } else |
1695 | #endif | 1806 | #endif |
@@ -1723,7 +1834,7 @@ skip_copy: | |||
1723 | goto found_fin_ok; | 1834 | goto found_fin_ok; |
1724 | if (!(flags & MSG_PEEK)) { | 1835 | if (!(flags & MSG_PEEK)) { |
1725 | sk_eat_skb(sk, skb, copied_early); | 1836 | sk_eat_skb(sk, skb, copied_early); |
1726 | copied_early = 0; | 1837 | copied_early = false; |
1727 | } | 1838 | } |
1728 | continue; | 1839 | continue; |
1729 | 1840 | ||
@@ -1732,7 +1843,7 @@ skip_copy: | |||
1732 | ++*seq; | 1843 | ++*seq; |
1733 | if (!(flags & MSG_PEEK)) { | 1844 | if (!(flags & MSG_PEEK)) { |
1734 | sk_eat_skb(sk, skb, copied_early); | 1845 | sk_eat_skb(sk, skb, copied_early); |
1735 | copied_early = 0; | 1846 | copied_early = false; |
1736 | } | 1847 | } |
1737 | break; | 1848 | break; |
1738 | } while (len > 0); | 1849 | } while (len > 0); |
@@ -1783,6 +1894,10 @@ out: | |||
1783 | recv_urg: | 1894 | recv_urg: |
1784 | err = tcp_recv_urg(sk, msg, len, flags); | 1895 | err = tcp_recv_urg(sk, msg, len, flags); |
1785 | goto out; | 1896 | goto out; |
1897 | |||
1898 | recv_sndq: | ||
1899 | err = tcp_peek_sndq(sk, msg, len); | ||
1900 | goto out; | ||
1786 | } | 1901 | } |
1787 | EXPORT_SYMBOL(tcp_recvmsg); | 1902 | EXPORT_SYMBOL(tcp_recvmsg); |
1788 | 1903 | ||
@@ -1886,10 +2001,10 @@ bool tcp_check_oom(struct sock *sk, int shift) | |||
1886 | too_many_orphans = tcp_too_many_orphans(sk, shift); | 2001 | too_many_orphans = tcp_too_many_orphans(sk, shift); |
1887 | out_of_socket_memory = tcp_out_of_memory(sk); | 2002 | out_of_socket_memory = tcp_out_of_memory(sk); |
1888 | 2003 | ||
1889 | if (too_many_orphans && net_ratelimit()) | 2004 | if (too_many_orphans) |
1890 | pr_info("too many orphaned sockets\n"); | 2005 | net_info_ratelimited("too many orphaned sockets\n"); |
1891 | if (out_of_socket_memory && net_ratelimit()) | 2006 | if (out_of_socket_memory) |
1892 | pr_info("out of memory -- consider tuning tcp_mem\n"); | 2007 | net_info_ratelimited("out of memory -- consider tuning tcp_mem\n"); |
1893 | return too_many_orphans || out_of_socket_memory; | 2008 | return too_many_orphans || out_of_socket_memory; |
1894 | } | 2009 | } |
1895 | 2010 | ||
@@ -1935,7 +2050,9 @@ void tcp_close(struct sock *sk, long timeout) | |||
1935 | * advertise a zero window, then kill -9 the FTP client, wheee... | 2050 | * advertise a zero window, then kill -9 the FTP client, wheee... |
1936 | * Note: timeout is always zero in such a case. | 2051 | * Note: timeout is always zero in such a case. |
1937 | */ | 2052 | */ |
1938 | if (data_was_unread) { | 2053 | if (unlikely(tcp_sk(sk)->repair)) { |
2054 | sk->sk_prot->disconnect(sk, 0); | ||
2055 | } else if (data_was_unread) { | ||
1939 | /* Unread data was tossed, zap the connection. */ | 2056 | /* Unread data was tossed, zap the connection. */ |
1940 | NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); | 2057 | NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); |
1941 | tcp_set_state(sk, TCP_CLOSE); | 2058 | tcp_set_state(sk, TCP_CLOSE); |
@@ -2053,7 +2170,7 @@ EXPORT_SYMBOL(tcp_close); | |||
2053 | 2170 | ||
2054 | /* These states need RST on ABORT according to RFC793 */ | 2171 | /* These states need RST on ABORT according to RFC793 */ |
2055 | 2172 | ||
2056 | static inline int tcp_need_reset(int state) | 2173 | static inline bool tcp_need_reset(int state) |
2057 | { | 2174 | { |
2058 | return (1 << state) & | 2175 | return (1 << state) & |
2059 | (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 | | 2176 | (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 | |
@@ -2074,6 +2191,8 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2074 | /* ABORT function of RFC793 */ | 2191 | /* ABORT function of RFC793 */ |
2075 | if (old_state == TCP_LISTEN) { | 2192 | if (old_state == TCP_LISTEN) { |
2076 | inet_csk_listen_stop(sk); | 2193 | inet_csk_listen_stop(sk); |
2194 | } else if (unlikely(tp->repair)) { | ||
2195 | sk->sk_err = ECONNABORTED; | ||
2077 | } else if (tcp_need_reset(old_state) || | 2196 | } else if (tcp_need_reset(old_state) || |
2078 | (tp->snd_nxt != tp->write_seq && | 2197 | (tp->snd_nxt != tp->write_seq && |
2079 | (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { | 2198 | (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { |
@@ -2125,6 +2244,54 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2125 | } | 2244 | } |
2126 | EXPORT_SYMBOL(tcp_disconnect); | 2245 | EXPORT_SYMBOL(tcp_disconnect); |
2127 | 2246 | ||
2247 | static inline bool tcp_can_repair_sock(const struct sock *sk) | ||
2248 | { | ||
2249 | return capable(CAP_NET_ADMIN) && | ||
2250 | ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); | ||
2251 | } | ||
2252 | |||
2253 | static int tcp_repair_options_est(struct tcp_sock *tp, | ||
2254 | struct tcp_repair_opt __user *optbuf, unsigned int len) | ||
2255 | { | ||
2256 | struct tcp_repair_opt opt; | ||
2257 | |||
2258 | while (len >= sizeof(opt)) { | ||
2259 | if (copy_from_user(&opt, optbuf, sizeof(opt))) | ||
2260 | return -EFAULT; | ||
2261 | |||
2262 | optbuf++; | ||
2263 | len -= sizeof(opt); | ||
2264 | |||
2265 | switch (opt.opt_code) { | ||
2266 | case TCPOPT_MSS: | ||
2267 | tp->rx_opt.mss_clamp = opt.opt_val; | ||
2268 | break; | ||
2269 | case TCPOPT_WINDOW: | ||
2270 | if (opt.opt_val > 14) | ||
2271 | return -EFBIG; | ||
2272 | |||
2273 | tp->rx_opt.snd_wscale = opt.opt_val; | ||
2274 | break; | ||
2275 | case TCPOPT_SACK_PERM: | ||
2276 | if (opt.opt_val != 0) | ||
2277 | return -EINVAL; | ||
2278 | |||
2279 | tp->rx_opt.sack_ok |= TCP_SACK_SEEN; | ||
2280 | if (sysctl_tcp_fack) | ||
2281 | tcp_enable_fack(tp); | ||
2282 | break; | ||
2283 | case TCPOPT_TIMESTAMP: | ||
2284 | if (opt.opt_val != 0) | ||
2285 | return -EINVAL; | ||
2286 | |||
2287 | tp->rx_opt.tstamp_ok = 1; | ||
2288 | break; | ||
2289 | } | ||
2290 | } | ||
2291 | |||
2292 | return 0; | ||
2293 | } | ||
2294 | |||
2128 | /* | 2295 | /* |
2129 | * Socket option code for TCP. | 2296 | * Socket option code for TCP. |
2130 | */ | 2297 | */ |
@@ -2295,6 +2462,55 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2295 | err = -EINVAL; | 2462 | err = -EINVAL; |
2296 | else | 2463 | else |
2297 | tp->thin_dupack = val; | 2464 | tp->thin_dupack = val; |
2465 | if (tp->thin_dupack) | ||
2466 | tcp_disable_early_retrans(tp); | ||
2467 | break; | ||
2468 | |||
2469 | case TCP_REPAIR: | ||
2470 | if (!tcp_can_repair_sock(sk)) | ||
2471 | err = -EPERM; | ||
2472 | else if (val == 1) { | ||
2473 | tp->repair = 1; | ||
2474 | sk->sk_reuse = SK_FORCE_REUSE; | ||
2475 | tp->repair_queue = TCP_NO_QUEUE; | ||
2476 | } else if (val == 0) { | ||
2477 | tp->repair = 0; | ||
2478 | sk->sk_reuse = SK_NO_REUSE; | ||
2479 | tcp_send_window_probe(sk); | ||
2480 | } else | ||
2481 | err = -EINVAL; | ||
2482 | |||
2483 | break; | ||
2484 | |||
2485 | case TCP_REPAIR_QUEUE: | ||
2486 | if (!tp->repair) | ||
2487 | err = -EPERM; | ||
2488 | else if (val < TCP_QUEUES_NR) | ||
2489 | tp->repair_queue = val; | ||
2490 | else | ||
2491 | err = -EINVAL; | ||
2492 | break; | ||
2493 | |||
2494 | case TCP_QUEUE_SEQ: | ||
2495 | if (sk->sk_state != TCP_CLOSE) | ||
2496 | err = -EPERM; | ||
2497 | else if (tp->repair_queue == TCP_SEND_QUEUE) | ||
2498 | tp->write_seq = val; | ||
2499 | else if (tp->repair_queue == TCP_RECV_QUEUE) | ||
2500 | tp->rcv_nxt = val; | ||
2501 | else | ||
2502 | err = -EINVAL; | ||
2503 | break; | ||
2504 | |||
2505 | case TCP_REPAIR_OPTIONS: | ||
2506 | if (!tp->repair) | ||
2507 | err = -EINVAL; | ||
2508 | else if (sk->sk_state == TCP_ESTABLISHED) | ||
2509 | err = tcp_repair_options_est(tp, | ||
2510 | (struct tcp_repair_opt __user *)optval, | ||
2511 | optlen); | ||
2512 | else | ||
2513 | err = -EPERM; | ||
2298 | break; | 2514 | break; |
2299 | 2515 | ||
2300 | case TCP_CORK: | 2516 | case TCP_CORK: |
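Taken together, TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ and TCP_REPAIR_OPTIONS let a checkpoint/restore tool rebuild an established socket without the peer's involvement. A hypothetical restore sequence from userspace, with error handling omitted and the saved state (addresses and sequence numbers) coming from a checkpoint image; the TCP_REPAIR* constants are from the new linux/tcp.h ABI and CAP_NET_ADMIN is required:

	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <linux/tcp.h>

	static int tcp_restore(struct sockaddr_in *self, struct sockaddr_in *peer,
			       unsigned int rcv_nxt, unsigned int write_seq)
	{
		int s = socket(AF_INET, SOCK_STREAM, 0);
		int on = 1, off = 0, q;

		setsockopt(s, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on));

		q = TCP_RECV_QUEUE;	/* restore the receive-side sequence */
		setsockopt(s, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q));
		setsockopt(s, IPPROTO_TCP, TCP_QUEUE_SEQ, &rcv_nxt, sizeof(rcv_nxt));

		q = TCP_SEND_QUEUE;	/* and the send-side sequence */
		setsockopt(s, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q));
		setsockopt(s, IPPROTO_TCP, TCP_QUEUE_SEQ, &write_seq, sizeof(write_seq));

		bind(s, (struct sockaddr *)self, sizeof(*self));
		connect(s, (struct sockaddr *)peer, sizeof(*peer)); /* no handshake
								       in repair mode */

		setsockopt(s, IPPROTO_TCP, TCP_REPAIR, &off, sizeof(off)); /* resume */
		return s;
	}

Leaving repair mode triggers tcp_send_window_probe(), visible in the TCP_REPAIR case above, so the peer immediately learns the restored window.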
@@ -2530,6 +2746,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2530 | val = tp->mss_cache; | 2746 | val = tp->mss_cache; |
2531 | if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) | 2747 | if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) |
2532 | val = tp->rx_opt.user_mss; | 2748 | val = tp->rx_opt.user_mss; |
2749 | if (tp->repair) | ||
2750 | val = tp->rx_opt.mss_clamp; | ||
2533 | break; | 2751 | break; |
2534 | case TCP_NODELAY: | 2752 | case TCP_NODELAY: |
2535 | val = !!(tp->nonagle&TCP_NAGLE_OFF); | 2753 | val = !!(tp->nonagle&TCP_NAGLE_OFF); |
@@ -2632,6 +2850,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2632 | val = tp->thin_dupack; | 2850 | val = tp->thin_dupack; |
2633 | break; | 2851 | break; |
2634 | 2852 | ||
2853 | case TCP_REPAIR: | ||
2854 | val = tp->repair; | ||
2855 | break; | ||
2856 | |||
2857 | case TCP_REPAIR_QUEUE: | ||
2858 | if (tp->repair) | ||
2859 | val = tp->repair_queue; | ||
2860 | else | ||
2861 | return -EINVAL; | ||
2862 | break; | ||
2863 | |||
2864 | case TCP_QUEUE_SEQ: | ||
2865 | if (tp->repair_queue == TCP_SEND_QUEUE) | ||
2866 | val = tp->write_seq; | ||
2867 | else if (tp->repair_queue == TCP_RECV_QUEUE) | ||
2868 | val = tp->rcv_nxt; | ||
2869 | else | ||
2870 | return -EINVAL; | ||
2871 | break; | ||
2872 | |||
2635 | case TCP_USER_TIMEOUT: | 2873 | case TCP_USER_TIMEOUT: |
2636 | val = jiffies_to_msecs(icsk->icsk_user_timeout); | 2874 | val = jiffies_to_msecs(icsk->icsk_user_timeout); |
2637 | break; | 2875 | break; |
@@ -2675,7 +2913,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, | |||
2675 | { | 2913 | { |
2676 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 2914 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
2677 | struct tcphdr *th; | 2915 | struct tcphdr *th; |
2678 | unsigned thlen; | 2916 | unsigned int thlen; |
2679 | unsigned int seq; | 2917 | unsigned int seq; |
2680 | __be32 delta; | 2918 | __be32 delta; |
2681 | unsigned int oldlen; | 2919 | unsigned int oldlen; |
@@ -2933,13 +3171,13 @@ out_free: | |||
2933 | struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk) | 3171 | struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk) |
2934 | { | 3172 | { |
2935 | struct tcp_md5sig_pool __percpu *pool; | 3173 | struct tcp_md5sig_pool __percpu *pool; |
2936 | int alloc = 0; | 3174 | bool alloc = false; |
2937 | 3175 | ||
2938 | retry: | 3176 | retry: |
2939 | spin_lock_bh(&tcp_md5sig_pool_lock); | 3177 | spin_lock_bh(&tcp_md5sig_pool_lock); |
2940 | pool = tcp_md5sig_pool; | 3178 | pool = tcp_md5sig_pool; |
2941 | if (tcp_md5sig_users++ == 0) { | 3179 | if (tcp_md5sig_users++ == 0) { |
2942 | alloc = 1; | 3180 | alloc = true; |
2943 | spin_unlock_bh(&tcp_md5sig_pool_lock); | 3181 | spin_unlock_bh(&tcp_md5sig_pool_lock); |
2944 | } else if (!pool) { | 3182 | } else if (!pool) { |
2945 | tcp_md5sig_users--; | 3183 | tcp_md5sig_users--; |
@@ -3033,9 +3271,9 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, | |||
3033 | struct scatterlist sg; | 3271 | struct scatterlist sg; |
3034 | const struct tcphdr *tp = tcp_hdr(skb); | 3272 | const struct tcphdr *tp = tcp_hdr(skb); |
3035 | struct hash_desc *desc = &hp->md5_desc; | 3273 | struct hash_desc *desc = &hp->md5_desc; |
3036 | unsigned i; | 3274 | unsigned int i; |
3037 | const unsigned head_data_len = skb_headlen(skb) > header_len ? | 3275 | const unsigned int head_data_len = skb_headlen(skb) > header_len ? |
3038 | skb_headlen(skb) - header_len : 0; | 3276 | skb_headlen(skb) - header_len : 0; |
3039 | const struct skb_shared_info *shi = skb_shinfo(skb); | 3277 | const struct skb_shared_info *shi = skb_shinfo(skb); |
3040 | struct sk_buff *frag_iter; | 3278 | struct sk_buff *frag_iter; |
3041 | 3279 | ||
@@ -3223,9 +3461,15 @@ extern struct tcp_congestion_ops tcp_reno; | |||
3223 | static __initdata unsigned long thash_entries; | 3461 | static __initdata unsigned long thash_entries; |
3224 | static int __init set_thash_entries(char *str) | 3462 | static int __init set_thash_entries(char *str) |
3225 | { | 3463 | { |
3464 | ssize_t ret; | ||
3465 | |||
3226 | if (!str) | 3466 | if (!str) |
3227 | return 0; | 3467 | return 0; |
3228 | thash_entries = simple_strtoul(str, &str, 0); | 3468 | |
3469 | ret = kstrtoul(str, 0, &thash_entries); | ||
3470 | if (ret) | ||
3471 | return 0; | ||
3472 | |||
3229 | return 1; | 3473 | return 1; |
3230 | } | 3474 | } |
3231 | __setup("thash_entries=", set_thash_entries); | 3475 | __setup("thash_entries=", set_thash_entries); |
@@ -3243,7 +3487,7 @@ void __init tcp_init(void) | |||
3243 | { | 3487 | { |
3244 | struct sk_buff *skb = NULL; | 3488 | struct sk_buff *skb = NULL; |
3245 | unsigned long limit; | 3489 | unsigned long limit; |
3246 | int max_share, cnt; | 3490 | int max_rshare, max_wshare, cnt; |
3247 | unsigned int i; | 3491 | unsigned int i; |
3248 | unsigned long jiffy = jiffies; | 3492 | unsigned long jiffy = jiffies; |
3249 | 3493 | ||
@@ -3302,17 +3546,17 @@ void __init tcp_init(void) | |||
3302 | 3546 | ||
3303 | tcp_init_mem(&init_net); | 3547 | tcp_init_mem(&init_net); |
3304 | /* Set per-socket limits to no more than 1/128 the pressure threshold */ | 3548 | /* Set per-socket limits to no more than 1/128 the pressure threshold */ |
3305 | limit = nr_free_buffer_pages() << (PAGE_SHIFT - 10); | 3549 | limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); |
3306 | limit = max(limit, 128UL); | 3550 | max_wshare = min(4UL*1024*1024, limit); |
3307 | max_share = min(4UL*1024*1024, limit); | 3551 | max_rshare = min(6UL*1024*1024, limit); |
3308 | 3552 | ||
3309 | sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; | 3553 | sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; |
3310 | sysctl_tcp_wmem[1] = 16*1024; | 3554 | sysctl_tcp_wmem[1] = 16*1024; |
3311 | sysctl_tcp_wmem[2] = max(64*1024, max_share); | 3555 | sysctl_tcp_wmem[2] = max(64*1024, max_wshare); |
3312 | 3556 | ||
3313 | sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; | 3557 | sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; |
3314 | sysctl_tcp_rmem[1] = 87380; | 3558 | sysctl_tcp_rmem[1] = 87380; |
3315 | sysctl_tcp_rmem[2] = max(87380, max_share); | 3559 | sysctl_tcp_rmem[2] = max(87380, max_rshare); |
3316 | 3560 | ||
3317 | pr_info("Hash tables configured (established %u bind %u)\n", | 3561 | pr_info("Hash tables configured (established %u bind %u)\n", |
3318 | tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); | 3562 | tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); |
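The per-socket defaults now derive from memory in bytes rather than the old kilobyte-flavored shift, and read and write get separate caps. A worked example assuming 4 KiB pages and 1 GiB of free buffer pages (262144 pages):

	/* limit      = 262144 << (PAGE_SHIFT - 7) = 262144 << 5 = 8 MiB,
	 *              which really is 1/128 of 1 GiB as the comment claims;
	 * max_wshare = min(4 MiB, 8 MiB) = 4 MiB  ->  tcp_wmem[2]
	 * max_rshare = min(6 MiB, 8 MiB) = 6 MiB  ->  tcp_rmem[2]
	 * The old shift (PAGE_SHIFT - 10) gave the same box only
	 * 262144 << 2 = 1 MiB before clamping. */

The larger receive cap pairs with the tcp_adv_win_scale default change in tcp_input.c below.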
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 272a84593c85..04dbd7ae7c62 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
@@ -280,19 +280,19 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) | |||
280 | /* RFC2861 Check whether we are limited by application or congestion window | 280 | /* RFC2861 Check whether we are limited by application or congestion window |
281 | * This is the inverse of cwnd check in tcp_tso_should_defer | 281 | * This is the inverse of cwnd check in tcp_tso_should_defer |
282 | */ | 282 | */ |
283 | int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) | 283 | bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) |
284 | { | 284 | { |
285 | const struct tcp_sock *tp = tcp_sk(sk); | 285 | const struct tcp_sock *tp = tcp_sk(sk); |
286 | u32 left; | 286 | u32 left; |
287 | 287 | ||
288 | if (in_flight >= tp->snd_cwnd) | 288 | if (in_flight >= tp->snd_cwnd) |
289 | return 1; | 289 | return true; |
290 | 290 | ||
291 | left = tp->snd_cwnd - in_flight; | 291 | left = tp->snd_cwnd - in_flight; |
292 | if (sk_can_gso(sk) && | 292 | if (sk_can_gso(sk) && |
293 | left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && | 293 | left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && |
294 | left * tp->mss_cache < sk->sk_gso_max_size) | 294 | left * tp->mss_cache < sk->sk_gso_max_size) |
295 | return 1; | 295 | return true; |
296 | return left <= tcp_max_tso_deferred_mss(tp); | 296 | return left <= tcp_max_tso_deferred_mss(tp); |
297 | } | 297 | } |
298 | EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); | 298 | EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); |
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index fe3ecf484b44..57bdd17dff4d 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c | |||
@@ -15,7 +15,7 @@ | |||
15 | 15 | ||
16 | /* Tcp Hybla structure. */ | 16 | /* Tcp Hybla structure. */ |
17 | struct hybla { | 17 | struct hybla { |
18 | u8 hybla_en; | 18 | bool hybla_en; |
19 | u32 snd_cwnd_cents; /* Keeps increment values when it is <1, <<7 */ | 19 | u32 snd_cwnd_cents; /* Keeps increment values when it is <1, <<7 */ |
20 | u32 rho; /* Rho parameter, integer part */ | 20 | u32 rho; /* Rho parameter, integer part */ |
21 | u32 rho2; /* Rho * Rho, integer part */ | 21 | u32 rho2; /* Rho * Rho, integer part */ |
@@ -24,8 +24,7 @@ struct hybla { | |||
24 | u32 minrtt; /* Minimum smoothed round trip time value seen */ | 24 | u32 minrtt; /* Minimum smoothed round trip time value seen */ |
25 | }; | 25 | }; |
26 | 26 | ||
27 | /* Hybla reference round trip time (default= 1/40 sec = 25 ms), | 27 | /* Hybla reference round trip time (default= 1/40 sec = 25 ms), in ms */ |
28 | expressed in jiffies */ | ||
29 | static int rtt0 = 25; | 28 | static int rtt0 = 25; |
30 | module_param(rtt0, int, 0644); | 29 | module_param(rtt0, int, 0644); |
31 | MODULE_PARM_DESC(rtt0, "reference round trip time (ms)"); | 30 | MODULE_PARM_DESC(rtt0, "reference round trip time (ms)"); |
@@ -39,7 +38,7 @@ static inline void hybla_recalc_param (struct sock *sk) | |||
39 | ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8); | 38 | ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8); |
40 | ca->rho = ca->rho_3ls >> 3; | 39 | ca->rho = ca->rho_3ls >> 3; |
41 | ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1; | 40 | ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1; |
42 | ca->rho2 = ca->rho2_7ls >>7; | 41 | ca->rho2 = ca->rho2_7ls >> 7; |
43 | } | 42 | } |
44 | 43 | ||
45 | static void hybla_init(struct sock *sk) | 44 | static void hybla_init(struct sock *sk) |
@@ -52,7 +51,7 @@ static void hybla_init(struct sock *sk) | |||
52 | ca->rho_3ls = 0; | 51 | ca->rho_3ls = 0; |
53 | ca->rho2_7ls = 0; | 52 | ca->rho2_7ls = 0; |
54 | ca->snd_cwnd_cents = 0; | 53 | ca->snd_cwnd_cents = 0; |
55 | ca->hybla_en = 1; | 54 | ca->hybla_en = true; |
56 | tp->snd_cwnd = 2; | 55 | tp->snd_cwnd = 2; |
57 | tp->snd_cwnd_clamp = 65535; | 56 | tp->snd_cwnd_clamp = 65535; |
58 | 57 | ||
@@ -67,6 +66,7 @@ static void hybla_init(struct sock *sk) | |||
67 | static void hybla_state(struct sock *sk, u8 ca_state) | 66 | static void hybla_state(struct sock *sk, u8 ca_state) |
68 | { | 67 | { |
69 | struct hybla *ca = inet_csk_ca(sk); | 68 | struct hybla *ca = inet_csk_ca(sk); |
69 | |||
70 | ca->hybla_en = (ca_state == TCP_CA_Open); | 70 | ca->hybla_en = (ca_state == TCP_CA_Open); |
71 | } | 71 | } |
72 | 72 | ||
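For readers following the fixed-point math in hybla_recalc_param(): tp->srtt is stored as 8 x RTT in jiffies, so rho_3ls carries rho in 1/8 units and rho2_7ls carries rho^2 in 1/128 units. A worked example, assuming HZ = 1000 and an illustrative 200 ms SRTT:

/* SRTT = 200 ms, rtt0 = 25 ms, HZ = 1000, so tp->srtt = 1600 (kept <<3) */
u32 srtt     = 1600;
u32 rho_3ls  = max_t(u32, srtt / msecs_to_jiffies(25), 8);	/* 1600/25 = 64 */
u32 rho      = rho_3ls >> 3;					/* 64 >> 3 = 8 */
u32 rho2_7ls = (rho_3ls * rho_3ls) << 1;			/* 64*64*2 = 8192 */
u32 rho2     = rho2_7ls >> 7;					/* 8192 >> 7 = 64 = rho^2 */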
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e886e2f7fa8d..cfa2aa128342 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -85,7 +85,7 @@ int sysctl_tcp_ecn __read_mostly = 2; | |||
85 | EXPORT_SYMBOL(sysctl_tcp_ecn); | 85 | EXPORT_SYMBOL(sysctl_tcp_ecn); |
86 | int sysctl_tcp_dsack __read_mostly = 1; | 86 | int sysctl_tcp_dsack __read_mostly = 1; |
87 | int sysctl_tcp_app_win __read_mostly = 31; | 87 | int sysctl_tcp_app_win __read_mostly = 31; |
88 | int sysctl_tcp_adv_win_scale __read_mostly = 2; | 88 | int sysctl_tcp_adv_win_scale __read_mostly = 1; |
89 | EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); | 89 | EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); |
90 | 90 | ||
91 | int sysctl_tcp_stdurg __read_mostly; | 91 | int sysctl_tcp_stdurg __read_mostly; |
@@ -99,6 +99,7 @@ int sysctl_tcp_thin_dupack __read_mostly; | |||
99 | 99 | ||
100 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; | 100 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; |
101 | int sysctl_tcp_abc __read_mostly; | 101 | int sysctl_tcp_abc __read_mostly; |
102 | int sysctl_tcp_early_retrans __read_mostly = 2; | ||
102 | 103 | ||
103 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ | 104 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ |
104 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ | 105 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ |
@@ -175,7 +176,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) | |||
175 | static void tcp_incr_quickack(struct sock *sk) | 176 | static void tcp_incr_quickack(struct sock *sk) |
176 | { | 177 | { |
177 | struct inet_connection_sock *icsk = inet_csk(sk); | 178 | struct inet_connection_sock *icsk = inet_csk(sk); |
178 | unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); | 179 | unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); |
179 | 180 | ||
180 | if (quickacks == 0) | 181 | if (quickacks == 0) |
181 | quickacks = 2; | 182 | quickacks = 2; |
@@ -195,9 +196,10 @@ static void tcp_enter_quickack_mode(struct sock *sk) | |||
195 | * and the session is not interactive. | 196 | * and the session is not interactive. |
196 | */ | 197 | */ |
197 | 198 | ||
198 | static inline int tcp_in_quickack_mode(const struct sock *sk) | 199 | static inline bool tcp_in_quickack_mode(const struct sock *sk) |
199 | { | 200 | { |
200 | const struct inet_connection_sock *icsk = inet_csk(sk); | 201 | const struct inet_connection_sock *icsk = inet_csk(sk); |
202 | |||
201 | return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; | 203 | return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; |
202 | } | 204 | } |
203 | 205 | ||
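The quick-ACK budget computed in tcp_incr_quickack() above is half the receive window measured in MSS-sized segments, floored at 2. Illustrative numbers:

/* rcv_wnd = 65536, rcv_mss = 1460 */
unsigned int quickacks = 65536 / (2 * 1460);	/* = 22 forced quick ACKs */
if (quickacks == 0)
	quickacks = 2;				/* never fewer than two */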
@@ -252,11 +254,11 @@ static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th) | |||
252 | tp->ecn_flags &= ~TCP_ECN_OK; | 254 | tp->ecn_flags &= ~TCP_ECN_OK; |
253 | } | 255 | } |
254 | 256 | ||
255 | static inline int TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th) | 257 | static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th) |
256 | { | 258 | { |
257 | if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK)) | 259 | if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK)) |
258 | return 1; | 260 | return true; |
259 | return 0; | 261 | return false; |
260 | } | 262 | } |
261 | 263 | ||
262 | /* Buffer size and advertised window tuning. | 264 | /* Buffer size and advertised window tuning. |
@@ -335,6 +337,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) | |||
335 | incr = __tcp_grow_window(sk, skb); | 337 | incr = __tcp_grow_window(sk, skb); |
336 | 338 | ||
337 | if (incr) { | 339 | if (incr) { |
340 | incr = max_t(int, incr, 2 * skb->len); | ||
338 | tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, | 341 | tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, |
339 | tp->window_clamp); | 342 | tp->window_clamp); |
340 | inet_csk(sk)->icsk_ack.quick |= 1; | 343 | inet_csk(sk)->icsk_ack.quick |= 1; |
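The added max_t() guarantees rcv_ssthresh advances by at least twice the payload of the segment that triggered the growth, so senders using small segments still see the advertised window open at a useful rate. Illustrative effect:

/* __tcp_grow_window() suggested a small increment */
int incr = 100;				/* value from rcvbuf pressure */
int skb_len = 1460;			/* payload of the received skb */
incr = max_t(int, incr, 2 * skb_len);	/* bumped to 2920 */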
@@ -474,8 +477,11 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep) | |||
474 | if (!win_dep) { | 477 | if (!win_dep) { |
475 | m -= (new_sample >> 3); | 478 | m -= (new_sample >> 3); |
476 | new_sample += m; | 479 | new_sample += m; |
477 | } else if (m < new_sample) | 480 | } else { |
478 | new_sample = m << 3; | 481 | m <<= 3; |
482 | if (m < new_sample) | ||
483 | new_sample = m; | ||
484 | } | ||
479 | } else { | 485 | } else { |
480 | /* No previous measure. */ | 486 | /* No previous measure. */ |
481 | new_sample = m << 3; | 487 | new_sample = m << 3; |
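This hunk fixes a units bug in the receiver-side RTT estimator: new_sample holds the average left-shifted by 3 (8 x RTT), while the old win_dep branch compared the raw sample m against that scaled value, so the "take the minimum" path could actually raise the estimate. Worked numbers (illustrative):

/* Estimate is 40 ms: new_sample = 40 << 3 = 320. A win_dep sample
 * m = 60 ms arrives.
 * Old code: m (60) < new_sample (320), so new_sample = 60 << 3 = 480,
 *           raising the estimate on a branch meant to lower it.
 * New code: m <<= 3 gives 480; 480 < 320 is false, estimate unchanged.
 */
u32 new_sample = 320;
u32 m = 60;
m <<= 3;			/* bring the sample into the <<3 domain */
if (m < new_sample)		/* now a true minimum */
	new_sample = m;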
@@ -491,7 +497,7 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp) | |||
491 | goto new_measure; | 497 | goto new_measure; |
492 | if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) | 498 | if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) |
493 | return; | 499 | return; |
494 | tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1); | 500 | tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1); |
495 | 501 | ||
496 | new_measure: | 502 | new_measure: |
497 | tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd; | 503 | tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd; |
@@ -902,6 +908,7 @@ static void tcp_init_metrics(struct sock *sk) | |||
902 | if (dst_metric(dst, RTAX_REORDERING) && | 908 | if (dst_metric(dst, RTAX_REORDERING) && |
903 | tp->reordering != dst_metric(dst, RTAX_REORDERING)) { | 909 | tp->reordering != dst_metric(dst, RTAX_REORDERING)) { |
904 | tcp_disable_fack(tp); | 910 | tcp_disable_fack(tp); |
911 | tcp_disable_early_retrans(tp); | ||
905 | tp->reordering = dst_metric(dst, RTAX_REORDERING); | 912 | tp->reordering = dst_metric(dst, RTAX_REORDERING); |
906 | } | 913 | } |
907 | 914 | ||
@@ -933,7 +940,7 @@ static void tcp_init_metrics(struct sock *sk) | |||
933 | tcp_set_rto(sk); | 940 | tcp_set_rto(sk); |
934 | reset: | 941 | reset: |
935 | if (tp->srtt == 0) { | 942 | if (tp->srtt == 0) { |
936 | /* RFC2988bis: We've failed to get a valid RTT sample from | 943 | /* RFC6298: 5.7 We've failed to get a valid RTT sample from |
937 | * 3WHS. This is most likely due to retransmission, | 944 | * 3WHS. This is most likely due to retransmission, |
938 | * including spurious one. Reset the RTO back to 3secs | 945 | * including spurious one. Reset the RTO back to 3secs |
939 | * from the more aggressive 1sec to avoid more spurious | 946 | * from the more aggressive 1sec to avoid more spurious |
@@ -943,7 +950,7 @@ reset: | |||
943 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; | 950 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; |
944 | } | 951 | } |
945 | /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been | 952 | /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been |
946 | * retransmitted. In light of RFC2988bis' more aggressive 1sec | 953 | * retransmitted. In light of RFC6298 more aggressive 1sec |
947 | * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK | 954 | * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK |
948 | * retransmission has occurred. | 955 | * retransmission has occurred. |
949 | */ | 956 | */ |
@@ -975,15 +982,18 @@ static void tcp_update_reordering(struct sock *sk, const int metric, | |||
975 | 982 | ||
976 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | 983 | NET_INC_STATS_BH(sock_net(sk), mib_idx); |
977 | #if FASTRETRANS_DEBUG > 1 | 984 | #if FASTRETRANS_DEBUG > 1 |
978 | printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", | 985 | pr_debug("Disorder%d %d %u f%u s%u rr%d\n", |
979 | tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, | 986 | tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, |
980 | tp->reordering, | 987 | tp->reordering, |
981 | tp->fackets_out, | 988 | tp->fackets_out, |
982 | tp->sacked_out, | 989 | tp->sacked_out, |
983 | tp->undo_marker ? tp->undo_retrans : 0); | 990 | tp->undo_marker ? tp->undo_retrans : 0); |
984 | #endif | 991 | #endif |
985 | tcp_disable_fack(tp); | 992 | tcp_disable_fack(tp); |
986 | } | 993 | } |
994 | |||
995 | if (metric > 0) | ||
996 | tcp_disable_early_retrans(tp); | ||
987 | } | 997 | } |
988 | 998 | ||
989 | /* This must be called before lost_out is incremented */ | 999 | /* This must be called before lost_out is incremented */ |
@@ -1114,36 +1124,36 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, | |||
1114 | * the exact amount is rather hard to quantify. However, tp->max_window can | 1124 | * the exact amount is rather hard to quantify. However, tp->max_window can |
1115 | * be used as an exaggerated estimate. | 1125 | * be used as an exaggerated estimate. |
1116 | */ | 1126 | */ |
1117 | static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, | 1127 | static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack, |
1118 | u32 start_seq, u32 end_seq) | 1128 | u32 start_seq, u32 end_seq) |
1119 | { | 1129 | { |
1120 | /* Too far in future, or reversed (interpretation is ambiguous) */ | 1130 | /* Too far in future, or reversed (interpretation is ambiguous) */ |
1121 | if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq)) | 1131 | if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq)) |
1122 | return 0; | 1132 | return false; |
1123 | 1133 | ||
1124 | /* Nasty start_seq wrap-around check (see comments above) */ | 1134 | /* Nasty start_seq wrap-around check (see comments above) */ |
1125 | if (!before(start_seq, tp->snd_nxt)) | 1135 | if (!before(start_seq, tp->snd_nxt)) |
1126 | return 0; | 1136 | return false; |
1127 | 1137 | ||
1128 | /* In outstanding window? ...This is valid exit for D-SACKs too. | 1138 | /* In outstanding window? ...This is valid exit for D-SACKs too. |
1129 | * start_seq == snd_una is non-sensical (see comments above) | 1139 | * start_seq == snd_una is non-sensical (see comments above) |
1130 | */ | 1140 | */ |
1131 | if (after(start_seq, tp->snd_una)) | 1141 | if (after(start_seq, tp->snd_una)) |
1132 | return 1; | 1142 | return true; |
1133 | 1143 | ||
1134 | if (!is_dsack || !tp->undo_marker) | 1144 | if (!is_dsack || !tp->undo_marker) |
1135 | return 0; | 1145 | return false; |
1136 | 1146 | ||
1137 | /* ...Then it's D-SACK, and must reside below snd_una completely */ | 1147 | /* ...Then it's D-SACK, and must reside below snd_una completely */ |
1138 | if (after(end_seq, tp->snd_una)) | 1148 | if (after(end_seq, tp->snd_una)) |
1139 | return 0; | 1149 | return false; |
1140 | 1150 | ||
1141 | if (!before(start_seq, tp->undo_marker)) | 1151 | if (!before(start_seq, tp->undo_marker)) |
1142 | return 1; | 1152 | return true; |
1143 | 1153 | ||
1144 | /* Too old */ | 1154 | /* Too old */ |
1145 | if (!after(end_seq, tp->undo_marker)) | 1155 | if (!after(end_seq, tp->undo_marker)) |
1146 | return 0; | 1156 | return false; |
1147 | 1157 | ||
1148 | /* Undo_marker boundary crossing (overestimates a lot). Known already: | 1158 | /* Undo_marker boundary crossing (overestimates a lot). Known already: |
1149 | * start_seq < undo_marker and end_seq >= undo_marker. | 1159 | * start_seq < undo_marker and end_seq >= undo_marker. |
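All of the validity tests above rely on the modular sequence-number comparators from net/tcp.h, which stay correct across 32-bit wraparound by testing the sign of the difference. Essentially as defined there (modulo the int-to-bool conversions elsewhere in this series):

static inline int before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1 - seq2) < 0;	/* signed diff survives wrap */
}
#define after(seq2, seq1)	before(seq1, seq2)

/* Example: 0xFFFFFFF0 precedes 0x00000010 despite being numerically
 * larger, since (s32)(0xFFFFFFF0 - 0x10) = -32 < 0.
 */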
@@ -1215,17 +1225,17 @@ static void tcp_mark_lost_retrans(struct sock *sk) | |||
1215 | tp->lost_retrans_low = new_low_seq; | 1225 | tp->lost_retrans_low = new_low_seq; |
1216 | } | 1226 | } |
1217 | 1227 | ||
1218 | static int tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, | 1228 | static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, |
1219 | struct tcp_sack_block_wire *sp, int num_sacks, | 1229 | struct tcp_sack_block_wire *sp, int num_sacks, |
1220 | u32 prior_snd_una) | 1230 | u32 prior_snd_una) |
1221 | { | 1231 | { |
1222 | struct tcp_sock *tp = tcp_sk(sk); | 1232 | struct tcp_sock *tp = tcp_sk(sk); |
1223 | u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq); | 1233 | u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq); |
1224 | u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq); | 1234 | u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq); |
1225 | int dup_sack = 0; | 1235 | bool dup_sack = false; |
1226 | 1236 | ||
1227 | if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { | 1237 | if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { |
1228 | dup_sack = 1; | 1238 | dup_sack = true; |
1229 | tcp_dsack_seen(tp); | 1239 | tcp_dsack_seen(tp); |
1230 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV); | 1240 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV); |
1231 | } else if (num_sacks > 1) { | 1241 | } else if (num_sacks > 1) { |
@@ -1234,7 +1244,7 @@ static int tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, | |||
1234 | 1244 | ||
1235 | if (!after(end_seq_0, end_seq_1) && | 1245 | if (!after(end_seq_0, end_seq_1) && |
1236 | !before(start_seq_0, start_seq_1)) { | 1246 | !before(start_seq_0, start_seq_1)) { |
1237 | dup_sack = 1; | 1247 | dup_sack = true; |
1238 | tcp_dsack_seen(tp); | 1248 | tcp_dsack_seen(tp); |
1239 | NET_INC_STATS_BH(sock_net(sk), | 1249 | NET_INC_STATS_BH(sock_net(sk), |
1240 | LINUX_MIB_TCPDSACKOFORECV); | 1250 | LINUX_MIB_TCPDSACKOFORECV); |
@@ -1265,9 +1275,10 @@ struct tcp_sacktag_state { | |||
1265 | * FIXME: this could be merged to shift decision code | 1275 | * FIXME: this could be merged to shift decision code |
1266 | */ | 1276 | */ |
1267 | static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, | 1277 | static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, |
1268 | u32 start_seq, u32 end_seq) | 1278 | u32 start_seq, u32 end_seq) |
1269 | { | 1279 | { |
1270 | int in_sack, err; | 1280 | int err; |
1281 | bool in_sack; | ||
1271 | unsigned int pkt_len; | 1282 | unsigned int pkt_len; |
1272 | unsigned int mss; | 1283 | unsigned int mss; |
1273 | 1284 | ||
@@ -1313,7 +1324,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, | |||
1313 | static u8 tcp_sacktag_one(struct sock *sk, | 1324 | static u8 tcp_sacktag_one(struct sock *sk, |
1314 | struct tcp_sacktag_state *state, u8 sacked, | 1325 | struct tcp_sacktag_state *state, u8 sacked, |
1315 | u32 start_seq, u32 end_seq, | 1326 | u32 start_seq, u32 end_seq, |
1316 | int dup_sack, int pcount) | 1327 | bool dup_sack, int pcount) |
1317 | { | 1328 | { |
1318 | struct tcp_sock *tp = tcp_sk(sk); | 1329 | struct tcp_sock *tp = tcp_sk(sk); |
1319 | int fack_count = state->fack_count; | 1330 | int fack_count = state->fack_count; |
@@ -1393,10 +1404,10 @@ static u8 tcp_sacktag_one(struct sock *sk, | |||
1393 | /* Shift newly-SACKed bytes from this skb to the immediately previous | 1404 | /* Shift newly-SACKed bytes from this skb to the immediately previous |
1394 | * already-SACKed sk_buff. Mark the newly-SACKed bytes as such. | 1405 | * already-SACKed sk_buff. Mark the newly-SACKed bytes as such. |
1395 | */ | 1406 | */ |
1396 | static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | 1407 | static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, |
1397 | struct tcp_sacktag_state *state, | 1408 | struct tcp_sacktag_state *state, |
1398 | unsigned int pcount, int shifted, int mss, | 1409 | unsigned int pcount, int shifted, int mss, |
1399 | int dup_sack) | 1410 | bool dup_sack) |
1400 | { | 1411 | { |
1401 | struct tcp_sock *tp = tcp_sk(sk); | 1412 | struct tcp_sock *tp = tcp_sk(sk); |
1402 | struct sk_buff *prev = tcp_write_queue_prev(sk, skb); | 1413 | struct sk_buff *prev = tcp_write_queue_prev(sk, skb); |
@@ -1446,7 +1457,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | |||
1446 | if (skb->len > 0) { | 1457 | if (skb->len > 0) { |
1447 | BUG_ON(!tcp_skb_pcount(skb)); | 1458 | BUG_ON(!tcp_skb_pcount(skb)); |
1448 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED); | 1459 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED); |
1449 | return 0; | 1460 | return false; |
1450 | } | 1461 | } |
1451 | 1462 | ||
1452 | /* Whole SKB was eaten :-) */ | 1463 | /* Whole SKB was eaten :-) */ |
@@ -1469,7 +1480,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | |||
1469 | 1480 | ||
1470 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED); | 1481 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED); |
1471 | 1482 | ||
1472 | return 1; | 1483 | return true; |
1473 | } | 1484 | } |
1474 | 1485 | ||
1475 | /* I wish gso_size would have a bit more sane initialization than | 1486 | /* I wish gso_size would have a bit more sane initialization than |
@@ -1492,7 +1503,7 @@ static int skb_can_shift(const struct sk_buff *skb) | |||
1492 | static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, | 1503 | static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, |
1493 | struct tcp_sacktag_state *state, | 1504 | struct tcp_sacktag_state *state, |
1494 | u32 start_seq, u32 end_seq, | 1505 | u32 start_seq, u32 end_seq, |
1495 | int dup_sack) | 1506 | bool dup_sack) |
1496 | { | 1507 | { |
1497 | struct tcp_sock *tp = tcp_sk(sk); | 1508 | struct tcp_sock *tp = tcp_sk(sk); |
1498 | struct sk_buff *prev; | 1509 | struct sk_buff *prev; |
@@ -1631,14 +1642,14 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, | |||
1631 | struct tcp_sack_block *next_dup, | 1642 | struct tcp_sack_block *next_dup, |
1632 | struct tcp_sacktag_state *state, | 1643 | struct tcp_sacktag_state *state, |
1633 | u32 start_seq, u32 end_seq, | 1644 | u32 start_seq, u32 end_seq, |
1634 | int dup_sack_in) | 1645 | bool dup_sack_in) |
1635 | { | 1646 | { |
1636 | struct tcp_sock *tp = tcp_sk(sk); | 1647 | struct tcp_sock *tp = tcp_sk(sk); |
1637 | struct sk_buff *tmp; | 1648 | struct sk_buff *tmp; |
1638 | 1649 | ||
1639 | tcp_for_write_queue_from(skb, sk) { | 1650 | tcp_for_write_queue_from(skb, sk) { |
1640 | int in_sack = 0; | 1651 | int in_sack = 0; |
1641 | int dup_sack = dup_sack_in; | 1652 | bool dup_sack = dup_sack_in; |
1642 | 1653 | ||
1643 | if (skb == tcp_send_head(sk)) | 1654 | if (skb == tcp_send_head(sk)) |
1644 | break; | 1655 | break; |
@@ -1653,7 +1664,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, | |||
1653 | next_dup->start_seq, | 1664 | next_dup->start_seq, |
1654 | next_dup->end_seq); | 1665 | next_dup->end_seq); |
1655 | if (in_sack > 0) | 1666 | if (in_sack > 0) |
1656 | dup_sack = 1; | 1667 | dup_sack = true; |
1657 | } | 1668 | } |
1658 | 1669 | ||
1659 | /* skb reference here is a bit tricky to get right, since | 1670 | /* skb reference here is a bit tricky to get right, since |
@@ -1758,7 +1769,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, | |||
1758 | struct sk_buff *skb; | 1769 | struct sk_buff *skb; |
1759 | int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3); | 1770 | int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3); |
1760 | int used_sacks; | 1771 | int used_sacks; |
1761 | int found_dup_sack = 0; | 1772 | bool found_dup_sack = false; |
1762 | int i, j; | 1773 | int i, j; |
1763 | int first_sack_index; | 1774 | int first_sack_index; |
1764 | 1775 | ||
@@ -1789,7 +1800,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, | |||
1789 | used_sacks = 0; | 1800 | used_sacks = 0; |
1790 | first_sack_index = 0; | 1801 | first_sack_index = 0; |
1791 | for (i = 0; i < num_sacks; i++) { | 1802 | for (i = 0; i < num_sacks; i++) { |
1792 | int dup_sack = !i && found_dup_sack; | 1803 | bool dup_sack = !i && found_dup_sack; |
1793 | 1804 | ||
1794 | sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq); | 1805 | sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq); |
1795 | sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq); | 1806 | sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq); |
@@ -1856,7 +1867,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, | |||
1856 | while (i < used_sacks) { | 1867 | while (i < used_sacks) { |
1857 | u32 start_seq = sp[i].start_seq; | 1868 | u32 start_seq = sp[i].start_seq; |
1858 | u32 end_seq = sp[i].end_seq; | 1869 | u32 end_seq = sp[i].end_seq; |
1859 | int dup_sack = (found_dup_sack && (i == first_sack_index)); | 1870 | bool dup_sack = (found_dup_sack && (i == first_sack_index)); |
1860 | struct tcp_sack_block *next_dup = NULL; | 1871 | struct tcp_sack_block *next_dup = NULL; |
1861 | 1872 | ||
1862 | if (found_dup_sack && ((i + 1) == first_sack_index)) | 1873 | if (found_dup_sack && ((i + 1) == first_sack_index)) |
@@ -1958,9 +1969,9 @@ out: | |||
1958 | } | 1969 | } |
1959 | 1970 | ||
1960 | /* Limits sacked_out so that sum with lost_out isn't ever larger than | 1971 | /* Limits sacked_out so that sum with lost_out isn't ever larger than |
1961 | * packets_out. Returns zero if sacked_out adjustment wasn't necessary. | 1972 | * packets_out. Returns false if sacked_out adjustment wasn't necessary. |
1962 | */ | 1973 | */ |
1963 | static int tcp_limit_reno_sacked(struct tcp_sock *tp) | 1974 | static bool tcp_limit_reno_sacked(struct tcp_sock *tp) |
1964 | { | 1975 | { |
1965 | u32 holes; | 1976 | u32 holes; |
1966 | 1977 | ||
@@ -1969,9 +1980,9 @@ static int tcp_limit_reno_sacked(struct tcp_sock *tp) | |||
1969 | 1980 | ||
1970 | if ((tp->sacked_out + holes) > tp->packets_out) { | 1981 | if ((tp->sacked_out + holes) > tp->packets_out) { |
1971 | tp->sacked_out = tp->packets_out - holes; | 1982 | tp->sacked_out = tp->packets_out - holes; |
1972 | return 1; | 1983 | return true; |
1973 | } | 1984 | } |
1974 | return 0; | 1985 | return false; |
1975 | } | 1986 | } |
1976 | 1987 | ||
1977 | /* If we receive more dupacks than we expected counting segments | 1988 | /* If we receive more dupacks than we expected counting segments |
@@ -2025,40 +2036,40 @@ static int tcp_is_sackfrto(const struct tcp_sock *tp) | |||
2025 | /* F-RTO can only be used if TCP has never retransmitted anything other than | 2036 | /* F-RTO can only be used if TCP has never retransmitted anything other than |
2026 | * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) | 2037 | * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) |
2027 | */ | 2038 | */ |
2028 | int tcp_use_frto(struct sock *sk) | 2039 | bool tcp_use_frto(struct sock *sk) |
2029 | { | 2040 | { |
2030 | const struct tcp_sock *tp = tcp_sk(sk); | 2041 | const struct tcp_sock *tp = tcp_sk(sk); |
2031 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2042 | const struct inet_connection_sock *icsk = inet_csk(sk); |
2032 | struct sk_buff *skb; | 2043 | struct sk_buff *skb; |
2033 | 2044 | ||
2034 | if (!sysctl_tcp_frto) | 2045 | if (!sysctl_tcp_frto) |
2035 | return 0; | 2046 | return false; |
2036 | 2047 | ||
2037 | /* MTU probe and F-RTO won't really play nicely along currently */ | 2048 | /* MTU probe and F-RTO won't really play nicely along currently */ |
2038 | if (icsk->icsk_mtup.probe_size) | 2049 | if (icsk->icsk_mtup.probe_size) |
2039 | return 0; | 2050 | return false; |
2040 | 2051 | ||
2041 | if (tcp_is_sackfrto(tp)) | 2052 | if (tcp_is_sackfrto(tp)) |
2042 | return 1; | 2053 | return true; |
2043 | 2054 | ||
2044 | /* Avoid expensive walking of rexmit queue if possible */ | 2055 | /* Avoid expensive walking of rexmit queue if possible */ |
2045 | if (tp->retrans_out > 1) | 2056 | if (tp->retrans_out > 1) |
2046 | return 0; | 2057 | return false; |
2047 | 2058 | ||
2048 | skb = tcp_write_queue_head(sk); | 2059 | skb = tcp_write_queue_head(sk); |
2049 | if (tcp_skb_is_last(sk, skb)) | 2060 | if (tcp_skb_is_last(sk, skb)) |
2050 | return 1; | 2061 | return true; |
2051 | skb = tcp_write_queue_next(sk, skb); /* Skips head */ | 2062 | skb = tcp_write_queue_next(sk, skb); /* Skips head */ |
2052 | tcp_for_write_queue_from(skb, sk) { | 2063 | tcp_for_write_queue_from(skb, sk) { |
2053 | if (skb == tcp_send_head(sk)) | 2064 | if (skb == tcp_send_head(sk)) |
2054 | break; | 2065 | break; |
2055 | if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) | 2066 | if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) |
2056 | return 0; | 2067 | return false; |
2057 | /* Short-circuit when first non-SACKed skb has been checked */ | 2068 | /* Short-circuit when first non-SACKed skb has been checked */ |
2058 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) | 2069 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) |
2059 | break; | 2070 | break; |
2060 | } | 2071 | } |
2061 | return 1; | 2072 | return true; |
2062 | } | 2073 | } |
2063 | 2074 | ||
2064 | /* RTO occurred, but do not yet enter Loss state. Instead, defer RTO | 2075 | /* RTO occurred, but do not yet enter Loss state. Instead, defer RTO |
@@ -2294,7 +2305,7 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
2294 | * | 2305 | * |
2295 | * Do processing similar to RTO timeout. | 2306 | * Do processing similar to RTO timeout. |
2296 | */ | 2307 | */ |
2297 | static int tcp_check_sack_reneging(struct sock *sk, int flag) | 2308 | static bool tcp_check_sack_reneging(struct sock *sk, int flag) |
2298 | { | 2309 | { |
2299 | if (flag & FLAG_SACK_RENEGING) { | 2310 | if (flag & FLAG_SACK_RENEGING) { |
2300 | struct inet_connection_sock *icsk = inet_csk(sk); | 2311 | struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -2305,9 +2316,9 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag) | |||
2305 | tcp_retransmit_skb(sk, tcp_write_queue_head(sk)); | 2316 | tcp_retransmit_skb(sk, tcp_write_queue_head(sk)); |
2306 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 2317 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
2307 | icsk->icsk_rto, TCP_RTO_MAX); | 2318 | icsk->icsk_rto, TCP_RTO_MAX); |
2308 | return 1; | 2319 | return true; |
2309 | } | 2320 | } |
2310 | return 0; | 2321 | return false; |
2311 | } | 2322 | } |
2312 | 2323 | ||
2313 | static inline int tcp_fackets_out(const struct tcp_sock *tp) | 2324 | static inline int tcp_fackets_out(const struct tcp_sock *tp) |
@@ -2335,6 +2346,27 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) | |||
2335 | return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; | 2346 | return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; |
2336 | } | 2347 | } |
2337 | 2348 | ||
2349 | static bool tcp_pause_early_retransmit(struct sock *sk, int flag) | ||
2350 | { | ||
2351 | struct tcp_sock *tp = tcp_sk(sk); | ||
2352 | unsigned long delay; | ||
2353 | |||
2354 | /* Delay early retransmit and entering fast recovery for | ||
2355 | * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples | ||
2356 | * available, or RTO is scheduled to fire first. | ||
2357 | */ | ||
2358 | if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt) | ||
2359 | return false; | ||
2360 | |||
2361 | delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2)); | ||
2362 | if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) | ||
2363 | return false; | ||
2364 | |||
2365 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX); | ||
2366 | tp->early_retrans_delayed = 1; | ||
2367 | return true; | ||
2368 | } | ||
2369 | |||
2338 | static inline int tcp_skb_timedout(const struct sock *sk, | 2370 | static inline int tcp_skb_timedout(const struct sock *sk, |
2339 | const struct sk_buff *skb) | 2371 | const struct sk_buff *skb) |
2340 | { | 2372 | { |
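In tcp_pause_early_retransmit() above, tp->srtt is stored as 8 x SRTT in jiffies, so srtt >> 5 is SRTT/4, giving the max(RTT/4, 2 ms) delay the comment describes. Worked numbers, assuming HZ = 1000 (illustrative):

/* SRTT = 80 ms -> tp->srtt = 640 (kept <<3 in jiffies) */
u32 srtt = 640;
unsigned long delay = max_t(unsigned long,
			    srtt >> 5,		  /* 640/32 = 20 jiffies = RTT/4 */
			    msecs_to_jiffies(2)); /* 2 ms floor */
/* delay = 20 jiffies; ER is skipped when the RTO would fire sooner */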
@@ -2442,28 +2474,28 @@ static inline int tcp_head_timedout(const struct sock *sk) | |||
2442 | * Main question: may we further continue forward transmission | 2474 | * Main question: may we further continue forward transmission |
2443 | * with the same cwnd? | 2475 | * with the same cwnd? |
2444 | */ | 2476 | */ |
2445 | static int tcp_time_to_recover(struct sock *sk) | 2477 | static bool tcp_time_to_recover(struct sock *sk, int flag) |
2446 | { | 2478 | { |
2447 | struct tcp_sock *tp = tcp_sk(sk); | 2479 | struct tcp_sock *tp = tcp_sk(sk); |
2448 | __u32 packets_out; | 2480 | __u32 packets_out; |
2449 | 2481 | ||
2450 | /* Do not perform any recovery during F-RTO algorithm */ | 2482 | /* Do not perform any recovery during F-RTO algorithm */ |
2451 | if (tp->frto_counter) | 2483 | if (tp->frto_counter) |
2452 | return 0; | 2484 | return false; |
2453 | 2485 | ||
2454 | /* Trick#1: The loss is proven. */ | 2486 | /* Trick#1: The loss is proven. */ |
2455 | if (tp->lost_out) | 2487 | if (tp->lost_out) |
2456 | return 1; | 2488 | return true; |
2457 | 2489 | ||
2458 | /* Not-A-Trick#2 : Classic rule... */ | 2490 | /* Not-A-Trick#2 : Classic rule... */ |
2459 | if (tcp_dupack_heuristics(tp) > tp->reordering) | 2491 | if (tcp_dupack_heuristics(tp) > tp->reordering) |
2460 | return 1; | 2492 | return true; |
2461 | 2493 | ||
2462 | /* Trick#3 : when we use RFC2988 timer restart, fast | 2494 | /* Trick#3 : when we use RFC2988 timer restart, fast |
2463 | * retransmit can be triggered by timeout of queue head. | 2495 | * retransmit can be triggered by timeout of queue head. |
2464 | */ | 2496 | */ |
2465 | if (tcp_is_fack(tp) && tcp_head_timedout(sk)) | 2497 | if (tcp_is_fack(tp) && tcp_head_timedout(sk)) |
2466 | return 1; | 2498 | return true; |
2467 | 2499 | ||
2468 | /* Trick#4: It is still not OK... But will it be useful to delay | 2500 | /* Trick#4: It is still not OK... But will it be useful to delay |
2469 | * recovery more? | 2501 | * recovery more? |
@@ -2475,7 +2507,7 @@ static int tcp_time_to_recover(struct sock *sk) | |||
2475 | /* We have nothing to send. This connection is limited | 2507 | /* We have nothing to send. This connection is limited |
2476 | * either by receiver window or by application. | 2508 | * either by receiver window or by application. |
2477 | */ | 2509 | */ |
2478 | return 1; | 2510 | return true; |
2479 | } | 2511 | } |
2480 | 2512 | ||
2481 | /* If a thin stream is detected, retransmit after first | 2513 | /* If a thin stream is detected, retransmit after first |
@@ -2486,9 +2518,19 @@ static int tcp_time_to_recover(struct sock *sk) | |||
2486 | if ((tp->thin_dupack || sysctl_tcp_thin_dupack) && | 2518 | if ((tp->thin_dupack || sysctl_tcp_thin_dupack) && |
2487 | tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 && | 2519 | tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 && |
2488 | tcp_is_sack(tp) && !tcp_send_head(sk)) | 2520 | tcp_is_sack(tp) && !tcp_send_head(sk)) |
2489 | return 1; | 2521 | return true; |
2490 | 2522 | ||
2491 | return 0; | 2523 | /* Trick#6: TCP early retransmit, per RFC5827. To avoid spurious |
2524 | * retransmissions due to small network reorderings, we implement | ||
2525 | * Mitigation A.3 in the RFC and delay the retransmission for a short | ||
2526 | * interval if appropriate. | ||
2527 | */ | ||
2528 | if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && | ||
2529 | (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) && | ||
2530 | !tcp_may_send_now(sk)) | ||
2531 | return !tcp_pause_early_retransmit(sk, flag); | ||
2532 | |||
2533 | return false; | ||
2492 | } | 2534 | } |
2493 | 2535 | ||
2494 | /* New heuristics: it is possible only after we switched to restart timer | 2536 | /* New heuristics: it is possible only after we switched to restart timer |
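The Trick#6 condition arms early retransmit only in the small-window case RFC 5827 targets: nothing retransmitted yet, exactly one outstanding segment still un-SACKed (packets_out == sacked_out + 1) among fewer than four in flight, and no new data sendable, so the third duplicate ACK that classic fast retransmit waits for can never arrive. An illustrative state in which it fires:

/* Three segments outstanding, the first lost, the next two SACKed */
tp->packets_out = 3;	/* < 4 in flight */
tp->sacked_out  = 2;	/* packets_out == sacked_out + 1 */
tp->retrans_out = 0;	/* nothing retransmitted yet */
/* With tcp_may_send_now() false, only two dupacks can ever arrive,
 * so ER (possibly delayed by tcp_pause_early_retransmit) recovers.
 */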
@@ -2676,22 +2718,22 @@ static void DBGUNDO(struct sock *sk, const char *msg) | |||
2676 | struct inet_sock *inet = inet_sk(sk); | 2718 | struct inet_sock *inet = inet_sk(sk); |
2677 | 2719 | ||
2678 | if (sk->sk_family == AF_INET) { | 2720 | if (sk->sk_family == AF_INET) { |
2679 | printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", | 2721 | pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", |
2680 | msg, | 2722 | msg, |
2681 | &inet->inet_daddr, ntohs(inet->inet_dport), | 2723 | &inet->inet_daddr, ntohs(inet->inet_dport), |
2682 | tp->snd_cwnd, tcp_left_out(tp), | 2724 | tp->snd_cwnd, tcp_left_out(tp), |
2683 | tp->snd_ssthresh, tp->prior_ssthresh, | 2725 | tp->snd_ssthresh, tp->prior_ssthresh, |
2684 | tp->packets_out); | 2726 | tp->packets_out); |
2685 | } | 2727 | } |
2686 | #if IS_ENABLED(CONFIG_IPV6) | 2728 | #if IS_ENABLED(CONFIG_IPV6) |
2687 | else if (sk->sk_family == AF_INET6) { | 2729 | else if (sk->sk_family == AF_INET6) { |
2688 | struct ipv6_pinfo *np = inet6_sk(sk); | 2730 | struct ipv6_pinfo *np = inet6_sk(sk); |
2689 | printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", | 2731 | pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", |
2690 | msg, | 2732 | msg, |
2691 | &np->daddr, ntohs(inet->inet_dport), | 2733 | &np->daddr, ntohs(inet->inet_dport), |
2692 | tp->snd_cwnd, tcp_left_out(tp), | 2734 | tp->snd_cwnd, tcp_left_out(tp), |
2693 | tp->snd_ssthresh, tp->prior_ssthresh, | 2735 | tp->snd_ssthresh, tp->prior_ssthresh, |
2694 | tp->packets_out); | 2736 | tp->packets_out); |
2695 | } | 2737 | } |
2696 | #endif | 2738 | #endif |
2697 | } | 2739 | } |
@@ -2727,7 +2769,7 @@ static inline int tcp_may_undo(const struct tcp_sock *tp) | |||
2727 | } | 2769 | } |
2728 | 2770 | ||
2729 | /* People celebrate: "We love our President!" */ | 2771 | /* People celebrate: "We love our President!" */ |
2730 | static int tcp_try_undo_recovery(struct sock *sk) | 2772 | static bool tcp_try_undo_recovery(struct sock *sk) |
2731 | { | 2773 | { |
2732 | struct tcp_sock *tp = tcp_sk(sk); | 2774 | struct tcp_sock *tp = tcp_sk(sk); |
2733 | 2775 | ||
@@ -2752,10 +2794,10 @@ static int tcp_try_undo_recovery(struct sock *sk) | |||
2752 | * is ACKed. For Reno it is MUST to prevent false | 2794 | * is ACKed. For Reno it is MUST to prevent false |
2753 | * fast retransmits (RFC2582). SACK TCP is safe. */ | 2795 | * fast retransmits (RFC2582). SACK TCP is safe. */ |
2754 | tcp_moderate_cwnd(tp); | 2796 | tcp_moderate_cwnd(tp); |
2755 | return 1; | 2797 | return true; |
2756 | } | 2798 | } |
2757 | tcp_set_ca_state(sk, TCP_CA_Open); | 2799 | tcp_set_ca_state(sk, TCP_CA_Open); |
2758 | return 0; | 2800 | return false; |
2759 | } | 2801 | } |
2760 | 2802 | ||
2761 | /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ | 2803 | /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ |
@@ -2785,19 +2827,19 @@ static void tcp_try_undo_dsack(struct sock *sk) | |||
2785 | * that successive retransmissions of a segment must not advance | 2827 | * that successive retransmissions of a segment must not advance |
2786 | * retrans_stamp under any conditions. | 2828 | * retrans_stamp under any conditions. |
2787 | */ | 2829 | */ |
2788 | static int tcp_any_retrans_done(const struct sock *sk) | 2830 | static bool tcp_any_retrans_done(const struct sock *sk) |
2789 | { | 2831 | { |
2790 | const struct tcp_sock *tp = tcp_sk(sk); | 2832 | const struct tcp_sock *tp = tcp_sk(sk); |
2791 | struct sk_buff *skb; | 2833 | struct sk_buff *skb; |
2792 | 2834 | ||
2793 | if (tp->retrans_out) | 2835 | if (tp->retrans_out) |
2794 | return 1; | 2836 | return true; |
2795 | 2837 | ||
2796 | skb = tcp_write_queue_head(sk); | 2838 | skb = tcp_write_queue_head(sk); |
2797 | if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) | 2839 | if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) |
2798 | return 1; | 2840 | return true; |
2799 | 2841 | ||
2800 | return 0; | 2842 | return false; |
2801 | } | 2843 | } |
2802 | 2844 | ||
2803 | /* Undo during fast recovery after partial ACK. */ | 2845 | /* Undo during fast recovery after partial ACK. */ |
@@ -2831,7 +2873,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) | |||
2831 | } | 2873 | } |
2832 | 2874 | ||
2833 | /* Undo during loss recovery after partial ACK. */ | 2875 | /* Undo during loss recovery after partial ACK. */ |
2834 | static int tcp_try_undo_loss(struct sock *sk) | 2876 | static bool tcp_try_undo_loss(struct sock *sk) |
2835 | { | 2877 | { |
2836 | struct tcp_sock *tp = tcp_sk(sk); | 2878 | struct tcp_sock *tp = tcp_sk(sk); |
2837 | 2879 | ||
@@ -2853,9 +2895,9 @@ static int tcp_try_undo_loss(struct sock *sk) | |||
2853 | tp->undo_marker = 0; | 2895 | tp->undo_marker = 0; |
2854 | if (tcp_is_sack(tp)) | 2896 | if (tcp_is_sack(tp)) |
2855 | tcp_set_ca_state(sk, TCP_CA_Open); | 2897 | tcp_set_ca_state(sk, TCP_CA_Open); |
2856 | return 1; | 2898 | return true; |
2857 | } | 2899 | } |
2858 | return 0; | 2900 | return false; |
2859 | } | 2901 | } |
2860 | 2902 | ||
2861 | static inline void tcp_complete_cwr(struct sock *sk) | 2903 | static inline void tcp_complete_cwr(struct sock *sk) |
@@ -2864,11 +2906,14 @@ static inline void tcp_complete_cwr(struct sock *sk) | |||
2864 | 2906 | ||
2865 | /* Do not moderate cwnd if it's already undone in cwr or recovery. */ | 2907 | /* Do not moderate cwnd if it's already undone in cwr or recovery. */ |
2866 | if (tp->undo_marker) { | 2908 | if (tp->undo_marker) { |
2867 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) | 2909 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) { |
2868 | tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); | 2910 | tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); |
2869 | else /* PRR */ | 2911 | tp->snd_cwnd_stamp = tcp_time_stamp; |
2912 | } else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) { | ||
2913 | /* PRR algorithm. */ | ||
2870 | tp->snd_cwnd = tp->snd_ssthresh; | 2914 | tp->snd_cwnd = tp->snd_ssthresh; |
2871 | tp->snd_cwnd_stamp = tcp_time_stamp; | 2915 | tp->snd_cwnd_stamp = tcp_time_stamp; |
2916 | } | ||
2872 | } | 2917 | } |
2873 | tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); | 2918 | tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); |
2874 | } | 2919 | } |
@@ -3018,6 +3063,38 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, | |||
3018 | tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; | 3063 | tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; |
3019 | } | 3064 | } |
3020 | 3065 | ||
3066 | static void tcp_enter_recovery(struct sock *sk, bool ece_ack) | ||
3067 | { | ||
3068 | struct tcp_sock *tp = tcp_sk(sk); | ||
3069 | int mib_idx; | ||
3070 | |||
3071 | if (tcp_is_reno(tp)) | ||
3072 | mib_idx = LINUX_MIB_TCPRENORECOVERY; | ||
3073 | else | ||
3074 | mib_idx = LINUX_MIB_TCPSACKRECOVERY; | ||
3075 | |||
3076 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | ||
3077 | |||
3078 | tp->high_seq = tp->snd_nxt; | ||
3079 | tp->prior_ssthresh = 0; | ||
3080 | tp->undo_marker = tp->snd_una; | ||
3081 | tp->undo_retrans = tp->retrans_out; | ||
3082 | |||
3083 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { | ||
3084 | if (!ece_ack) | ||
3085 | tp->prior_ssthresh = tcp_current_ssthresh(sk); | ||
3086 | tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); | ||
3087 | TCP_ECN_queue_cwr(tp); | ||
3088 | } | ||
3089 | |||
3090 | tp->bytes_acked = 0; | ||
3091 | tp->snd_cwnd_cnt = 0; | ||
3092 | tp->prior_cwnd = tp->snd_cwnd; | ||
3093 | tp->prr_delivered = 0; | ||
3094 | tp->prr_out = 0; | ||
3095 | tcp_set_ca_state(sk, TCP_CA_Recovery); | ||
3096 | } | ||
3097 | |||
3021 | /* Process an event, which can update packets-in-flight not trivially. | 3098 | /* Process an event, which can update packets-in-flight not trivially. |
3022 | * Main goal of this function is to calculate new estimate for left_out, | 3099 | * Main goal of this function is to calculate new estimate for left_out, |
3023 | * taking into account both packets sitting in receiver's buffer and | 3100 | * taking into account both packets sitting in receiver's buffer and |
@@ -3037,7 +3114,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3037 | struct tcp_sock *tp = tcp_sk(sk); | 3114 | struct tcp_sock *tp = tcp_sk(sk); |
3038 | int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && | 3115 | int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && |
3039 | (tcp_fackets_out(tp) > tp->reordering)); | 3116 | (tcp_fackets_out(tp) > tp->reordering)); |
3040 | int fast_rexmit = 0, mib_idx; | 3117 | int fast_rexmit = 0; |
3041 | 3118 | ||
3042 | if (WARN_ON(!tp->packets_out && tp->sacked_out)) | 3119 | if (WARN_ON(!tp->packets_out && tp->sacked_out)) |
3043 | tp->sacked_out = 0; | 3120 | tp->sacked_out = 0; |
@@ -3121,7 +3198,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3121 | if (icsk->icsk_ca_state <= TCP_CA_Disorder) | 3198 | if (icsk->icsk_ca_state <= TCP_CA_Disorder) |
3122 | tcp_try_undo_dsack(sk); | 3199 | tcp_try_undo_dsack(sk); |
3123 | 3200 | ||
3124 | if (!tcp_time_to_recover(sk)) { | 3201 | if (!tcp_time_to_recover(sk, flag)) { |
3125 | tcp_try_to_open(sk, flag); | 3202 | tcp_try_to_open(sk, flag); |
3126 | return; | 3203 | return; |
3127 | } | 3204 | } |
@@ -3138,32 +3215,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3138 | } | 3215 | } |
3139 | 3216 | ||
3140 | /* Otherwise enter Recovery state */ | 3217 | /* Otherwise enter Recovery state */ |
3141 | 3218 | tcp_enter_recovery(sk, (flag & FLAG_ECE)); | |
3142 | if (tcp_is_reno(tp)) | ||
3143 | mib_idx = LINUX_MIB_TCPRENORECOVERY; | ||
3144 | else | ||
3145 | mib_idx = LINUX_MIB_TCPSACKRECOVERY; | ||
3146 | |||
3147 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | ||
3148 | |||
3149 | tp->high_seq = tp->snd_nxt; | ||
3150 | tp->prior_ssthresh = 0; | ||
3151 | tp->undo_marker = tp->snd_una; | ||
3152 | tp->undo_retrans = tp->retrans_out; | ||
3153 | |||
3154 | if (icsk->icsk_ca_state < TCP_CA_CWR) { | ||
3155 | if (!(flag & FLAG_ECE)) | ||
3156 | tp->prior_ssthresh = tcp_current_ssthresh(sk); | ||
3157 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); | ||
3158 | TCP_ECN_queue_cwr(tp); | ||
3159 | } | ||
3160 | |||
3161 | tp->bytes_acked = 0; | ||
3162 | tp->snd_cwnd_cnt = 0; | ||
3163 | tp->prior_cwnd = tp->snd_cwnd; | ||
3164 | tp->prr_delivered = 0; | ||
3165 | tp->prr_out = 0; | ||
3166 | tcp_set_ca_state(sk, TCP_CA_Recovery); | ||
3167 | fast_rexmit = 1; | 3219 | fast_rexmit = 1; |
3168 | } | 3220 | } |
3169 | 3221 | ||
@@ -3245,16 +3297,47 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
3245 | /* Restart timer after forward progress on connection. | 3297 | /* Restart timer after forward progress on connection. |
3246 | * RFC2988 recommends to restart timer to now+rto. | 3298 | * RFC2988 recommends to restart timer to now+rto. |
3247 | */ | 3299 | */ |
3248 | static void tcp_rearm_rto(struct sock *sk) | 3300 | void tcp_rearm_rto(struct sock *sk) |
3249 | { | 3301 | { |
3250 | const struct tcp_sock *tp = tcp_sk(sk); | 3302 | struct tcp_sock *tp = tcp_sk(sk); |
3251 | 3303 | ||
3252 | if (!tp->packets_out) { | 3304 | if (!tp->packets_out) { |
3253 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); | 3305 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); |
3254 | } else { | 3306 | } else { |
3255 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 3307 | u32 rto = inet_csk(sk)->icsk_rto; |
3256 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); | 3308 | /* Offset the time elapsed after installing regular RTO */ |
3309 | if (tp->early_retrans_delayed) { | ||
3310 | struct sk_buff *skb = tcp_write_queue_head(sk); | ||
3311 | const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; | ||
3312 | s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); | ||
3313 | /* delta may not be positive if the socket is locked | ||
3314 | * when the delayed ER timer fires and is rescheduled. | ||
3315 | */ | ||
3316 | if (delta > 0) | ||
3317 | rto = delta; | ||
3318 | } | ||
3319 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, | ||
3320 | TCP_RTO_MAX); | ||
3257 | } | 3321 | } |
3322 | tp->early_retrans_delayed = 0; | ||
3323 | } | ||
3324 | |||
3325 | /* This function is called when the delayed ER timer fires. TCP enters | ||
3326 | * fast recovery and performs fast-retransmit. | ||
3327 | */ | ||
3328 | void tcp_resume_early_retransmit(struct sock *sk) | ||
3329 | { | ||
3330 | struct tcp_sock *tp = tcp_sk(sk); | ||
3331 | |||
3332 | tcp_rearm_rto(sk); | ||
3333 | |||
3334 | /* Stop if ER is disabled after the delayed ER timer is scheduled */ | ||
3335 | if (!tp->do_early_retrans) | ||
3336 | return; | ||
3337 | |||
3338 | tcp_enter_recovery(sk, false); | ||
3339 | tcp_update_scoreboard(sk, 1); | ||
3340 | tcp_xmit_retransmit_queue(sk); | ||
3258 | } | 3341 | } |
3259 | 3342 | ||
3260 | /* If we get here, the whole TSO packet has not been acked. */ | 3343 | /* If we get here, the whole TSO packet has not been acked. */ |
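The rearm logic offsets the timer by the time already consumed: when the delayed-ER timer had replaced the RTO, re-arming must cover only the remainder of the original deadline, not a fresh icsk_rto from now. Worked example with illustrative jiffies values:

/* Head skb stamped at when = 1000, icsk_rto = 200, now = 1080 */
u32 when = 1000, rto = 200, now = 1080;
u32 rto_time_stamp = when + rto;		/* absolute deadline: 1200 */
s32 delta = (s32)(rto_time_stamp - now);	/* 120 jiffies remain */
if (delta > 0)
	rto = delta;	/* re-arm for the remainder, not a fresh 200 */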
@@ -3289,7 +3372,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3289 | const struct inet_connection_sock *icsk = inet_csk(sk); | 3372 | const struct inet_connection_sock *icsk = inet_csk(sk); |
3290 | struct sk_buff *skb; | 3373 | struct sk_buff *skb; |
3291 | u32 now = tcp_time_stamp; | 3374 | u32 now = tcp_time_stamp; |
3292 | int fully_acked = 1; | 3375 | int fully_acked = true; |
3293 | int flag = 0; | 3376 | int flag = 0; |
3294 | u32 pkts_acked = 0; | 3377 | u32 pkts_acked = 0; |
3295 | u32 reord = tp->packets_out; | 3378 | u32 reord = tp->packets_out; |
@@ -3313,7 +3396,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3313 | if (!acked_pcount) | 3396 | if (!acked_pcount) |
3314 | break; | 3397 | break; |
3315 | 3398 | ||
3316 | fully_acked = 0; | 3399 | fully_acked = false; |
3317 | } else { | 3400 | } else { |
3318 | acked_pcount = tcp_skb_pcount(skb); | 3401 | acked_pcount = tcp_skb_pcount(skb); |
3319 | } | 3402 | } |
@@ -3430,18 +3513,18 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3430 | if (!tp->packets_out && tcp_is_sack(tp)) { | 3513 | if (!tp->packets_out && tcp_is_sack(tp)) { |
3431 | icsk = inet_csk(sk); | 3514 | icsk = inet_csk(sk); |
3432 | if (tp->lost_out) { | 3515 | if (tp->lost_out) { |
3433 | printk(KERN_DEBUG "Leak l=%u %d\n", | 3516 | pr_debug("Leak l=%u %d\n", |
3434 | tp->lost_out, icsk->icsk_ca_state); | 3517 | tp->lost_out, icsk->icsk_ca_state); |
3435 | tp->lost_out = 0; | 3518 | tp->lost_out = 0; |
3436 | } | 3519 | } |
3437 | if (tp->sacked_out) { | 3520 | if (tp->sacked_out) { |
3438 | printk(KERN_DEBUG "Leak s=%u %d\n", | 3521 | pr_debug("Leak s=%u %d\n", |
3439 | tp->sacked_out, icsk->icsk_ca_state); | 3522 | tp->sacked_out, icsk->icsk_ca_state); |
3440 | tp->sacked_out = 0; | 3523 | tp->sacked_out = 0; |
3441 | } | 3524 | } |
3442 | if (tp->retrans_out) { | 3525 | if (tp->retrans_out) { |
3443 | printk(KERN_DEBUG "Leak r=%u %d\n", | 3526 | pr_debug("Leak r=%u %d\n", |
3444 | tp->retrans_out, icsk->icsk_ca_state); | 3527 | tp->retrans_out, icsk->icsk_ca_state); |
3445 | tp->retrans_out = 0; | 3528 | tp->retrans_out = 0; |
3446 | } | 3529 | } |
3447 | } | 3530 | } |
@@ -3592,7 +3675,7 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag) | |||
3592 | * to prove that the RTO is indeed spurious. It transfers the control | 3675 | * to prove that the RTO is indeed spurious. It transfers the control |
3593 | * from F-RTO to the conventional RTO recovery | 3676 | * from F-RTO to the conventional RTO recovery |
3594 | */ | 3677 | */ |
3595 | static int tcp_process_frto(struct sock *sk, int flag) | 3678 | static bool tcp_process_frto(struct sock *sk, int flag) |
3596 | { | 3679 | { |
3597 | struct tcp_sock *tp = tcp_sk(sk); | 3680 | struct tcp_sock *tp = tcp_sk(sk); |
3598 | 3681 | ||
@@ -3608,7 +3691,7 @@ static int tcp_process_frto(struct sock *sk, int flag) | |||
3608 | 3691 | ||
3609 | if (!before(tp->snd_una, tp->frto_highmark)) { | 3692 | if (!before(tp->snd_una, tp->frto_highmark)) { |
3610 | tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); | 3693 | tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); |
3611 | return 1; | 3694 | return true; |
3612 | } | 3695 | } |
3613 | 3696 | ||
3614 | if (!tcp_is_sackfrto(tp)) { | 3697 | if (!tcp_is_sackfrto(tp)) { |
@@ -3617,19 +3700,19 @@ static int tcp_process_frto(struct sock *sk, int flag) | |||
3617 | * data, winupdate | 3700 | * data, winupdate |
3618 | */ | 3701 | */ |
3619 | if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) | 3702 | if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) |
3620 | return 1; | 3703 | return true; |
3621 | 3704 | ||
3622 | if (!(flag & FLAG_DATA_ACKED)) { | 3705 | if (!(flag & FLAG_DATA_ACKED)) { |
3623 | tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), | 3706 | tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), |
3624 | flag); | 3707 | flag); |
3625 | return 1; | 3708 | return true; |
3626 | } | 3709 | } |
3627 | } else { | 3710 | } else { |
3628 | if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { | 3711 | if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { |
3629 | /* Prevent sending of new data. */ | 3712 | /* Prevent sending of new data. */ |
3630 | tp->snd_cwnd = min(tp->snd_cwnd, | 3713 | tp->snd_cwnd = min(tp->snd_cwnd, |
3631 | tcp_packets_in_flight(tp)); | 3714 | tcp_packets_in_flight(tp)); |
3632 | return 1; | 3715 | return true; |
3633 | } | 3716 | } |
3634 | 3717 | ||
3635 | if ((tp->frto_counter >= 2) && | 3718 | if ((tp->frto_counter >= 2) && |
@@ -3639,10 +3722,10 @@ static int tcp_process_frto(struct sock *sk, int flag) | |||
3639 | /* RFC4138 shortcoming (see comment above) */ | 3722 | /* RFC4138 shortcoming (see comment above) */ |
3640 | if (!(flag & FLAG_FORWARD_PROGRESS) && | 3723 | if (!(flag & FLAG_FORWARD_PROGRESS) && |
3641 | (flag & FLAG_NOT_DUP)) | 3724 | (flag & FLAG_NOT_DUP)) |
3642 | return 1; | 3725 | return true; |
3643 | 3726 | ||
3644 | tcp_enter_frto_loss(sk, 3, flag); | 3727 | tcp_enter_frto_loss(sk, 3, flag); |
3645 | return 1; | 3728 | return true; |
3646 | } | 3729 | } |
3647 | } | 3730 | } |
3648 | 3731 | ||
@@ -3654,7 +3737,7 @@ static int tcp_process_frto(struct sock *sk, int flag) | |||
3654 | if (!tcp_may_send_now(sk)) | 3737 | if (!tcp_may_send_now(sk)) |
3655 | tcp_enter_frto_loss(sk, 2, flag); | 3738 | tcp_enter_frto_loss(sk, 2, flag); |
3656 | 3739 | ||
3657 | return 1; | 3740 | return true; |
3658 | } else { | 3741 | } else { |
3659 | switch (sysctl_tcp_frto_response) { | 3742 | switch (sysctl_tcp_frto_response) { |
3660 | case 2: | 3743 | case 2: |
@@ -3671,7 +3754,7 @@ static int tcp_process_frto(struct sock *sk, int flag) | |||
3671 | tp->undo_marker = 0; | 3754 | tp->undo_marker = 0; |
3672 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); | 3755 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); |
3673 | } | 3756 | } |
3674 | return 0; | 3757 | return false; |
3675 | } | 3758 | } |
3676 | 3759 | ||
3677 | /* This routine deals with incoming acks, but not outgoing ones. */ | 3760 | /* This routine deals with incoming acks, but not outgoing ones. */ |
@@ -3689,7 +3772,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3689 | int prior_sacked = tp->sacked_out; | 3772 | int prior_sacked = tp->sacked_out; |
3690 | int pkts_acked = 0; | 3773 | int pkts_acked = 0; |
3691 | int newly_acked_sacked = 0; | 3774 | int newly_acked_sacked = 0; |
3692 | int frto_cwnd = 0; | 3775 | bool frto_cwnd = false; |
3693 | 3776 | ||
3694 | /* If the ack is older than previous acks | 3777 | /* If the ack is older than previous acks |
3695 | * then we can probably ignore it. | 3778 | * then we can probably ignore it. |
@@ -3703,6 +3786,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3703 | if (after(ack, tp->snd_nxt)) | 3786 | if (after(ack, tp->snd_nxt)) |
3704 | goto invalid_ack; | 3787 | goto invalid_ack; |
3705 | 3788 | ||
3789 | if (tp->early_retrans_delayed) | ||
3790 | tcp_rearm_rto(sk); | ||
3791 | |||
3706 | if (after(ack, prior_snd_una)) | 3792 | if (after(ack, prior_snd_una)) |
3707 | flag |= FLAG_SND_UNA_ADVANCED; | 3793 | flag |= FLAG_SND_UNA_ADVANCED; |
3708 | 3794 | ||
@@ -3868,10 +3954,9 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o | |||
3868 | __u8 snd_wscale = *(__u8 *)ptr; | 3954 | __u8 snd_wscale = *(__u8 *)ptr; |
3869 | opt_rx->wscale_ok = 1; | 3955 | opt_rx->wscale_ok = 1; |
3870 | if (snd_wscale > 14) { | 3956 | if (snd_wscale > 14) { |
3871 | if (net_ratelimit()) | 3957 | net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n", |
3872 | pr_info("%s: Illegal window scaling value %d >14 received\n", | 3958 | __func__, |
3873 | __func__, | 3959 | snd_wscale); |
3874 | snd_wscale); | ||
3875 | snd_wscale = 14; | 3960 | snd_wscale = 14; |
3876 | } | 3961 | } |
3877 | opt_rx->snd_wscale = snd_wscale; | 3962 | opt_rx->snd_wscale = snd_wscale; |
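The clamp to 14 follows the window-scaling spec (RFC 1323, later RFC 7323), which caps the shift so the scaled window stays below 2^30 and sequence comparisons remain unambiguous. Quick check:

/* Largest legal advertised window under window scaling */
u32 max_win = 65535U << 14;	/* = 1073725440, just under 1 GiB */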
@@ -3942,7 +4027,7 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o | |||
3942 | } | 4027 | } |
3943 | EXPORT_SYMBOL(tcp_parse_options); | 4028 | EXPORT_SYMBOL(tcp_parse_options); |
3944 | 4029 | ||
3945 | static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th) | 4030 | static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th) |
3946 | { | 4031 | { |
3947 | const __be32 *ptr = (const __be32 *)(th + 1); | 4032 | const __be32 *ptr = (const __be32 *)(th + 1); |
3948 | 4033 | ||
@@ -3953,31 +4038,31 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr | |||
3953 | tp->rx_opt.rcv_tsval = ntohl(*ptr); | 4038 | tp->rx_opt.rcv_tsval = ntohl(*ptr); |
3954 | ++ptr; | 4039 | ++ptr; |
3955 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); | 4040 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); |
3956 | return 1; | 4041 | return true; |
3957 | } | 4042 | } |
3958 | return 0; | 4043 | return false; |
3959 | } | 4044 | } |
3960 | 4045 | ||
3961 | /* Fast parse options. This hopes to only see timestamps. | 4046 | /* Fast parse options. This hopes to only see timestamps. |
3962 | * If it is wrong it falls back on tcp_parse_options(). | 4047 | * If it is wrong it falls back on tcp_parse_options(). |
3963 | */ | 4048 | */ |
3964 | static int tcp_fast_parse_options(const struct sk_buff *skb, | 4049 | static bool tcp_fast_parse_options(const struct sk_buff *skb, |
3965 | const struct tcphdr *th, | 4050 | const struct tcphdr *th, |
3966 | struct tcp_sock *tp, const u8 **hvpp) | 4051 | struct tcp_sock *tp, const u8 **hvpp) |
3967 | { | 4052 | { |
3968 | /* In the spirit of fast parsing, compare doff directly to constant | 4053 | /* In the spirit of fast parsing, compare doff directly to constant |
3969 | * values. Because equality is used, short doff can be ignored here. | 4054 | * values. Because equality is used, short doff can be ignored here. |
3970 | */ | 4055 | */ |
3971 | if (th->doff == (sizeof(*th) / 4)) { | 4056 | if (th->doff == (sizeof(*th) / 4)) { |
3972 | tp->rx_opt.saw_tstamp = 0; | 4057 | tp->rx_opt.saw_tstamp = 0; |
3973 | return 0; | 4058 | return false; |
3974 | } else if (tp->rx_opt.tstamp_ok && | 4059 | } else if (tp->rx_opt.tstamp_ok && |
3975 | th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) { | 4060 | th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) { |
3976 | if (tcp_parse_aligned_timestamp(tp, th)) | 4061 | if (tcp_parse_aligned_timestamp(tp, th)) |
3977 | return 1; | 4062 | return true; |
3978 | } | 4063 | } |
3979 | tcp_parse_options(skb, &tp->rx_opt, hvpp, 1); | 4064 | tcp_parse_options(skb, &tp->rx_opt, hvpp, 1); |
3980 | return 1; | 4065 | return true; |
3981 | } | 4066 | } |
3982 | 4067 | ||
3983 | #ifdef CONFIG_TCP_MD5SIG | 4068 | #ifdef CONFIG_TCP_MD5SIG |
@@ -4218,7 +4303,7 @@ static void tcp_fin(struct sock *sk) | |||
4218 | } | 4303 | } |
4219 | } | 4304 | } |
4220 | 4305 | ||
4221 | static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, | 4306 | static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, |
4222 | u32 end_seq) | 4307 | u32 end_seq) |
4223 | { | 4308 | { |
4224 | if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { | 4309 | if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { |
@@ -4226,9 +4311,9 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, | |||
4226 | sp->start_seq = seq; | 4311 | sp->start_seq = seq; |
4227 | if (after(end_seq, sp->end_seq)) | 4312 | if (after(end_seq, sp->end_seq)) |
4228 | sp->end_seq = end_seq; | 4313 | sp->end_seq = end_seq; |
4229 | return 1; | 4314 | return true; |
4230 | } | 4315 | } |
4231 | return 0; | 4316 | return false; |
4232 | } | 4317 | } |
4233 | 4318 | ||
4234 | static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) | 4319 | static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) |
@@ -4424,10 +4509,10 @@ static void tcp_ofo_queue(struct sock *sk) | |||
4424 | } | 4509 | } |
4425 | } | 4510 | } |
4426 | 4511 | ||
4427 | static int tcp_prune_ofo_queue(struct sock *sk); | 4512 | static bool tcp_prune_ofo_queue(struct sock *sk); |
4428 | static int tcp_prune_queue(struct sock *sk); | 4513 | static int tcp_prune_queue(struct sock *sk); |
4429 | 4514 | ||
4430 | static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) | 4515 | static int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) |
4431 | { | 4516 | { |
4432 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || | 4517 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || |
4433 | !sk_rmem_schedule(sk, size)) { | 4518 | !sk_rmem_schedule(sk, size)) { |
@@ -4446,6 +4531,41 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) | |||
4446 | return 0; | 4531 | return 0; |
4447 | } | 4532 | } |
4448 | 4533 | ||
4534 | /** | ||
4535 | * tcp_try_coalesce - try to merge skb to prior one | ||
4536 | * @sk: socket | ||
4537 | * @to: prior buffer | ||
4538 | * @from: buffer to add in queue | ||
4539 | * @fragstolen: pointer to boolean | ||
4540 | * | ||
4541 | * Before queueing skb @from after @to, try to merge them | ||
4542 | * to reduce overall memory use and queue lengths, if cost is small. | ||
4543 | * Packets in ofo or receive queues can stay a long time. | ||
4544 | * Better try to coalesce them right now to avoid future collapses. | ||
4545 | * Returns true if caller should free @from instead of queueing it | ||
4546 | */ | ||
4547 | static bool tcp_try_coalesce(struct sock *sk, | ||
4548 | struct sk_buff *to, | ||
4549 | struct sk_buff *from, | ||
4550 | bool *fragstolen) | ||
4551 | { | ||
4552 | int delta; | ||
4553 | |||
4554 | *fragstolen = false; | ||
4555 | |||
4556 | if (tcp_hdr(from)->fin) | ||
4557 | return false; | ||
4558 | if (!skb_try_coalesce(to, from, fragstolen, &delta)) | ||
4559 | return false; | ||
4560 | |||
4561 | atomic_add(delta, &sk->sk_rmem_alloc); | ||
4562 | sk_mem_charge(sk, delta); | ||
4563 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE); | ||
4564 | TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; | ||
4565 | TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; | ||
4566 | return true; | ||
4567 | } | ||
4568 | |||
4449 | static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) | 4569 | static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) |
4450 | { | 4570 | { |
4451 | struct tcp_sock *tp = tcp_sk(sk); | 4571 | struct tcp_sock *tp = tcp_sk(sk); |
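tcp_try_coalesce() charges the socket only for the delta bytes actually absorbed (skb_try_coalesce() may steal page fragments or the head of @from), so a successful merge avoids paying the full truesize of a second skb on the queue. The caller contract — free @from on success, queue it otherwise — looks like this in practice (the same pattern tcp_queue_rcv() below follows):

    bool fragstolen;

    if (tail && tcp_try_coalesce(sk, tail, skb, &fragstolen)) {
            /* payload now lives in 'tail'; release what is left of
             * 'skb' (its head may have been stolen into a fragment) */
            kfree_skb_partial(skb, fragstolen);
    } else {
            __skb_queue_tail(&sk->sk_receive_queue, skb);
            skb_set_owner_r(skb, sk);
    }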
@@ -4484,23 +4604,13 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) | |||
4484 | end_seq = TCP_SKB_CB(skb)->end_seq; | 4604 | end_seq = TCP_SKB_CB(skb)->end_seq; |
4485 | 4605 | ||
4486 | if (seq == TCP_SKB_CB(skb1)->end_seq) { | 4606 | if (seq == TCP_SKB_CB(skb1)->end_seq) { |
4487 | /* Packets in ofo can stay in queue a long time. | 4607 | bool fragstolen; |
4488 | * Better try to coalesce them right now | 4608 | |
4489 | * to avoid future tcp_collapse_ofo_queue(), | 4609 | if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { |
4490 | * probably the most expensive function in tcp stack. | ||
4491 | */ | ||
4492 | if (skb->len <= skb_tailroom(skb1) && !tcp_hdr(skb)->fin) { | ||
4493 | NET_INC_STATS_BH(sock_net(sk), | ||
4494 | LINUX_MIB_TCPRCVCOALESCE); | ||
4495 | BUG_ON(skb_copy_bits(skb, 0, | ||
4496 | skb_put(skb1, skb->len), | ||
4497 | skb->len)); | ||
4498 | TCP_SKB_CB(skb1)->end_seq = end_seq; | ||
4499 | TCP_SKB_CB(skb1)->ack_seq = TCP_SKB_CB(skb)->ack_seq; | ||
4500 | __kfree_skb(skb); | ||
4501 | skb = NULL; | ||
4502 | } else { | ||
4503 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); | 4610 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); |
4611 | } else { | ||
4612 | kfree_skb_partial(skb, fragstolen); | ||
4613 | skb = NULL; | ||
4504 | } | 4614 | } |
4505 | 4615 | ||
4506 | if (!tp->rx_opt.num_sacks || | 4616 | if (!tp->rx_opt.num_sacks || |
@@ -4576,12 +4686,65 @@ end: | |||
4576 | skb_set_owner_r(skb, sk); | 4686 | skb_set_owner_r(skb, sk); |
4577 | } | 4687 | } |
4578 | 4688 | ||
4689 | static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen, | ||
4690 | bool *fragstolen) | ||
4691 | { | ||
4692 | int eaten; | ||
4693 | struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue); | ||
4694 | |||
4695 | __skb_pull(skb, hdrlen); | ||
4696 | eaten = (tail && | ||
4697 | tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0; | ||
4698 | tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | ||
4699 | if (!eaten) { | ||
4700 | __skb_queue_tail(&sk->sk_receive_queue, skb); | ||
4701 | skb_set_owner_r(skb, sk); | ||
4702 | } | ||
4703 | return eaten; | ||
4704 | } | ||
4705 | |||
4706 | int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) | ||
4707 | { | ||
4708 | struct sk_buff *skb; | ||
4709 | struct tcphdr *th; | ||
4710 | bool fragstolen; | ||
4711 | |||
4712 | if (tcp_try_rmem_schedule(sk, size + sizeof(*th))) | ||
4713 | goto err; | ||
4714 | |||
4715 | skb = alloc_skb(size + sizeof(*th), sk->sk_allocation); | ||
4716 | if (!skb) | ||
4717 | goto err; | ||
4718 | |||
4719 | th = (struct tcphdr *)skb_put(skb, sizeof(*th)); | ||
4720 | skb_reset_transport_header(skb); | ||
4721 | memset(th, 0, sizeof(*th)); | ||
4722 | |||
4723 | if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size)) | ||
4724 | goto err_free; | ||
4725 | |||
4726 | TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; | ||
4727 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size; | ||
4728 | TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1; | ||
4729 | |||
4730 | if (tcp_queue_rcv(sk, skb, sizeof(*th), &fragstolen)) { | ||
4731 | WARN_ON_ONCE(fragstolen); /* should not happen */ | ||
4732 | __kfree_skb(skb); | ||
4733 | } | ||
4734 | return size; | ||
4735 | |||
4736 | err_free: | ||
4737 | kfree_skb(skb); | ||
4738 | err: | ||
4739 | return -ENOMEM; | ||
4740 | } | ||
4579 | 4741 | ||
4580 | static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | 4742 | static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) |
4581 | { | 4743 | { |
4582 | const struct tcphdr *th = tcp_hdr(skb); | 4744 | const struct tcphdr *th = tcp_hdr(skb); |
4583 | struct tcp_sock *tp = tcp_sk(sk); | 4745 | struct tcp_sock *tp = tcp_sk(sk); |
4584 | int eaten = -1; | 4746 | int eaten = -1; |
4747 | bool fragstolen = false; | ||
4585 | 4748 | ||
4586 | if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) | 4749 | if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) |
4587 | goto drop; | 4750 | goto drop; |
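tcp_send_rcvq() is the receive-queue half of the TCP repair API: it wraps user data in an skb with a zeroed, header-only tcphdr and queues it as though it had arrived from the network, with seq/end_seq continuing from rcv_nxt. A hypothetical user-space sketch of restoring a checkpointed connection's receive queue (option names and values follow the repair patch set; verify them against your kernel headers):

    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <sys/socket.h>

    #ifndef TCP_REPAIR
    #define TCP_REPAIR       19
    #define TCP_REPAIR_QUEUE 20
    #endif
    #ifndef TCP_RECV_QUEUE
    #define TCP_RECV_QUEUE   1
    #endif

    /* Refill the receive queue of a repair-mode socket. */
    static int restore_rcv_queue(int fd, const void *data, size_t len)
    {
            int on = 1, q = TCP_RECV_QUEUE;

            if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on)) < 0)
                    return -1;
            if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q)) < 0)
                    return -1;
            /* send() on a repair-mode socket ends up in tcp_send_rcvq()
             * instead of transmitting anything on the wire */
            return send(fd, data, len, 0) == (ssize_t)len ? 0 : -1;
    }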
@@ -4626,8 +4789,7 @@ queue_and_out: | |||
4626 | tcp_try_rmem_schedule(sk, skb->truesize)) | 4789 | tcp_try_rmem_schedule(sk, skb->truesize)) |
4627 | goto drop; | 4790 | goto drop; |
4628 | 4791 | ||
4629 | skb_set_owner_r(skb, sk); | 4792 | eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); |
4630 | __skb_queue_tail(&sk->sk_receive_queue, skb); | ||
4631 | } | 4793 | } |
4632 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | 4794 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; |
4633 | if (skb->len) | 4795 | if (skb->len) |
@@ -4651,7 +4813,7 @@ queue_and_out: | |||
4651 | tcp_fast_path_check(sk); | 4813 | tcp_fast_path_check(sk); |
4652 | 4814 | ||
4653 | if (eaten > 0) | 4815 | if (eaten > 0) |
4654 | __kfree_skb(skb); | 4816 | kfree_skb_partial(skb, fragstolen); |
4655 | else if (!sock_flag(sk, SOCK_DEAD)) | 4817 | else if (!sock_flag(sk, SOCK_DEAD)) |
4656 | sk->sk_data_ready(sk, 0); | 4818 | sk->sk_data_ready(sk, 0); |
4657 | return; | 4819 | return; |
@@ -4871,10 +5033,10 @@ static void tcp_collapse_ofo_queue(struct sock *sk) | |||
4871 | * Purge the out-of-order queue. | 5033 | * Purge the out-of-order queue. |
4872 | * Return true if queue was pruned. | 5034 | * Return true if queue was pruned. |
4873 | */ | 5035 | */ |
4874 | static int tcp_prune_ofo_queue(struct sock *sk) | 5036 | static bool tcp_prune_ofo_queue(struct sock *sk) |
4875 | { | 5037 | { |
4876 | struct tcp_sock *tp = tcp_sk(sk); | 5038 | struct tcp_sock *tp = tcp_sk(sk); |
4877 | int res = 0; | 5039 | bool res = false; |
4878 | 5040 | ||
4879 | if (!skb_queue_empty(&tp->out_of_order_queue)) { | 5041 | if (!skb_queue_empty(&tp->out_of_order_queue)) { |
4880 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED); | 5042 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED); |
@@ -4888,7 +5050,7 @@ static int tcp_prune_ofo_queue(struct sock *sk) | |||
4888 | if (tp->rx_opt.sack_ok) | 5050 | if (tp->rx_opt.sack_ok) |
4889 | tcp_sack_reset(&tp->rx_opt); | 5051 | tcp_sack_reset(&tp->rx_opt); |
4890 | sk_mem_reclaim(sk); | 5052 | sk_mem_reclaim(sk); |
4891 | res = 1; | 5053 | res = true; |
4892 | } | 5054 | } |
4893 | return res; | 5055 | return res; |
4894 | } | 5056 | } |
@@ -4965,7 +5127,7 @@ void tcp_cwnd_application_limited(struct sock *sk) | |||
4965 | tp->snd_cwnd_stamp = tcp_time_stamp; | 5127 | tp->snd_cwnd_stamp = tcp_time_stamp; |
4966 | } | 5128 | } |
4967 | 5129 | ||
4968 | static int tcp_should_expand_sndbuf(const struct sock *sk) | 5130 | static bool tcp_should_expand_sndbuf(const struct sock *sk) |
4969 | { | 5131 | { |
4970 | const struct tcp_sock *tp = tcp_sk(sk); | 5132 | const struct tcp_sock *tp = tcp_sk(sk); |
4971 | 5133 | ||
@@ -4973,21 +5135,21 @@ static int tcp_should_expand_sndbuf(const struct sock *sk) | |||
4973 | * not modify it. | 5135 | * not modify it. |
4974 | */ | 5136 | */ |
4975 | if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) | 5137 | if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) |
4976 | return 0; | 5138 | return false; |
4977 | 5139 | ||
4978 | /* If we are under global TCP memory pressure, do not expand. */ | 5140 | /* If we are under global TCP memory pressure, do not expand. */ |
4979 | if (sk_under_memory_pressure(sk)) | 5141 | if (sk_under_memory_pressure(sk)) |
4980 | return 0; | 5142 | return false; |
4981 | 5143 | ||
4982 | /* If we are under soft global TCP memory pressure, do not expand. */ | 5144 | /* If we are under soft global TCP memory pressure, do not expand. */ |
4983 | if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0)) | 5145 | if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0)) |
4984 | return 0; | 5146 | return false; |
4985 | 5147 | ||
4986 | /* If we filled the congestion window, do not expand. */ | 5148 | /* If we filled the congestion window, do not expand. */ |
4987 | if (tp->packets_out >= tp->snd_cwnd) | 5149 | if (tp->packets_out >= tp->snd_cwnd) |
4988 | return 0; | 5150 | return false; |
4989 | 5151 | ||
4990 | return 1; | 5152 | return true; |
4991 | } | 5153 | } |
4992 | 5154 | ||
4993 | /* When incoming ACK allowed to free some skb from write_queue, | 5155 | /* When incoming ACK allowed to free some skb from write_queue, |
@@ -5213,19 +5375,19 @@ static inline int tcp_checksum_complete_user(struct sock *sk, | |||
5213 | } | 5375 | } |
5214 | 5376 | ||
5215 | #ifdef CONFIG_NET_DMA | 5377 | #ifdef CONFIG_NET_DMA |
5216 | static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, | 5378 | static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, |
5217 | int hlen) | 5379 | int hlen) |
5218 | { | 5380 | { |
5219 | struct tcp_sock *tp = tcp_sk(sk); | 5381 | struct tcp_sock *tp = tcp_sk(sk); |
5220 | int chunk = skb->len - hlen; | 5382 | int chunk = skb->len - hlen; |
5221 | int dma_cookie; | 5383 | int dma_cookie; |
5222 | int copied_early = 0; | 5384 | bool copied_early = false; |
5223 | 5385 | ||
5224 | if (tp->ucopy.wakeup) | 5386 | if (tp->ucopy.wakeup) |
5225 | return 0; | 5387 | return false; |
5226 | 5388 | ||
5227 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) | 5389 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) |
5228 | tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY); | 5390 | tp->ucopy.dma_chan = net_dma_find_channel(); |
5229 | 5391 | ||
5230 | if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) { | 5392 | if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) { |
5231 | 5393 | ||
@@ -5238,7 +5400,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, | |||
5238 | goto out; | 5400 | goto out; |
5239 | 5401 | ||
5240 | tp->ucopy.dma_cookie = dma_cookie; | 5402 | tp->ucopy.dma_cookie = dma_cookie; |
5241 | copied_early = 1; | 5403 | copied_early = true; |
5242 | 5404 | ||
5243 | tp->ucopy.len -= chunk; | 5405 | tp->ucopy.len -= chunk; |
5244 | tp->copied_seq += chunk; | 5406 | tp->copied_seq += chunk; |
@@ -5430,6 +5592,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
5430 | } else { | 5592 | } else { |
5431 | int eaten = 0; | 5593 | int eaten = 0; |
5432 | int copied_early = 0; | 5594 | int copied_early = 0; |
5595 | bool fragstolen = false; | ||
5433 | 5596 | ||
5434 | if (tp->copied_seq == tp->rcv_nxt && | 5597 | if (tp->copied_seq == tp->rcv_nxt && |
5435 | len - tcp_header_len <= tp->ucopy.len) { | 5598 | len - tcp_header_len <= tp->ucopy.len) { |
@@ -5487,10 +5650,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
5487 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); | 5650 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); |
5488 | 5651 | ||
5489 | /* Bulk data transfer: receiver */ | 5652 | /* Bulk data transfer: receiver */ |
5490 | __skb_pull(skb, tcp_header_len); | 5653 | eaten = tcp_queue_rcv(sk, skb, tcp_header_len, |
5491 | __skb_queue_tail(&sk->sk_receive_queue, skb); | 5654 | &fragstolen); |
5492 | skb_set_owner_r(skb, sk); | ||
5493 | tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; | ||
5494 | } | 5655 | } |
5495 | 5656 | ||
5496 | tcp_event_data_recv(sk, skb); | 5657 | tcp_event_data_recv(sk, skb); |
@@ -5512,7 +5673,7 @@ no_ack: | |||
5512 | else | 5673 | else |
5513 | #endif | 5674 | #endif |
5514 | if (eaten) | 5675 | if (eaten) |
5515 | __kfree_skb(skb); | 5676 | kfree_skb_partial(skb, fragstolen); |
5516 | else | 5677 | else |
5517 | sk->sk_data_ready(sk, 0); | 5678 | sk->sk_data_ready(sk, 0); |
5518 | return 0; | 5679 | return 0; |
@@ -5556,6 +5717,44 @@ discard: | |||
5556 | } | 5717 | } |
5557 | EXPORT_SYMBOL(tcp_rcv_established); | 5718 | EXPORT_SYMBOL(tcp_rcv_established); |
5558 | 5719 | ||
5720 | void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) | ||
5721 | { | ||
5722 | struct tcp_sock *tp = tcp_sk(sk); | ||
5723 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
5724 | |||
5725 | tcp_set_state(sk, TCP_ESTABLISHED); | ||
5726 | |||
5727 | if (skb != NULL) | ||
5728 | security_inet_conn_established(sk, skb); | ||
5729 | |||
5730 | /* Make sure socket is routed, for correct metrics. */ | ||
5731 | icsk->icsk_af_ops->rebuild_header(sk); | ||
5732 | |||
5733 | tcp_init_metrics(sk); | ||
5734 | |||
5735 | tcp_init_congestion_control(sk); | ||
5736 | |||
5737 | /* Prevent spurious tcp_cwnd_restart() on first data | ||
5738 | * packet. | ||
5739 | */ | ||
5740 | tp->lsndtime = tcp_time_stamp; | ||
5741 | |||
5742 | tcp_init_buffer_space(sk); | ||
5743 | |||
5744 | if (sock_flag(sk, SOCK_KEEPOPEN)) | ||
5745 | inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); | ||
5746 | |||
5747 | if (!tp->rx_opt.snd_wscale) | ||
5748 | __tcp_fast_path_on(tp, tp->snd_wnd); | ||
5749 | else | ||
5750 | tp->pred_flags = 0; | ||
5751 | |||
5752 | if (!sock_flag(sk, SOCK_DEAD)) { | ||
5753 | sk->sk_state_change(sk); | ||
5754 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); | ||
5755 | } | ||
5756 | } | ||
5757 | |||
5559 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | 5758 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, |
5560 | const struct tcphdr *th, unsigned int len) | 5759 | const struct tcphdr *th, unsigned int len) |
5561 | { | 5760 | { |
@@ -5688,36 +5887,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5688 | } | 5887 | } |
5689 | 5888 | ||
5690 | smp_mb(); | 5889 | smp_mb(); |
5691 | tcp_set_state(sk, TCP_ESTABLISHED); | ||
5692 | |||
5693 | security_inet_conn_established(sk, skb); | ||
5694 | |||
5695 | /* Make sure socket is routed, for correct metrics. */ | ||
5696 | icsk->icsk_af_ops->rebuild_header(sk); | ||
5697 | 5890 | ||
5698 | tcp_init_metrics(sk); | 5891 | tcp_finish_connect(sk, skb); |
5699 | |||
5700 | tcp_init_congestion_control(sk); | ||
5701 | |||
5702 | /* Prevent spurious tcp_cwnd_restart() on first data | ||
5703 | * packet. | ||
5704 | */ | ||
5705 | tp->lsndtime = tcp_time_stamp; | ||
5706 | |||
5707 | tcp_init_buffer_space(sk); | ||
5708 | |||
5709 | if (sock_flag(sk, SOCK_KEEPOPEN)) | ||
5710 | inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); | ||
5711 | |||
5712 | if (!tp->rx_opt.snd_wscale) | ||
5713 | __tcp_fast_path_on(tp, tp->snd_wnd); | ||
5714 | else | ||
5715 | tp->pred_flags = 0; | ||
5716 | |||
5717 | if (!sock_flag(sk, SOCK_DEAD)) { | ||
5718 | sk->sk_state_change(sk); | ||
5719 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); | ||
5720 | } | ||
5721 | 5892 | ||
5722 | if (sk->sk_write_pending || | 5893 | if (sk->sk_write_pending || |
5723 | icsk->icsk_accept_queue.rskq_defer_accept || | 5894 | icsk->icsk_accept_queue.rskq_defer_accept || |
@@ -5731,8 +5902,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5731 | */ | 5902 | */ |
5732 | inet_csk_schedule_ack(sk); | 5903 | inet_csk_schedule_ack(sk); |
5733 | icsk->icsk_ack.lrcvtime = tcp_time_stamp; | 5904 | icsk->icsk_ack.lrcvtime = tcp_time_stamp; |
5734 | icsk->icsk_ack.ato = TCP_ATO_MIN; | ||
5735 | tcp_incr_quickack(sk); | ||
5736 | tcp_enter_quickack_mode(sk); | 5905 | tcp_enter_quickack_mode(sk); |
5737 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | 5906 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, |
5738 | TCP_DELACK_MAX, TCP_RTO_MAX); | 5907 | TCP_DELACK_MAX, TCP_RTO_MAX); |
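Factoring the ESTABLISHED-transition bookkeeping out of tcp_rcv_synsent_state_process() into tcp_finish_connect() is what enables the repair path: the helper tolerates skb == NULL, so a socket can be promoted to ESTABLISHED without ever seeing a SYN-ACK. The resulting two-line caller (shown in full in the tcp_ipv4.c hunk below):

    tcp_connect_init(sk);          /* normal connect-time initialization */
    tcp_finish_connect(sk, NULL);  /* no SYN-ACK skb in repair mode */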
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3a25cf743f8b..a43b87dfe800 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -138,6 +138,14 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) | |||
138 | } | 138 | } |
139 | EXPORT_SYMBOL_GPL(tcp_twsk_unique); | 139 | EXPORT_SYMBOL_GPL(tcp_twsk_unique); |
140 | 140 | ||
141 | static int tcp_repair_connect(struct sock *sk) | ||
142 | { | ||
143 | tcp_connect_init(sk); | ||
144 | tcp_finish_connect(sk, NULL); | ||
145 | |||
146 | return 0; | ||
147 | } | ||
148 | |||
141 | /* This will initiate an outgoing connection. */ | 149 | /* This will initiate an outgoing connection. */ |
142 | int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | 150 | int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) |
143 | { | 151 | { |
@@ -196,7 +204,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
196 | /* Reset inherited state */ | 204 | /* Reset inherited state */ |
197 | tp->rx_opt.ts_recent = 0; | 205 | tp->rx_opt.ts_recent = 0; |
198 | tp->rx_opt.ts_recent_stamp = 0; | 206 | tp->rx_opt.ts_recent_stamp = 0; |
199 | tp->write_seq = 0; | 207 | if (likely(!tp->repair)) |
208 | tp->write_seq = 0; | ||
200 | } | 209 | } |
201 | 210 | ||
202 | if (tcp_death_row.sysctl_tw_recycle && | 211 | if (tcp_death_row.sysctl_tw_recycle && |
@@ -247,7 +256,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
247 | sk->sk_gso_type = SKB_GSO_TCPV4; | 256 | sk->sk_gso_type = SKB_GSO_TCPV4; |
248 | sk_setup_caps(sk, &rt->dst); | 257 | sk_setup_caps(sk, &rt->dst); |
249 | 258 | ||
250 | if (!tp->write_seq) | 259 | if (!tp->write_seq && likely(!tp->repair)) |
251 | tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, | 260 | tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, |
252 | inet->inet_daddr, | 261 | inet->inet_daddr, |
253 | inet->inet_sport, | 262 | inet->inet_sport, |
@@ -255,7 +264,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
255 | 264 | ||
256 | inet->inet_id = tp->write_seq ^ jiffies; | 265 | inet->inet_id = tp->write_seq ^ jiffies; |
257 | 266 | ||
258 | err = tcp_connect(sk); | 267 | if (likely(!tp->repair)) |
268 | err = tcp_connect(sk); | ||
269 | else | ||
270 | err = tcp_repair_connect(sk); | ||
271 | |||
259 | rt = NULL; | 272 | rt = NULL; |
260 | if (err) | 273 | if (err) |
261 | goto failure; | 274 | goto failure; |
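With tp->repair set, connect() now skips both the write_seq reset and the SYN handshake entirely. A hypothetical user-space sequence for restoring the send side of a connection, continuing the sketch above (TCP_QUEUE_SEQ and the queue ids are assumptions from the repair series, and 'peer' is an already prepared sockaddr_in; check your headers):

    #ifndef TCP_QUEUE_SEQ
    #define TCP_QUEUE_SEQ  21
    #define TCP_SEND_QUEUE 2
    #endif

    int on = 1, q = TCP_SEND_QUEUE;
    uint32_t snd_seq = saved_write_seq;   /* captured at checkpoint time */

    setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on));
    setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q));
    setsockopt(fd, IPPROTO_TCP, TCP_QUEUE_SEQ, &snd_seq, sizeof(snd_seq));
    /* no SYN is emitted; the socket lands directly in ESTABLISHED */
    connect(fd, (struct sockaddr *)&peer, sizeof(peer));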
@@ -853,14 +866,14 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) | |||
853 | } | 866 | } |
854 | 867 | ||
855 | /* | 868 | /* |
856 | * Return 1 if a syncookie should be sent | 869 | * Return true if a syncookie should be sent |
857 | */ | 870 | */ |
858 | int tcp_syn_flood_action(struct sock *sk, | 871 | bool tcp_syn_flood_action(struct sock *sk, |
859 | const struct sk_buff *skb, | 872 | const struct sk_buff *skb, |
860 | const char *proto) | 873 | const char *proto) |
861 | { | 874 | { |
862 | const char *msg = "Dropping request"; | 875 | const char *msg = "Dropping request"; |
863 | int want_cookie = 0; | 876 | bool want_cookie = false; |
864 | struct listen_sock *lopt; | 877 | struct listen_sock *lopt; |
865 | 878 | ||
866 | 879 | ||
@@ -868,7 +881,7 @@ int tcp_syn_flood_action(struct sock *sk, | |||
868 | #ifdef CONFIG_SYN_COOKIES | 881 | #ifdef CONFIG_SYN_COOKIES |
869 | if (sysctl_tcp_syncookies) { | 882 | if (sysctl_tcp_syncookies) { |
870 | msg = "Sending cookies"; | 883 | msg = "Sending cookies"; |
871 | want_cookie = 1; | 884 | want_cookie = true; |
872 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); | 885 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); |
873 | } else | 886 | } else |
874 | #endif | 887 | #endif |
@@ -1183,7 +1196,7 @@ clear_hash_noput: | |||
1183 | } | 1196 | } |
1184 | EXPORT_SYMBOL(tcp_v4_md5_hash_skb); | 1197 | EXPORT_SYMBOL(tcp_v4_md5_hash_skb); |
1185 | 1198 | ||
1186 | static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) | 1199 | static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) |
1187 | { | 1200 | { |
1188 | /* | 1201 | /* |
1189 | * This gets called for each TCP segment that arrives | 1202 | * This gets called for each TCP segment that arrives |
@@ -1206,16 +1219,16 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) | |||
1206 | 1219 | ||
1207 | /* We've parsed the options - do we have a hash? */ | 1220 | /* We've parsed the options - do we have a hash? */ |
1208 | if (!hash_expected && !hash_location) | 1221 | if (!hash_expected && !hash_location) |
1209 | return 0; | 1222 | return false; |
1210 | 1223 | ||
1211 | if (hash_expected && !hash_location) { | 1224 | if (hash_expected && !hash_location) { |
1212 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); | 1225 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); |
1213 | return 1; | 1226 | return true; |
1214 | } | 1227 | } |
1215 | 1228 | ||
1216 | if (!hash_expected && hash_location) { | 1229 | if (!hash_expected && hash_location) { |
1217 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); | 1230 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); |
1218 | return 1; | 1231 | return true; |
1219 | } | 1232 | } |
1220 | 1233 | ||
1221 | /* Okay, so this is hash_expected and hash_location - | 1234 | /* Okay, so this is hash_expected and hash_location - |
@@ -1226,15 +1239,14 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) | |||
1226 | NULL, NULL, skb); | 1239 | NULL, NULL, skb); |
1227 | 1240 | ||
1228 | if (genhash || memcmp(hash_location, newhash, 16) != 0) { | 1241 | if (genhash || memcmp(hash_location, newhash, 16) != 0) { |
1229 | if (net_ratelimit()) { | 1242 | net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", |
1230 | pr_info("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", | 1243 | &iph->saddr, ntohs(th->source), |
1231 | &iph->saddr, ntohs(th->source), | 1244 | &iph->daddr, ntohs(th->dest), |
1232 | &iph->daddr, ntohs(th->dest), | 1245 | genhash ? " tcp_v4_calc_md5_hash failed" |
1233 | genhash ? " tcp_v4_calc_md5_hash failed" : ""); | 1246 | : ""); |
1234 | } | 1247 | return true; |
1235 | return 1; | ||
1236 | } | 1248 | } |
1237 | return 0; | 1249 | return false; |
1238 | } | 1250 | } |
1239 | 1251 | ||
1240 | #endif | 1252 | #endif |
@@ -1268,7 +1280,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1268 | __be32 saddr = ip_hdr(skb)->saddr; | 1280 | __be32 saddr = ip_hdr(skb)->saddr; |
1269 | __be32 daddr = ip_hdr(skb)->daddr; | 1281 | __be32 daddr = ip_hdr(skb)->daddr; |
1270 | __u32 isn = TCP_SKB_CB(skb)->when; | 1282 | __u32 isn = TCP_SKB_CB(skb)->when; |
1271 | int want_cookie = 0; | 1283 | bool want_cookie = false; |
1272 | 1284 | ||
1273 | /* Never answer to SYNs send to broadcast or multicast */ | 1285 | /* Never answer to SYNs send to broadcast or multicast */ |
1274 | if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) | 1286 | if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) |
@@ -1327,7 +1339,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1327 | while (l-- > 0) | 1339 | while (l-- > 0) |
1328 | *c++ ^= *hash_location++; | 1340 | *c++ ^= *hash_location++; |
1329 | 1341 | ||
1330 | want_cookie = 0; /* not our kind of cookie */ | 1342 | want_cookie = false; /* not our kind of cookie */ |
1331 | tmp_ext.cookie_out_never = 0; /* false */ | 1343 | tmp_ext.cookie_out_never = 0; /* false */ |
1332 | tmp_ext.cookie_plus = tmp_opt.cookie_plus; | 1344 | tmp_ext.cookie_plus = tmp_opt.cookie_plus; |
1333 | } else if (!tp->rx_opt.cookie_in_always) { | 1345 | } else if (!tp->rx_opt.cookie_in_always) { |
@@ -1355,7 +1367,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1355 | goto drop_and_free; | 1367 | goto drop_and_free; |
1356 | 1368 | ||
1357 | if (!want_cookie || tmp_opt.tstamp_ok) | 1369 | if (!want_cookie || tmp_opt.tstamp_ok) |
1358 | TCP_ECN_create_request(req, tcp_hdr(skb)); | 1370 | TCP_ECN_create_request(req, skb); |
1359 | 1371 | ||
1360 | if (want_cookie) { | 1372 | if (want_cookie) { |
1361 | isn = cookie_v4_init_sequence(sk, skb, &req->mss); | 1373 | isn = cookie_v4_init_sequence(sk, skb, &req->mss); |
@@ -1730,7 +1742,7 @@ process: | |||
1730 | #ifdef CONFIG_NET_DMA | 1742 | #ifdef CONFIG_NET_DMA |
1731 | struct tcp_sock *tp = tcp_sk(sk); | 1743 | struct tcp_sock *tp = tcp_sk(sk); |
1732 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) | 1744 | if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) |
1733 | tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY); | 1745 | tp->ucopy.dma_chan = net_dma_find_channel(); |
1734 | if (tp->ucopy.dma_chan) | 1746 | if (tp->ucopy.dma_chan) |
1735 | ret = tcp_v4_do_rcv(sk, skb); | 1747 | ret = tcp_v4_do_rcv(sk, skb); |
1736 | else | 1748 | else |
@@ -1739,7 +1751,8 @@ process: | |||
1739 | if (!tcp_prequeue(sk, skb)) | 1751 | if (!tcp_prequeue(sk, skb)) |
1740 | ret = tcp_v4_do_rcv(sk, skb); | 1752 | ret = tcp_v4_do_rcv(sk, skb); |
1741 | } | 1753 | } |
1742 | } else if (unlikely(sk_add_backlog(sk, skb))) { | 1754 | } else if (unlikely(sk_add_backlog(sk, skb, |
1755 | sk->sk_rcvbuf + sk->sk_sndbuf))) { | ||
1743 | bh_unlock_sock(sk); | 1756 | bh_unlock_sock(sk); |
1744 | NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); | 1757 | NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); |
1745 | goto discard_and_relse; | 1758 | goto discard_and_relse; |
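sk_add_backlog() now takes an explicit limit, so a socket whose owner holds the lock can no longer accumulate an unbounded backlog; rcvbuf + sndbuf is a sensible cap because ACK processing can also free send-side memory. The underlying check is presumably along these lines (a sketch, not the exact net/core implementation):

    static inline bool sk_rcvqueues_full(const struct sock *sk,
                                         unsigned int limit)
    {
            unsigned int qsize = sk->sk_backlog.len +
                                 atomic_read(&sk->sk_rmem_alloc);

            return qsize > limit;   /* drop the skb once past the cap */
    }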
@@ -1875,64 +1888,15 @@ static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { | |||
1875 | static int tcp_v4_init_sock(struct sock *sk) | 1888 | static int tcp_v4_init_sock(struct sock *sk) |
1876 | { | 1889 | { |
1877 | struct inet_connection_sock *icsk = inet_csk(sk); | 1890 | struct inet_connection_sock *icsk = inet_csk(sk); |
1878 | struct tcp_sock *tp = tcp_sk(sk); | ||
1879 | 1891 | ||
1880 | skb_queue_head_init(&tp->out_of_order_queue); | 1892 | tcp_init_sock(sk); |
1881 | tcp_init_xmit_timers(sk); | ||
1882 | tcp_prequeue_init(tp); | ||
1883 | |||
1884 | icsk->icsk_rto = TCP_TIMEOUT_INIT; | ||
1885 | tp->mdev = TCP_TIMEOUT_INIT; | ||
1886 | |||
1887 | /* So many TCP implementations out there (incorrectly) count the | ||
1888 | * initial SYN frame in their delayed-ACK and congestion control | ||
1889 | * algorithms that we must have the following bandaid to talk | ||
1890 | * efficiently to them. -DaveM | ||
1891 | */ | ||
1892 | tp->snd_cwnd = TCP_INIT_CWND; | ||
1893 | |||
1894 | /* See draft-stevens-tcpca-spec-01 for discussion of the | ||
1895 | * initialization of these values. | ||
1896 | */ | ||
1897 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | ||
1898 | tp->snd_cwnd_clamp = ~0; | ||
1899 | tp->mss_cache = TCP_MSS_DEFAULT; | ||
1900 | |||
1901 | tp->reordering = sysctl_tcp_reordering; | ||
1902 | icsk->icsk_ca_ops = &tcp_init_congestion_ops; | ||
1903 | |||
1904 | sk->sk_state = TCP_CLOSE; | ||
1905 | |||
1906 | sk->sk_write_space = sk_stream_write_space; | ||
1907 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); | ||
1908 | 1893 | ||
1909 | icsk->icsk_af_ops = &ipv4_specific; | 1894 | icsk->icsk_af_ops = &ipv4_specific; |
1910 | icsk->icsk_sync_mss = tcp_sync_mss; | 1895 | |
1911 | #ifdef CONFIG_TCP_MD5SIG | 1896 | #ifdef CONFIG_TCP_MD5SIG |
1912 | tp->af_specific = &tcp_sock_ipv4_specific; | 1897 | tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific; |
1913 | #endif | 1898 | #endif |
1914 | 1899 | ||
1915 | /* TCP Cookie Transactions */ | ||
1916 | if (sysctl_tcp_cookie_size > 0) { | ||
1917 | /* Default, cookies without s_data_payload. */ | ||
1918 | tp->cookie_values = | ||
1919 | kzalloc(sizeof(*tp->cookie_values), | ||
1920 | sk->sk_allocation); | ||
1921 | if (tp->cookie_values != NULL) | ||
1922 | kref_init(&tp->cookie_values->kref); | ||
1923 | } | ||
1924 | /* Presumed zeroed, in order of appearance: | ||
1925 | * cookie_in_always, cookie_out_never, | ||
1926 | * s_data_constant, s_data_in, s_data_out | ||
1927 | */ | ||
1928 | sk->sk_sndbuf = sysctl_tcp_wmem[1]; | ||
1929 | sk->sk_rcvbuf = sysctl_tcp_rmem[1]; | ||
1930 | |||
1931 | local_bh_disable(); | ||
1932 | sock_update_memcg(sk); | ||
1933 | sk_sockets_allocated_inc(sk); | ||
1934 | local_bh_enable(); | ||
1935 | |||
1936 | return 0; | 1900 | return 0; |
1937 | } | 1901 | } |
1938 | 1902 | ||
@@ -2109,7 +2073,7 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) | |||
2109 | return rc; | 2073 | return rc; |
2110 | } | 2074 | } |
2111 | 2075 | ||
2112 | static inline int empty_bucket(struct tcp_iter_state *st) | 2076 | static inline bool empty_bucket(struct tcp_iter_state *st) |
2113 | { | 2077 | { |
2114 | return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && | 2078 | return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && |
2115 | hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); | 2079 | hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3cabafb5cdd1..b85d9fe7d663 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -55,7 +55,7 @@ EXPORT_SYMBOL_GPL(tcp_death_row); | |||
55 | * state. | 55 | * state. |
56 | */ | 56 | */ |
57 | 57 | ||
58 | static int tcp_remember_stamp(struct sock *sk) | 58 | static bool tcp_remember_stamp(struct sock *sk) |
59 | { | 59 | { |
60 | const struct inet_connection_sock *icsk = inet_csk(sk); | 60 | const struct inet_connection_sock *icsk = inet_csk(sk); |
61 | struct tcp_sock *tp = tcp_sk(sk); | 61 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -72,13 +72,13 @@ static int tcp_remember_stamp(struct sock *sk) | |||
72 | } | 72 | } |
73 | if (release_it) | 73 | if (release_it) |
74 | inet_putpeer(peer); | 74 | inet_putpeer(peer); |
75 | return 1; | 75 | return true; |
76 | } | 76 | } |
77 | 77 | ||
78 | return 0; | 78 | return false; |
79 | } | 79 | } |
80 | 80 | ||
81 | static int tcp_tw_remember_stamp(struct inet_timewait_sock *tw) | 81 | static bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) |
82 | { | 82 | { |
83 | struct sock *sk = (struct sock *) tw; | 83 | struct sock *sk = (struct sock *) tw; |
84 | struct inet_peer *peer; | 84 | struct inet_peer *peer; |
@@ -94,17 +94,17 @@ static int tcp_tw_remember_stamp(struct inet_timewait_sock *tw) | |||
94 | peer->tcp_ts = tcptw->tw_ts_recent; | 94 | peer->tcp_ts = tcptw->tw_ts_recent; |
95 | } | 95 | } |
96 | inet_putpeer(peer); | 96 | inet_putpeer(peer); |
97 | return 1; | 97 | return true; |
98 | } | 98 | } |
99 | return 0; | 99 | return false; |
100 | } | 100 | } |
101 | 101 | ||
102 | static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) | 102 | static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) |
103 | { | 103 | { |
104 | if (seq == s_win) | 104 | if (seq == s_win) |
105 | return 1; | 105 | return true; |
106 | if (after(end_seq, s_win) && before(seq, e_win)) | 106 | if (after(end_seq, s_win) && before(seq, e_win)) |
107 | return 1; | 107 | return true; |
108 | return seq == e_win && seq == end_seq; | 108 | return seq == e_win && seq == end_seq; |
109 | } | 109 | } |
110 | 110 | ||
@@ -143,7 +143,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, | |||
143 | struct tcp_options_received tmp_opt; | 143 | struct tcp_options_received tmp_opt; |
144 | const u8 *hash_location; | 144 | const u8 *hash_location; |
145 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); | 145 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); |
146 | int paws_reject = 0; | 146 | bool paws_reject = false; |
147 | 147 | ||
148 | tmp_opt.saw_tstamp = 0; | 148 | tmp_opt.saw_tstamp = 0; |
149 | if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { | 149 | if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { |
@@ -316,7 +316,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
316 | struct inet_timewait_sock *tw = NULL; | 316 | struct inet_timewait_sock *tw = NULL; |
317 | const struct inet_connection_sock *icsk = inet_csk(sk); | 317 | const struct inet_connection_sock *icsk = inet_csk(sk); |
318 | const struct tcp_sock *tp = tcp_sk(sk); | 318 | const struct tcp_sock *tp = tcp_sk(sk); |
319 | int recycle_ok = 0; | 319 | bool recycle_ok = false; |
320 | 320 | ||
321 | if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) | 321 | if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) |
322 | recycle_ok = tcp_remember_stamp(sk); | 322 | recycle_ok = tcp_remember_stamp(sk); |
@@ -482,6 +482,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
482 | newtp->sacked_out = 0; | 482 | newtp->sacked_out = 0; |
483 | newtp->fackets_out = 0; | 483 | newtp->fackets_out = 0; |
484 | newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | 484 | newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; |
485 | tcp_enable_early_retrans(newtp); | ||
485 | 486 | ||
486 | /* So many TCP implementations out there (incorrectly) count the | 487 | /* So many TCP implementations out there (incorrectly) count the |
487 | * initial SYN frame in their delayed-ACK and congestion control | 488 | * initial SYN frame in their delayed-ACK and congestion control |
@@ -574,7 +575,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
574 | struct sock *child; | 575 | struct sock *child; |
575 | const struct tcphdr *th = tcp_hdr(skb); | 576 | const struct tcphdr *th = tcp_hdr(skb); |
576 | __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); | 577 | __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); |
577 | int paws_reject = 0; | 578 | bool paws_reject = false; |
578 | 579 | ||
579 | tmp_opt.saw_tstamp = 0; | 580 | tmp_opt.saw_tstamp = 0; |
580 | if (th->doff > (sizeof(struct tcphdr)>>2)) { | 581 | if (th->doff > (sizeof(struct tcphdr)>>2)) { |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 364784a91939..803cbfe82fbc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -34,6 +34,8 @@ | |||
34 | * | 34 | * |
35 | */ | 35 | */ |
36 | 36 | ||
37 | #define pr_fmt(fmt) "TCP: " fmt | ||
38 | |||
37 | #include <net/tcp.h> | 39 | #include <net/tcp.h> |
38 | 40 | ||
39 | #include <linux/compiler.h> | 41 | #include <linux/compiler.h> |
@@ -78,9 +80,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) | |||
78 | tp->frto_counter = 3; | 80 | tp->frto_counter = 3; |
79 | 81 | ||
80 | tp->packets_out += tcp_skb_pcount(skb); | 82 | tp->packets_out += tcp_skb_pcount(skb); |
81 | if (!prior_packets) | 83 | if (!prior_packets || tp->early_retrans_delayed) |
82 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 84 | tcp_rearm_rto(sk); |
83 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); | ||
84 | } | 85 | } |
85 | 86 | ||
86 | /* SND.NXT, if window was not shrunk. | 87 | /* SND.NXT, if window was not shrunk. |
@@ -369,7 +370,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) | |||
369 | TCP_SKB_CB(skb)->end_seq = seq; | 370 | TCP_SKB_CB(skb)->end_seq = seq; |
370 | } | 371 | } |
371 | 372 | ||
372 | static inline int tcp_urg_mode(const struct tcp_sock *tp) | 373 | static inline bool tcp_urg_mode(const struct tcp_sock *tp) |
373 | { | 374 | { |
374 | return tp->snd_una != tp->snd_up; | 375 | return tp->snd_una != tp->snd_up; |
375 | } | 376 | } |
@@ -563,13 +564,13 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | |||
563 | /* Compute TCP options for SYN packets. This is not the final | 564 | /* Compute TCP options for SYN packets. This is not the final |
564 | * network wire format yet. | 565 | * network wire format yet. |
565 | */ | 566 | */ |
566 | static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, | 567 | static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, |
567 | struct tcp_out_options *opts, | 568 | struct tcp_out_options *opts, |
568 | struct tcp_md5sig_key **md5) | 569 | struct tcp_md5sig_key **md5) |
569 | { | 570 | { |
570 | struct tcp_sock *tp = tcp_sk(sk); | 571 | struct tcp_sock *tp = tcp_sk(sk); |
571 | struct tcp_cookie_values *cvp = tp->cookie_values; | 572 | struct tcp_cookie_values *cvp = tp->cookie_values; |
572 | unsigned remaining = MAX_TCP_OPTION_SPACE; | 573 | unsigned int remaining = MAX_TCP_OPTION_SPACE; |
573 | u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? | 574 | u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? |
574 | tcp_cookie_size_check(cvp->cookie_desired) : | 575 | tcp_cookie_size_check(cvp->cookie_desired) : |
575 | 0; | 576 | 0; |
@@ -663,15 +664,15 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, | |||
663 | } | 664 | } |
664 | 665 | ||
665 | /* Set up TCP options for SYN-ACKs. */ | 666 | /* Set up TCP options for SYN-ACKs. */ |
666 | static unsigned tcp_synack_options(struct sock *sk, | 667 | static unsigned int tcp_synack_options(struct sock *sk, |
667 | struct request_sock *req, | 668 | struct request_sock *req, |
668 | unsigned mss, struct sk_buff *skb, | 669 | unsigned int mss, struct sk_buff *skb, |
669 | struct tcp_out_options *opts, | 670 | struct tcp_out_options *opts, |
670 | struct tcp_md5sig_key **md5, | 671 | struct tcp_md5sig_key **md5, |
671 | struct tcp_extend_values *xvp) | 672 | struct tcp_extend_values *xvp) |
672 | { | 673 | { |
673 | struct inet_request_sock *ireq = inet_rsk(req); | 674 | struct inet_request_sock *ireq = inet_rsk(req); |
674 | unsigned remaining = MAX_TCP_OPTION_SPACE; | 675 | unsigned int remaining = MAX_TCP_OPTION_SPACE; |
675 | u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? | 676 | u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? |
676 | xvp->cookie_plus : | 677 | xvp->cookie_plus : |
677 | 0; | 678 | 0; |
@@ -742,13 +743,13 @@ static unsigned tcp_synack_options(struct sock *sk, | |||
742 | /* Compute TCP options for ESTABLISHED sockets. This is not the | 743 | /* Compute TCP options for ESTABLISHED sockets. This is not the |
743 | * final wire format yet. | 744 | * final wire format yet. |
744 | */ | 745 | */ |
745 | static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, | 746 | static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb, |
746 | struct tcp_out_options *opts, | 747 | struct tcp_out_options *opts, |
747 | struct tcp_md5sig_key **md5) | 748 | struct tcp_md5sig_key **md5) |
748 | { | 749 | { |
749 | struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; | 750 | struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; |
750 | struct tcp_sock *tp = tcp_sk(sk); | 751 | struct tcp_sock *tp = tcp_sk(sk); |
751 | unsigned size = 0; | 752 | unsigned int size = 0; |
752 | unsigned int eff_sacks; | 753 | unsigned int eff_sacks; |
753 | 754 | ||
754 | #ifdef CONFIG_TCP_MD5SIG | 755 | #ifdef CONFIG_TCP_MD5SIG |
@@ -770,9 +771,9 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, | |||
770 | 771 | ||
771 | eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; | 772 | eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; |
772 | if (unlikely(eff_sacks)) { | 773 | if (unlikely(eff_sacks)) { |
773 | const unsigned remaining = MAX_TCP_OPTION_SPACE - size; | 774 | const unsigned int remaining = MAX_TCP_OPTION_SPACE - size; |
774 | opts->num_sack_blocks = | 775 | opts->num_sack_blocks = |
775 | min_t(unsigned, eff_sacks, | 776 | min_t(unsigned int, eff_sacks, |
776 | (remaining - TCPOLEN_SACK_BASE_ALIGNED) / | 777 | (remaining - TCPOLEN_SACK_BASE_ALIGNED) / |
777 | TCPOLEN_SACK_PERBLOCK); | 778 | TCPOLEN_SACK_PERBLOCK); |
778 | size += TCPOLEN_SACK_BASE_ALIGNED + | 779 | size += TCPOLEN_SACK_BASE_ALIGNED + |
@@ -801,7 +802,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
801 | struct tcp_sock *tp; | 802 | struct tcp_sock *tp; |
802 | struct tcp_skb_cb *tcb; | 803 | struct tcp_skb_cb *tcb; |
803 | struct tcp_out_options opts; | 804 | struct tcp_out_options opts; |
804 | unsigned tcp_options_size, tcp_header_size; | 805 | unsigned int tcp_options_size, tcp_header_size; |
805 | struct tcp_md5sig_key *md5; | 806 | struct tcp_md5sig_key *md5; |
806 | struct tcphdr *th; | 807 | struct tcphdr *th; |
807 | int err; | 808 | int err; |
@@ -1096,6 +1097,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) | |||
1096 | eat = min_t(int, len, skb_headlen(skb)); | 1097 | eat = min_t(int, len, skb_headlen(skb)); |
1097 | if (eat) { | 1098 | if (eat) { |
1098 | __skb_pull(skb, eat); | 1099 | __skb_pull(skb, eat); |
1100 | skb->avail_size -= eat; | ||
1099 | len -= eat; | 1101 | len -= eat; |
1100 | if (!len) | 1102 | if (!len) |
1101 | return; | 1103 | return; |
@@ -1149,7 +1151,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) | |||
1149 | } | 1151 | } |
1150 | 1152 | ||
1151 | /* Calculate MSS. Not accounting for SACKs here. */ | 1153 | /* Calculate MSS. Not accounting for SACKs here. */ |
1152 | int tcp_mtu_to_mss(const struct sock *sk, int pmtu) | 1154 | int tcp_mtu_to_mss(struct sock *sk, int pmtu) |
1153 | { | 1155 | { |
1154 | const struct tcp_sock *tp = tcp_sk(sk); | 1156 | const struct tcp_sock *tp = tcp_sk(sk); |
1155 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1157 | const struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -1160,6 +1162,14 @@ int tcp_mtu_to_mss(const struct sock *sk, int pmtu) | |||
1160 | */ | 1162 | */ |
1161 | mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr); | 1163 | mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr); |
1162 | 1164 | ||
1165 | /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ | ||
1166 | if (icsk->icsk_af_ops->net_frag_header_len) { | ||
1167 | const struct dst_entry *dst = __sk_dst_get(sk); | ||
1168 | |||
1169 | if (dst && dst_allfrag(dst)) | ||
1170 | mss_now -= icsk->icsk_af_ops->net_frag_header_len; | ||
1171 | } | ||
1172 | |||
1163 | /* Clamp it (mss_clamp does not include tcp options) */ | 1173 | /* Clamp it (mss_clamp does not include tcp options) */ |
1164 | if (mss_now > tp->rx_opt.mss_clamp) | 1174 | if (mss_now > tp->rx_opt.mss_clamp) |
1165 | mss_now = tp->rx_opt.mss_clamp; | 1175 | mss_now = tp->rx_opt.mss_clamp; |
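When RTAX_FEATURE_ALLFRAG is set, IPv6 prepends an 8-byte fragment header to every packet, and that space has to come out of the MSS or every full-sized segment overshoots the path MTU. Worked numbers, in a simplified model of the calculation above:

    /* Simplified sketch:
     *   IPv6, PMTU 1280, allfrag: 1280 - 40 - 20 - 8 = 1212 bytes of MSS
     *   IPv4 is unaffected since its net_frag_header_len is 0. */
    static int mtu_to_mss(int pmtu, int net_header_len,
                          int net_frag_header_len, int allfrag)
    {
            int mss = pmtu - net_header_len - 20 /* sizeof tcphdr */;

            if (allfrag)
                    mss -= net_frag_header_len;
            return mss;
    }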
@@ -1178,7 +1188,7 @@ int tcp_mtu_to_mss(const struct sock *sk, int pmtu) | |||
1178 | } | 1188 | } |
1179 | 1189 | ||
1180 | /* Inverse of above */ | 1190 | /* Inverse of above */ |
1181 | int tcp_mss_to_mtu(const struct sock *sk, int mss) | 1191 | int tcp_mss_to_mtu(struct sock *sk, int mss) |
1182 | { | 1192 | { |
1183 | const struct tcp_sock *tp = tcp_sk(sk); | 1193 | const struct tcp_sock *tp = tcp_sk(sk); |
1184 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1194 | const struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -1189,6 +1199,13 @@ int tcp_mss_to_mtu(const struct sock *sk, int mss) | |||
1189 | icsk->icsk_ext_hdr_len + | 1199 | icsk->icsk_ext_hdr_len + |
1190 | icsk->icsk_af_ops->net_header_len; | 1200 | icsk->icsk_af_ops->net_header_len; |
1191 | 1201 | ||
1202 | /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ | ||
1203 | if (icsk->icsk_af_ops->net_frag_header_len) { | ||
1204 | const struct dst_entry *dst = __sk_dst_get(sk); | ||
1205 | |||
1206 | if (dst && dst_allfrag(dst)) | ||
1207 | mtu += icsk->icsk_af_ops->net_frag_header_len; | ||
1208 | } | ||
1192 | return mtu; | 1209 | return mtu; |
1193 | } | 1210 | } |
1194 | 1211 | ||
@@ -1258,7 +1275,7 @@ unsigned int tcp_current_mss(struct sock *sk) | |||
1258 | const struct tcp_sock *tp = tcp_sk(sk); | 1275 | const struct tcp_sock *tp = tcp_sk(sk); |
1259 | const struct dst_entry *dst = __sk_dst_get(sk); | 1276 | const struct dst_entry *dst = __sk_dst_get(sk); |
1260 | u32 mss_now; | 1277 | u32 mss_now; |
1261 | unsigned header_len; | 1278 | unsigned int header_len; |
1262 | struct tcp_out_options opts; | 1279 | struct tcp_out_options opts; |
1263 | struct tcp_md5sig_key *md5; | 1280 | struct tcp_md5sig_key *md5; |
1264 | 1281 | ||
@@ -1374,33 +1391,33 @@ static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb, | |||
1374 | } | 1391 | } |
1375 | 1392 | ||
1376 | /* Minshall's variant of the Nagle send check. */ | 1393 | /* Minshall's variant of the Nagle send check. */ |
1377 | static inline int tcp_minshall_check(const struct tcp_sock *tp) | 1394 | static inline bool tcp_minshall_check(const struct tcp_sock *tp) |
1378 | { | 1395 | { |
1379 | return after(tp->snd_sml, tp->snd_una) && | 1396 | return after(tp->snd_sml, tp->snd_una) && |
1380 | !after(tp->snd_sml, tp->snd_nxt); | 1397 | !after(tp->snd_sml, tp->snd_nxt); |
1381 | } | 1398 | } |
1382 | 1399 | ||
1383 | /* Return 0 if packet can be sent now without violating Nagle's rules: | 1400 | /* Return false if packet can be sent now without violating Nagle's rules: |
1384 | * 1. It is full sized. | 1401 | * 1. It is full sized. |
1385 | * 2. Or it contains FIN. (already checked by caller) | 1402 | * 2. Or it contains FIN. (already checked by caller) |
1386 | * 3. Or TCP_CORK is not set, and TCP_NODELAY is set. | 1403 | * 3. Or TCP_CORK is not set, and TCP_NODELAY is set. |
1387 | * 4. Or TCP_CORK is not set, and all sent packets are ACKed. | 1404 | * 4. Or TCP_CORK is not set, and all sent packets are ACKed. |
1388 | * With Minshall's modification: all sent small packets are ACKed. | 1405 | * With Minshall's modification: all sent small packets are ACKed. |
1389 | */ | 1406 | */ |
1390 | static inline int tcp_nagle_check(const struct tcp_sock *tp, | 1407 | static inline bool tcp_nagle_check(const struct tcp_sock *tp, |
1391 | const struct sk_buff *skb, | 1408 | const struct sk_buff *skb, |
1392 | unsigned mss_now, int nonagle) | 1409 | unsigned int mss_now, int nonagle) |
1393 | { | 1410 | { |
1394 | return skb->len < mss_now && | 1411 | return skb->len < mss_now && |
1395 | ((nonagle & TCP_NAGLE_CORK) || | 1412 | ((nonagle & TCP_NAGLE_CORK) || |
1396 | (!nonagle && tp->packets_out && tcp_minshall_check(tp))); | 1413 | (!nonagle && tp->packets_out && tcp_minshall_check(tp))); |
1397 | } | 1414 | } |
1398 | 1415 | ||
1399 | /* Return non-zero if the Nagle test allows this packet to be | 1416 | /* Return true if the Nagle test allows this packet to be |
1400 | * sent now. | 1417 | * sent now. |
1401 | */ | 1418 | */ |
1402 | static inline int tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb, | 1419 | static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb, |
1403 | unsigned int cur_mss, int nonagle) | 1420 | unsigned int cur_mss, int nonagle) |
1404 | { | 1421 | { |
1405 | /* Nagle rule does not apply to frames which sit in the middle of the | 1422 | /* Nagle rule does not apply to frames which sit in the middle of the |
1406 | * write_queue (they have no chance to get new data). | 1423 | * write_queue (they have no chance to get new data). |
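Read together, tcp_minshall_check() and tcp_nagle_check() implement Nagle with Minshall's refinement: hold back a sub-MSS segment only while an earlier small segment is still unacknowledged (snd_sml beyond snd_una but not beyond snd_nxt). Stripped of kernel detail, the decision reduces to this illustrative sketch:

    /* Hold back a small segment? (illustrative, not the kernel code) */
    static bool nagle_hold(size_t len, size_t mss, bool cork,
                           bool nodelay, bool small_pkt_unacked)
    {
            if (len >= mss)
                    return false;          /* full-sized: send now */
            if (cork)
                    return true;           /* TCP_CORK: always hold */
            if (nodelay)
                    return false;          /* TCP_NODELAY: never hold */
            return small_pkt_unacked;      /* Minshall: one small pkt in flight */
    }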
@@ -1409,24 +1426,25 @@ static inline int tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff | |||
1409 | * argument based upon the location of SKB in the send queue. | 1426 | * argument based upon the location of SKB in the send queue. |
1410 | */ | 1427 | */ |
1411 | if (nonagle & TCP_NAGLE_PUSH) | 1428 | if (nonagle & TCP_NAGLE_PUSH) |
1412 | return 1; | 1429 | return true; |
1413 | 1430 | ||
1414 | /* Don't use the nagle rule for urgent data (or for the final FIN). | 1431 | /* Don't use the nagle rule for urgent data (or for the final FIN). |
1415 | * Nagle can be ignored during F-RTO too (see RFC4138). | 1432 | * Nagle can be ignored during F-RTO too (see RFC4138). |
1416 | */ | 1433 | */ |
1417 | if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || | 1434 | if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || |
1418 | (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) | 1435 | (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) |
1419 | return 1; | 1436 | return true; |
1420 | 1437 | ||
1421 | if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) | 1438 | if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) |
1422 | return 1; | 1439 | return true; |
1423 | 1440 | ||
1424 | return 0; | 1441 | return false; |
1425 | } | 1442 | } |
1426 | 1443 | ||
1427 | /* Does at least the first segment of SKB fit into the send window? */ | 1444 | /* Does at least the first segment of SKB fit into the send window? */ |
1428 | static inline int tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb, | 1445 | static bool tcp_snd_wnd_test(const struct tcp_sock *tp, |
1429 | unsigned int cur_mss) | 1446 | const struct sk_buff *skb, |
1447 | unsigned int cur_mss) | ||
1430 | { | 1448 | { |
1431 | u32 end_seq = TCP_SKB_CB(skb)->end_seq; | 1449 | u32 end_seq = TCP_SKB_CB(skb)->end_seq; |
1432 | 1450 | ||
@@ -1459,7 +1477,7 @@ static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb, | |||
1459 | } | 1477 | } |
1460 | 1478 | ||
1461 | /* Test if sending is allowed right now. */ | 1479 | /* Test if sending is allowed right now. */ |
1462 | int tcp_may_send_now(struct sock *sk) | 1480 | bool tcp_may_send_now(struct sock *sk) |
1463 | { | 1481 | { |
1464 | const struct tcp_sock *tp = tcp_sk(sk); | 1482 | const struct tcp_sock *tp = tcp_sk(sk); |
1465 | struct sk_buff *skb = tcp_send_head(sk); | 1483 | struct sk_buff *skb = tcp_send_head(sk); |
@@ -1529,7 +1547,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
1529 | * | 1547 | * |
1530 | * This algorithm is from John Heffner. | 1548 | * This algorithm is from John Heffner. |
1531 | */ | 1549 | */ |
1532 | static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | 1550 | static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) |
1533 | { | 1551 | { |
1534 | struct tcp_sock *tp = tcp_sk(sk); | 1552 | struct tcp_sock *tp = tcp_sk(sk); |
1535 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1553 | const struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -1589,11 +1607,11 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | |||
1589 | /* Ok, it looks like it is advisable to defer. */ | 1607 | /* Ok, it looks like it is advisable to defer. */ |
1590 | tp->tso_deferred = 1 | (jiffies << 1); | 1608 | tp->tso_deferred = 1 | (jiffies << 1); |
1591 | 1609 | ||
1592 | return 1; | 1610 | return true; |
1593 | 1611 | ||
1594 | send_now: | 1612 | send_now: |
1595 | tp->tso_deferred = 0; | 1613 | tp->tso_deferred = 0; |
1596 | return 0; | 1614 | return false; |
1597 | } | 1615 | } |
1598 | 1616 | ||
1599 | /* Create a new MTU probe if we are ready. | 1617 | /* Create a new MTU probe if we are ready. |
@@ -1735,11 +1753,11 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1735 | * snd_up-64k-mss .. snd_up cannot be large. However, taking into | 1753 | * snd_up-64k-mss .. snd_up cannot be large. However, taking into |
1736 | * account rare use of URG, this is not a big flaw. | 1754 | * account rare use of URG, this is not a big flaw. |
1737 | * | 1755 | * |
1738 | * Returns 1 if no segments are in flight and we have queued segments, but | 1756 | * Returns true if no segments are in flight and we have queued segments, |
1739 | * cannot send anything now because of SWS or another problem. | 1757 | * but cannot send anything now because of SWS or another problem. |
1740 | */ | 1758 | */ |
1741 | static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | 1759 | static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, |
1742 | int push_one, gfp_t gfp) | 1760 | int push_one, gfp_t gfp) |
1743 | { | 1761 | { |
1744 | struct tcp_sock *tp = tcp_sk(sk); | 1762 | struct tcp_sock *tp = tcp_sk(sk); |
1745 | struct sk_buff *skb; | 1763 | struct sk_buff *skb; |
@@ -1753,7 +1771,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
1753 | /* Do MTU probing. */ | 1771 | /* Do MTU probing. */ |
1754 | result = tcp_mtu_probe(sk); | 1772 | result = tcp_mtu_probe(sk); |
1755 | if (!result) { | 1773 | if (!result) { |
1756 | return 0; | 1774 | return false; |
1757 | } else if (result > 0) { | 1775 | } else if (result > 0) { |
1758 | sent_pkts = 1; | 1776 | sent_pkts = 1; |
1759 | } | 1777 | } |
@@ -1812,7 +1830,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
1812 | 1830 | ||
1813 | if (likely(sent_pkts)) { | 1831 | if (likely(sent_pkts)) { |
1814 | tcp_cwnd_validate(sk); | 1832 | tcp_cwnd_validate(sk); |
1815 | return 0; | 1833 | return false; |
1816 | } | 1834 | } |
1817 | return !tp->packets_out && tcp_send_head(sk); | 1835 | return !tp->packets_out && tcp_send_head(sk); |
1818 | } | 1836 | } |
@@ -2011,22 +2029,22 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) | |||
2011 | } | 2029 | } |
2012 | 2030 | ||
2013 | /* Check if coalescing SKBs is legal. */ | 2031 | /* Check if coalescing SKBs is legal. */ |
2014 | static int tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb) | 2032 | static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb) |
2015 | { | 2033 | { |
2016 | if (tcp_skb_pcount(skb) > 1) | 2034 | if (tcp_skb_pcount(skb) > 1) |
2017 | return 0; | 2035 | return false; |
2018 | /* TODO: SACK collapsing could be used to remove this condition */ | 2036 | /* TODO: SACK collapsing could be used to remove this condition */ |
2019 | if (skb_shinfo(skb)->nr_frags != 0) | 2037 | if (skb_shinfo(skb)->nr_frags != 0) |
2020 | return 0; | 2038 | return false; |
2021 | if (skb_cloned(skb)) | 2039 | if (skb_cloned(skb)) |
2022 | return 0; | 2040 | return false; |
2023 | if (skb == tcp_send_head(sk)) | 2041 | if (skb == tcp_send_head(sk)) |
2024 | return 0; | 2042 | return false; |
2025 | /* Some heuristics for collapsing over SACK'd could be invented */ | 2043 | /* Some heuristics for collapsing over SACK'd could be invented */ |
2026 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) | 2044 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) |
2027 | return 0; | 2045 | return false; |
2028 | 2046 | ||
2029 | return 1; | 2047 | return true; |
2030 | } | 2048 | } |
2031 | 2049 | ||
2032 | /* Collapse packets in the retransmit queue to create | 2050 | /* Collapse packets in the retransmit queue to create |
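The hunks above belong to a tree-wide cleanup converting TCP predicates that only ever returned 0 or 1 to bool. A minimal userspace sketch of the idiom, with illustrative names not taken from the patch:

#include <stdbool.h>
#include <stddef.h>

struct pkt {
	size_t nr_frags;	/* stand-in for skb_shinfo(skb)->nr_frags */
	int cloned;		/* stand-in for skb_cloned(skb) */
};

/* Before the cleanup this would read "static int ..." with 0/1
 * returns; bool documents that the function is a pure predicate. */
static bool pkt_can_collapse(const struct pkt *p)
{
	if (p->nr_frags != 0)
		return false;
	if (p->cloned)
		return false;
	return true;
}

int main(void)
{
	struct pkt p = { .nr_frags = 0, .cloned = 0 };
	return pkt_can_collapse(&p) ? 0 : 1;
}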
@@ -2037,7 +2055,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, | |||
2037 | { | 2055 | { |
2038 | struct tcp_sock *tp = tcp_sk(sk); | 2056 | struct tcp_sock *tp = tcp_sk(sk); |
2039 | struct sk_buff *skb = to, *tmp; | 2057 | struct sk_buff *skb = to, *tmp; |
2040 | int first = 1; | 2058 | bool first = true; |
2041 | 2059 | ||
2042 | if (!sysctl_tcp_retrans_collapse) | 2060 | if (!sysctl_tcp_retrans_collapse) |
2043 | return; | 2061 | return; |
@@ -2051,7 +2069,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, | |||
2051 | space -= skb->len; | 2069 | space -= skb->len; |
2052 | 2070 | ||
2053 | if (first) { | 2071 | if (first) { |
2054 | first = 0; | 2072 | first = false; |
2055 | continue; | 2073 | continue; |
2056 | } | 2074 | } |
2057 | 2075 | ||
@@ -2060,7 +2078,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, | |||
2060 | /* Punt if not enough space exists in the first SKB for | 2078 | /* Punt if not enough space exists in the first SKB for |
2061 | * the data in the second | 2079 | * the data in the second |
2062 | */ | 2080 | */ |
2063 | if (skb->len > skb_tailroom(to)) | 2081 | if (skb->len > skb_availroom(to)) |
2064 | break; | 2082 | break; |
2065 | 2083 | ||
2066 | if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) | 2084 | if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) |
@@ -2166,8 +2184,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
2166 | 2184 | ||
2167 | #if FASTRETRANS_DEBUG > 0 | 2185 | #if FASTRETRANS_DEBUG > 0 |
2168 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { | 2186 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { |
2169 | if (net_ratelimit()) | 2187 | net_dbg_ratelimited("retrans_out leaked\n"); |
2170 | printk(KERN_DEBUG "retrans_out leaked.\n"); | ||
2171 | } | 2188 | } |
2172 | #endif | 2189 | #endif |
2173 | if (!tp->retrans_out) | 2190 | if (!tp->retrans_out) |
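net_dbg_ratelimited() replaces the open-coded net_ratelimit()/printk(KERN_DEBUG ...) pair above. A sketch of roughly what the helper boils down to; the real macro lives in include/linux/net.h and is built on net_ratelimited_function(), so this is an approximation, not the verbatim kernel definition:

/* Approximate expansion for illustration (kernel context assumed). */
#define my_net_dbg_ratelimited(fmt, ...)		\
	do {						\
		if (net_ratelimit())			\
			pr_debug(fmt, ##__VA_ARGS__);	\
	} while (0)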
@@ -2192,18 +2209,18 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
2192 | /* Check if forward retransmits are possible in the current | 2209 | /* Check if forward retransmits are possible in the current |
2193 | * window/congestion state. | 2210 | * window/congestion state. |
2194 | */ | 2211 | */ |
2195 | static int tcp_can_forward_retransmit(struct sock *sk) | 2212 | static bool tcp_can_forward_retransmit(struct sock *sk) |
2196 | { | 2213 | { |
2197 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2214 | const struct inet_connection_sock *icsk = inet_csk(sk); |
2198 | const struct tcp_sock *tp = tcp_sk(sk); | 2215 | const struct tcp_sock *tp = tcp_sk(sk); |
2199 | 2216 | ||
2200 | /* Forward retransmissions are possible only during Recovery. */ | 2217 | /* Forward retransmissions are possible only during Recovery. */ |
2201 | if (icsk->icsk_ca_state != TCP_CA_Recovery) | 2218 | if (icsk->icsk_ca_state != TCP_CA_Recovery) |
2202 | return 0; | 2219 | return false; |
2203 | 2220 | ||
2204 | /* No forward retransmissions in Reno are possible. */ | 2221 | /* No forward retransmissions in Reno are possible. */ |
2205 | if (tcp_is_reno(tp)) | 2222 | if (tcp_is_reno(tp)) |
2206 | return 0; | 2223 | return false; |
2207 | 2224 | ||
2208 | /* Yeah, we have to make a difficult choice between forward transmission | 2225 | /* Yeah, we have to make a difficult choice between forward transmission |
2209 | * and retransmission... Both ways have their merits... | 2226 | * and retransmission... Both ways have their merits... |
@@ -2214,9 +2231,9 @@ static int tcp_can_forward_retransmit(struct sock *sk) | |||
2214 | */ | 2231 | */ |
2215 | 2232 | ||
2216 | if (tcp_may_send_now(sk)) | 2233 | if (tcp_may_send_now(sk)) |
2217 | return 0; | 2234 | return false; |
2218 | 2235 | ||
2219 | return 1; | 2236 | return true; |
2220 | } | 2237 | } |
2221 | 2238 | ||
2222 | /* This gets called after a retransmit timeout, and the initially | 2239 | /* This gets called after a retransmit timeout, and the initially |
@@ -2401,7 +2418,7 @@ int tcp_send_synack(struct sock *sk) | |||
2401 | 2418 | ||
2402 | skb = tcp_write_queue_head(sk); | 2419 | skb = tcp_write_queue_head(sk); |
2403 | if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { | 2420 | if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { |
2404 | printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); | 2421 | pr_debug("%s: wrong queue state\n", __func__); |
2405 | return -EFAULT; | 2422 | return -EFAULT; |
2406 | } | 2423 | } |
2407 | if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { | 2424 | if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { |
@@ -2561,7 +2578,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2561 | EXPORT_SYMBOL(tcp_make_synack); | 2578 | EXPORT_SYMBOL(tcp_make_synack); |
2562 | 2579 | ||
2563 | /* Do all connect socket setups that can be done AF independent. */ | 2580 | /* Do all connect socket setups that can be done AF independent. */ |
2564 | static void tcp_connect_init(struct sock *sk) | 2581 | void tcp_connect_init(struct sock *sk) |
2565 | { | 2582 | { |
2566 | const struct dst_entry *dst = __sk_dst_get(sk); | 2583 | const struct dst_entry *dst = __sk_dst_get(sk); |
2567 | struct tcp_sock *tp = tcp_sk(sk); | 2584 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -2616,9 +2633,12 @@ static void tcp_connect_init(struct sock *sk) | |||
2616 | tp->snd_una = tp->write_seq; | 2633 | tp->snd_una = tp->write_seq; |
2617 | tp->snd_sml = tp->write_seq; | 2634 | tp->snd_sml = tp->write_seq; |
2618 | tp->snd_up = tp->write_seq; | 2635 | tp->snd_up = tp->write_seq; |
2619 | tp->rcv_nxt = 0; | 2636 | tp->snd_nxt = tp->write_seq; |
2620 | tp->rcv_wup = 0; | 2637 | |
2621 | tp->copied_seq = 0; | 2638 | if (likely(!tp->repair)) |
2639 | tp->rcv_nxt = 0; | ||
2640 | tp->rcv_wup = tp->rcv_nxt; | ||
2641 | tp->copied_seq = tp->rcv_nxt; | ||
2622 | 2642 | ||
2623 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; | 2643 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; |
2624 | inet_csk(sk)->icsk_retransmits = 0; | 2644 | inet_csk(sk)->icsk_retransmits = 0; |
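The tp->repair guard above is part of the TCP connection-repair series: when a checkpoint/restore tool has already loaded sequence state into the socket, connect() must not zero it, and tcp_connect_init() loses its static linkage so the repair path can reuse it. A hedged userspace sketch of that flow; the constants mirror this series' uapi additions, the helper name is invented, and error handling is mostly omitted:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

#ifndef TCP_REPAIR		/* uapi values introduced by this series */
#define TCP_REPAIR		19
#define TCP_REPAIR_QUEUE	20
#define TCP_QUEUE_SEQ		21
#endif
#ifndef TCP_RECV_QUEUE
#define TCP_RECV_QUEUE		1
#define TCP_SEND_QUEUE		2
#endif

/* Restore checkpointed sequence numbers, then connect(); with repair
 * mode on, tcp_connect_init() keeps the restored rcv_nxt intact. */
static int repair_connect(int sk, unsigned int snd_seq, unsigned int rcv_seq,
			  const struct sockaddr_in *peer)
{
	int on = 1, q;

	if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &on, sizeof(on)) < 0)
		return -1;

	q = TCP_SEND_QUEUE;
	setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q));
	setsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &snd_seq, sizeof(snd_seq));

	q = TCP_RECV_QUEUE;
	setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q));
	setsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &rcv_seq, sizeof(rcv_seq));

	/* In repair mode connect() restores state without emitting a SYN. */
	return connect(sk, (const struct sockaddr *)peer, sizeof(*peer));
}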
@@ -2641,7 +2661,6 @@ int tcp_connect(struct sock *sk) | |||
2641 | /* Reserve space for headers. */ | 2661 | /* Reserve space for headers. */ |
2642 | skb_reserve(buff, MAX_TCP_HEADER); | 2662 | skb_reserve(buff, MAX_TCP_HEADER); |
2643 | 2663 | ||
2644 | tp->snd_nxt = tp->write_seq; | ||
2645 | tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); | 2664 | tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); |
2646 | TCP_ECN_send_syn(sk, buff); | 2665 | TCP_ECN_send_syn(sk, buff); |
2647 | 2666 | ||
@@ -2790,6 +2809,15 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) | |||
2790 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); | 2809 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); |
2791 | } | 2810 | } |
2792 | 2811 | ||
2812 | void tcp_send_window_probe(struct sock *sk) | ||
2813 | { | ||
2814 | if (sk->sk_state == TCP_ESTABLISHED) { | ||
2815 | tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; | ||
2816 | tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq; | ||
2817 | tcp_xmit_probe_skb(sk, 0); | ||
2818 | } | ||
2819 | } | ||
2820 | |||
2793 | /* Initiate keepalive or window probe from timer. */ | 2821 | /* Initiate keepalive or window probe from timer. */ |
2794 | int tcp_write_wakeup(struct sock *sk) | 2822 | int tcp_write_wakeup(struct sock *sk) |
2795 | { | 2823 | { |
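The new tcp_send_window_probe() rewinds snd_wl1 to rcv_nxt - 1 so the very next in-order segment is allowed to refresh the send window. The trick is plain wrap-safe 32-bit sequence arithmetic; a standalone sketch, where seq_after() mirrors the kernel's after() from include/net/tcp.h:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Wrap-safe "a is after b", equivalent to the kernel's after(a, b). */
static bool seq_after(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) > 0;
}

int main(void)
{
	uint32_t rcv_nxt = 0x80000000u;	/* arbitrary example value */
	uint32_t snd_wl1 = rcv_nxt - 1;	/* what the probe sets */

	/* The next in-order segment carries seq == rcv_nxt, so the
	 * after(seq, snd_wl1) test in the window-update path passes and
	 * the peer's advertised window is accepted even if nothing else
	 * about the segment changed. */
	printf("window update accepted: %d\n", seq_after(rcv_nxt, snd_wl1));
	return 0;
}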
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index a981cdc0a6e9..4526fe68e60e 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c | |||
@@ -91,7 +91,7 @@ static inline int tcp_probe_avail(void) | |||
91 | * Note: arguments must match tcp_rcv_established()! | 91 | * Note: arguments must match tcp_rcv_established()! |
92 | */ | 92 | */ |
93 | static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, | 93 | static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, |
94 | struct tcphdr *th, unsigned len) | 94 | struct tcphdr *th, unsigned int len) |
95 | { | 95 | { |
96 | const struct tcp_sock *tp = tcp_sk(sk); | 96 | const struct tcp_sock *tp = tcp_sk(sk); |
97 | const struct inet_sock *inet = inet_sk(sk); | 97 | const struct inet_sock *inet = inet_sk(sk); |
@@ -138,7 +138,7 @@ static struct jprobe tcp_jprobe = { | |||
138 | .entry = jtcp_rcv_established, | 138 | .entry = jtcp_rcv_established, |
139 | }; | 139 | }; |
140 | 140 | ||
141 | static int tcpprobe_open(struct inode * inode, struct file * file) | 141 | static int tcpprobe_open(struct inode *inode, struct file *file) |
142 | { | 142 | { |
143 | /* Reset (empty) log */ | 143 | /* Reset (empty) log */ |
144 | spin_lock_bh(&tcp_probe.lock); | 144 | spin_lock_bh(&tcp_probe.lock); |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 34d4a02c2f16..e911e6c523ec 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -319,6 +319,11 @@ void tcp_retransmit_timer(struct sock *sk) | |||
319 | struct tcp_sock *tp = tcp_sk(sk); | 319 | struct tcp_sock *tp = tcp_sk(sk); |
320 | struct inet_connection_sock *icsk = inet_csk(sk); | 320 | struct inet_connection_sock *icsk = inet_csk(sk); |
321 | 321 | ||
322 | if (tp->early_retrans_delayed) { | ||
323 | tcp_resume_early_retransmit(sk); | ||
324 | return; | ||
325 | } | ||
326 | |||
322 | if (!tp->packets_out) | 327 | if (!tp->packets_out) |
323 | goto out; | 328 | goto out; |
324 | 329 | ||
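The tcp_timer.c hunk multiplexes one timer: when tp->early_retrans_delayed is set, the firing retransmit timer resumes a delayed early retransmit (from the RFC 5827 early-retransmit series) instead of running the full RTO path. A schematic userspace sketch of that dispatch pattern, all names illustrative:

#include <stdbool.h>
#include <stdio.h>

struct conn {
	bool early_retrans_delayed;	/* timer was armed for delayed ER */
};

static void resume_early_retransmit(struct conn *c)
{
	c->early_retrans_delayed = false;
	puts("early retransmit resumed");
}

static void handle_rto(struct conn *c)
{
	(void)c;
	puts("full RTO recovery");
}

/* One timer, two meanings: a flag set when arming decides the path. */
static void retransmit_timer_fired(struct conn *c)
{
	if (c->early_retrans_delayed) {
		resume_early_retransmit(c);
		return;
	}
	handle_rto(c);
}

int main(void)
{
	struct conn c = { .early_retrans_delayed = true };
	retransmit_timer_fired(&c);	/* takes the ER path */
	retransmit_timer_fired(&c);	/* now the RTO path */
	return 0;
}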
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index fe141052a1be..609397ee78fb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -107,6 +107,7 @@ | |||
107 | #include <net/checksum.h> | 107 | #include <net/checksum.h> |
108 | #include <net/xfrm.h> | 108 | #include <net/xfrm.h> |
109 | #include <trace/events/udp.h> | 109 | #include <trace/events/udp.h> |
110 | #include <linux/static_key.h> | ||
110 | #include "udp_impl.h" | 111 | #include "udp_impl.h" |
111 | 112 | ||
112 | struct udp_table udp_table __read_mostly; | 113 | struct udp_table udp_table __read_mostly; |
@@ -206,7 +207,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
206 | 207 | ||
207 | if (!snum) { | 208 | if (!snum) { |
208 | int low, high, remaining; | 209 | int low, high, remaining; |
209 | unsigned rand; | 210 | unsigned int rand; |
210 | unsigned short first, last; | 211 | unsigned short first, last; |
211 | DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); | 212 | DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); |
212 | 213 | ||
@@ -846,7 +847,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
846 | * Get and verify the address. | 847 | * Get and verify the address. |
847 | */ | 848 | */ |
848 | if (msg->msg_name) { | 849 | if (msg->msg_name) { |
849 | struct sockaddr_in * usin = (struct sockaddr_in *)msg->msg_name; | 850 | struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; |
850 | if (msg->msg_namelen < sizeof(*usin)) | 851 | if (msg->msg_namelen < sizeof(*usin)) |
851 | return -EINVAL; | 852 | return -EINVAL; |
852 | if (usin->sin_family != AF_INET) { | 853 | if (usin->sin_family != AF_INET) { |
@@ -1379,6 +1380,14 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1379 | 1380 | ||
1380 | } | 1381 | } |
1381 | 1382 | ||
1383 | static struct static_key udp_encap_needed __read_mostly; | ||
1384 | void udp_encap_enable(void) | ||
1385 | { | ||
1386 | if (!static_key_enabled(&udp_encap_needed)) | ||
1387 | static_key_slow_inc(&udp_encap_needed); | ||
1388 | } | ||
1389 | EXPORT_SYMBOL(udp_encap_enable); | ||
1390 | |||
1382 | /* returns: | 1391 | /* returns: |
1383 | * -1: error | 1392 | * -1: error |
1384 | * 0: success | 1393 | * 0: success |
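udp_encap_needed is a static key: on architectures with jump-label support, static_key_false() compiles to straight-line code until the first encapsulating socket calls udp_encap_enable(), at which point the branch is runtime-patched in, so ordinary UDP traffic never pays for the encap check. A hedged in-kernel sketch of the same pattern, module boilerplate omitted and names invented:

#include <linux/static_key.h>

static struct static_key my_feature_enabled = STATIC_KEY_INIT_FALSE;

void my_feature_enable(void)
{
	/* Patch the branch in exactly once; this is the slow path. */
	if (!static_key_enabled(&my_feature_enabled))
		static_key_slow_inc(&my_feature_enabled);
}

void my_hot_path(void)
{
	/* A no-op jump label while the key is disabled. */
	if (static_key_false(&my_feature_enabled)) {
		/* rarely-enabled feature work goes here */
	}
}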
@@ -1400,7 +1409,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1400 | goto drop; | 1409 | goto drop; |
1401 | nf_reset(skb); | 1410 | nf_reset(skb); |
1402 | 1411 | ||
1403 | if (up->encap_type) { | 1412 | if (static_key_false(&udp_encap_needed) && up->encap_type) { |
1404 | int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); | 1413 | int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); |
1405 | 1414 | ||
1406 | /* | 1415 | /* |
@@ -1470,7 +1479,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1470 | goto drop; | 1479 | goto drop; |
1471 | 1480 | ||
1472 | 1481 | ||
1473 | if (sk_rcvqueues_full(sk, skb)) | 1482 | if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) |
1474 | goto drop; | 1483 | goto drop; |
1475 | 1484 | ||
1476 | rc = 0; | 1485 | rc = 0; |
@@ -1479,7 +1488,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1479 | bh_lock_sock(sk); | 1488 | bh_lock_sock(sk); |
1480 | if (!sock_owned_by_user(sk)) | 1489 | if (!sock_owned_by_user(sk)) |
1481 | rc = __udp_queue_rcv_skb(sk, skb); | 1490 | rc = __udp_queue_rcv_skb(sk, skb); |
1482 | else if (sk_add_backlog(sk, skb)) { | 1491 | else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { |
1483 | bh_unlock_sock(sk); | 1492 | bh_unlock_sock(sk); |
1484 | goto drop; | 1493 | goto drop; |
1485 | } | 1494 | } |
@@ -1760,6 +1769,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
1760 | /* FALLTHROUGH */ | 1769 | /* FALLTHROUGH */ |
1761 | case UDP_ENCAP_L2TPINUDP: | 1770 | case UDP_ENCAP_L2TPINUDP: |
1762 | up->encap_type = val; | 1771 | up->encap_type = val; |
1772 | udp_encap_enable(); | ||
1763 | break; | 1773 | break; |
1764 | default: | 1774 | default: |
1765 | err = -ENOPROTOOPT; | 1775 | err = -ENOPROTOOPT; |
@@ -2163,9 +2173,15 @@ void udp4_proc_exit(void) | |||
2163 | static __initdata unsigned long uhash_entries; | 2173 | static __initdata unsigned long uhash_entries; |
2164 | static int __init set_uhash_entries(char *str) | 2174 | static int __init set_uhash_entries(char *str) |
2165 | { | 2175 | { |
2176 | ssize_t ret; | ||
2177 | |||
2166 | if (!str) | 2178 | if (!str) |
2167 | return 0; | 2179 | return 0; |
2168 | uhash_entries = simple_strtoul(str, &str, 0); | 2180 | |
2181 | ret = kstrtoul(str, 0, &uhash_entries); | ||
2182 | if (ret) | ||
2183 | return 0; | ||
2184 | |||
2169 | if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN) | 2185 | if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN) |
2170 | uhash_entries = UDP_HTABLE_SIZE_MIN; | 2186 | uhash_entries = UDP_HTABLE_SIZE_MIN; |
2171 | return 1; | 2187 | return 1; |
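The switch from simple_strtoul() to kstrtoul() tightens boot-parameter parsing: kstrtoul() rejects trailing garbage and reports overflow through its return value instead of silently truncating. A rough userspace approximation of the stricter semantics, built on strtoul(); the kernel helper itself is declared in include/linux/kernel.h:

#include <errno.h>
#include <stdlib.h>

/* Userspace analogue of kstrtoul(): 0 on success, negative
 * errno-style value on malformed or overflowing input. */
static int parse_ulong_strict(const char *s, unsigned int base,
			      unsigned long *res)
{
	char *end;
	unsigned long val;

	errno = 0;
	val = strtoul(s, &end, base);
	if (errno == ERANGE)
		return -ERANGE;
	if (end == s || *end != '\0')	/* empty input or trailing junk */
		return -EINVAL;
	*res = val;
	return 0;
}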
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 8a949f19deb6..a7f86a3cd502 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c | |||
@@ -146,9 +146,17 @@ static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, | |||
146 | return udp_dump_one(&udp_table, in_skb, nlh, req); | 146 | return udp_dump_one(&udp_table, in_skb, nlh, req); |
147 | } | 147 | } |
148 | 148 | ||
149 | static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, | ||
150 | void *info) | ||
151 | { | ||
152 | r->idiag_rqueue = sk_rmem_alloc_get(sk); | ||
153 | r->idiag_wqueue = sk_wmem_alloc_get(sk); | ||
154 | } | ||
155 | |||
149 | static const struct inet_diag_handler udp_diag_handler = { | 156 | static const struct inet_diag_handler udp_diag_handler = { |
150 | .dump = udp_diag_dump, | 157 | .dump = udp_diag_dump, |
151 | .dump_one = udp_diag_dump_one, | 158 | .dump_one = udp_diag_dump_one, |
159 | .idiag_get_info = udp_diag_get_info, | ||
152 | .idiag_type = IPPROTO_UDP, | 160 | .idiag_type = IPPROTO_UDP, |
153 | }; | 161 | }; |
154 | 162 | ||
@@ -167,6 +175,7 @@ static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr * | |||
167 | static const struct inet_diag_handler udplite_diag_handler = { | 175 | static const struct inet_diag_handler udplite_diag_handler = { |
168 | .dump = udplite_diag_dump, | 176 | .dump = udplite_diag_dump, |
169 | .dump_one = udplite_diag_dump_one, | 177 | .dump_one = udplite_diag_dump_one, |
178 | .idiag_get_info = udp_diag_get_info, | ||
170 | .idiag_type = IPPROTO_UDPLITE, | 179 | .idiag_type = IPPROTO_UDPLITE, |
171 | }; | 180 | }; |
172 | 181 | ||
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index aaad650d47d9..5a681e298b90 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h | |||
@@ -25,7 +25,7 @@ extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
25 | size_t len, int noblock, int flags, int *addr_len); | 25 | size_t len, int noblock, int flags, int *addr_len); |
26 | extern int udp_sendpage(struct sock *sk, struct page *page, int offset, | 26 | extern int udp_sendpage(struct sock *sk, struct page *page, int offset, |
27 | size_t size, int flags); | 27 | size_t size, int flags); |
28 | extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); | 28 | extern int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); |
29 | extern void udp_destroy_sock(struct sock *sk); | 29 | extern void udp_destroy_sock(struct sock *sk); |
30 | 30 | ||
31 | #ifdef CONFIG_PROC_FS | 31 | #ifdef CONFIG_PROC_FS |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index a0b4c5da8d43..0d3426cb5c4f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -152,7 +152,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
152 | 152 | ||
153 | case IPPROTO_AH: | 153 | case IPPROTO_AH: |
154 | if (pskb_may_pull(skb, xprth + 8 - skb->data)) { | 154 | if (pskb_may_pull(skb, xprth + 8 - skb->data)) { |
155 | __be32 *ah_hdr = (__be32*)xprth; | 155 | __be32 *ah_hdr = (__be32 *)xprth; |
156 | 156 | ||
157 | fl4->fl4_ipsec_spi = ah_hdr[1]; | 157 | fl4->fl4_ipsec_spi = ah_hdr[1]; |
158 | } | 158 | } |
@@ -298,8 +298,8 @@ void __init xfrm4_init(int rt_max_size) | |||
298 | xfrm4_state_init(); | 298 | xfrm4_state_init(); |
299 | xfrm4_policy_init(); | 299 | xfrm4_policy_init(); |
300 | #ifdef CONFIG_SYSCTL | 300 | #ifdef CONFIG_SYSCTL |
301 | sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, | 301 | sysctl_hdr = register_net_sysctl(&init_net, "net/ipv4", |
302 | xfrm4_policy_table); | 302 | xfrm4_policy_table); |
303 | #endif | 303 | #endif |
304 | } | 304 | } |
305 | 305 | ||