aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig4
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c3
-rw-r--r--net/ipv4/datagram.c1
-rw-r--r--net/ipv4/devinet.c36
-rw-r--r--net/ipv4/gre_demux.c1
-rw-r--r--net/ipv4/gre_offload.c5
-rw-r--r--net/ipv4/icmp.c2
-rw-r--r--net/ipv4/igmp.c12
-rw-r--r--net/ipv4/inet_fragment.c283
-rw-r--r--net/ipv4/ip_fragment.c56
-rw-r--r--net/ipv4/ip_options.c4
-rw-r--r--net/ipv4/ip_output.c7
-rw-r--r--net/ipv4/ip_sockglue.c2
-rw-r--r--net/ipv4/ip_tunnel.c28
-rw-r--r--net/ipv4/ipconfig.c5
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/netfilter/Kconfig29
-rw-r--r--net/ipv4/netfilter/Makefile4
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c498
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c4
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c4
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c8
-rw-r--r--net/ipv4/netfilter/nf_log_arp.c149
-rw-r--r--net/ipv4/netfilter/nf_log_ipv4.c385
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c2
-rw-r--r--net/ipv4/proc.c5
-rw-r--r--net/ipv4/raw.c9
-rw-r--r--net/ipv4/route.c47
-rw-r--r--net/ipv4/syncookies.c3
-rw-r--r--net/ipv4/tcp.c3
-rw-r--r--net/ipv4/tcp_fastopen.c2
-rw-r--r--net/ipv4/tcp_input.c175
-rw-r--r--net/ipv4/tcp_ipv4.c193
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_offload.c2
-rw-r--r--net/ipv4/tcp_output.c24
-rw-r--r--net/ipv4/udp.c159
-rw-r--r--net/ipv4/udp_offload.c76
-rw-r--r--net/ipv4/udp_tunnel.c100
42 files changed, 1342 insertions, 999 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 05c57f0fcabe..dbc10d84161f 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -307,6 +307,10 @@ config NET_IPVTI
307 the notion of a secure tunnel for IPSEC and then use routing protocol 307 the notion of a secure tunnel for IPSEC and then use routing protocol
308 on top. 308 on top.
309 309
310config NET_UDP_TUNNEL
311 tristate
312 default n
313
310config INET_AH 314config INET_AH
311 tristate "IP: AH transformation" 315 tristate "IP: AH transformation"
312 select XFRM_ALGO 316 select XFRM_ALGO
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f032688d20d3..8ee1cd4053ee 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_NET_IPIP) += ipip.o
22gre-y := gre_demux.o 22gre-y := gre_demux.o
23obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o 23obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
24obj-$(CONFIG_NET_IPGRE) += ip_gre.o 24obj-$(CONFIG_NET_IPGRE) += ip_gre.o
25obj-$(CONFIG_NET_UDP_TUNNEL) += udp_tunnel.o
25obj-$(CONFIG_NET_IPVTI) += ip_vti.o 26obj-$(CONFIG_NET_IPVTI) += ip_vti.o
26obj-$(CONFIG_SYN_COOKIES) += syncookies.o 27obj-$(CONFIG_SYN_COOKIES) += syncookies.o
27obj-$(CONFIG_INET_AH) += ah4.o 28obj-$(CONFIG_INET_AH) += ah4.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d5e6836cf772..d156b3c5f363 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1429,6 +1429,9 @@ static int inet_gro_complete(struct sk_buff *skb, int nhoff)
1429 int proto = iph->protocol; 1429 int proto = iph->protocol;
1430 int err = -ENOSYS; 1430 int err = -ENOSYS;
1431 1431
1432 if (skb->encapsulation)
1433 skb_set_inner_network_header(skb, nhoff);
1434
1432 csum_replace2(&iph->check, iph->tot_len, newlen); 1435 csum_replace2(&iph->check, iph->tot_len, newlen);
1433 iph->tot_len = newlen; 1436 iph->tot_len = newlen;
1434 1437
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index a3095fdefbed..90c0e8386116 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -76,6 +76,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
76 inet->inet_daddr = fl4->daddr; 76 inet->inet_daddr = fl4->daddr;
77 inet->inet_dport = usin->sin_port; 77 inet->inet_dport = usin->sin_port;
78 sk->sk_state = TCP_ESTABLISHED; 78 sk->sk_state = TCP_ESTABLISHED;
79 inet_set_txhash(sk);
79 inet->inet_id = jiffies; 80 inet->inet_id = jiffies;
80 81
81 sk_dst_set(sk, &rt->dst); 82 sk_dst_set(sk, &rt->dst);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e9449376b58e..214882e7d6de 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -180,11 +180,12 @@ static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 180static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181 int destroy); 181 int destroy);
182#ifdef CONFIG_SYSCTL 182#ifdef CONFIG_SYSCTL
183static void devinet_sysctl_register(struct in_device *idev); 183static int devinet_sysctl_register(struct in_device *idev);
184static void devinet_sysctl_unregister(struct in_device *idev); 184static void devinet_sysctl_unregister(struct in_device *idev);
185#else 185#else
186static void devinet_sysctl_register(struct in_device *idev) 186static int devinet_sysctl_register(struct in_device *idev)
187{ 187{
188 return 0;
188} 189}
189static void devinet_sysctl_unregister(struct in_device *idev) 190static void devinet_sysctl_unregister(struct in_device *idev)
190{ 191{
@@ -232,6 +233,7 @@ EXPORT_SYMBOL(in_dev_finish_destroy);
232static struct in_device *inetdev_init(struct net_device *dev) 233static struct in_device *inetdev_init(struct net_device *dev)
233{ 234{
234 struct in_device *in_dev; 235 struct in_device *in_dev;
236 int err = -ENOMEM;
235 237
236 ASSERT_RTNL(); 238 ASSERT_RTNL();
237 239
@@ -252,7 +254,13 @@ static struct in_device *inetdev_init(struct net_device *dev)
252 /* Account for reference dev->ip_ptr (below) */ 254 /* Account for reference dev->ip_ptr (below) */
253 in_dev_hold(in_dev); 255 in_dev_hold(in_dev);
254 256
255 devinet_sysctl_register(in_dev); 257 err = devinet_sysctl_register(in_dev);
258 if (err) {
259 in_dev->dead = 1;
260 in_dev_put(in_dev);
261 in_dev = NULL;
262 goto out;
263 }
256 ip_mc_init_dev(in_dev); 264 ip_mc_init_dev(in_dev);
257 if (dev->flags & IFF_UP) 265 if (dev->flags & IFF_UP)
258 ip_mc_up(in_dev); 266 ip_mc_up(in_dev);
@@ -260,7 +268,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
260 /* we can receive as soon as ip_ptr is set -- do this last */ 268 /* we can receive as soon as ip_ptr is set -- do this last */
261 rcu_assign_pointer(dev->ip_ptr, in_dev); 269 rcu_assign_pointer(dev->ip_ptr, in_dev);
262out: 270out:
263 return in_dev; 271 return in_dev ?: ERR_PTR(err);
264out_kfree: 272out_kfree:
265 kfree(in_dev); 273 kfree(in_dev);
266 in_dev = NULL; 274 in_dev = NULL;
@@ -1347,8 +1355,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1347 if (!in_dev) { 1355 if (!in_dev) {
1348 if (event == NETDEV_REGISTER) { 1356 if (event == NETDEV_REGISTER) {
1349 in_dev = inetdev_init(dev); 1357 in_dev = inetdev_init(dev);
1350 if (!in_dev) 1358 if (IS_ERR(in_dev))
1351 return notifier_from_errno(-ENOMEM); 1359 return notifier_from_errno(PTR_ERR(in_dev));
1352 if (dev->flags & IFF_LOOPBACK) { 1360 if (dev->flags & IFF_LOOPBACK) {
1353 IN_DEV_CONF_SET(in_dev, NOXFRM, 1); 1361 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1354 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); 1362 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
@@ -2182,11 +2190,21 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2182 kfree(t); 2190 kfree(t);
2183} 2191}
2184 2192
2185static void devinet_sysctl_register(struct in_device *idev) 2193static int devinet_sysctl_register(struct in_device *idev)
2186{ 2194{
2187 neigh_sysctl_register(idev->dev, idev->arp_parms, NULL); 2195 int err;
2188 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, 2196
2197 if (!sysctl_dev_name_is_allowed(idev->dev->name))
2198 return -EINVAL;
2199
2200 err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2201 if (err)
2202 return err;
2203 err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2189 &idev->cnf); 2204 &idev->cnf);
2205 if (err)
2206 neigh_sysctl_unregister(idev->arp_parms);
2207 return err;
2190} 2208}
2191 2209
2192static void devinet_sysctl_unregister(struct in_device *idev) 2210static void devinet_sysctl_unregister(struct in_device *idev)
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 4e9619bca732..0485bf7f8f03 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -68,6 +68,7 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
68 68
69 skb_push(skb, hdr_len); 69 skb_push(skb, hdr_len);
70 70
71 skb_reset_transport_header(skb);
71 greh = (struct gre_base_hdr *)skb->data; 72 greh = (struct gre_base_hdr *)skb->data;
72 greh->flags = tnl_flags_to_gre_flags(tpi->flags); 73 greh->flags = tnl_flags_to_gre_flags(tpi->flags);
73 greh->protocol = tpi->proto; 74 greh->protocol = tpi->proto;
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index eb92deb12666..6556263c8fa5 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -74,7 +74,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
74 /* segment inner packet. */ 74 /* segment inner packet. */
75 enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); 75 enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
76 segs = skb_mac_gso_segment(skb, enc_features); 76 segs = skb_mac_gso_segment(skb, enc_features);
77 if (!segs || IS_ERR(segs)) { 77 if (IS_ERR_OR_NULL(segs)) {
78 skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); 78 skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len);
79 goto out; 79 goto out;
80 } 80 }
@@ -263,6 +263,9 @@ static int gre_gro_complete(struct sk_buff *skb, int nhoff)
263 int err = -ENOENT; 263 int err = -ENOENT;
264 __be16 type; 264 __be16 type;
265 265
266 skb->encapsulation = 1;
267 skb_shinfo(skb)->gso_type = SKB_GSO_GRE;
268
266 type = greh->protocol; 269 type = greh->protocol;
267 if (greh->flags & GRE_KEY) 270 if (greh->flags & GRE_KEY)
268 grehlen += GRE_HEADER_SECTION; 271 grehlen += GRE_HEADER_SECTION;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 79c3d947a481..42b7bcf8045b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -739,8 +739,6 @@ static void icmp_unreach(struct sk_buff *skb)
739 /* fall through */ 739 /* fall through */
740 case 0: 740 case 0:
741 info = ntohs(icmph->un.frag.mtu); 741 info = ntohs(icmph->un.frag.mtu);
742 if (!info)
743 goto out;
744 } 742 }
745 break; 743 break;
746 case ICMP_SR_FAILED: 744 case ICMP_SR_FAILED:
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6748d420f714..f10eab462282 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1321,7 +1321,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1321 atomic_set(&im->refcnt, 1); 1321 atomic_set(&im->refcnt, 1);
1322 spin_lock_init(&im->lock); 1322 spin_lock_init(&im->lock);
1323#ifdef CONFIG_IP_MULTICAST 1323#ifdef CONFIG_IP_MULTICAST
1324 setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im); 1324 setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im);
1325 im->unsolicit_count = IGMP_Unsolicited_Report_Count; 1325 im->unsolicit_count = IGMP_Unsolicited_Report_Count;
1326#endif 1326#endif
1327 1327
@@ -1944,6 +1944,10 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
1944 1944
1945 rtnl_lock(); 1945 rtnl_lock();
1946 in_dev = ip_mc_find_dev(net, imr); 1946 in_dev = ip_mc_find_dev(net, imr);
1947 if (!in_dev) {
1948 ret = -ENODEV;
1949 goto out;
1950 }
1947 ifindex = imr->imr_ifindex; 1951 ifindex = imr->imr_ifindex;
1948 for (imlp = &inet->mc_list; 1952 for (imlp = &inet->mc_list;
1949 (iml = rtnl_dereference(*imlp)) != NULL; 1953 (iml = rtnl_dereference(*imlp)) != NULL;
@@ -1961,16 +1965,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
1961 1965
1962 *imlp = iml->next_rcu; 1966 *imlp = iml->next_rcu;
1963 1967
1964 if (in_dev) 1968 ip_mc_dec_group(in_dev, group);
1965 ip_mc_dec_group(in_dev, group);
1966 rtnl_unlock(); 1969 rtnl_unlock();
1967 /* decrease mem now to avoid the memleak warning */ 1970 /* decrease mem now to avoid the memleak warning */
1968 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); 1971 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
1969 kfree_rcu(iml, rcu); 1972 kfree_rcu(iml, rcu);
1970 return 0; 1973 return 0;
1971 } 1974 }
1972 if (!in_dev) 1975out:
1973 ret = -ENODEV;
1974 rtnl_unlock(); 1976 rtnl_unlock();
1975 return ret; 1977 return ret;
1976} 1978}
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 3b01959bf4bb..62b1f73749dc 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -25,6 +25,12 @@
25#include <net/inet_frag.h> 25#include <net/inet_frag.h>
26#include <net/inet_ecn.h> 26#include <net/inet_ecn.h>
27 27
28#define INETFRAGS_EVICT_BUCKETS 128
29#define INETFRAGS_EVICT_MAX 512
30
31/* don't rebuild inetfrag table with new secret more often than this */
32#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ)
33
28/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements 34/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
29 * Value : 0xff if frame should be dropped. 35 * Value : 0xff if frame should be dropped.
30 * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field 36 * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
@@ -46,24 +52,39 @@ const u8 ip_frag_ecn_table[16] = {
46}; 52};
47EXPORT_SYMBOL(ip_frag_ecn_table); 53EXPORT_SYMBOL(ip_frag_ecn_table);
48 54
49static void inet_frag_secret_rebuild(unsigned long dummy) 55static unsigned int
56inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q)
57{
58 return f->hashfn(q) & (INETFRAGS_HASHSZ - 1);
59}
60
61static bool inet_frag_may_rebuild(struct inet_frags *f)
62{
63 return time_after(jiffies,
64 f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL);
65}
66
67static void inet_frag_secret_rebuild(struct inet_frags *f)
50{ 68{
51 struct inet_frags *f = (struct inet_frags *)dummy;
52 unsigned long now = jiffies;
53 int i; 69 int i;
54 70
55 /* Per bucket lock NOT needed here, due to write lock protection */ 71 write_seqlock_bh(&f->rnd_seqlock);
56 write_lock(&f->lock); 72
73 if (!inet_frag_may_rebuild(f))
74 goto out;
57 75
58 get_random_bytes(&f->rnd, sizeof(u32)); 76 get_random_bytes(&f->rnd, sizeof(u32));
77
59 for (i = 0; i < INETFRAGS_HASHSZ; i++) { 78 for (i = 0; i < INETFRAGS_HASHSZ; i++) {
60 struct inet_frag_bucket *hb; 79 struct inet_frag_bucket *hb;
61 struct inet_frag_queue *q; 80 struct inet_frag_queue *q;
62 struct hlist_node *n; 81 struct hlist_node *n;
63 82
64 hb = &f->hash[i]; 83 hb = &f->hash[i];
84 spin_lock(&hb->chain_lock);
85
65 hlist_for_each_entry_safe(q, n, &hb->chain, list) { 86 hlist_for_each_entry_safe(q, n, &hb->chain, list) {
66 unsigned int hval = f->hashfn(q); 87 unsigned int hval = inet_frag_hashfn(f, q);
67 88
68 if (hval != i) { 89 if (hval != i) {
69 struct inet_frag_bucket *hb_dest; 90 struct inet_frag_bucket *hb_dest;
@@ -72,76 +93,195 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
72 93
73 /* Relink to new hash chain. */ 94 /* Relink to new hash chain. */
74 hb_dest = &f->hash[hval]; 95 hb_dest = &f->hash[hval];
96
97 /* This is the only place where we take
98 * another chain_lock while already holding
99 * one. As this will not run concurrently,
100 * we cannot deadlock on hb_dest lock below, if its
101 * already locked it will be released soon since
102 * other caller cannot be waiting for hb lock
103 * that we've taken above.
104 */
105 spin_lock_nested(&hb_dest->chain_lock,
106 SINGLE_DEPTH_NESTING);
75 hlist_add_head(&q->list, &hb_dest->chain); 107 hlist_add_head(&q->list, &hb_dest->chain);
108 spin_unlock(&hb_dest->chain_lock);
76 } 109 }
77 } 110 }
111 spin_unlock(&hb->chain_lock);
78 } 112 }
79 write_unlock(&f->lock);
80 113
81 mod_timer(&f->secret_timer, now + f->secret_interval); 114 f->rebuild = false;
115 f->last_rebuild_jiffies = jiffies;
116out:
117 write_sequnlock_bh(&f->rnd_seqlock);
118}
119
120static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
121{
122 return q->net->low_thresh == 0 ||
123 frag_mem_limit(q->net) >= q->net->low_thresh;
124}
125
126static unsigned int
127inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
128{
129 struct inet_frag_queue *fq;
130 struct hlist_node *n;
131 unsigned int evicted = 0;
132 HLIST_HEAD(expired);
133
134evict_again:
135 spin_lock(&hb->chain_lock);
136
137 hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
138 if (!inet_fragq_should_evict(fq))
139 continue;
140
141 if (!del_timer(&fq->timer)) {
142 /* q expiring right now thus increment its refcount so
143 * it won't be freed under us and wait until the timer
144 * has finished executing then destroy it
145 */
146 atomic_inc(&fq->refcnt);
147 spin_unlock(&hb->chain_lock);
148 del_timer_sync(&fq->timer);
149 WARN_ON(atomic_read(&fq->refcnt) != 1);
150 inet_frag_put(fq, f);
151 goto evict_again;
152 }
153
154 /* suppress xmit of (icmp) error packet */
155 fq->last_in &= ~INET_FRAG_FIRST_IN;
156 fq->last_in |= INET_FRAG_EVICTED;
157 hlist_del(&fq->list);
158 hlist_add_head(&fq->list, &expired);
159 ++evicted;
160 }
161
162 spin_unlock(&hb->chain_lock);
163
164 hlist_for_each_entry_safe(fq, n, &expired, list)
165 f->frag_expire((unsigned long) fq);
166
167 return evicted;
168}
169
170static void inet_frag_worker(struct work_struct *work)
171{
172 unsigned int budget = INETFRAGS_EVICT_BUCKETS;
173 unsigned int i, evicted = 0;
174 struct inet_frags *f;
175
176 f = container_of(work, struct inet_frags, frags_work);
177
178 BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
179
180 local_bh_disable();
181
182 for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
183 evicted += inet_evict_bucket(f, &f->hash[i]);
184 i = (i + 1) & (INETFRAGS_HASHSZ - 1);
185 if (evicted > INETFRAGS_EVICT_MAX)
186 break;
187 }
188
189 f->next_bucket = i;
190
191 local_bh_enable();
192
193 if (f->rebuild && inet_frag_may_rebuild(f))
194 inet_frag_secret_rebuild(f);
195}
196
197static void inet_frag_schedule_worker(struct inet_frags *f)
198{
199 if (unlikely(!work_pending(&f->frags_work)))
200 schedule_work(&f->frags_work);
82} 201}
83 202
84void inet_frags_init(struct inet_frags *f) 203void inet_frags_init(struct inet_frags *f)
85{ 204{
86 int i; 205 int i;
87 206
207 INIT_WORK(&f->frags_work, inet_frag_worker);
208
88 for (i = 0; i < INETFRAGS_HASHSZ; i++) { 209 for (i = 0; i < INETFRAGS_HASHSZ; i++) {
89 struct inet_frag_bucket *hb = &f->hash[i]; 210 struct inet_frag_bucket *hb = &f->hash[i];
90 211
91 spin_lock_init(&hb->chain_lock); 212 spin_lock_init(&hb->chain_lock);
92 INIT_HLIST_HEAD(&hb->chain); 213 INIT_HLIST_HEAD(&hb->chain);
93 } 214 }
94 rwlock_init(&f->lock);
95 215
96 setup_timer(&f->secret_timer, inet_frag_secret_rebuild, 216 seqlock_init(&f->rnd_seqlock);
97 (unsigned long)f); 217 f->last_rebuild_jiffies = 0;
98 f->secret_timer.expires = jiffies + f->secret_interval;
99 add_timer(&f->secret_timer);
100} 218}
101EXPORT_SYMBOL(inet_frags_init); 219EXPORT_SYMBOL(inet_frags_init);
102 220
103void inet_frags_init_net(struct netns_frags *nf) 221void inet_frags_init_net(struct netns_frags *nf)
104{ 222{
105 nf->nqueues = 0;
106 init_frag_mem_limit(nf); 223 init_frag_mem_limit(nf);
107 INIT_LIST_HEAD(&nf->lru_list);
108 spin_lock_init(&nf->lru_lock);
109} 224}
110EXPORT_SYMBOL(inet_frags_init_net); 225EXPORT_SYMBOL(inet_frags_init_net);
111 226
112void inet_frags_fini(struct inet_frags *f) 227void inet_frags_fini(struct inet_frags *f)
113{ 228{
114 del_timer(&f->secret_timer); 229 cancel_work_sync(&f->frags_work);
115} 230}
116EXPORT_SYMBOL(inet_frags_fini); 231EXPORT_SYMBOL(inet_frags_fini);
117 232
118void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) 233void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
119{ 234{
120 nf->low_thresh = 0; 235 unsigned int seq;
236 int i;
121 237
238 nf->low_thresh = 0;
122 local_bh_disable(); 239 local_bh_disable();
123 inet_frag_evictor(nf, f, true); 240
241evict_again:
242 seq = read_seqbegin(&f->rnd_seqlock);
243
244 for (i = 0; i < INETFRAGS_HASHSZ ; i++)
245 inet_evict_bucket(f, &f->hash[i]);
246
247 if (read_seqretry(&f->rnd_seqlock, seq))
248 goto evict_again;
249
124 local_bh_enable(); 250 local_bh_enable();
125 251
126 percpu_counter_destroy(&nf->mem); 252 percpu_counter_destroy(&nf->mem);
127} 253}
128EXPORT_SYMBOL(inet_frags_exit_net); 254EXPORT_SYMBOL(inet_frags_exit_net);
129 255
130static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) 256static struct inet_frag_bucket *
257get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
258__acquires(hb->chain_lock)
131{ 259{
132 struct inet_frag_bucket *hb; 260 struct inet_frag_bucket *hb;
133 unsigned int hash; 261 unsigned int seq, hash;
262
263 restart:
264 seq = read_seqbegin(&f->rnd_seqlock);
134 265
135 read_lock(&f->lock); 266 hash = inet_frag_hashfn(f, fq);
136 hash = f->hashfn(fq);
137 hb = &f->hash[hash]; 267 hb = &f->hash[hash];
138 268
139 spin_lock(&hb->chain_lock); 269 spin_lock(&hb->chain_lock);
270 if (read_seqretry(&f->rnd_seqlock, seq)) {
271 spin_unlock(&hb->chain_lock);
272 goto restart;
273 }
274
275 return hb;
276}
277
278static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
279{
280 struct inet_frag_bucket *hb;
281
282 hb = get_frag_bucket_locked(fq, f);
140 hlist_del(&fq->list); 283 hlist_del(&fq->list);
141 spin_unlock(&hb->chain_lock); 284 spin_unlock(&hb->chain_lock);
142
143 read_unlock(&f->lock);
144 inet_frag_lru_del(fq);
145} 285}
146 286
147void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) 287void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
@@ -165,8 +305,7 @@ static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
165 kfree_skb(skb); 305 kfree_skb(skb);
166} 306}
167 307
168void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, 308void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
169 int *work)
170{ 309{
171 struct sk_buff *fp; 310 struct sk_buff *fp;
172 struct netns_frags *nf; 311 struct netns_frags *nf;
@@ -186,86 +325,30 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
186 fp = xp; 325 fp = xp;
187 } 326 }
188 sum = sum_truesize + f->qsize; 327 sum = sum_truesize + f->qsize;
189 if (work)
190 *work -= sum;
191 sub_frag_mem_limit(q, sum); 328 sub_frag_mem_limit(q, sum);
192 329
193 if (f->destructor) 330 if (f->destructor)
194 f->destructor(q); 331 f->destructor(q);
195 kfree(q); 332 kfree(q);
196
197} 333}
198EXPORT_SYMBOL(inet_frag_destroy); 334EXPORT_SYMBOL(inet_frag_destroy);
199 335
200int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
201{
202 struct inet_frag_queue *q;
203 int work, evicted = 0;
204
205 if (!force) {
206 if (frag_mem_limit(nf) <= nf->high_thresh)
207 return 0;
208 }
209
210 work = frag_mem_limit(nf) - nf->low_thresh;
211 while (work > 0 || force) {
212 spin_lock(&nf->lru_lock);
213
214 if (list_empty(&nf->lru_list)) {
215 spin_unlock(&nf->lru_lock);
216 break;
217 }
218
219 q = list_first_entry(&nf->lru_list,
220 struct inet_frag_queue, lru_list);
221 atomic_inc(&q->refcnt);
222 /* Remove q from list to avoid several CPUs grabbing it */
223 list_del_init(&q->lru_list);
224
225 spin_unlock(&nf->lru_lock);
226
227 spin_lock(&q->lock);
228 if (!(q->last_in & INET_FRAG_COMPLETE))
229 inet_frag_kill(q, f);
230 spin_unlock(&q->lock);
231
232 if (atomic_dec_and_test(&q->refcnt))
233 inet_frag_destroy(q, f, &work);
234 evicted++;
235 }
236
237 return evicted;
238}
239EXPORT_SYMBOL(inet_frag_evictor);
240
241static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, 336static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
242 struct inet_frag_queue *qp_in, struct inet_frags *f, 337 struct inet_frag_queue *qp_in, struct inet_frags *f,
243 void *arg) 338 void *arg)
244{ 339{
245 struct inet_frag_bucket *hb; 340 struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
246 struct inet_frag_queue *qp; 341 struct inet_frag_queue *qp;
247 unsigned int hash;
248
249 read_lock(&f->lock); /* Protects against hash rebuild */
250 /*
251 * While we stayed w/o the lock other CPU could update
252 * the rnd seed, so we need to re-calculate the hash
253 * chain. Fortunatelly the qp_in can be used to get one.
254 */
255 hash = f->hashfn(qp_in);
256 hb = &f->hash[hash];
257 spin_lock(&hb->chain_lock);
258 342
259#ifdef CONFIG_SMP 343#ifdef CONFIG_SMP
260 /* With SMP race we have to recheck hash table, because 344 /* With SMP race we have to recheck hash table, because
261 * such entry could be created on other cpu, while we 345 * such entry could have been created on other cpu before
262 * released the hash bucket lock. 346 * we acquired hash bucket lock.
263 */ 347 */
264 hlist_for_each_entry(qp, &hb->chain, list) { 348 hlist_for_each_entry(qp, &hb->chain, list) {
265 if (qp->net == nf && f->match(qp, arg)) { 349 if (qp->net == nf && f->match(qp, arg)) {
266 atomic_inc(&qp->refcnt); 350 atomic_inc(&qp->refcnt);
267 spin_unlock(&hb->chain_lock); 351 spin_unlock(&hb->chain_lock);
268 read_unlock(&f->lock);
269 qp_in->last_in |= INET_FRAG_COMPLETE; 352 qp_in->last_in |= INET_FRAG_COMPLETE;
270 inet_frag_put(qp_in, f); 353 inet_frag_put(qp_in, f);
271 return qp; 354 return qp;
@@ -278,9 +361,8 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
278 361
279 atomic_inc(&qp->refcnt); 362 atomic_inc(&qp->refcnt);
280 hlist_add_head(&qp->list, &hb->chain); 363 hlist_add_head(&qp->list, &hb->chain);
281 inet_frag_lru_add(nf, qp); 364
282 spin_unlock(&hb->chain_lock); 365 spin_unlock(&hb->chain_lock);
283 read_unlock(&f->lock);
284 366
285 return qp; 367 return qp;
286} 368}
@@ -290,6 +372,11 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
290{ 372{
291 struct inet_frag_queue *q; 373 struct inet_frag_queue *q;
292 374
375 if (frag_mem_limit(nf) > nf->high_thresh) {
376 inet_frag_schedule_worker(f);
377 return NULL;
378 }
379
293 q = kzalloc(f->qsize, GFP_ATOMIC); 380 q = kzalloc(f->qsize, GFP_ATOMIC);
294 if (q == NULL) 381 if (q == NULL)
295 return NULL; 382 return NULL;
@@ -301,7 +388,6 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
301 setup_timer(&q->timer, f->frag_expire, (unsigned long)q); 388 setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
302 spin_lock_init(&q->lock); 389 spin_lock_init(&q->lock);
303 atomic_set(&q->refcnt, 1); 390 atomic_set(&q->refcnt, 1);
304 INIT_LIST_HEAD(&q->lru_list);
305 391
306 return q; 392 return q;
307} 393}
@@ -320,12 +406,15 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
320 406
321struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, 407struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
322 struct inet_frags *f, void *key, unsigned int hash) 408 struct inet_frags *f, void *key, unsigned int hash)
323 __releases(&f->lock)
324{ 409{
325 struct inet_frag_bucket *hb; 410 struct inet_frag_bucket *hb;
326 struct inet_frag_queue *q; 411 struct inet_frag_queue *q;
327 int depth = 0; 412 int depth = 0;
328 413
414 if (frag_mem_limit(nf) > nf->low_thresh)
415 inet_frag_schedule_worker(f);
416
417 hash &= (INETFRAGS_HASHSZ - 1);
329 hb = &f->hash[hash]; 418 hb = &f->hash[hash];
330 419
331 spin_lock(&hb->chain_lock); 420 spin_lock(&hb->chain_lock);
@@ -333,18 +422,22 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
333 if (q->net == nf && f->match(q, key)) { 422 if (q->net == nf && f->match(q, key)) {
334 atomic_inc(&q->refcnt); 423 atomic_inc(&q->refcnt);
335 spin_unlock(&hb->chain_lock); 424 spin_unlock(&hb->chain_lock);
336 read_unlock(&f->lock);
337 return q; 425 return q;
338 } 426 }
339 depth++; 427 depth++;
340 } 428 }
341 spin_unlock(&hb->chain_lock); 429 spin_unlock(&hb->chain_lock);
342 read_unlock(&f->lock);
343 430
344 if (depth <= INETFRAGS_MAXDEPTH) 431 if (depth <= INETFRAGS_MAXDEPTH)
345 return inet_frag_create(nf, f, key); 432 return inet_frag_create(nf, f, key);
346 else 433
347 return ERR_PTR(-ENOBUFS); 434 if (inet_frag_may_rebuild(f)) {
435 if (!f->rebuild)
436 f->rebuild = true;
437 inet_frag_schedule_worker(f);
438 }
439
440 return ERR_PTR(-ENOBUFS);
348} 441}
349EXPORT_SYMBOL(inet_frag_find); 442EXPORT_SYMBOL(inet_frag_find);
350 443
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index ed32313e307c..634fc31aa243 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -86,11 +86,6 @@ static inline u8 ip4_frag_ecn(u8 tos)
86 86
87static struct inet_frags ip4_frags; 87static struct inet_frags ip4_frags;
88 88
89int ip_frag_nqueues(struct net *net)
90{
91 return net->ipv4.frags.nqueues;
92}
93
94int ip_frag_mem(struct net *net) 89int ip_frag_mem(struct net *net)
95{ 90{
96 return sum_frag_mem_limit(&net->ipv4.frags); 91 return sum_frag_mem_limit(&net->ipv4.frags);
@@ -109,21 +104,21 @@ static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
109 net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd)); 104 net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
110 return jhash_3words((__force u32)id << 16 | prot, 105 return jhash_3words((__force u32)id << 16 | prot,
111 (__force u32)saddr, (__force u32)daddr, 106 (__force u32)saddr, (__force u32)daddr,
112 ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1); 107 ip4_frags.rnd);
113} 108}
114 109
115static unsigned int ip4_hashfn(struct inet_frag_queue *q) 110static unsigned int ip4_hashfn(const struct inet_frag_queue *q)
116{ 111{
117 struct ipq *ipq; 112 const struct ipq *ipq;
118 113
119 ipq = container_of(q, struct ipq, q); 114 ipq = container_of(q, struct ipq, q);
120 return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); 115 return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
121} 116}
122 117
123static bool ip4_frag_match(struct inet_frag_queue *q, void *a) 118static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
124{ 119{
125 struct ipq *qp; 120 const struct ipq *qp;
126 struct ip4_create_arg *arg = a; 121 const struct ip4_create_arg *arg = a;
127 122
128 qp = container_of(q, struct ipq, q); 123 qp = container_of(q, struct ipq, q);
129 return qp->id == arg->iph->id && 124 return qp->id == arg->iph->id &&
@@ -133,14 +128,14 @@ static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
133 qp->user == arg->user; 128 qp->user == arg->user;
134} 129}
135 130
136static void ip4_frag_init(struct inet_frag_queue *q, void *a) 131static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
137{ 132{
138 struct ipq *qp = container_of(q, struct ipq, q); 133 struct ipq *qp = container_of(q, struct ipq, q);
139 struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4, 134 struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
140 frags); 135 frags);
141 struct net *net = container_of(ipv4, struct net, ipv4); 136 struct net *net = container_of(ipv4, struct net, ipv4);
142 137
143 struct ip4_create_arg *arg = a; 138 const struct ip4_create_arg *arg = a;
144 139
145 qp->protocol = arg->iph->protocol; 140 qp->protocol = arg->iph->protocol;
146 qp->id = arg->iph->id; 141 qp->id = arg->iph->id;
@@ -177,18 +172,6 @@ static void ipq_kill(struct ipq *ipq)
177 inet_frag_kill(&ipq->q, &ip4_frags); 172 inet_frag_kill(&ipq->q, &ip4_frags);
178} 173}
179 174
180/* Memory limiting on fragments. Evictor trashes the oldest
181 * fragment queue until we are back under the threshold.
182 */
183static void ip_evictor(struct net *net)
184{
185 int evicted;
186
187 evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false);
188 if (evicted)
189 IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
190}
191
192/* 175/*
193 * Oops, a fragment queue timed out. Kill it and send an ICMP reply. 176 * Oops, a fragment queue timed out. Kill it and send an ICMP reply.
194 */ 177 */
@@ -207,7 +190,8 @@ static void ip_expire(unsigned long arg)
207 190
208 ipq_kill(qp); 191 ipq_kill(qp);
209 192
210 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); 193 if (!(qp->q.last_in & INET_FRAG_EVICTED))
194 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
211 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); 195 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
212 196
213 if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { 197 if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
@@ -260,7 +244,6 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
260 arg.iph = iph; 244 arg.iph = iph;
261 arg.user = user; 245 arg.user = user;
262 246
263 read_lock(&ip4_frags.lock);
264 hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); 247 hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
265 248
266 q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); 249 q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
@@ -505,7 +488,6 @@ found:
505 } 488 }
506 489
507 skb_dst_drop(skb); 490 skb_dst_drop(skb);
508 inet_frag_lru_move(&qp->q);
509 return -EINPROGRESS; 491 return -EINPROGRESS;
510 492
511err: 493err:
@@ -655,9 +637,6 @@ int ip_defrag(struct sk_buff *skb, u32 user)
655 net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev); 637 net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev);
656 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); 638 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
657 639
658 /* Start by cleaning up the memory. */
659 ip_evictor(net);
660
661 /* Lookup (or create) queue header */ 640 /* Lookup (or create) queue header */
662 if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) { 641 if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
663 int ret; 642 int ret;
@@ -721,14 +700,17 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
721 .data = &init_net.ipv4.frags.high_thresh, 700 .data = &init_net.ipv4.frags.high_thresh,
722 .maxlen = sizeof(int), 701 .maxlen = sizeof(int),
723 .mode = 0644, 702 .mode = 0644,
724 .proc_handler = proc_dointvec 703 .proc_handler = proc_dointvec_minmax,
704 .extra1 = &init_net.ipv4.frags.low_thresh
725 }, 705 },
726 { 706 {
727 .procname = "ipfrag_low_thresh", 707 .procname = "ipfrag_low_thresh",
728 .data = &init_net.ipv4.frags.low_thresh, 708 .data = &init_net.ipv4.frags.low_thresh,
729 .maxlen = sizeof(int), 709 .maxlen = sizeof(int),
730 .mode = 0644, 710 .mode = 0644,
731 .proc_handler = proc_dointvec 711 .proc_handler = proc_dointvec_minmax,
712 .extra1 = &zero,
713 .extra2 = &init_net.ipv4.frags.high_thresh
732 }, 714 },
733 { 715 {
734 .procname = "ipfrag_time", 716 .procname = "ipfrag_time",
@@ -740,10 +722,12 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
740 { } 722 { }
741}; 723};
742 724
725/* secret interval has been deprecated */
726static int ip4_frags_secret_interval_unused;
743static struct ctl_table ip4_frags_ctl_table[] = { 727static struct ctl_table ip4_frags_ctl_table[] = {
744 { 728 {
745 .procname = "ipfrag_secret_interval", 729 .procname = "ipfrag_secret_interval",
746 .data = &ip4_frags.secret_interval, 730 .data = &ip4_frags_secret_interval_unused,
747 .maxlen = sizeof(int), 731 .maxlen = sizeof(int),
748 .mode = 0644, 732 .mode = 0644,
749 .proc_handler = proc_dointvec_jiffies, 733 .proc_handler = proc_dointvec_jiffies,
@@ -771,7 +755,10 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
771 goto err_alloc; 755 goto err_alloc;
772 756
773 table[0].data = &net->ipv4.frags.high_thresh; 757 table[0].data = &net->ipv4.frags.high_thresh;
758 table[0].extra1 = &net->ipv4.frags.low_thresh;
759 table[0].extra2 = &init_net.ipv4.frags.high_thresh;
774 table[1].data = &net->ipv4.frags.low_thresh; 760 table[1].data = &net->ipv4.frags.low_thresh;
761 table[1].extra2 = &net->ipv4.frags.high_thresh;
775 table[2].data = &net->ipv4.frags.timeout; 762 table[2].data = &net->ipv4.frags.timeout;
776 763
777 /* Don't export sysctls to unprivileged users */ 764 /* Don't export sysctls to unprivileged users */
@@ -873,6 +860,5 @@ void __init ipfrag_init(void)
873 ip4_frags.qsize = sizeof(struct ipq); 860 ip4_frags.qsize = sizeof(struct ipq);
874 ip4_frags.match = ip4_frag_match; 861 ip4_frags.match = ip4_frag_match;
875 ip4_frags.frag_expire = ip_expire; 862 ip4_frags.frag_expire = ip_expire;
876 ip4_frags.secret_interval = 10 * 60 * HZ;
877 inet_frags_init(&ip4_frags); 863 inet_frags_init(&ip4_frags);
878} 864}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 5e7aecea05cd..ad382499bace 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -288,6 +288,10 @@ int ip_options_compile(struct net *net,
288 optptr++; 288 optptr++;
289 continue; 289 continue;
290 } 290 }
291 if (unlikely(l < 2)) {
292 pp_ptr = optptr;
293 goto error;
294 }
291 optlen = optptr[1]; 295 optlen = optptr[1];
292 if (optlen < 2 || optlen > l) { 296 if (optlen < 2 || optlen > l) {
293 pp_ptr = optptr; 297 pp_ptr = optptr;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8d3b6b0e9857..b16556836d66 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -962,10 +962,6 @@ alloc_new_skb:
962 sk->sk_allocation); 962 sk->sk_allocation);
963 if (unlikely(skb == NULL)) 963 if (unlikely(skb == NULL))
964 err = -ENOBUFS; 964 err = -ENOBUFS;
965 else
966 /* only the initial fragment is
967 time stamped */
968 cork->tx_flags = 0;
969 } 965 }
970 if (skb == NULL) 966 if (skb == NULL)
971 goto error; 967 goto error;
@@ -976,7 +972,10 @@ alloc_new_skb:
976 skb->ip_summed = csummode; 972 skb->ip_summed = csummode;
977 skb->csum = 0; 973 skb->csum = 0;
978 skb_reserve(skb, hh_len); 974 skb_reserve(skb, hh_len);
975
976 /* only the initial fragment is time stamped */
979 skb_shinfo(skb)->tx_flags = cork->tx_flags; 977 skb_shinfo(skb)->tx_flags = cork->tx_flags;
978 cork->tx_flags = 0;
980 979
981 /* 980 /*
982 * Find where to start putting bytes. 981 * Find where to start putting bytes.
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 64741b938632..5cb830c78990 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1319,7 +1319,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1319 if (sk->sk_type != SOCK_STREAM) 1319 if (sk->sk_type != SOCK_STREAM)
1320 return -ENOPROTOOPT; 1320 return -ENOPROTOOPT;
1321 1321
1322 msg.msg_control = optval; 1322 msg.msg_control = (__force void *) optval;
1323 msg.msg_controllen = len; 1323 msg.msg_controllen = len;
1324 msg.msg_flags = flags; 1324 msg.msg_flags = flags;
1325 1325
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 097b3e7c1e8f..dd8c8c765799 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -73,12 +73,7 @@ static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
73{ 73{
74 struct dst_entry *old_dst; 74 struct dst_entry *old_dst;
75 75
76 if (dst) { 76 dst_clone(dst);
77 if (dst->flags & DST_NOCACHE)
78 dst = NULL;
79 else
80 dst_clone(dst);
81 }
82 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst); 77 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
83 dst_release(old_dst); 78 dst_release(old_dst);
84} 79}
@@ -108,13 +103,14 @@ static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
108 103
109 rcu_read_lock(); 104 rcu_read_lock();
110 dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst); 105 dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
106 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
107 dst = NULL;
111 if (dst) { 108 if (dst) {
112 if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) { 109 if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
113 rcu_read_unlock();
114 tunnel_dst_reset(t); 110 tunnel_dst_reset(t);
115 return NULL; 111 dst_release(dst);
112 dst = NULL;
116 } 113 }
117 dst_hold(dst);
118 } 114 }
119 rcu_read_unlock(); 115 rcu_read_unlock();
120 return (struct rtable *)dst; 116 return (struct rtable *)dst;
@@ -173,6 +169,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
173 169
174 hlist_for_each_entry_rcu(t, head, hash_node) { 170 hlist_for_each_entry_rcu(t, head, hash_node) {
175 if (remote != t->parms.iph.daddr || 171 if (remote != t->parms.iph.daddr ||
172 t->parms.iph.saddr != 0 ||
176 !(t->dev->flags & IFF_UP)) 173 !(t->dev->flags & IFF_UP))
177 continue; 174 continue;
178 175
@@ -189,10 +186,11 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
189 head = &itn->tunnels[hash]; 186 head = &itn->tunnels[hash];
190 187
191 hlist_for_each_entry_rcu(t, head, hash_node) { 188 hlist_for_each_entry_rcu(t, head, hash_node) {
192 if ((local != t->parms.iph.saddr && 189 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
193 (local != t->parms.iph.daddr || 190 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
194 !ipv4_is_multicast(local))) || 191 continue;
195 !(t->dev->flags & IFF_UP)) 192
193 if (!(t->dev->flags & IFF_UP))
196 continue; 194 continue;
197 195
198 if (!ip_tunnel_key_match(&t->parms, flags, key)) 196 if (!ip_tunnel_key_match(&t->parms, flags, key))
@@ -209,6 +207,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
209 207
210 hlist_for_each_entry_rcu(t, head, hash_node) { 208 hlist_for_each_entry_rcu(t, head, hash_node) {
211 if (t->parms.i_key != key || 209 if (t->parms.i_key != key ||
210 t->parms.iph.saddr != 0 ||
211 t->parms.iph.daddr != 0 ||
212 !(t->dev->flags & IFF_UP)) 212 !(t->dev->flags & IFF_UP))
213 continue; 213 continue;
214 214
@@ -305,7 +305,7 @@ static struct net_device *__ip_tunnel_create(struct net *net,
305 } 305 }
306 306
307 ASSERT_RTNL(); 307 ASSERT_RTNL();
308 dev = alloc_netdev(ops->priv_size, name, ops->setup); 308 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
309 if (!dev) { 309 if (!dev) {
310 err = -ENOMEM; 310 err = -ENOMEM;
311 goto failed; 311 goto failed;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index b3e86ea7b71b..5bbef4fdcb43 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -143,8 +143,6 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */
143__be32 root_server_addr = NONE; /* Address of NFS server */ 143__be32 root_server_addr = NONE; /* Address of NFS server */
144u8 root_server_path[256] = { 0, }; /* Path to mount as root */ 144u8 root_server_path[256] = { 0, }; /* Path to mount as root */
145 145
146__be32 ic_dev_xid; /* Device under configuration */
147
148/* vendor class identifier */ 146/* vendor class identifier */
149static char vendor_class_identifier[253] __initdata; 147static char vendor_class_identifier[253] __initdata;
150 148
@@ -654,6 +652,7 @@ static struct packet_type bootp_packet_type __initdata = {
654 .func = ic_bootp_recv, 652 .func = ic_bootp_recv,
655}; 653};
656 654
655static __be32 ic_dev_xid; /* Device under configuration */
657 656
658/* 657/*
659 * Initialize DHCP/BOOTP extension fields in the request. 658 * Initialize DHCP/BOOTP extension fields in the request.
@@ -1218,10 +1217,10 @@ static int __init ic_dynamic(void)
1218 get_random_bytes(&timeout, sizeof(timeout)); 1217 get_random_bytes(&timeout, sizeof(timeout));
1219 timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM); 1218 timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM);
1220 for (;;) { 1219 for (;;) {
1220#ifdef IPCONFIG_BOOTP
1221 /* Track the device we are configuring */ 1221 /* Track the device we are configuring */
1222 ic_dev_xid = d->xid; 1222 ic_dev_xid = d->xid;
1223 1223
1224#ifdef IPCONFIG_BOOTP
1225 if (do_bootp && (d->able & IC_BOOTP)) 1224 if (do_bootp && (d->able & IC_BOOTP))
1226 ic_bootp_send_if(d, jiffies - start_jiffies); 1225 ic_bootp_send_if(d, jiffies - start_jiffies);
1227#endif 1226#endif
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 65bcaa789043..c8034587859d 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -500,7 +500,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
500 else 500 else
501 sprintf(name, "pimreg%u", mrt->id); 501 sprintf(name, "pimreg%u", mrt->id);
502 502
503 dev = alloc_netdev(0, name, reg_vif_setup); 503 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
504 504
505 if (dev == NULL) 505 if (dev == NULL)
506 return NULL; 506 return NULL;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index a26ce035e3fa..fb173126f03d 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -36,6 +36,16 @@ config NF_CONNTRACK_PROC_COMPAT
36 36
37 If unsure, say Y. 37 If unsure, say Y.
38 38
39config NF_LOG_ARP
40 tristate "ARP packet logging"
41 default m if NETFILTER_ADVANCED=n
42 select NF_LOG_COMMON
43
44config NF_LOG_IPV4
45 tristate "IPv4 packet logging"
46 default m if NETFILTER_ADVANCED=n
47 select NF_LOG_COMMON
48
39config NF_TABLES_IPV4 49config NF_TABLES_IPV4
40 depends on NF_TABLES 50 depends on NF_TABLES
41 tristate "IPv4 nf_tables support" 51 tristate "IPv4 nf_tables support"
@@ -159,25 +169,6 @@ config IP_NF_TARGET_SYNPROXY
159 169
160 To compile it as a module, choose M here. If unsure, say N. 170 To compile it as a module, choose M here. If unsure, say N.
161 171
162config IP_NF_TARGET_ULOG
163 tristate "ULOG target support (obsolete)"
164 default m if NETFILTER_ADVANCED=n
165 ---help---
166
167 This option enables the old IPv4-only "ipt_ULOG" implementation
168 which has been obsoleted by the new "nfnetlink_log" code (see
169 CONFIG_NETFILTER_NETLINK_LOG).
170
171 This option adds a `ULOG' target, which allows you to create rules in
172 any iptables table. The packet is passed to a userspace logging
173 daemon using netlink multicast sockets; unlike the LOG target
174 which can only be viewed through syslog.
175
176 The appropriate userspace logging daemon (ulogd) may be obtained from
177 <http://www.netfilter.org/projects/ulogd/index.html>
178
179 To compile it as a module, choose M here. If unsure, say N.
180
181# NAT + specific targets: nf_conntrack 172# NAT + specific targets: nf_conntrack
182config NF_NAT_IPV4 173config NF_NAT_IPV4
183 tristate "IPv4 NAT" 174 tristate "IPv4 NAT"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 90b82405331e..245db9df3337 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -19,6 +19,10 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
19# defrag 19# defrag
20obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o 20obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
21 21
22# logging
23obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o
24obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o
25
22# NAT helpers (nf_conntrack) 26# NAT helpers (nf_conntrack)
23obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o 27obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o
24obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o 28obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
deleted file mode 100644
index 9cb993cd224b..000000000000
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ /dev/null
@@ -1,498 +0,0 @@
1/*
2 * netfilter module for userspace packet logging daemons
3 *
4 * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
5 * (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
7 * (C) 2005-2007 Patrick McHardy <kaber@trash.net>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 *
13 * This module accepts two parameters:
14 *
15 * nlbufsiz:
16 * The parameter specifies how big the buffer for each netlink multicast
17 * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will
18 * get accumulated in the kernel until they are sent to userspace. It is
19 * NOT possible to allocate more than 128kB, and it is strongly discouraged,
20 * because atomically allocating 128kB inside the network rx softirq is not
21 * reliable. Please also keep in mind that this buffer size is allocated for
22 * each nlgroup you are using, so the total kernel memory usage increases
23 * by that factor.
24 *
25 * Actually you should use nlbufsiz a bit smaller than PAGE_SIZE, since
26 * nlbufsiz is used with alloc_skb, which adds another
27 * sizeof(struct skb_shared_info). Use NLMSG_GOODSIZE instead.
28 *
29 * flushtimeout:
30 * Specify, after how many hundredths of a second the queue should be
31 * flushed even if it is not full yet.
32 */
33#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34#include <linux/module.h>
35#include <linux/spinlock.h>
36#include <linux/socket.h>
37#include <linux/slab.h>
38#include <linux/skbuff.h>
39#include <linux/kernel.h>
40#include <linux/timer.h>
41#include <net/netlink.h>
42#include <linux/netdevice.h>
43#include <linux/mm.h>
44#include <linux/moduleparam.h>
45#include <linux/netfilter.h>
46#include <linux/netfilter/x_tables.h>
47#include <linux/netfilter_ipv4/ipt_ULOG.h>
48#include <net/netfilter/nf_log.h>
49#include <net/netns/generic.h>
50#include <net/sock.h>
51#include <linux/bitops.h>
52#include <asm/unaligned.h>
53
54MODULE_LICENSE("GPL");
55MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
56MODULE_DESCRIPTION("Xtables: packet logging to netlink using ULOG");
57MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
58
59#define ULOG_NL_EVENT 111 /* Harald's favorite number */
60#define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */
61
62static unsigned int nlbufsiz = NLMSG_GOODSIZE;
63module_param(nlbufsiz, uint, 0400);
64MODULE_PARM_DESC(nlbufsiz, "netlink buffer size");
65
66static unsigned int flushtimeout = 10;
67module_param(flushtimeout, uint, 0600);
68MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
69
70static bool nflog = true;
71module_param(nflog, bool, 0400);
72MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
73
74/* global data structures */
75
76typedef struct {
77 unsigned int qlen; /* number of nlmsgs' in the skb */
78 struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */
79 struct sk_buff *skb; /* the pre-allocated skb */
80 struct timer_list timer; /* the timer function */
81} ulog_buff_t;
82
83static int ulog_net_id __read_mostly;
84struct ulog_net {
85 unsigned int nlgroup[ULOG_MAXNLGROUPS];
86 ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS];
87 struct sock *nflognl;
88 spinlock_t lock;
89};
90
91static struct ulog_net *ulog_pernet(struct net *net)
92{
93 return net_generic(net, ulog_net_id);
94}
95
96/* send one ulog_buff_t to userspace */
97static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum)
98{
99 ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum];
100
101 pr_debug("ulog_send: timer is deleting\n");
102 del_timer(&ub->timer);
103
104 if (!ub->skb) {
105 pr_debug("ulog_send: nothing to send\n");
106 return;
107 }
108
109 /* last nlmsg needs NLMSG_DONE */
110 if (ub->qlen > 1)
111 ub->lastnlh->nlmsg_type = NLMSG_DONE;
112
113 NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;
114 pr_debug("throwing %d packets to netlink group %u\n",
115 ub->qlen, nlgroupnum + 1);
116 netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1,
117 GFP_ATOMIC);
118
119 ub->qlen = 0;
120 ub->skb = NULL;
121 ub->lastnlh = NULL;
122}
123
124
125/* timer function to flush queue in flushtimeout time */
126static void ulog_timer(unsigned long data)
127{
128 unsigned int groupnum = *((unsigned int *)data);
129 struct ulog_net *ulog = container_of((void *)data,
130 struct ulog_net,
131 nlgroup[groupnum]);
132 pr_debug("timer function called, calling ulog_send\n");
133
134 /* lock to protect against somebody modifying our structure
135 * from ipt_ulog_target at the same time */
136 spin_lock_bh(&ulog->lock);
137 ulog_send(ulog, groupnum);
138 spin_unlock_bh(&ulog->lock);
139}
140
141static struct sk_buff *ulog_alloc_skb(unsigned int size)
142{
143 struct sk_buff *skb;
144 unsigned int n;
145
146 /* alloc skb which should be big enough for a whole
147 * multipart message. WARNING: has to be <= 131000
148 * due to slab allocator restrictions */
149
150 n = max(size, nlbufsiz);
151 skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN);
152 if (!skb) {
153 if (n > size) {
154 /* try to allocate only as much as we need for
155 * current packet */
156
157 skb = alloc_skb(size, GFP_ATOMIC);
158 if (!skb)
159 pr_debug("cannot even allocate %ub\n", size);
160 }
161 }
162
163 return skb;
164}
165
166static void ipt_ulog_packet(struct net *net,
167 unsigned int hooknum,
168 const struct sk_buff *skb,
169 const struct net_device *in,
170 const struct net_device *out,
171 const struct ipt_ulog_info *loginfo,
172 const char *prefix)
173{
174 ulog_buff_t *ub;
175 ulog_packet_msg_t *pm;
176 size_t size, copy_len;
177 struct nlmsghdr *nlh;
178 struct timeval tv;
179 struct ulog_net *ulog = ulog_pernet(net);
180
181 /* ffs == find first bit set, necessary because userspace
182 * is already shifting groupnumber, but we need unshifted.
183 * ffs() returns [1..32], we need [0..31] */
184 unsigned int groupnum = ffs(loginfo->nl_group) - 1;
185
186 /* calculate the size of the skb needed */
187 if (loginfo->copy_range == 0 || loginfo->copy_range > skb->len)
188 copy_len = skb->len;
189 else
190 copy_len = loginfo->copy_range;
191
192 size = nlmsg_total_size(sizeof(*pm) + copy_len);
193
194 ub = &ulog->ulog_buffers[groupnum];
195
196 spin_lock_bh(&ulog->lock);
197
198 if (!ub->skb) {
199 if (!(ub->skb = ulog_alloc_skb(size)))
200 goto alloc_failure;
201 } else if (ub->qlen >= loginfo->qthreshold ||
202 size > skb_tailroom(ub->skb)) {
203 /* either the queue len is too high or we don't have
204 * enough room in nlskb left. send it to userspace. */
205
206 ulog_send(ulog, groupnum);
207
208 if (!(ub->skb = ulog_alloc_skb(size)))
209 goto alloc_failure;
210 }
211
212 pr_debug("qlen %d, qthreshold %Zu\n", ub->qlen, loginfo->qthreshold);
213
214 nlh = nlmsg_put(ub->skb, 0, ub->qlen, ULOG_NL_EVENT,
215 sizeof(*pm)+copy_len, 0);
216 if (!nlh) {
217 pr_debug("error during nlmsg_put\n");
218 goto out_unlock;
219 }
220 ub->qlen++;
221
222 pm = nlmsg_data(nlh);
223 memset(pm, 0, sizeof(*pm));
224
225 /* We might not have a timestamp, get one */
226 if (skb->tstamp.tv64 == 0)
227 __net_timestamp((struct sk_buff *)skb);
228
229 /* copy hook, prefix, timestamp, payload, etc. */
230 pm->data_len = copy_len;
231 tv = ktime_to_timeval(skb->tstamp);
232 put_unaligned(tv.tv_sec, &pm->timestamp_sec);
233 put_unaligned(tv.tv_usec, &pm->timestamp_usec);
234 put_unaligned(skb->mark, &pm->mark);
235 pm->hook = hooknum;
236 if (prefix != NULL) {
237 strncpy(pm->prefix, prefix, sizeof(pm->prefix) - 1);
238 pm->prefix[sizeof(pm->prefix) - 1] = '\0';
239 }
240 else if (loginfo->prefix[0] != '\0')
241 strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix));
242
243 if (in && in->hard_header_len > 0 &&
244 skb->mac_header != skb->network_header &&
245 in->hard_header_len <= ULOG_MAC_LEN) {
246 memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len);
247 pm->mac_len = in->hard_header_len;
248 } else
249 pm->mac_len = 0;
250
251 if (in)
252 strncpy(pm->indev_name, in->name, sizeof(pm->indev_name));
253
254 if (out)
255 strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
256
257 /* copy_len <= skb->len, so can't fail. */
258 if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0)
259 BUG();
260
261 /* check if we are building multi-part messages */
262 if (ub->qlen > 1)
263 ub->lastnlh->nlmsg_flags |= NLM_F_MULTI;
264
265 ub->lastnlh = nlh;
266
267 /* if timer isn't already running, start it */
268 if (!timer_pending(&ub->timer)) {
269 ub->timer.expires = jiffies + flushtimeout * HZ / 100;
270 add_timer(&ub->timer);
271 }
272
273 /* if threshold is reached, send message to userspace */
274 if (ub->qlen >= loginfo->qthreshold) {
275 if (loginfo->qthreshold > 1)
276 nlh->nlmsg_type = NLMSG_DONE;
277 ulog_send(ulog, groupnum);
278 }
279out_unlock:
280 spin_unlock_bh(&ulog->lock);
281
282 return;
283
284alloc_failure:
285 pr_debug("Error building netlink message\n");
286 spin_unlock_bh(&ulog->lock);
287}
288
289static unsigned int
290ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
291{
292 struct net *net = dev_net(par->in ? par->in : par->out);
293
294 ipt_ulog_packet(net, par->hooknum, skb, par->in, par->out,
295 par->targinfo, NULL);
296 return XT_CONTINUE;
297}
298
299static void ipt_logfn(struct net *net,
300 u_int8_t pf,
301 unsigned int hooknum,
302 const struct sk_buff *skb,
303 const struct net_device *in,
304 const struct net_device *out,
305 const struct nf_loginfo *li,
306 const char *prefix)
307{
308 struct ipt_ulog_info loginfo;
309
310 if (!li || li->type != NF_LOG_TYPE_ULOG) {
311 loginfo.nl_group = ULOG_DEFAULT_NLGROUP;
312 loginfo.copy_range = 0;
313 loginfo.qthreshold = ULOG_DEFAULT_QTHRESHOLD;
314 loginfo.prefix[0] = '\0';
315 } else {
316 loginfo.nl_group = li->u.ulog.group;
317 loginfo.copy_range = li->u.ulog.copy_len;
318 loginfo.qthreshold = li->u.ulog.qthreshold;
319 strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
320 }
321
322 ipt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix);
323}
324
325static int ulog_tg_check(const struct xt_tgchk_param *par)
326{
327 const struct ipt_ulog_info *loginfo = par->targinfo;
328
329 if (!par->net->xt.ulog_warn_deprecated) {
330 pr_info("ULOG is deprecated and it will be removed soon, "
331 "use NFLOG instead\n");
332 par->net->xt.ulog_warn_deprecated = true;
333 }
334
335 if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
336 pr_debug("prefix not null-terminated\n");
337 return -EINVAL;
338 }
339 if (loginfo->qthreshold > ULOG_MAX_QLEN) {
340 pr_debug("queue threshold %Zu > MAX_QLEN\n",
341 loginfo->qthreshold);
342 return -EINVAL;
343 }
344 return 0;
345}
346
347#ifdef CONFIG_COMPAT
348struct compat_ipt_ulog_info {
349 compat_uint_t nl_group;
350 compat_size_t copy_range;
351 compat_size_t qthreshold;
352 char prefix[ULOG_PREFIX_LEN];
353};
354
355static void ulog_tg_compat_from_user(void *dst, const void *src)
356{
357 const struct compat_ipt_ulog_info *cl = src;
358 struct ipt_ulog_info l = {
359 .nl_group = cl->nl_group,
360 .copy_range = cl->copy_range,
361 .qthreshold = cl->qthreshold,
362 };
363
364 memcpy(l.prefix, cl->prefix, sizeof(l.prefix));
365 memcpy(dst, &l, sizeof(l));
366}
367
368static int ulog_tg_compat_to_user(void __user *dst, const void *src)
369{
370 const struct ipt_ulog_info *l = src;
371 struct compat_ipt_ulog_info cl = {
372 .nl_group = l->nl_group,
373 .copy_range = l->copy_range,
374 .qthreshold = l->qthreshold,
375 };
376
377 memcpy(cl.prefix, l->prefix, sizeof(cl.prefix));
378 return copy_to_user(dst, &cl, sizeof(cl)) ? -EFAULT : 0;
379}
380#endif /* CONFIG_COMPAT */
381
382static struct xt_target ulog_tg_reg __read_mostly = {
383 .name = "ULOG",
384 .family = NFPROTO_IPV4,
385 .target = ulog_tg,
386 .targetsize = sizeof(struct ipt_ulog_info),
387 .checkentry = ulog_tg_check,
388#ifdef CONFIG_COMPAT
389 .compatsize = sizeof(struct compat_ipt_ulog_info),
390 .compat_from_user = ulog_tg_compat_from_user,
391 .compat_to_user = ulog_tg_compat_to_user,
392#endif
393 .me = THIS_MODULE,
394};
395
396static struct nf_logger ipt_ulog_logger __read_mostly = {
397 .name = "ipt_ULOG",
398 .logfn = ipt_logfn,
399 .me = THIS_MODULE,
400};
401
402static int __net_init ulog_tg_net_init(struct net *net)
403{
404 int i;
405 struct ulog_net *ulog = ulog_pernet(net);
406 struct netlink_kernel_cfg cfg = {
407 .groups = ULOG_MAXNLGROUPS,
408 };
409
410 spin_lock_init(&ulog->lock);
411 /* initialize ulog_buffers */
412 for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
413 ulog->nlgroup[i] = i;
414 setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer,
415 (unsigned long)&ulog->nlgroup[i]);
416 }
417
418 ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
419 if (!ulog->nflognl)
420 return -ENOMEM;
421
422 if (nflog)
423 nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger);
424
425 return 0;
426}
427
428static void __net_exit ulog_tg_net_exit(struct net *net)
429{
430 ulog_buff_t *ub;
431 int i;
432 struct ulog_net *ulog = ulog_pernet(net);
433
434 if (nflog)
435 nf_log_unset(net, &ipt_ulog_logger);
436
437 netlink_kernel_release(ulog->nflognl);
438
439 /* remove pending timers and free allocated skb's */
440 for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
441 ub = &ulog->ulog_buffers[i];
442 pr_debug("timer is deleting\n");
443 del_timer(&ub->timer);
444
445 if (ub->skb) {
446 kfree_skb(ub->skb);
447 ub->skb = NULL;
448 }
449 }
450}
451
452static struct pernet_operations ulog_tg_net_ops = {
453 .init = ulog_tg_net_init,
454 .exit = ulog_tg_net_exit,
455 .id = &ulog_net_id,
456 .size = sizeof(struct ulog_net),
457};
458
459static int __init ulog_tg_init(void)
460{
461 int ret;
462 pr_debug("init module\n");
463
464 if (nlbufsiz > 128*1024) {
465 pr_warn("Netlink buffer has to be <= 128kB\n");
466 return -EINVAL;
467 }
468
469 ret = register_pernet_subsys(&ulog_tg_net_ops);
470 if (ret)
471 goto out_pernet;
472
473 ret = xt_register_target(&ulog_tg_reg);
474 if (ret < 0)
475 goto out_target;
476
477 if (nflog)
478 nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
479
480 return 0;
481
482out_target:
483 unregister_pernet_subsys(&ulog_tg_net_ops);
484out_pernet:
485 return ret;
486}
487
488static void __exit ulog_tg_exit(void)
489{
490 pr_debug("cleanup_module\n");
491 if (nflog)
492 nf_log_unregister(&ipt_ulog_logger);
493 xt_unregister_target(&ulog_tg_reg);
494 unregister_pernet_subsys(&ulog_tg_net_ops);
495}
496
497module_init(ulog_tg_init);
498module_exit(ulog_tg_exit);
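The ULOG target above hands packets to userspace over a NETLINK_NFLOG socket, one ulog_packet_msg per netlink message. As an illustration only (not part of this patch), a minimal userspace listener might look like the sketch below; it assumes the default netlink group 1 (iptables ... -j ULOG --ulog-nlgroup 1) and the ulog_packet_msg_t layout exported by <linux/netfilter_ipv4/ipt_ULOG.h>.

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/netfilter_ipv4/ipt_ULOG.h>

int main(void)
{
        char buf[65536];
        struct sockaddr_nl addr = {
                .nl_family = AF_NETLINK,
                .nl_groups = 1,                 /* --ulog-nlgroup 1 */
        };
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_NFLOG);

        if (fd < 0 || bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
                return 1;

        for (;;) {
                int len = recv(fd, buf, sizeof(buf), 0);
                struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

                if (len <= 0)
                        break;
                for (; NLMSG_OK(nlh, len); nlh = NLMSG_NEXT(nlh, len)) {
                        ulog_packet_msg_t *pm = NLMSG_DATA(nlh);

                        /* hook, prefix and data_len mirror what
                         * ipt_ulog_packet() filled in above. */
                        printf("hook=%u prefix=%s len=%zu\n",
                               pm->hook, pm->prefix, pm->data_len);
                }
        }
        close(fd);
        return 0;
}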
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8127dc802865..4ce44c4bc57b 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -314,7 +314,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
314 return -ENOENT; 314 return -ENOENT;
315} 315}
316 316
317#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 317#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
318 318
319#include <linux/netfilter/nfnetlink.h> 319#include <linux/netfilter/nfnetlink.h>
320#include <linux/netfilter/nfnetlink_conntrack.h> 320#include <linux/netfilter/nfnetlink_conntrack.h>
@@ -388,7 +388,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
388 .invert_tuple = ipv4_invert_tuple, 388 .invert_tuple = ipv4_invert_tuple,
389 .print_tuple = ipv4_print_tuple, 389 .print_tuple = ipv4_print_tuple,
390 .get_l4proto = ipv4_get_l4proto, 390 .get_l4proto = ipv4_get_l4proto,
391#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 391#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
392 .tuple_to_nlattr = ipv4_tuple_to_nlattr, 392 .tuple_to_nlattr = ipv4_tuple_to_nlattr,
393 .nlattr_tuple_size = ipv4_nlattr_tuple_size, 393 .nlattr_tuple_size = ipv4_nlattr_tuple_size,
394 .nlattr_to_tuple = ipv4_nlattr_to_tuple, 394 .nlattr_to_tuple = ipv4_nlattr_to_tuple,
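The conversion in this hunk (and the identical ones in the files below) replaces the two-macro test with IS_ENABLED() from include/linux/kconfig.h, which is true when the option is built in (=y) or built as a module (=m). A minimal sketch of the equivalence, kernel build context assumed:

#include <linux/kconfig.h>

/* IS_ENABLED(CONFIG_NF_CT_NETLINK) is non-zero for both
 * CONFIG_NF_CT_NETLINK=y and CONFIG_NF_CT_NETLINK=m, so this single
 * test matches the older
 * defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE). */
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#define CT_NETLINK_AVAILABLE 1
#else
#define CT_NETLINK_AVAILABLE 0
#endif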
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index a338dad41b7d..b91b2641adda 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -226,7 +226,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
226 return icmp_error_message(net, tmpl, skb, ctinfo, hooknum); 226 return icmp_error_message(net, tmpl, skb, ctinfo, hooknum);
227} 227}
228 228
229#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 229#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
230 230
231#include <linux/netfilter/nfnetlink.h> 231#include <linux/netfilter/nfnetlink.h>
232#include <linux/netfilter/nfnetlink_conntrack.h> 232#include <linux/netfilter/nfnetlink_conntrack.h>
@@ -408,7 +408,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
408 .error = icmp_error, 408 .error = icmp_error,
409 .destroy = NULL, 409 .destroy = NULL,
410 .me = NULL, 410 .me = NULL,
411#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 411#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
412 .tuple_to_nlattr = icmp_tuple_to_nlattr, 412 .tuple_to_nlattr = icmp_tuple_to_nlattr,
413 .nlattr_tuple_size = icmp_nlattr_tuple_size, 413 .nlattr_tuple_size = icmp_nlattr_tuple_size,
414 .nlattr_to_tuple = icmp_nlattr_to_tuple, 414 .nlattr_to_tuple = icmp_nlattr_to_tuple,
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index b8f6381c7d0b..76bd1aef257f 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -17,7 +17,7 @@
17#include <linux/netfilter_bridge.h> 17#include <linux/netfilter_bridge.h>
18#include <linux/netfilter_ipv4.h> 18#include <linux/netfilter_ipv4.h>
19#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 19#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
20#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 20#if IS_ENABLED(CONFIG_NF_CONNTRACK)
21#include <net/netfilter/nf_conntrack.h> 21#include <net/netfilter/nf_conntrack.h>
22#endif 22#endif
23#include <net/netfilter/nf_conntrack_zones.h> 23#include <net/netfilter/nf_conntrack_zones.h>
@@ -45,7 +45,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
45{ 45{
46 u16 zone = NF_CT_DEFAULT_ZONE; 46 u16 zone = NF_CT_DEFAULT_ZONE;
47 47
48#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 48#if IS_ENABLED(CONFIG_NF_CONNTRACK)
49 if (skb->nfct) 49 if (skb->nfct)
50 zone = nf_ct_zone((struct nf_conn *)skb->nfct); 50 zone = nf_ct_zone((struct nf_conn *)skb->nfct);
51#endif 51#endif
@@ -74,8 +74,8 @@ static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
74 inet->nodefrag) 74 inet->nodefrag)
75 return NF_ACCEPT; 75 return NF_ACCEPT;
76 76
77#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 77#if IS_ENABLED(CONFIG_NF_CONNTRACK)
78#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) 78#if !IS_ENABLED(CONFIG_NF_NAT)
79 /* Previously seen (loopback)? Ignore. Do this before 79 /* Previously seen (loopback)? Ignore. Do this before
80 fragment check. */ 80 fragment check. */
81 if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) 81 if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
new file mode 100644
index 000000000000..ccfc78db12ee
--- /dev/null
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -0,0 +1,149 @@
1/*
2 * (C) 2014 by Pablo Neira Ayuso <pablo@netfilter.org>
3 *
4 * Based on code from ebt_log from:
5 *
6 * Bart De Schuymer <bdschuym@pandora.be>
7 * Harald Welte <laforge@netfilter.org>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include <linux/module.h>
15#include <linux/spinlock.h>
16#include <linux/skbuff.h>
17#include <linux/if_arp.h>
18#include <linux/ip.h>
19#include <net/route.h>
20
21#include <linux/netfilter.h>
22#include <linux/netfilter/xt_LOG.h>
23#include <net/netfilter/nf_log.h>
24
25static struct nf_loginfo default_loginfo = {
26 .type = NF_LOG_TYPE_LOG,
27 .u = {
28 .log = {
29 .level = 5,
30 .logflags = NF_LOG_MASK,
31 },
32 },
33};
34
35struct arppayload {
36 unsigned char mac_src[ETH_ALEN];
37 unsigned char ip_src[4];
38 unsigned char mac_dst[ETH_ALEN];
39 unsigned char ip_dst[4];
40};
41
42static void dump_arp_packet(struct nf_log_buf *m,
43 const struct nf_loginfo *info,
44 const struct sk_buff *skb, unsigned int nhoff)
45{
46 const struct arphdr *ah;
47 struct arphdr _arph;
48 const struct arppayload *ap;
49 struct arppayload _arpp;
50
51 ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
52 if (ah == NULL) {
53 nf_log_buf_add(m, "TRUNCATED");
54 return;
55 }
56 nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d",
57 ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op));
58
59 /* If it's for Ethernet and the lengths are OK, then log the ARP
60 * payload.
61 */
62 if (ah->ar_hrd != htons(1) ||
63 ah->ar_hln != ETH_ALEN ||
64 ah->ar_pln != sizeof(__be32))
65 return;
66
67 ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp);
68 if (ap == NULL) {
69 nf_log_buf_add(m, " INCOMPLETE [%Zu bytes]",
70 skb->len - sizeof(_arph));
71 return;
72 }
73 nf_log_buf_add(m, " MACSRC=%pM IPSRC=%pI4 MACDST=%pM IPDST=%pI4",
74 ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst);
75}
76
77void nf_log_arp_packet(struct net *net, u_int8_t pf,
78 unsigned int hooknum, const struct sk_buff *skb,
79 const struct net_device *in,
80 const struct net_device *out,
81 const struct nf_loginfo *loginfo,
82 const char *prefix)
83{
84 struct nf_log_buf *m;
85
86 /* FIXME: Disabled from containers until syslog ns is supported */
87 if (!net_eq(net, &init_net))
88 return;
89
90 m = nf_log_buf_open();
91
92 if (!loginfo)
93 loginfo = &default_loginfo;
94
95 nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
96 prefix);
97 dump_arp_packet(m, loginfo, skb, 0);
98
99 nf_log_buf_close(m);
100}
101
102static struct nf_logger nf_arp_logger __read_mostly = {
103 .name = "nf_log_arp",
104 .type = NF_LOG_TYPE_LOG,
105 .logfn = nf_log_arp_packet,
106 .me = THIS_MODULE,
107};
108
109static int __net_init nf_log_arp_net_init(struct net *net)
110{
111 nf_log_set(net, NFPROTO_ARP, &nf_arp_logger);
112 return 0;
113}
114
115static void __net_exit nf_log_arp_net_exit(struct net *net)
116{
117 nf_log_unset(net, &nf_arp_logger);
118}
119
120static struct pernet_operations nf_log_arp_net_ops = {
121 .init = nf_log_arp_net_init,
122 .exit = nf_log_arp_net_exit,
123};
124
125static int __init nf_log_arp_init(void)
126{
127 int ret;
128
129 ret = register_pernet_subsys(&nf_log_arp_net_ops);
130 if (ret < 0)
131 return ret;
132
133 nf_log_register(NFPROTO_ARP, &nf_arp_logger);
134 return 0;
135}
136
137static void __exit nf_log_arp_exit(void)
138{
139 unregister_pernet_subsys(&nf_log_arp_net_ops);
140 nf_log_unregister(&nf_arp_logger);
141}
142
143module_init(nf_log_arp_init);
144module_exit(nf_log_arp_exit);
145
146MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
147MODULE_DESCRIPTION("Netfilter ARP packet logging");
148MODULE_LICENSE("GPL");
149MODULE_ALIAS_NF_LOGGER(3, 0);
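For reference, the arppayload structure in this new file mirrors the fixed ARP body used for Ethernet/IPv4: the 8-byte arphdr is followed by sender MAC, sender IP, target MAC and target IP (20 bytes), which is why dump_arp_packet() only logs it when ar_hrd is Ethernet, ar_hln is ETH_ALEN and ar_pln is 4. A standalone size check, illustrative only:

#include <stdio.h>

#define ETH_ALEN 6      /* as in <linux/if_ether.h> */

struct arppayload {
        unsigned char mac_src[ETH_ALEN];
        unsigned char ip_src[4];
        unsigned char mac_dst[ETH_ALEN];
        unsigned char ip_dst[4];
};

int main(void)
{
        /* Ethernet/IPv4 ARP body: 6 + 4 + 6 + 4 = 20 bytes, no padding. */
        printf("arppayload = %zu bytes\n", sizeof(struct arppayload));
        return 0;
}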
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
new file mode 100644
index 000000000000..078bdca1b607
--- /dev/null
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -0,0 +1,385 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/module.h>
10#include <linux/spinlock.h>
11#include <linux/skbuff.h>
12#include <linux/if_arp.h>
13#include <linux/ip.h>
14#include <net/ipv6.h>
15#include <net/icmp.h>
16#include <net/udp.h>
17#include <net/tcp.h>
18#include <net/route.h>
19
20#include <linux/netfilter.h>
21#include <linux/netfilter/xt_LOG.h>
22#include <net/netfilter/nf_log.h>
23
24static struct nf_loginfo default_loginfo = {
25 .type = NF_LOG_TYPE_LOG,
26 .u = {
27 .log = {
28 .level = 5,
29 .logflags = NF_LOG_MASK,
30 },
31 },
32};
33
34/* One level of recursion won't kill us */
35static void dump_ipv4_packet(struct nf_log_buf *m,
36 const struct nf_loginfo *info,
37 const struct sk_buff *skb, unsigned int iphoff)
38{
39 struct iphdr _iph;
40 const struct iphdr *ih;
41 unsigned int logflags;
42
43 if (info->type == NF_LOG_TYPE_LOG)
44 logflags = info->u.log.logflags;
45 else
46 logflags = NF_LOG_MASK;
47
48 ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
49 if (ih == NULL) {
50 nf_log_buf_add(m, "TRUNCATED");
51 return;
52 }
53
54 /* Important fields:
55 * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
56 /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
57 nf_log_buf_add(m, "SRC=%pI4 DST=%pI4 ", &ih->saddr, &ih->daddr);
58
59 /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
60 nf_log_buf_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
61 ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
62 ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
63
64 /* Max length: 6 "CE DF MF " */
65 if (ntohs(ih->frag_off) & IP_CE)
66 nf_log_buf_add(m, "CE ");
67 if (ntohs(ih->frag_off) & IP_DF)
68 nf_log_buf_add(m, "DF ");
69 if (ntohs(ih->frag_off) & IP_MF)
70 nf_log_buf_add(m, "MF ");
71
72 /* Max length: 11 "FRAG:65535 " */
73 if (ntohs(ih->frag_off) & IP_OFFSET)
74 nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
75
76 if ((logflags & XT_LOG_IPOPT) &&
77 ih->ihl * 4 > sizeof(struct iphdr)) {
78 const unsigned char *op;
79 unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
80 unsigned int i, optsize;
81
82 optsize = ih->ihl * 4 - sizeof(struct iphdr);
83 op = skb_header_pointer(skb, iphoff+sizeof(_iph),
84 optsize, _opt);
85 if (op == NULL) {
86 nf_log_buf_add(m, "TRUNCATED");
87 return;
88 }
89
90 /* Max length: 127 "OPT (" 15*4*2chars ") " */
91 nf_log_buf_add(m, "OPT (");
92 for (i = 0; i < optsize; i++)
93 nf_log_buf_add(m, "%02X", op[i]);
94 nf_log_buf_add(m, ") ");
95 }
96
97 switch (ih->protocol) {
98 case IPPROTO_TCP:
99 if (nf_log_dump_tcp_header(m, skb, ih->protocol,
100 ntohs(ih->frag_off) & IP_OFFSET,
101 iphoff+ih->ihl*4, logflags))
102 return;
103 break;
104 case IPPROTO_UDP:
105 case IPPROTO_UDPLITE:
106 if (nf_log_dump_udp_header(m, skb, ih->protocol,
107 ntohs(ih->frag_off) & IP_OFFSET,
108 iphoff+ih->ihl*4))
109 return;
110 break;
111 case IPPROTO_ICMP: {
112 struct icmphdr _icmph;
113 const struct icmphdr *ich;
114 static const size_t required_len[NR_ICMP_TYPES+1]
115 = { [ICMP_ECHOREPLY] = 4,
116 [ICMP_DEST_UNREACH]
117 = 8 + sizeof(struct iphdr),
118 [ICMP_SOURCE_QUENCH]
119 = 8 + sizeof(struct iphdr),
120 [ICMP_REDIRECT]
121 = 8 + sizeof(struct iphdr),
122 [ICMP_ECHO] = 4,
123 [ICMP_TIME_EXCEEDED]
124 = 8 + sizeof(struct iphdr),
125 [ICMP_PARAMETERPROB]
126 = 8 + sizeof(struct iphdr),
127 [ICMP_TIMESTAMP] = 20,
128 [ICMP_TIMESTAMPREPLY] = 20,
129 [ICMP_ADDRESS] = 12,
130 [ICMP_ADDRESSREPLY] = 12 };
131
132 /* Max length: 11 "PROTO=ICMP " */
133 nf_log_buf_add(m, "PROTO=ICMP ");
134
135 if (ntohs(ih->frag_off) & IP_OFFSET)
136 break;
137
138 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
139 ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
140 sizeof(_icmph), &_icmph);
141 if (ich == NULL) {
142 nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
143 skb->len - iphoff - ih->ihl*4);
144 break;
145 }
146
147 /* Max length: 18 "TYPE=255 CODE=255 " */
148 nf_log_buf_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code);
149
150 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
151 if (ich->type <= NR_ICMP_TYPES &&
152 required_len[ich->type] &&
153 skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
154 nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
155 skb->len - iphoff - ih->ihl*4);
156 break;
157 }
158
159 switch (ich->type) {
160 case ICMP_ECHOREPLY:
161 case ICMP_ECHO:
162 /* Max length: 19 "ID=65535 SEQ=65535 " */
163 nf_log_buf_add(m, "ID=%u SEQ=%u ",
164 ntohs(ich->un.echo.id),
165 ntohs(ich->un.echo.sequence));
166 break;
167
168 case ICMP_PARAMETERPROB:
169 /* Max length: 14 "PARAMETER=255 " */
170 nf_log_buf_add(m, "PARAMETER=%u ",
171 ntohl(ich->un.gateway) >> 24);
172 break;
173 case ICMP_REDIRECT:
174 /* Max length: 24 "GATEWAY=255.255.255.255 " */
175 nf_log_buf_add(m, "GATEWAY=%pI4 ", &ich->un.gateway);
176 /* Fall through */
177 case ICMP_DEST_UNREACH:
178 case ICMP_SOURCE_QUENCH:
179 case ICMP_TIME_EXCEEDED:
180 /* Max length: 3+maxlen */
181 if (!iphoff) { /* Only recurse once. */
182 nf_log_buf_add(m, "[");
183 dump_ipv4_packet(m, info, skb,
184 iphoff + ih->ihl*4+sizeof(_icmph));
185 nf_log_buf_add(m, "] ");
186 }
187
188 /* Max length: 10 "MTU=65535 " */
189 if (ich->type == ICMP_DEST_UNREACH &&
190 ich->code == ICMP_FRAG_NEEDED) {
191 nf_log_buf_add(m, "MTU=%u ",
192 ntohs(ich->un.frag.mtu));
193 }
194 }
195 break;
196 }
197 /* Max Length */
198 case IPPROTO_AH: {
199 struct ip_auth_hdr _ahdr;
200 const struct ip_auth_hdr *ah;
201
202 if (ntohs(ih->frag_off) & IP_OFFSET)
203 break;
204
205 /* Max length: 9 "PROTO=AH " */
206 nf_log_buf_add(m, "PROTO=AH ");
207
208 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
209 ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
210 sizeof(_ahdr), &_ahdr);
211 if (ah == NULL) {
212 nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
213 skb->len - iphoff - ih->ihl*4);
214 break;
215 }
216
217 /* Length: 15 "SPI=0xF1234567 " */
218 nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi));
219 break;
220 }
221 case IPPROTO_ESP: {
222 struct ip_esp_hdr _esph;
223 const struct ip_esp_hdr *eh;
224
225 /* Max length: 10 "PROTO=ESP " */
226 nf_log_buf_add(m, "PROTO=ESP ");
227
228 if (ntohs(ih->frag_off) & IP_OFFSET)
229 break;
230
231 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
232 eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
233 sizeof(_esph), &_esph);
234 if (eh == NULL) {
235 nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
236 skb->len - iphoff - ih->ihl*4);
237 break;
238 }
239
240 /* Length: 15 "SPI=0xF1234567 " */
241 nf_log_buf_add(m, "SPI=0x%x ", ntohl(eh->spi));
242 break;
243 }
244 /* Max length: 10 "PROTO 255 " */
245 default:
246 nf_log_buf_add(m, "PROTO=%u ", ih->protocol);
247 }
248
249 /* Max length: 15 "UID=4294967295 " */
250 if ((logflags & XT_LOG_UID) && !iphoff)
251 nf_log_dump_sk_uid_gid(m, skb->sk);
252
253 /* Max length: 16 "MARK=0xFFFFFFFF " */
254 if (!iphoff && skb->mark)
255 nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
256
257 /* Proto Max log string length */
258 /* IP: 40+46+6+11+127 = 230 */
259 /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
260 /* UDP: 10+max(25,20) = 35 */
261 /* UDPLITE: 14+max(25,20) = 39 */
262 /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
263 /* ESP: 10+max(25)+15 = 50 */
264 /* AH: 9+max(25)+15 = 49 */
265 /* unknown: 10 */
266
267 /* (ICMP allows recursion one level deep) */
268 /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */
269 /* maxlen = 230+ 91 + 230 + 252 = 803 */
270}
271
272static void dump_ipv4_mac_header(struct nf_log_buf *m,
273 const struct nf_loginfo *info,
274 const struct sk_buff *skb)
275{
276 struct net_device *dev = skb->dev;
277 unsigned int logflags = 0;
278
279 if (info->type == NF_LOG_TYPE_LOG)
280 logflags = info->u.log.logflags;
281
282 if (!(logflags & XT_LOG_MACDECODE))
283 goto fallback;
284
285 switch (dev->type) {
286 case ARPHRD_ETHER:
287 nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
288 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
289 ntohs(eth_hdr(skb)->h_proto));
290 return;
291 default:
292 break;
293 }
294
295fallback:
296 nf_log_buf_add(m, "MAC=");
297 if (dev->hard_header_len &&
298 skb->mac_header != skb->network_header) {
299 const unsigned char *p = skb_mac_header(skb);
300 unsigned int i;
301
302 nf_log_buf_add(m, "%02x", *p++);
303 for (i = 1; i < dev->hard_header_len; i++, p++)
304 nf_log_buf_add(m, ":%02x", *p);
305 }
306 nf_log_buf_add(m, " ");
307}
308
309static void nf_log_ip_packet(struct net *net, u_int8_t pf,
310 unsigned int hooknum, const struct sk_buff *skb,
311 const struct net_device *in,
312 const struct net_device *out,
313 const struct nf_loginfo *loginfo,
314 const char *prefix)
315{
316 struct nf_log_buf *m;
317
318 /* FIXME: Disabled from containers until syslog ns is supported */
319 if (!net_eq(net, &init_net))
320 return;
321
322 m = nf_log_buf_open();
323
324 if (!loginfo)
325 loginfo = &default_loginfo;
326
327 nf_log_dump_packet_common(m, pf, hooknum, skb, in,
328 out, loginfo, prefix);
329
330 if (in != NULL)
331 dump_ipv4_mac_header(m, loginfo, skb);
332
333 dump_ipv4_packet(m, loginfo, skb, 0);
334
335 nf_log_buf_close(m);
336}
337
338static struct nf_logger nf_ip_logger __read_mostly = {
339 .name = "nf_log_ipv4",
340 .type = NF_LOG_TYPE_LOG,
341 .logfn = nf_log_ip_packet,
342 .me = THIS_MODULE,
343};
344
345static int __net_init nf_log_ipv4_net_init(struct net *net)
346{
347 nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger);
348 return 0;
349}
350
351static void __net_exit nf_log_ipv4_net_exit(struct net *net)
352{
353 nf_log_unset(net, &nf_ip_logger);
354}
355
356static struct pernet_operations nf_log_ipv4_net_ops = {
357 .init = nf_log_ipv4_net_init,
358 .exit = nf_log_ipv4_net_exit,
359};
360
361static int __init nf_log_ipv4_init(void)
362{
363 int ret;
364
365 ret = register_pernet_subsys(&nf_log_ipv4_net_ops);
366 if (ret < 0)
367 return ret;
368
369 nf_log_register(NFPROTO_IPV4, &nf_ip_logger);
370 return 0;
371}
372
373static void __exit nf_log_ipv4_exit(void)
374{
375 unregister_pernet_subsys(&nf_log_ipv4_net_ops);
376 nf_log_unregister(&nf_ip_logger);
377}
378
379module_init(nf_log_ipv4_init);
380module_exit(nf_log_ipv4_exit);
381
382MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
383MODULE_DESCRIPTION("Netfilter IPv4 packet logging");
384MODULE_LICENSE("GPL");
385MODULE_ALIAS_NF_LOGGER(AF_INET, 0);
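dump_ipv4_packet() above decodes the 16-bit frag_off word into the CE/DF/MF flag bits and the 13-bit fragment offset; note that the FRAG: value it prints is the offset in 8-byte units, not bytes. A standalone illustration, using the constant values from <net/ip.h>:

#include <stdio.h>
#include <arpa/inet.h>

#define IP_CE           0x8000          /* congestion (historic) */
#define IP_DF           0x4000          /* don't fragment */
#define IP_MF           0x2000          /* more fragments follow */
#define IP_OFFSET       0x1FFF          /* 13-bit offset, 8-byte units */

int main(void)
{
        /* e.g. the second fragment on a 1500-byte MTU path: MF set,
         * offset 185 * 8 = 1480 bytes into the original datagram. */
        unsigned short frag_off = htons(IP_MF | 185);
        unsigned short host = ntohs(frag_off);

        printf("DF=%d MF=%d FRAG=%d (%d bytes)\n",
               !!(host & IP_DF), !!(host & IP_MF),
               host & IP_OFFSET, (host & IP_OFFSET) * 8);
        return 0;
}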
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index d8b2e14efddc..14f5ccd06337 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -154,6 +154,7 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
154 htons(oldlen), htons(datalen), 1); 154 htons(oldlen), htons(datalen), 1);
155} 155}
156 156
157#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
157static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], 158static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
158 struct nf_nat_range *range) 159 struct nf_nat_range *range)
159{ 160{
@@ -169,6 +170,7 @@ static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
169 170
170 return 0; 171 return 0;
171} 172}
173#endif
172 174
173static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = { 175static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
174 .l3proto = NFPROTO_IPV4, 176 .l3proto = NFPROTO_IPV4,
@@ -177,7 +179,9 @@ static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
177 .manip_pkt = nf_nat_ipv4_manip_pkt, 179 .manip_pkt = nf_nat_ipv4_manip_pkt,
178 .csum_update = nf_nat_ipv4_csum_update, 180 .csum_update = nf_nat_ipv4_csum_update,
179 .csum_recalc = nf_nat_ipv4_csum_recalc, 181 .csum_recalc = nf_nat_ipv4_csum_recalc,
182#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
180 .nlattr_to_range = nf_nat_ipv4_nlattr_to_range, 183 .nlattr_to_range = nf_nat_ipv4_nlattr_to_range,
184#endif
181#ifdef CONFIG_XFRM 185#ifdef CONFIG_XFRM
182 .decode_session = nf_nat_ipv4_decode_session, 186 .decode_session = nf_nat_ipv4_decode_session,
183#endif 187#endif
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index 690d890111bb..9414923f1e15 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -124,7 +124,7 @@ static const struct nf_nat_l4proto gre = {
124 .manip_pkt = gre_manip_pkt, 124 .manip_pkt = gre_manip_pkt,
125 .in_range = nf_nat_l4proto_in_range, 125 .in_range = nf_nat_l4proto_in_range,
126 .unique_tuple = gre_unique_tuple, 126 .unique_tuple = gre_unique_tuple,
127#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 127#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
128 .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, 128 .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
129#endif 129#endif
130}; 130};
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index eb303471bcf6..4557b4ab8342 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -77,7 +77,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_icmp = {
77 .manip_pkt = icmp_manip_pkt, 77 .manip_pkt = icmp_manip_pkt,
78 .in_range = icmp_in_range, 78 .in_range = icmp_in_range,
79 .unique_tuple = icmp_unique_tuple, 79 .unique_tuple = icmp_unique_tuple,
80#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 80#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
81 .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, 81 .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
82#endif 82#endif
83}; 83};
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ae0af9386f7c..8e3eb39f84e7 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -52,6 +52,7 @@
52static int sockstat_seq_show(struct seq_file *seq, void *v) 52static int sockstat_seq_show(struct seq_file *seq, void *v)
53{ 53{
54 struct net *net = seq->private; 54 struct net *net = seq->private;
55 unsigned int frag_mem;
55 int orphans, sockets; 56 int orphans, sockets;
56 57
57 local_bh_disable(); 58 local_bh_disable();
@@ -71,8 +72,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
71 sock_prot_inuse_get(net, &udplite_prot)); 72 sock_prot_inuse_get(net, &udplite_prot));
72 seq_printf(seq, "RAW: inuse %d\n", 73 seq_printf(seq, "RAW: inuse %d\n",
73 sock_prot_inuse_get(net, &raw_prot)); 74 sock_prot_inuse_get(net, &raw_prot));
74 seq_printf(seq, "FRAG: inuse %d memory %d\n", 75 frag_mem = ip_frag_mem(net);
75 ip_frag_nqueues(net), ip_frag_mem(net)); 76 seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem);
76 return 0; 77 return 0;
77} 78}
78 79
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 2c65160565e1..739db3100c23 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -58,6 +58,7 @@
58#include <linux/in_route.h> 58#include <linux/in_route.h>
59#include <linux/route.h> 59#include <linux/route.h>
60#include <linux/skbuff.h> 60#include <linux/skbuff.h>
61#include <linux/igmp.h>
61#include <net/net_namespace.h> 62#include <net/net_namespace.h>
62#include <net/dst.h> 63#include <net/dst.h>
63#include <net/sock.h> 64#include <net/sock.h>
@@ -174,7 +175,9 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
174 175
175 while (sk) { 176 while (sk) {
176 delivered = 1; 177 delivered = 1;
177 if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) { 178 if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) &&
179 ip_mc_sf_allow(sk, iph->daddr, iph->saddr,
180 skb->dev->ifindex)) {
178 struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); 181 struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
179 182
180 /* Not releasing hash table! */ 183 /* Not releasing hash table! */
@@ -365,6 +368,8 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
365 368
366 skb->ip_summed = CHECKSUM_NONE; 369 skb->ip_summed = CHECKSUM_NONE;
367 370
371 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
372
368 skb->transport_header = skb->network_header; 373 skb->transport_header = skb->network_header;
369 err = -EFAULT; 374 err = -EFAULT;
370 if (memcpy_fromiovecend((void *)iph, from, 0, length)) 375 if (memcpy_fromiovecend((void *)iph, from, 0, length))
@@ -606,6 +611,8 @@ back_from_confirm:
606 &rt, msg->msg_flags); 611 &rt, msg->msg_flags);
607 612
608 else { 613 else {
614 sock_tx_timestamp(sk, &ipc.tx_flags);
615
609 if (!ipc.addr) 616 if (!ipc.addr)
610 ipc.addr = fl4.daddr; 617 ipc.addr = fl4.daddr;
611 lock_sock(sk); 618 lock_sock(sk);
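The sock_tx_timestamp() calls added above wire transmit timestamping into the raw-socket send paths; userspace still has to opt in per socket. A hedged sketch of that opt-in (assumes SO_TIMESTAMPING plus the SOF_* flags from <linux/net_tstamp.h>, and CAP_NET_RAW to open the socket):

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/net_tstamp.h>

int main(void)
{
        int fd = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
        int flags = SOF_TIMESTAMPING_TX_SOFTWARE |
                    SOF_TIMESTAMPING_SOFTWARE;

        if (fd < 0) {
                perror("socket");               /* needs CAP_NET_RAW */
                return 1;
        }
        if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
                       &flags, sizeof(flags)) < 0) {
                perror("setsockopt");
                return 1;
        }
        /* Timestamps for sent packets are then read back from the
         * socket error queue (recvmsg() with MSG_ERRQUEUE) as
         * SCM_TIMESTAMPING control messages. */
        return 0;
}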
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 082239ffe34a..190199851c9a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -457,8 +457,31 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
457 return neigh_create(&arp_tbl, pkey, dev); 457 return neigh_create(&arp_tbl, pkey, dev);
458} 458}
459 459
460atomic_t *ip_idents __read_mostly; 460#define IP_IDENTS_SZ 2048u
461EXPORT_SYMBOL(ip_idents); 461struct ip_ident_bucket {
462 atomic_t id;
463 u32 stamp32;
464};
465
466static struct ip_ident_bucket *ip_idents __read_mostly;
467
468/* In order to protect privacy, we add a perturbation to identifiers
469 * if one generator is seldom used. This makes hard for an attacker
470 * to infer how many packets were sent between two points in time.
471 */
472u32 ip_idents_reserve(u32 hash, int segs)
473{
474 struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ;
475 u32 old = ACCESS_ONCE(bucket->stamp32);
476 u32 now = (u32)jiffies;
477 u32 delta = 0;
478
479 if (old != now && cmpxchg(&bucket->stamp32, old, now) == old)
480 delta = prandom_u32_max(now - old);
481
482 return atomic_add_return(segs + delta, &bucket->id) - segs;
483}
484EXPORT_SYMBOL(ip_idents_reserve);
462 485
463void __ip_select_ident(struct iphdr *iph, int segs) 486void __ip_select_ident(struct iphdr *iph, int segs)
464{ 487{
@@ -467,7 +490,10 @@ void __ip_select_ident(struct iphdr *iph, int segs)
467 490
468 net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); 491 net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
469 492
470 hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd); 493 hash = jhash_3words((__force u32)iph->daddr,
494 (__force u32)iph->saddr,
495 iph->protocol,
496 ip_idents_hashrnd);
471 id = ip_idents_reserve(hash, segs); 497 id = ip_idents_reserve(hash, segs);
472 iph->id = htons(id); 498 iph->id = htons(id);
473} 499}
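As a toy userspace model only (plain rand() instead of prandom_u32_max(), no atomics or cmpxchg), the reservation scheme introduced above works like the sketch below: when a bucket has been idle, the next identifier jumps forward by a random amount bounded by the idle time, so consecutive observed IP IDs no longer reveal how many packets were sent in between.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

struct ident_bucket {
        unsigned int id;
        unsigned int stamp;     /* last use, in "jiffies" */
};

/* Assumes now >= stamp; returns the first identifier of the range. */
static unsigned int ident_reserve(struct ident_bucket *b,
                                  unsigned int now, unsigned int segs)
{
        unsigned int delta = 0;

        if (b->stamp != now) {
                delta = rand() % (now - b->stamp);      /* perturbation */
                b->stamp = now;
        }
        b->id += segs + delta;
        return b->id - segs;
}

int main(void)
{
        struct ident_bucket b = { .id = 0, .stamp = 0 };

        srand((unsigned int)time(NULL));
        printf("t=100: %u\n", ident_reserve(&b, 100, 1));
        printf("t=100: %u\n", ident_reserve(&b, 100, 1)); /* previous + 1 */
        printf("t=600: %u\n", ident_reserve(&b, 600, 1)); /* + random gap */
        return 0;
}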
@@ -1010,7 +1036,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1010 const struct iphdr *iph = (const struct iphdr *) skb->data; 1036 const struct iphdr *iph = (const struct iphdr *) skb->data;
1011 struct flowi4 fl4; 1037 struct flowi4 fl4;
1012 struct rtable *rt; 1038 struct rtable *rt;
1013 struct dst_entry *dst; 1039 struct dst_entry *odst = NULL;
1014 bool new = false; 1040 bool new = false;
1015 1041
1016 bh_lock_sock(sk); 1042 bh_lock_sock(sk);
@@ -1018,16 +1044,17 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1018 if (!ip_sk_accept_pmtu(sk)) 1044 if (!ip_sk_accept_pmtu(sk))
1019 goto out; 1045 goto out;
1020 1046
1021 rt = (struct rtable *) __sk_dst_get(sk); 1047 odst = sk_dst_get(sk);
1022 1048
1023 if (sock_owned_by_user(sk) || !rt) { 1049 if (sock_owned_by_user(sk) || !odst) {
1024 __ipv4_sk_update_pmtu(skb, sk, mtu); 1050 __ipv4_sk_update_pmtu(skb, sk, mtu);
1025 goto out; 1051 goto out;
1026 } 1052 }
1027 1053
1028 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); 1054 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
1029 1055
1030 if (!__sk_dst_check(sk, 0)) { 1056 rt = (struct rtable *)odst;
1057 if (odst->obsolete && odst->ops->check(odst, 0) == NULL) {
1031 rt = ip_route_output_flow(sock_net(sk), &fl4, sk); 1058 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
1032 if (IS_ERR(rt)) 1059 if (IS_ERR(rt))
1033 goto out; 1060 goto out;
@@ -1037,8 +1064,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1037 1064
1038 __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu); 1065 __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
1039 1066
1040 dst = dst_check(&rt->dst, 0); 1067 if (!dst_check(&rt->dst, 0)) {
1041 if (!dst) {
1042 if (new) 1068 if (new)
1043 dst_release(&rt->dst); 1069 dst_release(&rt->dst);
1044 1070
@@ -1050,10 +1076,11 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1050 } 1076 }
1051 1077
1052 if (new) 1078 if (new)
1053 __sk_dst_set(sk, &rt->dst); 1079 sk_dst_set(sk, &rt->dst);
1054 1080
1055out: 1081out:
1056 bh_unlock_sock(sk); 1082 bh_unlock_sock(sk);
1083 dst_release(odst);
1057} 1084}
1058EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); 1085EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
1059 1086
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index c86624b36a62..c0c75688896e 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -170,7 +170,8 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
170} 170}
171EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence); 171EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence);
172 172
173__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) 173__u32 cookie_v4_init_sequence(struct sock *sk, const struct sk_buff *skb,
174 __u16 *mssp)
174{ 175{
175 const struct iphdr *iph = ip_hdr(skb); 176 const struct iphdr *iph = ip_hdr(skb);
176 const struct tcphdr *th = tcp_hdr(skb); 177 const struct tcphdr *th = tcp_hdr(skb);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index eb1dde37e678..9d2118e5fbc7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1108,7 +1108,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1108 if (unlikely(tp->repair)) { 1108 if (unlikely(tp->repair)) {
1109 if (tp->repair_queue == TCP_RECV_QUEUE) { 1109 if (tp->repair_queue == TCP_RECV_QUEUE) {
1110 copied = tcp_send_rcvq(sk, msg, size); 1110 copied = tcp_send_rcvq(sk, msg, size);
1111 goto out; 1111 goto out_nopush;
1112 } 1112 }
1113 1113
1114 err = -EINVAL; 1114 err = -EINVAL;
@@ -1282,6 +1282,7 @@ wait_for_memory:
1282out: 1282out:
1283 if (copied) 1283 if (copied)
1284 tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); 1284 tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
1285out_nopush:
1285 release_sock(sk); 1286 release_sock(sk);
1286 return copied + copied_syn; 1287 return copied + copied_syn;
1287 1288
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 62e48cf84e60..9771563ab564 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -131,7 +131,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
131 struct dst_entry *dst, 131 struct dst_entry *dst,
132 struct request_sock *req) 132 struct request_sock *req)
133{ 133{
134 struct tcp_sock *tp = tcp_sk(sk); 134 struct tcp_sock *tp;
135 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; 135 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
136 struct sock *child; 136 struct sock *child;
137 137
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 40661fc1e233..7832d941dbcd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1106,7 +1106,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1106 } 1106 }
1107 1107
1108 /* D-SACK for already forgotten data... Do dumb counting. */ 1108 /* D-SACK for already forgotten data... Do dumb counting. */
1109 if (dup_sack && tp->undo_marker && tp->undo_retrans && 1109 if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 &&
1110 !after(end_seq_0, prior_snd_una) && 1110 !after(end_seq_0, prior_snd_una) &&
1111 after(end_seq_0, tp->undo_marker)) 1111 after(end_seq_0, tp->undo_marker))
1112 tp->undo_retrans--; 1112 tp->undo_retrans--;
@@ -1162,7 +1162,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1162 unsigned int new_len = (pkt_len / mss) * mss; 1162 unsigned int new_len = (pkt_len / mss) * mss;
1163 if (!in_sack && new_len < pkt_len) { 1163 if (!in_sack && new_len < pkt_len) {
1164 new_len += mss; 1164 new_len += mss;
1165 if (new_len > skb->len) 1165 if (new_len >= skb->len)
1166 return 0; 1166 return 0;
1167 } 1167 }
1168 pkt_len = new_len; 1168 pkt_len = new_len;
@@ -1187,7 +1187,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
1187 1187
1188 /* Account D-SACK for retransmitted packet. */ 1188 /* Account D-SACK for retransmitted packet. */
1189 if (dup_sack && (sacked & TCPCB_RETRANS)) { 1189 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1190 if (tp->undo_marker && tp->undo_retrans && 1190 if (tp->undo_marker && tp->undo_retrans > 0 &&
1191 after(end_seq, tp->undo_marker)) 1191 after(end_seq, tp->undo_marker))
1192 tp->undo_retrans--; 1192 tp->undo_retrans--;
1193 if (sacked & TCPCB_SACKED_ACKED) 1193 if (sacked & TCPCB_SACKED_ACKED)
@@ -1893,7 +1893,7 @@ static void tcp_clear_retrans_partial(struct tcp_sock *tp)
1893 tp->lost_out = 0; 1893 tp->lost_out = 0;
1894 1894
1895 tp->undo_marker = 0; 1895 tp->undo_marker = 0;
1896 tp->undo_retrans = 0; 1896 tp->undo_retrans = -1;
1897} 1897}
1898 1898
1899void tcp_clear_retrans(struct tcp_sock *tp) 1899void tcp_clear_retrans(struct tcp_sock *tp)
@@ -2475,7 +2475,7 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
2475 * losses and/or application stalls), do not perform any further cwnd 2475 * losses and/or application stalls), do not perform any further cwnd
2476 * reductions, but instead slow start up to ssthresh. 2476 * reductions, but instead slow start up to ssthresh.
2477 */ 2477 */
2478static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) 2478static void tcp_init_cwnd_reduction(struct sock *sk)
2479{ 2479{
2480 struct tcp_sock *tp = tcp_sk(sk); 2480 struct tcp_sock *tp = tcp_sk(sk);
2481 2481
@@ -2485,8 +2485,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
2485 tp->prior_cwnd = tp->snd_cwnd; 2485 tp->prior_cwnd = tp->snd_cwnd;
2486 tp->prr_delivered = 0; 2486 tp->prr_delivered = 0;
2487 tp->prr_out = 0; 2487 tp->prr_out = 0;
2488 if (set_ssthresh) 2488 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
2489 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
2490 TCP_ECN_queue_cwr(tp); 2489 TCP_ECN_queue_cwr(tp);
2491} 2490}
2492 2491
@@ -2528,14 +2527,14 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
2528} 2527}
2529 2528
2530/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */ 2529/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
2531void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) 2530void tcp_enter_cwr(struct sock *sk)
2532{ 2531{
2533 struct tcp_sock *tp = tcp_sk(sk); 2532 struct tcp_sock *tp = tcp_sk(sk);
2534 2533
2535 tp->prior_ssthresh = 0; 2534 tp->prior_ssthresh = 0;
2536 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { 2535 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2537 tp->undo_marker = 0; 2536 tp->undo_marker = 0;
2538 tcp_init_cwnd_reduction(sk, set_ssthresh); 2537 tcp_init_cwnd_reduction(sk);
2539 tcp_set_ca_state(sk, TCP_CA_CWR); 2538 tcp_set_ca_state(sk, TCP_CA_CWR);
2540 } 2539 }
2541} 2540}
@@ -2564,7 +2563,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
2564 tp->retrans_stamp = 0; 2563 tp->retrans_stamp = 0;
2565 2564
2566 if (flag & FLAG_ECE) 2565 if (flag & FLAG_ECE)
2567 tcp_enter_cwr(sk, 1); 2566 tcp_enter_cwr(sk);
2568 2567
2569 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { 2568 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2570 tcp_try_keep_open(sk); 2569 tcp_try_keep_open(sk);
@@ -2665,12 +2664,12 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2665 2664
2666 tp->prior_ssthresh = 0; 2665 tp->prior_ssthresh = 0;
2667 tp->undo_marker = tp->snd_una; 2666 tp->undo_marker = tp->snd_una;
2668 tp->undo_retrans = tp->retrans_out; 2667 tp->undo_retrans = tp->retrans_out ? : -1;
2669 2668
2670 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { 2669 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2671 if (!ece_ack) 2670 if (!ece_ack)
2672 tp->prior_ssthresh = tcp_current_ssthresh(sk); 2671 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2673 tcp_init_cwnd_reduction(sk, true); 2672 tcp_init_cwnd_reduction(sk);
2674 } 2673 }
2675 tcp_set_ca_state(sk, TCP_CA_Recovery); 2674 tcp_set_ca_state(sk, TCP_CA_Recovery);
2676} 2675}
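A side note on the undo_retrans changes in these hunks: -1 now acts as a "no retransmission recorded yet" sentinel (set in tcp_clear_retrans_partial() and via the tp->retrans_out ? : -1 assignment above), and the D-SACK accounting only decrements the counter while it is positive. The ? : form is GCC's shorthand for repeating the tested expression, as the small illustration below shows.

#include <stdio.h>

int main(void)
{
        int retrans_out = 0;
        /* GCC extension: "a ? : b" means "a ? a : b". */
        int undo_retrans = retrans_out ? : -1;

        printf("undo_retrans = %d\n", undo_retrans);    /* -1, nothing to undo */

        retrans_out = 3;
        undo_retrans = retrans_out ? : -1;
        printf("undo_retrans = %d\n", undo_retrans);    /* 3 */
        return 0;
}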
@@ -3346,7 +3345,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
3346 tp->tlp_high_seq = 0; 3345 tp->tlp_high_seq = 0;
3347 /* Don't reduce cwnd if DSACK arrives for TLP retrans. */ 3346 /* Don't reduce cwnd if DSACK arrives for TLP retrans. */
3348 if (!(flag & FLAG_DSACKING_ACK)) { 3347 if (!(flag & FLAG_DSACKING_ACK)) {
3349 tcp_init_cwnd_reduction(sk, true); 3348 tcp_init_cwnd_reduction(sk);
3350 tcp_set_ca_state(sk, TCP_CA_CWR); 3349 tcp_set_ca_state(sk, TCP_CA_CWR);
3351 tcp_end_cwnd_reduction(sk); 3350 tcp_end_cwnd_reduction(sk);
3352 tcp_try_keep_open(sk); 3351 tcp_try_keep_open(sk);
@@ -5877,3 +5876,153 @@ discard:
5877 return 0; 5876 return 0;
5878} 5877}
5879EXPORT_SYMBOL(tcp_rcv_state_process); 5878EXPORT_SYMBOL(tcp_rcv_state_process);
5879
5880static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
5881{
5882 struct inet_request_sock *ireq = inet_rsk(req);
5883
5884 if (family == AF_INET)
5885 LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
5886 &ireq->ir_rmt_addr, port);
5887#if IS_ENABLED(CONFIG_IPV6)
5888 else if (family == AF_INET6)
5889 LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI6/%u\n"),
5890 &ireq->ir_v6_rmt_addr, port);
5891#endif
5892}
5893
5894int tcp_conn_request(struct request_sock_ops *rsk_ops,
5895 const struct tcp_request_sock_ops *af_ops,
5896 struct sock *sk, struct sk_buff *skb)
5897{
5898 struct tcp_options_received tmp_opt;
5899 struct request_sock *req;
5900 struct tcp_sock *tp = tcp_sk(sk);
5901 struct dst_entry *dst = NULL;
5902 __u32 isn = TCP_SKB_CB(skb)->when;
5903 bool want_cookie = false, fastopen;
5904 struct flowi fl;
5905 struct tcp_fastopen_cookie foc = { .len = -1 };
5906 int err;
5907
5908
5909 /* TW buckets are converted to open requests without
5910 * limitations, they conserve resources and peer is
5911 * evidently real one.
5912 */
5913 if ((sysctl_tcp_syncookies == 2 ||
5914 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
5915 want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
5916 if (!want_cookie)
5917 goto drop;
5918 }
5919
5920
5921 /* Accept backlog is full. If we have already queued enough
5922 * of warm entries in syn queue, drop request. It is better than
5923 * clogging syn queue with openreqs with exponentially increasing
5924 * timeout.
5925 */
5926 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
5927 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
5928 goto drop;
5929 }
5930
5931 req = inet_reqsk_alloc(rsk_ops);
5932 if (!req)
5933 goto drop;
5934
5935 tcp_rsk(req)->af_specific = af_ops;
5936
5937 tcp_clear_options(&tmp_opt);
5938 tmp_opt.mss_clamp = af_ops->mss_clamp;
5939 tmp_opt.user_mss = tp->rx_opt.user_mss;
5940 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
5941
5942 if (want_cookie && !tmp_opt.saw_tstamp)
5943 tcp_clear_options(&tmp_opt);
5944
5945 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
5946 tcp_openreq_init(req, &tmp_opt, skb, sk);
5947
5948 af_ops->init_req(req, sk, skb);
5949
5950 if (security_inet_conn_request(sk, skb, req))
5951 goto drop_and_free;
5952
5953 if (!want_cookie || tmp_opt.tstamp_ok)
5954 TCP_ECN_create_request(req, skb, sock_net(sk));
5955
5956 if (want_cookie) {
5957 isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
5958 req->cookie_ts = tmp_opt.tstamp_ok;
5959 } else if (!isn) {
5960 /* VJ's idea. We save last timestamp seen
5961 * from the destination in peer table, when entering
5962 * state TIME-WAIT, and check against it before
5963 * accepting new connection request.
5964 *
5965 * If "isn" is not zero, this request hit alive
5966 * timewait bucket, so that all the necessary checks
5967 * are made in the function processing timewait state.
5968 */
5969 if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle) {
5970 bool strict;
5971
5972 dst = af_ops->route_req(sk, &fl, req, &strict);
5973 if (dst && strict &&
5974 !tcp_peer_is_proven(req, dst, true)) {
5975 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
5976 goto drop_and_release;
5977 }
5978 }
5979 /* Kill the following clause, if you dislike this way. */
5980 else if (!sysctl_tcp_syncookies &&
5981 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
5982 (sysctl_max_syn_backlog >> 2)) &&
5983 !tcp_peer_is_proven(req, dst, false)) {
5984 /* Without syncookies last quarter of
5985 * backlog is filled with destinations,
5986 * proven to be alive.
5987 * It means that we continue to communicate
5988 * to destinations, already remembered
5989 * to the moment of synflood.
5990 */
5991 pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
5992 rsk_ops->family);
5993 goto drop_and_release;
5994 }
5995
5996 isn = af_ops->init_seq(skb);
5997 }
5998 if (!dst) {
5999 dst = af_ops->route_req(sk, &fl, req, NULL);
6000 if (!dst)
6001 goto drop_and_free;
6002 }
6003
6004 tcp_rsk(req)->snt_isn = isn;
6005 tcp_openreq_init_rwin(req, sk, dst);
6006 fastopen = !want_cookie &&
6007 tcp_try_fastopen(sk, skb, req, &foc, dst);
6008 err = af_ops->send_synack(sk, dst, &fl, req,
6009 skb_get_queue_mapping(skb), &foc);
6010 if (!fastopen) {
6011 if (err || want_cookie)
6012 goto drop_and_free;
6013
6014 tcp_rsk(req)->listener = NULL;
6015 af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
6016 }
6017
6018 return 0;
6019
6020drop_and_release:
6021 dst_release(dst);
6022drop_and_free:
6023 reqsk_free(req);
6024drop:
6025 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
6026 return 0;
6027}
6028EXPORT_SYMBOL(tcp_conn_request);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 180336d47df6..1edc739b9da5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -99,7 +99,7 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
99struct inet_hashinfo tcp_hashinfo; 99struct inet_hashinfo tcp_hashinfo;
100EXPORT_SYMBOL(tcp_hashinfo); 100EXPORT_SYMBOL(tcp_hashinfo);
101 101
102static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb) 102static __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
103{ 103{
104 return secure_tcp_sequence_number(ip_hdr(skb)->daddr, 104 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
105 ip_hdr(skb)->saddr, 105 ip_hdr(skb)->saddr,
@@ -208,6 +208,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
208 inet->inet_dport = usin->sin_port; 208 inet->inet_dport = usin->sin_port;
209 inet->inet_daddr = daddr; 209 inet->inet_daddr = daddr;
210 210
211 inet_set_txhash(sk);
212
211 inet_csk(sk)->icsk_ext_hdr_len = 0; 213 inet_csk(sk)->icsk_ext_hdr_len = 0;
212 if (inet_opt) 214 if (inet_opt)
213 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 215 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
@@ -814,6 +816,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
814 * socket. 816 * socket.
815 */ 817 */
816static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, 818static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
819 struct flowi *fl,
817 struct request_sock *req, 820 struct request_sock *req,
818 u16 queue_mapping, 821 u16 queue_mapping,
819 struct tcp_fastopen_cookie *foc) 822 struct tcp_fastopen_cookie *foc)
@@ -837,24 +840,11 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
837 ireq->ir_rmt_addr, 840 ireq->ir_rmt_addr,
838 ireq->opt); 841 ireq->opt);
839 err = net_xmit_eval(err); 842 err = net_xmit_eval(err);
840 if (!tcp_rsk(req)->snt_synack && !err)
841 tcp_rsk(req)->snt_synack = tcp_time_stamp;
842 } 843 }
843 844
844 return err; 845 return err;
845} 846}
846 847
847static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
848{
849 int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL);
850
851 if (!res) {
852 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
853 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
854 }
855 return res;
856}
857
858/* 848/*
859 * IPv4 request_sock destructor. 849 * IPv4 request_sock destructor.
860 */ 850 */
@@ -1237,160 +1227,68 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1237 1227
1238#endif 1228#endif
1239 1229
1230static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
1231 struct sk_buff *skb)
1232{
1233 struct inet_request_sock *ireq = inet_rsk(req);
1234
1235 ireq->ir_loc_addr = ip_hdr(skb)->daddr;
1236 ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
1237 ireq->no_srccheck = inet_sk(sk)->transparent;
1238 ireq->opt = tcp_v4_save_options(skb);
1239}
1240
1241static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
1242 const struct request_sock *req,
1243 bool *strict)
1244{
1245 struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
1246
1247 if (strict) {
1248 if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
1249 *strict = true;
1250 else
1251 *strict = false;
1252 }
1253
1254 return dst;
1255}
1256
1240struct request_sock_ops tcp_request_sock_ops __read_mostly = { 1257struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1241 .family = PF_INET, 1258 .family = PF_INET,
1242 .obj_size = sizeof(struct tcp_request_sock), 1259 .obj_size = sizeof(struct tcp_request_sock),
1243 .rtx_syn_ack = tcp_v4_rtx_synack, 1260 .rtx_syn_ack = tcp_rtx_synack,
1244 .send_ack = tcp_v4_reqsk_send_ack, 1261 .send_ack = tcp_v4_reqsk_send_ack,
1245 .destructor = tcp_v4_reqsk_destructor, 1262 .destructor = tcp_v4_reqsk_destructor,
1246 .send_reset = tcp_v4_send_reset, 1263 .send_reset = tcp_v4_send_reset,
1247 .syn_ack_timeout = tcp_syn_ack_timeout, 1264 .syn_ack_timeout = tcp_syn_ack_timeout,
1248}; 1265};
1249 1266
1250#ifdef CONFIG_TCP_MD5SIG
1251static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { 1267static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1268 .mss_clamp = TCP_MSS_DEFAULT,
1269#ifdef CONFIG_TCP_MD5SIG
1252 .md5_lookup = tcp_v4_reqsk_md5_lookup, 1270 .md5_lookup = tcp_v4_reqsk_md5_lookup,
1253 .calc_md5_hash = tcp_v4_md5_hash_skb, 1271 .calc_md5_hash = tcp_v4_md5_hash_skb,
1254};
1255#endif 1272#endif
1273 .init_req = tcp_v4_init_req,
1274#ifdef CONFIG_SYN_COOKIES
1275 .cookie_init_seq = cookie_v4_init_sequence,
1276#endif
1277 .route_req = tcp_v4_route_req,
1278 .init_seq = tcp_v4_init_sequence,
1279 .send_synack = tcp_v4_send_synack,
1280 .queue_hash_add = inet_csk_reqsk_queue_hash_add,
1281};
1256 1282
1257int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 1283int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1258{ 1284{
1259 struct tcp_options_received tmp_opt;
1260 struct request_sock *req;
1261 struct inet_request_sock *ireq;
1262 struct tcp_sock *tp = tcp_sk(sk);
1263 struct dst_entry *dst = NULL;
1264 __be32 saddr = ip_hdr(skb)->saddr;
1265 __be32 daddr = ip_hdr(skb)->daddr;
1266 __u32 isn = TCP_SKB_CB(skb)->when;
1267 bool want_cookie = false, fastopen;
1268 struct flowi4 fl4;
1269 struct tcp_fastopen_cookie foc = { .len = -1 };
1270 int err;
1271
1272 /* Never answer to SYNs send to broadcast or multicast */ 1285 /* Never answer to SYNs send to broadcast or multicast */
1273 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) 1286 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1274 goto drop; 1287 goto drop;
1275 1288
1276 /* TW buckets are converted to open requests without 1289 return tcp_conn_request(&tcp_request_sock_ops,
1277 * limitations, they conserve resources and peer is 1290 &tcp_request_sock_ipv4_ops, sk, skb);
1278 * evidently real one.
1279 */
1280 if ((sysctl_tcp_syncookies == 2 ||
1281 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
1282 want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1283 if (!want_cookie)
1284 goto drop;
1285 }
1286
1287 /* Accept backlog is full. If we have already queued enough
1288 * of warm entries in syn queue, drop request. It is better than
1289 * clogging syn queue with openreqs with exponentially increasing
1290 * timeout.
1291 */
1292 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
1293 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1294 goto drop;
1295 }
1296
1297 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1298 if (!req)
1299 goto drop;
1300
1301#ifdef CONFIG_TCP_MD5SIG
1302 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1303#endif
1304
1305 tcp_clear_options(&tmp_opt);
1306 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1307 tmp_opt.user_mss = tp->rx_opt.user_mss;
1308 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
1309
1310 if (want_cookie && !tmp_opt.saw_tstamp)
1311 tcp_clear_options(&tmp_opt);
1312
1313 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1314 tcp_openreq_init(req, &tmp_opt, skb, sk);
1315
1316 ireq = inet_rsk(req);
1317 ireq->ir_loc_addr = daddr;
1318 ireq->ir_rmt_addr = saddr;
1319 ireq->no_srccheck = inet_sk(sk)->transparent;
1320 ireq->opt = tcp_v4_save_options(skb);
1321 1291
1322 if (security_inet_conn_request(sk, skb, req))
1323 goto drop_and_free;
1324
1325 if (!want_cookie || tmp_opt.tstamp_ok)
1326 TCP_ECN_create_request(req, skb, sock_net(sk));
1327
1328 if (want_cookie) {
1329 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1330 req->cookie_ts = tmp_opt.tstamp_ok;
1331 } else if (!isn) {
1332 /* VJ's idea. We save last timestamp seen
1333 * from the destination in peer table, when entering
1334 * state TIME-WAIT, and check against it before
1335 * accepting new connection request.
1336 *
1337 * If "isn" is not zero, this request hit alive
1338 * timewait bucket, so that all the necessary checks
1339 * are made in the function processing timewait state.
1340 */
1341 if (tmp_opt.saw_tstamp &&
1342 tcp_death_row.sysctl_tw_recycle &&
1343 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1344 fl4.daddr == saddr) {
1345 if (!tcp_peer_is_proven(req, dst, true)) {
1346 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1347 goto drop_and_release;
1348 }
1349 }
1350 /* Kill the following clause, if you dislike this way. */
1351 else if (!sysctl_tcp_syncookies &&
1352 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1353 (sysctl_max_syn_backlog >> 2)) &&
1354 !tcp_peer_is_proven(req, dst, false)) {
1355 /* Without syncookies last quarter of
1356 * backlog is filled with destinations,
1357 * proven to be alive.
1358 * It means that we continue to communicate
1359 * to destinations, already remembered
1360 * to the moment of synflood.
1361 */
1362 LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
1363 &saddr, ntohs(tcp_hdr(skb)->source));
1364 goto drop_and_release;
1365 }
1366
1367 isn = tcp_v4_init_sequence(skb);
1368 }
1369 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
1370 goto drop_and_free;
1371
1372 tcp_rsk(req)->snt_isn = isn;
1373 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1374 tcp_openreq_init_rwin(req, sk, dst);
1375 fastopen = !want_cookie &&
1376 tcp_try_fastopen(sk, skb, req, &foc, dst);
1377 err = tcp_v4_send_synack(sk, dst, req,
1378 skb_get_queue_mapping(skb), &foc);
1379 if (!fastopen) {
1380 if (err || want_cookie)
1381 goto drop_and_free;
1382
1383 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1384 tcp_rsk(req)->listener = NULL;
1385 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1386 }
1387
1388 return 0;
1389
1390drop_and_release:
1391 dst_release(dst);
1392drop_and_free:
1393 reqsk_free(req);
1394drop: 1292drop:
1395 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 1293 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1396 return 0; 1294 return 0;
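The block removed above (the open-coded body of tcp_v4_conn_request) carries the listener's admission-control logic: drop when the accept backlog is full and the SYN queue already holds enough young entries, fall back to syncookies under SYN-flood pressure, and, without syncookies, reserve the last quarter of the SYN backlog for destinations already proven alive. A minimal userland C sketch of that last heuristic, with illustrative names standing in for sysctl_max_syn_backlog, inet_csk_reqsk_queue_len(), sysctl_tcp_syncookies and tcp_peer_is_proven():

#include <stdbool.h>
#include <stdio.h>

/* Mirror of the "last quarter of the backlog" check in the removed block. */
static bool drop_unproven_peer(int max_syn_backlog, int queue_len,
			       bool syncookies, bool peer_proven)
{
	return !syncookies &&
	       max_syn_backlog - queue_len < (max_syn_backlog >> 2) &&
	       !peer_proven;
}

int main(void)
{
	/* 200 of 256 slots used: only 56 remain (< 64), so an unproven
	 * destination is dropped; at 100 used it is still accepted. */
	printf("%d\n", drop_unproven_peer(256, 200, false, false)); /* 1 */
	printf("%d\n", drop_unproven_peer(256, 100, false, false)); /* 0 */
	return 0;
}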
@@ -1438,6 +1336,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1438 newinet->mc_ttl = ip_hdr(skb)->ttl; 1336 newinet->mc_ttl = ip_hdr(skb)->ttl;
1439 newinet->rcv_tos = ip_hdr(skb)->tos; 1337 newinet->rcv_tos = ip_hdr(skb)->tos;
1440 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1338 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1339 inet_set_txhash(newsk);
1441 if (inet_opt) 1340 if (inet_opt)
1442 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 1341 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1443 newinet->inet_id = newtp->write_seq ^ jiffies; 1342 newinet->inet_id = newtp->write_seq ^ jiffies;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e68e0d4af6c9..1649988bd1b6 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -298,7 +298,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
298 tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; 298 tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
299 tw->tw_tclass = np->tclass; 299 tw->tw_tclass = np->tclass;
300 tw->tw_flowlabel = np->flow_label >> 12; 300 tw->tw_flowlabel = np->flow_label >> 12;
301 tw->tw_ipv6only = np->ipv6only; 301 tw->tw_ipv6only = sk->sk_ipv6only;
302 } 302 }
303#endif 303#endif
304 304
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 4e86c59ec7f7..55046ecd083e 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -309,7 +309,7 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
309 309
310 th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, 310 th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
311 iph->daddr, 0); 311 iph->daddr, 0);
312 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; 312 skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
313 313
314 return tcp_gro_complete(skb); 314 return tcp_gro_complete(skb);
315} 315}
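The tcp_offload.c change turns the gso_type assignment into an OR: when the TCP segment was reassembled through an encapsulation path, earlier layers have already set tunnel GSO bits, and a plain '=' would wipe them out. A trivial, self-contained illustration of the difference (the flag values below are placeholders, not the kernel's):

#include <assert.h>

#define GSO_TCPV4      0x01	/* placeholder values for illustration */
#define GSO_UDP_TUNNEL 0x20

int main(void)
{
	unsigned int gso_type = GSO_UDP_TUNNEL;	/* set by the encap GRO layer */

	gso_type |= GSO_TCPV4;			/* '=' here would drop the tunnel bit */

	assert(gso_type & GSO_UDP_TUNNEL);
	assert(gso_type & GSO_TCPV4);
	return 0;
}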
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d92bce0ea24e..8fcfc91964ec 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -916,6 +916,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
916 skb_orphan(skb); 916 skb_orphan(skb);
917 skb->sk = sk; 917 skb->sk = sk;
918 skb->destructor = tcp_wfree; 918 skb->destructor = tcp_wfree;
919 skb_set_hash_from_sk(skb, sk);
919 atomic_add(skb->truesize, &sk->sk_wmem_alloc); 920 atomic_add(skb->truesize, &sk->sk_wmem_alloc);
920 921
921 /* Build TCP header and checksum it. */ 922 /* Build TCP header and checksum it. */
@@ -978,7 +979,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
978 if (likely(err <= 0)) 979 if (likely(err <= 0))
979 return err; 980 return err;
980 981
981 tcp_enter_cwr(sk, 1); 982 tcp_enter_cwr(sk);
982 983
983 return net_xmit_eval(err); 984 return net_xmit_eval(err);
984} 985}
@@ -2525,8 +2526,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2525 if (!tp->retrans_stamp) 2526 if (!tp->retrans_stamp)
2526 tp->retrans_stamp = TCP_SKB_CB(skb)->when; 2527 tp->retrans_stamp = TCP_SKB_CB(skb)->when;
2527 2528
2528 tp->undo_retrans += tcp_skb_pcount(skb);
2529
2530 /* snd_nxt is stored to detect loss of retransmitted segment, 2529 /* snd_nxt is stored to detect loss of retransmitted segment,
2531 * see tcp_input.c tcp_sacktag_write_queue(). 2530 * see tcp_input.c tcp_sacktag_write_queue().
2532 */ 2531 */
@@ -2534,6 +2533,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2534 } else if (err != -EBUSY) { 2533 } else if (err != -EBUSY) {
2535 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); 2534 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
2536 } 2535 }
2536
2537 if (tp->undo_retrans < 0)
2538 tp->undo_retrans = 0;
2539 tp->undo_retrans += tcp_skb_pcount(skb);
2537 return err; 2540 return err;
2538} 2541}
2539 2542
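The tcp_output.c hunk above also moves the point where a retransmission is charged to undo_retrans: the counter is bumped only after the segment was actually queued, and is clamped back to zero first if earlier processing (e.g. DSACKs) drove it negative. A small standalone sketch of that accounting, independent of the kernel structures:

#include <assert.h>

/* Hypothetical helper mirroring the reordered accounting above: clamp a
 * gone-negative undo counter before charging the new retransmission. */
static void charge_retrans(int *undo_retrans, int pcount)
{
	if (*undo_retrans < 0)
		*undo_retrans = 0;
	*undo_retrans += pcount;
}

int main(void)
{
	int undo_retrans = -2;	/* e.g. after DSACKs decremented it */

	charge_retrans(&undo_retrans, 1);
	assert(undo_retrans == 1);
	return 0;
}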
@@ -3299,3 +3302,18 @@ void tcp_send_probe0(struct sock *sk)
3299 TCP_RTO_MAX); 3302 TCP_RTO_MAX);
3300 } 3303 }
3301} 3304}
3305
3306int tcp_rtx_synack(struct sock *sk, struct request_sock *req)
3307{
3308 const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
3309 struct flowi fl;
3310 int res;
3311
3312 res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL);
3313 if (!res) {
3314 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
3315 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
3316 }
3317 return res;
3318}
3319EXPORT_SYMBOL(tcp_rtx_synack);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d92f94b7e402..f57c0e4c2326 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -594,27 +594,6 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
594 return true; 594 return true;
595} 595}
596 596
597static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
598 __be16 loc_port, __be32 loc_addr,
599 __be16 rmt_port, __be32 rmt_addr,
600 int dif)
601{
602 struct hlist_nulls_node *node;
603 struct sock *s = sk;
604 unsigned short hnum = ntohs(loc_port);
605
606 sk_nulls_for_each_from(s, node) {
607 if (__udp_is_mcast_sock(net, s,
608 loc_port, loc_addr,
609 rmt_port, rmt_addr,
610 dif, hnum))
611 goto found;
612 }
613 s = NULL;
614found:
615 return s;
616}
617
618/* 597/*
619 * This routine is called by the ICMP module when it gets some 598 * This routine is called by the ICMP module when it gets some
620 * sort of error condition. If err < 0 then the socket should 599 * sort of error condition. If err < 0 then the socket should
@@ -1588,8 +1567,11 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1588 goto csum_error; 1567 goto csum_error;
1589 1568
1590 1569
1591 if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) 1570 if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
1571 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
1572 is_udplite);
1592 goto drop; 1573 goto drop;
1574 }
1593 1575
1594 rc = 0; 1576 rc = 0;
1595 1577
@@ -1637,6 +1619,8 @@ static void flush_stack(struct sock **stack, unsigned int count,
1637 1619
1638 if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0) 1620 if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0)
1639 skb1 = NULL; 1621 skb1 = NULL;
1622
1623 sock_put(sk);
1640 } 1624 }
1641 if (unlikely(skb1)) 1625 if (unlikely(skb1))
1642 kfree_skb(skb1); 1626 kfree_skb(skb1);
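flush_stack() now drops a reference on every socket it delivers to; the multicast receive path (next hunk) takes that reference with sock_hold() while the hash-slot lock is held, so the actual queueing work can run with no lock. A userland C sketch of this take-references-under-the-lock, deliver-unlocked pattern (types and names are illustrative only, not kernel APIs):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct obj {
	atomic_int refcnt;
	int id;
};

static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER;
static struct obj bucket[4] = { {1, 0}, {1, 1}, {1, 2}, {1, 3} };

int main(void)
{
	struct obj *stack[4];
	unsigned int count = 0, i;

	/* Walk the bucket under the lock, only taking references. */
	pthread_mutex_lock(&bucket_lock);
	for (i = 0; i < 4; i++) {
		atomic_fetch_add(&bucket[i].refcnt, 1);	/* plays the role of sock_hold() */
		stack[count++] = &bucket[i];
	}
	pthread_mutex_unlock(&bucket_lock);

	/* Do the slow per-object work with the lock released. */
	for (i = 0; i < count; i++) {
		printf("deliver to obj %d\n", stack[i]->id);
		atomic_fetch_sub(&stack[i]->refcnt, 1);	/* plays the role of sock_put() */
	}
	return 0;
}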
@@ -1665,41 +1649,50 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
1665 struct udp_table *udptable) 1649 struct udp_table *udptable)
1666{ 1650{
1667 struct sock *sk, *stack[256 / sizeof(struct sock *)]; 1651 struct sock *sk, *stack[256 / sizeof(struct sock *)];
1668 struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest)); 1652 struct hlist_nulls_node *node;
1669 int dif; 1653 unsigned short hnum = ntohs(uh->dest);
1670 unsigned int i, count = 0; 1654 struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
1655 int dif = skb->dev->ifindex;
1656 unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
1657 unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
1658
1659 if (use_hash2) {
1660 hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
1661 udp_table.mask;
1662 hash2 = udp4_portaddr_hash(net, daddr, hnum) & udp_table.mask;
1663start_lookup:
1664 hslot = &udp_table.hash2[hash2];
1665 offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
1666 }
1671 1667
1672 spin_lock(&hslot->lock); 1668 spin_lock(&hslot->lock);
1673 sk = sk_nulls_head(&hslot->head); 1669 sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) {
1674 dif = skb->dev->ifindex; 1670 if (__udp_is_mcast_sock(net, sk,
1675 sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); 1671 uh->dest, daddr,
1676 while (sk) { 1672 uh->source, saddr,
1677 stack[count++] = sk; 1673 dif, hnum)) {
1678 sk = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, 1674 if (unlikely(count == ARRAY_SIZE(stack))) {
1679 daddr, uh->source, saddr, dif); 1675 flush_stack(stack, count, skb, ~0);
1680 if (unlikely(count == ARRAY_SIZE(stack))) { 1676 count = 0;
1681 if (!sk) 1677 }
1682 break; 1678 stack[count++] = sk;
1683 flush_stack(stack, count, skb, ~0); 1679 sock_hold(sk);
1684 count = 0;
1685 } 1680 }
1686 } 1681 }
1687 /*
1688 * before releasing chain lock, we must take a reference on sockets
1689 */
1690 for (i = 0; i < count; i++)
1691 sock_hold(stack[i]);
1692 1682
1693 spin_unlock(&hslot->lock); 1683 spin_unlock(&hslot->lock);
1694 1684
1685 /* Also lookup *:port if we are using hash2 and haven't done so yet. */
1686 if (use_hash2 && hash2 != hash2_any) {
1687 hash2 = hash2_any;
1688 goto start_lookup;
1689 }
1690
1695 /* 1691 /*
1696 * do the slow work with no lock held 1692 * do the slow work with no lock held
1697 */ 1693 */
1698 if (count) { 1694 if (count) {
1699 flush_stack(stack, count, skb, count - 1); 1695 flush_stack(stack, count, skb, count - 1);
1700
1701 for (i = 0; i < count; i++)
1702 sock_put(stack[i]);
1703 } else { 1696 } else {
1704 kfree_skb(skb); 1697 kfree_skb(skb);
1705 } 1698 }
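The rewritten __udp4_lib_mcast_deliver() above switches to the secondary (address, port) hash table when a primary slot holds more than ten sockets: it first scans the bucket keyed by the packet's destination address, then jumps back through start_lookup to scan the INADDR_ANY bucket, so wildcard binds are still found. A toy C sketch of that two-bucket walk (the hash and table layout are simplified placeholders, not the kernel's udp4_portaddr_hash()):

#include <stdio.h>

#define TABLE_MASK 0xffu

/* Toy address+port hash standing in for udp4_portaddr_hash(). */
static unsigned int addr_port_hash(unsigned int addr, unsigned short port)
{
	return (addr * 2654435761u) ^ port;
}

int main(void)
{
	unsigned int daddr = 0xe0000001u;	/* 224.0.0.1 */
	unsigned short hnum = 5353;

	unsigned int hash2     = addr_port_hash(daddr, hnum) & TABLE_MASK;
	unsigned int hash2_any = addr_port_hash(0, hnum) & TABLE_MASK; /* INADDR_ANY */

	printf("pass 1: scan bucket %u for sockets bound to the group address\n",
	       hash2);
	if (hash2 != hash2_any)
		printf("pass 2: scan bucket %u for wildcard (0.0.0.0) binds\n",
		       hash2_any);
	return 0;
}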
@@ -2523,79 +2516,3 @@ void __init udp_init(void)
2523 sysctl_udp_rmem_min = SK_MEM_QUANTUM; 2516 sysctl_udp_rmem_min = SK_MEM_QUANTUM;
2524 sysctl_udp_wmem_min = SK_MEM_QUANTUM; 2517 sysctl_udp_wmem_min = SK_MEM_QUANTUM;
2525} 2518}
2526
2527struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
2528 netdev_features_t features)
2529{
2530 struct sk_buff *segs = ERR_PTR(-EINVAL);
2531 u16 mac_offset = skb->mac_header;
2532 int mac_len = skb->mac_len;
2533 int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
2534 __be16 protocol = skb->protocol;
2535 netdev_features_t enc_features;
2536 int udp_offset, outer_hlen;
2537 unsigned int oldlen;
2538 bool need_csum;
2539
2540 oldlen = (u16)~skb->len;
2541
2542 if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
2543 goto out;
2544
2545 skb->encapsulation = 0;
2546 __skb_pull(skb, tnl_hlen);
2547 skb_reset_mac_header(skb);
2548 skb_set_network_header(skb, skb_inner_network_offset(skb));
2549 skb->mac_len = skb_inner_network_offset(skb);
2550 skb->protocol = htons(ETH_P_TEB);
2551
2552 need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
2553 if (need_csum)
2554 skb->encap_hdr_csum = 1;
2555
2556 /* segment inner packet. */
2557 enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
2558 segs = skb_mac_gso_segment(skb, enc_features);
2559 if (!segs || IS_ERR(segs)) {
2560 skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
2561 mac_len);
2562 goto out;
2563 }
2564
2565 outer_hlen = skb_tnl_header_len(skb);
2566 udp_offset = outer_hlen - tnl_hlen;
2567 skb = segs;
2568 do {
2569 struct udphdr *uh;
2570 int len;
2571
2572 skb_reset_inner_headers(skb);
2573 skb->encapsulation = 1;
2574
2575 skb->mac_len = mac_len;
2576
2577 skb_push(skb, outer_hlen);
2578 skb_reset_mac_header(skb);
2579 skb_set_network_header(skb, mac_len);
2580 skb_set_transport_header(skb, udp_offset);
2581 len = skb->len - udp_offset;
2582 uh = udp_hdr(skb);
2583 uh->len = htons(len);
2584
2585 if (need_csum) {
2586 __be32 delta = htonl(oldlen + len);
2587
2588 uh->check = ~csum_fold((__force __wsum)
2589 ((__force u32)uh->check +
2590 (__force u32)delta));
2591 uh->check = gso_make_checksum(skb, ~uh->check);
2592
2593 if (uh->check == 0)
2594 uh->check = CSUM_MANGLED_0;
2595 }
2596
2597 skb->protocol = protocol;
2598 } while ((skb = skb->next));
2599out:
2600 return segs;
2601}
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 546d2d439dda..59035bc3008d 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -47,6 +47,82 @@ static int udp4_ufo_send_check(struct sk_buff *skb)
47 return 0; 47 return 0;
48} 48}
49 49
50struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
51 netdev_features_t features)
52{
53 struct sk_buff *segs = ERR_PTR(-EINVAL);
54 u16 mac_offset = skb->mac_header;
55 int mac_len = skb->mac_len;
56 int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
57 __be16 protocol = skb->protocol;
58 netdev_features_t enc_features;
59 int udp_offset, outer_hlen;
60 unsigned int oldlen;
61 bool need_csum;
62
63 oldlen = (u16)~skb->len;
64
65 if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
66 goto out;
67
68 skb->encapsulation = 0;
69 __skb_pull(skb, tnl_hlen);
70 skb_reset_mac_header(skb);
71 skb_set_network_header(skb, skb_inner_network_offset(skb));
72 skb->mac_len = skb_inner_network_offset(skb);
73 skb->protocol = htons(ETH_P_TEB);
74
75 need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
76 if (need_csum)
77 skb->encap_hdr_csum = 1;
78
79 /* segment inner packet. */
80 enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
81 segs = skb_mac_gso_segment(skb, enc_features);
82 if (IS_ERR_OR_NULL(segs)) {
83 skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
84 mac_len);
85 goto out;
86 }
87
88 outer_hlen = skb_tnl_header_len(skb);
89 udp_offset = outer_hlen - tnl_hlen;
90 skb = segs;
91 do {
92 struct udphdr *uh;
93 int len;
94
95 skb_reset_inner_headers(skb);
96 skb->encapsulation = 1;
97
98 skb->mac_len = mac_len;
99
100 skb_push(skb, outer_hlen);
101 skb_reset_mac_header(skb);
102 skb_set_network_header(skb, mac_len);
103 skb_set_transport_header(skb, udp_offset);
104 len = skb->len - udp_offset;
105 uh = udp_hdr(skb);
106 uh->len = htons(len);
107
108 if (need_csum) {
109 __be32 delta = htonl(oldlen + len);
110
111 uh->check = ~csum_fold((__force __wsum)
112 ((__force u32)uh->check +
113 (__force u32)delta));
114 uh->check = gso_make_checksum(skb, ~uh->check);
115
116 if (uh->check == 0)
117 uh->check = CSUM_MANGLED_0;
118 }
119
120 skb->protocol = protocol;
121 } while ((skb = skb->next));
122out:
123 return segs;
124}
125
50static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, 126static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
51 netdev_features_t features) 127 netdev_features_t features)
52{ 128{
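skb_udp_tunnel_segment(), now living in udp_offload.c, fixes up each segment's outer UDP checksum incrementally: it folds the difference between the old and new length into uh->check rather than recomputing the sum over the whole payload. The same one's-complement update (RFC 1624 style) can be demonstrated with a small standalone program; the helpers below are simplified stand-ins for csum_fold() and friends, not the kernel routines:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Fold a 32-bit accumulator into a 16-bit one's-complement sum. */
static uint16_t fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/* Full checksum over 16-bit words (checksum field excluded by the caller). */
static uint16_t csum(const uint16_t *w, int nwords)
{
	uint32_t sum = 0;

	while (nwords--)
		sum += *w++;
	return (uint16_t)~fold(sum);
}

/* Incremental update when one 16-bit field changes from old to new_val. */
static uint16_t csum_update(uint16_t check, uint16_t old, uint16_t new_val)
{
	uint32_t sum = (uint16_t)~check;

	sum += (uint16_t)~old;
	sum += new_val;
	return (uint16_t)~fold(sum);
}

int main(void)
{
	/* Toy "UDP header" words: source port, dest port, length. */
	uint16_t words[3] = { 0x1234, 0x0035, 0x0020 };
	uint16_t check = csum(words, 3);
	uint16_t new_len = 0x0058;

	uint16_t incremental = csum_update(check, words[2], new_len);

	words[2] = new_len;
	assert(incremental == csum(words, 3));
	printf("incremental 0x%04x matches full recompute\n", incremental);
	return 0;
}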
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
new file mode 100644
index 000000000000..61ec1a65207e
--- /dev/null
+++ b/net/ipv4/udp_tunnel.c
@@ -0,0 +1,100 @@
1#include <linux/module.h>
2#include <linux/errno.h>
3#include <linux/socket.h>
4#include <linux/udp.h>
5#include <linux/types.h>
6#include <linux/kernel.h>
7#include <net/udp.h>
8#include <net/udp_tunnel.h>
9#include <net/net_namespace.h>
10
11int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
12 struct socket **sockp)
13{
14 int err = -EINVAL;
15 struct socket *sock = NULL;
16
17#if IS_ENABLED(CONFIG_IPV6)
18 if (cfg->family == AF_INET6) {
19 struct sockaddr_in6 udp6_addr;
20
21 err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
22 if (err < 0)
23 goto error;
24
25 sk_change_net(sock->sk, net);
26
27 udp6_addr.sin6_family = AF_INET6;
28 memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
29 sizeof(udp6_addr.sin6_addr));
30 udp6_addr.sin6_port = cfg->local_udp_port;
31 err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
32 sizeof(udp6_addr));
33 if (err < 0)
34 goto error;
35
36 if (cfg->peer_udp_port) {
37 udp6_addr.sin6_family = AF_INET6;
38 memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
39 sizeof(udp6_addr.sin6_addr));
40 udp6_addr.sin6_port = cfg->peer_udp_port;
41 err = kernel_connect(sock,
42 (struct sockaddr *)&udp6_addr,
43 sizeof(udp6_addr), 0);
44 }
45 if (err < 0)
46 goto error;
47
48 udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
49 udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
50 } else
51#endif
52 if (cfg->family == AF_INET) {
53 struct sockaddr_in udp_addr;
54
55 err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock);
56 if (err < 0)
57 goto error;
58
59 sk_change_net(sock->sk, net);
60
61 udp_addr.sin_family = AF_INET;
62 udp_addr.sin_addr = cfg->local_ip;
63 udp_addr.sin_port = cfg->local_udp_port;
64 err = kernel_bind(sock, (struct sockaddr *)&udp_addr,
65 sizeof(udp_addr));
66 if (err < 0)
67 goto error;
68
69 if (cfg->peer_udp_port) {
70 udp_addr.sin_family = AF_INET;
71 udp_addr.sin_addr = cfg->peer_ip;
72 udp_addr.sin_port = cfg->peer_udp_port;
73 err = kernel_connect(sock,
74 (struct sockaddr *)&udp_addr,
75 sizeof(udp_addr), 0);
76 if (err < 0)
77 goto error;
78 }
79
80 sock->sk->sk_no_check_tx = !cfg->use_udp_checksums;
81 } else {
82 return -EPFNOSUPPORT;
83 }
84
85
86 *sockp = sock;
87
88 return 0;
89
90error:
91 if (sock) {
92 kernel_sock_shutdown(sock, SHUT_RDWR);
93 sk_release_kernel(sock->sk);
94 }
95 *sockp = NULL;
96 return err;
97}
98EXPORT_SYMBOL(udp_sock_create);
99
100MODULE_LICENSE("GPL");
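The new udp_tunnel.c exports udp_sock_create() so encapsulation code can open, bind and optionally connect a kernel UDP socket from a single struct udp_port_cfg. A hypothetical caller might look like the kernel-context sketch below; only the udp_sock_create() signature and the cfg fields come from the code above, while the wrapper name and port number are illustrative:

#include <linux/err.h>
#include <linux/in.h>
#include <net/udp_tunnel.h>

/* Hypothetical helper: open an IPv4 kernel UDP socket for a tunnel,
 * bound to any local address on the caller's port. */
static struct socket *tunnel_sock_open(struct net *net, __be16 port)
{
	struct udp_port_cfg cfg = {
		.family            = AF_INET,
		.local_udp_port    = port,
		.use_udp_checksums = true,
	};
	struct socket *sock;
	int err;

	cfg.local_ip.s_addr = htonl(INADDR_ANY);

	err = udp_sock_create(net, &cfg, &sock);
	if (err < 0)
		return ERR_PTR(err);

	/* The caller releases it later, presumably the same way udp_sock_create()
	 * does on error: kernel_sock_shutdown() plus sk_release_kernel(). */
	return sock;
}

Centralizing this setup in one helper lets the various UDP encapsulations share the dual-family socket creation instead of each driver open-coding it.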