Diffstat (limited to 'net/ipv4')
 79 files changed, 1507 insertions(+), 13655 deletions(-)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 591ea23639ca..691268f3a359 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -630,5 +630,3 @@ config TCP_MD5SIG
 
 	  If unsure, say N.
 
-source "net/ipv4/ipvs/Kconfig"
-
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index ad40ef3f9ebc..80ff87ce43aa 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -33,7 +33,6 @@ obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
 obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
 obj-$(CONFIG_IP_PNP) += ipconfig.o
 obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/
-obj-$(CONFIG_IP_VS) += ipvs/
 obj-$(CONFIG_INET_DIAG) += inet_diag.o
 obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
 obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8a3ac1fa71a9..1fbff5fa4241 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -469,7 +469,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	 */
 	err = -EADDRNOTAVAIL;
 	if (!sysctl_ip_nonlocal_bind &&
-	    !inet->freebind &&
+	    !(inet->freebind || inet->transparent) &&
 	    addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
 	    chk_addr_ret != RTN_LOCAL &&
 	    chk_addr_ret != RTN_MULTICAST &&
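With inet->transparent set, bind() now accepts an address that is not configured on the host (tproxy-style interception). A minimal userspace sketch of what this hunk enables; the IP_TRANSPARENT option itself is wired up in the ip_sockglue.c hunk further down, and the address used here is just an example from TEST-NET:

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#ifndef IP_TRANSPARENT
#define IP_TRANSPARENT 19	/* value from linux/in.h in this series */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int one = 1;
	struct sockaddr_in sin;

	/* Privileged: see the CAP_NET_ADMIN check added in do_ip_setsockopt(). */
	if (setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) < 0)
		perror("setsockopt(IP_TRANSPARENT)");

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_port = htons(8080);
	sin.sin_addr.s_addr = inet_addr("192.0.2.1");	/* not a local address */

	/* Without this hunk (and without IP_FREEBIND or the
	 * ip_nonlocal_bind sysctl) this bind() fails with EADDRNOTAVAIL. */
	if (bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0)
		perror("bind");
	return 0;
}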
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f70fac612596..7f9e337e3908 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1234,6 +1234,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 	write_lock_bh(&in_dev->mc_list_lock);
 	im->next=in_dev->mc_list;
 	in_dev->mc_list=im;
+	in_dev->mc_count++;
 	write_unlock_bh(&in_dev->mc_list_lock);
 #ifdef CONFIG_IP_MULTICAST
 	igmpv3_del_delrec(in_dev, im->multiaddr);
@@ -1282,6 +1283,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
 		if (--i->users == 0) {
 			write_lock_bh(&in_dev->mc_list_lock);
 			*ip = i->next;
+			in_dev->mc_count--;
 			write_unlock_bh(&in_dev->mc_list_lock);
 			igmp_group_dropped(i);
 
@@ -1330,6 +1332,7 @@ void ip_mc_init_dev(struct in_device *in_dev)
 	setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire,
 			(unsigned long)in_dev);
 	in_dev->mr_ifc_count = 0;
+	in_dev->mc_count = 0;
 	setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire,
 			(unsigned long)in_dev);
 	in_dev->mr_qrv = IGMP_Unsolicited_Report_Count;
@@ -1369,8 +1372,8 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
 	write_lock_bh(&in_dev->mc_list_lock);
 	while ((i = in_dev->mc_list) != NULL) {
 		in_dev->mc_list = i->next;
+		in_dev->mc_count--;
 		write_unlock_bh(&in_dev->mc_list_lock);
-
 		igmp_group_dropped(i);
 		ip_ma_put(i);
 
@@ -2383,7 +2386,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
 
 	if (state->in_dev->mc_list == im) {
 		seq_printf(seq, "%d\t%-10s: %5d %7s\n",
-			   state->dev->ifindex, state->dev->name, state->dev->mc_count, querier);
+			   state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier);
 	}
 
 	seq_printf(seq,
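The last hunk corrects the Count column of /proc/net/igmp: it now reports the in_device's IP multicast group count maintained by the hunks above, rather than the net_device's link-layer mc_count. A small sketch that reads the column back; the sscanf pattern is a loose assumption about the row layout produced by the seq_printf above:

#include <stdio.h>

int main(void)
{
	char line[256], dev[16], querier[8];
	int idx, count;
	FILE *f = fopen("/proc/net/igmp", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		/* Device rows match the seq_printf() format string above;
		 * the header and per-group rows fail the scan and are skipped. */
		if (sscanf(line, "%d %15s : %d %7s", &idx, dev, &count, querier) == 4)
			printf("%s: %d groups\n", dev, count);
	fclose(f);
	return 0;
}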
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 0c1ae68ee84b..bd1278a2d828 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -30,20 +30,22 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
 #endif
 
 /*
- * This array holds the first and last local port number.
+ * This struct holds the first and last local port number.
  */
-int sysctl_local_port_range[2] = { 32768, 61000 };
-DEFINE_SEQLOCK(sysctl_port_range_lock);
+struct local_ports sysctl_local_ports __read_mostly = {
+	.lock = SEQLOCK_UNLOCKED,
+	.range = { 32768, 61000 },
+};
 
 void inet_get_local_port_range(int *low, int *high)
 {
 	unsigned seq;
 	do {
-		seq = read_seqbegin(&sysctl_port_range_lock);
+		seq = read_seqbegin(&sysctl_local_ports.lock);
 
-		*low = sysctl_local_port_range[0];
-		*high = sysctl_local_port_range[1];
-	} while (read_seqretry(&sysctl_port_range_lock, seq));
+		*low = sysctl_local_ports.range[0];
+		*high = sysctl_local_ports.range[1];
+	} while (read_seqretry(&sysctl_local_ports.lock, seq));
 }
 EXPORT_SYMBOL(inet_get_local_port_range);
 
@@ -335,6 +337,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
 			    .saddr = ireq->loc_addr,
 			    .tos = RT_CONN_FLAGS(sk) } },
 	    .proto = sk->sk_protocol,
+	    .flags = inet_sk_flowi_flags(sk),
 	    .uli_u = { .ports =
 		       { .sport = inet_sk(sk)->sport,
 			 .dport = ireq->rmt_port } } };
@@ -515,6 +518,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
 		newicsk->icsk_bind_hash = NULL;
 
 		inet_sk(newsk)->dport = inet_rsk(req)->rmt_port;
+		inet_sk(newsk)->num = ntohs(inet_rsk(req)->loc_port);
+		inet_sk(newsk)->sport = inet_rsk(req)->loc_port;
 		newsk->sk_write_space = sk_stream_write_space;
 
 		newicsk->icsk_retransmits = 0;
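Bundling both bounds under one seqlock means a reader can never observe a torn pair, e.g. a freshly written low bound together with the stale high bound, while the range is rewritten through sysctl. A quick userspace sanity check through the existing proc interface:

#include <stdio.h>

int main(void)
{
	int low, high;
	FILE *f = fopen("/proc/sys/net/ipv4/ip_local_port_range", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%d %d", &low, &high) != 2) {
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("ephemeral port range: %d-%d\n", low, high);
	return 0;
}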
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index c10036e7a463..89cb047ab314 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -782,11 +782,15 @@ skip_listen_ht:
 		struct sock *sk;
 		struct hlist_node *node;
 
+		num = 0;
+
+		if (hlist_empty(&head->chain) && hlist_empty(&head->twchain))
+			continue;
+
 		if (i > s_i)
 			s_num = 0;
 
 		read_lock_bh(lock);
-		num = 0;
 		sk_for_each(sk, node, &head->chain) {
 			struct inet_sock *inet = inet_sk(sk);
 
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 743f011b9a84..1c5fd38f8824 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -126,6 +126,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
 		tw->tw_reuse = sk->sk_reuse;
 		tw->tw_hash = sk->sk_hash;
 		tw->tw_ipv6only = 0;
+		tw->tw_transparent = inet->transparent;
 		tw->tw_prot = sk->sk_prot_creator;
 		twsk_net_set(tw, hold_net(sock_net(sk)));
 		atomic_set(&tw->tw_refcnt, 1);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2a61158ea722..85c487b8572b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -27,6 +27,7 @@
 #include <linux/inetdevice.h>
 #include <linux/igmp.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/etherdevice.h>
 #include <linux/if_ether.h>
 
 #include <net/sock.h>
@@ -41,6 +42,7 @@
 #include <net/xfrm.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <net/rtnetlink.h>
 
 #ifdef CONFIG_IPV6
 #include <net/ipv6.h>
@@ -117,8 +119,10 @@
    Alexey Kuznetsov.
  */
 
+static struct rtnl_link_ops ipgre_link_ops __read_mostly;
 static int ipgre_tunnel_init(struct net_device *dev);
 static void ipgre_tunnel_setup(struct net_device *dev);
+static int ipgre_tunnel_bind_dev(struct net_device *dev);
 
 /* Fallback tunnel: no source, no destination, no key, no options */
 
@@ -163,38 +167,64 @@ static DEFINE_RWLOCK(ipgre_lock);
 /* Given src, dst and key, find appropriate for input tunnel. */
 
 static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
-		__be32 remote, __be32 local, __be32 key)
+					      __be32 remote, __be32 local,
+					      __be32 key, __be16 gre_proto)
 {
 	unsigned h0 = HASH(remote);
 	unsigned h1 = HASH(key);
 	struct ip_tunnel *t;
+	struct ip_tunnel *t2 = NULL;
 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
+	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
+		       ARPHRD_ETHER : ARPHRD_IPGRE;
 
 	for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
-			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
-				return t;
+			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
+				if (t->dev->type == dev_type)
+					return t;
+				if (t->dev->type == ARPHRD_IPGRE && !t2)
+					t2 = t;
+			}
 		}
 	}
+
 	for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
 		if (remote == t->parms.iph.daddr) {
-			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
-				return t;
+			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
+				if (t->dev->type == dev_type)
+					return t;
+				if (t->dev->type == ARPHRD_IPGRE && !t2)
+					t2 = t;
+			}
 		}
 	}
+
 	for (t = ign->tunnels_l[h1]; t; t = t->next) {
 		if (local == t->parms.iph.saddr ||
 		    (local == t->parms.iph.daddr &&
 		     ipv4_is_multicast(local))) {
-			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
-				return t;
+			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
+				if (t->dev->type == dev_type)
+					return t;
+				if (t->dev->type == ARPHRD_IPGRE && !t2)
+					t2 = t;
+			}
 		}
 	}
+
 	for (t = ign->tunnels_wc[h1]; t; t = t->next) {
-		if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
-			return t;
+		if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
+			if (t->dev->type == dev_type)
+				return t;
+			if (t->dev->type == ARPHRD_IPGRE && !t2)
+				t2 = t;
+		}
 	}
 
+	if (t2)
+		return t2;
+
 	if (ign->fb_tunnel_dev->flags&IFF_UP)
 		return netdev_priv(ign->fb_tunnel_dev);
 	return NULL;
@@ -249,25 +279,37 @@ static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
 	}
 }
 
-static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
-		struct ip_tunnel_parm *parms, int create)
+static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
+					   struct ip_tunnel_parm *parms,
+					   int type)
 {
 	__be32 remote = parms->iph.daddr;
 	__be32 local = parms->iph.saddr;
 	__be32 key = parms->i_key;
-	struct ip_tunnel *t, **tp, *nt;
+	struct ip_tunnel *t, **tp;
+	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
+
+	for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
+		if (local == t->parms.iph.saddr &&
+		    remote == t->parms.iph.daddr &&
+		    key == t->parms.i_key &&
+		    type == t->dev->type)
+			break;
+
+	return t;
+}
+
+static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
+		struct ip_tunnel_parm *parms, int create)
+{
+	struct ip_tunnel *t, *nt;
 	struct net_device *dev;
 	char name[IFNAMSIZ];
 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
 
-	for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
-		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
-			if (key == t->parms.i_key)
-				return t;
-		}
-	}
-	if (!create)
-		return NULL;
+	t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
+	if (t || !create)
+		return t;
 
 	if (parms->name[0])
 		strlcpy(name, parms->name, IFNAMSIZ);
@@ -285,9 +327,11 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
 		goto failed_free;
 	}
 
-	dev->init = ipgre_tunnel_init;
 	nt = netdev_priv(dev);
 	nt->parms = *parms;
+	dev->rtnl_link_ops = &ipgre_link_ops;
+
+	dev->mtu = ipgre_tunnel_bind_dev(dev);
 
 	if (register_netdevice(dev) < 0)
 		goto failed_free;
@@ -380,8 +424,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
 
 	read_lock(&ipgre_lock);
 	t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
-				(flags&GRE_KEY) ?
-				*(((__be32*)p) + (grehlen>>2) - 1) : 0);
+				flags & GRE_KEY ?
+				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
+				p[1]);
 	if (t == NULL || t->parms.iph.daddr == 0 ||
 	    ipv4_is_multicast(t->parms.iph.daddr))
 		goto out;
@@ -431,6 +476,8 @@ static int ipgre_rcv(struct sk_buff *skb)
 	u32    seqno = 0;
 	struct ip_tunnel *tunnel;
 	int    offset = 4;
+	__be16 gre_proto;
+	unsigned int len;
 
 	if (!pskb_may_pull(skb, 16))
 		goto drop_nolock;
@@ -470,20 +517,22 @@ static int ipgre_rcv(struct sk_buff *skb)
 		}
 	}
 
+	gre_proto = *(__be16 *)(h + 2);
+
 	read_lock(&ipgre_lock);
 	if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
-					iph->saddr, iph->daddr, key)) != NULL) {
+					  iph->saddr, iph->daddr, key,
+					  gre_proto))) {
 		struct net_device_stats *stats = &tunnel->dev->stats;
 
 		secpath_reset(skb);
 
-		skb->protocol = *(__be16*)(h + 2);
+		skb->protocol = gre_proto;
 		/* WCCP version 1 and 2 protocol decoding.
 		 * - Change protocol to IP
 		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
 		 */
-		if (flags == 0 &&
-		    skb->protocol == htons(ETH_P_WCCP)) {
+		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
 			skb->protocol = htons(ETH_P_IP);
 			if ((*(h + offset) & 0xF0) != 0x40)
 				offset += 4;
@@ -491,7 +540,6 @@ static int ipgre_rcv(struct sk_buff *skb)
 
 		skb->mac_header = skb->network_header;
 		__pskb_pull(skb, offset);
-		skb_reset_network_header(skb);
 		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
 		skb->pkt_type = PACKET_HOST;
 #ifdef CONFIG_NET_IPGRE_BROADCAST
@@ -519,13 +567,32 @@ static int ipgre_rcv(struct sk_buff *skb)
 			}
 			tunnel->i_seqno = seqno + 1;
 		}
+
+		len = skb->len;
+
+		/* Warning: All skb pointers will be invalidated! */
+		if (tunnel->dev->type == ARPHRD_ETHER) {
+			if (!pskb_may_pull(skb, ETH_HLEN)) {
+				stats->rx_length_errors++;
+				stats->rx_errors++;
+				goto drop;
+			}
+
+			iph = ip_hdr(skb);
+			skb->protocol = eth_type_trans(skb, tunnel->dev);
+			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+		}
+
 		stats->rx_packets++;
-		stats->rx_bytes += skb->len;
+		stats->rx_bytes += len;
 		skb->dev = tunnel->dev;
 		dst_release(skb->dst);
 		skb->dst = NULL;
 		nf_reset(skb);
+
+		skb_reset_network_header(skb);
 		ipgre_ecn_decapsulate(iph, skb);
+
 		netif_rx(skb);
 		read_unlock(&ipgre_lock);
 		return(0);
@@ -560,7 +627,10 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto tx_error;
 	}
 
-	if (dev->header_ops) {
+	if (dev->type == ARPHRD_ETHER)
+		IPCB(skb)->flags = 0;
+
+	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
 		gre_hlen = 0;
 		tiph = (struct iphdr*)skb->data;
 	} else {
@@ -637,7 +707,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	df = tiph->frag_off;
 	if (df)
-		mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
+		mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
 	else
 		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
 
@@ -703,7 +773,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		old_iph = ip_hdr(skb);
 	}
 
-	skb->transport_header = skb->network_header;
+	skb_reset_transport_header(skb);
 	skb_push(skb, gre_hlen);
 	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -736,8 +806,9 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
 	}
 
-	((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
-	((__be16*)(iph+1))[1] = skb->protocol;
+	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
+	((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
+				   htons(ETH_P_TEB) : skb->protocol;
 
 	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
 		__be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
@@ -773,7 +844,7 @@ tx_error:
 	return 0;
 }
 
-static void ipgre_tunnel_bind_dev(struct net_device *dev)
+static int ipgre_tunnel_bind_dev(struct net_device *dev)
 {
 	struct net_device *tdev = NULL;
 	struct ip_tunnel *tunnel;
@@ -785,7 +856,7 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev)
 	tunnel = netdev_priv(dev);
 	iph = &tunnel->parms.iph;
 
-	/* Guess output device to choose reasonable mtu and hard_header_len */
+	/* Guess output device to choose reasonable mtu and needed_headroom */
 
 	if (iph->daddr) {
 		struct flowi fl = { .oif = tunnel->parms.link,
@@ -799,14 +870,16 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev)
 			tdev = rt->u.dst.dev;
 			ip_rt_put(rt);
 		}
-		dev->flags |= IFF_POINTOPOINT;
+
+		if (dev->type != ARPHRD_ETHER)
+			dev->flags |= IFF_POINTOPOINT;
 	}
 
 	if (!tdev && tunnel->parms.link)
 		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
 
 	if (tdev) {
-		hlen = tdev->hard_header_len;
+		hlen = tdev->hard_header_len + tdev->needed_headroom;
 		mtu = tdev->mtu;
 	}
 	dev->iflink = tunnel->parms.link;
@@ -820,10 +893,15 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev)
 		if (tunnel->parms.o_flags&GRE_SEQ)
 			addend += 4;
 	}
-	dev->hard_header_len = hlen + addend;
-	dev->mtu = mtu - addend;
+	dev->needed_headroom = addend + hlen;
+	mtu -= dev->hard_header_len - addend;
+
+	if (mtu < 68)
+		mtu = 68;
+
 	tunnel->hlen = addend;
 
+	return mtu;
 }
 
 static int
@@ -917,7 +995,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 				t->parms.iph.frag_off = p.iph.frag_off;
 				if (t->parms.link != p.link) {
 					t->parms.link = p.link;
-					ipgre_tunnel_bind_dev(dev);
+					dev->mtu = ipgre_tunnel_bind_dev(dev);
 					netdev_state_change(dev);
 				}
 			}
@@ -959,7 +1037,8 @@ done:
 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
-	if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
+	if (new_mtu < 68 ||
+	    new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
 		return -EINVAL;
 	dev->mtu = new_mtu;
 	return 0;
@@ -1078,6 +1157,7 @@ static int ipgre_close(struct net_device *dev)
 
 static void ipgre_tunnel_setup(struct net_device *dev)
 {
+	dev->init		= ipgre_tunnel_init;
 	dev->uninit		= ipgre_tunnel_uninit;
 	dev->destructor 	= free_netdev;
 	dev->hard_start_xmit	= ipgre_tunnel_xmit;
@@ -1085,7 +1165,7 @@ static void ipgre_tunnel_setup(struct net_device *dev)
 	dev->change_mtu		= ipgre_tunnel_change_mtu;
 
 	dev->type		= ARPHRD_IPGRE;
-	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
+	dev->needed_headroom	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
 	dev->flags		= IFF_NOARP;
 	dev->iflink		= 0;
@@ -1107,8 +1187,6 @@ static int ipgre_tunnel_init(struct net_device *dev)
 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 
-	ipgre_tunnel_bind_dev(dev);
-
 	if (iph->daddr) {
 #ifdef CONFIG_NET_IPGRE_BROADCAST
 		if (ipv4_is_multicast(iph->daddr)) {
@@ -1189,6 +1267,7 @@ static int ipgre_init_net(struct net *net)
 
 	ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
 	dev_net_set(ign->fb_tunnel_dev, net);
+	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
 
 	if ((err = register_netdev(ign->fb_tunnel_dev)))
 		goto err_reg_dev;
@@ -1221,6 +1300,298 @@ static struct pernet_operations ipgre_net_ops = {
 	.exit = ipgre_exit_net,
 };
 
+static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	__be16 flags;
+
+	if (!data)
+		return 0;
+
+	flags = 0;
+	if (data[IFLA_GRE_IFLAGS])
+		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+	if (data[IFLA_GRE_OFLAGS])
+		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+	if (flags & (GRE_VERSION|GRE_ROUTING))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	__be32 daddr;
+
+	if (tb[IFLA_ADDRESS]) {
+		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+			return -EINVAL;
+		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+			return -EADDRNOTAVAIL;
+	}
+
+	if (!data)
+		goto out;
+
+	if (data[IFLA_GRE_REMOTE]) {
+		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
+		if (!daddr)
+			return -EINVAL;
+	}
+
+out:
+	return ipgre_tunnel_validate(tb, data);
+}
+
+static void ipgre_netlink_parms(struct nlattr *data[],
+				struct ip_tunnel_parm *parms)
+{
+	memset(parms, 0, sizeof(*parms));
+
+	parms->iph.protocol = IPPROTO_GRE;
+
+	if (!data)
+		return;
+
+	if (data[IFLA_GRE_LINK])
+		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
+
+	if (data[IFLA_GRE_IFLAGS])
+		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
+
+	if (data[IFLA_GRE_OFLAGS])
+		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
+
+	if (data[IFLA_GRE_IKEY])
+		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
+
+	if (data[IFLA_GRE_OKEY])
+		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
+
+	if (data[IFLA_GRE_LOCAL])
+		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
+
+	if (data[IFLA_GRE_REMOTE])
+		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
+
+	if (data[IFLA_GRE_TTL])
+		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
+
+	if (data[IFLA_GRE_TOS])
+		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
+
+	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
+		parms->iph.frag_off = htons(IP_DF);
+}
+
+static int ipgre_tap_init(struct net_device *dev)
+{
+	struct ip_tunnel *tunnel;
+
+	tunnel = netdev_priv(dev);
+
+	tunnel->dev = dev;
+	strcpy(tunnel->parms.name, dev->name);
+
+	ipgre_tunnel_bind_dev(dev);
+
+	return 0;
+}
+
+static void ipgre_tap_setup(struct net_device *dev)
+{
+
+	ether_setup(dev);
+
+	dev->init		= ipgre_tap_init;
+	dev->uninit		= ipgre_tunnel_uninit;
+	dev->destructor 	= free_netdev;
+	dev->hard_start_xmit	= ipgre_tunnel_xmit;
+	dev->change_mtu		= ipgre_tunnel_change_mtu;
+
+	dev->iflink		= 0;
+	dev->features		|= NETIF_F_NETNS_LOCAL;
+}
+
+static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
+			 struct nlattr *data[])
+{
+	struct ip_tunnel *nt;
+	struct net *net = dev_net(dev);
+	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
+	int mtu;
+	int err;
+
+	nt = netdev_priv(dev);
+	ipgre_netlink_parms(data, &nt->parms);
+
+	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
+		return -EEXIST;
+
+	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
+		random_ether_addr(dev->dev_addr);
+
+	mtu = ipgre_tunnel_bind_dev(dev);
+	if (!tb[IFLA_MTU])
+		dev->mtu = mtu;
+
+	err = register_netdevice(dev);
+	if (err)
+		goto out;
+
+	dev_hold(dev);
+	ipgre_tunnel_link(ign, nt);
+
+out:
+	return err;
+}
+
+static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
+			    struct nlattr *data[])
+{
+	struct ip_tunnel *t, *nt;
+	struct net *net = dev_net(dev);
+	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
+	struct ip_tunnel_parm p;
+	int mtu;
+
+	if (dev == ign->fb_tunnel_dev)
+		return -EINVAL;
+
+	nt = netdev_priv(dev);
+	ipgre_netlink_parms(data, &p);
+
+	t = ipgre_tunnel_locate(net, &p, 0);
+
+	if (t) {
+		if (t->dev != dev)
+			return -EEXIST;
+	} else {
+		unsigned nflags = 0;
+
+		t = nt;
+
+		if (ipv4_is_multicast(p.iph.daddr))
+			nflags = IFF_BROADCAST;
+		else if (p.iph.daddr)
+			nflags = IFF_POINTOPOINT;
+
+		if ((dev->flags ^ nflags) &
+		    (IFF_POINTOPOINT | IFF_BROADCAST))
+			return -EINVAL;
+
+		ipgre_tunnel_unlink(ign, t);
+		t->parms.iph.saddr = p.iph.saddr;
+		t->parms.iph.daddr = p.iph.daddr;
+		t->parms.i_key = p.i_key;
+		memcpy(dev->dev_addr, &p.iph.saddr, 4);
+		memcpy(dev->broadcast, &p.iph.daddr, 4);
+		ipgre_tunnel_link(ign, t);
+		netdev_state_change(dev);
+	}
+
+	t->parms.o_key = p.o_key;
+	t->parms.iph.ttl = p.iph.ttl;
+	t->parms.iph.tos = p.iph.tos;
+	t->parms.iph.frag_off = p.iph.frag_off;
+
+	if (t->parms.link != p.link) {
+		t->parms.link = p.link;
+		mtu = ipgre_tunnel_bind_dev(dev);
+		if (!tb[IFLA_MTU])
+			dev->mtu = mtu;
+		netdev_state_change(dev);
+	}
+
+	return 0;
+}
+
+static size_t ipgre_get_size(const struct net_device *dev)
+{
+	return
+		/* IFLA_GRE_LINK */
+		nla_total_size(4) +
+		/* IFLA_GRE_IFLAGS */
+		nla_total_size(2) +
+		/* IFLA_GRE_OFLAGS */
+		nla_total_size(2) +
+		/* IFLA_GRE_IKEY */
+		nla_total_size(4) +
+		/* IFLA_GRE_OKEY */
+		nla_total_size(4) +
+		/* IFLA_GRE_LOCAL */
+		nla_total_size(4) +
+		/* IFLA_GRE_REMOTE */
+		nla_total_size(4) +
+		/* IFLA_GRE_TTL */
+		nla_total_size(1) +
+		/* IFLA_GRE_TOS */
+		nla_total_size(1) +
+		/* IFLA_GRE_PMTUDISC */
+		nla_total_size(1) +
+		0;
+}
+
+static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct ip_tunnel *t = netdev_priv(dev);
+	struct ip_tunnel_parm *p = &t->parms;
+
+	NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
+	NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
+	NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
+	NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
+	NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
+	NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
+	NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
+	NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
+	NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
+	NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
+	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
+	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
+	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
+	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
+	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
+	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
+	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
+	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
+	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
+};
+
+static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
+	.kind		= "gre",
+	.maxtype	= IFLA_GRE_MAX,
+	.policy		= ipgre_policy,
+	.priv_size	= sizeof(struct ip_tunnel),
+	.setup		= ipgre_tunnel_setup,
+	.validate	= ipgre_tunnel_validate,
+	.newlink	= ipgre_newlink,
+	.changelink	= ipgre_changelink,
+	.get_size	= ipgre_get_size,
+	.fill_info	= ipgre_fill_info,
+};
+
+static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
+	.kind		= "gretap",
+	.maxtype	= IFLA_GRE_MAX,
+	.policy		= ipgre_policy,
+	.priv_size	= sizeof(struct ip_tunnel),
+	.setup		= ipgre_tap_setup,
+	.validate	= ipgre_tap_validate,
+	.newlink	= ipgre_newlink,
+	.changelink	= ipgre_changelink,
+	.get_size	= ipgre_get_size,
+	.fill_info	= ipgre_fill_info,
+};
+
 /*
  * And now the modules code and kernel interface.
  */
@@ -1238,19 +1609,39 @@ static int __init ipgre_init(void)
 
 	err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
 	if (err < 0)
-		inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
+		goto gen_device_failed;
 
+	err = rtnl_link_register(&ipgre_link_ops);
+	if (err < 0)
+		goto rtnl_link_failed;
+
+	err = rtnl_link_register(&ipgre_tap_ops);
+	if (err < 0)
+		goto tap_ops_failed;
+
+out:
 	return err;
+
+tap_ops_failed:
+	rtnl_link_unregister(&ipgre_link_ops);
+rtnl_link_failed:
+	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
+gen_device_failed:
+	inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
+	goto out;
 }
 
 static void __exit ipgre_fini(void)
 {
+	rtnl_link_unregister(&ipgre_tap_ops);
+	rtnl_link_unregister(&ipgre_link_ops);
+	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
 	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
 		printk(KERN_INFO "ipgre close: can't remove protocol\n");
-
-	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
 }
 
 module_init(ipgre_init);
 module_exit(ipgre_fini);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("gre");
+MODULE_ALIAS_RTNL_LINK("gretap");
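For orientation when reading the receive and transmit paths above: this is the GRE header layout from RFC 2784/2890, sketched as a struct (the struct and its name are illustrative only, not part of the patch). gre_proto = *(__be16 *)(h + 2) in ipgre_rcv() reads the protocol field; htons(ETH_P_TEB) there marks a bridged Ethernet payload and steers ipgre_tunnel_lookup() toward an ARPHRD_ETHER (gretap) device. The optional words are also why offset starts at 4 and grows by 4 per flagged field.

/* Hypothetical illustration of the on-the-wire GRE header. */
struct gre_hdr_sketch {
	__be16 flags;		/* GRE_CSUM | GRE_KEY | GRE_SEQ + version bits */
	__be16 protocol;	/* htons(ETH_P_TEB) for gretap payloads */
	/* Optional words follow, in this order, when flagged:
	 *	__be16 csum, reserved;	if (flags & GRE_CSUM)
	 *	__be32 key;		if (flags & GRE_KEY)
	 *	__be32 seq;		if (flags & GRE_SEQ)
	 */
};

With the module aliases above, a request to create a "gre" or "gretap" link auto-loads this module; an iproute2 new enough to know the IFLA_GRE_* attributes can then create such devices over rtnetlink.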
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d533a89e08de..d2a8f8bb78a6 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -340,6 +340,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 					    .saddr = inet->saddr,
 					    .tos = RT_CONN_FLAGS(sk) } },
 			    .proto = sk->sk_protocol,
+			    .flags = inet_sk_flowi_flags(sk),
 			    .uli_u = { .ports =
 				       { .sport = inet->sport,
 					 .dport = inet->dport } } };
@@ -1371,7 +1372,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 			   .uli_u = { .ports =
 				      { .sport = tcp_hdr(skb)->dest,
 					.dport = tcp_hdr(skb)->source } },
-			   .proto = sk->sk_protocol };
+			   .proto = sk->sk_protocol,
+			   .flags = ip_reply_arg_flowi_flags(arg) };
 		security_skb_classify_flow(skb, &fl);
 		if (ip_route_output_key(sock_net(sk), &rt, &fl))
 			return;
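Both hunks pass socket-derived flags into the route lookup. The helpers are added outside net/ipv4 in this series (in include/net/inet_sock.h and include/net/ip.h), so their definitions do not appear in this diff; presumably they reduce to something like the following, mapping the transparent state to FLOWI_FLAG_ANYSRC so that output route lookups accept a non-local source address:

/* Presumed shape of the helpers referenced above; the real
 * definitions live in headers outside this net/ipv4 diff. */
static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
{
	return inet_sk(sk)->transparent ? FLOWI_FLAG_ANYSRC : 0;
}

static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg)
{
	return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0;
}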
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 105d92a039b9..465abf0a9869 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -419,7 +419,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			     (1<<IP_TTL) | (1<<IP_HDRINCL) |
 			     (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
 			     (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
-			     (1<<IP_PASSSEC))) ||
+			     (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) ||
 	    optname == IP_MULTICAST_TTL ||
 	    optname == IP_MULTICAST_LOOP) {
 		if (optlen >= sizeof(int)) {
@@ -878,6 +878,16 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 		err = xfrm_user_policy(sk, optname, optval, optlen);
 		break;
 
+	case IP_TRANSPARENT:
+		if (!capable(CAP_NET_ADMIN)) {
+			err = -EPERM;
+			break;
+		}
+		if (optlen < 1)
+			goto e_inval;
+		inet->transparent = !!val;
+		break;
+
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -1130,6 +1140,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 	case IP_FREEBIND:
 		val = inet->freebind;
 		break;
+	case IP_TRANSPARENT:
+		val = inet->transparent;
+		break;
 	default:
 		release_sock(sk);
 		return -ENOPROTOOPT;
diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig deleted file mode 100644 index 09d0c3f35669..000000000000 --- a/net/ipv4/ipvs/Kconfig +++ /dev/null | |||
@@ -1,224 +0,0 @@ | |||
1 | # | ||
2 | # IP Virtual Server configuration | ||
3 | # | ||
4 | menuconfig IP_VS | ||
5 | tristate "IP virtual server support (EXPERIMENTAL)" | ||
6 | depends on NETFILTER | ||
7 | ---help--- | ||
8 | IP Virtual Server support will let you build a high-performance | ||
9 | virtual server based on cluster of two or more real servers. This | ||
10 | option must be enabled for at least one of the clustered computers | ||
11 | that will take care of intercepting incoming connections to a | ||
12 | single IP address and scheduling them to real servers. | ||
13 | |||
14 | Three request dispatching techniques are implemented, they are | ||
15 | virtual server via NAT, virtual server via tunneling and virtual | ||
16 | server via direct routing. The several scheduling algorithms can | ||
17 | be used to choose which server the connection is directed to, | ||
18 | thus load balancing can be achieved among the servers. For more | ||
19 | information and its administration program, please visit the | ||
20 | following URL: <http://www.linuxvirtualserver.org/>. | ||
21 | |||
22 | If you want to compile it in kernel, say Y. To compile it as a | ||
23 | module, choose M here. If unsure, say N. | ||
24 | |||
25 | if IP_VS | ||
26 | |||
27 | config IP_VS_DEBUG | ||
28 | bool "IP virtual server debugging" | ||
29 | ---help--- | ||
30 | Say Y here if you want to get additional messages useful in | ||
31 | debugging the IP virtual server code. You can change the debug | ||
32 | level in /proc/sys/net/ipv4/vs/debug_level | ||
33 | |||
34 | config IP_VS_TAB_BITS | ||
35 | int "IPVS connection table size (the Nth power of 2)" | ||
36 | default "12" | ||
37 | ---help--- | ||
38 | The IPVS connection hash table uses the chaining scheme to handle | ||
39 | hash collisions. Using a big IPVS connection hash table will greatly | ||
40 | reduce conflicts when there are hundreds of thousands of connections | ||
41 | in the hash table. | ||
42 | |||
43 | Note the table size must be power of 2. The table size will be the | ||
44 | value of 2 to the your input number power. The number to choose is | ||
45 | from 8 to 20, the default number is 12, which means the table size | ||
46 | is 4096. Don't input the number too small, otherwise you will lose | ||
47 | performance on it. You can adapt the table size yourself, according | ||
48 | to your virtual server application. It is good to set the table size | ||
49 | not far less than the number of connections per second multiplying | ||
50 | average lasting time of connection in the table. For example, your | ||
51 | virtual server gets 200 connections per second, the connection lasts | ||
52 | for 200 seconds in average in the connection table, the table size | ||
53 | should be not far less than 200x200, it is good to set the table | ||
54 | size 32768 (2**15). | ||
55 | |||
56 | Another note that each connection occupies 128 bytes effectively and | ||
57 | each hash entry uses 8 bytes, so you can estimate how much memory is | ||
58 | needed for your box. | ||
59 | |||
60 | comment "IPVS transport protocol load balancing support" | ||
61 | |||
62 | config IP_VS_PROTO_TCP | ||
63 | bool "TCP load balancing support" | ||
64 | ---help--- | ||
65 | This option enables support for load balancing TCP transport | ||
66 | protocol. Say Y if unsure. | ||
67 | |||
68 | config IP_VS_PROTO_UDP | ||
69 | bool "UDP load balancing support" | ||
70 | ---help--- | ||
71 | This option enables support for load balancing UDP transport | ||
72 | protocol. Say Y if unsure. | ||
73 | |||
74 | config IP_VS_PROTO_ESP | ||
75 | bool "ESP load balancing support" | ||
76 | ---help--- | ||
77 | This option enables support for load balancing ESP (Encapsulation | ||
78 | Security Payload) transport protocol. Say Y if unsure. | ||
79 | |||
80 | config IP_VS_PROTO_AH | ||
81 | bool "AH load balancing support" | ||
82 | ---help--- | ||
83 | This option enables support for load balancing AH (Authentication | ||
84 | Header) transport protocol. Say Y if unsure. | ||
85 | |||
86 | comment "IPVS scheduler" | ||
87 | |||
88 | config IP_VS_RR | ||
89 | tristate "round-robin scheduling" | ||
90 | ---help--- | ||
91 | The robin-robin scheduling algorithm simply directs network | ||
92 | connections to different real servers in a round-robin manner. | ||
93 | |||
94 | If you want to compile it in kernel, say Y. To compile it as a | ||
95 | module, choose M here. If unsure, say N. | ||
96 | |||
97 | config IP_VS_WRR | ||
98 | tristate "weighted round-robin scheduling" | ||
99 | ---help--- | ||
100 | The weighted robin-robin scheduling algorithm directs network | ||
101 | connections to different real servers based on server weights | ||
102 | in a round-robin manner. Servers with higher weights receive | ||
103 | new connections first than those with less weights, and servers | ||
104 | with higher weights get more connections than those with less | ||
105 | weights and servers with equal weights get equal connections. | ||
106 | |||
107 | If you want to compile it in kernel, say Y. To compile it as a | ||
108 | module, choose M here. If unsure, say N. | ||
109 | |||
110 | config IP_VS_LC | ||
111 | tristate "least-connection scheduling" | ||
112 | ---help--- | ||
113 | The least-connection scheduling algorithm directs network | ||
114 | connections to the server with the least number of active | ||
115 | connections. | ||
116 | |||
117 | If you want to compile it in kernel, say Y. To compile it as a | ||
118 | module, choose M here. If unsure, say N. | ||
119 | |||
120 | config IP_VS_WLC | ||
121 | tristate "weighted least-connection scheduling" | ||
122 | ---help--- | ||
123 | The weighted least-connection scheduling algorithm directs network | ||
124 | connections to the server with the least active connections | ||
125 | normalized by the server weight. | ||
126 | |||
127 | If you want to compile it in kernel, say Y. To compile it as a | ||
128 | module, choose M here. If unsure, say N. | ||
129 | |||
130 | config IP_VS_LBLC | ||
131 | tristate "locality-based least-connection scheduling" | ||
132 | ---help--- | ||
133 | The locality-based least-connection scheduling algorithm is for | ||
134 | destination IP load balancing. It is usually used in cache cluster. | ||
135 | This algorithm usually directs packet destined for an IP address to | ||
136 | its server if the server is alive and under load. If the server is | ||
137 | overloaded (its active connection numbers is larger than its weight) | ||
138 | and there is a server in its half load, then allocate the weighted | ||
139 | least-connection server to this IP address. | ||
140 | |||
141 | If you want to compile it in kernel, say Y. To compile it as a | ||
142 | module, choose M here. If unsure, say N. | ||
143 | |||
144 | config IP_VS_LBLCR | ||
145 | tristate "locality-based least-connection with replication scheduling" | ||
146 | ---help--- | ||
147 | The locality-based least-connection with replication scheduling | ||
148 | algorithm is also for destination IP load balancing. It is | ||
149 | usually used in cache cluster. It differs from the LBLC scheduling | ||
150 | as follows: the load balancer maintains mappings from a target | ||
151 | to a set of server nodes that can serve the target. Requests for | ||
152 | a target are assigned to the least-connection node in the target's | ||
153 | server set. If all the node in the server set are over loaded, | ||
154 | it picks up a least-connection node in the cluster and adds it | ||
155 | in the sever set for the target. If the server set has not been | ||
156 | modified for the specified time, the most loaded node is removed | ||
157 | from the server set, in order to avoid high degree of replication. | ||
158 | |||
159 | If you want to compile it in kernel, say Y. To compile it as a | ||
160 | module, choose M here. If unsure, say N. | ||
161 | |||
162 | config IP_VS_DH | ||
163 | tristate "destination hashing scheduling" | ||
164 | ---help--- | ||
165 | The destination hashing scheduling algorithm assigns network | ||
166 | connections to the servers through looking up a statically assigned | ||
167 | hash table by their destination IP addresses. | ||
168 | |||
169 | If you want to compile it in kernel, say Y. To compile it as a | ||
170 | module, choose M here. If unsure, say N. | ||
171 | |||
172 | config IP_VS_SH | ||
173 | tristate "source hashing scheduling" | ||
174 | ---help--- | ||
175 | The source hashing scheduling algorithm assigns network | ||
176 | connections to the servers through looking up a statically assigned | ||
177 | hash table by their source IP addresses. | ||
178 | |||
179 | If you want to compile it in kernel, say Y. To compile it as a | ||
180 | module, choose M here. If unsure, say N. | ||
181 | |||
182 | config IP_VS_SED | ||
183 | tristate "shortest expected delay scheduling" | ||
184 | ---help--- | ||
185 | The shortest expected delay scheduling algorithm assigns network | ||
186 | connections to the server with the shortest expected delay. The | ||
187 | expected delay that the job will experience is (Ci + 1) / Ui if | ||
188 | sent to the ith server, in which Ci is the number of connections | ||
189 | on the ith server and Ui is the fixed service rate (weight) | ||
190 | of the ith server. | ||
191 | |||
192 | If you want to compile it in kernel, say Y. To compile it as a | ||
193 | module, choose M here. If unsure, say N. | ||
194 | |||
195 | config IP_VS_NQ | ||
196 | tristate "never queue scheduling" | ||
197 | ---help--- | ||
198 | The never queue scheduling algorithm adopts a two-speed model. | ||
199 | When there is an idle server available, the job will be sent to | ||
200 | the idle server, instead of waiting for a fast one. When there | ||
201 | is no idle server available, the job will be sent to the server | ||
202 | that minimize its expected delay (The Shortest Expected Delay | ||
203 | scheduling algorithm). | ||
204 | |||
205 | If you want to compile it in kernel, say Y. To compile it as a | ||
206 | module, choose M here. If unsure, say N. | ||
207 | |||
208 | comment 'IPVS application helper' | ||
209 | |||
210 | config IP_VS_FTP | ||
211 | tristate "FTP protocol helper" | ||
212 | depends on IP_VS_PROTO_TCP | ||
213 | ---help--- | ||
214 | FTP is a protocol that transfers IP address and/or port number in | ||
215 | the payload. In the virtual server via Network Address Translation, | ||
216 | the IP address and port number of real servers cannot be sent to | ||
217 | clients in ftp connections directly, so FTP protocol helper is | ||
218 | required for tracking the connection and mangling it back to that of | ||
219 | virtual service. | ||
220 | |||
221 | If you want to compile it in kernel, say Y. To compile it as a | ||
222 | module, choose M here. If unsure, say N. | ||
223 | |||
224 | endif # IP_VS | ||
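
For illustration: a PASV reply carries the data-connection endpoint as text, e.g. "227 Entering Passive Mode (192,168,0,10,4,1)", so under NAT the helper must swap the real server's address for the virtual one. A hypothetical sketch of building the replacement token follows; the real helper (ip_vs_ftp.c) splices it into the packet with ip_vs_skb_replace(), which appears later in this diff:

    #include <stdint.h>
    #include <stdio.h>

    /* Format the virtual address/port (host byte order) as the
     * "h1,h2,h3,h4,p1,p2" token used inside a PASV reply. */
    int pasv_token(char *buf, size_t len, uint32_t vaddr, uint16_t vport)
    {
        return snprintf(buf, len, "%u,%u,%u,%u,%u,%u",
                        (vaddr >> 24) & 0xff, (vaddr >> 16) & 0xff,
                        (vaddr >> 8) & 0xff, vaddr & 0xff,
                        (vport >> 8) & 0xff, vport & 0xff);
    }
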
diff --git a/net/ipv4/ipvs/Makefile b/net/ipv4/ipvs/Makefile deleted file mode 100644 index 30e85de9ffff..000000000000 --- a/net/ipv4/ipvs/Makefile +++ /dev/null | |||
@@ -1,34 +0,0 @@ | |||
1 | # | ||
2 | # Makefile for the IPVS modules on top of IPv4. | ||
3 | # | ||
4 | |||
5 | # IPVS transport protocol load balancing support | ||
6 | ip_vs_proto-objs-y := | ||
7 | ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o | ||
8 | ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o | ||
9 | ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o | ||
10 | ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o | ||
11 | |||
12 | ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ | ||
13 | ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ | ||
14 | ip_vs_est.o ip_vs_proto.o \ | ||
15 | $(ip_vs_proto-objs-y) | ||
16 | |||
17 | |||
18 | # IPVS core | ||
19 | obj-$(CONFIG_IP_VS) += ip_vs.o | ||
20 | |||
21 | # IPVS schedulers | ||
22 | obj-$(CONFIG_IP_VS_RR) += ip_vs_rr.o | ||
23 | obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o | ||
24 | obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o | ||
25 | obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o | ||
26 | obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o | ||
27 | obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o | ||
28 | obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o | ||
29 | obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o | ||
30 | obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o | ||
31 | obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o | ||
32 | |||
33 | # IPVS application helpers | ||
34 | obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o | ||
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c deleted file mode 100644 index 201b8ea3020d..000000000000 --- a/net/ipv4/ipvs/ip_vs_app.c +++ /dev/null | |||
@@ -1,622 +0,0 @@ | |||
1 | /* | ||
2 | * ip_vs_app.c: Application module support for IPVS | ||
3 | * | ||
4 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * Most of the code here is taken from ip_masq_app.c in kernel 2.2. The | ||
12 | * difference is that the ip_vs_app module handles the reverse direction | ||
13 | * (incoming requests and outgoing responses). | ||
14 | * | ||
15 | * IP_MASQ_APP application masquerading module | ||
16 | * | ||
17 | * Author: Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar> | ||
18 | * | ||
19 | */ | ||
20 | |||
21 | #include <linux/module.h> | ||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/skbuff.h> | ||
24 | #include <linux/in.h> | ||
25 | #include <linux/ip.h> | ||
26 | #include <linux/netfilter.h> | ||
27 | #include <net/net_namespace.h> | ||
28 | #include <net/protocol.h> | ||
29 | #include <net/tcp.h> | ||
30 | #include <asm/system.h> | ||
31 | #include <linux/stat.h> | ||
32 | #include <linux/proc_fs.h> | ||
33 | #include <linux/seq_file.h> | ||
34 | #include <linux/mutex.h> | ||
35 | |||
36 | #include <net/ip_vs.h> | ||
37 | |||
38 | EXPORT_SYMBOL(register_ip_vs_app); | ||
39 | EXPORT_SYMBOL(unregister_ip_vs_app); | ||
40 | EXPORT_SYMBOL(register_ip_vs_app_inc); | ||
41 | |||
42 | /* ipvs application list head */ | ||
43 | static LIST_HEAD(ip_vs_app_list); | ||
44 | static DEFINE_MUTEX(__ip_vs_app_mutex); | ||
45 | |||
46 | |||
47 | /* | ||
48 | * Get an ip_vs_app object | ||
49 | */ | ||
50 | static inline int ip_vs_app_get(struct ip_vs_app *app) | ||
51 | { | ||
52 | return try_module_get(app->module); | ||
53 | } | ||
54 | |||
55 | |||
56 | static inline void ip_vs_app_put(struct ip_vs_app *app) | ||
57 | { | ||
58 | module_put(app->module); | ||
59 | } | ||
60 | |||
61 | |||
62 | /* | ||
63 | * Allocate/initialize app incarnation and register it in proto apps. | ||
64 | */ | ||
65 | static int | ||
66 | ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) | ||
67 | { | ||
68 | struct ip_vs_protocol *pp; | ||
69 | struct ip_vs_app *inc; | ||
70 | int ret; | ||
71 | |||
72 | if (!(pp = ip_vs_proto_get(proto))) | ||
73 | return -EPROTONOSUPPORT; | ||
74 | |||
75 | if (!pp->unregister_app) | ||
76 | return -EOPNOTSUPP; | ||
77 | |||
78 | inc = kmemdup(app, sizeof(*inc), GFP_KERNEL); | ||
79 | if (!inc) | ||
80 | return -ENOMEM; | ||
81 | INIT_LIST_HEAD(&inc->p_list); | ||
82 | INIT_LIST_HEAD(&inc->incs_list); | ||
83 | inc->app = app; | ||
84 | inc->port = htons(port); | ||
85 | atomic_set(&inc->usecnt, 0); | ||
86 | |||
87 | if (app->timeouts) { | ||
88 | inc->timeout_table = | ||
89 | ip_vs_create_timeout_table(app->timeouts, | ||
90 | app->timeouts_size); | ||
91 | if (!inc->timeout_table) { | ||
92 | ret = -ENOMEM; | ||
93 | goto out; | ||
94 | } | ||
95 | } | ||
96 | |||
97 | ret = pp->register_app(inc); | ||
98 | if (ret) | ||
99 | goto out; | ||
100 | |||
101 | list_add(&inc->a_list, &app->incs_list); | ||
102 | IP_VS_DBG(9, "%s application %s:%u registered\n", | ||
103 | pp->name, inc->name, inc->port); | ||
104 | |||
105 | return 0; | ||
106 | |||
107 | out: | ||
108 | kfree(inc->timeout_table); | ||
109 | kfree(inc); | ||
110 | return ret; | ||
111 | } | ||
112 | |||
113 | |||
114 | /* | ||
115 | * Release app incarnation | ||
116 | */ | ||
117 | static void | ||
118 | ip_vs_app_inc_release(struct ip_vs_app *inc) | ||
119 | { | ||
120 | struct ip_vs_protocol *pp; | ||
121 | |||
122 | if (!(pp = ip_vs_proto_get(inc->protocol))) | ||
123 | return; | ||
124 | |||
125 | if (pp->unregister_app) | ||
126 | pp->unregister_app(inc); | ||
127 | |||
128 | IP_VS_DBG(9, "%s App %s:%u unregistered\n", | ||
129 | pp->name, inc->name, inc->port); | ||
130 | |||
131 | list_del(&inc->a_list); | ||
132 | |||
133 | kfree(inc->timeout_table); | ||
134 | kfree(inc); | ||
135 | } | ||
136 | |||
137 | |||
138 | /* | ||
139 | * Get reference to app inc (only called from softirq) | ||
140 | * | ||
141 | */ | ||
142 | int ip_vs_app_inc_get(struct ip_vs_app *inc) | ||
143 | { | ||
144 | int result; | ||
145 | |||
146 | atomic_inc(&inc->usecnt); | ||
147 | if (unlikely((result = ip_vs_app_get(inc->app)) != 1)) | ||
148 | atomic_dec(&inc->usecnt); | ||
149 | return result; | ||
150 | } | ||
151 | |||
152 | |||
153 | /* | ||
154 | * Put the app inc (only called from timer or net softirq) | ||
155 | */ | ||
156 | void ip_vs_app_inc_put(struct ip_vs_app *inc) | ||
157 | { | ||
158 | ip_vs_app_put(inc->app); | ||
159 | atomic_dec(&inc->usecnt); | ||
160 | } | ||
161 | |||
162 | |||
163 | /* | ||
164 | * Register an application incarnation in protocol applications | ||
165 | */ | ||
166 | int | ||
167 | register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port) | ||
168 | { | ||
169 | int result; | ||
170 | |||
171 | mutex_lock(&__ip_vs_app_mutex); | ||
172 | |||
173 | result = ip_vs_app_inc_new(app, proto, port); | ||
174 | |||
175 | mutex_unlock(&__ip_vs_app_mutex); | ||
176 | |||
177 | return result; | ||
178 | } | ||
179 | |||
180 | |||
181 | /* | ||
182 | * ip_vs_app registration routine | ||
183 | */ | ||
184 | int register_ip_vs_app(struct ip_vs_app *app) | ||
185 | { | ||
186 | /* increase the module use count */ | ||
187 | ip_vs_use_count_inc(); | ||
188 | |||
189 | mutex_lock(&__ip_vs_app_mutex); | ||
190 | |||
191 | list_add(&app->a_list, &ip_vs_app_list); | ||
192 | |||
193 | mutex_unlock(&__ip_vs_app_mutex); | ||
194 | |||
195 | return 0; | ||
196 | } | ||
197 | |||
198 | |||
199 | /* | ||
200 | * ip_vs_app unregistration routine | ||
201 | * We are sure there are no app incarnations attached to services | ||
202 | */ | ||
203 | void unregister_ip_vs_app(struct ip_vs_app *app) | ||
204 | { | ||
205 | struct ip_vs_app *inc, *nxt; | ||
206 | |||
207 | mutex_lock(&__ip_vs_app_mutex); | ||
208 | |||
209 | list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { | ||
210 | ip_vs_app_inc_release(inc); | ||
211 | } | ||
212 | |||
213 | list_del(&app->a_list); | ||
214 | |||
215 | mutex_unlock(&__ip_vs_app_mutex); | ||
216 | |||
217 | /* decrease the module use count */ | ||
218 | ip_vs_use_count_dec(); | ||
219 | } | ||
220 | |||
221 | |||
222 | /* | ||
223 | * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) | ||
224 | */ | ||
225 | int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp) | ||
226 | { | ||
227 | return pp->app_conn_bind(cp); | ||
228 | } | ||
229 | |||
230 | |||
231 | /* | ||
232 | * Unbind cp from application incarnation (called by cp destructor) | ||
233 | */ | ||
234 | void ip_vs_unbind_app(struct ip_vs_conn *cp) | ||
235 | { | ||
236 | struct ip_vs_app *inc = cp->app; | ||
237 | |||
238 | if (!inc) | ||
239 | return; | ||
240 | |||
241 | if (inc->unbind_conn) | ||
242 | inc->unbind_conn(inc, cp); | ||
243 | if (inc->done_conn) | ||
244 | inc->done_conn(inc, cp); | ||
245 | ip_vs_app_inc_put(inc); | ||
246 | cp->app = NULL; | ||
247 | } | ||
248 | |||
249 | |||
250 | /* | ||
251 | * Fixes th->seq based on ip_vs_seq info. | ||
252 | */ | ||
253 | static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) | ||
254 | { | ||
255 | __u32 seq = ntohl(th->seq); | ||
256 | |||
257 | /* | ||
258 | * Adjust seq with delta-offset for all packets after | ||
259 | * the most recent resized pkt seq and with previous_delta offset | ||
260 | * for all packets before most recent resized pkt seq. | ||
261 | */ | ||
262 | if (vseq->delta || vseq->previous_delta) { | ||
263 | if(after(seq, vseq->init_seq)) { | ||
264 | th->seq = htonl(seq + vseq->delta); | ||
265 | IP_VS_DBG(9, "vs_fix_seq(): added delta (%d) to seq\n", | ||
266 | vseq->delta); | ||
267 | } else { | ||
268 | th->seq = htonl(seq + vseq->previous_delta); | ||
269 | IP_VS_DBG(9, "vs_fix_seq(): added previous_delta " | ||
270 | "(%d) to seq\n", vseq->previous_delta); | ||
271 | } | ||
272 | } | ||
273 | } | ||
274 | |||
275 | |||
276 | /* | ||
277 | * Fixes th->ack_seq based on ip_vs_seq info. | ||
278 | */ | ||
279 | static inline void | ||
280 | vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) | ||
281 | { | ||
282 | __u32 ack_seq = ntohl(th->ack_seq); | ||
283 | |||
284 | /* | ||
285 | * Adjust ack_seq with delta-offset for | ||
286 | * the packets AFTER most recent resized pkt has caused a shift | ||
287 | * for packets before most recent resized pkt, use previous_delta | ||
288 | */ | ||
289 | if (vseq->delta || vseq->previous_delta) { | ||
290 | /* ack_seq is the number of the next octet the sender | ||
291 | expects to receive, so compare it with init_seq+delta */ | ||
292 | if(after(ack_seq, vseq->init_seq+vseq->delta)) { | ||
293 | th->ack_seq = htonl(ack_seq - vseq->delta); | ||
294 | IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted delta " | ||
295 | "(%d) from ack_seq\n", vseq->delta); | ||
296 | |||
297 | } else { | ||
298 | th->ack_seq = htonl(ack_seq - vseq->previous_delta); | ||
299 | IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted " | ||
300 | "previous_delta (%d) from ack_seq\n", | ||
301 | vseq->previous_delta); | ||
302 | } | ||
303 | } | ||
304 | } | ||
305 | |||
306 | |||
307 | /* | ||
308 | * Updates ip_vs_seq if pkt has been resized | ||
309 | * Assumes already checked proto==IPPROTO_TCP and diff!=0. | ||
310 | */ | ||
311 | static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, | ||
312 | unsigned flag, __u32 seq, int diff) | ||
313 | { | ||
314 | /* spinlock is to keep updating cp->flags atomic */ | ||
315 | spin_lock(&cp->lock); | ||
316 | if (!(cp->flags & flag) || after(seq, vseq->init_seq)) { | ||
317 | vseq->previous_delta = vseq->delta; | ||
318 | vseq->delta += diff; | ||
319 | vseq->init_seq = seq; | ||
320 | cp->flags |= flag; | ||
321 | } | ||
322 | spin_unlock(&cp->lock); | ||
323 | } | ||
324 | |||
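
A standalone illustration of the delta/previous_delta bookkeeping implemented above, with assumed numbers: one segment starting at sequence 1000 grows by 7 bytes, so later sequence numbers shift while earlier retransmits do not:

    #include <assert.h>
    #include <stdint.h>

    struct seq_state { uint32_t init_seq; int delta, previous_delta; };

    /* mirrors vs_fix_seq(): full delta after the resized segment,
     * previous_delta for packets from before it */
    static uint32_t fix_seq(const struct seq_state *v, uint32_t seq)
    {
        if ((int32_t)(seq - v->init_seq) > 0)   /* after(seq, init_seq) */
            return seq + v->delta;
        return seq + v->previous_delta;
    }

    int main(void)
    {
        struct seq_state v = {0};

        /* as in vs_seq_update(): a segment at seq 1000 grew by 7 */
        v.previous_delta = v.delta;
        v.delta += 7;
        v.init_seq = 1000;

        assert(fix_seq(&v, 2000) == 2007);  /* later data shifts */
        assert(fix_seq(&v, 500) == 500);    /* old retransmit doesn't */
        return 0;
    }
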
325 | static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, | ||
326 | struct ip_vs_app *app) | ||
327 | { | ||
328 | int diff; | ||
329 | const unsigned int tcp_offset = ip_hdrlen(skb); | ||
330 | struct tcphdr *th; | ||
331 | __u32 seq; | ||
332 | |||
333 | if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) | ||
334 | return 0; | ||
335 | |||
336 | th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); | ||
337 | |||
338 | /* | ||
339 | * Remember seq number in case this pkt gets resized | ||
340 | */ | ||
341 | seq = ntohl(th->seq); | ||
342 | |||
343 | /* | ||
344 | * Fix seq stuff if flagged as so. | ||
345 | */ | ||
346 | if (cp->flags & IP_VS_CONN_F_OUT_SEQ) | ||
347 | vs_fix_seq(&cp->out_seq, th); | ||
348 | if (cp->flags & IP_VS_CONN_F_IN_SEQ) | ||
349 | vs_fix_ack_seq(&cp->in_seq, th); | ||
350 | |||
351 | /* | ||
352 | * Call private output hook function | ||
353 | */ | ||
354 | if (app->pkt_out == NULL) | ||
355 | return 1; | ||
356 | |||
357 | if (!app->pkt_out(app, cp, skb, &diff)) | ||
358 | return 0; | ||
359 | |||
360 | /* | ||
361 | * Update ip_vs seq stuff if len has changed. | ||
362 | */ | ||
363 | if (diff != 0) | ||
364 | vs_seq_update(cp, &cp->out_seq, | ||
365 | IP_VS_CONN_F_OUT_SEQ, seq, diff); | ||
366 | |||
367 | return 1; | ||
368 | } | ||
369 | |||
370 | /* | ||
371 | * Output pkt hook. Will call bound ip_vs_app specific function | ||
372 | * called by ipvs packet handler, assumes previously checked cp!=NULL | ||
373 | * returns false if it can't handle packet (oom) | ||
374 | */ | ||
375 | int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb) | ||
376 | { | ||
377 | struct ip_vs_app *app; | ||
378 | |||
379 | /* | ||
380 | * check if application module is bound to | ||
381 | * this ip_vs_conn. | ||
382 | */ | ||
383 | if ((app = cp->app) == NULL) | ||
384 | return 1; | ||
385 | |||
386 | /* TCP is complicated */ | ||
387 | if (cp->protocol == IPPROTO_TCP) | ||
388 | return app_tcp_pkt_out(cp, skb, app); | ||
389 | |||
390 | /* | ||
391 | * Call private output hook function | ||
392 | */ | ||
393 | if (app->pkt_out == NULL) | ||
394 | return 1; | ||
395 | |||
396 | return app->pkt_out(app, cp, skb, NULL); | ||
397 | } | ||
398 | |||
399 | |||
400 | static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, | ||
401 | struct ip_vs_app *app) | ||
402 | { | ||
403 | int diff; | ||
404 | const unsigned int tcp_offset = ip_hdrlen(skb); | ||
405 | struct tcphdr *th; | ||
406 | __u32 seq; | ||
407 | |||
408 | if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) | ||
409 | return 0; | ||
410 | |||
411 | th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); | ||
412 | |||
413 | /* | ||
414 | * Remember seq number in case this pkt gets resized | ||
415 | */ | ||
416 | seq = ntohl(th->seq); | ||
417 | |||
418 | /* | ||
419 | * Fix seq stuff if flagged as so. | ||
420 | */ | ||
421 | if (cp->flags & IP_VS_CONN_F_IN_SEQ) | ||
422 | vs_fix_seq(&cp->in_seq, th); | ||
423 | if (cp->flags & IP_VS_CONN_F_OUT_SEQ) | ||
424 | vs_fix_ack_seq(&cp->out_seq, th); | ||
425 | |||
426 | /* | ||
427 | * Call private input hook function | ||
428 | */ | ||
429 | if (app->pkt_in == NULL) | ||
430 | return 1; | ||
431 | |||
432 | if (!app->pkt_in(app, cp, skb, &diff)) | ||
433 | return 0; | ||
434 | |||
435 | /* | ||
436 | * Update ip_vs seq stuff if len has changed. | ||
437 | */ | ||
438 | if (diff != 0) | ||
439 | vs_seq_update(cp, &cp->in_seq, | ||
440 | IP_VS_CONN_F_IN_SEQ, seq, diff); | ||
441 | |||
442 | return 1; | ||
443 | } | ||
444 | |||
445 | /* | ||
446 | * Input pkt hook. Will call bound ip_vs_app specific function | ||
447 | * called by ipvs packet handler, assumes previously checked cp!=NULL. | ||
448 | * returns false if can't handle packet (oom). | ||
449 | */ | ||
450 | int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) | ||
451 | { | ||
452 | struct ip_vs_app *app; | ||
453 | |||
454 | /* | ||
455 | * check if application module is bound to | ||
456 | * this ip_vs_conn. | ||
457 | */ | ||
458 | if ((app = cp->app) == NULL) | ||
459 | return 1; | ||
460 | |||
461 | /* TCP is complicated */ | ||
462 | if (cp->protocol == IPPROTO_TCP) | ||
463 | return app_tcp_pkt_in(cp, skb, app); | ||
464 | |||
465 | /* | ||
466 | * Call private input hook function | ||
467 | */ | ||
468 | if (app->pkt_in == NULL) | ||
469 | return 1; | ||
470 | |||
471 | return app->pkt_in(app, cp, skb, NULL); | ||
472 | } | ||
473 | |||
474 | |||
475 | #ifdef CONFIG_PROC_FS | ||
476 | /* | ||
477 | * /proc/net/ip_vs_app entry function | ||
478 | */ | ||
479 | |||
480 | static struct ip_vs_app *ip_vs_app_idx(loff_t pos) | ||
481 | { | ||
482 | struct ip_vs_app *app, *inc; | ||
483 | |||
484 | list_for_each_entry(app, &ip_vs_app_list, a_list) { | ||
485 | list_for_each_entry(inc, &app->incs_list, a_list) { | ||
486 | if (pos-- == 0) | ||
487 | return inc; | ||
488 | } | ||
489 | } | ||
490 | return NULL; | ||
491 | |||
492 | } | ||
493 | |||
494 | static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) | ||
495 | { | ||
496 | mutex_lock(&__ip_vs_app_mutex); | ||
497 | |||
498 | return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN; | ||
499 | } | ||
500 | |||
501 | static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
502 | { | ||
503 | struct ip_vs_app *inc, *app; | ||
504 | struct list_head *e; | ||
505 | |||
506 | ++*pos; | ||
507 | if (v == SEQ_START_TOKEN) | ||
508 | return ip_vs_app_idx(0); | ||
509 | |||
510 | inc = v; | ||
511 | app = inc->app; | ||
512 | |||
513 | if ((e = inc->a_list.next) != &app->incs_list) | ||
514 | return list_entry(e, struct ip_vs_app, a_list); | ||
515 | |||
516 | /* go on to next application */ | ||
517 | for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) { | ||
518 | app = list_entry(e, struct ip_vs_app, a_list); | ||
519 | list_for_each_entry(inc, &app->incs_list, a_list) { | ||
520 | return inc; | ||
521 | } | ||
522 | } | ||
523 | return NULL; | ||
524 | } | ||
525 | |||
526 | static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) | ||
527 | { | ||
528 | mutex_unlock(&__ip_vs_app_mutex); | ||
529 | } | ||
530 | |||
531 | static int ip_vs_app_seq_show(struct seq_file *seq, void *v) | ||
532 | { | ||
533 | if (v == SEQ_START_TOKEN) | ||
534 | seq_puts(seq, "prot port usecnt name\n"); | ||
535 | else { | ||
536 | const struct ip_vs_app *inc = v; | ||
537 | |||
538 | seq_printf(seq, "%-3s %-7u %-6d %-17s\n", | ||
539 | ip_vs_proto_name(inc->protocol), | ||
540 | ntohs(inc->port), | ||
541 | atomic_read(&inc->usecnt), | ||
542 | inc->name); | ||
543 | } | ||
544 | return 0; | ||
545 | } | ||
546 | |||
547 | static const struct seq_operations ip_vs_app_seq_ops = { | ||
548 | .start = ip_vs_app_seq_start, | ||
549 | .next = ip_vs_app_seq_next, | ||
550 | .stop = ip_vs_app_seq_stop, | ||
551 | .show = ip_vs_app_seq_show, | ||
552 | }; | ||
553 | |||
554 | static int ip_vs_app_open(struct inode *inode, struct file *file) | ||
555 | { | ||
556 | return seq_open(file, &ip_vs_app_seq_ops); | ||
557 | } | ||
558 | |||
559 | static const struct file_operations ip_vs_app_fops = { | ||
560 | .owner = THIS_MODULE, | ||
561 | .open = ip_vs_app_open, | ||
562 | .read = seq_read, | ||
563 | .llseek = seq_lseek, | ||
564 | .release = seq_release, | ||
565 | }; | ||
566 | #endif | ||
567 | |||
568 | |||
569 | /* | ||
570 | * Replace a segment of data with a new segment | ||
571 | */ | ||
572 | int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, | ||
573 | char *o_buf, int o_len, char *n_buf, int n_len) | ||
574 | { | ||
575 | int diff; | ||
576 | int o_offset; | ||
577 | int o_left; | ||
578 | |||
579 | EnterFunction(9); | ||
580 | |||
581 | diff = n_len - o_len; | ||
582 | o_offset = o_buf - (char *)skb->data; | ||
583 | /* The length of the data remaining after o_buf+o_len in the skb */ | ||
584 | o_left = skb->len - (o_offset + o_len); | ||
585 | |||
586 | if (diff <= 0) { | ||
587 | memmove(o_buf + n_len, o_buf + o_len, o_left); | ||
588 | memcpy(o_buf, n_buf, n_len); | ||
589 | skb_trim(skb, skb->len + diff); | ||
590 | } else if (diff <= skb_tailroom(skb)) { | ||
591 | skb_put(skb, diff); | ||
592 | memmove(o_buf + n_len, o_buf + o_len, o_left); | ||
593 | memcpy(o_buf, n_buf, n_len); | ||
594 | } else { | ||
595 | if (pskb_expand_head(skb, skb_headroom(skb), diff, pri)) | ||
596 | return -ENOMEM; | ||
597 | skb_put(skb, diff); | ||
598 | memmove(skb->data + o_offset + n_len, | ||
599 | skb->data + o_offset + o_len, o_left); | ||
600 | skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len); | ||
601 | } | ||
602 | |||
603 | /* must update the iph total length here */ | ||
604 | ip_hdr(skb)->tot_len = htons(skb->len); | ||
605 | |||
606 | LeaveFunction(9); | ||
607 | return 0; | ||
608 | } | ||
609 | |||
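
A userspace analogue of the three cases above, assuming a plain growable buffer instead of an skb: shrink in place, grow into spare room (the tailroom case), or reallocate when the growth does not fit:

    #include <stdlib.h>
    #include <string.h>

    struct buf { char *data; size_t len, cap; };

    int buf_replace(struct buf *b, size_t off, size_t o_len,
                    const char *n, size_t n_len)
    {
        size_t tail = b->len - (off + o_len);   /* data after old segment */

        if (n_len > o_len && b->len + (n_len - o_len) > b->cap) {
            /* case 3: growth doesn't fit, reallocate first */
            char *p = realloc(b->data, b->len + (n_len - o_len));
            if (!p)
                return -1;              /* like the -ENOMEM path above */
            b->data = p;
            b->cap = b->len + (n_len - o_len);
        }
        /* cases 1 and 2 share the move+copy; only the length differs */
        memmove(b->data + off + n_len, b->data + off + o_len, tail);
        memcpy(b->data + off, n, n_len);
        b->len = b->len - o_len + n_len;        /* shrinks when n_len < o_len */
        return 0;
    }
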
610 | |||
611 | int __init ip_vs_app_init(void) | ||
612 | { | ||
613 | /* we will replace it with proc_net_ipvs_create() soon */ | ||
614 | proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops); | ||
615 | return 0; | ||
616 | } | ||
617 | |||
618 | |||
619 | void ip_vs_app_cleanup(void) | ||
620 | { | ||
621 | proc_net_remove(&init_net, "ip_vs_app"); | ||
622 | } | ||
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c deleted file mode 100644 index 44a6872dc245..000000000000 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ /dev/null | |||
@@ -1,1023 +0,0 @@ | |||
1 | /* | ||
2 | * IPVS An implementation of the IP virtual server support for the | ||
3 | * LINUX operating system. IPVS is now implemented as a module | ||
4 | * over the Netfilter framework. IPVS can be used to build a | ||
5 | * high-performance and highly available server based on a | ||
6 | * cluster of servers. | ||
7 | * | ||
8 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
9 | * Peter Kese <peter.kese@ijs.si> | ||
10 | * Julian Anastasov <ja@ssi.bg> | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License | ||
14 | * as published by the Free Software Foundation; either version | ||
15 | * 2 of the License, or (at your option) any later version. | ||
16 | * | ||
17 | * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese, | ||
18 | * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms | ||
19 | * and others. Much of the code here is taken from the IP MASQ code of kernel 2.2. | ||
20 | * | ||
21 | * Changes: | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #include <linux/interrupt.h> | ||
26 | #include <linux/in.h> | ||
27 | #include <linux/net.h> | ||
28 | #include <linux/kernel.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/vmalloc.h> | ||
31 | #include <linux/proc_fs.h> /* for proc_net_* */ | ||
32 | #include <linux/seq_file.h> | ||
33 | #include <linux/jhash.h> | ||
34 | #include <linux/random.h> | ||
35 | |||
36 | #include <net/net_namespace.h> | ||
37 | #include <net/ip_vs.h> | ||
38 | |||
39 | |||
40 | /* | ||
41 | * Connection hash table: for input and output packets lookups of IPVS | ||
42 | */ | ||
43 | static struct list_head *ip_vs_conn_tab; | ||
44 | |||
45 | /* SLAB cache for IPVS connections */ | ||
46 | static struct kmem_cache *ip_vs_conn_cachep __read_mostly; | ||
47 | |||
48 | /* counter for current IPVS connections */ | ||
49 | static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); | ||
50 | |||
51 | /* counter for no client port connections */ | ||
52 | static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); | ||
53 | |||
54 | /* random value for IPVS connection hash */ | ||
55 | static unsigned int ip_vs_conn_rnd; | ||
56 | |||
57 | /* | ||
58 | * Fine locking granularity for big connection hash table | ||
59 | */ | ||
60 | #define CT_LOCKARRAY_BITS 4 | ||
61 | #define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS) | ||
62 | #define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1) | ||
63 | |||
64 | struct ip_vs_aligned_lock | ||
65 | { | ||
66 | rwlock_t l; | ||
67 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); | ||
68 | |||
69 | /* lock array for conn table */ | ||
70 | static struct ip_vs_aligned_lock | ||
71 | __ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned; | ||
72 | |||
73 | static inline void ct_read_lock(unsigned key) | ||
74 | { | ||
75 | read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); | ||
76 | } | ||
77 | |||
78 | static inline void ct_read_unlock(unsigned key) | ||
79 | { | ||
80 | read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); | ||
81 | } | ||
82 | |||
83 | static inline void ct_write_lock(unsigned key) | ||
84 | { | ||
85 | write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); | ||
86 | } | ||
87 | |||
88 | static inline void ct_write_unlock(unsigned key) | ||
89 | { | ||
90 | write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); | ||
91 | } | ||
92 | |||
93 | static inline void ct_read_lock_bh(unsigned key) | ||
94 | { | ||
95 | read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); | ||
96 | } | ||
97 | |||
98 | static inline void ct_read_unlock_bh(unsigned key) | ||
99 | { | ||
100 | read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); | ||
101 | } | ||
102 | |||
103 | static inline void ct_write_lock_bh(unsigned key) | ||
104 | { | ||
105 | write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); | ||
106 | } | ||
107 | |||
108 | static inline void ct_write_unlock_bh(unsigned key) | ||
109 | { | ||
110 | write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); | ||
111 | } | ||
112 | |||
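
A small illustration of the striping above: with CT_LOCKARRAY_BITS = 4, every bucket whose index has the same low four bits shares one cache-aligned rwlock, so even a large table needs only 16 locks:

    #include <assert.h>

    int main(void)
    {
        unsigned mask = (1u << 4) - 1;      /* CT_LOCKARRAY_MASK */

        /* buckets 0x005, 0x015 and 0x125 all map to lock 5 */
        assert((0x005u & mask) == 5);
        assert((0x015u & mask) == 5);
        assert((0x125u & mask) == 5);
        return 0;
    }
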
113 | |||
114 | /* | ||
115 | * Returns hash value for IPVS connection entry | ||
116 | */ | ||
117 | static unsigned int ip_vs_conn_hashkey(unsigned proto, __be32 addr, __be16 port) | ||
118 | { | ||
119 | return jhash_3words((__force u32)addr, (__force u32)port, proto, ip_vs_conn_rnd) | ||
120 | & IP_VS_CONN_TAB_MASK; | ||
121 | } | ||
122 | |||
123 | |||
124 | /* | ||
125 | * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. | ||
126 | * returns bool success. | ||
127 | */ | ||
128 | static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) | ||
129 | { | ||
130 | unsigned hash; | ||
131 | int ret; | ||
132 | |||
133 | /* Hash by protocol, client address and port */ | ||
134 | hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport); | ||
135 | |||
136 | ct_write_lock(hash); | ||
137 | |||
138 | if (!(cp->flags & IP_VS_CONN_F_HASHED)) { | ||
139 | list_add(&cp->c_list, &ip_vs_conn_tab[hash]); | ||
140 | cp->flags |= IP_VS_CONN_F_HASHED; | ||
141 | atomic_inc(&cp->refcnt); | ||
142 | ret = 1; | ||
143 | } else { | ||
144 | IP_VS_ERR("ip_vs_conn_hash(): request for already hashed, " | ||
145 | "called from %p\n", __builtin_return_address(0)); | ||
146 | ret = 0; | ||
147 | } | ||
148 | |||
149 | ct_write_unlock(hash); | ||
150 | |||
151 | return ret; | ||
152 | } | ||
153 | |||
154 | |||
155 | /* | ||
156 | * UNhashes ip_vs_conn from ip_vs_conn_tab. | ||
157 | * returns bool success. | ||
158 | */ | ||
159 | static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) | ||
160 | { | ||
161 | unsigned hash; | ||
162 | int ret; | ||
163 | |||
164 | /* unhash it and decrease its reference counter */ | ||
165 | hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport); | ||
166 | |||
167 | ct_write_lock(hash); | ||
168 | |||
169 | if (cp->flags & IP_VS_CONN_F_HASHED) { | ||
170 | list_del(&cp->c_list); | ||
171 | cp->flags &= ~IP_VS_CONN_F_HASHED; | ||
172 | atomic_dec(&cp->refcnt); | ||
173 | ret = 1; | ||
174 | } else | ||
175 | ret = 0; | ||
176 | |||
177 | ct_write_unlock(hash); | ||
178 | |||
179 | return ret; | ||
180 | } | ||
181 | |||
182 | |||
183 | /* | ||
184 | * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. | ||
185 | * Called for pkts coming from OUTside-to-INside. | ||
186 | * s_addr, s_port: pkt source address (foreign host) | ||
187 | * d_addr, d_port: pkt dest address (load balancer) | ||
188 | */ | ||
189 | static inline struct ip_vs_conn *__ip_vs_conn_in_get | ||
190 | (int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) | ||
191 | { | ||
192 | unsigned hash; | ||
193 | struct ip_vs_conn *cp; | ||
194 | |||
195 | hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); | ||
196 | |||
197 | ct_read_lock(hash); | ||
198 | |||
199 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { | ||
200 | if (s_addr==cp->caddr && s_port==cp->cport && | ||
201 | d_port==cp->vport && d_addr==cp->vaddr && | ||
202 | ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && | ||
203 | protocol==cp->protocol) { | ||
204 | /* HIT */ | ||
205 | atomic_inc(&cp->refcnt); | ||
206 | ct_read_unlock(hash); | ||
207 | return cp; | ||
208 | } | ||
209 | } | ||
210 | |||
211 | ct_read_unlock(hash); | ||
212 | |||
213 | return NULL; | ||
214 | } | ||
215 | |||
216 | struct ip_vs_conn *ip_vs_conn_in_get | ||
217 | (int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) | ||
218 | { | ||
219 | struct ip_vs_conn *cp; | ||
220 | |||
221 | cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port); | ||
222 | if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) | ||
223 | cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port); | ||
224 | |||
225 | IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", | ||
226 | ip_vs_proto_name(protocol), | ||
227 | NIPQUAD(s_addr), ntohs(s_port), | ||
228 | NIPQUAD(d_addr), ntohs(d_port), | ||
229 | cp?"hit":"not hit"); | ||
230 | |||
231 | return cp; | ||
232 | } | ||
233 | |||
234 | /* Get reference to connection template */ | ||
235 | struct ip_vs_conn *ip_vs_ct_in_get | ||
236 | (int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) | ||
237 | { | ||
238 | unsigned hash; | ||
239 | struct ip_vs_conn *cp; | ||
240 | |||
241 | hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); | ||
242 | |||
243 | ct_read_lock(hash); | ||
244 | |||
245 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { | ||
246 | if (s_addr==cp->caddr && s_port==cp->cport && | ||
247 | d_port==cp->vport && d_addr==cp->vaddr && | ||
248 | cp->flags & IP_VS_CONN_F_TEMPLATE && | ||
249 | protocol==cp->protocol) { | ||
250 | /* HIT */ | ||
251 | atomic_inc(&cp->refcnt); | ||
252 | goto out; | ||
253 | } | ||
254 | } | ||
255 | cp = NULL; | ||
256 | |||
257 | out: | ||
258 | ct_read_unlock(hash); | ||
259 | |||
260 | IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", | ||
261 | ip_vs_proto_name(protocol), | ||
262 | NIPQUAD(s_addr), ntohs(s_port), | ||
263 | NIPQUAD(d_addr), ntohs(d_port), | ||
264 | cp?"hit":"not hit"); | ||
265 | |||
266 | return cp; | ||
267 | } | ||
268 | |||
269 | /* | ||
270 | * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. | ||
271 | * Called for pkts coming from inside-to-OUTside. | ||
272 | * s_addr, s_port: pkt source address (inside host) | ||
273 | * d_addr, d_port: pkt dest address (foreign host) | ||
274 | */ | ||
275 | struct ip_vs_conn *ip_vs_conn_out_get | ||
276 | (int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) | ||
277 | { | ||
278 | unsigned hash; | ||
279 | struct ip_vs_conn *cp, *ret=NULL; | ||
280 | |||
281 | /* | ||
282 | * Check for "full" addressed entries | ||
283 | */ | ||
284 | hash = ip_vs_conn_hashkey(protocol, d_addr, d_port); | ||
285 | |||
286 | ct_read_lock(hash); | ||
287 | |||
288 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { | ||
289 | if (d_addr == cp->caddr && d_port == cp->cport && | ||
290 | s_port == cp->dport && s_addr == cp->daddr && | ||
291 | protocol == cp->protocol) { | ||
292 | /* HIT */ | ||
293 | atomic_inc(&cp->refcnt); | ||
294 | ret = cp; | ||
295 | break; | ||
296 | } | ||
297 | } | ||
298 | |||
299 | ct_read_unlock(hash); | ||
300 | |||
301 | IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", | ||
302 | ip_vs_proto_name(protocol), | ||
303 | NIPQUAD(s_addr), ntohs(s_port), | ||
304 | NIPQUAD(d_addr), ntohs(d_port), | ||
305 | ret?"hit":"not hit"); | ||
306 | |||
307 | return ret; | ||
308 | } | ||
309 | |||
310 | |||
311 | /* | ||
312 | * Put back the conn and restart its timer with its timeout | ||
313 | */ | ||
314 | void ip_vs_conn_put(struct ip_vs_conn *cp) | ||
315 | { | ||
316 | /* reset it to expire after its timeout */ | ||
317 | mod_timer(&cp->timer, jiffies+cp->timeout); | ||
318 | |||
319 | __ip_vs_conn_put(cp); | ||
320 | } | ||
321 | |||
322 | |||
323 | /* | ||
324 | * Fill a no_client_port connection with a client port number | ||
325 | */ | ||
326 | void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) | ||
327 | { | ||
328 | if (ip_vs_conn_unhash(cp)) { | ||
329 | spin_lock(&cp->lock); | ||
330 | if (cp->flags & IP_VS_CONN_F_NO_CPORT) { | ||
331 | atomic_dec(&ip_vs_conn_no_cport_cnt); | ||
332 | cp->flags &= ~IP_VS_CONN_F_NO_CPORT; | ||
333 | cp->cport = cport; | ||
334 | } | ||
335 | spin_unlock(&cp->lock); | ||
336 | |||
337 | /* hash on new dport */ | ||
338 | ip_vs_conn_hash(cp); | ||
339 | } | ||
340 | } | ||
341 | |||
342 | |||
343 | /* | ||
344 | * Bind a connection entry with the corresponding packet_xmit. | ||
345 | * Called by ip_vs_conn_new. | ||
346 | */ | ||
347 | static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp) | ||
348 | { | ||
349 | switch (IP_VS_FWD_METHOD(cp)) { | ||
350 | case IP_VS_CONN_F_MASQ: | ||
351 | cp->packet_xmit = ip_vs_nat_xmit; | ||
352 | break; | ||
353 | |||
354 | case IP_VS_CONN_F_TUNNEL: | ||
355 | cp->packet_xmit = ip_vs_tunnel_xmit; | ||
356 | break; | ||
357 | |||
358 | case IP_VS_CONN_F_DROUTE: | ||
359 | cp->packet_xmit = ip_vs_dr_xmit; | ||
360 | break; | ||
361 | |||
362 | case IP_VS_CONN_F_LOCALNODE: | ||
363 | cp->packet_xmit = ip_vs_null_xmit; | ||
364 | break; | ||
365 | |||
366 | case IP_VS_CONN_F_BYPASS: | ||
367 | cp->packet_xmit = ip_vs_bypass_xmit; | ||
368 | break; | ||
369 | } | ||
370 | } | ||
371 | |||
372 | |||
373 | static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest) | ||
374 | { | ||
375 | return atomic_read(&dest->activeconns) | ||
376 | + atomic_read(&dest->inactconns); | ||
377 | } | ||
378 | |||
379 | /* | ||
380 | * Bind a connection entry with a virtual service destination | ||
381 | * Called just after a new connection entry is created. | ||
382 | */ | ||
383 | static inline void | ||
384 | ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) | ||
385 | { | ||
386 | /* if dest is NULL, then return directly */ | ||
387 | if (!dest) | ||
388 | return; | ||
389 | |||
390 | /* Increase the refcnt counter of the dest */ | ||
391 | atomic_inc(&dest->refcnt); | ||
392 | |||
393 | /* Bind with the destination and its corresponding transmitter */ | ||
394 | if ((cp->flags & IP_VS_CONN_F_SYNC) && | ||
395 | (!(cp->flags & IP_VS_CONN_F_TEMPLATE))) | ||
396 | /* if the connection is not a template and is created | ||
397 | * by sync, preserve the activity flag. | ||
398 | */ | ||
399 | cp->flags |= atomic_read(&dest->conn_flags) & | ||
400 | (~IP_VS_CONN_F_INACTIVE); | ||
401 | else | ||
402 | cp->flags |= atomic_read(&dest->conn_flags); | ||
403 | cp->dest = dest; | ||
404 | |||
405 | IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " | ||
406 | "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " | ||
407 | "dest->refcnt:%d\n", | ||
408 | ip_vs_proto_name(cp->protocol), | ||
409 | NIPQUAD(cp->caddr), ntohs(cp->cport), | ||
410 | NIPQUAD(cp->vaddr), ntohs(cp->vport), | ||
411 | NIPQUAD(cp->daddr), ntohs(cp->dport), | ||
412 | ip_vs_fwd_tag(cp), cp->state, | ||
413 | cp->flags, atomic_read(&cp->refcnt), | ||
414 | atomic_read(&dest->refcnt)); | ||
415 | |||
416 | /* Update the connection counters */ | ||
417 | if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { | ||
418 | /* It is a normal connection, so increase the inactive | ||
419 | connection counter because it is in TCP SYNRECV | ||
420 | state (inactive) or another protocol's inactive state */ | ||
421 | if ((cp->flags & IP_VS_CONN_F_SYNC) && | ||
422 | (!(cp->flags & IP_VS_CONN_F_INACTIVE))) | ||
423 | atomic_inc(&dest->activeconns); | ||
424 | else | ||
425 | atomic_inc(&dest->inactconns); | ||
426 | } else { | ||
427 | /* It is a persistent connection/template, so increase | ||
428 | the persistent connection counter */ | ||
429 | atomic_inc(&dest->persistconns); | ||
430 | } | ||
431 | |||
432 | if (dest->u_threshold != 0 && | ||
433 | ip_vs_dest_totalconns(dest) >= dest->u_threshold) | ||
434 | dest->flags |= IP_VS_DEST_F_OVERLOAD; | ||
435 | } | ||
436 | |||
437 | |||
438 | /* | ||
439 | * Check if there is a destination for the connection, if so | ||
440 | * bind the connection to the destination. | ||
441 | */ | ||
442 | struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) | ||
443 | { | ||
444 | struct ip_vs_dest *dest; | ||
445 | |||
446 | if ((cp) && (!cp->dest)) { | ||
447 | dest = ip_vs_find_dest(cp->daddr, cp->dport, | ||
448 | cp->vaddr, cp->vport, cp->protocol); | ||
449 | ip_vs_bind_dest(cp, dest); | ||
450 | return dest; | ||
451 | } else | ||
452 | return NULL; | ||
453 | } | ||
454 | |||
455 | |||
456 | /* | ||
457 | * Unbind a connection entry with its VS destination | ||
458 | * Called by the ip_vs_conn_expire function. | ||
459 | */ | ||
460 | static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) | ||
461 | { | ||
462 | struct ip_vs_dest *dest = cp->dest; | ||
463 | |||
464 | if (!dest) | ||
465 | return; | ||
466 | |||
467 | IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " | ||
468 | "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " | ||
469 | "dest->refcnt:%d\n", | ||
470 | ip_vs_proto_name(cp->protocol), | ||
471 | NIPQUAD(cp->caddr), ntohs(cp->cport), | ||
472 | NIPQUAD(cp->vaddr), ntohs(cp->vport), | ||
473 | NIPQUAD(cp->daddr), ntohs(cp->dport), | ||
474 | ip_vs_fwd_tag(cp), cp->state, | ||
475 | cp->flags, atomic_read(&cp->refcnt), | ||
476 | atomic_read(&dest->refcnt)); | ||
477 | |||
478 | /* Update the connection counters */ | ||
479 | if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { | ||
480 | /* It is a normal connection, so decrease the inactconns | ||
481 | or activeconns counter */ | ||
482 | if (cp->flags & IP_VS_CONN_F_INACTIVE) { | ||
483 | atomic_dec(&dest->inactconns); | ||
484 | } else { | ||
485 | atomic_dec(&dest->activeconns); | ||
486 | } | ||
487 | } else { | ||
488 | /* It is a persistent connection/template, so decrease | ||
489 | the persistent connection counter */ | ||
490 | atomic_dec(&dest->persistconns); | ||
491 | } | ||
492 | |||
493 | if (dest->l_threshold != 0) { | ||
494 | if (ip_vs_dest_totalconns(dest) < dest->l_threshold) | ||
495 | dest->flags &= ~IP_VS_DEST_F_OVERLOAD; | ||
496 | } else if (dest->u_threshold != 0) { | ||
497 | if (ip_vs_dest_totalconns(dest) * 4 < dest->u_threshold * 3) | ||
498 | dest->flags &= ~IP_VS_DEST_F_OVERLOAD; | ||
499 | } else { | ||
500 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) | ||
501 | dest->flags &= ~IP_VS_DEST_F_OVERLOAD; | ||
502 | } | ||
503 | |||
504 | /* | ||
505 | * Simply decrease the refcnt of the dest, because the | ||
506 | * dest will be either in service's destination list | ||
507 | * or in the trash. | ||
508 | */ | ||
509 | atomic_dec(&dest->refcnt); | ||
510 | } | ||
511 | |||
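
A small numeric illustration of the hysteresis above, assuming u_threshold = 100 and no l_threshold: the overload flag set when the count reaches the upper threshold clears only once the total drops below three quarters of it:

    #include <assert.h>

    int main(void)
    {
        int u_threshold = 100;

        assert(!(80 * 4 < u_threshold * 3)); /* 80 conns: still overloaded */
        assert(74 * 4 < u_threshold * 3);    /* 74 conns: flag clears */
        return 0;
    }
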
512 | |||
513 | /* | ||
514 | * Check whether the destination of a connection template is available. | ||
515 | * If available, return 1, otherwise invalidate this connection | ||
516 | * template and return 0. | ||
517 | */ | ||
518 | int ip_vs_check_template(struct ip_vs_conn *ct) | ||
519 | { | ||
520 | struct ip_vs_dest *dest = ct->dest; | ||
521 | |||
522 | /* | ||
523 | * Checking the dest server status. | ||
524 | */ | ||
525 | if ((dest == NULL) || | ||
526 | !(dest->flags & IP_VS_DEST_F_AVAILABLE) || | ||
527 | (sysctl_ip_vs_expire_quiescent_template && | ||
528 | (atomic_read(&dest->weight) == 0))) { | ||
529 | IP_VS_DBG(9, "check_template: dest not available for " | ||
530 | "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " | ||
531 | "-> d:%u.%u.%u.%u:%d\n", | ||
532 | ip_vs_proto_name(ct->protocol), | ||
533 | NIPQUAD(ct->caddr), ntohs(ct->cport), | ||
534 | NIPQUAD(ct->vaddr), ntohs(ct->vport), | ||
535 | NIPQUAD(ct->daddr), ntohs(ct->dport)); | ||
536 | |||
537 | /* | ||
538 | * Invalidate the connection template | ||
539 | */ | ||
540 | if (ct->vport != htons(0xffff)) { | ||
541 | if (ip_vs_conn_unhash(ct)) { | ||
542 | ct->dport = htons(0xffff); | ||
543 | ct->vport = htons(0xffff); | ||
544 | ct->cport = 0; | ||
545 | ip_vs_conn_hash(ct); | ||
546 | } | ||
547 | } | ||
548 | |||
549 | /* | ||
550 | * Simply decrease the refcnt of the template, | ||
551 | * don't restart its timer. | ||
552 | */ | ||
553 | atomic_dec(&ct->refcnt); | ||
554 | return 0; | ||
555 | } | ||
556 | return 1; | ||
557 | } | ||
558 | |||
559 | static void ip_vs_conn_expire(unsigned long data) | ||
560 | { | ||
561 | struct ip_vs_conn *cp = (struct ip_vs_conn *)data; | ||
562 | |||
563 | cp->timeout = 60*HZ; | ||
564 | |||
565 | /* | ||
566 | * hey, I'm using it | ||
567 | */ | ||
568 | atomic_inc(&cp->refcnt); | ||
569 | |||
570 | /* | ||
571 | * do I control anybody? | ||
572 | */ | ||
573 | if (atomic_read(&cp->n_control)) | ||
574 | goto expire_later; | ||
575 | |||
576 | /* | ||
577 | * unhash it if it is hashed in the conn table | ||
578 | */ | ||
579 | if (!ip_vs_conn_unhash(cp)) | ||
580 | goto expire_later; | ||
581 | |||
582 | /* | ||
583 | * refcnt==1 implies I'm the only referrer | ||
584 | */ | ||
585 | if (likely(atomic_read(&cp->refcnt) == 1)) { | ||
586 | /* delete the timer if it is activated by other users */ | ||
587 | if (timer_pending(&cp->timer)) | ||
588 | del_timer(&cp->timer); | ||
589 | |||
590 | /* does anybody control me? */ | ||
591 | if (cp->control) | ||
592 | ip_vs_control_del(cp); | ||
593 | |||
594 | if (unlikely(cp->app != NULL)) | ||
595 | ip_vs_unbind_app(cp); | ||
596 | ip_vs_unbind_dest(cp); | ||
597 | if (cp->flags & IP_VS_CONN_F_NO_CPORT) | ||
598 | atomic_dec(&ip_vs_conn_no_cport_cnt); | ||
599 | atomic_dec(&ip_vs_conn_count); | ||
600 | |||
601 | kmem_cache_free(ip_vs_conn_cachep, cp); | ||
602 | return; | ||
603 | } | ||
604 | |||
605 | /* hash it back to the table */ | ||
606 | ip_vs_conn_hash(cp); | ||
607 | |||
608 | expire_later: | ||
609 | IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n", | ||
610 | atomic_read(&cp->refcnt)-1, | ||
611 | atomic_read(&cp->n_control)); | ||
612 | |||
613 | ip_vs_conn_put(cp); | ||
614 | } | ||
615 | |||
616 | |||
617 | void ip_vs_conn_expire_now(struct ip_vs_conn *cp) | ||
618 | { | ||
619 | if (del_timer(&cp->timer)) | ||
620 | mod_timer(&cp->timer, jiffies); | ||
621 | } | ||
622 | |||
623 | |||
624 | /* | ||
625 | * Create a new connection entry and hash it into the ip_vs_conn_tab | ||
626 | */ | ||
627 | struct ip_vs_conn * | ||
628 | ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport, | ||
629 | __be32 daddr, __be16 dport, unsigned flags, | ||
630 | struct ip_vs_dest *dest) | ||
631 | { | ||
632 | struct ip_vs_conn *cp; | ||
633 | struct ip_vs_protocol *pp = ip_vs_proto_get(proto); | ||
634 | |||
635 | cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); | ||
636 | if (cp == NULL) { | ||
637 | IP_VS_ERR_RL("ip_vs_conn_new: no memory available.\n"); | ||
638 | return NULL; | ||
639 | } | ||
640 | |||
641 | INIT_LIST_HEAD(&cp->c_list); | ||
642 | setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); | ||
643 | cp->protocol = proto; | ||
644 | cp->caddr = caddr; | ||
645 | cp->cport = cport; | ||
646 | cp->vaddr = vaddr; | ||
647 | cp->vport = vport; | ||
648 | cp->daddr = daddr; | ||
649 | cp->dport = dport; | ||
650 | cp->flags = flags; | ||
651 | spin_lock_init(&cp->lock); | ||
652 | |||
653 | /* | ||
654 | * Mark the entry as referenced by the current thread before hashing | ||
655 | * it into the table, so that another thread running | ||
656 | * ip_vs_random_dropentry cannot drop this entry. | ||
657 | */ | ||
658 | atomic_set(&cp->refcnt, 1); | ||
659 | |||
660 | atomic_set(&cp->n_control, 0); | ||
661 | atomic_set(&cp->in_pkts, 0); | ||
662 | |||
663 | atomic_inc(&ip_vs_conn_count); | ||
664 | if (flags & IP_VS_CONN_F_NO_CPORT) | ||
665 | atomic_inc(&ip_vs_conn_no_cport_cnt); | ||
666 | |||
667 | /* Bind the connection with a destination server */ | ||
668 | ip_vs_bind_dest(cp, dest); | ||
669 | |||
670 | /* Set its state and timeout */ | ||
671 | cp->state = 0; | ||
672 | cp->timeout = 3*HZ; | ||
673 | |||
674 | /* Bind its packet transmitter */ | ||
675 | ip_vs_bind_xmit(cp); | ||
676 | |||
677 | if (unlikely(pp && atomic_read(&pp->appcnt))) | ||
678 | ip_vs_bind_app(cp, pp); | ||
679 | |||
680 | /* Hash it in the ip_vs_conn_tab finally */ | ||
681 | ip_vs_conn_hash(cp); | ||
682 | |||
683 | return cp; | ||
684 | } | ||
685 | |||
686 | |||
687 | /* | ||
688 | * /proc/net/ip_vs_conn entries | ||
689 | */ | ||
690 | #ifdef CONFIG_PROC_FS | ||
691 | |||
692 | static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) | ||
693 | { | ||
694 | int idx; | ||
695 | struct ip_vs_conn *cp; | ||
696 | |||
697 | for(idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) { | ||
698 | ct_read_lock_bh(idx); | ||
699 | list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { | ||
700 | if (pos-- == 0) { | ||
701 | seq->private = &ip_vs_conn_tab[idx]; | ||
702 | return cp; | ||
703 | } | ||
704 | } | ||
705 | ct_read_unlock_bh(idx); | ||
706 | } | ||
707 | |||
708 | return NULL; | ||
709 | } | ||
710 | |||
711 | static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) | ||
712 | { | ||
713 | seq->private = NULL; | ||
714 | return *pos ? ip_vs_conn_array(seq, *pos - 1) : SEQ_START_TOKEN; | ||
715 | } | ||
716 | |||
717 | static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
718 | { | ||
719 | struct ip_vs_conn *cp = v; | ||
720 | struct list_head *e, *l = seq->private; | ||
721 | int idx; | ||
722 | |||
723 | ++*pos; | ||
724 | if (v == SEQ_START_TOKEN) | ||
725 | return ip_vs_conn_array(seq, 0); | ||
726 | |||
727 | /* more on same hash chain? */ | ||
728 | if ((e = cp->c_list.next) != l) | ||
729 | return list_entry(e, struct ip_vs_conn, c_list); | ||
730 | |||
731 | idx = l - ip_vs_conn_tab; | ||
732 | ct_read_unlock_bh(idx); | ||
733 | |||
734 | while (++idx < IP_VS_CONN_TAB_SIZE) { | ||
735 | ct_read_lock_bh(idx); | ||
736 | list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { | ||
737 | seq->private = &ip_vs_conn_tab[idx]; | ||
738 | return cp; | ||
739 | } | ||
740 | ct_read_unlock_bh(idx); | ||
741 | } | ||
742 | seq->private = NULL; | ||
743 | return NULL; | ||
744 | } | ||
745 | |||
746 | static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) | ||
747 | { | ||
748 | struct list_head *l = seq->private; | ||
749 | |||
750 | if (l) | ||
751 | ct_read_unlock_bh(l - ip_vs_conn_tab); | ||
752 | } | ||
753 | |||
754 | static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) | ||
755 | { | ||
756 | |||
757 | if (v == SEQ_START_TOKEN) | ||
758 | seq_puts(seq, | ||
759 | "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires\n"); | ||
760 | else { | ||
761 | const struct ip_vs_conn *cp = v; | ||
762 | |||
763 | seq_printf(seq, | ||
764 | "%-3s %08X %04X %08X %04X %08X %04X %-11s %7lu\n", | ||
765 | ip_vs_proto_name(cp->protocol), | ||
766 | ntohl(cp->caddr), ntohs(cp->cport), | ||
767 | ntohl(cp->vaddr), ntohs(cp->vport), | ||
768 | ntohl(cp->daddr), ntohs(cp->dport), | ||
769 | ip_vs_state_name(cp->protocol, cp->state), | ||
770 | (cp->timer.expires-jiffies)/HZ); | ||
771 | } | ||
772 | return 0; | ||
773 | } | ||
774 | |||
775 | static const struct seq_operations ip_vs_conn_seq_ops = { | ||
776 | .start = ip_vs_conn_seq_start, | ||
777 | .next = ip_vs_conn_seq_next, | ||
778 | .stop = ip_vs_conn_seq_stop, | ||
779 | .show = ip_vs_conn_seq_show, | ||
780 | }; | ||
781 | |||
782 | static int ip_vs_conn_open(struct inode *inode, struct file *file) | ||
783 | { | ||
784 | return seq_open(file, &ip_vs_conn_seq_ops); | ||
785 | } | ||
786 | |||
787 | static const struct file_operations ip_vs_conn_fops = { | ||
788 | .owner = THIS_MODULE, | ||
789 | .open = ip_vs_conn_open, | ||
790 | .read = seq_read, | ||
791 | .llseek = seq_lseek, | ||
792 | .release = seq_release, | ||
793 | }; | ||
794 | |||
795 | static const char *ip_vs_origin_name(unsigned flags) | ||
796 | { | ||
797 | if (flags & IP_VS_CONN_F_SYNC) | ||
798 | return "SYNC"; | ||
799 | else | ||
800 | return "LOCAL"; | ||
801 | } | ||
802 | |||
803 | static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) | ||
804 | { | ||
805 | |||
806 | if (v == SEQ_START_TOKEN) | ||
807 | seq_puts(seq, | ||
808 | "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n"); | ||
809 | else { | ||
810 | const struct ip_vs_conn *cp = v; | ||
811 | |||
812 | seq_printf(seq, | ||
813 | "%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n", | ||
814 | ip_vs_proto_name(cp->protocol), | ||
815 | ntohl(cp->caddr), ntohs(cp->cport), | ||
816 | ntohl(cp->vaddr), ntohs(cp->vport), | ||
817 | ntohl(cp->daddr), ntohs(cp->dport), | ||
818 | ip_vs_state_name(cp->protocol, cp->state), | ||
819 | ip_vs_origin_name(cp->flags), | ||
820 | (cp->timer.expires-jiffies)/HZ); | ||
821 | } | ||
822 | return 0; | ||
823 | } | ||
824 | |||
825 | static const struct seq_operations ip_vs_conn_sync_seq_ops = { | ||
826 | .start = ip_vs_conn_seq_start, | ||
827 | .next = ip_vs_conn_seq_next, | ||
828 | .stop = ip_vs_conn_seq_stop, | ||
829 | .show = ip_vs_conn_sync_seq_show, | ||
830 | }; | ||
831 | |||
832 | static int ip_vs_conn_sync_open(struct inode *inode, struct file *file) | ||
833 | { | ||
834 | return seq_open(file, &ip_vs_conn_sync_seq_ops); | ||
835 | } | ||
836 | |||
837 | static const struct file_operations ip_vs_conn_sync_fops = { | ||
838 | .owner = THIS_MODULE, | ||
839 | .open = ip_vs_conn_sync_open, | ||
840 | .read = seq_read, | ||
841 | .llseek = seq_lseek, | ||
842 | .release = seq_release, | ||
843 | }; | ||
844 | |||
845 | #endif | ||
846 | |||
847 | |||
848 | /* | ||
849 | * Randomly drop connection entries before running out of memory | ||
850 | */ | ||
851 | static inline int todrop_entry(struct ip_vs_conn *cp) | ||
852 | { | ||
853 | /* | ||
854 | * The drop rate array needs tuning for real environments. | ||
855 | * Called from timer bh only => no locking | ||
856 | */ | ||
857 | static const char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; | ||
858 | static char todrop_counter[9] = {0}; | ||
859 | int i; | ||
860 | |||
861 | /* if the conn entry hasn't lasted for 60 seconds, don't drop it. | ||
862 | This will leave enough time for normal connections to get | ||
863 | through. */ | ||
864 | if (time_before(cp->timeout + jiffies, cp->timer.expires + 60*HZ)) | ||
865 | return 0; | ||
866 | |||
867 | /* Don't drop the entry if its number of incoming packets is not | ||
868 | in the range [0, 8] */ | ||
869 | i = atomic_read(&cp->in_pkts); | ||
870 | if (i > 8 || i < 0) return 0; | ||
871 | |||
872 | if (!todrop_rate[i]) return 0; | ||
873 | if (--todrop_counter[i] > 0) return 0; | ||
874 | |||
875 | todrop_counter[i] = todrop_rate[i]; | ||
876 | return 1; | ||
877 | } | ||
878 | |||
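
A worked illustration of the rate table above: an entry whose in_pkts count is i gets dropped once per todrop_rate[i] candidates, so barely-used connections are shed most aggressively, while entries outside [0, 8] (or with rate 0) are never dropped:

    #include <assert.h>

    static const char rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
    static char counter[9];

    static int should_drop(int pkts)
    {
        if (pkts < 0 || pkts > 8 || !rate[pkts])
            return 0;
        if (--counter[pkts] > 0)
            return 0;
        counter[pkts] = rate[pkts];
        return 1;
    }

    int main(void)
    {
        int drops = 0;

        for (int i = 0; i < 8; i++)
            drops += should_drop(4);    /* rate 4: one in four */
        assert(drops == 2);
        return 0;
    }
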
879 | /* Called from keventd and must protect itself from softirqs */ | ||
880 | void ip_vs_random_dropentry(void) | ||
881 | { | ||
882 | int idx; | ||
883 | struct ip_vs_conn *cp; | ||
884 | |||
885 | /* | ||
886 | * Randomly scan 1/32 of the whole table every second | ||
887 | */ | ||
888 | for (idx = 0; idx < (IP_VS_CONN_TAB_SIZE>>5); idx++) { | ||
889 | unsigned hash = net_random() & IP_VS_CONN_TAB_MASK; | ||
890 | |||
891 | /* | ||
892 | * Lock is actually needed in this loop. | ||
893 | */ | ||
894 | ct_write_lock_bh(hash); | ||
895 | |||
896 | list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { | ||
897 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) | ||
898 | /* connection template */ | ||
899 | continue; | ||
900 | |||
901 | if (cp->protocol == IPPROTO_TCP) { | ||
902 | switch(cp->state) { | ||
903 | case IP_VS_TCP_S_SYN_RECV: | ||
904 | case IP_VS_TCP_S_SYNACK: | ||
905 | break; | ||
906 | |||
907 | case IP_VS_TCP_S_ESTABLISHED: | ||
908 | if (todrop_entry(cp)) | ||
909 | break; | ||
910 | continue; | ||
911 | |||
912 | default: | ||
913 | continue; | ||
914 | } | ||
915 | } else { | ||
916 | if (!todrop_entry(cp)) | ||
917 | continue; | ||
918 | } | ||
919 | |||
920 | IP_VS_DBG(4, "del connection\n"); | ||
921 | ip_vs_conn_expire_now(cp); | ||
922 | if (cp->control) { | ||
923 | IP_VS_DBG(4, "del conn template\n"); | ||
924 | ip_vs_conn_expire_now(cp->control); | ||
925 | } | ||
926 | } | ||
927 | ct_write_unlock_bh(hash); | ||
928 | } | ||
929 | } | ||
930 | |||
931 | |||
932 | /* | ||
933 | * Flush all the connection entries in the ip_vs_conn_tab | ||
934 | */ | ||
935 | static void ip_vs_conn_flush(void) | ||
936 | { | ||
937 | int idx; | ||
938 | struct ip_vs_conn *cp; | ||
939 | |||
940 | flush_again: | ||
941 | for (idx=0; idx<IP_VS_CONN_TAB_SIZE; idx++) { | ||
942 | /* | ||
943 | * Lock is actually needed in this loop. | ||
944 | */ | ||
945 | ct_write_lock_bh(idx); | ||
946 | |||
947 | list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { | ||
948 | |||
949 | IP_VS_DBG(4, "del connection\n"); | ||
950 | ip_vs_conn_expire_now(cp); | ||
951 | if (cp->control) { | ||
952 | IP_VS_DBG(4, "del conn template\n"); | ||
953 | ip_vs_conn_expire_now(cp->control); | ||
954 | } | ||
955 | } | ||
956 | ct_write_unlock_bh(idx); | ||
957 | } | ||
958 | |||
959 | /* the counter may not be zero, because some conn entries may | ||
960 | still be run by a slow timer handler or be unhashed but still referenced */ | ||
961 | if (atomic_read(&ip_vs_conn_count) != 0) { | ||
962 | schedule(); | ||
963 | goto flush_again; | ||
964 | } | ||
965 | } | ||
966 | |||
967 | |||
968 | int __init ip_vs_conn_init(void) | ||
969 | { | ||
970 | int idx; | ||
971 | |||
972 | /* | ||
973 | * Allocate the connection hash table and initialize its list heads | ||
974 | */ | ||
975 | ip_vs_conn_tab = vmalloc(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head)); | ||
976 | if (!ip_vs_conn_tab) | ||
977 | return -ENOMEM; | ||
978 | |||
979 | /* Allocate ip_vs_conn slab cache */ | ||
980 | ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn", | ||
981 | sizeof(struct ip_vs_conn), 0, | ||
982 | SLAB_HWCACHE_ALIGN, NULL); | ||
983 | if (!ip_vs_conn_cachep) { | ||
984 | vfree(ip_vs_conn_tab); | ||
985 | return -ENOMEM; | ||
986 | } | ||
987 | |||
988 | IP_VS_INFO("Connection hash table configured " | ||
989 | "(size=%d, memory=%ldKbytes)\n", | ||
990 | IP_VS_CONN_TAB_SIZE, | ||
991 | (long)(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head))/1024); | ||
992 | IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n", | ||
993 | sizeof(struct ip_vs_conn)); | ||
994 | |||
995 | for (idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) { | ||
996 | INIT_LIST_HEAD(&ip_vs_conn_tab[idx]); | ||
997 | } | ||
998 | |||
999 | for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) { | ||
1000 | rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); | ||
1001 | } | ||
1002 | |||
1003 | proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops); | ||
1004 | proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); | ||
1005 | |||
1006 | /* calculate the random value for connection hash */ | ||
1007 | get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); | ||
1008 | |||
1009 | return 0; | ||
1010 | } | ||
1011 | |||
1012 | |||
1013 | void ip_vs_conn_cleanup(void) | ||
1014 | { | ||
1015 | /* flush all the connection entries first */ | ||
1016 | ip_vs_conn_flush(); | ||
1017 | |||
1018 | /* Release the empty cache */ | ||
1019 | kmem_cache_destroy(ip_vs_conn_cachep); | ||
1020 | proc_net_remove(&init_net, "ip_vs_conn"); | ||
1021 | proc_net_remove(&init_net, "ip_vs_conn_sync"); | ||
1022 | vfree(ip_vs_conn_tab); | ||
1023 | } | ||
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c deleted file mode 100644 index a7879eafc3b5..000000000000 --- a/net/ipv4/ipvs/ip_vs_core.c +++ /dev/null | |||
@@ -1,1125 +0,0 @@ | |||
1 | /* | ||
2 | * IPVS An implementation of the IP virtual server support for the | ||
3 | * LINUX operating system. IPVS is now implemented as a module | ||
4 | * over the Netfilter framework. IPVS can be used to build a | ||
5 | * high-performance and highly available server based on a | ||
6 | * cluster of servers. | ||
7 | * | ||
8 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
9 | * Peter Kese <peter.kese@ijs.si> | ||
10 | * Julian Anastasov <ja@ssi.bg> | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License | ||
14 | * as published by the Free Software Foundation; either version | ||
15 | * 2 of the License, or (at your option) any later version. | ||
16 | * | ||
17 | * The IPVS code for kernel 2.2 was done by Wensong Zhang and Peter Kese, | ||
18 | * with changes/fixes from Julian Anastasov, Lars Marowsky-Bree, Horms | ||
19 | * and others. | ||
20 | * | ||
21 | * Changes: | ||
22 | * Paul `Rusty' Russell properly handle non-linear skbs | ||
23 | * Harald Welte don't use nfcache | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/module.h> | ||
28 | #include <linux/kernel.h> | ||
29 | #include <linux/ip.h> | ||
30 | #include <linux/tcp.h> | ||
31 | #include <linux/icmp.h> | ||
32 | |||
33 | #include <net/ip.h> | ||
34 | #include <net/tcp.h> | ||
35 | #include <net/udp.h> | ||
36 | #include <net/icmp.h> /* for icmp_send */ | ||
37 | #include <net/route.h> | ||
38 | |||
39 | #include <linux/netfilter.h> | ||
40 | #include <linux/netfilter_ipv4.h> | ||
41 | |||
42 | #include <net/ip_vs.h> | ||
43 | |||
44 | |||
45 | EXPORT_SYMBOL(register_ip_vs_scheduler); | ||
46 | EXPORT_SYMBOL(unregister_ip_vs_scheduler); | ||
47 | EXPORT_SYMBOL(ip_vs_skb_replace); | ||
48 | EXPORT_SYMBOL(ip_vs_proto_name); | ||
49 | EXPORT_SYMBOL(ip_vs_conn_new); | ||
50 | EXPORT_SYMBOL(ip_vs_conn_in_get); | ||
51 | EXPORT_SYMBOL(ip_vs_conn_out_get); | ||
52 | #ifdef CONFIG_IP_VS_PROTO_TCP | ||
53 | EXPORT_SYMBOL(ip_vs_tcp_conn_listen); | ||
54 | #endif | ||
55 | EXPORT_SYMBOL(ip_vs_conn_put); | ||
56 | #ifdef CONFIG_IP_VS_DEBUG | ||
57 | EXPORT_SYMBOL(ip_vs_get_debug_level); | ||
58 | #endif | ||
59 | |||
60 | |||
61 | /* ID used in ICMP lookups */ | ||
62 | #define icmp_id(icmph) (((icmph)->un).echo.id) | ||
63 | |||
64 | const char *ip_vs_proto_name(unsigned proto) | ||
65 | { | ||
66 | static char buf[20]; | ||
67 | |||
68 | switch (proto) { | ||
69 | case IPPROTO_IP: | ||
70 | return "IP"; | ||
71 | case IPPROTO_UDP: | ||
72 | return "UDP"; | ||
73 | case IPPROTO_TCP: | ||
74 | return "TCP"; | ||
75 | case IPPROTO_ICMP: | ||
76 | return "ICMP"; | ||
77 | default: | ||
78 | sprintf(buf, "IP_%d", proto); | ||
79 | return buf; | ||
80 | } | ||
81 | } | ||
82 | |||
83 | void ip_vs_init_hash_table(struct list_head *table, int rows) | ||
84 | { | ||
85 | while (--rows >= 0) | ||
86 | INIT_LIST_HEAD(&table[rows]); | ||
87 | } | ||
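
ip_vs_init_hash_table() simply turns an array into a set of empty circular list heads, one per bucket. A standalone userspace sketch of the same pattern (list_head and its init helper are local stand-ins here, not the kernel API):

        #include <stdio.h>

        struct list_head { struct list_head *next, *prev; };

        static void init_list_head(struct list_head *h)
        {
                h->next = h->prev = h;          /* an empty list points at itself */
        }

        static void init_hash_table(struct list_head *table, int rows)
        {
                while (--rows >= 0)
                        init_list_head(&table[rows]);
        }

        int main(void)
        {
                struct list_head tab[16];

                init_hash_table(tab, 16);
                printf("bucket 0 empty: %d\n", tab[0].next == &tab[0]);
                return 0;
        }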
88 | |||
89 | static inline void | ||
90 | ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | ||
91 | { | ||
92 | struct ip_vs_dest *dest = cp->dest; | ||
93 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { | ||
94 | spin_lock(&dest->stats.lock); | ||
95 | dest->stats.inpkts++; | ||
96 | dest->stats.inbytes += skb->len; | ||
97 | spin_unlock(&dest->stats.lock); | ||
98 | |||
99 | spin_lock(&dest->svc->stats.lock); | ||
100 | dest->svc->stats.inpkts++; | ||
101 | dest->svc->stats.inbytes += skb->len; | ||
102 | spin_unlock(&dest->svc->stats.lock); | ||
103 | |||
104 | spin_lock(&ip_vs_stats.lock); | ||
105 | ip_vs_stats.inpkts++; | ||
106 | ip_vs_stats.inbytes += skb->len; | ||
107 | spin_unlock(&ip_vs_stats.lock); | ||
108 | } | ||
109 | } | ||
110 | |||
111 | |||
112 | static inline void | ||
113 | ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | ||
114 | { | ||
115 | struct ip_vs_dest *dest = cp->dest; | ||
116 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { | ||
117 | spin_lock(&dest->stats.lock); | ||
118 | dest->stats.outpkts++; | ||
119 | dest->stats.outbytes += skb->len; | ||
120 | spin_unlock(&dest->stats.lock); | ||
121 | |||
122 | spin_lock(&dest->svc->stats.lock); | ||
123 | dest->svc->stats.outpkts++; | ||
124 | dest->svc->stats.outbytes += skb->len; | ||
125 | spin_unlock(&dest->svc->stats.lock); | ||
126 | |||
127 | spin_lock(&ip_vs_stats.lock); | ||
128 | ip_vs_stats.outpkts++; | ||
129 | ip_vs_stats.outbytes += skb->len; | ||
130 | spin_unlock(&ip_vs_stats.lock); | ||
131 | } | ||
132 | } | ||
133 | |||
134 | |||
135 | static inline void | ||
136 | ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) | ||
137 | { | ||
138 | spin_lock(&cp->dest->stats.lock); | ||
139 | cp->dest->stats.conns++; | ||
140 | spin_unlock(&cp->dest->stats.lock); | ||
141 | |||
142 | spin_lock(&svc->stats.lock); | ||
143 | svc->stats.conns++; | ||
144 | spin_unlock(&svc->stats.lock); | ||
145 | |||
146 | spin_lock(&ip_vs_stats.lock); | ||
147 | ip_vs_stats.conns++; | ||
148 | spin_unlock(&ip_vs_stats.lock); | ||
149 | } | ||
150 | |||
151 | |||
152 | static inline int | ||
153 | ip_vs_set_state(struct ip_vs_conn *cp, int direction, | ||
154 | const struct sk_buff *skb, | ||
155 | struct ip_vs_protocol *pp) | ||
156 | { | ||
157 | if (unlikely(!pp->state_transition)) | ||
158 | return 0; | ||
159 | return pp->state_transition(cp, direction, skb, pp); | ||
160 | } | ||
161 | |||
162 | |||
163 | /* | ||
164 | * IPVS persistent scheduling function | ||
165 | * It creates a connection entry according to its template if exists, | ||
166 | * or selects a server and creates a connection entry plus a template. | ||
167 | * Locking: we are svc user (svc->refcnt), so we hold all dests too | ||
168 | * Protocols supported: TCP, UDP | ||
169 | */ | ||
170 | static struct ip_vs_conn * | ||
171 | ip_vs_sched_persist(struct ip_vs_service *svc, | ||
172 | const struct sk_buff *skb, | ||
173 | __be16 ports[2]) | ||
174 | { | ||
175 | struct ip_vs_conn *cp = NULL; | ||
176 | struct iphdr *iph = ip_hdr(skb); | ||
177 | struct ip_vs_dest *dest; | ||
178 | struct ip_vs_conn *ct; | ||
179 | __be16 dport; /* destination port to forward */ | ||
180 | __be32 snet; /* source network of the client, after masking */ | ||
181 | |||
182 | /* Mask saddr with the netmask to adjust template granularity */ | ||
183 | snet = iph->saddr & svc->netmask; | ||
184 | |||
185 | IP_VS_DBG(6, "p-schedule: src %u.%u.%u.%u:%u dest %u.%u.%u.%u:%u " | ||
186 | "mnet %u.%u.%u.%u\n", | ||
187 | NIPQUAD(iph->saddr), ntohs(ports[0]), | ||
188 | NIPQUAD(iph->daddr), ntohs(ports[1]), | ||
189 | NIPQUAD(snet)); | ||
190 | |||
191 | /* | ||
192 | * FTP is a complicated network protocol that uses a control | ||
193 | * connection and separate data connections. For active FTP, the | ||
194 | * FTP server initiates the data connection to the client, usually | ||
195 | * from source port 20. For passive FTP, the server tells the | ||
196 | * client which port it is passively listening on, and the client | ||
197 | * opens the data connection. In tunneling or direct routing mode, | ||
198 | * the load balancer only sees the client-to-server half of the | ||
199 | * connection, so the port number is unknown to it. So, a conn template like | ||
200 | * <caddr, 0, vaddr, 0, daddr, 0> is created for persistent FTP | ||
201 | * service, and a template like <caddr, 0, vaddr, vport, daddr, dport> | ||
202 | * is created for other persistent services. | ||
203 | */ | ||
204 | if (ports[1] == svc->port) { | ||
205 | /* Check if a template already exists */ | ||
206 | if (svc->port != FTPPORT) | ||
207 | ct = ip_vs_ct_in_get(iph->protocol, snet, 0, | ||
208 | iph->daddr, ports[1]); | ||
209 | else | ||
210 | ct = ip_vs_ct_in_get(iph->protocol, snet, 0, | ||
211 | iph->daddr, 0); | ||
212 | |||
213 | if (!ct || !ip_vs_check_template(ct)) { | ||
214 | /* | ||
215 | * No template found or the dest of the connection | ||
216 | * template is not available. | ||
217 | */ | ||
218 | dest = svc->scheduler->schedule(svc, skb); | ||
219 | if (dest == NULL) { | ||
220 | IP_VS_DBG(1, "p-schedule: no dest found.\n"); | ||
221 | return NULL; | ||
222 | } | ||
223 | |||
224 | /* | ||
225 | * Create a template like <protocol,caddr,0, | ||
226 | * vaddr,vport,daddr,dport> for non-ftp service, | ||
227 | * and <protocol,caddr,0,vaddr,0,daddr,0> | ||
228 | * for ftp service. | ||
229 | */ | ||
230 | if (svc->port != FTPPORT) | ||
231 | ct = ip_vs_conn_new(iph->protocol, | ||
232 | snet, 0, | ||
233 | iph->daddr, | ||
234 | ports[1], | ||
235 | dest->addr, dest->port, | ||
236 | IP_VS_CONN_F_TEMPLATE, | ||
237 | dest); | ||
238 | else | ||
239 | ct = ip_vs_conn_new(iph->protocol, | ||
240 | snet, 0, | ||
241 | iph->daddr, 0, | ||
242 | dest->addr, 0, | ||
243 | IP_VS_CONN_F_TEMPLATE, | ||
244 | dest); | ||
245 | if (ct == NULL) | ||
246 | return NULL; | ||
247 | |||
248 | ct->timeout = svc->timeout; | ||
249 | } else { | ||
250 | /* set destination with the found template */ | ||
251 | dest = ct->dest; | ||
252 | } | ||
253 | dport = dest->port; | ||
254 | } else { | ||
255 | /* | ||
256 | * Note: persistent fwmark-based services and persistent | ||
257 | * port zero service are handled here. | ||
258 | * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0> | ||
259 | * port zero template: <protocol,caddr,0,vaddr,0,daddr,0> | ||
260 | */ | ||
261 | if (svc->fwmark) | ||
262 | ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0, | ||
263 | htonl(svc->fwmark), 0); | ||
264 | else | ||
265 | ct = ip_vs_ct_in_get(iph->protocol, snet, 0, | ||
266 | iph->daddr, 0); | ||
267 | |||
268 | if (!ct || !ip_vs_check_template(ct)) { | ||
269 | /* | ||
270 | * If it is not persistent port zero, return NULL, | ||
271 | * otherwise create a connection template. | ||
272 | */ | ||
273 | if (svc->port) | ||
274 | return NULL; | ||
275 | |||
276 | dest = svc->scheduler->schedule(svc, skb); | ||
277 | if (dest == NULL) { | ||
278 | IP_VS_DBG(1, "p-schedule: no dest found.\n"); | ||
279 | return NULL; | ||
280 | } | ||
281 | |||
282 | /* | ||
283 | * Create a template according to the service | ||
284 | */ | ||
285 | if (svc->fwmark) | ||
286 | ct = ip_vs_conn_new(IPPROTO_IP, | ||
287 | snet, 0, | ||
288 | htonl(svc->fwmark), 0, | ||
289 | dest->addr, 0, | ||
290 | IP_VS_CONN_F_TEMPLATE, | ||
291 | dest); | ||
292 | else | ||
293 | ct = ip_vs_conn_new(iph->protocol, | ||
294 | snet, 0, | ||
295 | iph->daddr, 0, | ||
296 | dest->addr, 0, | ||
297 | IP_VS_CONN_F_TEMPLATE, | ||
298 | dest); | ||
299 | if (ct == NULL) | ||
300 | return NULL; | ||
301 | |||
302 | ct->timeout = svc->timeout; | ||
303 | } else { | ||
304 | /* set destination with the found template */ | ||
305 | dest = ct->dest; | ||
306 | } | ||
307 | dport = ports[1]; | ||
308 | } | ||
309 | |||
310 | /* | ||
311 | * Create a new connection according to the template | ||
312 | */ | ||
313 | cp = ip_vs_conn_new(iph->protocol, | ||
314 | iph->saddr, ports[0], | ||
315 | iph->daddr, ports[1], | ||
316 | dest->addr, dport, | ||
317 | 0, | ||
318 | dest); | ||
319 | if (cp == NULL) { | ||
320 | ip_vs_conn_put(ct); | ||
321 | return NULL; | ||
322 | } | ||
323 | |||
324 | /* | ||
325 | * Add its control | ||
326 | */ | ||
327 | ip_vs_control_add(cp, ct); | ||
328 | ip_vs_conn_put(ct); | ||
329 | |||
330 | ip_vs_conn_stats(cp, svc); | ||
331 | return cp; | ||
332 | } | ||
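
The key idea above is that persistence granularity is controlled by masking the client source address with svc->netmask, so every client in the masked subnet maps onto the same template. A small userspace illustration of that masking step (hypothetical addresses; this is not the kernel API):

        #include <stdio.h>
        #include <stdint.h>
        #include <arpa/inet.h>

        int main(void)
        {
                uint32_t saddr   = inet_addr("192.168.1.77");
                uint32_t netmask = inet_addr("255.255.255.0");
                uint32_t snet    = saddr & netmask;     /* template source key */
                struct in_addr a = { .s_addr = snet };

                /* all of 192.168.1.0/24 shares this template key */
                printf("template snet = %s\n", inet_ntoa(a));
                return 0;
        }

With a netmask of 255.255.255.255 each client gets its own template; coarser masks trade scheduling freedom for stickier persistence.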
333 | |||
334 | |||
335 | /* | ||
336 | * IPVS main scheduling function | ||
337 | * It selects a server according to the virtual service, and | ||
338 | * creates a connection entry. | ||
339 | * Protocols supported: TCP, UDP | ||
340 | */ | ||
341 | struct ip_vs_conn * | ||
342 | ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | ||
343 | { | ||
344 | struct ip_vs_conn *cp = NULL; | ||
345 | struct iphdr *iph = ip_hdr(skb); | ||
346 | struct ip_vs_dest *dest; | ||
347 | __be16 _ports[2], *pptr; | ||
348 | |||
349 | pptr = skb_header_pointer(skb, iph->ihl*4, | ||
350 | sizeof(_ports), _ports); | ||
351 | if (pptr == NULL) | ||
352 | return NULL; | ||
353 | |||
354 | /* | ||
355 | * Persistent service | ||
356 | */ | ||
357 | if (svc->flags & IP_VS_SVC_F_PERSISTENT) | ||
358 | return ip_vs_sched_persist(svc, skb, pptr); | ||
359 | |||
360 | /* | ||
361 | * Non-persistent service | ||
362 | */ | ||
363 | if (!svc->fwmark && pptr[1] != svc->port) { | ||
364 | if (!svc->port) | ||
365 | IP_VS_ERR("Schedule: port zero only supported " | ||
366 | "in persistent services, " | ||
367 | "check your ipvs configuration\n"); | ||
368 | return NULL; | ||
369 | } | ||
370 | |||
371 | dest = svc->scheduler->schedule(svc, skb); | ||
372 | if (dest == NULL) { | ||
373 | IP_VS_DBG(1, "Schedule: no dest found.\n"); | ||
374 | return NULL; | ||
375 | } | ||
376 | |||
377 | /* | ||
378 | * Create a connection entry. | ||
379 | */ | ||
380 | cp = ip_vs_conn_new(iph->protocol, | ||
381 | iph->saddr, pptr[0], | ||
382 | iph->daddr, pptr[1], | ||
383 | dest->addr, dest->port?dest->port:pptr[1], | ||
384 | 0, | ||
385 | dest); | ||
386 | if (cp == NULL) | ||
387 | return NULL; | ||
388 | |||
389 | IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u " | ||
390 | "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n", | ||
391 | ip_vs_fwd_tag(cp), | ||
392 | NIPQUAD(cp->caddr), ntohs(cp->cport), | ||
393 | NIPQUAD(cp->vaddr), ntohs(cp->vport), | ||
394 | NIPQUAD(cp->daddr), ntohs(cp->dport), | ||
395 | cp->flags, atomic_read(&cp->refcnt)); | ||
396 | |||
397 | ip_vs_conn_stats(cp, svc); | ||
398 | return cp; | ||
399 | } | ||
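
For TCP and UDP the two ports sit at the very start of the transport header, which is why ip_vs_schedule() can read both with a single skb_header_pointer() call at offset iph->ihl * 4. A userspace sketch of the same offset arithmetic on a hypothetical raw packet buffer:

        #include <stdio.h>
        #include <stdint.h>
        #include <string.h>
        #include <arpa/inet.h>

        int main(void)
        {
                uint8_t pkt[24] = { 0x45 };             /* version 4, IHL 5 -> 20-byte header */
                uint16_t ports[2] = { htons(12345), htons(80) };

                /* place source/dest ports right after the IP header */
                memcpy(pkt + (pkt[0] & 0x0f) * 4, ports, sizeof(ports));

                unsigned ihl = (pkt[0] & 0x0f) * 4;     /* iph->ihl * 4 */
                uint16_t sport, dport;
                memcpy(&sport, pkt + ihl, 2);
                memcpy(&dport, pkt + ihl + 2, 2);
                printf("sport=%u dport=%u\n", ntohs(sport), ntohs(dport));
                return 0;
        }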
400 | |||
401 | |||
402 | /* | ||
403 | * Pass or drop the packet. | ||
404 | * Called by ip_vs_in, when the virtual service is available but | ||
405 | * no destination is available for a new connection. | ||
406 | */ | ||
407 | int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, | ||
408 | struct ip_vs_protocol *pp) | ||
409 | { | ||
410 | __be16 _ports[2], *pptr; | ||
411 | struct iphdr *iph = ip_hdr(skb); | ||
412 | |||
413 | pptr = skb_header_pointer(skb, iph->ihl*4, | ||
414 | sizeof(_ports), _ports); | ||
415 | if (pptr == NULL) { | ||
416 | ip_vs_service_put(svc); | ||
417 | return NF_DROP; | ||
418 | } | ||
419 | |||
420 | /* If it is a fwmark-based service, the cache_bypass sysctl is | ||
421 | set, and the destination is RTN_UNICAST (and not local), then | ||
422 | create a cache_bypass connection entry */ | ||
423 | if (sysctl_ip_vs_cache_bypass && svc->fwmark | ||
424 | && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) { | ||
425 | int ret, cs; | ||
426 | struct ip_vs_conn *cp; | ||
427 | |||
428 | ip_vs_service_put(svc); | ||
429 | |||
430 | /* create a new connection entry */ | ||
431 | IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n"); | ||
432 | cp = ip_vs_conn_new(iph->protocol, | ||
433 | iph->saddr, pptr[0], | ||
434 | iph->daddr, pptr[1], | ||
435 | 0, 0, | ||
436 | IP_VS_CONN_F_BYPASS, | ||
437 | NULL); | ||
438 | if (cp == NULL) | ||
439 | return NF_DROP; | ||
440 | |||
441 | /* statistics */ | ||
442 | ip_vs_in_stats(cp, skb); | ||
443 | |||
444 | /* set state */ | ||
445 | cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); | ||
446 | |||
447 | /* transmit the first SYN packet */ | ||
448 | ret = cp->packet_xmit(skb, cp, pp); | ||
449 | /* do not touch skb anymore */ | ||
450 | |||
451 | atomic_inc(&cp->in_pkts); | ||
452 | ip_vs_conn_put(cp); | ||
453 | return ret; | ||
454 | } | ||
455 | |||
456 | /* | ||
457 | * When a virtual ftp service is present, packets destined | ||
458 | * for other services on the VIP may get here (except services | ||
459 | * listed in the ipvs table); pass such packets on, because it | ||
460 | * is not IPVS's job to decide to drop them. | ||
461 | */ | ||
462 | if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) { | ||
463 | ip_vs_service_put(svc); | ||
464 | return NF_ACCEPT; | ||
465 | } | ||
466 | |||
467 | ip_vs_service_put(svc); | ||
468 | |||
469 | /* | ||
470 | * Notify the client that the destination is unreachable, and | ||
471 | * release the socket buffer. | ||
472 | * Since we are at the IP layer, no TCP socket actually | ||
473 | * exists, so a TCP RST cannot be sent; instead, | ||
474 | * ICMP_PORT_UNREACH is sent here for both TCP and UDP. --WZ | ||
475 | */ | ||
476 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | ||
477 | return NF_DROP; | ||
478 | } | ||
479 | |||
480 | |||
481 | /* | ||
482 | * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING | ||
483 | * chain, and is used for VS/NAT. | ||
484 | * It detects packets for VS/NAT connections and sends the packets | ||
485 | * immediately. This avoids having iptable_nat mangle packets | ||
486 | * that belong to VS/NAT. | ||
487 | */ | ||
488 | static unsigned int ip_vs_post_routing(unsigned int hooknum, | ||
489 | struct sk_buff *skb, | ||
490 | const struct net_device *in, | ||
491 | const struct net_device *out, | ||
492 | int (*okfn)(struct sk_buff *)) | ||
493 | { | ||
494 | if (!skb->ipvs_property) | ||
495 | return NF_ACCEPT; | ||
496 | /* The packet was sent from IPVS, exit this chain */ | ||
497 | return NF_STOP; | ||
498 | } | ||
499 | |||
500 | __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) | ||
501 | { | ||
502 | return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); | ||
503 | } | ||
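
ip_vs_checksum_complete() delegates to skb_checksum()/csum_fold(); the underlying algorithm is the standard Internet checksum: sum 16-bit words with end-around carry, then take the one's complement. A self-contained userspace sketch (big-endian word summing, simplified from the kernel's incremental version):

        #include <stdio.h>
        #include <stdint.h>
        #include <stddef.h>

        static uint16_t inet_csum(const void *data, size_t len)
        {
                const uint8_t *p = data;
                uint32_t sum = 0;

                while (len > 1) {
                        sum += (p[0] << 8) | p[1];      /* 16-bit big-endian word */
                        p += 2;
                        len -= 2;
                }
                if (len)                                /* odd trailing byte */
                        sum += p[0] << 8;
                while (sum >> 16)                       /* fold carries ("csum_fold") */
                        sum = (sum & 0xffff) + (sum >> 16);
                return (uint16_t)~sum;
        }

        int main(void)
        {
                uint8_t buf[] = { 0x08, 0x00, 0x00, 0x00, 0x12, 0x34 }; /* ICMP-ish */
                printf("checksum = 0x%04x\n", inet_csum(buf, sizeof(buf)));
                return 0;
        }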
504 | |||
505 | static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) | ||
506 | { | ||
507 | int err = ip_defrag(skb, user); | ||
508 | |||
509 | if (!err) | ||
510 | ip_send_check(ip_hdr(skb)); | ||
511 | |||
512 | return err; | ||
513 | } | ||
514 | |||
515 | /* | ||
516 | * Packet has been made sufficiently writable in caller | ||
517 | * - inout: 1=in->out, 0=out->in | ||
518 | */ | ||
519 | void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
520 | struct ip_vs_conn *cp, int inout) | ||
521 | { | ||
522 | struct iphdr *iph = ip_hdr(skb); | ||
523 | unsigned int icmp_offset = iph->ihl*4; | ||
524 | struct icmphdr *icmph = (struct icmphdr *)(skb_network_header(skb) + | ||
525 | icmp_offset); | ||
526 | struct iphdr *ciph = (struct iphdr *)(icmph + 1); | ||
527 | |||
528 | if (inout) { | ||
529 | iph->saddr = cp->vaddr; | ||
530 | ip_send_check(iph); | ||
531 | ciph->daddr = cp->vaddr; | ||
532 | ip_send_check(ciph); | ||
533 | } else { | ||
534 | iph->daddr = cp->daddr; | ||
535 | ip_send_check(iph); | ||
536 | ciph->saddr = cp->daddr; | ||
537 | ip_send_check(ciph); | ||
538 | } | ||
539 | |||
540 | /* the TCP/UDP port */ | ||
541 | if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) { | ||
542 | __be16 *ports = (void *)ciph + ciph->ihl*4; | ||
543 | |||
544 | if (inout) | ||
545 | ports[1] = cp->vport; | ||
546 | else | ||
547 | ports[0] = cp->dport; | ||
548 | } | ||
549 | |||
550 | /* And finally the ICMP checksum */ | ||
551 | icmph->checksum = 0; | ||
552 | icmph->checksum = ip_vs_checksum_complete(skb, icmp_offset); | ||
553 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
554 | |||
555 | if (inout) | ||
556 | IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, | ||
557 | "Forwarding altered outgoing ICMP"); | ||
558 | else | ||
559 | IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, | ||
560 | "Forwarding altered incoming ICMP"); | ||
561 | } | ||
562 | |||
563 | /* | ||
564 | * Handle ICMP messages in the inside-to-outside direction (outgoing). | ||
565 | * Find any that might be relevant, check against existing connections, | ||
566 | * forward to the right destination host if relevant. | ||
567 | * Currently handles error types - unreachable, quench, ttl exceeded. | ||
568 | * (Only used in VS/NAT) | ||
569 | */ | ||
570 | static int ip_vs_out_icmp(struct sk_buff *skb, int *related) | ||
571 | { | ||
572 | struct iphdr *iph; | ||
573 | struct icmphdr _icmph, *ic; | ||
574 | struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ | ||
575 | struct ip_vs_conn *cp; | ||
576 | struct ip_vs_protocol *pp; | ||
577 | unsigned int offset, ihl, verdict; | ||
578 | |||
579 | *related = 1; | ||
580 | |||
581 | /* reassemble IP fragments */ | ||
582 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { | ||
583 | if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) | ||
584 | return NF_STOLEN; | ||
585 | } | ||
586 | |||
587 | iph = ip_hdr(skb); | ||
588 | offset = ihl = iph->ihl * 4; | ||
589 | ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); | ||
590 | if (ic == NULL) | ||
591 | return NF_DROP; | ||
592 | |||
593 | IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n", | ||
594 | ic->type, ntohs(icmp_id(ic)), | ||
595 | NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); | ||
596 | |||
597 | /* | ||
598 | * Work out whether this is for us. | ||
599 | * These checks are ordered so that the cheap ones run first | ||
600 | * to speed up processing; this means that some packets will | ||
601 | * get a long way down this stack before being rejected, but | ||
602 | * that's life. | ||
603 | */ | ||
604 | if ((ic->type != ICMP_DEST_UNREACH) && | ||
605 | (ic->type != ICMP_SOURCE_QUENCH) && | ||
606 | (ic->type != ICMP_TIME_EXCEEDED)) { | ||
607 | *related = 0; | ||
608 | return NF_ACCEPT; | ||
609 | } | ||
610 | |||
611 | /* Now find the contained IP header */ | ||
612 | offset += sizeof(_icmph); | ||
613 | cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); | ||
614 | if (cih == NULL) | ||
615 | return NF_ACCEPT; /* The packet looks wrong, ignore */ | ||
616 | |||
617 | pp = ip_vs_proto_get(cih->protocol); | ||
618 | if (!pp) | ||
619 | return NF_ACCEPT; | ||
620 | |||
621 | /* Is the embedded protocol header present? */ | ||
622 | if (unlikely(cih->frag_off & htons(IP_OFFSET) && | ||
623 | pp->dont_defrag)) | ||
624 | return NF_ACCEPT; | ||
625 | |||
626 | IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for"); | ||
627 | |||
628 | offset += cih->ihl * 4; | ||
629 | |||
630 | /* The embedded headers contain source and dest in reverse order */ | ||
631 | cp = pp->conn_out_get(skb, pp, cih, offset, 1); | ||
632 | if (!cp) | ||
633 | return NF_ACCEPT; | ||
634 | |||
635 | verdict = NF_DROP; | ||
636 | |||
637 | if (IP_VS_FWD_METHOD(cp) != 0) { | ||
638 | IP_VS_ERR("shouldn't reach here, because the box is on the " | ||
639 | "half connection in the tun/dr module.\n"); | ||
640 | } | ||
641 | |||
642 | /* Ensure the checksum is correct */ | ||
643 | if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { | ||
644 | /* Failed checksum! */ | ||
645 | IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n", | ||
646 | NIPQUAD(iph->saddr)); | ||
647 | goto out; | ||
648 | } | ||
649 | |||
650 | if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) | ||
651 | offset += 2 * sizeof(__u16); | ||
652 | if (!skb_make_writable(skb, offset)) | ||
653 | goto out; | ||
654 | |||
655 | ip_vs_nat_icmp(skb, pp, cp, 1); | ||
656 | |||
657 | /* do the statistics and put it back */ | ||
658 | ip_vs_out_stats(cp, skb); | ||
659 | |||
660 | skb->ipvs_property = 1; | ||
661 | verdict = NF_ACCEPT; | ||
662 | |||
663 | out: | ||
664 | __ip_vs_conn_put(cp); | ||
665 | |||
666 | return verdict; | ||
667 | } | ||
668 | |||
669 | static inline int is_tcp_reset(const struct sk_buff *skb) | ||
670 | { | ||
671 | struct tcphdr _tcph, *th; | ||
672 | |||
673 | th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); | ||
674 | if (th == NULL) | ||
675 | return 0; | ||
676 | return th->rst; | ||
677 | } | ||
678 | |||
679 | /* | ||
680 | * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT. | ||
681 | * Check if outgoing packet belongs to the established ip_vs_conn, | ||
682 | * rewrite addresses of the packet and send it on its way... | ||
683 | */ | ||
684 | static unsigned int | ||
685 | ip_vs_out(unsigned int hooknum, struct sk_buff *skb, | ||
686 | const struct net_device *in, const struct net_device *out, | ||
687 | int (*okfn)(struct sk_buff *)) | ||
688 | { | ||
689 | struct iphdr *iph; | ||
690 | struct ip_vs_protocol *pp; | ||
691 | struct ip_vs_conn *cp; | ||
692 | int ihl; | ||
693 | |||
694 | EnterFunction(11); | ||
695 | |||
696 | if (skb->ipvs_property) | ||
697 | return NF_ACCEPT; | ||
698 | |||
699 | iph = ip_hdr(skb); | ||
700 | if (unlikely(iph->protocol == IPPROTO_ICMP)) { | ||
701 | int related, verdict = ip_vs_out_icmp(skb, &related); | ||
702 | |||
703 | if (related) | ||
704 | return verdict; | ||
705 | iph = ip_hdr(skb); | ||
706 | } | ||
707 | |||
708 | pp = ip_vs_proto_get(iph->protocol); | ||
709 | if (unlikely(!pp)) | ||
710 | return NF_ACCEPT; | ||
711 | |||
712 | /* reassemble IP fragments */ | ||
713 | if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) && | ||
714 | !pp->dont_defrag)) { | ||
715 | if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) | ||
716 | return NF_STOLEN; | ||
717 | iph = ip_hdr(skb); | ||
718 | } | ||
719 | |||
720 | ihl = iph->ihl << 2; | ||
721 | |||
722 | /* | ||
723 | * Check if the packet belongs to an existing entry | ||
724 | */ | ||
725 | cp = pp->conn_out_get(skb, pp, iph, ihl, 0); | ||
726 | |||
727 | if (unlikely(!cp)) { | ||
728 | if (sysctl_ip_vs_nat_icmp_send && | ||
729 | (pp->protocol == IPPROTO_TCP || | ||
730 | pp->protocol == IPPROTO_UDP)) { | ||
731 | __be16 _ports[2], *pptr; | ||
732 | |||
733 | pptr = skb_header_pointer(skb, ihl, | ||
734 | sizeof(_ports), _ports); | ||
735 | if (pptr == NULL) | ||
736 | return NF_ACCEPT; /* Not for me */ | ||
737 | if (ip_vs_lookup_real_service(iph->protocol, | ||
738 | iph->saddr, pptr[0])) { | ||
739 | /* | ||
740 | * Notify the real server: there is no | ||
741 | * existing entry if it is not RST | ||
742 | * packet or not TCP packet. | ||
743 | */ | ||
744 | if (iph->protocol != IPPROTO_TCP | ||
745 | || !is_tcp_reset(skb)) { | ||
746 | icmp_send(skb,ICMP_DEST_UNREACH, | ||
747 | ICMP_PORT_UNREACH, 0); | ||
748 | return NF_DROP; | ||
749 | } | ||
750 | } | ||
751 | } | ||
752 | IP_VS_DBG_PKT(12, pp, skb, 0, | ||
753 | "packet continues traversal as normal"); | ||
754 | return NF_ACCEPT; | ||
755 | } | ||
756 | |||
757 | IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); | ||
758 | |||
759 | if (!skb_make_writable(skb, ihl)) | ||
760 | goto drop; | ||
761 | |||
762 | /* mangle the packet */ | ||
763 | if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) | ||
764 | goto drop; | ||
765 | ip_hdr(skb)->saddr = cp->vaddr; | ||
766 | ip_send_check(ip_hdr(skb)); | ||
767 | |||
768 | /* For policy routing, packets originating from this | ||
769 | * machine itself may be routed differently to packets | ||
770 | * passing through. We want this packet to be routed as | ||
771 | * if it came from this machine itself. So re-compute | ||
772 | * the routing information. | ||
773 | */ | ||
774 | if (ip_route_me_harder(skb, RTN_LOCAL) != 0) | ||
775 | goto drop; | ||
776 | |||
777 | IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); | ||
778 | |||
779 | ip_vs_out_stats(cp, skb); | ||
780 | ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); | ||
781 | ip_vs_conn_put(cp); | ||
782 | |||
783 | skb->ipvs_property = 1; | ||
784 | |||
785 | LeaveFunction(11); | ||
786 | return NF_ACCEPT; | ||
787 | |||
788 | drop: | ||
789 | ip_vs_conn_put(cp); | ||
790 | kfree_skb(skb); | ||
791 | return NF_STOLEN; | ||
792 | } | ||
793 | |||
794 | |||
795 | /* | ||
796 | * Handle ICMP messages in the outside-to-inside direction (incoming). | ||
797 | * Find any that might be relevant, check against existing connections, | ||
798 | * forward to the right destination host if relevant. | ||
799 | * Currently handles error types - unreachable, quench, ttl exceeded. | ||
800 | */ | ||
801 | static int | ||
802 | ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) | ||
803 | { | ||
804 | struct iphdr *iph; | ||
805 | struct icmphdr _icmph, *ic; | ||
806 | struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ | ||
807 | struct ip_vs_conn *cp; | ||
808 | struct ip_vs_protocol *pp; | ||
809 | unsigned int offset, ihl, verdict; | ||
810 | |||
811 | *related = 1; | ||
812 | |||
813 | /* reassemble IP fragments */ | ||
814 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { | ||
815 | if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ? | ||
816 | IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD)) | ||
817 | return NF_STOLEN; | ||
818 | } | ||
819 | |||
820 | iph = ip_hdr(skb); | ||
821 | offset = ihl = iph->ihl * 4; | ||
822 | ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); | ||
823 | if (ic == NULL) | ||
824 | return NF_DROP; | ||
825 | |||
826 | IP_VS_DBG(12, "Incoming ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n", | ||
827 | ic->type, ntohs(icmp_id(ic)), | ||
828 | NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); | ||
829 | |||
830 | /* | ||
831 | * Work out whether this is for us. | ||
832 | * These checks are ordered so that the cheap ones run first | ||
833 | * to speed up processing; this means that some packets will | ||
834 | * get a long way down this stack before being rejected, but | ||
835 | * that's life. | ||
836 | */ | ||
837 | if ((ic->type != ICMP_DEST_UNREACH) && | ||
838 | (ic->type != ICMP_SOURCE_QUENCH) && | ||
839 | (ic->type != ICMP_TIME_EXCEEDED)) { | ||
840 | *related = 0; | ||
841 | return NF_ACCEPT; | ||
842 | } | ||
843 | |||
844 | /* Now find the contained IP header */ | ||
845 | offset += sizeof(_icmph); | ||
846 | cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); | ||
847 | if (cih == NULL) | ||
848 | return NF_ACCEPT; /* The packet looks wrong, ignore */ | ||
849 | |||
850 | pp = ip_vs_proto_get(cih->protocol); | ||
851 | if (!pp) | ||
852 | return NF_ACCEPT; | ||
853 | |||
854 | /* Is the embedded protocol header present? */ | ||
855 | if (unlikely(cih->frag_off & htons(IP_OFFSET) && | ||
856 | pp->dont_defrag)) | ||
857 | return NF_ACCEPT; | ||
858 | |||
859 | IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for"); | ||
860 | |||
861 | offset += cih->ihl * 4; | ||
862 | |||
863 | /* The embedded headers contain source and dest in reverse order */ | ||
864 | cp = pp->conn_in_get(skb, pp, cih, offset, 1); | ||
865 | if (!cp) | ||
866 | return NF_ACCEPT; | ||
867 | |||
868 | verdict = NF_DROP; | ||
869 | |||
870 | /* Ensure the checksum is correct */ | ||
871 | if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { | ||
872 | /* Failed checksum! */ | ||
873 | IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n", | ||
874 | NIPQUAD(iph->saddr)); | ||
875 | goto out; | ||
876 | } | ||
877 | |||
878 | /* do the statistics and put it back */ | ||
879 | ip_vs_in_stats(cp, skb); | ||
880 | if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) | ||
881 | offset += 2 * sizeof(__u16); | ||
882 | verdict = ip_vs_icmp_xmit(skb, cp, pp, offset); | ||
883 | /* do not touch skb anymore */ | ||
884 | |||
885 | out: | ||
886 | __ip_vs_conn_put(cp); | ||
887 | |||
888 | return verdict; | ||
889 | } | ||
890 | |||
891 | /* | ||
892 | * Check if it's for virtual services, look it up, | ||
893 | * and send it on its way... | ||
894 | */ | ||
895 | static unsigned int | ||
896 | ip_vs_in(unsigned int hooknum, struct sk_buff *skb, | ||
897 | const struct net_device *in, const struct net_device *out, | ||
898 | int (*okfn)(struct sk_buff *)) | ||
899 | { | ||
900 | struct iphdr *iph; | ||
901 | struct ip_vs_protocol *pp; | ||
902 | struct ip_vs_conn *cp; | ||
903 | int ret, restart; | ||
904 | int ihl; | ||
905 | |||
906 | /* | ||
907 | * Big tappo: only PACKET_HOST (neither loopback nor mcasts) | ||
908 | * ... it is not clear why the 1st test does not already cover the 2nd (?) | ||
909 | */ | ||
910 | if (unlikely(skb->pkt_type != PACKET_HOST | ||
911 | || skb->dev->flags & IFF_LOOPBACK || skb->sk)) { | ||
912 | IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", | ||
913 | skb->pkt_type, | ||
914 | ip_hdr(skb)->protocol, | ||
915 | NIPQUAD(ip_hdr(skb)->daddr)); | ||
916 | return NF_ACCEPT; | ||
917 | } | ||
918 | |||
919 | iph = ip_hdr(skb); | ||
920 | if (unlikely(iph->protocol == IPPROTO_ICMP)) { | ||
921 | int related, verdict = ip_vs_in_icmp(skb, &related, hooknum); | ||
922 | |||
923 | if (related) | ||
924 | return verdict; | ||
925 | iph = ip_hdr(skb); | ||
926 | } | ||
927 | |||
928 | /* Protocol supported? */ | ||
929 | pp = ip_vs_proto_get(iph->protocol); | ||
930 | if (unlikely(!pp)) | ||
931 | return NF_ACCEPT; | ||
932 | |||
933 | ihl = iph->ihl << 2; | ||
934 | |||
935 | /* | ||
936 | * Check if the packet belongs to an existing connection entry | ||
937 | */ | ||
938 | cp = pp->conn_in_get(skb, pp, iph, ihl, 0); | ||
939 | |||
940 | if (unlikely(!cp)) { | ||
941 | int v; | ||
942 | |||
943 | if (!pp->conn_schedule(skb, pp, &v, &cp)) | ||
944 | return v; | ||
945 | } | ||
946 | |||
947 | if (unlikely(!cp)) { | ||
948 | /* sorry, all this trouble for a no-hit :) */ | ||
949 | IP_VS_DBG_PKT(12, pp, skb, 0, | ||
950 | "packet continues traversal as normal"); | ||
951 | return NF_ACCEPT; | ||
952 | } | ||
953 | |||
954 | IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet"); | ||
955 | |||
956 | /* Check the server status */ | ||
957 | if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { | ||
958 | /* the destination server is not available */ | ||
959 | |||
960 | if (sysctl_ip_vs_expire_nodest_conn) { | ||
961 | /* try to expire the connection immediately */ | ||
962 | ip_vs_conn_expire_now(cp); | ||
963 | } | ||
964 | /* don't restart its timer, and silently | ||
965 | drop the packet. */ | ||
966 | __ip_vs_conn_put(cp); | ||
967 | return NF_DROP; | ||
968 | } | ||
969 | |||
970 | ip_vs_in_stats(cp, skb); | ||
971 | restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); | ||
972 | if (cp->packet_xmit) | ||
973 | ret = cp->packet_xmit(skb, cp, pp); | ||
974 | /* do not touch skb anymore */ | ||
975 | else { | ||
976 | IP_VS_DBG_RL("warning: packet_xmit is null"); | ||
977 | ret = NF_ACCEPT; | ||
978 | } | ||
979 | |||
980 | /* Increase its packet counter and check whether it needs | ||
981 | * to be synchronized. | ||
982 | * | ||
983 | * Sync the connection if it is about to close, to | ||
984 | * encourage the standby servers to update the connection's timeout. | ||
985 | */ | ||
986 | atomic_inc(&cp->in_pkts); | ||
987 | if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && | ||
988 | (((cp->protocol != IPPROTO_TCP || | ||
989 | cp->state == IP_VS_TCP_S_ESTABLISHED) && | ||
990 | (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1] | ||
991 | == sysctl_ip_vs_sync_threshold[0])) || | ||
992 | ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && | ||
993 | ((cp->state == IP_VS_TCP_S_FIN_WAIT) || | ||
994 | (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || | ||
995 | (cp->state == IP_VS_TCP_S_TIME_WAIT))))) | ||
996 | ip_vs_sync_conn(cp); | ||
997 | cp->old_state = cp->state; | ||
998 | |||
999 | ip_vs_conn_put(cp); | ||
1000 | return ret; | ||
1001 | } | ||
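
The long condition above paces master-to-backup synchronization. With the defaults sysctl_ip_vs_sync_threshold = { 3, 50 } (see ip_vs_ctl.c below), a connection that is not TCP, or is TCP in ESTABLISHED, is synced whenever in_pkts % 50 == 3; closing TCP states are synced on each state change instead. A tiny sketch of the modulo pacing:

        #include <stdio.h>

        int main(void)
        {
                int threshold = 3, period = 50;         /* sync_threshold defaults */

                for (int in_pkts = 1; in_pkts <= 120; in_pkts++)
                        if (in_pkts % period == threshold)
                                printf("sync at packet %d\n", in_pkts);
                return 0;
        }

This prints packets 3, 53 and 103: an early sync so the backup learns about the connection quickly, then a low-rate refresh afterwards.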
1002 | |||
1003 | |||
1004 | /* | ||
1005 | * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP | ||
1006 | * related packets destined for 0.0.0.0/0. | ||
1007 | * When fwmark-based virtual service is used, such as transparent | ||
1008 | * cache cluster, TCP packets can be marked and routed to ip_vs_in, | ||
1009 | * but ICMP destined for 0.0.0.0/0 cannot be easily marked and | ||
1010 | * sent to ip_vs_in_icmp. So, catch them at the NF_INET_FORWARD chain | ||
1011 | * and send them to ip_vs_in_icmp. | ||
1012 | */ | ||
1013 | static unsigned int | ||
1014 | ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, | ||
1015 | const struct net_device *in, const struct net_device *out, | ||
1016 | int (*okfn)(struct sk_buff *)) | ||
1017 | { | ||
1018 | int r; | ||
1019 | |||
1020 | if (ip_hdr(skb)->protocol != IPPROTO_ICMP) | ||
1021 | return NF_ACCEPT; | ||
1022 | |||
1023 | return ip_vs_in_icmp(skb, &r, hooknum); | ||
1024 | } | ||
1025 | |||
1026 | |||
1027 | static struct nf_hook_ops ip_vs_ops[] __read_mostly = { | ||
1028 | /* After packet filtering, forward packet through VS/DR, VS/TUN, | ||
1029 | * or VS/NAT(change destination), so that filtering rules can be | ||
1030 | * applied to IPVS. */ | ||
1031 | { | ||
1032 | .hook = ip_vs_in, | ||
1033 | .owner = THIS_MODULE, | ||
1034 | .pf = PF_INET, | ||
1035 | .hooknum = NF_INET_LOCAL_IN, | ||
1036 | .priority = 100, | ||
1037 | }, | ||
1038 | /* After packet filtering, change source only for VS/NAT */ | ||
1039 | { | ||
1040 | .hook = ip_vs_out, | ||
1041 | .owner = THIS_MODULE, | ||
1042 | .pf = PF_INET, | ||
1043 | .hooknum = NF_INET_FORWARD, | ||
1044 | .priority = 100, | ||
1045 | }, | ||
1046 | /* After packet filtering (but before ip_vs_out_icmp), catch icmp | ||
1047 | * destined for 0.0.0.0/0, which is for incoming IPVS connections */ | ||
1048 | { | ||
1049 | .hook = ip_vs_forward_icmp, | ||
1050 | .owner = THIS_MODULE, | ||
1051 | .pf = PF_INET, | ||
1052 | .hooknum = NF_INET_FORWARD, | ||
1053 | .priority = 99, | ||
1054 | }, | ||
1055 | /* Before the netfilter connection tracking, exit from POST_ROUTING */ | ||
1056 | { | ||
1057 | .hook = ip_vs_post_routing, | ||
1058 | .owner = THIS_MODULE, | ||
1059 | .pf = PF_INET, | ||
1060 | .hooknum = NF_INET_POST_ROUTING, | ||
1061 | .priority = NF_IP_PRI_NAT_SRC-1, | ||
1062 | }, | ||
1063 | }; | ||
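
Within a single netfilter chain, hooks fire in ascending priority order, which is what the table above relies on: on FORWARD, ip_vs_forward_icmp (priority 99) runs before ip_vs_out (priority 100), and on POST_ROUTING the IPVS hook runs just ahead of source NAT (NF_IP_PRI_NAT_SRC - 1). A small sketch of that ordering rule, with chain/priority values transcribed from the table (the sort is illustrative, not the kernel's registration code):

        #include <stdio.h>
        #include <stdlib.h>

        struct hook { const char *name; int chain; int prio; };

        static int cmp(const void *a, const void *b)
        {
                const struct hook *x = a, *y = b;
                return x->chain != y->chain ? x->chain - y->chain
                                            : x->prio - y->prio;
        }

        int main(void)
        {
                struct hook h[] = {
                        { "ip_vs_out",          1 /* FORWARD */,      100 },
                        { "ip_vs_forward_icmp", 1 /* FORWARD */,       99 },
                        { "ip_vs_in",           0 /* LOCAL_IN */,     100 },
                        { "ip_vs_post_routing", 2 /* POST_ROUTING */,  99 }, /* NAT_SRC - 1 */
                };
                int n = sizeof(h) / sizeof(h[0]);

                qsort(h, n, sizeof(h[0]), cmp);         /* per-chain firing order */
                for (int i = 0; i < n; i++)
                        printf("%-20s chain=%d prio=%d\n",
                               h[i].name, h[i].chain, h[i].prio);
                return 0;
        }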
1064 | |||
1065 | |||
1066 | /* | ||
1067 | * Initialize IP Virtual Server | ||
1068 | */ | ||
1069 | static int __init ip_vs_init(void) | ||
1070 | { | ||
1071 | int ret; | ||
1072 | |||
1073 | ret = ip_vs_control_init(); | ||
1074 | if (ret < 0) { | ||
1075 | IP_VS_ERR("can't setup control.\n"); | ||
1076 | goto cleanup_nothing; | ||
1077 | } | ||
1078 | |||
1079 | ip_vs_protocol_init(); | ||
1080 | |||
1081 | ret = ip_vs_app_init(); | ||
1082 | if (ret < 0) { | ||
1083 | IP_VS_ERR("can't setup application helper.\n"); | ||
1084 | goto cleanup_protocol; | ||
1085 | } | ||
1086 | |||
1087 | ret = ip_vs_conn_init(); | ||
1088 | if (ret < 0) { | ||
1089 | IP_VS_ERR("can't setup connection table.\n"); | ||
1090 | goto cleanup_app; | ||
1091 | } | ||
1092 | |||
1093 | ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); | ||
1094 | if (ret < 0) { | ||
1095 | IP_VS_ERR("can't register hooks.\n"); | ||
1096 | goto cleanup_conn; | ||
1097 | } | ||
1098 | |||
1099 | IP_VS_INFO("ipvs loaded.\n"); | ||
1100 | return ret; | ||
1101 | |||
1102 | cleanup_conn: | ||
1103 | ip_vs_conn_cleanup(); | ||
1104 | cleanup_app: | ||
1105 | ip_vs_app_cleanup(); | ||
1106 | cleanup_protocol: | ||
1107 | ip_vs_protocol_cleanup(); | ||
1108 | ip_vs_control_cleanup(); | ||
1109 | cleanup_nothing: | ||
1110 | return ret; | ||
1111 | } | ||
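
ip_vs_init() uses the classic goto-based unwind idiom: each failure label undoes exactly the steps that already succeeded, in reverse order, so there is a single exit path per outcome. A minimal sketch of the idiom with hypothetical setup_a/setup_b stand-ins:

        #include <stdio.h>

        static int setup_a(void) { puts("a up");     return 0; }
        static void undo_a(void) { puts("a down"); }
        static int setup_b(void) { puts("b failed"); return -1; /* simulated */ }

        static int init(void)
        {
                int ret = setup_a();
                if (ret < 0)
                        goto cleanup_nothing;

                ret = setup_b();
                if (ret < 0)
                        goto cleanup_a;                 /* undo only what succeeded */

                return 0;

        cleanup_a:
                undo_a();
        cleanup_nothing:
                return ret;
        }

        int main(void) { return init() ? 1 : 0; }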
1112 | |||
1113 | static void __exit ip_vs_cleanup(void) | ||
1114 | { | ||
1115 | nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); | ||
1116 | ip_vs_conn_cleanup(); | ||
1117 | ip_vs_app_cleanup(); | ||
1118 | ip_vs_protocol_cleanup(); | ||
1119 | ip_vs_control_cleanup(); | ||
1120 | IP_VS_INFO("ipvs unloaded.\n"); | ||
1121 | } | ||
1122 | |||
1123 | module_init(ip_vs_init); | ||
1124 | module_exit(ip_vs_cleanup); | ||
1125 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c deleted file mode 100644 index 6379705a8dcb..000000000000 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ /dev/null | |||
@@ -1,2373 +0,0 @@ | |||
1 | /* | ||
2 | * IPVS An implementation of the IP virtual server support for the | ||
3 | * LINUX operating system. IPVS is now implemented as a module | ||
4 | * over the NetFilter framework. IPVS can be used to build a | ||
5 | * high-performance and highly available server based on a | ||
6 | * cluster of servers. | ||
7 | * | ||
8 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
9 | * Peter Kese <peter.kese@ijs.si> | ||
10 | * Julian Anastasov <ja@ssi.bg> | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License | ||
14 | * as published by the Free Software Foundation; either version | ||
15 | * 2 of the License, or (at your option) any later version. | ||
16 | * | ||
17 | * Changes: | ||
18 | * | ||
19 | */ | ||
20 | |||
21 | #include <linux/module.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/types.h> | ||
24 | #include <linux/capability.h> | ||
25 | #include <linux/fs.h> | ||
26 | #include <linux/sysctl.h> | ||
27 | #include <linux/proc_fs.h> | ||
28 | #include <linux/workqueue.h> | ||
29 | #include <linux/swap.h> | ||
30 | #include <linux/seq_file.h> | ||
31 | |||
32 | #include <linux/netfilter.h> | ||
33 | #include <linux/netfilter_ipv4.h> | ||
34 | #include <linux/mutex.h> | ||
35 | |||
36 | #include <net/net_namespace.h> | ||
37 | #include <net/ip.h> | ||
38 | #include <net/route.h> | ||
39 | #include <net/sock.h> | ||
40 | |||
41 | #include <asm/uaccess.h> | ||
42 | |||
43 | #include <net/ip_vs.h> | ||
44 | |||
45 | /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ | ||
46 | static DEFINE_MUTEX(__ip_vs_mutex); | ||
47 | |||
48 | /* lock for service table */ | ||
49 | static DEFINE_RWLOCK(__ip_vs_svc_lock); | ||
50 | |||
51 | /* lock for table with the real services */ | ||
52 | static DEFINE_RWLOCK(__ip_vs_rs_lock); | ||
53 | |||
54 | /* lock for state and timeout tables */ | ||
55 | static DEFINE_RWLOCK(__ip_vs_securetcp_lock); | ||
56 | |||
57 | /* lock for drop entry handling */ | ||
58 | static DEFINE_SPINLOCK(__ip_vs_dropentry_lock); | ||
59 | |||
60 | /* lock for drop packet handling */ | ||
61 | static DEFINE_SPINLOCK(__ip_vs_droppacket_lock); | ||
62 | |||
63 | /* 1/rate drop and drop-entry variables */ | ||
64 | int ip_vs_drop_rate = 0; | ||
65 | int ip_vs_drop_counter = 0; | ||
66 | static atomic_t ip_vs_dropentry = ATOMIC_INIT(0); | ||
67 | |||
68 | /* number of virtual services */ | ||
69 | static int ip_vs_num_services = 0; | ||
70 | |||
71 | /* sysctl variables */ | ||
72 | static int sysctl_ip_vs_drop_entry = 0; | ||
73 | static int sysctl_ip_vs_drop_packet = 0; | ||
74 | static int sysctl_ip_vs_secure_tcp = 0; | ||
75 | static int sysctl_ip_vs_amemthresh = 1024; | ||
76 | static int sysctl_ip_vs_am_droprate = 10; | ||
77 | int sysctl_ip_vs_cache_bypass = 0; | ||
78 | int sysctl_ip_vs_expire_nodest_conn = 0; | ||
79 | int sysctl_ip_vs_expire_quiescent_template = 0; | ||
80 | int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; | ||
81 | int sysctl_ip_vs_nat_icmp_send = 0; | ||
82 | |||
83 | |||
84 | #ifdef CONFIG_IP_VS_DEBUG | ||
85 | static int sysctl_ip_vs_debug_level = 0; | ||
86 | |||
87 | int ip_vs_get_debug_level(void) | ||
88 | { | ||
89 | return sysctl_ip_vs_debug_level; | ||
90 | } | ||
91 | #endif | ||
92 | |||
93 | /* | ||
94 | * update_defense_level is called from keventd and from sysctl, | ||
95 | * so it needs to protect itself from softirqs | ||
96 | */ | ||
97 | static void update_defense_level(void) | ||
98 | { | ||
99 | struct sysinfo i; | ||
100 | static int old_secure_tcp = 0; | ||
101 | int availmem; | ||
102 | int nomem; | ||
103 | int to_change = -1; | ||
104 | |||
105 | /* we only count free and buffered memory (in pages) */ | ||
106 | si_meminfo(&i); | ||
107 | availmem = i.freeram + i.bufferram; | ||
108 | /* however, in Linux 2.5 i.bufferram is the total page cache | ||
109 | size, so we would need to adjust it */ | ||
110 | /* si_swapinfo(&i); */ | ||
111 | /* availmem = availmem - (i.totalswap - i.freeswap); */ | ||
112 | |||
113 | nomem = (availmem < sysctl_ip_vs_amemthresh); | ||
114 | |||
115 | local_bh_disable(); | ||
116 | |||
117 | /* drop_entry */ | ||
118 | spin_lock(&__ip_vs_dropentry_lock); | ||
119 | switch (sysctl_ip_vs_drop_entry) { | ||
120 | case 0: | ||
121 | atomic_set(&ip_vs_dropentry, 0); | ||
122 | break; | ||
123 | case 1: | ||
124 | if (nomem) { | ||
125 | atomic_set(&ip_vs_dropentry, 1); | ||
126 | sysctl_ip_vs_drop_entry = 2; | ||
127 | } else { | ||
128 | atomic_set(&ip_vs_dropentry, 0); | ||
129 | } | ||
130 | break; | ||
131 | case 2: | ||
132 | if (nomem) { | ||
133 | atomic_set(&ip_vs_dropentry, 1); | ||
134 | } else { | ||
135 | atomic_set(&ip_vs_dropentry, 0); | ||
136 | sysctl_ip_vs_drop_entry = 1; | ||
137 | }; | ||
138 | break; | ||
139 | case 3: | ||
140 | atomic_set(&ip_vs_dropentry, 1); | ||
141 | break; | ||
142 | } | ||
143 | spin_unlock(&__ip_vs_dropentry_lock); | ||
144 | |||
145 | /* drop_packet */ | ||
146 | spin_lock(&__ip_vs_droppacket_lock); | ||
147 | switch (sysctl_ip_vs_drop_packet) { | ||
148 | case 0: | ||
149 | ip_vs_drop_rate = 0; | ||
150 | break; | ||
151 | case 1: | ||
152 | if (nomem) { | ||
153 | ip_vs_drop_rate = ip_vs_drop_counter | ||
154 | = sysctl_ip_vs_amemthresh / | ||
155 | (sysctl_ip_vs_amemthresh-availmem); | ||
156 | sysctl_ip_vs_drop_packet = 2; | ||
157 | } else { | ||
158 | ip_vs_drop_rate = 0; | ||
159 | } | ||
160 | break; | ||
161 | case 2: | ||
162 | if (nomem) { | ||
163 | ip_vs_drop_rate = ip_vs_drop_counter | ||
164 | = sysctl_ip_vs_amemthresh / | ||
165 | (sysctl_ip_vs_amemthresh-availmem); | ||
166 | } else { | ||
167 | ip_vs_drop_rate = 0; | ||
168 | sysctl_ip_vs_drop_packet = 1; | ||
169 | } | ||
170 | break; | ||
171 | case 3: | ||
172 | ip_vs_drop_rate = sysctl_ip_vs_am_droprate; | ||
173 | break; | ||
174 | } | ||
175 | spin_unlock(&__ip_vs_droppacket_lock); | ||
176 | |||
177 | /* secure_tcp */ | ||
178 | write_lock(&__ip_vs_securetcp_lock); | ||
179 | switch (sysctl_ip_vs_secure_tcp) { | ||
180 | case 0: | ||
181 | if (old_secure_tcp >= 2) | ||
182 | to_change = 0; | ||
183 | break; | ||
184 | case 1: | ||
185 | if (nomem) { | ||
186 | if (old_secure_tcp < 2) | ||
187 | to_change = 1; | ||
188 | sysctl_ip_vs_secure_tcp = 2; | ||
189 | } else { | ||
190 | if (old_secure_tcp >= 2) | ||
191 | to_change = 0; | ||
192 | } | ||
193 | break; | ||
194 | case 2: | ||
195 | if (nomem) { | ||
196 | if (old_secure_tcp < 2) | ||
197 | to_change = 1; | ||
198 | } else { | ||
199 | if (old_secure_tcp >= 2) | ||
200 | to_change = 0; | ||
201 | sysctl_ip_vs_secure_tcp = 1; | ||
202 | } | ||
203 | break; | ||
204 | case 3: | ||
205 | if (old_secure_tcp < 2) | ||
206 | to_change = 1; | ||
207 | break; | ||
208 | } | ||
209 | old_secure_tcp = sysctl_ip_vs_secure_tcp; | ||
210 | if (to_change >= 0) | ||
211 | ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); | ||
212 | write_unlock(&__ip_vs_securetcp_lock); | ||
213 | |||
214 | local_bh_enable(); | ||
215 | } | ||
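
For the drop_packet modes above, the rate is amemthresh / (amemthresh - availmem): one out of every `rate` packets is dropped, and the rate shrinks toward 1 (drop everything) as available memory falls. A quick numeric sketch with the default amemthresh of 1024:

        #include <stdio.h>

        int main(void)
        {
                int amemthresh = 1024;                  /* sysctl default, in pages */

                /* drops grow more aggressive as availmem falls */
                for (int availmem = 900; availmem >= 0; availmem -= 300) {
                        int rate = amemthresh / (amemthresh - availmem);
                        printf("availmem=%4d -> drop 1 in %d packets\n",
                               availmem, rate);
                }
                return 0;
        }

This prints 1-in-8 at 900 free pages, 1-in-2 at 600, and 1-in-1 (every packet) once free memory is nearly exhausted.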
216 | |||
217 | |||
218 | /* | ||
219 | * Timer for checking the defense | ||
220 | */ | ||
221 | #define DEFENSE_TIMER_PERIOD 1*HZ | ||
222 | static void defense_work_handler(struct work_struct *work); | ||
223 | static DECLARE_DELAYED_WORK(defense_work, defense_work_handler); | ||
224 | |||
225 | static void defense_work_handler(struct work_struct *work) | ||
226 | { | ||
227 | update_defense_level(); | ||
228 | if (atomic_read(&ip_vs_dropentry)) | ||
229 | ip_vs_random_dropentry(); | ||
230 | |||
231 | schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); | ||
232 | } | ||
233 | |||
234 | int | ||
235 | ip_vs_use_count_inc(void) | ||
236 | { | ||
237 | return try_module_get(THIS_MODULE); | ||
238 | } | ||
239 | |||
240 | void | ||
241 | ip_vs_use_count_dec(void) | ||
242 | { | ||
243 | module_put(THIS_MODULE); | ||
244 | } | ||
245 | |||
246 | |||
247 | /* | ||
248 | * Hash table: for virtual service lookups | ||
249 | */ | ||
250 | #define IP_VS_SVC_TAB_BITS 8 | ||
251 | #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS) | ||
252 | #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) | ||
253 | |||
254 | /* the service table hashed by <protocol, addr, port> */ | ||
255 | static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; | ||
256 | /* the service table hashed by fwmark */ | ||
257 | static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; | ||
258 | |||
259 | /* | ||
260 | * Hash table: for real service lookups | ||
261 | */ | ||
262 | #define IP_VS_RTAB_BITS 4 | ||
263 | #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) | ||
264 | #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) | ||
265 | |||
266 | static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE]; | ||
267 | |||
268 | /* | ||
269 | * Trash for destinations | ||
270 | */ | ||
271 | static LIST_HEAD(ip_vs_dest_trash); | ||
272 | |||
273 | /* | ||
274 | * FTP & NULL virtual service counters | ||
275 | */ | ||
276 | static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0); | ||
277 | static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); | ||
278 | |||
279 | |||
280 | /* | ||
281 | * Returns hash value for virtual service | ||
282 | */ | ||
283 | static __inline__ unsigned | ||
284 | ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port) | ||
285 | { | ||
286 | register unsigned porth = ntohs(port); | ||
287 | |||
288 | return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth) | ||
289 | & IP_VS_SVC_TAB_MASK; | ||
290 | } | ||
291 | |||
292 | /* | ||
293 | * Returns hash value of fwmark for virtual service lookup | ||
294 | */ | ||
295 | static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark) | ||
296 | { | ||
297 | return fwmark & IP_VS_SVC_TAB_MASK; | ||
298 | } | ||
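
Both hash keys above fold their inputs into the 256-bucket (2^IP_VS_SVC_TAB_BITS) range. A userspace sketch of the <protocol, addr, port> variant, with a local stand-in rather than the kernel function:

        #include <stdio.h>
        #include <stdint.h>
        #include <arpa/inet.h>

        #define TAB_BITS 8
        #define TAB_MASK ((1u << TAB_BITS) - 1)

        static unsigned svc_hashkey(unsigned proto, uint32_t addr_be, uint16_t port_be)
        {
                unsigned porth = ntohs(port_be);

                /* XOR-fold protocol, address and port, mask to table size */
                return (proto ^ ntohl(addr_be) ^ (porth >> TAB_BITS) ^ porth)
                        & TAB_MASK;
        }

        int main(void)
        {
                printf("bucket = %u\n",
                       svc_hashkey(6 /* TCP */, inet_addr("10.0.0.1"), htons(80)));
                return 0;
        }

The fwmark variant is simpler still: the mark is already a well-mixed integer, so masking with IP_VS_SVC_TAB_MASK suffices.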
299 | |||
300 | /* | ||
301 | * Hashes a service in the ip_vs_svc_table by <proto,addr,port> | ||
302 | * or in the ip_vs_svc_fwm_table by fwmark. | ||
303 | * Should be called with locked tables. | ||
304 | */ | ||
305 | static int ip_vs_svc_hash(struct ip_vs_service *svc) | ||
306 | { | ||
307 | unsigned hash; | ||
308 | |||
309 | if (svc->flags & IP_VS_SVC_F_HASHED) { | ||
310 | IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, " | ||
311 | "called from %p\n", __builtin_return_address(0)); | ||
312 | return 0; | ||
313 | } | ||
314 | |||
315 | if (svc->fwmark == 0) { | ||
316 | /* | ||
317 | * Hash it by <protocol,addr,port> in ip_vs_svc_table | ||
318 | */ | ||
319 | hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port); | ||
320 | list_add(&svc->s_list, &ip_vs_svc_table[hash]); | ||
321 | } else { | ||
322 | /* | ||
323 | * Hash it by fwmark in ip_vs_svc_fwm_table | ||
324 | */ | ||
325 | hash = ip_vs_svc_fwm_hashkey(svc->fwmark); | ||
326 | list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); | ||
327 | } | ||
328 | |||
329 | svc->flags |= IP_VS_SVC_F_HASHED; | ||
330 | /* increase its refcnt because it is referenced by the svc table */ | ||
331 | atomic_inc(&svc->refcnt); | ||
332 | return 1; | ||
333 | } | ||
334 | |||
335 | |||
336 | /* | ||
337 | * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table. | ||
338 | * Should be called with locked tables. | ||
339 | */ | ||
340 | static int ip_vs_svc_unhash(struct ip_vs_service *svc) | ||
341 | { | ||
342 | if (!(svc->flags & IP_VS_SVC_F_HASHED)) { | ||
343 | IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, " | ||
344 | "called from %p\n", __builtin_return_address(0)); | ||
345 | return 0; | ||
346 | } | ||
347 | |||
348 | if (svc->fwmark == 0) { | ||
349 | /* Remove it from the ip_vs_svc_table table */ | ||
350 | list_del(&svc->s_list); | ||
351 | } else { | ||
352 | /* Remove it from the ip_vs_svc_fwm_table table */ | ||
353 | list_del(&svc->f_list); | ||
354 | } | ||
355 | |||
356 | svc->flags &= ~IP_VS_SVC_F_HASHED; | ||
357 | atomic_dec(&svc->refcnt); | ||
358 | return 1; | ||
359 | } | ||
360 | |||
361 | |||
362 | /* | ||
363 | * Get service by {proto,addr,port} in the service table. | ||
364 | */ | ||
365 | static __inline__ struct ip_vs_service * | ||
366 | __ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport) | ||
367 | { | ||
368 | unsigned hash; | ||
369 | struct ip_vs_service *svc; | ||
370 | |||
371 | /* Check for "full" addressed entries */ | ||
372 | hash = ip_vs_svc_hashkey(protocol, vaddr, vport); | ||
373 | |||
374 | list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ | ||
375 | if ((svc->addr == vaddr) | ||
376 | && (svc->port == vport) | ||
377 | && (svc->protocol == protocol)) { | ||
378 | /* HIT */ | ||
379 | atomic_inc(&svc->usecnt); | ||
380 | return svc; | ||
381 | } | ||
382 | } | ||
383 | |||
384 | return NULL; | ||
385 | } | ||
386 | |||
387 | |||
388 | /* | ||
389 | * Get service by {fwmark} in the service table. | ||
390 | */ | ||
391 | static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) | ||
392 | { | ||
393 | unsigned hash; | ||
394 | struct ip_vs_service *svc; | ||
395 | |||
396 | /* Check for fwmark addressed entries */ | ||
397 | hash = ip_vs_svc_fwm_hashkey(fwmark); | ||
398 | |||
399 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { | ||
400 | if (svc->fwmark == fwmark) { | ||
401 | /* HIT */ | ||
402 | atomic_inc(&svc->usecnt); | ||
403 | return svc; | ||
404 | } | ||
405 | } | ||
406 | |||
407 | return NULL; | ||
408 | } | ||
409 | |||
410 | struct ip_vs_service * | ||
411 | ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) | ||
412 | { | ||
413 | struct ip_vs_service *svc; | ||
414 | |||
415 | read_lock(&__ip_vs_svc_lock); | ||
416 | |||
417 | /* | ||
418 | * Check the table hashed by fwmark first | ||
419 | */ | ||
420 | if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark))) | ||
421 | goto out; | ||
422 | |||
423 | /* | ||
424 | * Check the table hashed by <protocol,addr,port> | ||
425 | * for "full" addressed entries | ||
426 | */ | ||
427 | svc = __ip_vs_service_get(protocol, vaddr, vport); | ||
428 | |||
429 | if (svc == NULL | ||
430 | && protocol == IPPROTO_TCP | ||
431 | && atomic_read(&ip_vs_ftpsvc_counter) | ||
432 | && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) { | ||
433 | /* | ||
434 | * Check if ftp service entry exists, the packet | ||
435 | * might belong to FTP data connections. | ||
436 | */ | ||
437 | svc = __ip_vs_service_get(protocol, vaddr, FTPPORT); | ||
438 | } | ||
439 | |||
440 | if (svc == NULL | ||
441 | && atomic_read(&ip_vs_nullsvc_counter)) { | ||
442 | /* | ||
443 | * Check if the catch-all port (port zero) exists | ||
444 | */ | ||
445 | svc = __ip_vs_service_get(protocol, vaddr, 0); | ||
446 | } | ||
447 | |||
448 | out: | ||
449 | read_unlock(&__ip_vs_svc_lock); | ||
450 | |||
451 | IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n", | ||
452 | fwmark, ip_vs_proto_name(protocol), | ||
453 | NIPQUAD(vaddr), ntohs(vport), | ||
454 | svc?"hit":"not hit"); | ||
455 | |||
456 | return svc; | ||
457 | } | ||
458 | |||
459 | |||
460 | static inline void | ||
461 | __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) | ||
462 | { | ||
463 | atomic_inc(&svc->refcnt); | ||
464 | dest->svc = svc; | ||
465 | } | ||
466 | |||
467 | static inline void | ||
468 | __ip_vs_unbind_svc(struct ip_vs_dest *dest) | ||
469 | { | ||
470 | struct ip_vs_service *svc = dest->svc; | ||
471 | |||
472 | dest->svc = NULL; | ||
473 | if (atomic_dec_and_test(&svc->refcnt)) | ||
474 | kfree(svc); | ||
475 | } | ||
476 | |||
477 | |||
478 | /* | ||
479 | * Returns hash value for real service | ||
480 | */ | ||
481 | static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port) | ||
482 | { | ||
483 | register unsigned porth = ntohs(port); | ||
484 | |||
485 | return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth) | ||
486 | & IP_VS_RTAB_MASK; | ||
487 | } | ||
488 | |||
489 | /* | ||
490 | * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>. | ||
491 | * should be called with locked tables. | ||
492 | */ | ||
493 | static int ip_vs_rs_hash(struct ip_vs_dest *dest) | ||
494 | { | ||
495 | unsigned hash; | ||
496 | |||
497 | if (!list_empty(&dest->d_list)) { | ||
498 | return 0; | ||
499 | } | ||
500 | |||
501 | /* | ||
502 | * Hash by proto,addr,port, | ||
503 | * which are the parameters of the real service. | ||
504 | */ | ||
505 | hash = ip_vs_rs_hashkey(dest->addr, dest->port); | ||
506 | list_add(&dest->d_list, &ip_vs_rtable[hash]); | ||
507 | |||
508 | return 1; | ||
509 | } | ||
510 | |||
511 | /* | ||
512 | * UNhashes ip_vs_dest from ip_vs_rtable. | ||
513 | * should be called with locked tables. | ||
514 | */ | ||
515 | static int ip_vs_rs_unhash(struct ip_vs_dest *dest) | ||
516 | { | ||
517 | /* | ||
518 | * Remove it from the ip_vs_rtable table. | ||
519 | */ | ||
520 | if (!list_empty(&dest->d_list)) { | ||
521 | list_del(&dest->d_list); | ||
522 | INIT_LIST_HEAD(&dest->d_list); | ||
523 | } | ||
524 | |||
525 | return 1; | ||
526 | } | ||
527 | |||
528 | /* | ||
529 | * Lookup real service by <proto,addr,port> in the real service table. | ||
530 | */ | ||
531 | struct ip_vs_dest * | ||
532 | ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) | ||
533 | { | ||
534 | unsigned hash; | ||
535 | struct ip_vs_dest *dest; | ||
536 | |||
537 | /* | ||
538 | * Check for "full" addressed entries | ||
539 | * Return the first found entry | ||
540 | */ | ||
541 | hash = ip_vs_rs_hashkey(daddr, dport); | ||
542 | |||
543 | read_lock(&__ip_vs_rs_lock); | ||
544 | list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { | ||
545 | if ((dest->addr == daddr) | ||
546 | && (dest->port == dport) | ||
547 | && ((dest->protocol == protocol) || | ||
548 | dest->vfwmark)) { | ||
549 | /* HIT */ | ||
550 | read_unlock(&__ip_vs_rs_lock); | ||
551 | return dest; | ||
552 | } | ||
553 | } | ||
554 | read_unlock(&__ip_vs_rs_lock); | ||
555 | |||
556 | return NULL; | ||
557 | } | ||
558 | |||
559 | /* | ||
560 | * Lookup destination by {addr,port} in the given service | ||
561 | */ | ||
562 | static struct ip_vs_dest * | ||
563 | ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) | ||
564 | { | ||
565 | struct ip_vs_dest *dest; | ||
566 | |||
567 | /* | ||
568 | * Find the destination for the given service | ||
569 | */ | ||
570 | list_for_each_entry(dest, &svc->destinations, n_list) { | ||
571 | if ((dest->addr == daddr) && (dest->port == dport)) { | ||
572 | /* HIT */ | ||
573 | return dest; | ||
574 | } | ||
575 | } | ||
576 | |||
577 | return NULL; | ||
578 | } | ||
579 | |||
580 | /* | ||
581 | * Find destination by {daddr,dport,vaddr,protocol} | ||
582 | * Created to be used in ip_vs_process_message() in | ||
583 | * the backup synchronization daemon. It finds the | ||
584 | * destination to be bound to the received connection | ||
585 | * on the backup. | ||
586 | * | ||
587 | * ip_vs_lookup_real_service() looked promising, but | ||
588 | * does not seem to work as expected. | ||
589 | */ | ||
590 | struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport, | ||
591 | __be32 vaddr, __be16 vport, __u16 protocol) | ||
592 | { | ||
593 | struct ip_vs_dest *dest; | ||
594 | struct ip_vs_service *svc; | ||
595 | |||
596 | svc = ip_vs_service_get(0, protocol, vaddr, vport); | ||
597 | if (!svc) | ||
598 | return NULL; | ||
599 | dest = ip_vs_lookup_dest(svc, daddr, dport); | ||
600 | if (dest) | ||
601 | atomic_inc(&dest->refcnt); | ||
602 | ip_vs_service_put(svc); | ||
603 | return dest; | ||
604 | } | ||
605 | |||
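A hedged sketch of the intended call site: when the backup sync daemon rebuilds a connection from a received sync message, it can resolve and pin the real server as below. The sync_conn_params structure and its field names are illustrative, not the actual sync-message layout:

	/* in-kernel sketch; field names are assumptions */
	struct sync_conn_params {
		__u16  protocol;
		__be32 vaddr, daddr;
		__be16 vport, dport;
	};

	static struct ip_vs_dest *bind_backup_conn(const struct sync_conn_params *p)
	{
		struct ip_vs_dest *dest;

		dest = ip_vs_find_dest(p->daddr, p->dport,
				       p->vaddr, p->vport, p->protocol);
		/* on success, the refcnt taken by ip_vs_find_dest() pins
		 * the dest; the caller must drop it when the connection
		 * entry is released */
		return dest;
	}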
606 | /* | ||
607 | * Lookup dest by {svc,addr,port} in the destination trash. | ||
608 | * The destination trash is used to hold the destinations that are removed | ||
609 | * from the service table but are still referenced by some conn entries. | ||
610 | * The reason for the destination trash is that when a dest is temporarily | ||
611 | * down (taken down either by the administrator or by a monitor program), | ||
612 | * it can be picked back from the trash, the remaining connections to it can | ||
613 | * continue, and the counting information of the dest is also useful for | ||
614 | * scheduling. | ||
615 | */ | ||
616 | static struct ip_vs_dest * | ||
617 | ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) | ||
618 | { | ||
619 | struct ip_vs_dest *dest, *nxt; | ||
620 | |||
621 | /* | ||
622 | * Find the destination in trash | ||
623 | */ | ||
624 | list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { | ||
625 | IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, " | ||
626 | "dest->refcnt=%d\n", | ||
627 | dest->vfwmark, | ||
628 | NIPQUAD(dest->addr), ntohs(dest->port), | ||
629 | atomic_read(&dest->refcnt)); | ||
630 | if (dest->addr == daddr && | ||
631 | dest->port == dport && | ||
632 | dest->vfwmark == svc->fwmark && | ||
633 | dest->protocol == svc->protocol && | ||
634 | (svc->fwmark || | ||
635 | (dest->vaddr == svc->addr && | ||
636 | dest->vport == svc->port))) { | ||
637 | /* HIT */ | ||
638 | return dest; | ||
639 | } | ||
640 | |||
641 | /* | ||
642 | * Try to purge the destination from trash if not referenced | ||
643 | */ | ||
644 | if (atomic_read(&dest->refcnt) == 1) { | ||
645 | IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u " | ||
646 | "from trash\n", | ||
647 | dest->vfwmark, | ||
648 | NIPQUAD(dest->addr), ntohs(dest->port)); | ||
649 | list_del(&dest->n_list); | ||
650 | ip_vs_dst_reset(dest); | ||
651 | __ip_vs_unbind_svc(dest); | ||
652 | kfree(dest); | ||
653 | } | ||
654 | } | ||
655 | |||
656 | return NULL; | ||
657 | } | ||
658 | |||
659 | |||
660 | /* | ||
661 | * Clean up all the destinations in the trash | ||
662 | * Called by the ip_vs_control_cleanup() | ||
663 | * | ||
664 | * When ip_vs_control_cleanup() is invoked on ipvs module exit, | ||
665 | * the service tables must have been flushed and all the connections | ||
666 | * must have expired, and the refcnt of each destination in the trash | ||
667 | * must be 1, so we simply release them here. | ||
668 | */ | ||
669 | static void ip_vs_trash_cleanup(void) | ||
670 | { | ||
671 | struct ip_vs_dest *dest, *nxt; | ||
672 | |||
673 | list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { | ||
674 | list_del(&dest->n_list); | ||
675 | ip_vs_dst_reset(dest); | ||
676 | __ip_vs_unbind_svc(dest); | ||
677 | kfree(dest); | ||
678 | } | ||
679 | } | ||
680 | |||
681 | |||
682 | static void | ||
683 | ip_vs_zero_stats(struct ip_vs_stats *stats) | ||
684 | { | ||
685 | spin_lock_bh(&stats->lock); | ||
686 | |||
687 | stats->conns = 0; | ||
688 | stats->inpkts = 0; | ||
689 | stats->outpkts = 0; | ||
690 | stats->inbytes = 0; | ||
691 | stats->outbytes = 0; | ||
692 | |||
693 | stats->cps = 0; | ||
694 | stats->inpps = 0; | ||
695 | stats->outpps = 0; | ||
696 | stats->inbps = 0; | ||
697 | stats->outbps = 0; | ||
698 | |||
699 | ip_vs_zero_estimator(stats); | ||
700 | |||
701 | spin_unlock_bh(&stats->lock); | ||
702 | } | ||
703 | |||
704 | /* | ||
705 | * Update a destination in the given service | ||
706 | */ | ||
707 | static void | ||
708 | __ip_vs_update_dest(struct ip_vs_service *svc, | ||
709 | struct ip_vs_dest *dest, struct ip_vs_dest_user *udest) | ||
710 | { | ||
711 | int conn_flags; | ||
712 | |||
713 | /* set the weight and the flags */ | ||
714 | atomic_set(&dest->weight, udest->weight); | ||
715 | conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; | ||
716 | |||
717 | /* check if local node and update the flags */ | ||
718 | if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) { | ||
719 | conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) | ||
720 | | IP_VS_CONN_F_LOCALNODE; | ||
721 | } | ||
722 | |||
723 | /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ | ||
724 | if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) { | ||
725 | conn_flags |= IP_VS_CONN_F_NOOUTPUT; | ||
726 | } else { | ||
727 | /* | ||
728 | * Put the real service in ip_vs_rtable if not present. | ||
729 | * For now only for NAT! | ||
730 | */ | ||
731 | write_lock_bh(&__ip_vs_rs_lock); | ||
732 | ip_vs_rs_hash(dest); | ||
733 | write_unlock_bh(&__ip_vs_rs_lock); | ||
734 | } | ||
735 | atomic_set(&dest->conn_flags, conn_flags); | ||
736 | |||
737 | /* bind the service */ | ||
738 | if (!dest->svc) { | ||
739 | __ip_vs_bind_svc(dest, svc); | ||
740 | } else { | ||
741 | if (dest->svc != svc) { | ||
742 | __ip_vs_unbind_svc(dest); | ||
743 | ip_vs_zero_stats(&dest->stats); | ||
744 | __ip_vs_bind_svc(dest, svc); | ||
745 | } | ||
746 | } | ||
747 | |||
748 | /* set the dest status flags */ | ||
749 | dest->flags |= IP_VS_DEST_F_AVAILABLE; | ||
750 | |||
751 | if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold) | ||
752 | dest->flags &= ~IP_VS_DEST_F_OVERLOAD; | ||
753 | dest->u_threshold = udest->u_threshold; | ||
754 | dest->l_threshold = udest->l_threshold; | ||
755 | } | ||
756 | |||
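The "!= 0" test above works because masquerading (NAT) is encoded as the all-zero forwarding method within IP_VS_CONN_F_FWD_MASK. A sketch of that encoding; the numeric values mirror ip_vs.h of this era and should be treated as illustrative here:

	#define IP_VS_CONN_F_FWD_MASK   0x0007	/* forwarding method bits */
	#define IP_VS_CONN_F_MASQ       0x0000	/* masquerading (NAT)     */
	#define IP_VS_CONN_F_LOCALNODE  0x0001	/* local node             */
	#define IP_VS_CONN_F_TUNNEL     0x0002	/* tunneling              */
	#define IP_VS_CONN_F_DROUTE     0x0003	/* direct routing         */

	static inline int fwd_method_is_nat(int conn_flags)
	{
		/* NAT is the all-zero method, so the "!= 0" test above
		 * matches every non-NAT forwarding method */
		return (conn_flags & IP_VS_CONN_F_FWD_MASK) == IP_VS_CONN_F_MASQ;
	}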
757 | |||
758 | /* | ||
759 | * Create a destination for the given service | ||
760 | */ | ||
761 | static int | ||
762 | ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, | ||
763 | struct ip_vs_dest **dest_p) | ||
764 | { | ||
765 | struct ip_vs_dest *dest; | ||
766 | unsigned atype; | ||
767 | |||
768 | EnterFunction(2); | ||
769 | |||
770 | atype = inet_addr_type(&init_net, udest->addr); | ||
771 | if (atype != RTN_LOCAL && atype != RTN_UNICAST) | ||
772 | return -EINVAL; | ||
773 | |||
774 | dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); | ||
775 | if (dest == NULL) { | ||
776 | IP_VS_ERR("ip_vs_new_dest: kzalloc failed.\n"); | ||
777 | return -ENOMEM; | ||
778 | } | ||
779 | |||
780 | dest->protocol = svc->protocol; | ||
781 | dest->vaddr = svc->addr; | ||
782 | dest->vport = svc->port; | ||
783 | dest->vfwmark = svc->fwmark; | ||
784 | dest->addr = udest->addr; | ||
785 | dest->port = udest->port; | ||
786 | |||
787 | atomic_set(&dest->activeconns, 0); | ||
788 | atomic_set(&dest->inactconns, 0); | ||
789 | atomic_set(&dest->persistconns, 0); | ||
790 | atomic_set(&dest->refcnt, 0); | ||
791 | |||
792 | INIT_LIST_HEAD(&dest->d_list); | ||
793 | spin_lock_init(&dest->dst_lock); | ||
794 | spin_lock_init(&dest->stats.lock); | ||
795 | __ip_vs_update_dest(svc, dest, udest); | ||
796 | ip_vs_new_estimator(&dest->stats); | ||
797 | |||
798 | *dest_p = dest; | ||
799 | |||
800 | LeaveFunction(2); | ||
801 | return 0; | ||
802 | } | ||
803 | |||
804 | |||
805 | /* | ||
806 | * Add a destination into an existing service | ||
807 | */ | ||
808 | static int | ||
809 | ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) | ||
810 | { | ||
811 | struct ip_vs_dest *dest; | ||
812 | __be32 daddr = udest->addr; | ||
813 | __be16 dport = udest->port; | ||
814 | int ret; | ||
815 | |||
816 | EnterFunction(2); | ||
817 | |||
818 | if (udest->weight < 0) { | ||
819 | IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n"); | ||
820 | return -ERANGE; | ||
821 | } | ||
822 | |||
823 | if (udest->l_threshold > udest->u_threshold) { | ||
824 | IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than " | ||
825 | "upper threshold\n"); | ||
826 | return -ERANGE; | ||
827 | } | ||
828 | |||
829 | /* | ||
830 | * Check if the dest already exists in the list | ||
831 | */ | ||
832 | dest = ip_vs_lookup_dest(svc, daddr, dport); | ||
833 | if (dest != NULL) { | ||
834 | IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n"); | ||
835 | return -EEXIST; | ||
836 | } | ||
837 | |||
838 | /* | ||
839 | * Check if the dest already exists in the trash and | ||
840 | * is from the same service | ||
841 | */ | ||
842 | dest = ip_vs_trash_get_dest(svc, daddr, dport); | ||
843 | if (dest != NULL) { | ||
844 | IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, " | ||
845 | "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n", | ||
846 | NIPQUAD(daddr), ntohs(dport), | ||
847 | atomic_read(&dest->refcnt), | ||
848 | dest->vfwmark, | ||
849 | NIPQUAD(dest->vaddr), | ||
850 | ntohs(dest->vport)); | ||
851 | __ip_vs_update_dest(svc, dest, udest); | ||
852 | |||
853 | /* | ||
854 | * Get the destination from the trash | ||
855 | */ | ||
856 | list_del(&dest->n_list); | ||
857 | |||
858 | ip_vs_new_estimator(&dest->stats); | ||
859 | |||
860 | write_lock_bh(&__ip_vs_svc_lock); | ||
861 | |||
862 | /* | ||
863 | * Wait until all other svc users go away. | ||
864 | */ | ||
865 | IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); | ||
866 | |||
867 | list_add(&dest->n_list, &svc->destinations); | ||
868 | svc->num_dests++; | ||
869 | |||
870 | /* call the update_service function of its scheduler */ | ||
871 | svc->scheduler->update_service(svc); | ||
872 | |||
873 | write_unlock_bh(&__ip_vs_svc_lock); | ||
874 | return 0; | ||
875 | } | ||
876 | |||
877 | /* | ||
878 | * Allocate and initialize the dest structure | ||
879 | */ | ||
880 | ret = ip_vs_new_dest(svc, udest, &dest); | ||
881 | if (ret) { | ||
882 | return ret; | ||
883 | } | ||
884 | |||
885 | /* | ||
886 | * Add the dest entry into the list | ||
887 | */ | ||
888 | atomic_inc(&dest->refcnt); | ||
889 | |||
890 | write_lock_bh(&__ip_vs_svc_lock); | ||
891 | |||
892 | /* | ||
893 | * Wait until all other svc users go away. | ||
894 | */ | ||
895 | IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); | ||
896 | |||
897 | list_add(&dest->n_list, &svc->destinations); | ||
898 | svc->num_dests++; | ||
899 | |||
900 | /* call the update_service function of its scheduler */ | ||
901 | svc->scheduler->update_service(svc); | ||
902 | |||
903 | write_unlock_bh(&__ip_vs_svc_lock); | ||
904 | |||
905 | LeaveFunction(2); | ||
906 | |||
907 | return 0; | ||
908 | } | ||
909 | |||
910 | |||
911 | /* | ||
912 | * Edit a destination in the given service | ||
913 | */ | ||
914 | static int | ||
915 | ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) | ||
916 | { | ||
917 | struct ip_vs_dest *dest; | ||
918 | __be32 daddr = udest->addr; | ||
919 | __be16 dport = udest->port; | ||
920 | |||
921 | EnterFunction(2); | ||
922 | |||
923 | if (udest->weight < 0) { | ||
924 | IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n"); | ||
925 | return -ERANGE; | ||
926 | } | ||
927 | |||
928 | if (udest->l_threshold > udest->u_threshold) { | ||
929 | IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than " | ||
930 | "upper threshold\n"); | ||
931 | return -ERANGE; | ||
932 | } | ||
933 | |||
934 | /* | ||
935 | * Lookup the destination list | ||
936 | */ | ||
937 | dest = ip_vs_lookup_dest(svc, daddr, dport); | ||
938 | if (dest == NULL) { | ||
939 | IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n"); | ||
940 | return -ENOENT; | ||
941 | } | ||
942 | |||
943 | __ip_vs_update_dest(svc, dest, udest); | ||
944 | |||
945 | write_lock_bh(&__ip_vs_svc_lock); | ||
946 | |||
947 | /* Wait until all other svc users go away */ | ||
948 | IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); | ||
949 | |||
950 | /* call the update_service, because server weight may be changed */ | ||
951 | svc->scheduler->update_service(svc); | ||
952 | |||
953 | write_unlock_bh(&__ip_vs_svc_lock); | ||
954 | |||
955 | LeaveFunction(2); | ||
956 | |||
957 | return 0; | ||
958 | } | ||
959 | |||
960 | |||
961 | /* | ||
962 | * Delete a destination (must be already unlinked from the service) | ||
963 | */ | ||
964 | static void __ip_vs_del_dest(struct ip_vs_dest *dest) | ||
965 | { | ||
966 | ip_vs_kill_estimator(&dest->stats); | ||
967 | |||
968 | /* | ||
969 | * Remove it from the d-linked list with the real services. | ||
970 | */ | ||
971 | write_lock_bh(&__ip_vs_rs_lock); | ||
972 | ip_vs_rs_unhash(dest); | ||
973 | write_unlock_bh(&__ip_vs_rs_lock); | ||
974 | |||
975 | /* | ||
976 | * Decrease the refcnt of the dest, and free the dest | ||
977 | * if nobody refers to it (refcnt=0). Otherwise, throw | ||
978 | * the destination into the trash. | ||
979 | */ | ||
980 | if (atomic_dec_and_test(&dest->refcnt)) { | ||
981 | ip_vs_dst_reset(dest); | ||
982 | /* simply decrease svc->refcnt here, let the caller check | ||
983 | and release the service if nobody refers to it. | ||
984 | Only user context can release destination and service, | ||
985 | and only one user context can update virtual service at a | ||
986 | time, so the operation here is OK */ | ||
987 | atomic_dec(&dest->svc->refcnt); | ||
988 | kfree(dest); | ||
989 | } else { | ||
990 | IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, " | ||
991 | "dest->refcnt=%d\n", | ||
992 | NIPQUAD(dest->addr), ntohs(dest->port), | ||
993 | atomic_read(&dest->refcnt)); | ||
994 | list_add(&dest->n_list, &ip_vs_dest_trash); | ||
995 | atomic_inc(&dest->refcnt); | ||
996 | } | ||
997 | } | ||
998 | |||
999 | |||
1000 | /* | ||
1001 | * Unlink a destination from the given service | ||
1002 | */ | ||
1003 | static void __ip_vs_unlink_dest(struct ip_vs_service *svc, | ||
1004 | struct ip_vs_dest *dest, | ||
1005 | int svcupd) | ||
1006 | { | ||
1007 | dest->flags &= ~IP_VS_DEST_F_AVAILABLE; | ||
1008 | |||
1009 | /* | ||
1010 | * Remove it from the d-linked destination list. | ||
1011 | */ | ||
1012 | list_del(&dest->n_list); | ||
1013 | svc->num_dests--; | ||
1014 | if (svcupd) { | ||
1015 | /* | ||
1016 | * Call the update_service function of its scheduler | ||
1017 | */ | ||
1018 | svc->scheduler->update_service(svc); | ||
1019 | } | ||
1020 | } | ||
1021 | |||
1022 | |||
1023 | /* | ||
1024 | * Delete a destination server in the given service | ||
1025 | */ | ||
1026 | static int | ||
1027 | ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest) | ||
1028 | { | ||
1029 | struct ip_vs_dest *dest; | ||
1030 | __be32 daddr = udest->addr; | ||
1031 | __be16 dport = udest->port; | ||
1032 | |||
1033 | EnterFunction(2); | ||
1034 | |||
1035 | dest = ip_vs_lookup_dest(svc, daddr, dport); | ||
1036 | if (dest == NULL) { | ||
1037 | IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n"); | ||
1038 | return -ENOENT; | ||
1039 | } | ||
1040 | |||
1041 | write_lock_bh(&__ip_vs_svc_lock); | ||
1042 | |||
1043 | /* | ||
1044 | * Wait until all other svc users go away. | ||
1045 | */ | ||
1046 | IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); | ||
1047 | |||
1048 | /* | ||
1049 | * Unlink dest from the service | ||
1050 | */ | ||
1051 | __ip_vs_unlink_dest(svc, dest, 1); | ||
1052 | |||
1053 | write_unlock_bh(&__ip_vs_svc_lock); | ||
1054 | |||
1055 | /* | ||
1056 | * Delete the destination | ||
1057 | */ | ||
1058 | __ip_vs_del_dest(dest); | ||
1059 | |||
1060 | LeaveFunction(2); | ||
1061 | |||
1062 | return 0; | ||
1063 | } | ||
1064 | |||
1065 | |||
1066 | /* | ||
1067 | * Add a service into the service hash table | ||
1068 | */ | ||
1069 | static int | ||
1070 | ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) | ||
1071 | { | ||
1072 | int ret = 0; | ||
1073 | struct ip_vs_scheduler *sched = NULL; | ||
1074 | struct ip_vs_service *svc = NULL; | ||
1075 | |||
1076 | /* increase the module use count */ | ||
1077 | ip_vs_use_count_inc(); | ||
1078 | |||
1079 | /* Lookup the scheduler by 'u->sched_name' */ | ||
1080 | sched = ip_vs_scheduler_get(u->sched_name); | ||
1081 | if (sched == NULL) { | ||
1082 | IP_VS_INFO("Scheduler module ip_vs_%s not found\n", | ||
1083 | u->sched_name); | ||
1084 | ret = -ENOENT; | ||
1085 | goto out_mod_dec; | ||
1086 | } | ||
1087 | |||
1088 | svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC); | ||
1089 | if (svc == NULL) { | ||
1090 | IP_VS_DBG(1, "ip_vs_add_service: kzalloc failed.\n"); | ||
1091 | ret = -ENOMEM; | ||
1092 | goto out_err; | ||
1093 | } | ||
1094 | |||
1095 | /* I'm the first user of the service */ | ||
1096 | atomic_set(&svc->usecnt, 1); | ||
1097 | atomic_set(&svc->refcnt, 0); | ||
1098 | |||
1099 | svc->protocol = u->protocol; | ||
1100 | svc->addr = u->addr; | ||
1101 | svc->port = u->port; | ||
1102 | svc->fwmark = u->fwmark; | ||
1103 | svc->flags = u->flags; | ||
1104 | svc->timeout = u->timeout * HZ; | ||
1105 | svc->netmask = u->netmask; | ||
1106 | |||
1107 | INIT_LIST_HEAD(&svc->destinations); | ||
1108 | rwlock_init(&svc->sched_lock); | ||
1109 | spin_lock_init(&svc->stats.lock); | ||
1110 | |||
1111 | /* Bind the scheduler */ | ||
1112 | ret = ip_vs_bind_scheduler(svc, sched); | ||
1113 | if (ret) | ||
1114 | goto out_err; | ||
1115 | sched = NULL; | ||
1116 | |||
1117 | /* Update the virtual service counters */ | ||
1118 | if (svc->port == FTPPORT) | ||
1119 | atomic_inc(&ip_vs_ftpsvc_counter); | ||
1120 | else if (svc->port == 0) | ||
1121 | atomic_inc(&ip_vs_nullsvc_counter); | ||
1122 | |||
1123 | ip_vs_new_estimator(&svc->stats); | ||
1124 | ip_vs_num_services++; | ||
1125 | |||
1126 | /* Hash the service into the service table */ | ||
1127 | write_lock_bh(&__ip_vs_svc_lock); | ||
1128 | ip_vs_svc_hash(svc); | ||
1129 | write_unlock_bh(&__ip_vs_svc_lock); | ||
1130 | |||
1131 | *svc_p = svc; | ||
1132 | return 0; | ||
1133 | |||
1134 | out_err: | ||
1135 | if (svc != NULL) { | ||
1136 | if (svc->scheduler) | ||
1137 | ip_vs_unbind_scheduler(svc); | ||
1138 | if (svc->inc) { | ||
1139 | local_bh_disable(); | ||
1140 | ip_vs_app_inc_put(svc->inc); | ||
1141 | local_bh_enable(); | ||
1142 | } | ||
1143 | kfree(svc); | ||
1144 | } | ||
1145 | ip_vs_scheduler_put(sched); | ||
1146 | |||
1147 | out_mod_dec: | ||
1148 | /* decrease the module use count */ | ||
1149 | ip_vs_use_count_dec(); | ||
1150 | |||
1151 | return ret; | ||
1152 | } | ||
1153 | |||
1154 | |||
1155 | /* | ||
1156 | * Edit a service and bind it with a new scheduler | ||
1157 | */ | ||
1158 | static int | ||
1159 | ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) | ||
1160 | { | ||
1161 | struct ip_vs_scheduler *sched, *old_sched; | ||
1162 | int ret = 0; | ||
1163 | |||
1164 | /* | ||
1165 | * Lookup the scheduler, by 'u->sched_name' | ||
1166 | */ | ||
1167 | sched = ip_vs_scheduler_get(u->sched_name); | ||
1168 | if (sched == NULL) { | ||
1169 | IP_VS_INFO("Scheduler module ip_vs_%s not found\n", | ||
1170 | u->sched_name); | ||
1171 | return -ENOENT; | ||
1172 | } | ||
1173 | old_sched = sched; | ||
1174 | |||
1175 | write_lock_bh(&__ip_vs_svc_lock); | ||
1176 | |||
1177 | /* | ||
1178 | * Wait until all other svc users go away. | ||
1179 | */ | ||
1180 | IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); | ||
1181 | |||
1182 | /* | ||
1183 | * Set the flags and timeout value | ||
1184 | */ | ||
1185 | svc->flags = u->flags | IP_VS_SVC_F_HASHED; | ||
1186 | svc->timeout = u->timeout * HZ; | ||
1187 | svc->netmask = u->netmask; | ||
1188 | |||
1189 | old_sched = svc->scheduler; | ||
1190 | if (sched != old_sched) { | ||
1191 | /* | ||
1192 | * Unbind the old scheduler | ||
1193 | */ | ||
1194 | if ((ret = ip_vs_unbind_scheduler(svc))) { | ||
1195 | old_sched = sched; | ||
1196 | goto out; | ||
1197 | } | ||
1198 | |||
1199 | /* | ||
1200 | * Bind the new scheduler | ||
1201 | */ | ||
1202 | if ((ret = ip_vs_bind_scheduler(svc, sched))) { | ||
1203 | /* | ||
1204 | * If ip_vs_bind_scheduler fails, restore the old | ||
1205 | * scheduler. | ||
1206 | * The main reason for failure is lack of memory. | ||
1207 | * | ||
1208 | * The question is whether the old scheduler can | ||
1209 | * always be restored. TODO: if it cannot be restored | ||
1210 | * at some point, we must delete the service; | ||
1211 | * otherwise the system may crash. | ||
1212 | */ | ||
1213 | ip_vs_bind_scheduler(svc, old_sched); | ||
1214 | old_sched = sched; | ||
1215 | goto out; | ||
1216 | } | ||
1217 | } | ||
1218 | |||
1219 | out: | ||
1220 | write_unlock_bh(&__ip_vs_svc_lock); | ||
1221 | |||
1222 | if (old_sched) | ||
1223 | ip_vs_scheduler_put(old_sched); | ||
1224 | |||
1225 | return ret; | ||
1226 | } | ||
1227 | |||
1228 | |||
1229 | /* | ||
1230 | * Delete a service from the service list | ||
1231 | * - The service must be unlinked, unlocked and not referenced! | ||
1232 | * - We are called under _bh lock | ||
1233 | */ | ||
1234 | static void __ip_vs_del_service(struct ip_vs_service *svc) | ||
1235 | { | ||
1236 | struct ip_vs_dest *dest, *nxt; | ||
1237 | struct ip_vs_scheduler *old_sched; | ||
1238 | |||
1239 | ip_vs_num_services--; | ||
1240 | ip_vs_kill_estimator(&svc->stats); | ||
1241 | |||
1242 | /* Unbind scheduler */ | ||
1243 | old_sched = svc->scheduler; | ||
1244 | ip_vs_unbind_scheduler(svc); | ||
1245 | if (old_sched) | ||
1246 | ip_vs_scheduler_put(old_sched); | ||
1247 | |||
1248 | /* Unbind app inc */ | ||
1249 | if (svc->inc) { | ||
1250 | ip_vs_app_inc_put(svc->inc); | ||
1251 | svc->inc = NULL; | ||
1252 | } | ||
1253 | |||
1254 | /* | ||
1255 | * Unlink the whole destination list | ||
1256 | */ | ||
1257 | list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { | ||
1258 | __ip_vs_unlink_dest(svc, dest, 0); | ||
1259 | __ip_vs_del_dest(dest); | ||
1260 | } | ||
1261 | |||
1262 | /* | ||
1263 | * Update the virtual service counters | ||
1264 | */ | ||
1265 | if (svc->port == FTPPORT) | ||
1266 | atomic_dec(&ip_vs_ftpsvc_counter); | ||
1267 | else if (svc->port == 0) | ||
1268 | atomic_dec(&ip_vs_nullsvc_counter); | ||
1269 | |||
1270 | /* | ||
1271 | * Free the service if nobody refers to it | ||
1272 | */ | ||
1273 | if (atomic_read(&svc->refcnt) == 0) | ||
1274 | kfree(svc); | ||
1275 | |||
1276 | /* decrease the module use count */ | ||
1277 | ip_vs_use_count_dec(); | ||
1278 | } | ||
1279 | |||
1280 | /* | ||
1281 | * Delete a service from the service list | ||
1282 | */ | ||
1283 | static int ip_vs_del_service(struct ip_vs_service *svc) | ||
1284 | { | ||
1285 | if (svc == NULL) | ||
1286 | return -EEXIST; | ||
1287 | |||
1288 | /* | ||
1289 | * Unhash it from the service table | ||
1290 | */ | ||
1291 | write_lock_bh(&__ip_vs_svc_lock); | ||
1292 | |||
1293 | ip_vs_svc_unhash(svc); | ||
1294 | |||
1295 | /* | ||
1296 | * Wait until all the svc users go away. | ||
1297 | */ | ||
1298 | IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); | ||
1299 | |||
1300 | __ip_vs_del_service(svc); | ||
1301 | |||
1302 | write_unlock_bh(&__ip_vs_svc_lock); | ||
1303 | |||
1304 | return 0; | ||
1305 | } | ||
1306 | |||
1307 | |||
1308 | /* | ||
1309 | * Flush all the virtual services | ||
1310 | */ | ||
1311 | static int ip_vs_flush(void) | ||
1312 | { | ||
1313 | int idx; | ||
1314 | struct ip_vs_service *svc, *nxt; | ||
1315 | |||
1316 | /* | ||
1317 | * Flush the service table hashed by <protocol,addr,port> | ||
1318 | */ | ||
1319 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | ||
1320 | list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { | ||
1321 | write_lock_bh(&__ip_vs_svc_lock); | ||
1322 | ip_vs_svc_unhash(svc); | ||
1323 | /* | ||
1324 | * Wait until all the svc users go away. | ||
1325 | */ | ||
1326 | IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); | ||
1327 | __ip_vs_del_service(svc); | ||
1328 | write_unlock_bh(&__ip_vs_svc_lock); | ||
1329 | } | ||
1330 | } | ||
1331 | |||
1332 | /* | ||
1333 | * Flush the service table hashed by fwmark | ||
1334 | */ | ||
1335 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | ||
1336 | list_for_each_entry_safe(svc, nxt, | ||
1337 | &ip_vs_svc_fwm_table[idx], f_list) { | ||
1338 | write_lock_bh(&__ip_vs_svc_lock); | ||
1339 | ip_vs_svc_unhash(svc); | ||
1340 | /* | ||
1341 | * Wait until all the svc users go away. | ||
1342 | */ | ||
1343 | IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); | ||
1344 | __ip_vs_del_service(svc); | ||
1345 | write_unlock_bh(&__ip_vs_svc_lock); | ||
1346 | } | ||
1347 | } | ||
1348 | |||
1349 | return 0; | ||
1350 | } | ||
1351 | |||
1352 | |||
1353 | /* | ||
1354 | * Zero counters in a service or all services | ||
1355 | */ | ||
1356 | static int ip_vs_zero_service(struct ip_vs_service *svc) | ||
1357 | { | ||
1358 | struct ip_vs_dest *dest; | ||
1359 | |||
1360 | write_lock_bh(&__ip_vs_svc_lock); | ||
1361 | list_for_each_entry(dest, &svc->destinations, n_list) { | ||
1362 | ip_vs_zero_stats(&dest->stats); | ||
1363 | } | ||
1364 | ip_vs_zero_stats(&svc->stats); | ||
1365 | write_unlock_bh(&__ip_vs_svc_lock); | ||
1366 | return 0; | ||
1367 | } | ||
1368 | |||
1369 | static int ip_vs_zero_all(void) | ||
1370 | { | ||
1371 | int idx; | ||
1372 | struct ip_vs_service *svc; | ||
1373 | |||
1374 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | ||
1375 | list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { | ||
1376 | ip_vs_zero_service(svc); | ||
1377 | } | ||
1378 | } | ||
1379 | |||
1380 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | ||
1381 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { | ||
1382 | ip_vs_zero_service(svc); | ||
1383 | } | ||
1384 | } | ||
1385 | |||
1386 | ip_vs_zero_stats(&ip_vs_stats); | ||
1387 | return 0; | ||
1388 | } | ||
1389 | |||
1390 | |||
1391 | static int | ||
1392 | proc_do_defense_mode(ctl_table *table, int write, struct file * filp, | ||
1393 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
1394 | { | ||
1395 | int *valp = table->data; | ||
1396 | int val = *valp; | ||
1397 | int rc; | ||
1398 | |||
1399 | rc = proc_dointvec(table, write, filp, buffer, lenp, ppos); | ||
1400 | if (write && (*valp != val)) { | ||
1401 | if ((*valp < 0) || (*valp > 3)) { | ||
1402 | /* Restore the correct value */ | ||
1403 | *valp = val; | ||
1404 | } else { | ||
1405 | update_defense_level(); | ||
1406 | } | ||
1407 | } | ||
1408 | return rc; | ||
1409 | } | ||
1410 | |||
1411 | |||
1412 | static int | ||
1413 | proc_do_sync_threshold(ctl_table *table, int write, struct file *filp, | ||
1414 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
1415 | { | ||
1416 | int *valp = table->data; | ||
1417 | int val[2]; | ||
1418 | int rc; | ||
1419 | |||
1420 | /* backup the value first */ | ||
1421 | memcpy(val, valp, sizeof(val)); | ||
1422 | |||
1423 | rc = proc_dointvec(table, write, filp, buffer, lenp, ppos); | ||
1424 | if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) { | ||
1425 | /* Restore the correct value */ | ||
1426 | memcpy(valp, val, sizeof(val)); | ||
1427 | } | ||
1428 | return rc; | ||
1429 | } | ||
1430 | |||
1431 | |||
1432 | /* | ||
1433 | * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) | ||
1434 | */ | ||
1435 | |||
1436 | static struct ctl_table vs_vars[] = { | ||
1437 | { | ||
1438 | .procname = "amemthresh", | ||
1439 | .data = &sysctl_ip_vs_amemthresh, | ||
1440 | .maxlen = sizeof(int), | ||
1441 | .mode = 0644, | ||
1442 | .proc_handler = &proc_dointvec, | ||
1443 | }, | ||
1444 | #ifdef CONFIG_IP_VS_DEBUG | ||
1445 | { | ||
1446 | .procname = "debug_level", | ||
1447 | .data = &sysctl_ip_vs_debug_level, | ||
1448 | .maxlen = sizeof(int), | ||
1449 | .mode = 0644, | ||
1450 | .proc_handler = &proc_dointvec, | ||
1451 | }, | ||
1452 | #endif | ||
1453 | { | ||
1454 | .procname = "am_droprate", | ||
1455 | .data = &sysctl_ip_vs_am_droprate, | ||
1456 | .maxlen = sizeof(int), | ||
1457 | .mode = 0644, | ||
1458 | .proc_handler = &proc_dointvec, | ||
1459 | }, | ||
1460 | { | ||
1461 | .procname = "drop_entry", | ||
1462 | .data = &sysctl_ip_vs_drop_entry, | ||
1463 | .maxlen = sizeof(int), | ||
1464 | .mode = 0644, | ||
1465 | .proc_handler = &proc_do_defense_mode, | ||
1466 | }, | ||
1467 | { | ||
1468 | .procname = "drop_packet", | ||
1469 | .data = &sysctl_ip_vs_drop_packet, | ||
1470 | .maxlen = sizeof(int), | ||
1471 | .mode = 0644, | ||
1472 | .proc_handler = &proc_do_defense_mode, | ||
1473 | }, | ||
1474 | { | ||
1475 | .procname = "secure_tcp", | ||
1476 | .data = &sysctl_ip_vs_secure_tcp, | ||
1477 | .maxlen = sizeof(int), | ||
1478 | .mode = 0644, | ||
1479 | .proc_handler = &proc_do_defense_mode, | ||
1480 | }, | ||
1481 | #if 0 | ||
1482 | { | ||
1483 | .procname = "timeout_established", | ||
1484 | .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED], | ||
1485 | .maxlen = sizeof(int), | ||
1486 | .mode = 0644, | ||
1487 | .proc_handler = &proc_dointvec_jiffies, | ||
1488 | }, | ||
1489 | { | ||
1490 | .procname = "timeout_synsent", | ||
1491 | .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT], | ||
1492 | .maxlen = sizeof(int), | ||
1493 | .mode = 0644, | ||
1494 | .proc_handler = &proc_dointvec_jiffies, | ||
1495 | }, | ||
1496 | { | ||
1497 | .procname = "timeout_synrecv", | ||
1498 | .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV], | ||
1499 | .maxlen = sizeof(int), | ||
1500 | .mode = 0644, | ||
1501 | .proc_handler = &proc_dointvec_jiffies, | ||
1502 | }, | ||
1503 | { | ||
1504 | .procname = "timeout_finwait", | ||
1505 | .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT], | ||
1506 | .maxlen = sizeof(int), | ||
1507 | .mode = 0644, | ||
1508 | .proc_handler = &proc_dointvec_jiffies, | ||
1509 | }, | ||
1510 | { | ||
1511 | .procname = "timeout_timewait", | ||
1512 | .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT], | ||
1513 | .maxlen = sizeof(int), | ||
1514 | .mode = 0644, | ||
1515 | .proc_handler = &proc_dointvec_jiffies, | ||
1516 | }, | ||
1517 | { | ||
1518 | .procname = "timeout_close", | ||
1519 | .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE], | ||
1520 | .maxlen = sizeof(int), | ||
1521 | .mode = 0644, | ||
1522 | .proc_handler = &proc_dointvec_jiffies, | ||
1523 | }, | ||
1524 | { | ||
1525 | .procname = "timeout_closewait", | ||
1526 | .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT], | ||
1527 | .maxlen = sizeof(int), | ||
1528 | .mode = 0644, | ||
1529 | .proc_handler = &proc_dointvec_jiffies, | ||
1530 | }, | ||
1531 | { | ||
1532 | .procname = "timeout_lastack", | ||
1533 | .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK], | ||
1534 | .maxlen = sizeof(int), | ||
1535 | .mode = 0644, | ||
1536 | .proc_handler = &proc_dointvec_jiffies, | ||
1537 | }, | ||
1538 | { | ||
1539 | .procname = "timeout_listen", | ||
1540 | .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN], | ||
1541 | .maxlen = sizeof(int), | ||
1542 | .mode = 0644, | ||
1543 | .proc_handler = &proc_dointvec_jiffies, | ||
1544 | }, | ||
1545 | { | ||
1546 | .procname = "timeout_synack", | ||
1547 | .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK], | ||
1548 | .maxlen = sizeof(int), | ||
1549 | .mode = 0644, | ||
1550 | .proc_handler = &proc_dointvec_jiffies, | ||
1551 | }, | ||
1552 | { | ||
1553 | .procname = "timeout_udp", | ||
1554 | .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP], | ||
1555 | .maxlen = sizeof(int), | ||
1556 | .mode = 0644, | ||
1557 | .proc_handler = &proc_dointvec_jiffies, | ||
1558 | }, | ||
1559 | { | ||
1560 | .procname = "timeout_icmp", | ||
1561 | .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP], | ||
1562 | .maxlen = sizeof(int), | ||
1563 | .mode = 0644, | ||
1564 | .proc_handler = &proc_dointvec_jiffies, | ||
1565 | }, | ||
1566 | #endif | ||
1567 | { | ||
1568 | .procname = "cache_bypass", | ||
1569 | .data = &sysctl_ip_vs_cache_bypass, | ||
1570 | .maxlen = sizeof(int), | ||
1571 | .mode = 0644, | ||
1572 | .proc_handler = &proc_dointvec, | ||
1573 | }, | ||
1574 | { | ||
1575 | .procname = "expire_nodest_conn", | ||
1576 | .data = &sysctl_ip_vs_expire_nodest_conn, | ||
1577 | .maxlen = sizeof(int), | ||
1578 | .mode = 0644, | ||
1579 | .proc_handler = &proc_dointvec, | ||
1580 | }, | ||
1581 | { | ||
1582 | .procname = "expire_quiescent_template", | ||
1583 | .data = &sysctl_ip_vs_expire_quiescent_template, | ||
1584 | .maxlen = sizeof(int), | ||
1585 | .mode = 0644, | ||
1586 | .proc_handler = &proc_dointvec, | ||
1587 | }, | ||
1588 | { | ||
1589 | .procname = "sync_threshold", | ||
1590 | .data = &sysctl_ip_vs_sync_threshold, | ||
1591 | .maxlen = sizeof(sysctl_ip_vs_sync_threshold), | ||
1592 | .mode = 0644, | ||
1593 | .proc_handler = &proc_do_sync_threshold, | ||
1594 | }, | ||
1595 | { | ||
1596 | .procname = "nat_icmp_send", | ||
1597 | .data = &sysctl_ip_vs_nat_icmp_send, | ||
1598 | .maxlen = sizeof(int), | ||
1599 | .mode = 0644, | ||
1600 | .proc_handler = &proc_dointvec, | ||
1601 | }, | ||
1602 | { .ctl_name = 0 } | ||
1603 | }; | ||
1604 | |||
1605 | const struct ctl_path net_vs_ctl_path[] = { | ||
1606 | { .procname = "net", .ctl_name = CTL_NET, }, | ||
1607 | { .procname = "ipv4", .ctl_name = NET_IPV4, }, | ||
1608 | { .procname = "vs", }, | ||
1609 | { } | ||
1610 | }; | ||
1611 | EXPORT_SYMBOL_GPL(net_vs_ctl_path); | ||
1612 | |||
1613 | static struct ctl_table_header * sysctl_header; | ||
1614 | |||
1615 | #ifdef CONFIG_PROC_FS | ||
1616 | |||
1617 | struct ip_vs_iter { | ||
1618 | struct list_head *table; | ||
1619 | int bucket; | ||
1620 | }; | ||
1621 | |||
1622 | /* | ||
1623 | * Write the contents of the VS rule table to a PROCfs file. | ||
1624 | * (It is kept just for backward compatibility) | ||
1625 | */ | ||
1626 | static inline const char *ip_vs_fwd_name(unsigned flags) | ||
1627 | { | ||
1628 | switch (flags & IP_VS_CONN_F_FWD_MASK) { | ||
1629 | case IP_VS_CONN_F_LOCALNODE: | ||
1630 | return "Local"; | ||
1631 | case IP_VS_CONN_F_TUNNEL: | ||
1632 | return "Tunnel"; | ||
1633 | case IP_VS_CONN_F_DROUTE: | ||
1634 | return "Route"; | ||
1635 | default: | ||
1636 | return "Masq"; | ||
1637 | } | ||
1638 | } | ||
1639 | |||
1640 | |||
1641 | /* Get the Nth entry in the two lists */ | ||
1642 | static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) | ||
1643 | { | ||
1644 | struct ip_vs_iter *iter = seq->private; | ||
1645 | int idx; | ||
1646 | struct ip_vs_service *svc; | ||
1647 | |||
1648 | /* look in hash by protocol */ | ||
1649 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | ||
1650 | list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { | ||
1651 | if (pos-- == 0){ | ||
1652 | iter->table = ip_vs_svc_table; | ||
1653 | iter->bucket = idx; | ||
1654 | return svc; | ||
1655 | } | ||
1656 | } | ||
1657 | } | ||
1658 | |||
1659 | /* keep looking in fwmark */ | ||
1660 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | ||
1661 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { | ||
1662 | if (pos-- == 0) { | ||
1663 | iter->table = ip_vs_svc_fwm_table; | ||
1664 | iter->bucket = idx; | ||
1665 | return svc; | ||
1666 | } | ||
1667 | } | ||
1668 | } | ||
1669 | |||
1670 | return NULL; | ||
1671 | } | ||
1672 | |||
1673 | static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) | ||
1674 | { | ||
1675 | |||
1676 | read_lock_bh(&__ip_vs_svc_lock); | ||
1677 | return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; | ||
1678 | } | ||
1679 | |||
1680 | |||
1681 | static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
1682 | { | ||
1683 | struct list_head *e; | ||
1684 | struct ip_vs_iter *iter; | ||
1685 | struct ip_vs_service *svc; | ||
1686 | |||
1687 | ++*pos; | ||
1688 | if (v == SEQ_START_TOKEN) | ||
1689 | return ip_vs_info_array(seq,0); | ||
1690 | |||
1691 | svc = v; | ||
1692 | iter = seq->private; | ||
1693 | |||
1694 | if (iter->table == ip_vs_svc_table) { | ||
1695 | /* next service in table hashed by protocol */ | ||
1696 | if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket]) | ||
1697 | return list_entry(e, struct ip_vs_service, s_list); | ||
1698 | |||
1699 | |||
1700 | while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { | ||
1701 | list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket], | ||
1702 | s_list) { | ||
1703 | return svc; | ||
1704 | } | ||
1705 | } | ||
1706 | |||
1707 | iter->table = ip_vs_svc_fwm_table; | ||
1708 | iter->bucket = -1; | ||
1709 | goto scan_fwmark; | ||
1710 | } | ||
1711 | |||
1712 | /* next service in table hashed by fwmark */ | ||
1713 | if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket]) | ||
1714 | return list_entry(e, struct ip_vs_service, f_list); | ||
1715 | |||
1716 | scan_fwmark: | ||
1717 | while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { | ||
1718 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket], | ||
1719 | f_list) | ||
1720 | return svc; | ||
1721 | } | ||
1722 | |||
1723 | return NULL; | ||
1724 | } | ||
1725 | |||
1726 | static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) | ||
1727 | { | ||
1728 | read_unlock_bh(&__ip_vs_svc_lock); | ||
1729 | } | ||
1730 | |||
1731 | |||
1732 | static int ip_vs_info_seq_show(struct seq_file *seq, void *v) | ||
1733 | { | ||
1734 | if (v == SEQ_START_TOKEN) { | ||
1735 | seq_printf(seq, | ||
1736 | "IP Virtual Server version %d.%d.%d (size=%d)\n", | ||
1737 | NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE); | ||
1738 | seq_puts(seq, | ||
1739 | "Prot LocalAddress:Port Scheduler Flags\n"); | ||
1740 | seq_puts(seq, | ||
1741 | " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n"); | ||
1742 | } else { | ||
1743 | const struct ip_vs_service *svc = v; | ||
1744 | const struct ip_vs_iter *iter = seq->private; | ||
1745 | const struct ip_vs_dest *dest; | ||
1746 | |||
1747 | if (iter->table == ip_vs_svc_table) | ||
1748 | seq_printf(seq, "%s %08X:%04X %s ", | ||
1749 | ip_vs_proto_name(svc->protocol), | ||
1750 | ntohl(svc->addr), | ||
1751 | ntohs(svc->port), | ||
1752 | svc->scheduler->name); | ||
1753 | else | ||
1754 | seq_printf(seq, "FWM %08X %s ", | ||
1755 | svc->fwmark, svc->scheduler->name); | ||
1756 | |||
1757 | if (svc->flags & IP_VS_SVC_F_PERSISTENT) | ||
1758 | seq_printf(seq, "persistent %d %08X\n", | ||
1759 | svc->timeout, | ||
1760 | ntohl(svc->netmask)); | ||
1761 | else | ||
1762 | seq_putc(seq, '\n'); | ||
1763 | |||
1764 | list_for_each_entry(dest, &svc->destinations, n_list) { | ||
1765 | seq_printf(seq, | ||
1766 | " -> %08X:%04X %-7s %-6d %-10d %-10d\n", | ||
1767 | ntohl(dest->addr), ntohs(dest->port), | ||
1768 | ip_vs_fwd_name(atomic_read(&dest->conn_flags)), | ||
1769 | atomic_read(&dest->weight), | ||
1770 | atomic_read(&dest->activeconns), | ||
1771 | atomic_read(&dest->inactconns)); | ||
1772 | } | ||
1773 | } | ||
1774 | return 0; | ||
1775 | } | ||
1776 | |||
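Schematically, the output produced by this show function looks as follows; the addresses, counters, and version/size figures are illustrative, and note that addresses and ports are printed in hex:

	IP Virtual Server version 1.2.1 (size=4096)
	Prot LocalAddress:Port Scheduler Flags
	 -> RemoteAddress:Port Forward Weight ActiveConn InActConn
	TCP C0A80001:0050 rr
	 -> 0A000001:0050 Masq    1      0          0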
1777 | static const struct seq_operations ip_vs_info_seq_ops = { | ||
1778 | .start = ip_vs_info_seq_start, | ||
1779 | .next = ip_vs_info_seq_next, | ||
1780 | .stop = ip_vs_info_seq_stop, | ||
1781 | .show = ip_vs_info_seq_show, | ||
1782 | }; | ||
1783 | |||
1784 | static int ip_vs_info_open(struct inode *inode, struct file *file) | ||
1785 | { | ||
1786 | return seq_open_private(file, &ip_vs_info_seq_ops, | ||
1787 | sizeof(struct ip_vs_iter)); | ||
1788 | } | ||
1789 | |||
1790 | static const struct file_operations ip_vs_info_fops = { | ||
1791 | .owner = THIS_MODULE, | ||
1792 | .open = ip_vs_info_open, | ||
1793 | .read = seq_read, | ||
1794 | .llseek = seq_lseek, | ||
1795 | .release = seq_release_private, | ||
1796 | }; | ||
1797 | |||
1798 | #endif | ||
1799 | |||
1800 | struct ip_vs_stats ip_vs_stats = { | ||
1801 | .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock), | ||
1802 | }; | ||
1803 | |||
1804 | #ifdef CONFIG_PROC_FS | ||
1805 | static int ip_vs_stats_show(struct seq_file *seq, void *v) | ||
1806 | { | ||
1807 | |||
1808 | /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ | ||
1809 | seq_puts(seq, | ||
1810 | " Total Incoming Outgoing Incoming Outgoing\n"); | ||
1811 | seq_printf(seq, | ||
1812 | " Conns Packets Packets Bytes Bytes\n"); | ||
1813 | |||
1814 | spin_lock_bh(&ip_vs_stats.lock); | ||
1815 | seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns, | ||
1816 | ip_vs_stats.inpkts, ip_vs_stats.outpkts, | ||
1817 | (unsigned long long) ip_vs_stats.inbytes, | ||
1818 | (unsigned long long) ip_vs_stats.outbytes); | ||
1819 | |||
1820 | /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ | ||
1821 | seq_puts(seq, | ||
1822 | " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); | ||
1823 | seq_printf(seq,"%8X %8X %8X %16X %16X\n", | ||
1824 | ip_vs_stats.cps, | ||
1825 | ip_vs_stats.inpps, | ||
1826 | ip_vs_stats.outpps, | ||
1827 | ip_vs_stats.inbps, | ||
1828 | ip_vs_stats.outbps); | ||
1829 | spin_unlock_bh(&ip_vs_stats.lock); | ||
1830 | |||
1831 | return 0; | ||
1832 | } | ||
1833 | |||
1834 | static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) | ||
1835 | { | ||
1836 | return single_open(file, ip_vs_stats_show, NULL); | ||
1837 | } | ||
1838 | |||
1839 | static const struct file_operations ip_vs_stats_fops = { | ||
1840 | .owner = THIS_MODULE, | ||
1841 | .open = ip_vs_stats_seq_open, | ||
1842 | .read = seq_read, | ||
1843 | .llseek = seq_lseek, | ||
1844 | .release = single_release, | ||
1845 | }; | ||
1846 | |||
1847 | #endif | ||
1848 | |||
1849 | /* | ||
1850 | * Set timeout values for tcp tcpfin udp in the timeout_table. | ||
1851 | */ | ||
1852 | static int ip_vs_set_timeout(struct ip_vs_timeout_user *u) | ||
1853 | { | ||
1854 | IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", | ||
1855 | u->tcp_timeout, | ||
1856 | u->tcp_fin_timeout, | ||
1857 | u->udp_timeout); | ||
1858 | |||
1859 | #ifdef CONFIG_IP_VS_PROTO_TCP | ||
1860 | if (u->tcp_timeout) { | ||
1861 | ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] | ||
1862 | = u->tcp_timeout * HZ; | ||
1863 | } | ||
1864 | |||
1865 | if (u->tcp_fin_timeout) { | ||
1866 | ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] | ||
1867 | = u->tcp_fin_timeout * HZ; | ||
1868 | } | ||
1869 | #endif | ||
1870 | |||
1871 | #ifdef CONFIG_IP_VS_PROTO_UDP | ||
1872 | if (u->udp_timeout) { | ||
1873 | ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] | ||
1874 | = u->udp_timeout * HZ; | ||
1875 | } | ||
1876 | #endif | ||
1877 | return 0; | ||
1878 | } | ||
1879 | |||
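A hedged userspace sketch of how an ipvsadm-style tool might drive this through the sockopt interface defined below, assuming the ip_vs user-space header definitions are available (a zero field leaves that timeout unchanged; CAP_NET_ADMIN is required):

	#include <sys/socket.h>
	#include <netinet/in.h>

	static int set_ipvs_timeouts(void)
	{
		struct ip_vs_timeout_user to = {
			.tcp_timeout     = 900,		/* seconds */
			.tcp_fin_timeout = 120,
			.udp_timeout     = 300,
		};
		int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);

		if (fd < 0)
			return -1;
		/* len must equal TIMEOUT_ARG_LEN, see do_ip_vs_set_ctl() */
		return setsockopt(fd, IPPROTO_IP, IP_VS_SO_SET_TIMEOUT,
				  &to, sizeof(to));
	}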
1880 | |||
1881 | #define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL) | ||
1882 | #define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user)) | ||
1883 | #define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \ | ||
1884 | sizeof(struct ip_vs_dest_user)) | ||
1885 | #define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user)) | ||
1886 | #define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user)) | ||
1887 | #define MAX_ARG_LEN SVCDEST_ARG_LEN | ||
1888 | |||
1889 | static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = { | ||
1890 | [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN, | ||
1891 | [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN, | ||
1892 | [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN, | ||
1893 | [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0, | ||
1894 | [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN, | ||
1895 | [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN, | ||
1896 | [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN, | ||
1897 | [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN, | ||
1898 | [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN, | ||
1899 | [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN, | ||
1900 | [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN, | ||
1901 | }; | ||
1902 | |||
1903 | static int | ||
1904 | do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | ||
1905 | { | ||
1906 | int ret; | ||
1907 | unsigned char arg[MAX_ARG_LEN]; | ||
1908 | struct ip_vs_service_user *usvc; | ||
1909 | struct ip_vs_service *svc; | ||
1910 | struct ip_vs_dest_user *udest; | ||
1911 | |||
1912 | if (!capable(CAP_NET_ADMIN)) | ||
1913 | return -EPERM; | ||
1914 | |||
1915 | if (len != set_arglen[SET_CMDID(cmd)]) { | ||
1916 | IP_VS_ERR("set_ctl: len %u != %u\n", | ||
1917 | len, set_arglen[SET_CMDID(cmd)]); | ||
1918 | return -EINVAL; | ||
1919 | } | ||
1920 | |||
1921 | if (copy_from_user(arg, user, len) != 0) | ||
1922 | return -EFAULT; | ||
1923 | |||
1924 | /* increase the module use count */ | ||
1925 | ip_vs_use_count_inc(); | ||
1926 | |||
1927 | if (mutex_lock_interruptible(&__ip_vs_mutex)) { | ||
1928 | ret = -ERESTARTSYS; | ||
1929 | goto out_dec; | ||
1930 | } | ||
1931 | |||
1932 | if (cmd == IP_VS_SO_SET_FLUSH) { | ||
1933 | /* Flush the virtual service */ | ||
1934 | ret = ip_vs_flush(); | ||
1935 | goto out_unlock; | ||
1936 | } else if (cmd == IP_VS_SO_SET_TIMEOUT) { | ||
1937 | /* Set timeout values for (tcp tcpfin udp) */ | ||
1938 | ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg); | ||
1939 | goto out_unlock; | ||
1940 | } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { | ||
1941 | struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; | ||
1942 | ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid); | ||
1943 | goto out_unlock; | ||
1944 | } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { | ||
1945 | struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; | ||
1946 | ret = stop_sync_thread(dm->state); | ||
1947 | goto out_unlock; | ||
1948 | } | ||
1949 | |||
1950 | usvc = (struct ip_vs_service_user *)arg; | ||
1951 | udest = (struct ip_vs_dest_user *)(usvc + 1); | ||
1952 | |||
1953 | if (cmd == IP_VS_SO_SET_ZERO) { | ||
1954 | /* if no service address is set, zero counters in all */ | ||
1955 | if (!usvc->fwmark && !usvc->addr && !usvc->port) { | ||
1956 | ret = ip_vs_zero_all(); | ||
1957 | goto out_unlock; | ||
1958 | } | ||
1959 | } | ||
1960 | |||
1961 | /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */ | ||
1962 | if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) { | ||
1963 | IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n", | ||
1964 | usvc->protocol, NIPQUAD(usvc->addr), | ||
1965 | ntohs(usvc->port), usvc->sched_name); | ||
1966 | ret = -EFAULT; | ||
1967 | goto out_unlock; | ||
1968 | } | ||
1969 | |||
1970 | /* Lookup the exact service by <protocol, addr, port> or fwmark */ | ||
1971 | if (usvc->fwmark == 0) | ||
1972 | svc = __ip_vs_service_get(usvc->protocol, | ||
1973 | usvc->addr, usvc->port); | ||
1974 | else | ||
1975 | svc = __ip_vs_svc_fwm_get(usvc->fwmark); | ||
1976 | |||
1977 | if (cmd != IP_VS_SO_SET_ADD | ||
1978 | && (svc == NULL || svc->protocol != usvc->protocol)) { | ||
1979 | ret = -ESRCH; | ||
1980 | goto out_unlock; | ||
1981 | } | ||
1982 | |||
1983 | switch (cmd) { | ||
1984 | case IP_VS_SO_SET_ADD: | ||
1985 | if (svc != NULL) | ||
1986 | ret = -EEXIST; | ||
1987 | else | ||
1988 | ret = ip_vs_add_service(usvc, &svc); | ||
1989 | break; | ||
1990 | case IP_VS_SO_SET_EDIT: | ||
1991 | ret = ip_vs_edit_service(svc, usvc); | ||
1992 | break; | ||
1993 | case IP_VS_SO_SET_DEL: | ||
1994 | ret = ip_vs_del_service(svc); | ||
1995 | if (!ret) | ||
1996 | goto out_unlock; | ||
1997 | break; | ||
1998 | case IP_VS_SO_SET_ZERO: | ||
1999 | ret = ip_vs_zero_service(svc); | ||
2000 | break; | ||
2001 | case IP_VS_SO_SET_ADDDEST: | ||
2002 | ret = ip_vs_add_dest(svc, udest); | ||
2003 | break; | ||
2004 | case IP_VS_SO_SET_EDITDEST: | ||
2005 | ret = ip_vs_edit_dest(svc, udest); | ||
2006 | break; | ||
2007 | case IP_VS_SO_SET_DELDEST: | ||
2008 | ret = ip_vs_del_dest(svc, udest); | ||
2009 | break; | ||
2010 | default: | ||
2011 | ret = -EINVAL; | ||
2012 | } | ||
2013 | |||
2014 | if (svc) | ||
2015 | ip_vs_service_put(svc); | ||
2016 | |||
2017 | out_unlock: | ||
2018 | mutex_unlock(&__ip_vs_mutex); | ||
2019 | out_dec: | ||
2020 | /* decrease the module use count */ | ||
2021 | ip_vs_use_count_dec(); | ||
2022 | |||
2023 | return ret; | ||
2024 | } | ||
2025 | |||
2026 | |||
2027 | static void | ||
2028 | ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) | ||
2029 | { | ||
2030 | spin_lock_bh(&src->lock); | ||
2031 | memcpy(dst, src, (char*)&src->lock - (char*)src); | ||
2032 | spin_unlock_bh(&src->lock); | ||
2033 | } | ||
2034 | |||
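The memcpy above copies every counter that precedes the lock member, which silently assumes struct ip_vs_stats_user is laid out as that same prefix. A hedged compile-time guard for this layout assumption (not in the original source) could look like:

	static inline void ip_vs_stats_layout_check(void)
	{
		/* the destination must be at least as large as the
		 * prefix copied by ip_vs_copy_stats() */
		BUILD_BUG_ON(sizeof(struct ip_vs_stats_user) <
			     offsetof(struct ip_vs_stats, lock));
	}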
2035 | static void | ||
2036 | ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) | ||
2037 | { | ||
2038 | dst->protocol = src->protocol; | ||
2039 | dst->addr = src->addr; | ||
2040 | dst->port = src->port; | ||
2041 | dst->fwmark = src->fwmark; | ||
2042 | strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); | ||
2043 | dst->flags = src->flags; | ||
2044 | dst->timeout = src->timeout / HZ; | ||
2045 | dst->netmask = src->netmask; | ||
2046 | dst->num_dests = src->num_dests; | ||
2047 | ip_vs_copy_stats(&dst->stats, &src->stats); | ||
2048 | } | ||
2049 | |||
2050 | static inline int | ||
2051 | __ip_vs_get_service_entries(const struct ip_vs_get_services *get, | ||
2052 | struct ip_vs_get_services __user *uptr) | ||
2053 | { | ||
2054 | int idx, count=0; | ||
2055 | struct ip_vs_service *svc; | ||
2056 | struct ip_vs_service_entry entry; | ||
2057 | int ret = 0; | ||
2058 | |||
2059 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | ||
2060 | list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { | ||
2061 | if (count >= get->num_services) | ||
2062 | goto out; | ||
2063 | memset(&entry, 0, sizeof(entry)); | ||
2064 | ip_vs_copy_service(&entry, svc); | ||
2065 | if (copy_to_user(&uptr->entrytable[count], | ||
2066 | &entry, sizeof(entry))) { | ||
2067 | ret = -EFAULT; | ||
2068 | goto out; | ||
2069 | } | ||
2070 | count++; | ||
2071 | } | ||
2072 | } | ||
2073 | |||
2074 | for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | ||
2075 | list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { | ||
2076 | if (count >= get->num_services) | ||
2077 | goto out; | ||
2078 | memset(&entry, 0, sizeof(entry)); | ||
2079 | ip_vs_copy_service(&entry, svc); | ||
2080 | if (copy_to_user(&uptr->entrytable[count], | ||
2081 | &entry, sizeof(entry))) { | ||
2082 | ret = -EFAULT; | ||
2083 | goto out; | ||
2084 | } | ||
2085 | count++; | ||
2086 | } | ||
2087 | } | ||
2088 | out: | ||
2089 | return ret; | ||
2090 | } | ||
2091 | |||
2092 | static inline int | ||
2093 | __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, | ||
2094 | struct ip_vs_get_dests __user *uptr) | ||
2095 | { | ||
2096 | struct ip_vs_service *svc; | ||
2097 | int ret = 0; | ||
2098 | |||
2099 | if (get->fwmark) | ||
2100 | svc = __ip_vs_svc_fwm_get(get->fwmark); | ||
2101 | else | ||
2102 | svc = __ip_vs_service_get(get->protocol, | ||
2103 | get->addr, get->port); | ||
2104 | if (svc) { | ||
2105 | int count = 0; | ||
2106 | struct ip_vs_dest *dest; | ||
2107 | struct ip_vs_dest_entry entry; | ||
2108 | |||
2109 | list_for_each_entry(dest, &svc->destinations, n_list) { | ||
2110 | if (count >= get->num_dests) | ||
2111 | break; | ||
2112 | |||
2113 | entry.addr = dest->addr; | ||
2114 | entry.port = dest->port; | ||
2115 | entry.conn_flags = atomic_read(&dest->conn_flags); | ||
2116 | entry.weight = atomic_read(&dest->weight); | ||
2117 | entry.u_threshold = dest->u_threshold; | ||
2118 | entry.l_threshold = dest->l_threshold; | ||
2119 | entry.activeconns = atomic_read(&dest->activeconns); | ||
2120 | entry.inactconns = atomic_read(&dest->inactconns); | ||
2121 | entry.persistconns = atomic_read(&dest->persistconns); | ||
2122 | ip_vs_copy_stats(&entry.stats, &dest->stats); | ||
2123 | if (copy_to_user(&uptr->entrytable[count], | ||
2124 | &entry, sizeof(entry))) { | ||
2125 | ret = -EFAULT; | ||
2126 | break; | ||
2127 | } | ||
2128 | count++; | ||
2129 | } | ||
2130 | ip_vs_service_put(svc); | ||
2131 | } else | ||
2132 | ret = -ESRCH; | ||
2133 | return ret; | ||
2134 | } | ||
2135 | |||
2136 | static inline void | ||
2137 | __ip_vs_get_timeouts(struct ip_vs_timeout_user *u) | ||
2138 | { | ||
2139 | #ifdef CONFIG_IP_VS_PROTO_TCP | ||
2140 | u->tcp_timeout = | ||
2141 | ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; | ||
2142 | u->tcp_fin_timeout = | ||
2143 | ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; | ||
2144 | #endif | ||
2145 | #ifdef CONFIG_IP_VS_PROTO_UDP | ||
2146 | u->udp_timeout = | ||
2147 | ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ; | ||
2148 | #endif | ||
2149 | } | ||
2150 | |||
2151 | |||
2152 | #define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL) | ||
2153 | #define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo)) | ||
2154 | #define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services)) | ||
2155 | #define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry)) | ||
2156 | #define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests)) | ||
2157 | #define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user)) | ||
2158 | #define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2) | ||
2159 | |||
2160 | static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = { | ||
2161 | [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64, | ||
2162 | [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN, | ||
2163 | [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN, | ||
2164 | [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN, | ||
2165 | [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN, | ||
2166 | [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN, | ||
2167 | [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN, | ||
2168 | }; | ||
2169 | |||
2170 | static int | ||
2171 | do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | ||
2172 | { | ||
2173 | unsigned char arg[128]; | ||
2174 | int ret = 0; | ||
2175 | |||
2176 | if (!capable(CAP_NET_ADMIN)) | ||
2177 | return -EPERM; | ||
2178 | |||
2179 | if (*len < get_arglen[GET_CMDID(cmd)]) { | ||
2180 | IP_VS_ERR("get_ctl: len %u < %u\n", | ||
2181 | *len, get_arglen[GET_CMDID(cmd)]); | ||
2182 | return -EINVAL; | ||
2183 | } | ||
2184 | |||
2185 | if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0) | ||
2186 | return -EFAULT; | ||
2187 | |||
2188 | if (mutex_lock_interruptible(&__ip_vs_mutex)) | ||
2189 | return -ERESTARTSYS; | ||
2190 | |||
2191 | switch (cmd) { | ||
2192 | case IP_VS_SO_GET_VERSION: | ||
2193 | { | ||
2194 | char buf[64]; | ||
2195 | |||
2196 | sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)", | ||
2197 | NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE); | ||
2198 | if (copy_to_user(user, buf, strlen(buf)+1) != 0) { | ||
2199 | ret = -EFAULT; | ||
2200 | goto out; | ||
2201 | } | ||
2202 | *len = strlen(buf)+1; | ||
2203 | } | ||
2204 | break; | ||
2205 | |||
2206 | case IP_VS_SO_GET_INFO: | ||
2207 | { | ||
2208 | struct ip_vs_getinfo info; | ||
2209 | info.version = IP_VS_VERSION_CODE; | ||
2210 | info.size = IP_VS_CONN_TAB_SIZE; | ||
2211 | info.num_services = ip_vs_num_services; | ||
2212 | if (copy_to_user(user, &info, sizeof(info)) != 0) | ||
2213 | ret = -EFAULT; | ||
2214 | } | ||
2215 | break; | ||
2216 | |||
2217 | case IP_VS_SO_GET_SERVICES: | ||
2218 | { | ||
2219 | struct ip_vs_get_services *get; | ||
2220 | int size; | ||
2221 | |||
2222 | get = (struct ip_vs_get_services *)arg; | ||
2223 | size = sizeof(*get) + | ||
2224 | sizeof(struct ip_vs_service_entry) * get->num_services; | ||
2225 | if (*len != size) { | ||
2226 | IP_VS_ERR("length: %u != %u\n", *len, size); | ||
2227 | ret = -EINVAL; | ||
2228 | goto out; | ||
2229 | } | ||
2230 | ret = __ip_vs_get_service_entries(get, user); | ||
2231 | } | ||
2232 | break; | ||
2233 | |||
2234 | case IP_VS_SO_GET_SERVICE: | ||
2235 | { | ||
2236 | struct ip_vs_service_entry *entry; | ||
2237 | struct ip_vs_service *svc; | ||
2238 | |||
2239 | entry = (struct ip_vs_service_entry *)arg; | ||
2240 | if (entry->fwmark) | ||
2241 | svc = __ip_vs_svc_fwm_get(entry->fwmark); | ||
2242 | else | ||
2243 | svc = __ip_vs_service_get(entry->protocol, | ||
2244 | entry->addr, entry->port); | ||
2245 | if (svc) { | ||
2246 | ip_vs_copy_service(entry, svc); | ||
2247 | if (copy_to_user(user, entry, sizeof(*entry)) != 0) | ||
2248 | ret = -EFAULT; | ||
2249 | ip_vs_service_put(svc); | ||
2250 | } else | ||
2251 | ret = -ESRCH; | ||
2252 | } | ||
2253 | break; | ||
2254 | |||
2255 | case IP_VS_SO_GET_DESTS: | ||
2256 | { | ||
2257 | struct ip_vs_get_dests *get; | ||
2258 | int size; | ||
2259 | |||
2260 | get = (struct ip_vs_get_dests *)arg; | ||
2261 | size = sizeof(*get) + | ||
2262 | sizeof(struct ip_vs_dest_entry) * get->num_dests; | ||
2263 | if (*len != size) { | ||
2264 | IP_VS_ERR("length: %u != %u\n", *len, size); | ||
2265 | ret = -EINVAL; | ||
2266 | goto out; | ||
2267 | } | ||
2268 | ret = __ip_vs_get_dest_entries(get, user); | ||
2269 | } | ||
2270 | break; | ||
2271 | |||
2272 | case IP_VS_SO_GET_TIMEOUT: | ||
2273 | { | ||
2274 | struct ip_vs_timeout_user t; | ||
2275 | |||
2276 | __ip_vs_get_timeouts(&t); | ||
2277 | if (copy_to_user(user, &t, sizeof(t)) != 0) | ||
2278 | ret = -EFAULT; | ||
2279 | } | ||
2280 | break; | ||
2281 | |||
2282 | case IP_VS_SO_GET_DAEMON: | ||
2283 | { | ||
2284 | struct ip_vs_daemon_user d[2]; | ||
2285 | |||
2286 | memset(&d, 0, sizeof(d)); | ||
2287 | if (ip_vs_sync_state & IP_VS_STATE_MASTER) { | ||
2288 | d[0].state = IP_VS_STATE_MASTER; | ||
2289 | strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn)); | ||
2290 | d[0].syncid = ip_vs_master_syncid; | ||
2291 | } | ||
2292 | if (ip_vs_sync_state & IP_VS_STATE_BACKUP) { | ||
2293 | d[1].state = IP_VS_STATE_BACKUP; | ||
2294 | strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn)); | ||
2295 | d[1].syncid = ip_vs_backup_syncid; | ||
2296 | } | ||
2297 | if (copy_to_user(user, &d, sizeof(d)) != 0) | ||
2298 | ret = -EFAULT; | ||
2299 | } | ||
2300 | break; | ||
2301 | |||
2302 | default: | ||
2303 | ret = -EINVAL; | ||
2304 | } | ||
2305 | |||
2306 | out: | ||
2307 | mutex_unlock(&__ip_vs_mutex); | ||
2308 | return ret; | ||
2309 | } | ||
2310 | |||
2311 | |||
2312 | static struct nf_sockopt_ops ip_vs_sockopts = { | ||
2313 | .pf = PF_INET, | ||
2314 | .set_optmin = IP_VS_BASE_CTL, | ||
2315 | .set_optmax = IP_VS_SO_SET_MAX+1, | ||
2316 | .set = do_ip_vs_set_ctl, | ||
2317 | .get_optmin = IP_VS_BASE_CTL, | ||
2318 | .get_optmax = IP_VS_SO_GET_MAX+1, | ||
2319 | .get = do_ip_vs_get_ctl, | ||
2320 | .owner = THIS_MODULE, | ||
2321 | }; | ||
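For context, these nf_sockopt ops are what a management tool reaches through getsockopt()/setsockopt() at the IPPROTO_IP level. A hypothetical minimal sketch of querying the version string served by the IP_VS_SO_GET_VERSION branch above (assumes <linux/ip_vs.h> for the constant; needs CAP_NET_ADMIN, matching the check in do_ip_vs_get_ctl()):

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/ip_vs.h>

int main(void)
{
	char buf[64];
	socklen_t len = sizeof(buf);
	/* a raw inet socket is the usual way to reach the nf_sockopt hooks */
	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);

	if (fd < 0)
		return 1;
	if (getsockopt(fd, IPPROTO_IP, IP_VS_SO_GET_VERSION, buf, &len) == 0)
		printf("%s\n", buf);	/* "IP Virtual Server version ..." */
	return 0;
}

Note that the variable-length gets (IP_VS_SO_GET_SERVICES, IP_VS_SO_GET_DESTS) insist on an exact *len, so a caller must size the buffer as sizeof(struct) plus num_services (or num_dests) times the entry size, as checked above.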
2322 | |||
2323 | |||
2324 | int __init ip_vs_control_init(void) | ||
2325 | { | ||
2326 | int ret; | ||
2327 | int idx; | ||
2328 | |||
2329 | EnterFunction(2); | ||
2330 | |||
2331 | ret = nf_register_sockopt(&ip_vs_sockopts); | ||
2332 | if (ret) { | ||
2333 | IP_VS_ERR("cannot register sockopt.\n"); | ||
2334 | return ret; | ||
2335 | } | ||
2336 | |||
2337 | proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); | ||
2338 | proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); | ||
2339 | |||
2340 | sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars); | ||
2341 | |||
2342 | /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ | ||
2343 | for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | ||
2344 | INIT_LIST_HEAD(&ip_vs_svc_table[idx]); | ||
2345 | INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); | ||
2346 | } | ||
2347 | for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) { | ||
2348 | INIT_LIST_HEAD(&ip_vs_rtable[idx]); | ||
2349 | } | ||
2350 | |||
2351 | ip_vs_new_estimator(&ip_vs_stats); | ||
2352 | |||
2353 | /* Hook the defense timer */ | ||
2354 | schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); | ||
2355 | |||
2356 | LeaveFunction(2); | ||
2357 | return 0; | ||
2358 | } | ||
2359 | |||
2360 | |||
2361 | void ip_vs_control_cleanup(void) | ||
2362 | { | ||
2363 | EnterFunction(2); | ||
2364 | ip_vs_trash_cleanup(); | ||
2365 | cancel_rearming_delayed_work(&defense_work); | ||
2366 | cancel_work_sync(&defense_work.work); | ||
2367 | ip_vs_kill_estimator(&ip_vs_stats); | ||
2368 | unregister_sysctl_table(sysctl_header); | ||
2369 | proc_net_remove(&init_net, "ip_vs_stats"); | ||
2370 | proc_net_remove(&init_net, "ip_vs"); | ||
2371 | nf_unregister_sockopt(&ip_vs_sockopts); | ||
2372 | LeaveFunction(2); | ||
2373 | } | ||
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c deleted file mode 100644 index fa66824d264f..000000000000 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ /dev/null | |||
@@ -1,258 +0,0 @@ | |||
1 | /* | ||
2 | * IPVS: Destination Hashing scheduling module | ||
3 | * | ||
4 | * Authors: Wensong Zhang <wensong@gnuchina.org> | ||
5 | * | ||
6 | * Inspired by the consistent hashing scheduler patch from | ||
7 | * Thomas Proell <proellt@gmx.de> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | * | ||
14 | * Changes: | ||
15 | * | ||
16 | */ | ||
17 | |||
18 | /* | ||
19 | * The dh algorithm selects a server by the hash key of the destination | ||
20 | * IP address. The pseudo code is as follows: | ||
21 | * | ||
22 | * n <- servernode[dest_ip]; | ||
23 | * if (n is dead) OR | ||
24 | * (n is overloaded) OR (n.weight <= 0) then | ||
25 | * return NULL; | ||
26 | * | ||
27 | * return n; | ||
28 | * | ||
29 | * Note that servernode is a 256-bucket hash table that maps the hash | ||
30 | * index derived from the packet destination IP address to the current | ||
31 | * server array. If the dh scheduler is used in a cache cluster, it is | ||
32 | * good to combine it with the cache_bypass feature. When the statically | ||
33 | * assigned server is dead or overloaded, the load balancer can bypass | ||
34 | * the cache server and send requests to the original server directly. | ||
35 | * | ||
36 | */ | ||
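As a quick illustration of the fixed mapping described above, a standalone sketch (not kernel code): ip_vs_dh_assign() below walks the destination list cyclically, so with real servers A, B and C the 256 buckets hold A B C A B C ..., and every flow to a given destination IP is pinned to one cache server until the destination list changes.

#include <stdio.h>

int main(void)
{
	const char *dests[] = { "A", "B", "C" };	/* sample real servers */
	int i;

	/* fill the first 8 of the 256 buckets the way ip_vs_dh_assign() does */
	for (i = 0; i < 8; i++)
		printf("bucket %d -> %s\n", i, dests[i % 3]);
	return 0;
}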
37 | |||
38 | #include <linux/ip.h> | ||
39 | #include <linux/module.h> | ||
40 | #include <linux/kernel.h> | ||
41 | #include <linux/skbuff.h> | ||
42 | |||
43 | #include <net/ip_vs.h> | ||
44 | |||
45 | |||
46 | /* | ||
47 | * IPVS DH bucket | ||
48 | */ | ||
49 | struct ip_vs_dh_bucket { | ||
50 | struct ip_vs_dest *dest; /* real server (cache) */ | ||
51 | }; | ||
52 | |||
53 | /* | ||
54 | * for IPVS DH entry hash table | ||
55 | */ | ||
56 | #ifndef CONFIG_IP_VS_DH_TAB_BITS | ||
57 | #define CONFIG_IP_VS_DH_TAB_BITS 8 | ||
58 | #endif | ||
59 | #define IP_VS_DH_TAB_BITS CONFIG_IP_VS_DH_TAB_BITS | ||
60 | #define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS) | ||
61 | #define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1) | ||
62 | |||
63 | |||
64 | /* | ||
65 | * Returns hash value for IPVS DH entry | ||
66 | */ | ||
67 | static inline unsigned ip_vs_dh_hashkey(__be32 addr) | ||
68 | { | ||
69 | return (ntohl(addr)*2654435761UL) & IP_VS_DH_TAB_MASK; | ||
70 | } | ||
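The multiplier 2654435761 is the familiar golden-ratio hashing constant (a prime near 2^32/phi, per Knuth's multiplicative method), so consecutive addresses scatter widely before the mask picks the bucket. A standalone sketch of the same computation, assuming the default 256-bucket table:

#include <stdio.h>
#include <stdint.h>

#define TAB_MASK 0xffu		/* IP_VS_DH_TAB_SIZE - 1 with 8 table bits */

static unsigned dh_hashkey(uint32_t addr)	/* addr in host byte order */
{
	return (addr * 2654435761u) & TAB_MASK;
}

int main(void)
{
	/* consecutive addresses 192.0.2.1 and 192.0.2.2 land 177 buckets apart */
	printf("%u %u\n", dh_hashkey(0xc0000201u), dh_hashkey(0xc0000202u));
	return 0;
}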
71 | |||
72 | |||
73 | /* | ||
74 | * Get ip_vs_dest associated with supplied parameters. | ||
75 | */ | ||
76 | static inline struct ip_vs_dest * | ||
77 | ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __be32 addr) | ||
78 | { | ||
79 | return (tbl[ip_vs_dh_hashkey(addr)]).dest; | ||
80 | } | ||
81 | |||
82 | |||
83 | /* | ||
84 | * Assign all the hash buckets of the specified table with the service. | ||
85 | */ | ||
86 | static int | ||
87 | ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc) | ||
88 | { | ||
89 | int i; | ||
90 | struct ip_vs_dh_bucket *b; | ||
91 | struct list_head *p; | ||
92 | struct ip_vs_dest *dest; | ||
93 | |||
94 | b = tbl; | ||
95 | p = &svc->destinations; | ||
96 | for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { | ||
97 | if (list_empty(p)) { | ||
98 | b->dest = NULL; | ||
99 | } else { | ||
100 | if (p == &svc->destinations) | ||
101 | p = p->next; | ||
102 | |||
103 | dest = list_entry(p, struct ip_vs_dest, n_list); | ||
104 | atomic_inc(&dest->refcnt); | ||
105 | b->dest = dest; | ||
106 | |||
107 | p = p->next; | ||
108 | } | ||
109 | b++; | ||
110 | } | ||
111 | return 0; | ||
112 | } | ||
113 | |||
114 | |||
115 | /* | ||
116 | * Flush all the hash buckets of the specified table. | ||
117 | */ | ||
118 | static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl) | ||
119 | { | ||
120 | int i; | ||
121 | struct ip_vs_dh_bucket *b; | ||
122 | |||
123 | b = tbl; | ||
124 | for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { | ||
125 | if (b->dest) { | ||
126 | atomic_dec(&b->dest->refcnt); | ||
127 | b->dest = NULL; | ||
128 | } | ||
129 | b++; | ||
130 | } | ||
131 | } | ||
132 | |||
133 | |||
134 | static int ip_vs_dh_init_svc(struct ip_vs_service *svc) | ||
135 | { | ||
136 | struct ip_vs_dh_bucket *tbl; | ||
137 | |||
138 | /* allocate the DH table for this service */ | ||
139 | tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE, | ||
140 | GFP_ATOMIC); | ||
141 | if (tbl == NULL) { | ||
142 | IP_VS_ERR("ip_vs_dh_init_svc(): no memory\n"); | ||
143 | return -ENOMEM; | ||
144 | } | ||
145 | svc->sched_data = tbl; | ||
146 | IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for " | ||
147 | "current service\n", | ||
148 | sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); | ||
149 | |||
150 | /* assign the hash buckets with the updated service */ | ||
151 | ip_vs_dh_assign(tbl, svc); | ||
152 | |||
153 | return 0; | ||
154 | } | ||
155 | |||
156 | |||
157 | static int ip_vs_dh_done_svc(struct ip_vs_service *svc) | ||
158 | { | ||
159 | struct ip_vs_dh_bucket *tbl = svc->sched_data; | ||
160 | |||
161 | /* got to clean up hash buckets here */ | ||
162 | ip_vs_dh_flush(tbl); | ||
163 | |||
164 | /* release the table itself */ | ||
165 | kfree(svc->sched_data); | ||
166 | IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n", | ||
167 | sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); | ||
168 | |||
169 | return 0; | ||
170 | } | ||
171 | |||
172 | |||
173 | static int ip_vs_dh_update_svc(struct ip_vs_service *svc) | ||
174 | { | ||
175 | struct ip_vs_dh_bucket *tbl = svc->sched_data; | ||
176 | |||
177 | /* got to clean up hash buckets here */ | ||
178 | ip_vs_dh_flush(tbl); | ||
179 | |||
180 | /* assign the hash buckets with the updated service */ | ||
181 | ip_vs_dh_assign(tbl, svc); | ||
182 | |||
183 | return 0; | ||
184 | } | ||
185 | |||
186 | |||
187 | /* | ||
188 | * If the dest flag IP_VS_DEST_F_OVERLOAD is set, | ||
189 | * consider that the server is overloaded here. | ||
190 | */ | ||
191 | static inline int is_overloaded(struct ip_vs_dest *dest) | ||
192 | { | ||
193 | return dest->flags & IP_VS_DEST_F_OVERLOAD; | ||
194 | } | ||
195 | |||
196 | |||
197 | /* | ||
198 | * Destination hashing scheduling | ||
199 | */ | ||
200 | static struct ip_vs_dest * | ||
201 | ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | ||
202 | { | ||
203 | struct ip_vs_dest *dest; | ||
204 | struct ip_vs_dh_bucket *tbl; | ||
205 | struct iphdr *iph = ip_hdr(skb); | ||
206 | |||
207 | IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n"); | ||
208 | |||
209 | tbl = (struct ip_vs_dh_bucket *)svc->sched_data; | ||
210 | dest = ip_vs_dh_get(tbl, iph->daddr); | ||
211 | if (!dest | ||
212 | || !(dest->flags & IP_VS_DEST_F_AVAILABLE) | ||
213 | || atomic_read(&dest->weight) <= 0 | ||
214 | || is_overloaded(dest)) { | ||
215 | return NULL; | ||
216 | } | ||
217 | |||
218 | IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u " | ||
219 | "--> server %u.%u.%u.%u:%d\n", | ||
220 | NIPQUAD(iph->daddr), | ||
221 | NIPQUAD(dest->addr), | ||
222 | ntohs(dest->port)); | ||
223 | |||
224 | return dest; | ||
225 | } | ||
226 | |||
227 | |||
228 | /* | ||
229 | * IPVS DH Scheduler structure | ||
230 | */ | ||
231 | static struct ip_vs_scheduler ip_vs_dh_scheduler = | ||
232 | { | ||
233 | .name = "dh", | ||
234 | .refcnt = ATOMIC_INIT(0), | ||
235 | .module = THIS_MODULE, | ||
236 | .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), | ||
237 | .init_service = ip_vs_dh_init_svc, | ||
238 | .done_service = ip_vs_dh_done_svc, | ||
239 | .update_service = ip_vs_dh_update_svc, | ||
240 | .schedule = ip_vs_dh_schedule, | ||
241 | }; | ||
242 | |||
243 | |||
244 | static int __init ip_vs_dh_init(void) | ||
245 | { | ||
246 | return register_ip_vs_scheduler(&ip_vs_dh_scheduler); | ||
247 | } | ||
248 | |||
249 | |||
250 | static void __exit ip_vs_dh_cleanup(void) | ||
251 | { | ||
252 | unregister_ip_vs_scheduler(&ip_vs_dh_scheduler); | ||
253 | } | ||
254 | |||
255 | |||
256 | module_init(ip_vs_dh_init); | ||
257 | module_exit(ip_vs_dh_cleanup); | ||
258 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c deleted file mode 100644 index 5a20f93bd7f9..000000000000 --- a/net/ipv4/ipvs/ip_vs_est.c +++ /dev/null | |||
@@ -1,162 +0,0 @@ | |||
1 | /* | ||
2 | * ip_vs_est.c: simple rate estimator for IPVS | ||
3 | * | ||
4 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * Changes: | ||
12 | * | ||
13 | */ | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/jiffies.h> | ||
16 | #include <linux/slab.h> | ||
17 | #include <linux/types.h> | ||
18 | #include <linux/interrupt.h> | ||
19 | #include <linux/sysctl.h> | ||
20 | #include <linux/list.h> | ||
21 | |||
22 | #include <net/ip_vs.h> | ||
23 | |||
24 | /* | ||
25 | This code estimates the rate over a short interval (such as 8 | ||
26 | seconds) for virtual services and real servers. To measure the rate | ||
27 | over a long interval, it is easy to implement a user-level daemon that | ||
28 | periodically reads those statistical counters and measures the rate. | ||
29 | |||
30 | Currently, the measurement is activated by a slow timer handler. | ||
31 | Hopefully this measurement will not introduce too much load. | ||
32 | |||
33 | We measure rate during the last 8 seconds every 2 seconds: | ||
34 | |||
35 | avgrate = avgrate*(1-W) + rate*W | ||
36 | |||
37 | where W = 2^(-2) | ||
38 | |||
39 | NOTES. | ||
40 | |||
41 | * The stored value for average bps is scaled by 2^5, so that maximal | ||
42 | rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10. | ||
43 | |||
44 | * A lot of code is taken from net/sched/estimator.c | ||
45 | */ | ||
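Concretely, the shifts in estimation_timer() below implement this in fixed point: the counter delta over a 2-second tick is turned into a per-second rate scaled by 2^10 (<<10 combined with /2 gives the <<9), avg += (rate - avg)>>2 applies W = 2^-2, and (avg + 0x1FF)>>10 rounds back to whole units. A minimal user-space sketch assuming a steady 100 new connections per 2-second tick (i.e. 50 cps):

#include <stdio.h>

int main(void)
{
	long total = 0, last = 0, avg = 0, rate;	/* avg holds cps << 10 */
	int tick;

	for (tick = 0; tick < 10; tick++) {
		total += 100;			/* counter grows by 100 per 2s */
		rate = (total - last) << 9;	/* (delta / 2s) << 10 */
		last = total;
		avg += (rate - avg) >> 2;	/* avg = avg*(1-W) + rate*W */
		printf("cps = %ld\n", (avg + 0x1FF) >> 10);	/* -> 50 */
	}
	return 0;
}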
46 | |||
47 | |||
48 | static void estimation_timer(unsigned long arg); | ||
49 | |||
50 | static LIST_HEAD(est_list); | ||
51 | static DEFINE_SPINLOCK(est_lock); | ||
52 | static DEFINE_TIMER(est_timer, estimation_timer, 0, 0); | ||
53 | |||
54 | static void estimation_timer(unsigned long arg) | ||
55 | { | ||
56 | struct ip_vs_estimator *e; | ||
57 | struct ip_vs_stats *s; | ||
58 | u32 n_conns; | ||
59 | u32 n_inpkts, n_outpkts; | ||
60 | u64 n_inbytes, n_outbytes; | ||
61 | u32 rate; | ||
62 | |||
63 | spin_lock(&est_lock); | ||
64 | list_for_each_entry(e, &est_list, list) { | ||
65 | s = container_of(e, struct ip_vs_stats, est); | ||
66 | |||
67 | spin_lock(&s->lock); | ||
68 | n_conns = s->conns; | ||
69 | n_inpkts = s->inpkts; | ||
70 | n_outpkts = s->outpkts; | ||
71 | n_inbytes = s->inbytes; | ||
72 | n_outbytes = s->outbytes; | ||
73 | |||
74 | /* scaled by 2^10, but divided by 2 seconds */ | ||
75 | rate = (n_conns - e->last_conns)<<9; | ||
76 | e->last_conns = n_conns; | ||
77 | e->cps += ((long)rate - (long)e->cps)>>2; | ||
78 | s->cps = (e->cps+0x1FF)>>10; | ||
79 | |||
80 | rate = (n_inpkts - e->last_inpkts)<<9; | ||
81 | e->last_inpkts = n_inpkts; | ||
82 | e->inpps += ((long)rate - (long)e->inpps)>>2; | ||
83 | s->inpps = (e->inpps+0x1FF)>>10; | ||
84 | |||
85 | rate = (n_outpkts - e->last_outpkts)<<9; | ||
86 | e->last_outpkts = n_outpkts; | ||
87 | e->outpps += ((long)rate - (long)e->outpps)>>2; | ||
88 | s->outpps = (e->outpps+0x1FF)>>10; | ||
89 | |||
90 | rate = (n_inbytes - e->last_inbytes)<<4; | ||
91 | e->last_inbytes = n_inbytes; | ||
92 | e->inbps += ((long)rate - (long)e->inbps)>>2; | ||
93 | s->inbps = (e->inbps+0xF)>>5; | ||
94 | |||
95 | rate = (n_outbytes - e->last_outbytes)<<4; | ||
96 | e->last_outbytes = n_outbytes; | ||
97 | e->outbps += ((long)rate - (long)e->outbps)>>2; | ||
98 | s->outbps = (e->outbps+0xF)>>5; | ||
99 | spin_unlock(&s->lock); | ||
100 | } | ||
101 | spin_unlock(&est_lock); | ||
102 | mod_timer(&est_timer, jiffies + 2*HZ); | ||
103 | } | ||
104 | |||
105 | void ip_vs_new_estimator(struct ip_vs_stats *stats) | ||
106 | { | ||
107 | struct ip_vs_estimator *est = &stats->est; | ||
108 | |||
109 | INIT_LIST_HEAD(&est->list); | ||
110 | |||
111 | est->last_conns = stats->conns; | ||
112 | est->cps = stats->cps<<10; | ||
113 | |||
114 | est->last_inpkts = stats->inpkts; | ||
115 | est->inpps = stats->inpps<<10; | ||
116 | |||
117 | est->last_outpkts = stats->outpkts; | ||
118 | est->outpps = stats->outpps<<10; | ||
119 | |||
120 | est->last_inbytes = stats->inbytes; | ||
121 | est->inbps = stats->inbps<<5; | ||
122 | |||
123 | est->last_outbytes = stats->outbytes; | ||
124 | est->outbps = stats->outbps<<5; | ||
125 | |||
126 | spin_lock_bh(&est_lock); | ||
127 | if (list_empty(&est_list)) | ||
128 | mod_timer(&est_timer, jiffies + 2 * HZ); | ||
129 | list_add(&est->list, &est_list); | ||
130 | spin_unlock_bh(&est_lock); | ||
131 | } | ||
132 | |||
133 | void ip_vs_kill_estimator(struct ip_vs_stats *stats) | ||
134 | { | ||
135 | struct ip_vs_estimator *est = &stats->est; | ||
136 | |||
137 | spin_lock_bh(&est_lock); | ||
138 | list_del(&est->list); | ||
139 | while (list_empty(&est_list) && try_to_del_timer_sync(&est_timer) < 0) { | ||
140 | spin_unlock_bh(&est_lock); | ||
141 | cpu_relax(); | ||
142 | spin_lock_bh(&est_lock); | ||
143 | } | ||
144 | spin_unlock_bh(&est_lock); | ||
145 | } | ||
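(A note on the loop above: del_timer_sync() cannot simply be called under est_lock, because estimation_timer() itself takes est_lock and the two would deadlock; hence the spin on try_to_del_timer_sync(), dropping the lock between attempts so a concurrently running handler can finish.)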
146 | |||
147 | void ip_vs_zero_estimator(struct ip_vs_stats *stats) | ||
148 | { | ||
149 | struct ip_vs_estimator *est = &stats->est; | ||
150 | |||
151 | /* set the counters to zero; the caller must hold stats->lock */ | ||
152 | est->last_inbytes = 0; | ||
153 | est->last_outbytes = 0; | ||
154 | est->last_conns = 0; | ||
155 | est->last_inpkts = 0; | ||
156 | est->last_outpkts = 0; | ||
157 | est->cps = 0; | ||
158 | est->inpps = 0; | ||
159 | est->outpps = 0; | ||
160 | est->inbps = 0; | ||
161 | est->outbps = 0; | ||
162 | } | ||
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c deleted file mode 100644 index c1c758e4f733..000000000000 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ /dev/null | |||
@@ -1,393 +0,0 @@ | |||
1 | /* | ||
2 | * ip_vs_ftp.c: IPVS ftp application module | ||
3 | * | ||
4 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
5 | * | ||
6 | * Changes: | ||
7 | * | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | * | ||
14 | * Most code here is taken from ip_masq_ftp.c in kernel 2.2. The difference | ||
15 | * is that ip_vs_ftp module handles the reverse direction to ip_masq_ftp. | ||
16 | * | ||
17 | * IP_MASQ_FTP ftp masquerading module | ||
18 | * | ||
19 | * Version: @(#)ip_masq_ftp.c 0.04 02/05/96 | ||
20 | * | ||
21 | * Author: Wouter Gadeyne | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #include <linux/module.h> | ||
26 | #include <linux/moduleparam.h> | ||
27 | #include <linux/kernel.h> | ||
28 | #include <linux/skbuff.h> | ||
29 | #include <linux/in.h> | ||
30 | #include <linux/ip.h> | ||
31 | #include <linux/netfilter.h> | ||
32 | #include <net/protocol.h> | ||
33 | #include <net/tcp.h> | ||
34 | #include <asm/unaligned.h> | ||
35 | |||
36 | #include <net/ip_vs.h> | ||
37 | |||
38 | |||
39 | #define SERVER_STRING "227 Entering Passive Mode (" | ||
40 | #define CLIENT_STRING "PORT " | ||
41 | |||
42 | |||
43 | /* | ||
44 | * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper | ||
45 | * First port is set to the default port. | ||
46 | */ | ||
47 | static unsigned short ports[IP_VS_APP_MAX_PORTS] = {21, 0}; | ||
48 | module_param_array(ports, ushort, NULL, 0); | ||
49 | MODULE_PARM_DESC(ports, "Ports to monitor for FTP control commands"); | ||
50 | |||
51 | |||
52 | /* Dummy variable */ | ||
53 | static int ip_vs_ftp_pasv; | ||
54 | |||
55 | |||
56 | static int | ||
57 | ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp) | ||
58 | { | ||
59 | return 0; | ||
60 | } | ||
61 | |||
62 | |||
63 | static int | ||
64 | ip_vs_ftp_done_conn(struct ip_vs_app *app, struct ip_vs_conn *cp) | ||
65 | { | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | |||
70 | /* | ||
71 | * Get <addr,port> from the string "xxx,xxx,xxx,xxx,ppp,ppp", starting | ||
72 | * with the "pattern" and terminated with the "term" character. | ||
73 | * <addr,port> is in network order. | ||
74 | */ | ||
75 | static int ip_vs_ftp_get_addrport(char *data, char *data_limit, | ||
76 | const char *pattern, size_t plen, char term, | ||
77 | __be32 *addr, __be16 *port, | ||
78 | char **start, char **end) | ||
79 | { | ||
80 | unsigned char p[6]; | ||
81 | int i = 0; | ||
82 | |||
83 | if (data_limit - data < plen) { | ||
84 | /* check if there is partial match */ | ||
85 | if (strnicmp(data, pattern, data_limit - data) == 0) | ||
86 | return -1; | ||
87 | else | ||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | if (strnicmp(data, pattern, plen) != 0) { | ||
92 | return 0; | ||
93 | } | ||
94 | *start = data + plen; | ||
95 | |||
96 | for (data = *start; *data != term; data++) { | ||
97 | if (data == data_limit) | ||
98 | return -1; | ||
99 | } | ||
100 | *end = data; | ||
101 | |||
102 | memset(p, 0, sizeof(p)); | ||
103 | for (data = *start; data != *end; data++) { | ||
104 | if (*data >= '0' && *data <= '9') { | ||
105 | p[i] = p[i]*10 + *data - '0'; | ||
106 | } else if (*data == ',' && i < 5) { | ||
107 | i++; | ||
108 | } else { | ||
109 | /* unexpected character */ | ||
110 | return -1; | ||
111 | } | ||
112 | } | ||
113 | |||
114 | if (i != 5) | ||
115 | return -1; | ||
116 | |||
117 | *addr = get_unaligned((__be32 *)p); | ||
118 | *port = get_unaligned((__be16 *)(p + 4)); | ||
119 | return 1; | ||
120 | } | ||
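For reference, the six numbers parsed above follow the classic FTP encoding: "h1,h2,h3,h4,p1,p2" denotes the address h1.h2.h3.h4 and the port p1*256 + p2, which is why the raw bytes in p[] can be reassembled directly in network order. A tiny worked example as a standalone sketch:

#include <stdio.h>

int main(void)
{
	/* e.g. a PASV reply of "... (192,168,0,1,4,210)" */
	unsigned p[6] = { 192, 168, 0, 1, 4, 210 };

	printf("%u.%u.%u.%u port %u\n",
	       p[0], p[1], p[2], p[3], p[4] * 256 + p[5]);	/* port 1234 */
	return 0;
}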
121 | |||
122 | |||
123 | /* | ||
124 | * Look at outgoing ftp packets to catch the response to a PASV command | ||
125 | * from the server (inside-to-outside). | ||
126 | * When we see one, we build a connection entry with the client address, | ||
127 | * client port 0 (unknown at the moment), the server address and the | ||
128 | * server port. Mark the current connection entry as a control channel | ||
129 | * of the new entry. All this work is just so that the data connection | ||
130 | * can be scheduled to the right server later. | ||
131 | * | ||
132 | * The outgoing packet should be something like | ||
133 | * "227 Entering Passive Mode (xxx,xxx,xxx,xxx,ppp,ppp)". | ||
134 | * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number. | ||
135 | */ | ||
136 | static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | ||
137 | struct sk_buff *skb, int *diff) | ||
138 | { | ||
139 | struct iphdr *iph; | ||
140 | struct tcphdr *th; | ||
141 | char *data, *data_limit; | ||
142 | char *start, *end; | ||
143 | __be32 from; | ||
144 | __be16 port; | ||
145 | struct ip_vs_conn *n_cp; | ||
146 | char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ | ||
147 | unsigned buf_len; | ||
148 | int ret; | ||
149 | |||
150 | *diff = 0; | ||
151 | |||
152 | /* Only useful for established sessions */ | ||
153 | if (cp->state != IP_VS_TCP_S_ESTABLISHED) | ||
154 | return 1; | ||
155 | |||
156 | /* Linear packets are much easier to deal with. */ | ||
157 | if (!skb_make_writable(skb, skb->len)) | ||
158 | return 0; | ||
159 | |||
160 | if (cp->app_data == &ip_vs_ftp_pasv) { | ||
161 | iph = ip_hdr(skb); | ||
162 | th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); | ||
163 | data = (char *)th + (th->doff << 2); | ||
164 | data_limit = skb_tail_pointer(skb); | ||
165 | |||
166 | if (ip_vs_ftp_get_addrport(data, data_limit, | ||
167 | SERVER_STRING, | ||
168 | sizeof(SERVER_STRING)-1, ')', | ||
169 | &from, &port, | ||
170 | &start, &end) != 1) | ||
171 | return 1; | ||
172 | |||
173 | IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> " | ||
174 | "%u.%u.%u.%u:%d detected\n", | ||
175 | NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr), 0); | ||
176 | |||
177 | /* | ||
178 | * Now update or create a connection entry for it | ||
179 | */ | ||
180 | n_cp = ip_vs_conn_out_get(iph->protocol, from, port, | ||
181 | cp->caddr, 0); | ||
182 | if (!n_cp) { | ||
183 | n_cp = ip_vs_conn_new(IPPROTO_TCP, | ||
184 | cp->caddr, 0, | ||
185 | cp->vaddr, port, | ||
186 | from, port, | ||
187 | IP_VS_CONN_F_NO_CPORT, | ||
188 | cp->dest); | ||
189 | if (!n_cp) | ||
190 | return 0; | ||
191 | |||
192 | /* add its controller */ | ||
193 | ip_vs_control_add(n_cp, cp); | ||
194 | } | ||
195 | |||
196 | /* | ||
197 | * Replace the old passive address with the new one | ||
198 | */ | ||
199 | from = n_cp->vaddr; | ||
200 | port = n_cp->vport; | ||
201 | sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from), | ||
202 | (ntohs(port)>>8)&255, ntohs(port)&255); | ||
203 | buf_len = strlen(buf); | ||
204 | |||
205 | /* | ||
206 | * Calculate required delta-offset to keep TCP happy | ||
207 | */ | ||
208 | *diff = buf_len - (end-start); | ||
209 | |||
210 | if (*diff == 0) { | ||
211 | /* simply replace it with new passive address */ | ||
212 | memcpy(start, buf, buf_len); | ||
213 | ret = 1; | ||
214 | } else { | ||
215 | ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start, | ||
216 | end-start, buf, buf_len); | ||
217 | } | ||
218 | |||
219 | cp->app_data = NULL; | ||
220 | ip_vs_tcp_conn_listen(n_cp); | ||
221 | ip_vs_conn_put(n_cp); | ||
222 | return ret; | ||
223 | } | ||
224 | return 1; | ||
225 | } | ||
226 | |||
227 | |||
228 | /* | ||
229 | * Look at incoming ftp packets to catch the PASV/PORT command | ||
230 | * (outside-to-inside). | ||
231 | * | ||
232 | * The incoming packet having the PORT command should be something like | ||
233 | * "PORT xxx,xxx,xxx,xxx,ppp,ppp\n". | ||
234 | * xxx,xxx,xxx,xxx is the client address, ppp,ppp is the client port number. | ||
235 | * In this case, we create a connection entry using the client address and | ||
236 | * port, so that the active ftp data connection from the server can reach | ||
237 | * the client. | ||
238 | */ | ||
239 | static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, | ||
240 | struct sk_buff *skb, int *diff) | ||
241 | { | ||
242 | struct iphdr *iph; | ||
243 | struct tcphdr *th; | ||
244 | char *data, *data_start, *data_limit; | ||
245 | char *start, *end; | ||
246 | __be32 to; | ||
247 | __be16 port; | ||
248 | struct ip_vs_conn *n_cp; | ||
249 | |||
250 | /* no diff required for incoming packets */ | ||
251 | *diff = 0; | ||
252 | |||
253 | /* Only useful for established sessions */ | ||
254 | if (cp->state != IP_VS_TCP_S_ESTABLISHED) | ||
255 | return 1; | ||
256 | |||
257 | /* Linear packets are much easier to deal with. */ | ||
258 | if (!skb_make_writable(skb, skb->len)) | ||
259 | return 0; | ||
260 | |||
261 | /* | ||
262 | * Detecting whether it is passive | ||
263 | */ | ||
264 | iph = ip_hdr(skb); | ||
265 | th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); | ||
266 | |||
267 | /* Since there may be OPTIONS in the TCP packet and HLEN is the | ||
268 | length of the header in 32-bit multiples, it is accurate to | ||
269 | calculate the data address as th+HLEN*4 */ | ||
270 | data = data_start = (char *)th + (th->doff << 2); | ||
271 | data_limit = skb_tail_pointer(skb); | ||
272 | |||
273 | while (data <= data_limit - 6) { | ||
274 | if (strnicmp(data, "PASV\r\n", 6) == 0) { | ||
275 | /* Passive mode on */ | ||
276 | IP_VS_DBG(7, "got PASV at %td of %td\n", | ||
277 | data - data_start, | ||
278 | data_limit - data_start); | ||
279 | cp->app_data = &ip_vs_ftp_pasv; | ||
280 | return 1; | ||
281 | } | ||
282 | data++; | ||
283 | } | ||
284 | |||
285 | /* | ||
286 | * To support a virtual FTP server, the scenario is as follows: | ||
287 | * FTP client ----> Load Balancer ----> FTP server | ||
288 | * First detect the port number in the application data, | ||
289 | * then create a new connection entry for the coming data | ||
290 | * connection. | ||
291 | */ | ||
292 | if (ip_vs_ftp_get_addrport(data_start, data_limit, | ||
293 | CLIENT_STRING, sizeof(CLIENT_STRING)-1, | ||
294 | '\r', &to, &port, | ||
295 | &start, &end) != 1) | ||
296 | return 1; | ||
297 | |||
298 | IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n", | ||
299 | NIPQUAD(to), ntohs(port)); | ||
300 | |||
301 | /* Passive mode off */ | ||
302 | cp->app_data = NULL; | ||
303 | |||
304 | /* | ||
305 | * Now update or create a connection entry for it | ||
306 | */ | ||
307 | IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n", | ||
308 | ip_vs_proto_name(iph->protocol), | ||
309 | NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr), 0); | ||
310 | |||
311 | n_cp = ip_vs_conn_in_get(iph->protocol, | ||
312 | to, port, | ||
313 | cp->vaddr, htons(ntohs(cp->vport)-1)); | ||
314 | if (!n_cp) { | ||
315 | n_cp = ip_vs_conn_new(IPPROTO_TCP, | ||
316 | to, port, | ||
317 | cp->vaddr, htons(ntohs(cp->vport)-1), | ||
318 | cp->daddr, htons(ntohs(cp->dport)-1), | ||
319 | 0, | ||
320 | cp->dest); | ||
321 | if (!n_cp) | ||
322 | return 0; | ||
323 | |||
324 | /* add its controller */ | ||
325 | ip_vs_control_add(n_cp, cp); | ||
326 | } | ||
327 | |||
328 | /* | ||
329 | * Move tunnel to listen state | ||
330 | */ | ||
331 | ip_vs_tcp_conn_listen(n_cp); | ||
332 | ip_vs_conn_put(n_cp); | ||
333 | |||
334 | return 1; | ||
335 | } | ||
336 | |||
337 | |||
338 | static struct ip_vs_app ip_vs_ftp = { | ||
339 | .name = "ftp", | ||
340 | .type = IP_VS_APP_TYPE_FTP, | ||
341 | .protocol = IPPROTO_TCP, | ||
342 | .module = THIS_MODULE, | ||
343 | .incs_list = LIST_HEAD_INIT(ip_vs_ftp.incs_list), | ||
344 | .init_conn = ip_vs_ftp_init_conn, | ||
345 | .done_conn = ip_vs_ftp_done_conn, | ||
346 | .bind_conn = NULL, | ||
347 | .unbind_conn = NULL, | ||
348 | .pkt_out = ip_vs_ftp_out, | ||
349 | .pkt_in = ip_vs_ftp_in, | ||
350 | }; | ||
351 | |||
352 | |||
353 | /* | ||
354 | * ip_vs_ftp initialization | ||
355 | */ | ||
356 | static int __init ip_vs_ftp_init(void) | ||
357 | { | ||
358 | int i, ret; | ||
359 | struct ip_vs_app *app = &ip_vs_ftp; | ||
360 | |||
361 | ret = register_ip_vs_app(app); | ||
362 | if (ret) | ||
363 | return ret; | ||
364 | |||
365 | for (i=0; i<IP_VS_APP_MAX_PORTS; i++) { | ||
366 | if (!ports[i]) | ||
367 | continue; | ||
368 | ret = register_ip_vs_app_inc(app, app->protocol, ports[i]); | ||
369 | if (ret) | ||
370 | break; | ||
371 | IP_VS_INFO("%s: loaded support on port[%d] = %d\n", | ||
372 | app->name, i, ports[i]); | ||
373 | } | ||
374 | |||
375 | if (ret) | ||
376 | unregister_ip_vs_app(app); | ||
377 | |||
378 | return ret; | ||
379 | } | ||
380 | |||
381 | |||
382 | /* | ||
383 | * ip_vs_ftp finish. | ||
384 | */ | ||
385 | static void __exit ip_vs_ftp_exit(void) | ||
386 | { | ||
387 | unregister_ip_vs_app(&ip_vs_ftp); | ||
388 | } | ||
389 | |||
390 | |||
391 | module_init(ip_vs_ftp_init); | ||
392 | module_exit(ip_vs_ftp_exit); | ||
393 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c deleted file mode 100644 index 7a6a319f544a..000000000000 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ /dev/null | |||
@@ -1,571 +0,0 @@ | |||
1 | /* | ||
2 | * IPVS: Locality-Based Least-Connection scheduling module | ||
3 | * | ||
4 | * Authors: Wensong Zhang <wensong@gnuchina.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * Changes: | ||
12 | * Martin Hamilton : fixed the terrible locking bugs | ||
13 | * *lock(tbl->lock) ==> *lock(&tbl->lock) | ||
14 | * Wensong Zhang : fixed the uninitialized tbl->lock bug | ||
15 | * Wensong Zhang : added doing full expiration check to | ||
16 | * collect stale entries of 24+ hours when | ||
17 | * no partial expire check in a half hour | ||
18 | * Julian Anastasov : replaced del_timer call with del_timer_sync | ||
19 | * to avoid the possible race between timer | ||
20 | * handler and del_timer thread in SMP | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | /* | ||
25 | * The lblc algorithm is as follows (pseudo code): | ||
26 | * | ||
27 | * if cachenode[dest_ip] is null then | ||
28 | * n, cachenode[dest_ip] <- {weighted least-conn node}; | ||
29 | * else | ||
30 | * n <- cachenode[dest_ip]; | ||
31 | * if (n is dead) OR | ||
32 | * (n.conns>n.weight AND | ||
33 | * there is a node m with m.conns<m.weight/2) then | ||
34 | * n, cachenode[dest_ip] <- {weighted least-conn node}; | ||
35 | * | ||
36 | * return n; | ||
37 | * | ||
38 | * Thanks must go to Wenzhuo Zhang for talking WCCP to me and pushing | ||
39 | * me to write this module. | ||
40 | */ | ||
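To make the reassignment condition concrete, a standalone sketch with invented numbers: the cached node n is abandoned only when it is past its weight while some other node m sits under half of its own, so a briefly busy cache server keeps its locality.

#include <stdio.h>

int main(void)
{
	int n_conns = 10, n_weight = 8;	/* cached node past its weight */
	int m_conns = 2,  m_weight = 8;	/* another node well under half */

	if (n_conns > n_weight && m_conns * 2 < m_weight)
		printf("re-pick cachenode[dest_ip] by weighted least-conn\n");
	return 0;
}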
41 | |||
42 | #include <linux/ip.h> | ||
43 | #include <linux/module.h> | ||
44 | #include <linux/kernel.h> | ||
45 | #include <linux/skbuff.h> | ||
46 | #include <linux/jiffies.h> | ||
47 | |||
48 | /* for sysctl */ | ||
49 | #include <linux/fs.h> | ||
50 | #include <linux/sysctl.h> | ||
51 | |||
52 | #include <net/ip_vs.h> | ||
53 | |||
54 | |||
55 | /* | ||
56 | * It is for garbage collection of stale IPVS lblc entries, | ||
57 | * when the table is full. | ||
58 | */ | ||
59 | #define CHECK_EXPIRE_INTERVAL (60*HZ) | ||
60 | #define ENTRY_TIMEOUT (6*60*HZ) | ||
61 | |||
62 | /* | ||
63 | * It is for full expiration check. | ||
64 | * When there is no partial expiration check (garbage collection) | ||
65 | * in a half hour, do a full expiration check to collect stale | ||
66 | * entries that haven't been touched for a day. | ||
67 | */ | ||
68 | #define COUNT_FOR_FULL_EXPIRATION 30 | ||
69 | static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ; | ||
70 | |||
71 | |||
72 | /* | ||
73 | * for IPVS lblc entry hash table | ||
74 | */ | ||
75 | #ifndef CONFIG_IP_VS_LBLC_TAB_BITS | ||
76 | #define CONFIG_IP_VS_LBLC_TAB_BITS 10 | ||
77 | #endif | ||
78 | #define IP_VS_LBLC_TAB_BITS CONFIG_IP_VS_LBLC_TAB_BITS | ||
79 | #define IP_VS_LBLC_TAB_SIZE (1 << IP_VS_LBLC_TAB_BITS) | ||
80 | #define IP_VS_LBLC_TAB_MASK (IP_VS_LBLC_TAB_SIZE - 1) | ||
81 | |||
82 | |||
83 | /* | ||
84 | * IPVS lblc entry represents an association between destination | ||
85 | * IP address and its destination server | ||
86 | */ | ||
87 | struct ip_vs_lblc_entry { | ||
88 | struct list_head list; | ||
89 | __be32 addr; /* destination IP address */ | ||
90 | struct ip_vs_dest *dest; /* real server (cache) */ | ||
91 | unsigned long lastuse; /* last used time */ | ||
92 | }; | ||
93 | |||
94 | |||
95 | /* | ||
96 | * IPVS lblc hash table | ||
97 | */ | ||
98 | struct ip_vs_lblc_table { | ||
99 | rwlock_t lock; /* lock for this table */ | ||
100 | struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */ | ||
101 | atomic_t entries; /* number of entries */ | ||
102 | int max_size; /* maximum size of entries */ | ||
103 | struct timer_list periodic_timer; /* collect stale entries */ | ||
104 | int rover; /* rover for expire check */ | ||
105 | int counter; /* counter for no expire */ | ||
106 | }; | ||
107 | |||
108 | |||
109 | /* | ||
110 | * IPVS LBLC sysctl table | ||
111 | */ | ||
112 | |||
113 | static ctl_table vs_vars_table[] = { | ||
114 | { | ||
115 | .procname = "lblc_expiration", | ||
116 | .data = &sysctl_ip_vs_lblc_expiration, | ||
117 | .maxlen = sizeof(int), | ||
118 | .mode = 0644, | ||
119 | .proc_handler = &proc_dointvec_jiffies, | ||
120 | }, | ||
121 | { .ctl_name = 0 } | ||
122 | }; | ||
123 | |||
124 | static struct ctl_table_header * sysctl_header; | ||
125 | |||
126 | /* | ||
127 | * new/free an ip_vs_lblc_entry, which is a mapping of a destination | ||
128 | * IP address to a server. | ||
129 | */ | ||
130 | static inline struct ip_vs_lblc_entry * | ||
131 | ip_vs_lblc_new(__be32 daddr, struct ip_vs_dest *dest) | ||
132 | { | ||
133 | struct ip_vs_lblc_entry *en; | ||
134 | |||
135 | en = kmalloc(sizeof(struct ip_vs_lblc_entry), GFP_ATOMIC); | ||
136 | if (en == NULL) { | ||
137 | IP_VS_ERR("ip_vs_lblc_new(): no memory\n"); | ||
138 | return NULL; | ||
139 | } | ||
140 | |||
141 | INIT_LIST_HEAD(&en->list); | ||
142 | en->addr = daddr; | ||
143 | |||
144 | atomic_inc(&dest->refcnt); | ||
145 | en->dest = dest; | ||
146 | |||
147 | return en; | ||
148 | } | ||
149 | |||
150 | |||
151 | static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) | ||
152 | { | ||
153 | list_del(&en->list); | ||
154 | /* | ||
155 | * We don't kfree dest because it is referred to either by its | ||
156 | * service or by the trash dest list. | ||
157 | */ | ||
158 | atomic_dec(&en->dest->refcnt); | ||
159 | kfree(en); | ||
160 | } | ||
161 | |||
162 | |||
163 | /* | ||
164 | * Returns hash value for IPVS LBLC entry | ||
165 | */ | ||
166 | static inline unsigned ip_vs_lblc_hashkey(__be32 addr) | ||
167 | { | ||
168 | return (ntohl(addr)*2654435761UL) & IP_VS_LBLC_TAB_MASK; | ||
169 | } | ||
170 | |||
171 | |||
172 | /* | ||
173 | * Hash an entry in the ip_vs_lblc_table. | ||
174 | * returns bool success. | ||
175 | */ | ||
176 | static int | ||
177 | ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en) | ||
178 | { | ||
179 | unsigned hash; | ||
180 | |||
181 | if (!list_empty(&en->list)) { | ||
182 | IP_VS_ERR("ip_vs_lblc_hash(): request for already hashed, " | ||
183 | "called from %p\n", __builtin_return_address(0)); | ||
184 | return 0; | ||
185 | } | ||
186 | |||
187 | /* | ||
188 | * Hash by destination IP address | ||
189 | */ | ||
190 | hash = ip_vs_lblc_hashkey(en->addr); | ||
191 | |||
192 | write_lock(&tbl->lock); | ||
193 | list_add(&en->list, &tbl->bucket[hash]); | ||
194 | atomic_inc(&tbl->entries); | ||
195 | write_unlock(&tbl->lock); | ||
196 | |||
197 | return 1; | ||
198 | } | ||
199 | |||
200 | |||
201 | /* | ||
202 | * Get ip_vs_lblc_entry associated with supplied parameters. | ||
203 | */ | ||
204 | static inline struct ip_vs_lblc_entry * | ||
205 | ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr) | ||
206 | { | ||
207 | unsigned hash; | ||
208 | struct ip_vs_lblc_entry *en; | ||
209 | |||
210 | hash = ip_vs_lblc_hashkey(addr); | ||
211 | |||
212 | read_lock(&tbl->lock); | ||
213 | |||
214 | list_for_each_entry(en, &tbl->bucket[hash], list) { | ||
215 | if (en->addr == addr) { | ||
216 | /* HIT */ | ||
217 | read_unlock(&tbl->lock); | ||
218 | return en; | ||
219 | } | ||
220 | } | ||
221 | |||
222 | read_unlock(&tbl->lock); | ||
223 | |||
224 | return NULL; | ||
225 | } | ||
226 | |||
227 | |||
228 | /* | ||
229 | * Flush all the entries of the specified table. | ||
230 | */ | ||
231 | static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl) | ||
232 | { | ||
233 | int i; | ||
234 | struct ip_vs_lblc_entry *en, *nxt; | ||
235 | |||
236 | for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { | ||
237 | write_lock(&tbl->lock); | ||
238 | list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { | ||
239 | ip_vs_lblc_free(en); | ||
240 | atomic_dec(&tbl->entries); | ||
241 | } | ||
242 | write_unlock(&tbl->lock); | ||
243 | } | ||
244 | } | ||
245 | |||
246 | |||
247 | static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl) | ||
248 | { | ||
249 | unsigned long now = jiffies; | ||
250 | int i, j; | ||
251 | struct ip_vs_lblc_entry *en, *nxt; | ||
252 | |||
253 | for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { | ||
254 | j = (j + 1) & IP_VS_LBLC_TAB_MASK; | ||
255 | |||
256 | write_lock(&tbl->lock); | ||
257 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { | ||
258 | if (time_before(now, | ||
259 | en->lastuse + sysctl_ip_vs_lblc_expiration)) | ||
260 | continue; | ||
261 | |||
262 | ip_vs_lblc_free(en); | ||
263 | atomic_dec(&tbl->entries); | ||
264 | } | ||
265 | write_unlock(&tbl->lock); | ||
266 | } | ||
267 | tbl->rover = j; | ||
268 | } | ||
269 | |||
270 | |||
271 | /* | ||
272 | * Periodical timer handler for IPVS lblc table | ||
273 | * It is used to collect stale entries when the number of entries | ||
274 | * exceeds the maximum size of the table. | ||
275 | * | ||
276 | * Fixme: we probably need a more complicated algorithm to collect | ||
277 | * entries that have not been used for a long time even | ||
278 | * if the number of entries doesn't exceed the maximum size | ||
279 | * of the table. | ||
280 | * The full expiration check is for this purpose now. | ||
281 | */ | ||
282 | static void ip_vs_lblc_check_expire(unsigned long data) | ||
283 | { | ||
284 | struct ip_vs_lblc_table *tbl; | ||
285 | unsigned long now = jiffies; | ||
286 | int goal; | ||
287 | int i, j; | ||
288 | struct ip_vs_lblc_entry *en, *nxt; | ||
289 | |||
290 | tbl = (struct ip_vs_lblc_table *)data; | ||
291 | |||
292 | if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { | ||
293 | /* do full expiration check */ | ||
294 | ip_vs_lblc_full_check(tbl); | ||
295 | tbl->counter = 1; | ||
296 | goto out; | ||
297 | } | ||
298 | |||
299 | if (atomic_read(&tbl->entries) <= tbl->max_size) { | ||
300 | tbl->counter++; | ||
301 | goto out; | ||
302 | } | ||
303 | |||
304 | goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3; | ||
305 | if (goal > tbl->max_size/2) | ||
306 | goal = tbl->max_size/2; | ||
307 | |||
308 | for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { | ||
309 | j = (j + 1) & IP_VS_LBLC_TAB_MASK; | ||
310 | |||
311 | write_lock(&tbl->lock); | ||
312 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { | ||
313 | if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) | ||
314 | continue; | ||
315 | |||
316 | ip_vs_lblc_free(en); | ||
317 | atomic_dec(&tbl->entries); | ||
318 | goal--; | ||
319 | } | ||
320 | write_unlock(&tbl->lock); | ||
321 | if (goal <= 0) | ||
322 | break; | ||
323 | } | ||
324 | tbl->rover = j; | ||
325 | |||
326 | out: | ||
327 | mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL); | ||
328 | } | ||
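A worked example of the reclaim goal above, as a sketch using this file's defaults (max_size = IP_VS_LBLC_TAB_SIZE*16 = 16384):

#include <stdio.h>

int main(void)
{
	int max_size = 16384;			/* IP_VS_LBLC_TAB_SIZE * 16 */
	int entries = 20480;			/* 4096 over the limit */
	int goal = (entries - max_size) * 4 / 3;	/* 5461 */

	if (goal > max_size / 2)
		goal = max_size / 2;		/* cap at 8192; not hit here */
	printf("try to reclaim %d entries this round\n", goal);
	return 0;
}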
329 | |||
330 | |||
331 | static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) | ||
332 | { | ||
333 | int i; | ||
334 | struct ip_vs_lblc_table *tbl; | ||
335 | |||
336 | /* | ||
337 | * Allocate the ip_vs_lblc_table for this service | ||
338 | */ | ||
339 | tbl = kmalloc(sizeof(struct ip_vs_lblc_table), GFP_ATOMIC); | ||
340 | if (tbl == NULL) { | ||
341 | IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n"); | ||
342 | return -ENOMEM; | ||
343 | } | ||
344 | svc->sched_data = tbl; | ||
345 | IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for " | ||
346 | "current service\n", | ||
347 | sizeof(struct ip_vs_lblc_table)); | ||
348 | |||
349 | /* | ||
350 | * Initialize the hash buckets | ||
351 | */ | ||
352 | for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { | ||
353 | INIT_LIST_HEAD(&tbl->bucket[i]); | ||
354 | } | ||
355 | rwlock_init(&tbl->lock); | ||
356 | tbl->max_size = IP_VS_LBLC_TAB_SIZE*16; | ||
357 | tbl->rover = 0; | ||
358 | tbl->counter = 1; | ||
359 | |||
360 | /* | ||
361 | * Hook periodic timer for garbage collection | ||
362 | */ | ||
363 | setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire, | ||
364 | (unsigned long)tbl); | ||
365 | tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL; | ||
366 | add_timer(&tbl->periodic_timer); | ||
367 | |||
368 | return 0; | ||
369 | } | ||
370 | |||
371 | |||
372 | static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) | ||
373 | { | ||
374 | struct ip_vs_lblc_table *tbl = svc->sched_data; | ||
375 | |||
376 | /* remove periodic timer */ | ||
377 | del_timer_sync(&tbl->periodic_timer); | ||
378 | |||
379 | /* got to clean up table entries here */ | ||
380 | ip_vs_lblc_flush(tbl); | ||
381 | |||
382 | /* release the table itself */ | ||
383 | kfree(svc->sched_data); | ||
384 | IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n", | ||
385 | sizeof(struct ip_vs_lblc_table)); | ||
386 | |||
387 | return 0; | ||
388 | } | ||
389 | |||
390 | |||
391 | static int ip_vs_lblc_update_svc(struct ip_vs_service *svc) | ||
392 | { | ||
393 | return 0; | ||
394 | } | ||
395 | |||
396 | |||
397 | static inline struct ip_vs_dest * | ||
398 | __ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) | ||
399 | { | ||
400 | struct ip_vs_dest *dest, *least; | ||
401 | int loh, doh; | ||
402 | |||
403 | /* | ||
404 | * We think the overhead of processing active connections is fifty | ||
405 | * times higher than that of inactive connections on average. (This | ||
406 | * factor of fifty might not be accurate; we will change it later.) We | ||
407 | * use the following formula to estimate the overhead: | ||
408 | * dest->activeconns*50 + dest->inactconns | ||
409 | * and the load: | ||
410 | * (dest overhead) / dest->weight | ||
411 | * | ||
412 | * Remember -- no floats in kernel mode!!! | ||
413 | * The comparison of h1*w2 > h2*w1 is equivalent to that of | ||
414 | * h1/w1 > h2/w2 | ||
415 | * if every weight is larger than zero. | ||
416 | * | ||
417 | * The server with weight=0 is quiesced and will not receive any | ||
418 | * new connection. | ||
419 | */ | ||
420 | list_for_each_entry(dest, &svc->destinations, n_list) { | ||
421 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) | ||
422 | continue; | ||
423 | if (atomic_read(&dest->weight) > 0) { | ||
424 | least = dest; | ||
425 | loh = atomic_read(&least->activeconns) * 50 | ||
426 | + atomic_read(&least->inactconns); | ||
427 | goto nextstage; | ||
428 | } | ||
429 | } | ||
430 | return NULL; | ||
431 | |||
432 | /* | ||
433 | * Find the destination with the least load. | ||
434 | */ | ||
435 | nextstage: | ||
436 | list_for_each_entry_continue(dest, &svc->destinations, n_list) { | ||
437 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) | ||
438 | continue; | ||
439 | |||
440 | doh = atomic_read(&dest->activeconns) * 50 | ||
441 | + atomic_read(&dest->inactconns); | ||
442 | if (loh * atomic_read(&dest->weight) > | ||
443 | doh * atomic_read(&least->weight)) { | ||
444 | least = dest; | ||
445 | loh = doh; | ||
446 | } | ||
447 | } | ||
448 | |||
449 | IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d " | ||
450 | "activeconns %d refcnt %d weight %d overhead %d\n", | ||
451 | NIPQUAD(least->addr), ntohs(least->port), | ||
452 | atomic_read(&least->activeconns), | ||
453 | atomic_read(&least->refcnt), | ||
454 | atomic_read(&least->weight), loh); | ||
455 | |||
456 | return least; | ||
457 | } | ||
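A quick numeric check of the cross-multiplied comparison above, as a standalone sketch: with overheads h1 = 100, h2 = 30 and weights w1 = 2, w2 = 1, the per-weight loads are 50 and 30, and the integer test agrees without any division:

#include <stdio.h>

int main(void)
{
	int h1 = 100, w1 = 2;	/* overhead = activeconns*50 + inactconns */
	int h2 = 30,  w2 = 1;

	/* h1/w1 > h2/w2  <=>  h1*w2 > h2*w1, valid while weights > 0 */
	if (h1 * w2 > h2 * w1)
		printf("the second server carries the lighter load\n");
	return 0;
}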
458 | |||
459 | |||
460 | /* | ||
461 | * If this destination server is overloaded and there is a less loaded | ||
462 | * server, then return true. | ||
463 | */ | ||
464 | static inline int | ||
465 | is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) | ||
466 | { | ||
467 | if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) { | ||
468 | struct ip_vs_dest *d; | ||
469 | |||
470 | list_for_each_entry(d, &svc->destinations, n_list) { | ||
471 | if (atomic_read(&d->activeconns)*2 | ||
472 | < atomic_read(&d->weight)) { | ||
473 | return 1; | ||
474 | } | ||
475 | } | ||
476 | } | ||
477 | return 0; | ||
478 | } | ||
479 | |||
480 | |||
481 | /* | ||
482 | * Locality-Based (weighted) Least-Connection scheduling | ||
483 | */ | ||
484 | static struct ip_vs_dest * | ||
485 | ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | ||
486 | { | ||
487 | struct ip_vs_dest *dest; | ||
488 | struct ip_vs_lblc_table *tbl; | ||
489 | struct ip_vs_lblc_entry *en; | ||
490 | struct iphdr *iph = ip_hdr(skb); | ||
491 | |||
492 | IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n"); | ||
493 | |||
494 | tbl = (struct ip_vs_lblc_table *)svc->sched_data; | ||
495 | en = ip_vs_lblc_get(tbl, iph->daddr); | ||
496 | if (en == NULL) { | ||
497 | dest = __ip_vs_wlc_schedule(svc, iph); | ||
498 | if (dest == NULL) { | ||
499 | IP_VS_DBG(1, "no destination available\n"); | ||
500 | return NULL; | ||
501 | } | ||
502 | en = ip_vs_lblc_new(iph->daddr, dest); | ||
503 | if (en == NULL) { | ||
504 | return NULL; | ||
505 | } | ||
506 | ip_vs_lblc_hash(tbl, en); | ||
507 | } else { | ||
508 | dest = en->dest; | ||
509 | if (!(dest->flags & IP_VS_DEST_F_AVAILABLE) | ||
510 | || atomic_read(&dest->weight) <= 0 | ||
511 | || is_overloaded(dest, svc)) { | ||
512 | dest = __ip_vs_wlc_schedule(svc, iph); | ||
513 | if (dest == NULL) { | ||
514 | IP_VS_DBG(1, "no destination available\n"); | ||
515 | return NULL; | ||
516 | } | ||
517 | atomic_dec(&en->dest->refcnt); | ||
518 | atomic_inc(&dest->refcnt); | ||
519 | en->dest = dest; | ||
520 | } | ||
521 | } | ||
522 | en->lastuse = jiffies; | ||
523 | |||
524 | IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u " | ||
525 | "--> server %u.%u.%u.%u:%d\n", | ||
526 | NIPQUAD(en->addr), | ||
527 | NIPQUAD(dest->addr), | ||
528 | ntohs(dest->port)); | ||
529 | |||
530 | return dest; | ||
531 | } | ||
532 | |||
533 | |||
534 | /* | ||
535 | * IPVS LBLC Scheduler structure | ||
536 | */ | ||
537 | static struct ip_vs_scheduler ip_vs_lblc_scheduler = | ||
538 | { | ||
539 | .name = "lblc", | ||
540 | .refcnt = ATOMIC_INIT(0), | ||
541 | .module = THIS_MODULE, | ||
542 | .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list), | ||
543 | .init_service = ip_vs_lblc_init_svc, | ||
544 | .done_service = ip_vs_lblc_done_svc, | ||
545 | .update_service = ip_vs_lblc_update_svc, | ||
546 | .schedule = ip_vs_lblc_schedule, | ||
547 | }; | ||
548 | |||
549 | |||
550 | static int __init ip_vs_lblc_init(void) | ||
551 | { | ||
552 | int ret; | ||
553 | |||
554 | sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); | ||
555 | ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler); | ||
556 | if (ret) | ||
557 | unregister_sysctl_table(sysctl_header); | ||
558 | return ret; | ||
559 | } | ||
560 | |||
561 | |||
562 | static void __exit ip_vs_lblc_cleanup(void) | ||
563 | { | ||
564 | unregister_sysctl_table(sysctl_header); | ||
565 | unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); | ||
566 | } | ||
567 | |||
568 | |||
569 | module_init(ip_vs_lblc_init); | ||
570 | module_exit(ip_vs_lblc_cleanup); | ||
571 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c deleted file mode 100644 index c234e73968a6..000000000000 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ /dev/null | |||
@@ -1,760 +0,0 @@ | |||
1 | /* | ||
2 | * IPVS: Locality-Based Least-Connection with Replication scheduler | ||
3 | * | ||
4 | * Authors: Wensong Zhang <wensong@gnuchina.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * Changes: | ||
12 | * Julian Anastasov : Added the missing (dest->weight>0) | ||
13 | * condition in the ip_vs_dest_set_max. | ||
14 | * | ||
15 | */ | ||
16 | |||
17 | /* | ||
18 | * The lblc/r algorithm is as follows (pseudo code): | ||
19 | * | ||
20 | * if serverSet[dest_ip] is null then | ||
21 | * n, serverSet[dest_ip] <- {weighted least-conn node}; | ||
22 | * else | ||
23 | * n <- {least-conn (alive) node in serverSet[dest_ip]}; | ||
24 | * if (n is null) OR | ||
25 | * (n.conns>n.weight AND | ||
26 | * there is a node m with m.conns<m.weight/2) then | ||
27 | * n <- {weighted least-conn node}; | ||
28 | * add n to serverSet[dest_ip]; | ||
29 | * if |serverSet[dest_ip]| > 1 AND | ||
30 | * now - serverSet[dest_ip].lastMod > T then | ||
31 | * m <- {most conn node in serverSet[dest_ip]}; | ||
32 | * remove m from serverSet[dest_ip]; | ||
33 | * if serverSet[dest_ip] changed then | ||
34 | * serverSet[dest_ip].lastMod <- now; | ||
35 | * | ||
36 | * return n; | ||
37 | * | ||
38 | */ | ||
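The pruning branch near the end can be read with a small sketch: once the replicated server set has more than one member and has not changed for longer than T, the most-loaded member is dropped, shrinking the set back toward a single locality.

#include <stdio.h>

int main(void)
{
	int set_size = 3;			/* |serverSet[dest_ip]| */
	long now = 1000, last_mod = 100, T = 600;	/* invented times */

	if (set_size > 1 && now - last_mod > T)
		printf("remove the most-loaded node from serverSet\n");
	return 0;
}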
39 | |||
40 | #include <linux/ip.h> | ||
41 | #include <linux/module.h> | ||
42 | #include <linux/kernel.h> | ||
43 | #include <linux/skbuff.h> | ||
44 | #include <linux/jiffies.h> | ||
45 | |||
46 | /* for sysctl */ | ||
47 | #include <linux/fs.h> | ||
48 | #include <linux/sysctl.h> | ||
49 | #include <net/net_namespace.h> | ||
50 | |||
51 | #include <net/ip_vs.h> | ||
52 | |||
53 | |||
54 | /* | ||
55 | * It is for garbage collection of stale IPVS lblcr entries, | ||
56 | * when the table is full. | ||
57 | */ | ||
58 | #define CHECK_EXPIRE_INTERVAL (60*HZ) | ||
59 | #define ENTRY_TIMEOUT (6*60*HZ) | ||
60 | |||
61 | /* | ||
62 | * It is for full expiration check. | ||
63 | * When there is no partial expiration check (garbage collection) | ||
64 | * in a half hour, do a full expiration check to collect stale | ||
65 | * entries that haven't been touched for a day. | ||
66 | */ | ||
67 | #define COUNT_FOR_FULL_EXPIRATION 30 | ||
68 | static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ; | ||
69 | |||
70 | |||
71 | /* | ||
72 | * for IPVS lblcr entry hash table | ||
73 | */ | ||
74 | #ifndef CONFIG_IP_VS_LBLCR_TAB_BITS | ||
75 | #define CONFIG_IP_VS_LBLCR_TAB_BITS 10 | ||
76 | #endif | ||
77 | #define IP_VS_LBLCR_TAB_BITS CONFIG_IP_VS_LBLCR_TAB_BITS | ||
78 | #define IP_VS_LBLCR_TAB_SIZE (1 << IP_VS_LBLCR_TAB_BITS) | ||
79 | #define IP_VS_LBLCR_TAB_MASK (IP_VS_LBLCR_TAB_SIZE - 1) | ||
80 | |||
81 | |||
82 | /* | ||
83 | * IPVS destination set structure and operations | ||
84 | */ | ||
85 | struct ip_vs_dest_list { | ||
86 | struct ip_vs_dest_list *next; /* list link */ | ||
87 | struct ip_vs_dest *dest; /* destination server */ | ||
88 | }; | ||
89 | |||
90 | struct ip_vs_dest_set { | ||
91 | atomic_t size; /* set size */ | ||
92 | unsigned long lastmod; /* last modified time */ | ||
93 | struct ip_vs_dest_list *list; /* destination list */ | ||
94 | rwlock_t lock; /* lock for this list */ | ||
95 | }; | ||
96 | |||
97 | |||
98 | static struct ip_vs_dest_list * | ||
99 | ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) | ||
100 | { | ||
101 | struct ip_vs_dest_list *e; | ||
102 | |||
103 | for (e=set->list; e!=NULL; e=e->next) { | ||
104 | if (e->dest == dest) | ||
105 | /* already existed */ | ||
106 | return NULL; | ||
107 | } | ||
108 | |||
109 | e = kmalloc(sizeof(struct ip_vs_dest_list), GFP_ATOMIC); | ||
110 | if (e == NULL) { | ||
111 | IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n"); | ||
112 | return NULL; | ||
113 | } | ||
114 | |||
115 | atomic_inc(&dest->refcnt); | ||
116 | e->dest = dest; | ||
117 | |||
118 | /* link it to the list */ | ||
119 | write_lock(&set->lock); | ||
120 | e->next = set->list; | ||
121 | set->list = e; | ||
122 | atomic_inc(&set->size); | ||
123 | write_unlock(&set->lock); | ||
124 | |||
125 | set->lastmod = jiffies; | ||
126 | return e; | ||
127 | } | ||
128 | |||
129 | static void | ||
130 | ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) | ||
131 | { | ||
132 | struct ip_vs_dest_list *e, **ep; | ||
133 | |||
134 | write_lock(&set->lock); | ||
135 | for (ep=&set->list, e=*ep; e!=NULL; e=*ep) { | ||
136 | if (e->dest == dest) { | ||
137 | /* HIT */ | ||
138 | *ep = e->next; | ||
139 | atomic_dec(&set->size); | ||
140 | set->lastmod = jiffies; | ||
141 | atomic_dec(&e->dest->refcnt); | ||
142 | kfree(e); | ||
143 | break; | ||
144 | } | ||
145 | ep = &e->next; | ||
146 | } | ||
147 | write_unlock(&set->lock); | ||
148 | } | ||
149 | |||
150 | static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) | ||
151 | { | ||
152 | struct ip_vs_dest_list *e, **ep; | ||
153 | |||
154 | write_lock(&set->lock); | ||
155 | for (ep=&set->list, e=*ep; e!=NULL; e=*ep) { | ||
156 | *ep = e->next; | ||
157 | /* | ||
158 | * We don't kfree dest because it is referred to either | ||
159 | * by its service or by the trash dest list. | ||
160 | */ | ||
161 | atomic_dec(&e->dest->refcnt); | ||
162 | kfree(e); | ||
163 | } | ||
164 | write_unlock(&set->lock); | ||
165 | } | ||
166 | |||
167 | /* get weighted least-connection node in the destination set */ | ||
168 | static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) | ||
169 | { | ||
170 | register struct ip_vs_dest_list *e; | ||
171 | struct ip_vs_dest *dest, *least; | ||
172 | int loh, doh; | ||
173 | |||
174 | if (set == NULL) | ||
175 | return NULL; | ||
176 | |||
177 | read_lock(&set->lock); | ||
178 | /* select the first destination server, whose weight > 0 */ | ||
179 | for (e=set->list; e!=NULL; e=e->next) { | ||
180 | least = e->dest; | ||
181 | if (least->flags & IP_VS_DEST_F_OVERLOAD) | ||
182 | continue; | ||
183 | |||
184 | if ((atomic_read(&least->weight) > 0) | ||
185 | && (least->flags & IP_VS_DEST_F_AVAILABLE)) { | ||
186 | loh = atomic_read(&least->activeconns) * 50 | ||
187 | + atomic_read(&least->inactconns); | ||
188 | goto nextstage; | ||
189 | } | ||
190 | } | ||
191 | read_unlock(&set->lock); | ||
192 | return NULL; | ||
193 | |||
194 | /* find the destination with the weighted least load */ | ||
195 | nextstage: | ||
196 | for (e=e->next; e!=NULL; e=e->next) { | ||
197 | dest = e->dest; | ||
198 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) | ||
199 | continue; | ||
200 | |||
201 | doh = atomic_read(&dest->activeconns) * 50 | ||
202 | + atomic_read(&dest->inactconns); | ||
203 | if ((loh * atomic_read(&dest->weight) > | ||
204 | doh * atomic_read(&least->weight)) | ||
205 | && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { | ||
206 | least = dest; | ||
207 | loh = doh; | ||
208 | } | ||
209 | } | ||
210 | read_unlock(&set->lock); | ||
211 | |||
212 | IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d " | ||
213 | "activeconns %d refcnt %d weight %d overhead %d\n", | ||
214 | NIPQUAD(least->addr), ntohs(least->port), | ||
215 | atomic_read(&least->activeconns), | ||
216 | atomic_read(&least->refcnt), | ||
217 | atomic_read(&least->weight), loh); | ||
218 | return least; | ||
219 | } | ||
220 | |||
221 | |||
222 | /* get weighted most-connection node in the destination set */ | ||
223 | static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) | ||
224 | { | ||
225 | register struct ip_vs_dest_list *e; | ||
226 | struct ip_vs_dest *dest, *most; | ||
227 | int moh, doh; | ||
228 | |||
229 | if (set == NULL) | ||
230 | return NULL; | ||
231 | |||
232 | read_lock(&set->lock); | ||
233 | /* select the first destination server whose weight > 0 */ | ||
234 | for (e=set->list; e!=NULL; e=e->next) { | ||
235 | most = e->dest; | ||
236 | if (atomic_read(&most->weight) > 0) { | ||
237 | moh = atomic_read(&most->activeconns) * 50 | ||
238 | + atomic_read(&most->inactconns); | ||
239 | goto nextstage; | ||
240 | } | ||
241 | } | ||
242 | read_unlock(&set->lock); | ||
243 | return NULL; | ||
244 | |||
245 | /* find the destination with the weighted most load */ | ||
246 | nextstage: | ||
247 | for (e=e->next; e!=NULL; e=e->next) { | ||
248 | dest = e->dest; | ||
249 | doh = atomic_read(&dest->activeconns) * 50 | ||
250 | + atomic_read(&dest->inactconns); | ||
251 | /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ | ||
252 | if ((moh * atomic_read(&dest->weight) < | ||
253 | doh * atomic_read(&most->weight)) | ||
254 | && (atomic_read(&dest->weight) > 0)) { | ||
255 | most = dest; | ||
256 | moh = doh; | ||
257 | } | ||
258 | } | ||
259 | read_unlock(&set->lock); | ||
260 | |||
261 | IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d " | ||
262 | "activeconns %d refcnt %d weight %d overhead %d\n", | ||
263 | NIPQUAD(most->addr), ntohs(most->port), | ||
264 | atomic_read(&most->activeconns), | ||
265 | atomic_read(&most->refcnt), | ||
266 | atomic_read(&most->weight), moh); | ||
267 | return most; | ||
268 | } | ||
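
Both selectors above avoid kernel-space floating point by cross-multiplying: loh/lw > doh/dw holds exactly when loh*dw > doh*lw, as long as both weights are positive. A minimal userspace sketch with made-up numbers (not IPVS code):

#include <stdio.h>

/* loh/lw > doh/dw  <=>  loh*dw > doh*lw, valid while lw, dw > 0,
 * so the comparison stays in integer arithmetic. */
static int less_loaded(long loh, long lw, long doh, long dw)
{
	return loh * dw > doh * lw;   /* 1 if dest is less loaded than least */
}

int main(void)
{
	/* least: overhead 100, weight 2 (load 50);
	 * dest:  overhead 120, weight 3 (load 40) -> dest wins */
	printf("%d\n", less_loaded(100, 2, 120, 3));   /* prints 1: 300 > 240 */
	return 0;
}
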
269 | |||
270 | |||
271 | /* | ||
272 | * IPVS lblcr entry represents an association between destination | ||
273 | * IP address and its destination server set | ||
274 | */ | ||
275 | struct ip_vs_lblcr_entry { | ||
276 | struct list_head list; | ||
277 | __be32 addr; /* destination IP address */ | ||
278 | struct ip_vs_dest_set set; /* destination server set */ | ||
279 | unsigned long lastuse; /* last used time */ | ||
280 | }; | ||
281 | |||
282 | |||
283 | /* | ||
284 | * IPVS lblcr hash table | ||
285 | */ | ||
286 | struct ip_vs_lblcr_table { | ||
287 | rwlock_t lock; /* lock for this table */ | ||
288 | struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */ | ||
289 | atomic_t entries; /* number of entries */ | ||
290 | int max_size; /* maximum size of entries */ | ||
291 | struct timer_list periodic_timer; /* collect stale entries */ | ||
292 | int rover; /* rover for expire check */ | ||
293 | int counter; /* counter for no expire */ | ||
294 | }; | ||
295 | |||
296 | |||
297 | /* | ||
298 | * IPVS LBLCR sysctl table | ||
299 | */ | ||
300 | |||
301 | static ctl_table vs_vars_table[] = { | ||
302 | { | ||
303 | .procname = "lblcr_expiration", | ||
304 | .data = &sysctl_ip_vs_lblcr_expiration, | ||
305 | .maxlen = sizeof(int), | ||
306 | .mode = 0644, | ||
307 | .proc_handler = &proc_dointvec_jiffies, | ||
308 | }, | ||
309 | { .ctl_name = 0 } | ||
310 | }; | ||
311 | |||
312 | static struct ctl_table_header * sysctl_header; | ||
313 | |||
314 | /* | ||
315 | * new/free an ip_vs_lblcr_entry, which is a mapping of a destination | ||
316 | * IP address to a server. | ||
317 | */ | ||
318 | static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__be32 daddr) | ||
319 | { | ||
320 | struct ip_vs_lblcr_entry *en; | ||
321 | |||
322 | en = kmalloc(sizeof(struct ip_vs_lblcr_entry), GFP_ATOMIC); | ||
323 | if (en == NULL) { | ||
324 | IP_VS_ERR("ip_vs_lblcr_new(): no memory\n"); | ||
325 | return NULL; | ||
326 | } | ||
327 | |||
328 | INIT_LIST_HEAD(&en->list); | ||
329 | en->addr = daddr; | ||
330 | |||
331 | /* initialize its dest set */ | ||
332 | atomic_set(&(en->set.size), 0); | ||
333 | en->set.list = NULL; | ||
334 | rwlock_init(&en->set.lock); | ||
335 | |||
336 | return en; | ||
337 | } | ||
338 | |||
339 | |||
340 | static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) | ||
341 | { | ||
342 | list_del(&en->list); | ||
343 | ip_vs_dest_set_eraseall(&en->set); | ||
344 | kfree(en); | ||
345 | } | ||
346 | |||
347 | |||
348 | /* | ||
349 | * Returns hash value for IPVS LBLCR entry | ||
350 | */ | ||
351 | static inline unsigned ip_vs_lblcr_hashkey(__be32 addr) | ||
352 | { | ||
353 | return (ntohl(addr)*2654435761UL) & IP_VS_LBLCR_TAB_MASK; | ||
354 | } | ||
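
The hash key multiplies the host-order address by 2654435761, the golden-ratio constant (2^32/phi). Because the multiplier is odd, multiplication is a bijection modulo the table size, so runs of nearby addresses are scattered across buckets. A standalone sketch; IP_VS_LBLCR_TAB_BITS is a build-time option and 10 is only an assumed value here:

#include <stdio.h>
#include <stdint.h>

#define TAB_BITS 10                   /* assumed; set by Kconfig in IPVS */
#define TAB_MASK ((1u << TAB_BITS) - 1)

/* Knuth multiplicative hash: the odd constant permutes the low bits
 * kept by the mask, spreading consecutive addresses apart. */
static unsigned hashkey(uint32_t host_order_addr)
{
	return (host_order_addr * 2654435761UL) & TAB_MASK;
}

int main(void)
{
	/* consecutive addresses land in widely separated buckets */
	for (uint32_t a = 0xc0a80001; a < 0xc0a80005; a++)
		printf("%#x -> bucket %u\n", (unsigned)a, hashkey(a));
	return 0;
}
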
355 | |||
356 | |||
357 | /* | ||
358 | * Hash an entry in the ip_vs_lblcr_table. | ||
359 | * returns bool success. | ||
360 | */ | ||
361 | static int | ||
362 | ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) | ||
363 | { | ||
364 | unsigned hash; | ||
365 | |||
366 | if (!list_empty(&en->list)) { | ||
367 | IP_VS_ERR("ip_vs_lblcr_hash(): request for already hashed, " | ||
368 | "called from %p\n", __builtin_return_address(0)); | ||
369 | return 0; | ||
370 | } | ||
371 | |||
372 | /* | ||
373 | * Hash by destination IP address | ||
374 | */ | ||
375 | hash = ip_vs_lblcr_hashkey(en->addr); | ||
376 | |||
377 | write_lock(&tbl->lock); | ||
378 | list_add(&en->list, &tbl->bucket[hash]); | ||
379 | atomic_inc(&tbl->entries); | ||
380 | write_unlock(&tbl->lock); | ||
381 | |||
382 | return 1; | ||
383 | } | ||
384 | |||
385 | |||
386 | /* | ||
387 | * Get ip_vs_lblcr_entry associated with supplied parameters. | ||
388 | */ | ||
389 | static inline struct ip_vs_lblcr_entry * | ||
390 | ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr) | ||
391 | { | ||
392 | unsigned hash; | ||
393 | struct ip_vs_lblcr_entry *en; | ||
394 | |||
395 | hash = ip_vs_lblcr_hashkey(addr); | ||
396 | |||
397 | read_lock(&tbl->lock); | ||
398 | |||
399 | list_for_each_entry(en, &tbl->bucket[hash], list) { | ||
400 | if (en->addr == addr) { | ||
401 | /* HIT */ | ||
402 | read_unlock(&tbl->lock); | ||
403 | return en; | ||
404 | } | ||
405 | } | ||
406 | |||
407 | read_unlock(&tbl->lock); | ||
408 | |||
409 | return NULL; | ||
410 | } | ||
411 | |||
412 | |||
413 | /* | ||
414 | * Flush all the entries of the specified table. | ||
415 | */ | ||
416 | static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl) | ||
417 | { | ||
418 | int i; | ||
419 | struct ip_vs_lblcr_entry *en, *nxt; | ||
420 | |||
421 | for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { | ||
422 | write_lock(&tbl->lock); | ||
423 | list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { | ||
424 | ip_vs_lblcr_free(en); | ||
425 | atomic_dec(&tbl->entries); | ||
426 | } | ||
427 | write_unlock(&tbl->lock); | ||
428 | } | ||
429 | } | ||
430 | |||
431 | |||
432 | static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) | ||
433 | { | ||
434 | unsigned long now = jiffies; | ||
435 | int i, j; | ||
436 | struct ip_vs_lblcr_entry *en, *nxt; | ||
437 | |||
438 | for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { | ||
439 | j = (j + 1) & IP_VS_LBLCR_TAB_MASK; | ||
440 | |||
441 | write_lock(&tbl->lock); | ||
442 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { | ||
443 | if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration, | ||
444 | now)) | ||
445 | continue; | ||
446 | |||
447 | ip_vs_lblcr_free(en); | ||
448 | atomic_dec(&tbl->entries); | ||
449 | } | ||
450 | write_unlock(&tbl->lock); | ||
451 | } | ||
452 | tbl->rover = j; | ||
453 | } | ||
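
The full check resumes just past tbl->rover and wraps with the mask, so successive passes age every bucket evenly instead of always starting at bucket 0. A toy illustration of the wrap-around walk (sizes assumed):

#include <stdio.h>

#define TAB_SIZE 1024                  /* assumed table size */
#define TAB_MASK (TAB_SIZE - 1)

int main(void)
{
	int rover = 1020;                  /* where the previous pass stopped */
	int i, j;

	/* A pass starts after the rover, wraps past bucket 0, and the
	 * final position is persisted for the next pass. */
	for (i = 0, j = rover; i < 8; i++) {   /* 8 steps shown, not TAB_SIZE */
		j = (j + 1) & TAB_MASK;
		printf("visit bucket %d\n", j);    /* 1021,1022,1023,0,1,2,3,4 */
	}
	rover = j;
	printf("rover saved as %d\n", rover);
	return 0;
}
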
454 | |||
455 | |||
456 | /* | ||
457 | * Periodical timer handler for IPVS lblcr table | ||
458 | * It is used to collect stale entries when the number of entries | ||
459 | * exceeds the maximum size of the table. | ||
460 | * | ||
461 | * Fixme: we probably need a more complicated algorithm to collect | ||
462 | * entries that have not been used for a long time even | ||
463 | * if the number of entries doesn't exceed the maximum size | ||
464 | * of the table. | ||
465 | * The full expiration check is for this purpose now. | ||
466 | */ | ||
467 | static void ip_vs_lblcr_check_expire(unsigned long data) | ||
468 | { | ||
469 | struct ip_vs_lblcr_table *tbl; | ||
470 | unsigned long now = jiffies; | ||
471 | int goal; | ||
472 | int i, j; | ||
473 | struct ip_vs_lblcr_entry *en, *nxt; | ||
474 | |||
475 | tbl = (struct ip_vs_lblcr_table *)data; | ||
476 | |||
477 | if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { | ||
478 | /* do full expiration check */ | ||
479 | ip_vs_lblcr_full_check(tbl); | ||
480 | tbl->counter = 1; | ||
481 | goto out; | ||
482 | } | ||
483 | |||
484 | if (atomic_read(&tbl->entries) <= tbl->max_size) { | ||
485 | tbl->counter++; | ||
486 | goto out; | ||
487 | } | ||
488 | |||
489 | goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3; | ||
490 | if (goal > tbl->max_size/2) | ||
491 | goal = tbl->max_size/2; | ||
492 | |||
493 | for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { | ||
494 | j = (j + 1) & IP_VS_LBLCR_TAB_MASK; | ||
495 | |||
496 | write_lock(&tbl->lock); | ||
497 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { | ||
498 | if (time_before(now, en->lastuse+ENTRY_TIMEOUT)) | ||
499 | continue; | ||
500 | |||
501 | ip_vs_lblcr_free(en); | ||
502 | atomic_dec(&tbl->entries); | ||
503 | goal--; | ||
504 | } | ||
505 | write_unlock(&tbl->lock); | ||
506 | if (goal <= 0) | ||
507 | break; | ||
508 | } | ||
509 | tbl->rover = j; | ||
510 | |||
511 | out: | ||
512 | mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL); | ||
513 | } | ||
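
Worked numbers for the purge goal above, assuming max_size = 16384 (IP_VS_LBLCR_TAB_SIZE*16 with a 1024-bucket table) and 17000 entries: (17000 - 16384) * 4/3 = 821, well under the max_size/2 cap of 8192.

#include <stdio.h>

int main(void)
{
	int entries = 17000, max_size = 16384;   /* assumed figures */
	int goal = (entries - max_size) * 4 / 3;

	/* Aim slightly past the overshoot (x4/3) so the table does not
	 * sit right at the limit, but never purge more than half the
	 * size budget in one timer tick. */
	if (goal > max_size / 2)
		goal = max_size / 2;
	printf("purge goal: %d entries\n", goal);   /* 821 */
	return 0;
}
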
514 | |||
515 | static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) | ||
516 | { | ||
517 | int i; | ||
518 | struct ip_vs_lblcr_table *tbl; | ||
519 | |||
520 | /* | ||
521 | * Allocate the ip_vs_lblcr_table for this service | ||
522 | */ | ||
523 | tbl = kmalloc(sizeof(struct ip_vs_lblcr_table), GFP_ATOMIC); | ||
524 | if (tbl == NULL) { | ||
525 | IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n"); | ||
526 | return -ENOMEM; | ||
527 | } | ||
528 | svc->sched_data = tbl; | ||
529 | IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for " | ||
530 | "current service\n", | ||
531 | sizeof(struct ip_vs_lblcr_table)); | ||
532 | |||
533 | /* | ||
534 | * Initialize the hash buckets | ||
535 | */ | ||
536 | for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { | ||
537 | INIT_LIST_HEAD(&tbl->bucket[i]); | ||
538 | } | ||
539 | rwlock_init(&tbl->lock); | ||
540 | tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; | ||
541 | tbl->rover = 0; | ||
542 | tbl->counter = 1; | ||
543 | |||
544 | /* | ||
545 | * Hook periodic timer for garbage collection | ||
546 | */ | ||
547 | setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire, | ||
548 | (unsigned long)tbl); | ||
549 | tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL; | ||
550 | add_timer(&tbl->periodic_timer); | ||
551 | |||
552 | return 0; | ||
553 | } | ||
554 | |||
555 | |||
556 | static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) | ||
557 | { | ||
558 | struct ip_vs_lblcr_table *tbl = svc->sched_data; | ||
559 | |||
560 | /* remove periodic timer */ | ||
561 | del_timer_sync(&tbl->periodic_timer); | ||
562 | |||
563 | /* got to clean up table entries here */ | ||
564 | ip_vs_lblcr_flush(tbl); | ||
565 | |||
566 | /* release the table itself */ | ||
567 | kfree(svc->sched_data); | ||
568 | IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n", | ||
569 | sizeof(struct ip_vs_lblcr_table)); | ||
570 | |||
571 | return 0; | ||
572 | } | ||
573 | |||
574 | |||
575 | static int ip_vs_lblcr_update_svc(struct ip_vs_service *svc) | ||
576 | { | ||
577 | return 0; | ||
578 | } | ||
579 | |||
580 | |||
581 | static inline struct ip_vs_dest * | ||
582 | __ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) | ||
583 | { | ||
584 | struct ip_vs_dest *dest, *least; | ||
585 | int loh, doh; | ||
586 | |||
587 | /* | ||
588 | * We think the overhead of processing active connections is fifty | ||
589 | * times higher than that of inactive connections on average. (This | ||
590 | * fifty times might not be accurate, we will change it later.) We | ||
591 | * use the following formula to estimate the overhead: | ||
592 | * dest->activeconns*50 + dest->inactconns | ||
593 | * and the load: | ||
594 | * (dest overhead) / dest->weight | ||
595 | * | ||
596 | * Remember -- no floats in kernel mode!!! | ||
597 | * The comparison of h1*w2 > h2*w1 is equivalent to that of | ||
598 | * h1/w1 > h2/w2 | ||
599 | * if every weight is larger than zero. | ||
600 | * | ||
601 | * The server with weight=0 is quiesced and will not receive any | ||
602 | * new connection. | ||
603 | */ | ||
604 | list_for_each_entry(dest, &svc->destinations, n_list) { | ||
605 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) | ||
606 | continue; | ||
607 | |||
608 | if (atomic_read(&dest->weight) > 0) { | ||
609 | least = dest; | ||
610 | loh = atomic_read(&least->activeconns) * 50 | ||
611 | + atomic_read(&least->inactconns); | ||
612 | goto nextstage; | ||
613 | } | ||
614 | } | ||
615 | return NULL; | ||
616 | |||
617 | /* | ||
618 | * Find the destination with the least load. | ||
619 | */ | ||
620 | nextstage: | ||
621 | list_for_each_entry_continue(dest, &svc->destinations, n_list) { | ||
622 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) | ||
623 | continue; | ||
624 | |||
625 | doh = atomic_read(&dest->activeconns) * 50 | ||
626 | + atomic_read(&dest->inactconns); | ||
627 | if (loh * atomic_read(&dest->weight) > | ||
628 | doh * atomic_read(&least->weight)) { | ||
629 | least = dest; | ||
630 | loh = doh; | ||
631 | } | ||
632 | } | ||
633 | |||
634 | IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d " | ||
635 | "activeconns %d refcnt %d weight %d overhead %d\n", | ||
636 | NIPQUAD(least->addr), ntohs(least->port), | ||
637 | atomic_read(&least->activeconns), | ||
638 | atomic_read(&least->refcnt), | ||
639 | atomic_read(&least->weight), loh); | ||
640 | |||
641 | return least; | ||
642 | } | ||
643 | |||
644 | |||
645 | /* | ||
646 | * If this destination server is overloaded and there is a less loaded | ||
647 | * server, then return true. | ||
648 | */ | ||
649 | static inline int | ||
650 | is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) | ||
651 | { | ||
652 | if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) { | ||
653 | struct ip_vs_dest *d; | ||
654 | |||
655 | list_for_each_entry(d, &svc->destinations, n_list) { | ||
656 | if (atomic_read(&d->activeconns)*2 | ||
657 | < atomic_read(&d->weight)) { | ||
658 | return 1; | ||
659 | } | ||
660 | } | ||
661 | } | ||
662 | return 0; | ||
663 | } | ||
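
A numeric restatement of is_overloaded(): the test fires only when the candidate exceeds its weight in active connections while some peer still has clear spare capacity (activeconns*2 < weight). Made-up figures:

#include <stdio.h>

int main(void)
{
	int active = 12, weight = 8;             /* candidate server */
	int peer_active = 1, peer_weight = 8;    /* least busy peer */

	/* Mirrors the condition pair in is_overloaded() above. */
	int overloaded = active > weight && peer_active * 2 < peer_weight;
	printf("overloaded: %d\n", overloaded);  /* 1 */
	return 0;
}
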
664 | |||
665 | |||
666 | /* | ||
667 | * Locality-Based (weighted) Least-Connection with Replication scheduling | ||
668 | */ | ||
669 | static struct ip_vs_dest * | ||
670 | ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | ||
671 | { | ||
672 | struct ip_vs_dest *dest; | ||
673 | struct ip_vs_lblcr_table *tbl; | ||
674 | struct ip_vs_lblcr_entry *en; | ||
675 | struct iphdr *iph = ip_hdr(skb); | ||
676 | |||
677 | IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n"); | ||
678 | |||
679 | tbl = (struct ip_vs_lblcr_table *)svc->sched_data; | ||
680 | en = ip_vs_lblcr_get(tbl, iph->daddr); | ||
681 | if (en == NULL) { | ||
682 | dest = __ip_vs_wlc_schedule(svc, iph); | ||
683 | if (dest == NULL) { | ||
684 | IP_VS_DBG(1, "no destination available\n"); | ||
685 | return NULL; | ||
686 | } | ||
687 | en = ip_vs_lblcr_new(iph->daddr); | ||
688 | if (en == NULL) { | ||
689 | return NULL; | ||
690 | } | ||
691 | ip_vs_dest_set_insert(&en->set, dest); | ||
692 | ip_vs_lblcr_hash(tbl, en); | ||
693 | } else { | ||
694 | dest = ip_vs_dest_set_min(&en->set); | ||
695 | if (!dest || is_overloaded(dest, svc)) { | ||
696 | dest = __ip_vs_wlc_schedule(svc, iph); | ||
697 | if (dest == NULL) { | ||
698 | IP_VS_DBG(1, "no destination available\n"); | ||
699 | return NULL; | ||
700 | } | ||
701 | ip_vs_dest_set_insert(&en->set, dest); | ||
702 | } | ||
703 | if (atomic_read(&en->set.size) > 1 && | ||
704 | jiffies-en->set.lastmod > sysctl_ip_vs_lblcr_expiration) { | ||
705 | struct ip_vs_dest *m; | ||
706 | m = ip_vs_dest_set_max(&en->set); | ||
707 | if (m) | ||
708 | ip_vs_dest_set_erase(&en->set, m); | ||
709 | } | ||
710 | } | ||
711 | en->lastuse = jiffies; | ||
712 | |||
713 | IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u " | ||
714 | "--> server %u.%u.%u.%u:%d\n", | ||
715 | NIPQUAD(en->addr), | ||
716 | NIPQUAD(dest->addr), | ||
717 | ntohs(dest->port)); | ||
718 | |||
719 | return dest; | ||
720 | } | ||
721 | |||
722 | |||
723 | /* | ||
724 | * IPVS LBLCR Scheduler structure | ||
725 | */ | ||
726 | static struct ip_vs_scheduler ip_vs_lblcr_scheduler = | ||
727 | { | ||
728 | .name = "lblcr", | ||
729 | .refcnt = ATOMIC_INIT(0), | ||
730 | .module = THIS_MODULE, | ||
731 | .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list), | ||
732 | .init_service = ip_vs_lblcr_init_svc, | ||
733 | .done_service = ip_vs_lblcr_done_svc, | ||
734 | .update_service = ip_vs_lblcr_update_svc, | ||
735 | .schedule = ip_vs_lblcr_schedule, | ||
736 | }; | ||
737 | |||
738 | |||
739 | static int __init ip_vs_lblcr_init(void) | ||
740 | { | ||
741 | int ret; | ||
742 | |||
743 | sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); | ||
744 | ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); | ||
745 | if (ret) | ||
746 | unregister_sysctl_table(sysctl_header); | ||
747 | return ret; | ||
748 | } | ||
749 | |||
750 | |||
751 | static void __exit ip_vs_lblcr_cleanup(void) | ||
752 | { | ||
753 | unregister_sysctl_table(sysctl_header); | ||
754 | unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); | ||
755 | } | ||
756 | |||
757 | |||
758 | module_init(ip_vs_lblcr_init); | ||
759 | module_exit(ip_vs_lblcr_cleanup); | ||
760 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c deleted file mode 100644 index ebcdbf75ac65..000000000000 --- a/net/ipv4/ipvs/ip_vs_lc.c +++ /dev/null | |||
@@ -1,121 +0,0 @@ | |||
1 | /* | ||
2 | * IPVS: Least-Connection Scheduling module | ||
3 | * | ||
4 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * Changes: | ||
12 | * Wensong Zhang : added the ip_vs_lc_update_svc | ||
13 | * Wensong Zhang : added any dest with weight=0 is quiesced | ||
14 | * | ||
15 | */ | ||
16 | |||
17 | #include <linux/module.h> | ||
18 | #include <linux/kernel.h> | ||
19 | |||
20 | #include <net/ip_vs.h> | ||
21 | |||
22 | |||
23 | static int ip_vs_lc_init_svc(struct ip_vs_service *svc) | ||
24 | { | ||
25 | return 0; | ||
26 | } | ||
27 | |||
28 | |||
29 | static int ip_vs_lc_done_svc(struct ip_vs_service *svc) | ||
30 | { | ||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | |||
35 | static int ip_vs_lc_update_svc(struct ip_vs_service *svc) | ||
36 | { | ||
37 | return 0; | ||
38 | } | ||
39 | |||
40 | |||
41 | static inline unsigned int | ||
42 | ip_vs_lc_dest_overhead(struct ip_vs_dest *dest) | ||
43 | { | ||
44 | /* | ||
45 | * We think the overhead of processing active connections is 256 | ||
46 | * times higher than that of inactive connections on average. (This | ||
47 | * 256 times might not be accurate; we will change it later.) We | ||
48 | * use the following formula to estimate the overhead now: | ||
49 | * dest->activeconns*256 + dest->inactconns | ||
50 | */ | ||
51 | return (atomic_read(&dest->activeconns) << 8) + | ||
52 | atomic_read(&dest->inactconns); | ||
53 | } | ||
54 | |||
55 | |||
56 | /* | ||
57 | * Least Connection scheduling | ||
58 | */ | ||
59 | static struct ip_vs_dest * | ||
60 | ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | ||
61 | { | ||
62 | struct ip_vs_dest *dest, *least = NULL; | ||
63 | unsigned int loh = 0, doh; | ||
64 | |||
65 | IP_VS_DBG(6, "ip_vs_lc_schedule(): Scheduling...\n"); | ||
66 | |||
67 | /* | ||
68 | * Simply select the server with the least number of | ||
69 | * (activeconns<<8) + inactconns, | ||
70 | * excluding servers whose weight is zero. | ||
71 | * If the weight is equal to zero, it means that the server is | ||
72 | * quiesced: the existing connections to the server still get | ||
73 | * served, but no new connection is assigned to the server. | ||
74 | */ | ||
75 | |||
76 | list_for_each_entry(dest, &svc->destinations, n_list) { | ||
77 | if ((dest->flags & IP_VS_DEST_F_OVERLOAD) || | ||
78 | atomic_read(&dest->weight) == 0) | ||
79 | continue; | ||
80 | doh = ip_vs_lc_dest_overhead(dest); | ||
81 | if (!least || doh < loh) { | ||
82 | least = dest; | ||
83 | loh = doh; | ||
84 | } | ||
85 | } | ||
86 | |||
87 | if (least) | ||
88 | IP_VS_DBG(6, "LC: server %u.%u.%u.%u:%u activeconns %d inactconns %d\n", | ||
89 | NIPQUAD(least->addr), ntohs(least->port), | ||
90 | atomic_read(&least->activeconns), | ||
91 | atomic_read(&least->inactconns)); | ||
92 | |||
93 | return least; | ||
94 | } | ||
95 | |||
96 | |||
97 | static struct ip_vs_scheduler ip_vs_lc_scheduler = { | ||
98 | .name = "lc", | ||
99 | .refcnt = ATOMIC_INIT(0), | ||
100 | .module = THIS_MODULE, | ||
101 | .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list), | ||
102 | .init_service = ip_vs_lc_init_svc, | ||
103 | .done_service = ip_vs_lc_done_svc, | ||
104 | .update_service = ip_vs_lc_update_svc, | ||
105 | .schedule = ip_vs_lc_schedule, | ||
106 | }; | ||
107 | |||
108 | |||
109 | static int __init ip_vs_lc_init(void) | ||
110 | { | ||
111 | return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ; | ||
112 | } | ||
113 | |||
114 | static void __exit ip_vs_lc_cleanup(void) | ||
115 | { | ||
116 | unregister_ip_vs_scheduler(&ip_vs_lc_scheduler); | ||
117 | } | ||
118 | |||
119 | module_init(ip_vs_lc_init); | ||
120 | module_exit(ip_vs_lc_cleanup); | ||
121 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c deleted file mode 100644 index 92f3a6770031..000000000000 --- a/net/ipv4/ipvs/ip_vs_nq.c +++ /dev/null | |||
@@ -1,159 +0,0 @@ | |||
1 | /* | ||
2 | * IPVS: Never Queue scheduling module | ||
3 | * | ||
4 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * Changes: | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | /* | ||
16 | * The NQ algorithm adopts a two-speed model. When there is an idle server | ||
17 | * available, the job will be sent to the idle server, instead of waiting | ||
18 | * for a fast one. When there is no idle server available, the job will be | ||
19 | * sent to the server that minimizes its expected delay (The Shortest | ||
20 | * Expected Delay scheduling algorithm). | ||
21 | * | ||
22 | * See the following paper for more information: | ||
23 | * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing | ||
24 | * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88, | ||
25 | * pages 986-994, 1988. | ||
26 | * | ||
27 | * Thanks must go to Marko Buuri <marko@buuri.name> for telling me about NQ. | ||
28 | * | ||
29 | * The difference between NQ and SED is that NQ can improve overall | ||
30 | * system utilization. | ||
31 | * | ||
32 | */ | ||
33 | |||
34 | #include <linux/module.h> | ||
35 | #include <linux/kernel.h> | ||
36 | |||
37 | #include <net/ip_vs.h> | ||
38 | |||
39 | |||
40 | static int | ||
41 | ip_vs_nq_init_svc(struct ip_vs_service *svc) | ||
42 | { | ||
43 | return 0; | ||
44 | } | ||
45 | |||
46 | |||
47 | static int | ||
48 | ip_vs_nq_done_svc(struct ip_vs_service *svc) | ||
49 | { | ||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | |||
54 | static int | ||
55 | ip_vs_nq_update_svc(struct ip_vs_service *svc) | ||
56 | { | ||
57 | return 0; | ||
58 | } | ||
59 | |||
60 | |||
61 | static inline unsigned int | ||
62 | ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) | ||
63 | { | ||
64 | /* | ||
65 | * We only use the active connection number in the cost | ||
66 | * calculation here. | ||
67 | */ | ||
68 | return atomic_read(&dest->activeconns) + 1; | ||
69 | } | ||
70 | |||
71 | |||
72 | /* | ||
73 | * Never Queue scheduling | ||
74 | */ | ||
75 | static struct ip_vs_dest * | ||
76 | ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | ||
77 | { | ||
78 | struct ip_vs_dest *dest, *least = NULL; | ||
79 | unsigned int loh = 0, doh; | ||
80 | |||
81 | IP_VS_DBG(6, "ip_vs_nq_schedule(): Scheduling...\n"); | ||
82 | |||
83 | /* | ||
84 | * We calculate the load of each dest server as follows: | ||
85 | * (server expected overhead) / dest->weight | ||
86 | * | ||
87 | * Remember -- no floats in kernel mode!!! | ||
88 | * The comparison of h1*w2 > h2*w1 is equivalent to that of | ||
89 | * h1/w1 > h2/w2 | ||
90 | * if every weight is larger than zero. | ||
91 | * | ||
92 | * The server with weight=0 is quiesced and will not receive any | ||
93 | * new connections. | ||
94 | */ | ||
95 | |||
96 | list_for_each_entry(dest, &svc->destinations, n_list) { | ||
97 | |||
98 | if (dest->flags & IP_VS_DEST_F_OVERLOAD || | ||
99 | !atomic_read(&dest->weight)) | ||
100 | continue; | ||
101 | |||
102 | doh = ip_vs_nq_dest_overhead(dest); | ||
103 | |||
104 | /* return the server directly if it is idle */ | ||
105 | if (atomic_read(&dest->activeconns) == 0) { | ||
106 | least = dest; | ||
107 | loh = doh; | ||
108 | goto out; | ||
109 | } | ||
110 | |||
111 | if (!least || | ||
112 | (loh * atomic_read(&dest->weight) > | ||
113 | doh * atomic_read(&least->weight))) { | ||
114 | least = dest; | ||
115 | loh = doh; | ||
116 | } | ||
117 | } | ||
118 | |||
119 | if (!least) | ||
120 | return NULL; | ||
121 | |||
122 | out: | ||
123 | IP_VS_DBG(6, "NQ: server %u.%u.%u.%u:%u " | ||
124 | "activeconns %d refcnt %d weight %d overhead %d\n", | ||
125 | NIPQUAD(least->addr), ntohs(least->port), | ||
126 | atomic_read(&least->activeconns), | ||
127 | atomic_read(&least->refcnt), | ||
128 | atomic_read(&least->weight), loh); | ||
129 | |||
130 | return least; | ||
131 | } | ||
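
The loop above implements the two-speed model from the header comment: an idle server short-circuits the scan, otherwise the SED-style weighted comparison picks the minimum-expected-delay server. A self-contained sketch with invented server data:

#include <stdio.h>

struct srv { int active, weight; };

static int nq_pick(struct srv *s, int n)
{
	int best = -1;
	for (int i = 0; i < n; i++) {
		if (s[i].weight == 0)
			continue;                   /* quiesced */
		int doh = s[i].active + 1;      /* expected-delay numerator */
		if (s[i].active == 0)
			return i;                   /* idle: take it immediately */
		/* same cross-multiplied compare as the kernel loop */
		if (best < 0 ||
		    (s[best].active + 1) * s[i].weight > doh * s[best].weight)
			best = i;
	}
	return best;
}

int main(void)
{
	struct srv s[] = { {3, 2}, {0, 1}, {5, 4} };
	printf("picked server %d\n", nq_pick(s, 3));   /* 1: it is idle */
	return 0;
}
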
132 | |||
133 | |||
134 | static struct ip_vs_scheduler ip_vs_nq_scheduler = | ||
135 | { | ||
136 | .name = "nq", | ||
137 | .refcnt = ATOMIC_INIT(0), | ||
138 | .module = THIS_MODULE, | ||
139 | .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list), | ||
140 | .init_service = ip_vs_nq_init_svc, | ||
141 | .done_service = ip_vs_nq_done_svc, | ||
142 | .update_service = ip_vs_nq_update_svc, | ||
143 | .schedule = ip_vs_nq_schedule, | ||
144 | }; | ||
145 | |||
146 | |||
147 | static int __init ip_vs_nq_init(void) | ||
148 | { | ||
149 | return register_ip_vs_scheduler(&ip_vs_nq_scheduler); | ||
150 | } | ||
151 | |||
152 | static void __exit ip_vs_nq_cleanup(void) | ||
153 | { | ||
154 | unregister_ip_vs_scheduler(&ip_vs_nq_scheduler); | ||
155 | } | ||
156 | |||
157 | module_init(ip_vs_nq_init); | ||
158 | module_exit(ip_vs_nq_cleanup); | ||
159 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c deleted file mode 100644 index 6099a88fc200..000000000000 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ /dev/null | |||
@@ -1,233 +0,0 @@ | |||
1 | /* | ||
2 | * ip_vs_proto.c: transport protocol load balancing support for IPVS | ||
3 | * | ||
4 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
5 | * Julian Anastasov <ja@ssi.bg> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | * | ||
12 | * Changes: | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <linux/module.h> | ||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/skbuff.h> | ||
19 | #include <linux/in.h> | ||
20 | #include <linux/ip.h> | ||
21 | #include <net/protocol.h> | ||
22 | #include <net/tcp.h> | ||
23 | #include <net/udp.h> | ||
24 | #include <asm/system.h> | ||
25 | #include <linux/stat.h> | ||
26 | #include <linux/proc_fs.h> | ||
27 | |||
28 | #include <net/ip_vs.h> | ||
29 | |||
30 | |||
31 | /* | ||
32 | * IPVS protocols can only be registered/unregistered when the ipvs | ||
33 | * module is loaded/unloaded, so no lock is needed in accessing the | ||
34 | * ipvs protocol table. | ||
35 | */ | ||
36 | |||
37 | #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ | ||
38 | #define IP_VS_PROTO_HASH(proto) ((proto) & (IP_VS_PROTO_TAB_SIZE-1)) | ||
39 | |||
40 | static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE]; | ||
41 | |||
42 | |||
43 | /* | ||
44 | * register an ipvs protocol | ||
45 | */ | ||
46 | static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) | ||
47 | { | ||
48 | unsigned hash = IP_VS_PROTO_HASH(pp->protocol); | ||
49 | |||
50 | pp->next = ip_vs_proto_table[hash]; | ||
51 | ip_vs_proto_table[hash] = pp; | ||
52 | |||
53 | if (pp->init != NULL) | ||
54 | pp->init(pp); | ||
55 | |||
56 | return 0; | ||
57 | } | ||
58 | |||
59 | |||
60 | /* | ||
61 | * unregister an ipvs protocol | ||
62 | */ | ||
63 | static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) | ||
64 | { | ||
65 | struct ip_vs_protocol **pp_p; | ||
66 | unsigned hash = IP_VS_PROTO_HASH(pp->protocol); | ||
67 | |||
68 | pp_p = &ip_vs_proto_table[hash]; | ||
69 | for (; *pp_p; pp_p = &(*pp_p)->next) { | ||
70 | if (*pp_p == pp) { | ||
71 | *pp_p = pp->next; | ||
72 | if (pp->exit != NULL) | ||
73 | pp->exit(pp); | ||
74 | return 0; | ||
75 | } | ||
76 | } | ||
77 | |||
78 | return -ESRCH; | ||
79 | } | ||
80 | |||
81 | |||
82 | /* | ||
83 | * get ip_vs_protocol object by its proto. | ||
84 | */ | ||
85 | struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) | ||
86 | { | ||
87 | struct ip_vs_protocol *pp; | ||
88 | unsigned hash = IP_VS_PROTO_HASH(proto); | ||
89 | |||
90 | for (pp = ip_vs_proto_table[hash]; pp; pp = pp->next) { | ||
91 | if (pp->protocol == proto) | ||
92 | return pp; | ||
93 | } | ||
94 | |||
95 | return NULL; | ||
96 | } | ||
97 | |||
98 | |||
99 | /* | ||
100 | * Propagate event for state change to all protocols | ||
101 | */ | ||
102 | void ip_vs_protocol_timeout_change(int flags) | ||
103 | { | ||
104 | struct ip_vs_protocol *pp; | ||
105 | int i; | ||
106 | |||
107 | for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { | ||
108 | for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) { | ||
109 | if (pp->timeout_change) | ||
110 | pp->timeout_change(pp, flags); | ||
111 | } | ||
112 | } | ||
113 | } | ||
114 | |||
115 | |||
116 | int * | ||
117 | ip_vs_create_timeout_table(int *table, int size) | ||
118 | { | ||
119 | return kmemdup(table, size, GFP_ATOMIC); | ||
120 | } | ||
121 | |||
122 | |||
123 | /* | ||
124 | * Set timeout value for state specified by name | ||
125 | */ | ||
126 | int | ||
127 | ip_vs_set_state_timeout(int *table, int num, char **names, char *name, int to) | ||
128 | { | ||
129 | int i; | ||
130 | |||
131 | if (!table || !name || !to) | ||
132 | return -EINVAL; | ||
133 | |||
134 | for (i = 0; i < num; i++) { | ||
135 | if (strcmp(names[i], name)) | ||
136 | continue; | ||
137 | table[i] = to * HZ; | ||
138 | return 0; | ||
139 | } | ||
140 | return -ENOENT; | ||
141 | } | ||
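
A userspace restatement of the lookup-and-convert logic above, with an assumed HZ of 100; the kernel version returns -ENOENT where this sketch returns -1:

#include <stdio.h>
#include <string.h>

#define HZ 100   /* assumed tick rate for the sketch */

/* Find the state by name and store the timeout in jiffies. */
static int set_timeout(int *table, int num, const char **names,
                       const char *name, int to)
{
	for (int i = 0; i < num; i++) {
		if (strcmp(names[i], name))
			continue;
		table[i] = to * HZ;
		return 0;
	}
	return -1;
}

int main(void)
{
	const char *names[] = { "NONE", "ESTABLISHED", "SYN_SENT" };
	int table[3] = { 2 * HZ, 15 * 60 * HZ, 2 * 60 * HZ };

	set_timeout(table, 3, names, "ESTABLISHED", 600);
	printf("ESTABLISHED timeout: %d jiffies\n", table[1]);   /* 60000 */
	return 0;
}
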
142 | |||
143 | |||
144 | const char * ip_vs_state_name(__u16 proto, int state) | ||
145 | { | ||
146 | struct ip_vs_protocol *pp = ip_vs_proto_get(proto); | ||
147 | |||
148 | if (pp == NULL || pp->state_name == NULL) | ||
149 | return (IPPROTO_IP == proto) ? "NONE" : "ERR!"; | ||
150 | return pp->state_name(state); | ||
151 | } | ||
152 | |||
153 | |||
154 | void | ||
155 | ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, | ||
156 | const struct sk_buff *skb, | ||
157 | int offset, | ||
158 | const char *msg) | ||
159 | { | ||
160 | char buf[128]; | ||
161 | struct iphdr _iph, *ih; | ||
162 | |||
163 | ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); | ||
164 | if (ih == NULL) | ||
165 | sprintf(buf, "%s TRUNCATED", pp->name); | ||
166 | else if (ih->frag_off & htons(IP_OFFSET)) | ||
167 | sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag", | ||
168 | pp->name, NIPQUAD(ih->saddr), | ||
169 | NIPQUAD(ih->daddr)); | ||
170 | else { | ||
171 | __be16 _ports[2], *pptr; | ||
172 | | ||
173 | pptr = skb_header_pointer(skb, offset + ih->ihl*4, | ||
174 | sizeof(_ports), _ports); | ||
175 | if (pptr == NULL) | ||
176 | sprintf(buf, "%s TRUNCATED %u.%u.%u.%u->%u.%u.%u.%u", | ||
177 | pp->name, | ||
178 | NIPQUAD(ih->saddr), | ||
179 | NIPQUAD(ih->daddr)); | ||
180 | else | ||
181 | sprintf(buf, "%s %u.%u.%u.%u:%u->%u.%u.%u.%u:%u", | ||
182 | pp->name, | ||
183 | NIPQUAD(ih->saddr), | ||
184 | ntohs(pptr[0]), | ||
185 | NIPQUAD(ih->daddr), | ||
186 | ntohs(pptr[1])); | ||
187 | } | ||
188 | |||
189 | printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); | ||
190 | } | ||
191 | |||
192 | |||
193 | int __init ip_vs_protocol_init(void) | ||
194 | { | ||
195 | char protocols[64]; | ||
196 | #define REGISTER_PROTOCOL(p) \ | ||
197 | do { \ | ||
198 | register_ip_vs_protocol(p); \ | ||
199 | strcat(protocols, ", "); \ | ||
200 | strcat(protocols, (p)->name); \ | ||
201 | } while (0) | ||
202 | |||
203 | protocols[0] = '\0'; | ||
204 | protocols[2] = '\0'; | ||
205 | #ifdef CONFIG_IP_VS_PROTO_TCP | ||
206 | REGISTER_PROTOCOL(&ip_vs_protocol_tcp); | ||
207 | #endif | ||
208 | #ifdef CONFIG_IP_VS_PROTO_UDP | ||
209 | REGISTER_PROTOCOL(&ip_vs_protocol_udp); | ||
210 | #endif | ||
211 | #ifdef CONFIG_IP_VS_PROTO_AH | ||
212 | REGISTER_PROTOCOL(&ip_vs_protocol_ah); | ||
213 | #endif | ||
214 | #ifdef CONFIG_IP_VS_PROTO_ESP | ||
215 | REGISTER_PROTOCOL(&ip_vs_protocol_esp); | ||
216 | #endif | ||
217 | IP_VS_INFO("Registered protocols (%s)\n", &protocols[2]); | ||
218 | |||
219 | return 0; | ||
220 | } | ||
221 | |||
222 | |||
223 | void ip_vs_protocol_cleanup(void) | ||
224 | { | ||
225 | struct ip_vs_protocol *pp; | ||
226 | int i; | ||
227 | |||
228 | /* unregister all the ipvs protocols */ | ||
229 | for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { | ||
230 | while ((pp = ip_vs_proto_table[i]) != NULL) | ||
231 | unregister_ip_vs_protocol(pp); | ||
232 | } | ||
233 | } | ||
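
The &protocols[2] printed by ip_vs_protocol_init() above skips the leading ", " that REGISTER_PROTOCOL prepends to every name; protocols[2] = '\0' keeps that offset string empty when no protocol is compiled in. A quick demonstration:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char protocols[64];

	protocols[0] = '\0';
	protocols[2] = '\0';   /* empty string at offset 2 if nothing registers */
	strcat(protocols, ", "); strcat(protocols, "TCP");
	strcat(protocols, ", "); strcat(protocols, "UDP");
	printf("Registered protocols (%s)\n", &protocols[2]);   /* TCP, UDP */
	return 0;
}
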
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c deleted file mode 100644 index 73e0ea87c1f5..000000000000 --- a/net/ipv4/ipvs/ip_vs_proto_ah.c +++ /dev/null | |||
@@ -1,178 +0,0 @@ | |||
1 | /* | ||
2 | * ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS | ||
3 | * | ||
4 | * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 | ||
5 | * Wensong Zhang <wensong@linuxvirtualserver.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * version 2 as published by the Free Software Foundation; | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/in.h> | ||
14 | #include <linux/ip.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/netfilter.h> | ||
18 | #include <linux/netfilter_ipv4.h> | ||
19 | |||
20 | #include <net/ip_vs.h> | ||
21 | |||
22 | |||
23 | /* TODO: | ||
24 | |||
25 | struct isakmp_hdr { | ||
26 | __u8 icookie[8]; | ||
27 | __u8 rcookie[8]; | ||
28 | __u8 np; | ||
29 | __u8 version; | ||
30 | __u8 xchgtype; | ||
31 | __u8 flags; | ||
32 | __u32 msgid; | ||
33 | __u32 length; | ||
34 | }; | ||
35 | |||
36 | */ | ||
37 | |||
38 | #define PORT_ISAKMP 500 | ||
39 | |||
40 | |||
41 | static struct ip_vs_conn * | ||
42 | ah_conn_in_get(const struct sk_buff *skb, | ||
43 | struct ip_vs_protocol *pp, | ||
44 | const struct iphdr *iph, | ||
45 | unsigned int proto_off, | ||
46 | int inverse) | ||
47 | { | ||
48 | struct ip_vs_conn *cp; | ||
49 | |||
50 | if (likely(!inverse)) { | ||
51 | cp = ip_vs_conn_in_get(IPPROTO_UDP, | ||
52 | iph->saddr, | ||
53 | htons(PORT_ISAKMP), | ||
54 | iph->daddr, | ||
55 | htons(PORT_ISAKMP)); | ||
56 | } else { | ||
57 | cp = ip_vs_conn_in_get(IPPROTO_UDP, | ||
58 | iph->daddr, | ||
59 | htons(PORT_ISAKMP), | ||
60 | iph->saddr, | ||
61 | htons(PORT_ISAKMP)); | ||
62 | } | ||
63 | |||
64 | if (!cp) { | ||
65 | /* | ||
66 | * We are not sure if the packet is from our | ||
67 | * service, so our conn_schedule hook should return NF_ACCEPT | ||
68 | */ | ||
69 | IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet " | ||
70 | "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", | ||
71 | inverse ? "ICMP+" : "", | ||
72 | pp->name, | ||
73 | NIPQUAD(iph->saddr), | ||
74 | NIPQUAD(iph->daddr)); | ||
75 | } | ||
76 | |||
77 | return cp; | ||
78 | } | ||
79 | |||
80 | |||
81 | static struct ip_vs_conn * | ||
82 | ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
83 | const struct iphdr *iph, unsigned int proto_off, int inverse) | ||
84 | { | ||
85 | struct ip_vs_conn *cp; | ||
86 | |||
87 | if (likely(!inverse)) { | ||
88 | cp = ip_vs_conn_out_get(IPPROTO_UDP, | ||
89 | iph->saddr, | ||
90 | htons(PORT_ISAKMP), | ||
91 | iph->daddr, | ||
92 | htons(PORT_ISAKMP)); | ||
93 | } else { | ||
94 | cp = ip_vs_conn_out_get(IPPROTO_UDP, | ||
95 | iph->daddr, | ||
96 | htons(PORT_ISAKMP), | ||
97 | iph->saddr, | ||
98 | htons(PORT_ISAKMP)); | ||
99 | } | ||
100 | |||
101 | if (!cp) { | ||
102 | IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet " | ||
103 | "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", | ||
104 | inverse ? "ICMP+" : "", | ||
105 | pp->name, | ||
106 | NIPQUAD(iph->saddr), | ||
107 | NIPQUAD(iph->daddr)); | ||
108 | } | ||
109 | |||
110 | return cp; | ||
111 | } | ||
112 | |||
113 | |||
114 | static int | ||
115 | ah_conn_schedule(struct sk_buff *skb, | ||
116 | struct ip_vs_protocol *pp, | ||
117 | int *verdict, struct ip_vs_conn **cpp) | ||
118 | { | ||
119 | /* | ||
120 | * AH is related traffic only; pass the packet to the IP stack. | ||
121 | */ | ||
122 | *verdict = NF_ACCEPT; | ||
123 | return 0; | ||
124 | } | ||
125 | |||
126 | |||
127 | static void | ||
128 | ah_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, | ||
129 | int offset, const char *msg) | ||
130 | { | ||
131 | char buf[256]; | ||
132 | struct iphdr _iph, *ih; | ||
133 | |||
134 | ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); | ||
135 | if (ih == NULL) | ||
136 | sprintf(buf, "%s TRUNCATED", pp->name); | ||
137 | else | ||
138 | sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", | ||
139 | pp->name, NIPQUAD(ih->saddr), | ||
140 | NIPQUAD(ih->daddr)); | ||
141 | |||
142 | printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); | ||
143 | } | ||
144 | |||
145 | |||
146 | static void ah_init(struct ip_vs_protocol *pp) | ||
147 | { | ||
148 | /* nothing to do now */ | ||
149 | } | ||
150 | |||
151 | |||
152 | static void ah_exit(struct ip_vs_protocol *pp) | ||
153 | { | ||
154 | /* nothing to do now */ | ||
155 | } | ||
156 | |||
157 | |||
158 | struct ip_vs_protocol ip_vs_protocol_ah = { | ||
159 | .name = "AH", | ||
160 | .protocol = IPPROTO_AH, | ||
161 | .num_states = 1, | ||
162 | .dont_defrag = 1, | ||
163 | .init = ah_init, | ||
164 | .exit = ah_exit, | ||
165 | .conn_schedule = ah_conn_schedule, | ||
166 | .conn_in_get = ah_conn_in_get, | ||
167 | .conn_out_get = ah_conn_out_get, | ||
168 | .snat_handler = NULL, | ||
169 | .dnat_handler = NULL, | ||
170 | .csum_check = NULL, | ||
171 | .state_transition = NULL, | ||
172 | .register_app = NULL, | ||
173 | .unregister_app = NULL, | ||
174 | .app_conn_bind = NULL, | ||
175 | .debug_packet = ah_debug_packet, | ||
176 | .timeout_change = NULL, /* ISAKMP */ | ||
177 | .set_state_timeout = NULL, | ||
178 | }; | ||
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c deleted file mode 100644 index 21d70c8ffa54..000000000000 --- a/net/ipv4/ipvs/ip_vs_proto_esp.c +++ /dev/null | |||
@@ -1,176 +0,0 @@ | |||
1 | /* | ||
2 | * ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS | ||
3 | * | ||
4 | * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 | ||
5 | * Wensong Zhang <wensong@linuxvirtualserver.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * version 2 as published by the Free Software Foundation; | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/in.h> | ||
14 | #include <linux/ip.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/netfilter.h> | ||
18 | #include <linux/netfilter_ipv4.h> | ||
19 | |||
20 | #include <net/ip_vs.h> | ||
21 | |||
22 | |||
23 | /* TODO: | ||
24 | |||
25 | struct isakmp_hdr { | ||
26 | __u8 icookie[8]; | ||
27 | __u8 rcookie[8]; | ||
28 | __u8 np; | ||
29 | __u8 version; | ||
30 | __u8 xchgtype; | ||
31 | __u8 flags; | ||
32 | __u32 msgid; | ||
33 | __u32 length; | ||
34 | }; | ||
35 | |||
36 | */ | ||
37 | |||
38 | #define PORT_ISAKMP 500 | ||
39 | |||
40 | |||
41 | static struct ip_vs_conn * | ||
42 | esp_conn_in_get(const struct sk_buff *skb, | ||
43 | struct ip_vs_protocol *pp, | ||
44 | const struct iphdr *iph, | ||
45 | unsigned int proto_off, | ||
46 | int inverse) | ||
47 | { | ||
48 | struct ip_vs_conn *cp; | ||
49 | |||
50 | if (likely(!inverse)) { | ||
51 | cp = ip_vs_conn_in_get(IPPROTO_UDP, | ||
52 | iph->saddr, | ||
53 | htons(PORT_ISAKMP), | ||
54 | iph->daddr, | ||
55 | htons(PORT_ISAKMP)); | ||
56 | } else { | ||
57 | cp = ip_vs_conn_in_get(IPPROTO_UDP, | ||
58 | iph->daddr, | ||
59 | htons(PORT_ISAKMP), | ||
60 | iph->saddr, | ||
61 | htons(PORT_ISAKMP)); | ||
62 | } | ||
63 | |||
64 | if (!cp) { | ||
65 | /* | ||
66 | * We are not sure if the packet is from our | ||
67 | * service, so our conn_schedule hook should return NF_ACCEPT | ||
68 | */ | ||
69 | IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet " | ||
70 | "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", | ||
71 | inverse ? "ICMP+" : "", | ||
72 | pp->name, | ||
73 | NIPQUAD(iph->saddr), | ||
74 | NIPQUAD(iph->daddr)); | ||
75 | } | ||
76 | |||
77 | return cp; | ||
78 | } | ||
79 | |||
80 | |||
81 | static struct ip_vs_conn * | ||
82 | esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
83 | const struct iphdr *iph, unsigned int proto_off, int inverse) | ||
84 | { | ||
85 | struct ip_vs_conn *cp; | ||
86 | |||
87 | if (likely(!inverse)) { | ||
88 | cp = ip_vs_conn_out_get(IPPROTO_UDP, | ||
89 | iph->saddr, | ||
90 | htons(PORT_ISAKMP), | ||
91 | iph->daddr, | ||
92 | htons(PORT_ISAKMP)); | ||
93 | } else { | ||
94 | cp = ip_vs_conn_out_get(IPPROTO_UDP, | ||
95 | iph->daddr, | ||
96 | htons(PORT_ISAKMP), | ||
97 | iph->saddr, | ||
98 | htons(PORT_ISAKMP)); | ||
99 | } | ||
100 | |||
101 | if (!cp) { | ||
102 | IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet " | ||
103 | "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", | ||
104 | inverse ? "ICMP+" : "", | ||
105 | pp->name, | ||
106 | NIPQUAD(iph->saddr), | ||
107 | NIPQUAD(iph->daddr)); | ||
108 | } | ||
109 | |||
110 | return cp; | ||
111 | } | ||
112 | |||
113 | |||
114 | static int | ||
115 | esp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
116 | int *verdict, struct ip_vs_conn **cpp) | ||
117 | { | ||
118 | /* | ||
119 | * ESP is related traffic only; pass the packet to the IP stack. | ||
120 | */ | ||
121 | *verdict = NF_ACCEPT; | ||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | |||
126 | static void | ||
127 | esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, | ||
128 | int offset, const char *msg) | ||
129 | { | ||
130 | char buf[256]; | ||
131 | struct iphdr _iph, *ih; | ||
132 | |||
133 | ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); | ||
134 | if (ih == NULL) | ||
135 | sprintf(buf, "%s TRUNCATED", pp->name); | ||
136 | else | ||
137 | sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", | ||
138 | pp->name, NIPQUAD(ih->saddr), | ||
139 | NIPQUAD(ih->daddr)); | ||
140 | |||
141 | printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); | ||
142 | } | ||
143 | |||
144 | |||
145 | static void esp_init(struct ip_vs_protocol *pp) | ||
146 | { | ||
147 | /* nothing to do now */ | ||
148 | } | ||
149 | |||
150 | |||
151 | static void esp_exit(struct ip_vs_protocol *pp) | ||
152 | { | ||
153 | /* nothing to do now */ | ||
154 | } | ||
155 | |||
156 | |||
157 | struct ip_vs_protocol ip_vs_protocol_esp = { | ||
158 | .name = "ESP", | ||
159 | .protocol = IPPROTO_ESP, | ||
160 | .num_states = 1, | ||
161 | .dont_defrag = 1, | ||
162 | .init = esp_init, | ||
163 | .exit = esp_exit, | ||
164 | .conn_schedule = esp_conn_schedule, | ||
165 | .conn_in_get = esp_conn_in_get, | ||
166 | .conn_out_get = esp_conn_out_get, | ||
167 | .snat_handler = NULL, | ||
168 | .dnat_handler = NULL, | ||
169 | .csum_check = NULL, | ||
170 | .state_transition = NULL, | ||
171 | .register_app = NULL, | ||
172 | .unregister_app = NULL, | ||
173 | .app_conn_bind = NULL, | ||
174 | .debug_packet = esp_debug_packet, | ||
175 | .timeout_change = NULL, /* ISAKMP */ | ||
176 | }; | ||
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c deleted file mode 100644 index d0ea467986a0..000000000000 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ /dev/null | |||
@@ -1,614 +0,0 @@ | |||
1 | /* | ||
2 | * ip_vs_proto_tcp.c: TCP load balancing support for IPVS | ||
3 | * | ||
4 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
5 | * Julian Anastasov <ja@ssi.bg> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | * | ||
12 | * Changes: | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/ip.h> | ||
18 | #include <linux/tcp.h> /* for tcphdr */ | ||
19 | #include <net/ip.h> | ||
20 | #include <net/tcp.h> /* for csum_tcpudp_magic */ | ||
21 | #include <linux/netfilter.h> | ||
22 | #include <linux/netfilter_ipv4.h> | ||
23 | |||
24 | #include <net/ip_vs.h> | ||
25 | |||
26 | |||
27 | static struct ip_vs_conn * | ||
28 | tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
29 | const struct iphdr *iph, unsigned int proto_off, int inverse) | ||
30 | { | ||
31 | __be16 _ports[2], *pptr; | ||
32 | |||
33 | pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); | ||
34 | if (pptr == NULL) | ||
35 | return NULL; | ||
36 | |||
37 | if (likely(!inverse)) { | ||
38 | return ip_vs_conn_in_get(iph->protocol, | ||
39 | iph->saddr, pptr[0], | ||
40 | iph->daddr, pptr[1]); | ||
41 | } else { | ||
42 | return ip_vs_conn_in_get(iph->protocol, | ||
43 | iph->daddr, pptr[1], | ||
44 | iph->saddr, pptr[0]); | ||
45 | } | ||
46 | } | ||
47 | |||
48 | static struct ip_vs_conn * | ||
49 | tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
50 | const struct iphdr *iph, unsigned int proto_off, int inverse) | ||
51 | { | ||
52 | __be16 _ports[2], *pptr; | ||
53 | |||
54 | pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); | ||
55 | if (pptr == NULL) | ||
56 | return NULL; | ||
57 | |||
58 | if (likely(!inverse)) { | ||
59 | return ip_vs_conn_out_get(iph->protocol, | ||
60 | iph->saddr, pptr[0], | ||
61 | iph->daddr, pptr[1]); | ||
62 | } else { | ||
63 | return ip_vs_conn_out_get(iph->protocol, | ||
64 | iph->daddr, pptr[1], | ||
65 | iph->saddr, pptr[0]); | ||
66 | } | ||
67 | } | ||
68 | |||
69 | |||
70 | static int | ||
71 | tcp_conn_schedule(struct sk_buff *skb, | ||
72 | struct ip_vs_protocol *pp, | ||
73 | int *verdict, struct ip_vs_conn **cpp) | ||
74 | { | ||
75 | struct ip_vs_service *svc; | ||
76 | struct tcphdr _tcph, *th; | ||
77 | |||
78 | th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); | ||
79 | if (th == NULL) { | ||
80 | *verdict = NF_DROP; | ||
81 | return 0; | ||
82 | } | ||
83 | |||
84 | if (th->syn && | ||
85 | (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol, | ||
86 | ip_hdr(skb)->daddr, th->dest))) { | ||
87 | if (ip_vs_todrop()) { | ||
88 | /* | ||
89 | * It seems that we are very loaded. | ||
90 | * We have to drop this packet :( | ||
91 | */ | ||
92 | ip_vs_service_put(svc); | ||
93 | *verdict = NF_DROP; | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * Let the virtual server select a real server for the | ||
99 | * incoming connection, and create a connection entry. | ||
100 | */ | ||
101 | *cpp = ip_vs_schedule(svc, skb); | ||
102 | if (!*cpp) { | ||
103 | *verdict = ip_vs_leave(svc, skb, pp); | ||
104 | return 0; | ||
105 | } | ||
106 | ip_vs_service_put(svc); | ||
107 | } | ||
108 | return 1; | ||
109 | } | ||
110 | |||
111 | |||
112 | static inline void | ||
113 | tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip, | ||
114 | __be16 oldport, __be16 newport) | ||
115 | { | ||
116 | tcph->check = | ||
117 | csum_fold(ip_vs_check_diff4(oldip, newip, | ||
118 | ip_vs_check_diff2(oldport, newport, | ||
119 | ~csum_unfold(tcph->check)))); | ||
120 | } | ||
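
tcp_fast_csum_update() is an incremental checksum update in the style of RFC 1624: instead of re-summing the whole segment it folds in the difference between the old and new address/port words, HC' = ~(~HC + ~m + m'). A portable sketch over a single 16-bit word; the ip_vs_check_diff helpers apply the same identity to 32- and 16-bit quantities:

#include <stdio.h>
#include <stdint.h>

/* RFC 1624: HC' = ~(~HC + ~m + m'), with end-around carry folding. */
static uint16_t csum_update(uint16_t check, uint16_t oldw, uint16_t neww)
{
	uint32_t sum = (uint16_t)~check;
	sum += (uint16_t)~oldw;
	sum += neww;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* Rewriting port 8080 -> 80 touches only one 16-bit word. */
	uint16_t check = 0x1c46;   /* arbitrary prior checksum */
	printf("%#06x\n", (unsigned)csum_update(check, 8080, 80));
	return 0;
}
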
121 | |||
122 | |||
123 | static int | ||
124 | tcp_snat_handler(struct sk_buff *skb, | ||
125 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) | ||
126 | { | ||
127 | struct tcphdr *tcph; | ||
128 | const unsigned int tcphoff = ip_hdrlen(skb); | ||
129 | |||
130 | /* csum_check requires unshared skb */ | ||
131 | if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) | ||
132 | return 0; | ||
133 | |||
134 | if (unlikely(cp->app != NULL)) { | ||
135 | /* Some checks before mangling */ | ||
136 | if (pp->csum_check && !pp->csum_check(skb, pp)) | ||
137 | return 0; | ||
138 | |||
139 | /* Call application helper if needed */ | ||
140 | if (!ip_vs_app_pkt_out(cp, skb)) | ||
141 | return 0; | ||
142 | } | ||
143 | |||
144 | tcph = (void *)ip_hdr(skb) + tcphoff; | ||
145 | tcph->source = cp->vport; | ||
146 | |||
147 | /* Adjust TCP checksums */ | ||
148 | if (!cp->app) { | ||
149 | /* Only port and addr are changed, do fast csum update */ | ||
150 | tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, | ||
151 | cp->dport, cp->vport); | ||
152 | if (skb->ip_summed == CHECKSUM_COMPLETE) | ||
153 | skb->ip_summed = CHECKSUM_NONE; | ||
154 | } else { | ||
155 | /* full checksum calculation */ | ||
156 | tcph->check = 0; | ||
157 | skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); | ||
158 | tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, | ||
159 | skb->len - tcphoff, | ||
160 | cp->protocol, skb->csum); | ||
161 | IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", | ||
162 | pp->name, tcph->check, | ||
163 | (char*)&(tcph->check) - (char*)tcph); | ||
164 | } | ||
165 | return 1; | ||
166 | } | ||
167 | |||
168 | |||
169 | static int | ||
170 | tcp_dnat_handler(struct sk_buff *skb, | ||
171 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) | ||
172 | { | ||
173 | struct tcphdr *tcph; | ||
174 | const unsigned int tcphoff = ip_hdrlen(skb); | ||
175 | |||
176 | /* csum_check requires unshared skb */ | ||
177 | if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) | ||
178 | return 0; | ||
179 | |||
180 | if (unlikely(cp->app != NULL)) { | ||
181 | /* Some checks before mangling */ | ||
182 | if (pp->csum_check && !pp->csum_check(skb, pp)) | ||
183 | return 0; | ||
184 | |||
185 | /* | ||
186 | * Attempt ip_vs_app call. | ||
187 | * It will fix ip_vs_conn and iph ack_seq stuff | ||
188 | */ | ||
189 | if (!ip_vs_app_pkt_in(cp, skb)) | ||
190 | return 0; | ||
191 | } | ||
192 | |||
193 | tcph = (void *)ip_hdr(skb) + tcphoff; | ||
194 | tcph->dest = cp->dport; | ||
195 | |||
196 | /* | ||
197 | * Adjust TCP checksums | ||
198 | */ | ||
199 | if (!cp->app) { | ||
200 | /* Only port and addr are changed, do fast csum update */ | ||
201 | tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, | ||
202 | cp->vport, cp->dport); | ||
203 | if (skb->ip_summed == CHECKSUM_COMPLETE) | ||
204 | skb->ip_summed = CHECKSUM_NONE; | ||
205 | } else { | ||
206 | /* full checksum calculation */ | ||
207 | tcph->check = 0; | ||
208 | skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); | ||
209 | tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, | ||
210 | skb->len - tcphoff, | ||
211 | cp->protocol, skb->csum); | ||
212 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
213 | } | ||
214 | return 1; | ||
215 | } | ||
216 | |||
217 | |||
218 | static int | ||
219 | tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) | ||
220 | { | ||
221 | const unsigned int tcphoff = ip_hdrlen(skb); | ||
222 | |||
223 | switch (skb->ip_summed) { | ||
224 | case CHECKSUM_NONE: | ||
225 | skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); | ||
226 | case CHECKSUM_COMPLETE: | ||
227 | if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, | ||
228 | skb->len - tcphoff, | ||
229 | ip_hdr(skb)->protocol, skb->csum)) { | ||
230 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, | ||
231 | "Failed checksum for"); | ||
232 | return 0; | ||
233 | } | ||
234 | break; | ||
235 | default: | ||
236 | /* No need to checksum. */ | ||
237 | break; | ||
238 | } | ||
239 | |||
240 | return 1; | ||
241 | } | ||
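
Note the deliberate fall-through from CHECKSUM_NONE into CHECKSUM_COMPLETE above: when no hardware sum exists, the code computes skb->csum itself and then verifies it exactly as in the COMPLETE case (later kernels mark the missing break with a fall-through annotation). The pass condition is the usual one's-complement identity: a segment with a correct checksum folds to 0xffff. Shown here with toy data:

#include <stdio.h>
#include <stdint.h>

static uint16_t fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

int main(void)
{
	/* two data words plus the checksum word the sender stored */
	uint32_t sum = 0x1234 + 0xabcd;
	uint16_t check = ~fold(sum);

	/* verification sums everything, checksum field included */
	printf("verifies: %d\n", fold(sum + check) == 0xffff);   /* 1 */
	return 0;
}
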
242 | |||
243 | |||
244 | #define TCP_DIR_INPUT 0 | ||
245 | #define TCP_DIR_OUTPUT 4 | ||
246 | #define TCP_DIR_INPUT_ONLY 8 | ||
247 | |||
248 | static const int tcp_state_off[IP_VS_DIR_LAST] = { | ||
249 | [IP_VS_DIR_INPUT] = TCP_DIR_INPUT, | ||
250 | [IP_VS_DIR_OUTPUT] = TCP_DIR_OUTPUT, | ||
251 | [IP_VS_DIR_INPUT_ONLY] = TCP_DIR_INPUT_ONLY, | ||
252 | }; | ||
253 | |||
254 | /* | ||
255 | * Timeout table[state] | ||
256 | */ | ||
257 | static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { | ||
258 | [IP_VS_TCP_S_NONE] = 2*HZ, | ||
259 | [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ, | ||
260 | [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ, | ||
261 | [IP_VS_TCP_S_SYN_RECV] = 1*60*HZ, | ||
262 | [IP_VS_TCP_S_FIN_WAIT] = 2*60*HZ, | ||
263 | [IP_VS_TCP_S_TIME_WAIT] = 2*60*HZ, | ||
264 | [IP_VS_TCP_S_CLOSE] = 10*HZ, | ||
265 | [IP_VS_TCP_S_CLOSE_WAIT] = 60*HZ, | ||
266 | [IP_VS_TCP_S_LAST_ACK] = 30*HZ, | ||
267 | [IP_VS_TCP_S_LISTEN] = 2*60*HZ, | ||
268 | [IP_VS_TCP_S_SYNACK] = 120*HZ, | ||
269 | [IP_VS_TCP_S_LAST] = 2*HZ, | ||
270 | }; | ||
271 | |||
272 | static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = { | ||
273 | [IP_VS_TCP_S_NONE] = "NONE", | ||
274 | [IP_VS_TCP_S_ESTABLISHED] = "ESTABLISHED", | ||
275 | [IP_VS_TCP_S_SYN_SENT] = "SYN_SENT", | ||
276 | [IP_VS_TCP_S_SYN_RECV] = "SYN_RECV", | ||
277 | [IP_VS_TCP_S_FIN_WAIT] = "FIN_WAIT", | ||
278 | [IP_VS_TCP_S_TIME_WAIT] = "TIME_WAIT", | ||
279 | [IP_VS_TCP_S_CLOSE] = "CLOSE", | ||
280 | [IP_VS_TCP_S_CLOSE_WAIT] = "CLOSE_WAIT", | ||
281 | [IP_VS_TCP_S_LAST_ACK] = "LAST_ACK", | ||
282 | [IP_VS_TCP_S_LISTEN] = "LISTEN", | ||
283 | [IP_VS_TCP_S_SYNACK] = "SYNACK", | ||
284 | [IP_VS_TCP_S_LAST] = "BUG!", | ||
285 | }; | ||
286 | |||
287 | #define sNO IP_VS_TCP_S_NONE | ||
288 | #define sES IP_VS_TCP_S_ESTABLISHED | ||
289 | #define sSS IP_VS_TCP_S_SYN_SENT | ||
290 | #define sSR IP_VS_TCP_S_SYN_RECV | ||
291 | #define sFW IP_VS_TCP_S_FIN_WAIT | ||
292 | #define sTW IP_VS_TCP_S_TIME_WAIT | ||
293 | #define sCL IP_VS_TCP_S_CLOSE | ||
294 | #define sCW IP_VS_TCP_S_CLOSE_WAIT | ||
295 | #define sLA IP_VS_TCP_S_LAST_ACK | ||
296 | #define sLI IP_VS_TCP_S_LISTEN | ||
297 | #define sSA IP_VS_TCP_S_SYNACK | ||
298 | |||
299 | struct tcp_states_t { | ||
300 | int next_state[IP_VS_TCP_S_LAST]; | ||
301 | }; | ||
302 | |||
303 | static const char * tcp_state_name(int state) | ||
304 | { | ||
305 | if (state >= IP_VS_TCP_S_LAST) | ||
306 | return "ERR!"; | ||
307 | return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?"; | ||
308 | } | ||
309 | |||
310 | static struct tcp_states_t tcp_states [] = { | ||
311 | /* INPUT */ | ||
312 | /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ | ||
313 | /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, | ||
314 | /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }}, | ||
315 | /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, | ||
316 | /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }}, | ||
317 | |||
318 | /* OUTPUT */ | ||
319 | /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ | ||
320 | /*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }}, | ||
321 | /*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }}, | ||
322 | /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }}, | ||
323 | /*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }}, | ||
324 | |||
325 | /* INPUT-ONLY */ | ||
326 | /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ | ||
327 | /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, | ||
328 | /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }}, | ||
329 | /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, | ||
330 | /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, | ||
331 | }; | ||
332 | |||
333 | static struct tcp_states_t tcp_states_dos [] = { | ||
334 | /* INPUT */ | ||
335 | /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ | ||
336 | /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }}, | ||
337 | /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }}, | ||
338 | /*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }}, | ||
339 | /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, | ||
340 | |||
341 | /* OUTPUT */ | ||
342 | /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ | ||
343 | /*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }}, | ||
344 | /*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }}, | ||
345 | /*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }}, | ||
346 | /*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }}, | ||
347 | |||
348 | /* INPUT-ONLY */ | ||
349 | /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ | ||
350 | /*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }}, | ||
351 | /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }}, | ||
352 | /*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, | ||
353 | /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, | ||
354 | }; | ||
355 | |||
356 | static struct tcp_states_t *tcp_state_table = tcp_states; | ||
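
Each table above is three blocks of four rows, indexed by direction offset plus packet event: set_tcp_state() below computes tcp_state_table[state_off + state_idx].next_state[cp->state], where state_idx is 0/1/2/3 for SYN/FIN/ACK/RST (see tcp_state_idx() further down) and state_off is 0, 4 or 8 for INPUT, OUTPUT and INPUT-ONLY. A minimal sketch of that lookup, using only the definitions above:

	/* Sketch only: resolve one transition against the tables above.
	 * E.g. an incoming SYN (event 0, offset TCP_DIR_INPUT) on a
	 * connection in sNO yields sSR (SYN_RECV). */
	static inline int tcp_next_state_example(int state_off, int event,
						 int cur_state)
	{
		return tcp_state_table[state_off + event].next_state[cur_state];
	}
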
357 | |||
358 | |||
359 | static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) | ||
360 | { | ||
361 | int on = (flags & 1); /* secure_tcp */ | ||
362 | |||
363 | /* | ||
364 | ** FIXME: change secure_tcp to independent sysctl var | ||
365 | ** or make it per-service or per-app because it is valid | ||
366 | ** for most if not all applications. Something | ||
367 | ** like "capabilities" (flags) for each object. | ||
368 | */ | ||
369 | tcp_state_table = (on? tcp_states_dos : tcp_states); | ||
370 | } | ||
371 | |||
372 | static int | ||
373 | tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) | ||
374 | { | ||
375 | return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST, | ||
376 | tcp_state_name_table, sname, to); | ||
377 | } | ||
378 | |||
379 | static inline int tcp_state_idx(struct tcphdr *th) | ||
380 | { | ||
381 | if (th->rst) | ||
382 | return 3; | ||
383 | if (th->syn) | ||
384 | return 0; | ||
385 | if (th->fin) | ||
386 | return 1; | ||
387 | if (th->ack) | ||
388 | return 2; | ||
389 | return -1; | ||
390 | } | ||
391 | |||
392 | static inline void | ||
393 | set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | ||
394 | int direction, struct tcphdr *th) | ||
395 | { | ||
396 | int state_idx; | ||
397 | int new_state = IP_VS_TCP_S_CLOSE; | ||
398 | int state_off = tcp_state_off[direction]; | ||
399 | |||
400 | /* | ||
401 | * Update state offset to INPUT_ONLY if necessary | ||
402 | * or clear the NOOUTPUT flag if an output packet is detected | ||
403 | */ | ||
404 | if (cp->flags & IP_VS_CONN_F_NOOUTPUT) { | ||
405 | if (state_off == TCP_DIR_OUTPUT) | ||
406 | cp->flags &= ~IP_VS_CONN_F_NOOUTPUT; | ||
407 | else | ||
408 | state_off = TCP_DIR_INPUT_ONLY; | ||
409 | } | ||
410 | |||
411 | if ((state_idx = tcp_state_idx(th)) < 0) { | ||
412 | IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx); | ||
413 | goto tcp_state_out; | ||
414 | } | ||
415 | |||
416 | new_state = tcp_state_table[state_off+state_idx].next_state[cp->state]; | ||
417 | |||
418 | tcp_state_out: | ||
419 | if (new_state != cp->state) { | ||
420 | struct ip_vs_dest *dest = cp->dest; | ||
421 | |||
422 | IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->" | ||
423 | "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n", | ||
424 | pp->name, | ||
425 | (state_off==TCP_DIR_OUTPUT)?"output ":"input ", | ||
426 | th->syn? 'S' : '.', | ||
427 | th->fin? 'F' : '.', | ||
428 | th->ack? 'A' : '.', | ||
429 | th->rst? 'R' : '.', | ||
430 | NIPQUAD(cp->daddr), ntohs(cp->dport), | ||
431 | NIPQUAD(cp->caddr), ntohs(cp->cport), | ||
432 | tcp_state_name(cp->state), | ||
433 | tcp_state_name(new_state), | ||
434 | atomic_read(&cp->refcnt)); | ||
435 | if (dest) { | ||
436 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
437 | (new_state != IP_VS_TCP_S_ESTABLISHED)) { | ||
438 | atomic_dec(&dest->activeconns); | ||
439 | atomic_inc(&dest->inactconns); | ||
440 | cp->flags |= IP_VS_CONN_F_INACTIVE; | ||
441 | } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
442 | (new_state == IP_VS_TCP_S_ESTABLISHED)) { | ||
443 | atomic_inc(&dest->activeconns); | ||
444 | atomic_dec(&dest->inactconns); | ||
445 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | ||
446 | } | ||
447 | } | ||
448 | } | ||
449 | |||
450 | cp->timeout = pp->timeout_table[cp->state = new_state]; | ||
451 | } | ||
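
The accounting block above keeps each real server's counters in step with the state machine: a connection counts as active only while ESTABLISHED, and as inactive in every other state. A minimal sketch of that predicate:

	/* Sketch only: the condition behind the activeconns/inactconns
	 * swapping in set_tcp_state() above. */
	static inline int tcp_conn_counts_as_active(int state)
	{
		return state == IP_VS_TCP_S_ESTABLISHED;
	}
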
452 | |||
453 | |||
454 | /* | ||
455 | * Handle state transitions | ||
456 | */ | ||
457 | static int | ||
458 | tcp_state_transition(struct ip_vs_conn *cp, int direction, | ||
459 | const struct sk_buff *skb, | ||
460 | struct ip_vs_protocol *pp) | ||
461 | { | ||
462 | struct tcphdr _tcph, *th; | ||
463 | |||
464 | th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); | ||
465 | if (th == NULL) | ||
466 | return 0; | ||
467 | |||
468 | spin_lock(&cp->lock); | ||
469 | set_tcp_state(pp, cp, direction, th); | ||
470 | spin_unlock(&cp->lock); | ||
471 | |||
472 | return 1; | ||
473 | } | ||
474 | |||
475 | |||
476 | /* | ||
477 | * Hash table for TCP application incarnations | ||
478 | */ | ||
479 | #define TCP_APP_TAB_BITS 4 | ||
480 | #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) | ||
481 | #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) | ||
482 | |||
483 | static struct list_head tcp_apps[TCP_APP_TAB_SIZE]; | ||
484 | static DEFINE_SPINLOCK(tcp_app_lock); | ||
485 | |||
486 | static inline __u16 tcp_app_hashkey(__be16 port) | ||
487 | { | ||
488 | return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port) | ||
489 | & TCP_APP_TAB_MASK; | ||
490 | } | ||
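
tcp_app_hashkey() xor-folds the next four bits of the raw (network-order) port into the low four, so that more of the port value influences the 16-bucket index. The same folding on a plain unsigned value, as a sketch with the constants spelled out:

	/* Sketch only: equivalent to tcp_app_hashkey() above,
	 * with TCP_APP_TAB_BITS = 4 and TCP_APP_TAB_MASK = 15. */
	static inline unsigned int app_hashkey_example(unsigned int port_raw)
	{
		return ((port_raw >> 4) ^ port_raw) & 15;
	}
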
491 | |||
492 | |||
493 | static int tcp_register_app(struct ip_vs_app *inc) | ||
494 | { | ||
495 | struct ip_vs_app *i; | ||
496 | __u16 hash; | ||
497 | __be16 port = inc->port; | ||
498 | int ret = 0; | ||
499 | |||
500 | hash = tcp_app_hashkey(port); | ||
501 | |||
502 | spin_lock_bh(&tcp_app_lock); | ||
503 | list_for_each_entry(i, &tcp_apps[hash], p_list) { | ||
504 | if (i->port == port) { | ||
505 | ret = -EEXIST; | ||
506 | goto out; | ||
507 | } | ||
508 | } | ||
509 | list_add(&inc->p_list, &tcp_apps[hash]); | ||
510 | atomic_inc(&ip_vs_protocol_tcp.appcnt); | ||
511 | |||
512 | out: | ||
513 | spin_unlock_bh(&tcp_app_lock); | ||
514 | return ret; | ||
515 | } | ||
516 | |||
517 | |||
518 | static void | ||
519 | tcp_unregister_app(struct ip_vs_app *inc) | ||
520 | { | ||
521 | spin_lock_bh(&tcp_app_lock); | ||
522 | atomic_dec(&ip_vs_protocol_tcp.appcnt); | ||
523 | list_del(&inc->p_list); | ||
524 | spin_unlock_bh(&tcp_app_lock); | ||
525 | } | ||
526 | |||
527 | |||
528 | static int | ||
529 | tcp_app_conn_bind(struct ip_vs_conn *cp) | ||
530 | { | ||
531 | int hash; | ||
532 | struct ip_vs_app *inc; | ||
533 | int result = 0; | ||
534 | |||
535 | /* Default binding: bind app only for NAT */ | ||
536 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) | ||
537 | return 0; | ||
538 | |||
539 | /* Lookup application incarnations and bind the right one */ | ||
540 | hash = tcp_app_hashkey(cp->vport); | ||
541 | |||
542 | spin_lock(&tcp_app_lock); | ||
543 | list_for_each_entry(inc, &tcp_apps[hash], p_list) { | ||
544 | if (inc->port == cp->vport) { | ||
545 | if (unlikely(!ip_vs_app_inc_get(inc))) | ||
546 | break; | ||
547 | spin_unlock(&tcp_app_lock); | ||
548 | |||
549 | IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" | ||
550 | "%u.%u.%u.%u:%u to app %s on port %u\n", | ||
551 | __func__, | ||
552 | NIPQUAD(cp->caddr), ntohs(cp->cport), | ||
553 | NIPQUAD(cp->vaddr), ntohs(cp->vport), | ||
554 | inc->name, ntohs(inc->port)); | ||
555 | cp->app = inc; | ||
556 | if (inc->init_conn) | ||
557 | result = inc->init_conn(inc, cp); | ||
558 | goto out; | ||
559 | } | ||
560 | } | ||
561 | spin_unlock(&tcp_app_lock); | ||
562 | |||
563 | out: | ||
564 | return result; | ||
565 | } | ||
566 | |||
567 | |||
568 | /* | ||
569 | * Set LISTEN timeout. (ip_vs_conn_put will setup timer) | ||
570 | */ | ||
571 | void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) | ||
572 | { | ||
573 | spin_lock(&cp->lock); | ||
574 | cp->state = IP_VS_TCP_S_LISTEN; | ||
575 | cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN]; | ||
576 | spin_unlock(&cp->lock); | ||
577 | } | ||
578 | |||
579 | |||
580 | static void ip_vs_tcp_init(struct ip_vs_protocol *pp) | ||
581 | { | ||
582 | IP_VS_INIT_HASH_TABLE(tcp_apps); | ||
583 | pp->timeout_table = tcp_timeouts; | ||
584 | } | ||
585 | |||
586 | |||
587 | static void ip_vs_tcp_exit(struct ip_vs_protocol *pp) | ||
588 | { | ||
589 | } | ||
590 | |||
591 | |||
592 | struct ip_vs_protocol ip_vs_protocol_tcp = { | ||
593 | .name = "TCP", | ||
594 | .protocol = IPPROTO_TCP, | ||
595 | .num_states = IP_VS_TCP_S_LAST, | ||
596 | .dont_defrag = 0, | ||
597 | .appcnt = ATOMIC_INIT(0), | ||
598 | .init = ip_vs_tcp_init, | ||
599 | .exit = ip_vs_tcp_exit, | ||
600 | .register_app = tcp_register_app, | ||
601 | .unregister_app = tcp_unregister_app, | ||
602 | .conn_schedule = tcp_conn_schedule, | ||
603 | .conn_in_get = tcp_conn_in_get, | ||
604 | .conn_out_get = tcp_conn_out_get, | ||
605 | .snat_handler = tcp_snat_handler, | ||
606 | .dnat_handler = tcp_dnat_handler, | ||
607 | .csum_check = tcp_csum_check, | ||
608 | .state_name = tcp_state_name, | ||
609 | .state_transition = tcp_state_transition, | ||
610 | .app_conn_bind = tcp_app_conn_bind, | ||
611 | .debug_packet = ip_vs_tcpudp_debug_packet, | ||
612 | .timeout_change = tcp_timeout_change, | ||
613 | .set_state_timeout = tcp_set_state_timeout, | ||
614 | }; | ||
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c deleted file mode 100644 index c6be5d56823f..000000000000 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ /dev/null | |||
@@ -1,428 +0,0 @@ | |||
1 | /* | ||
2 | * ip_vs_proto_udp.c: UDP load balancing support for IPVS | ||
3 | * | ||
4 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
5 | * Julian Anastasov <ja@ssi.bg> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | * | ||
12 | * Changes: | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <linux/in.h> | ||
17 | #include <linux/ip.h> | ||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/netfilter.h> | ||
20 | #include <linux/netfilter_ipv4.h> | ||
21 | #include <linux/udp.h> | ||
22 | |||
23 | #include <net/ip_vs.h> | ||
24 | #include <net/ip.h> | ||
25 | |||
26 | static struct ip_vs_conn * | ||
27 | udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
28 | const struct iphdr *iph, unsigned int proto_off, int inverse) | ||
29 | { | ||
30 | struct ip_vs_conn *cp; | ||
31 | __be16 _ports[2], *pptr; | ||
32 | |||
33 | pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); | ||
34 | if (pptr == NULL) | ||
35 | return NULL; | ||
36 | |||
37 | if (likely(!inverse)) { | ||
38 | cp = ip_vs_conn_in_get(iph->protocol, | ||
39 | iph->saddr, pptr[0], | ||
40 | iph->daddr, pptr[1]); | ||
41 | } else { | ||
42 | cp = ip_vs_conn_in_get(iph->protocol, | ||
43 | iph->daddr, pptr[1], | ||
44 | iph->saddr, pptr[0]); | ||
45 | } | ||
46 | |||
47 | return cp; | ||
48 | } | ||
49 | |||
50 | |||
51 | static struct ip_vs_conn * | ||
52 | udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
53 | const struct iphdr *iph, unsigned int proto_off, int inverse) | ||
54 | { | ||
55 | struct ip_vs_conn *cp; | ||
56 | __be16 _ports[2], *pptr; | ||
57 | |||
58 | pptr = skb_header_pointer(skb, ip_hdrlen(skb), | ||
59 | sizeof(_ports), _ports); | ||
60 | if (pptr == NULL) | ||
61 | return NULL; | ||
62 | |||
63 | if (likely(!inverse)) { | ||
64 | cp = ip_vs_conn_out_get(iph->protocol, | ||
65 | iph->saddr, pptr[0], | ||
66 | iph->daddr, pptr[1]); | ||
67 | } else { | ||
68 | cp = ip_vs_conn_out_get(iph->protocol, | ||
69 | iph->daddr, pptr[1], | ||
70 | iph->saddr, pptr[0]); | ||
71 | } | ||
72 | |||
73 | return cp; | ||
74 | } | ||
75 | |||
76 | |||
77 | static int | ||
78 | udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
79 | int *verdict, struct ip_vs_conn **cpp) | ||
80 | { | ||
81 | struct ip_vs_service *svc; | ||
82 | struct udphdr _udph, *uh; | ||
83 | |||
84 | uh = skb_header_pointer(skb, ip_hdrlen(skb), | ||
85 | sizeof(_udph), &_udph); | ||
86 | if (uh == NULL) { | ||
87 | *verdict = NF_DROP; | ||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol, | ||
92 | ip_hdr(skb)->daddr, uh->dest))) { | ||
93 | if (ip_vs_todrop()) { | ||
94 | /* | ||
95 | * It seems that we are very loaded. | ||
96 | * We have to drop this packet :( | ||
97 | */ | ||
98 | ip_vs_service_put(svc); | ||
99 | *verdict = NF_DROP; | ||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * Let the virtual server select a real server for the | ||
105 | * incoming connection, and create a connection entry. | ||
106 | */ | ||
107 | *cpp = ip_vs_schedule(svc, skb); | ||
108 | if (!*cpp) { | ||
109 | *verdict = ip_vs_leave(svc, skb, pp); | ||
110 | return 0; | ||
111 | } | ||
112 | ip_vs_service_put(svc); | ||
113 | } | ||
114 | return 1; | ||
115 | } | ||
116 | |||
117 | |||
118 | static inline void | ||
119 | udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip, | ||
120 | __be16 oldport, __be16 newport) | ||
121 | { | ||
122 | uhdr->check = | ||
123 | csum_fold(ip_vs_check_diff4(oldip, newip, | ||
124 | ip_vs_check_diff2(oldport, newport, | ||
125 | ~csum_unfold(uhdr->check)))); | ||
126 | if (!uhdr->check) | ||
127 | uhdr->check = CSUM_MANGLED_0; | ||
128 | } | ||
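
udp_fast_csum_update() applies the RFC 1624 incremental-update rule, HC' = ~(~HC + ~m + m'), through the ip_vs_check_diff helpers, so only the changed address and port words enter the arithmetic instead of the whole datagram. A minimal standalone sketch of the same rule for one 16-bit word:

	/* Sketch only: incrementally update a 16-bit one's-complement
	 * checksum when the word old_w in the covered data becomes new_w. */
	static inline unsigned short csum_update16_example(unsigned short check,
							   unsigned short old_w,
							   unsigned short new_w)
	{
		unsigned int sum = (~check & 0xffff) + (~old_w & 0xffff) + new_w;

		sum = (sum & 0xffff) + (sum >> 16);	/* fold the carries */
		sum = (sum & 0xffff) + (sum >> 16);
		return ~sum & 0xffff;
	}
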
129 | |||
130 | static int | ||
131 | udp_snat_handler(struct sk_buff *skb, | ||
132 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) | ||
133 | { | ||
134 | struct udphdr *udph; | ||
135 | const unsigned int udphoff = ip_hdrlen(skb); | ||
136 | |||
137 | /* csum_check requires unshared skb */ | ||
138 | if (!skb_make_writable(skb, udphoff+sizeof(*udph))) | ||
139 | return 0; | ||
140 | |||
141 | if (unlikely(cp->app != NULL)) { | ||
142 | /* Some checks before mangling */ | ||
143 | if (pp->csum_check && !pp->csum_check(skb, pp)) | ||
144 | return 0; | ||
145 | |||
146 | /* | ||
147 | * Call application helper if needed | ||
148 | */ | ||
149 | if (!ip_vs_app_pkt_out(cp, skb)) | ||
150 | return 0; | ||
151 | } | ||
152 | |||
153 | udph = (void *)ip_hdr(skb) + udphoff; | ||
154 | udph->source = cp->vport; | ||
155 | |||
156 | /* | ||
157 | * Adjust UDP checksums | ||
158 | */ | ||
159 | if (!cp->app && (udph->check != 0)) { | ||
160 | /* Only port and addr are changed, do fast csum update */ | ||
161 | udp_fast_csum_update(udph, cp->daddr, cp->vaddr, | ||
162 | cp->dport, cp->vport); | ||
163 | if (skb->ip_summed == CHECKSUM_COMPLETE) | ||
164 | skb->ip_summed = CHECKSUM_NONE; | ||
165 | } else { | ||
166 | /* full checksum calculation */ | ||
167 | udph->check = 0; | ||
168 | skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); | ||
169 | udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, | ||
170 | skb->len - udphoff, | ||
171 | cp->protocol, skb->csum); | ||
172 | if (udph->check == 0) | ||
173 | udph->check = CSUM_MANGLED_0; | ||
174 | IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", | ||
175 | pp->name, udph->check, | ||
176 | (char*)&(udph->check) - (char*)udph); | ||
177 | } | ||
178 | return 1; | ||
179 | } | ||
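
On the full-recalculation path above, csum_tcpudp_magic() folds the IPv4 pseudo-header into the UDP payload sum. As a sketch, the pseudo-header it accounts for (never sent on the wire, only checksummed) looks like:

	/* Sketch only: the IPv4 pseudo-header covered by
	 * csum_tcpudp_magic() for UDP. */
	struct udp_pseudohdr_example {
		__be32 saddr;		/* source address (cp->vaddr after SNAT) */
		__be32 daddr;		/* destination address */
		__u8   zero;		/* always 0 */
		__u8   protocol;	/* IPPROTO_UDP */
		__be16 len;		/* UDP header + data length */
	};
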
180 | |||
181 | |||
182 | static int | ||
183 | udp_dnat_handler(struct sk_buff *skb, | ||
184 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp) | ||
185 | { | ||
186 | struct udphdr *udph; | ||
187 | unsigned int udphoff = ip_hdrlen(skb); | ||
188 | |||
189 | /* csum_check requires unshared skb */ | ||
190 | if (!skb_make_writable(skb, udphoff+sizeof(*udph))) | ||
191 | return 0; | ||
192 | |||
193 | if (unlikely(cp->app != NULL)) { | ||
194 | /* Some checks before mangling */ | ||
195 | if (pp->csum_check && !pp->csum_check(skb, pp)) | ||
196 | return 0; | ||
197 | |||
198 | /* | ||
199 | * Attempt ip_vs_app call. | ||
200 | * It will fix up the ip_vs_conn if needed | ||
201 | */ | ||
202 | if (!ip_vs_app_pkt_in(cp, skb)) | ||
203 | return 0; | ||
204 | } | ||
205 | |||
206 | udph = (void *)ip_hdr(skb) + udphoff; | ||
207 | udph->dest = cp->dport; | ||
208 | |||
209 | /* | ||
210 | * Adjust UDP checksums | ||
211 | */ | ||
212 | if (!cp->app && (udph->check != 0)) { | ||
213 | /* Only port and addr are changed, do fast csum update */ | ||
214 | udp_fast_csum_update(udph, cp->vaddr, cp->daddr, | ||
215 | cp->vport, cp->dport); | ||
216 | if (skb->ip_summed == CHECKSUM_COMPLETE) | ||
217 | skb->ip_summed = CHECKSUM_NONE; | ||
218 | } else { | ||
219 | /* full checksum calculation */ | ||
220 | udph->check = 0; | ||
221 | skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); | ||
222 | udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, | ||
223 | skb->len - udphoff, | ||
224 | cp->protocol, skb->csum); | ||
225 | if (udph->check == 0) | ||
226 | udph->check = CSUM_MANGLED_0; | ||
227 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
228 | } | ||
229 | return 1; | ||
230 | } | ||
231 | |||
232 | |||
233 | static int | ||
234 | udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) | ||
235 | { | ||
236 | struct udphdr _udph, *uh; | ||
237 | const unsigned int udphoff = ip_hdrlen(skb); | ||
238 | |||
239 | uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); | ||
240 | if (uh == NULL) | ||
241 | return 0; | ||
242 | |||
243 | if (uh->check != 0) { | ||
244 | switch (skb->ip_summed) { | ||
245 | case CHECKSUM_NONE: | ||
246 | skb->csum = skb_checksum(skb, udphoff, | ||
247 | skb->len - udphoff, 0); /* fall through */ | ||
248 | case CHECKSUM_COMPLETE: | ||
249 | if (csum_tcpudp_magic(ip_hdr(skb)->saddr, | ||
250 | ip_hdr(skb)->daddr, | ||
251 | skb->len - udphoff, | ||
252 | ip_hdr(skb)->protocol, | ||
253 | skb->csum)) { | ||
254 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, | ||
255 | "Failed checksum for"); | ||
256 | return 0; | ||
257 | } | ||
258 | break; | ||
259 | default: | ||
260 | /* No need to checksum. */ | ||
261 | break; | ||
262 | } | ||
263 | } | ||
264 | return 1; | ||
265 | } | ||
266 | |||
267 | |||
268 | /* | ||
269 | * Note: the caller guarantees that only one of register_app, | ||
270 | * unregister_app or app_conn_bind is called each time. | ||
271 | */ | ||
272 | |||
273 | #define UDP_APP_TAB_BITS 4 | ||
274 | #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) | ||
275 | #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) | ||
276 | |||
277 | static struct list_head udp_apps[UDP_APP_TAB_SIZE]; | ||
278 | static DEFINE_SPINLOCK(udp_app_lock); | ||
279 | |||
280 | static inline __u16 udp_app_hashkey(__be16 port) | ||
281 | { | ||
282 | return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) | ||
283 | & UDP_APP_TAB_MASK; | ||
284 | } | ||
285 | |||
286 | |||
287 | static int udp_register_app(struct ip_vs_app *inc) | ||
288 | { | ||
289 | struct ip_vs_app *i; | ||
290 | __u16 hash; | ||
291 | __be16 port = inc->port; | ||
292 | int ret = 0; | ||
293 | |||
294 | hash = udp_app_hashkey(port); | ||
295 | |||
296 | |||
297 | spin_lock_bh(&udp_app_lock); | ||
298 | list_for_each_entry(i, &udp_apps[hash], p_list) { | ||
299 | if (i->port == port) { | ||
300 | ret = -EEXIST; | ||
301 | goto out; | ||
302 | } | ||
303 | } | ||
304 | list_add(&inc->p_list, &udp_apps[hash]); | ||
305 | atomic_inc(&ip_vs_protocol_udp.appcnt); | ||
306 | |||
307 | out: | ||
308 | spin_unlock_bh(&udp_app_lock); | ||
309 | return ret; | ||
310 | } | ||
311 | |||
312 | |||
313 | static void | ||
314 | udp_unregister_app(struct ip_vs_app *inc) | ||
315 | { | ||
316 | spin_lock_bh(&udp_app_lock); | ||
317 | atomic_dec(&ip_vs_protocol_udp.appcnt); | ||
318 | list_del(&inc->p_list); | ||
319 | spin_unlock_bh(&udp_app_lock); | ||
320 | } | ||
321 | |||
322 | |||
323 | static int udp_app_conn_bind(struct ip_vs_conn *cp) | ||
324 | { | ||
325 | int hash; | ||
326 | struct ip_vs_app *inc; | ||
327 | int result = 0; | ||
328 | |||
329 | /* Default binding: bind app only for NAT */ | ||
330 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) | ||
331 | return 0; | ||
332 | |||
333 | /* Lookup application incarnations and bind the right one */ | ||
334 | hash = udp_app_hashkey(cp->vport); | ||
335 | |||
336 | spin_lock(&udp_app_lock); | ||
337 | list_for_each_entry(inc, &udp_apps[hash], p_list) { | ||
338 | if (inc->port == cp->vport) { | ||
339 | if (unlikely(!ip_vs_app_inc_get(inc))) | ||
340 | break; | ||
341 | spin_unlock(&udp_app_lock); | ||
342 | |||
343 | IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" | ||
344 | "%u.%u.%u.%u:%u to app %s on port %u\n", | ||
345 | __func__, | ||
346 | NIPQUAD(cp->caddr), ntohs(cp->cport), | ||
347 | NIPQUAD(cp->vaddr), ntohs(cp->vport), | ||
348 | inc->name, ntohs(inc->port)); | ||
349 | cp->app = inc; | ||
350 | if (inc->init_conn) | ||
351 | result = inc->init_conn(inc, cp); | ||
352 | goto out; | ||
353 | } | ||
354 | } | ||
355 | spin_unlock(&udp_app_lock); | ||
356 | |||
357 | out: | ||
358 | return result; | ||
359 | } | ||
360 | |||
361 | |||
362 | static int udp_timeouts[IP_VS_UDP_S_LAST+1] = { | ||
363 | [IP_VS_UDP_S_NORMAL] = 5*60*HZ, | ||
364 | [IP_VS_UDP_S_LAST] = 2*HZ, | ||
365 | }; | ||
366 | |||
367 | static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = { | ||
368 | [IP_VS_UDP_S_NORMAL] = "UDP", | ||
369 | [IP_VS_UDP_S_LAST] = "BUG!", | ||
370 | }; | ||
371 | |||
372 | |||
373 | static int | ||
374 | udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) | ||
375 | { | ||
376 | return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST, | ||
377 | udp_state_name_table, sname, to); | ||
378 | } | ||
379 | |||
380 | static const char * udp_state_name(int state) | ||
381 | { | ||
382 | if (state >= IP_VS_UDP_S_LAST) | ||
383 | return "ERR!"; | ||
384 | return udp_state_name_table[state] ? udp_state_name_table[state] : "?"; | ||
385 | } | ||
386 | |||
387 | static int | ||
388 | udp_state_transition(struct ip_vs_conn *cp, int direction, | ||
389 | const struct sk_buff *skb, | ||
390 | struct ip_vs_protocol *pp) | ||
391 | { | ||
392 | cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL]; | ||
393 | return 1; | ||
394 | } | ||
395 | |||
396 | static void udp_init(struct ip_vs_protocol *pp) | ||
397 | { | ||
398 | IP_VS_INIT_HASH_TABLE(udp_apps); | ||
399 | pp->timeout_table = udp_timeouts; | ||
400 | } | ||
401 | |||
402 | static void udp_exit(struct ip_vs_protocol *pp) | ||
403 | { | ||
404 | } | ||
405 | |||
406 | |||
407 | struct ip_vs_protocol ip_vs_protocol_udp = { | ||
408 | .name = "UDP", | ||
409 | .protocol = IPPROTO_UDP, | ||
410 | .num_states = IP_VS_UDP_S_LAST, | ||
411 | .dont_defrag = 0, | ||
412 | .init = udp_init, | ||
413 | .exit = udp_exit, | ||
414 | .conn_schedule = udp_conn_schedule, | ||
415 | .conn_in_get = udp_conn_in_get, | ||
416 | .conn_out_get = udp_conn_out_get, | ||
417 | .snat_handler = udp_snat_handler, | ||
418 | .dnat_handler = udp_dnat_handler, | ||
419 | .csum_check = udp_csum_check, | ||
420 | .state_transition = udp_state_transition, | ||
421 | .state_name = udp_state_name, | ||
422 | .register_app = udp_register_app, | ||
423 | .unregister_app = udp_unregister_app, | ||
424 | .app_conn_bind = udp_app_conn_bind, | ||
425 | .debug_packet = ip_vs_tcpudp_debug_packet, | ||
426 | .timeout_change = NULL, | ||
427 | .set_state_timeout = udp_set_state_timeout, | ||
428 | }; | ||
diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c deleted file mode 100644 index 358110d17e59..000000000000 --- a/net/ipv4/ipvs/ip_vs_rr.c +++ /dev/null | |||
@@ -1,116 +0,0 @@ | |||
1 | /* | ||
2 | * IPVS: Round-Robin Scheduling module | ||
3 | * | ||
4 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
5 | * Peter Kese <peter.kese@ijs.si> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | * | ||
12 | * Fixes/Changes: | ||
13 | * Wensong Zhang : changed the ip_vs_rr_schedule to return dest | ||
14 | * Julian Anastasov : fixed the NULL pointer access bug in debugging | ||
15 | * Wensong Zhang : changed some cosmetic things for debugging | ||
16 | * Wensong Zhang : changed for the d-linked destination list | ||
17 | * Wensong Zhang : added the ip_vs_rr_update_svc | ||
18 | * Wensong Zhang : added any dest with weight=0 is quiesced | ||
19 | * | ||
20 | */ | ||
21 | |||
22 | #include <linux/module.h> | ||
23 | #include <linux/kernel.h> | ||
24 | |||
25 | #include <net/ip_vs.h> | ||
26 | |||
27 | |||
28 | static int ip_vs_rr_init_svc(struct ip_vs_service *svc) | ||
29 | { | ||
30 | svc->sched_data = &svc->destinations; | ||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | |||
35 | static int ip_vs_rr_done_svc(struct ip_vs_service *svc) | ||
36 | { | ||
37 | return 0; | ||
38 | } | ||
39 | |||
40 | |||
41 | static int ip_vs_rr_update_svc(struct ip_vs_service *svc) | ||
42 | { | ||
43 | svc->sched_data = &svc->destinations; | ||
44 | return 0; | ||
45 | } | ||
46 | |||
47 | |||
48 | /* | ||
49 | * Round-Robin Scheduling | ||
50 | */ | ||
51 | static struct ip_vs_dest * | ||
52 | ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | ||
53 | { | ||
54 | struct list_head *p, *q; | ||
55 | struct ip_vs_dest *dest; | ||
56 | |||
57 | IP_VS_DBG(6, "ip_vs_rr_schedule(): Scheduling...\n"); | ||
58 | |||
59 | write_lock(&svc->sched_lock); | ||
60 | p = (struct list_head *)svc->sched_data; | ||
61 | p = p->next; | ||
62 | q = p; | ||
63 | do { | ||
64 | /* skip list head */ | ||
65 | if (q == &svc->destinations) { | ||
66 | q = q->next; | ||
67 | continue; | ||
68 | } | ||
69 | |||
70 | dest = list_entry(q, struct ip_vs_dest, n_list); | ||
71 | if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && | ||
72 | atomic_read(&dest->weight) > 0) | ||
73 | /* HIT */ | ||
74 | goto out; | ||
75 | q = q->next; | ||
76 | } while (q != p); | ||
77 | write_unlock(&svc->sched_lock); | ||
78 | return NULL; | ||
79 | |||
80 | out: | ||
81 | svc->sched_data = q; | ||
82 | write_unlock(&svc->sched_lock); | ||
83 | IP_VS_DBG(6, "RR: server %u.%u.%u.%u:%u " | ||
84 | "activeconns %d refcnt %d weight %d\n", | ||
85 | NIPQUAD(dest->addr), ntohs(dest->port), | ||
86 | atomic_read(&dest->activeconns), | ||
87 | atomic_read(&dest->refcnt), atomic_read(&dest->weight)); | ||
88 | |||
89 | return dest; | ||
90 | } | ||
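
ip_vs_rr_schedule() above rotates svc->sched_data around the circular destination list, skipping the list head and any quiesced (weight 0) or overloaded server. The same policy over a plain array, as a sketch:

	/* Sketch only: round robin over an array of weights; *last plays
	 * the role of svc->sched_data (the position served previously). */
	static int rr_pick_example(const int weight[], int n, int *last)
	{
		int i;

		for (i = 1; i <= n; i++) {
			int q = (*last + i) % n;

			if (weight[q] > 0) {	/* weight 0 = quiesced */
				*last = q;
				return q;
			}
		}
		return -1;			/* nothing available */
	}
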
91 | |||
92 | |||
93 | static struct ip_vs_scheduler ip_vs_rr_scheduler = { | ||
94 | .name = "rr", /* name */ | ||
95 | .refcnt = ATOMIC_INIT(0), | ||
96 | .module = THIS_MODULE, | ||
97 | .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), | ||
98 | .init_service = ip_vs_rr_init_svc, | ||
99 | .done_service = ip_vs_rr_done_svc, | ||
100 | .update_service = ip_vs_rr_update_svc, | ||
101 | .schedule = ip_vs_rr_schedule, | ||
102 | }; | ||
103 | |||
104 | static int __init ip_vs_rr_init(void) | ||
105 | { | ||
106 | return register_ip_vs_scheduler(&ip_vs_rr_scheduler); | ||
107 | } | ||
108 | |||
109 | static void __exit ip_vs_rr_cleanup(void) | ||
110 | { | ||
111 | unregister_ip_vs_scheduler(&ip_vs_rr_scheduler); | ||
112 | } | ||
113 | |||
114 | module_init(ip_vs_rr_init); | ||
115 | module_exit(ip_vs_rr_cleanup); | ||
116 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c deleted file mode 100644 index a46ad9e35016..000000000000 --- a/net/ipv4/ipvs/ip_vs_sched.c +++ /dev/null | |||
@@ -1,251 +0,0 @@ | |||
1 | /* | ||
2 | * IPVS An implementation of the IP virtual server support for the | ||
3 | * LINUX operating system. IPVS is now implemented as a module | ||
4 | * over the Netfilter framework. IPVS can be used to build a | ||
5 | * high-performance and highly available server based on a | ||
6 | * cluster of servers. | ||
7 | * | ||
8 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
9 | * Peter Kese <peter.kese@ijs.si> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or | ||
12 | * modify it under the terms of the GNU General Public License | ||
13 | * as published by the Free Software Foundation; either version | ||
14 | * 2 of the License, or (at your option) any later version. | ||
15 | * | ||
16 | * Changes: | ||
17 | * | ||
18 | */ | ||
19 | |||
20 | #include <linux/module.h> | ||
21 | #include <linux/spinlock.h> | ||
22 | #include <linux/interrupt.h> | ||
23 | #include <asm/string.h> | ||
24 | #include <linux/kmod.h> | ||
25 | #include <linux/sysctl.h> | ||
26 | |||
27 | #include <net/ip_vs.h> | ||
28 | |||
29 | /* | ||
30 | * IPVS scheduler list | ||
31 | */ | ||
32 | static LIST_HEAD(ip_vs_schedulers); | ||
33 | |||
34 | /* lock for service table */ | ||
35 | static DEFINE_RWLOCK(__ip_vs_sched_lock); | ||
36 | |||
37 | |||
38 | /* | ||
39 | * Bind a service with a scheduler | ||
40 | */ | ||
41 | int ip_vs_bind_scheduler(struct ip_vs_service *svc, | ||
42 | struct ip_vs_scheduler *scheduler) | ||
43 | { | ||
44 | int ret; | ||
45 | |||
46 | if (svc == NULL) { | ||
47 | IP_VS_ERR("ip_vs_bind_scheduler(): svc arg NULL\n"); | ||
48 | return -EINVAL; | ||
49 | } | ||
50 | if (scheduler == NULL) { | ||
51 | IP_VS_ERR("ip_vs_bind_scheduler(): scheduler arg NULL\n"); | ||
52 | return -EINVAL; | ||
53 | } | ||
54 | |||
55 | svc->scheduler = scheduler; | ||
56 | |||
57 | if (scheduler->init_service) { | ||
58 | ret = scheduler->init_service(svc); | ||
59 | if (ret) { | ||
60 | IP_VS_ERR("ip_vs_bind_scheduler(): init error\n"); | ||
61 | return ret; | ||
62 | } | ||
63 | } | ||
64 | |||
65 | return 0; | ||
66 | } | ||
67 | |||
68 | |||
69 | /* | ||
70 | * Unbind a service with its scheduler | ||
71 | */ | ||
72 | int ip_vs_unbind_scheduler(struct ip_vs_service *svc) | ||
73 | { | ||
74 | struct ip_vs_scheduler *sched; | ||
75 | |||
76 | if (svc == NULL) { | ||
77 | IP_VS_ERR("ip_vs_unbind_scheduler(): svc arg NULL\n"); | ||
78 | return -EINVAL; | ||
79 | } | ||
80 | |||
81 | sched = svc->scheduler; | ||
82 | if (sched == NULL) { | ||
83 | IP_VS_ERR("ip_vs_unbind_scheduler(): svc isn't bound\n"); | ||
84 | return -EINVAL; | ||
85 | } | ||
86 | |||
87 | if (sched->done_service) { | ||
88 | if (sched->done_service(svc) != 0) { | ||
89 | IP_VS_ERR("ip_vs_unbind_scheduler(): done error\n"); | ||
90 | return -EINVAL; | ||
91 | } | ||
92 | } | ||
93 | |||
94 | svc->scheduler = NULL; | ||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | |||
99 | /* | ||
100 | * Get scheduler in the scheduler list by name | ||
101 | */ | ||
102 | static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name) | ||
103 | { | ||
104 | struct ip_vs_scheduler *sched; | ||
105 | |||
106 | IP_VS_DBG(2, "ip_vs_sched_getbyname(): sched_name \"%s\"\n", | ||
107 | sched_name); | ||
108 | |||
109 | read_lock_bh(&__ip_vs_sched_lock); | ||
110 | |||
111 | list_for_each_entry(sched, &ip_vs_schedulers, n_list) { | ||
112 | /* | ||
113 | * Test and get the modules atomically | ||
114 | */ | ||
115 | if (sched->module && !try_module_get(sched->module)) { | ||
116 | /* | ||
117 | * This scheduler is just deleted | ||
118 | */ | ||
119 | continue; | ||
120 | } | ||
121 | if (strcmp(sched_name, sched->name)==0) { | ||
122 | /* HIT */ | ||
123 | read_unlock_bh(&__ip_vs_sched_lock); | ||
124 | return sched; | ||
125 | } | ||
126 | if (sched->module) | ||
127 | module_put(sched->module); | ||
128 | } | ||
129 | |||
130 | read_unlock_bh(&__ip_vs_sched_lock); | ||
131 | return NULL; | ||
132 | } | ||
133 | |||
134 | |||
135 | /* | ||
136 | * Lookup scheduler and try to load it if it doesn't exist | ||
137 | */ | ||
138 | struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name) | ||
139 | { | ||
140 | struct ip_vs_scheduler *sched; | ||
141 | |||
142 | /* | ||
143 | * Search for the scheduler by sched_name | ||
144 | */ | ||
145 | sched = ip_vs_sched_getbyname(sched_name); | ||
146 | |||
147 | /* | ||
148 | * If scheduler not found, load the module and search again | ||
149 | */ | ||
150 | if (sched == NULL) { | ||
151 | request_module("ip_vs_%s", sched_name); | ||
152 | sched = ip_vs_sched_getbyname(sched_name); | ||
153 | } | ||
154 | |||
155 | return sched; | ||
156 | } | ||
157 | |||
158 | void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) | ||
159 | { | ||
160 | if (scheduler->module) | ||
161 | module_put(scheduler->module); | ||
162 | } | ||
163 | |||
164 | |||
165 | /* | ||
166 | * Register a scheduler in the scheduler list | ||
167 | */ | ||
168 | int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) | ||
169 | { | ||
170 | struct ip_vs_scheduler *sched; | ||
171 | |||
172 | if (!scheduler) { | ||
173 | IP_VS_ERR("register_ip_vs_scheduler(): NULL arg\n"); | ||
174 | return -EINVAL; | ||
175 | } | ||
176 | |||
177 | if (!scheduler->name) { | ||
178 | IP_VS_ERR("register_ip_vs_scheduler(): NULL scheduler_name\n"); | ||
179 | return -EINVAL; | ||
180 | } | ||
181 | |||
182 | /* increase the module use count */ | ||
183 | ip_vs_use_count_inc(); | ||
184 | |||
185 | write_lock_bh(&__ip_vs_sched_lock); | ||
186 | |||
187 | if (!list_empty(&scheduler->n_list)) { | ||
188 | write_unlock_bh(&__ip_vs_sched_lock); | ||
189 | ip_vs_use_count_dec(); | ||
190 | IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler " | ||
191 | "already linked\n", scheduler->name); | ||
192 | return -EINVAL; | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | * Make sure that the scheduler with this name doesn't exist | ||
197 | * in the scheduler list. | ||
198 | */ | ||
199 | list_for_each_entry(sched, &ip_vs_schedulers, n_list) { | ||
200 | if (strcmp(scheduler->name, sched->name) == 0) { | ||
201 | write_unlock_bh(&__ip_vs_sched_lock); | ||
202 | ip_vs_use_count_dec(); | ||
203 | IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler " | ||
204 | "already existed in the system\n", | ||
205 | scheduler->name); | ||
206 | return -EINVAL; | ||
207 | } | ||
208 | } | ||
209 | /* | ||
210 | * Add it into the d-linked scheduler list | ||
211 | */ | ||
212 | list_add(&scheduler->n_list, &ip_vs_schedulers); | ||
213 | write_unlock_bh(&__ip_vs_sched_lock); | ||
214 | |||
215 | IP_VS_INFO("[%s] scheduler registered.\n", scheduler->name); | ||
216 | |||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | |||
221 | /* | ||
222 | * Unregister a scheduler from the scheduler list | ||
223 | */ | ||
224 | int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) | ||
225 | { | ||
226 | if (!scheduler) { | ||
227 | IP_VS_ERR( "unregister_ip_vs_scheduler(): NULL arg\n"); | ||
228 | return -EINVAL; | ||
229 | } | ||
230 | |||
231 | write_lock_bh(&__ip_vs_sched_lock); | ||
232 | if (list_empty(&scheduler->n_list)) { | ||
233 | write_unlock_bh(&__ip_vs_sched_lock); | ||
234 | IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler " | ||
235 | "is not in the list. failed\n", scheduler->name); | ||
236 | return -EINVAL; | ||
237 | } | ||
238 | |||
239 | /* | ||
240 | * Remove it from the d-linked scheduler list | ||
241 | */ | ||
242 | list_del(&scheduler->n_list); | ||
243 | write_unlock_bh(&__ip_vs_sched_lock); | ||
244 | |||
245 | /* decrease the module use count */ | ||
246 | ip_vs_use_count_dec(); | ||
247 | |||
248 | IP_VS_INFO("[%s] scheduler unregistered.\n", scheduler->name); | ||
249 | |||
250 | return 0; | ||
251 | } | ||
diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c deleted file mode 100644 index 77663d84cbd1..000000000000 --- a/net/ipv4/ipvs/ip_vs_sed.c +++ /dev/null | |||
@@ -1,161 +0,0 @@ | |||
1 | /* | ||
2 | * IPVS: Shortest Expected Delay scheduling module | ||
3 | * | ||
4 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * Changes: | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | /* | ||
16 | * The SED algorithm attempts to minimize each job's expected delay until | ||
17 | * completion. The expected delay that the job will experience is | ||
18 | * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of | ||
19 | * jobs on the ith server and Ui is the fixed service rate (weight) of | ||
20 | * the ith server. The SED algorithm adopts a greedy policy in which each | ||
21 | * job does what is in its own best interest, i.e. joins the queue that would | ||
22 | * minimize its expected delay of completion. | ||
23 | * | ||
24 | * See the following paper for more information: | ||
25 | * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing | ||
26 | * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88, | ||
27 | * pages 986-994, 1988. | ||
28 | * | ||
29 | * Thanks must go to Marko Buuri <marko@buuri.name> for talking SED to me. | ||
30 | * | ||
31 | * The difference between SED and WLC is that SED includes the incoming | ||
32 | * job in the cost function (the increment of 1). SED may outperform | ||
33 | * WLC when scheduling big jobs in large heterogeneous systems | ||
34 | * (where the server weights vary a lot). | ||
35 | * | ||
36 | */ | ||
37 | |||
38 | #include <linux/module.h> | ||
39 | #include <linux/kernel.h> | ||
40 | |||
41 | #include <net/ip_vs.h> | ||
42 | |||
43 | |||
44 | static int | ||
45 | ip_vs_sed_init_svc(struct ip_vs_service *svc) | ||
46 | { | ||
47 | return 0; | ||
48 | } | ||
49 | |||
50 | |||
51 | static int | ||
52 | ip_vs_sed_done_svc(struct ip_vs_service *svc) | ||
53 | { | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | |||
58 | static int | ||
59 | ip_vs_sed_update_svc(struct ip_vs_service *svc) | ||
60 | { | ||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | |||
65 | static inline unsigned int | ||
66 | ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) | ||
67 | { | ||
68 | /* | ||
69 | * We only use the active connection number in the cost | ||
70 | * calculation here. | ||
71 | */ | ||
72 | return atomic_read(&dest->activeconns) + 1; | ||
73 | } | ||
74 | |||
75 | |||
76 | /* | ||
77 | * Shortest Expected Delay scheduling | ||
78 | */ | ||
79 | static struct ip_vs_dest * | ||
80 | ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | ||
81 | { | ||
82 | struct ip_vs_dest *dest, *least; | ||
83 | unsigned int loh, doh; | ||
84 | |||
85 | IP_VS_DBG(6, "ip_vs_sed_schedule(): Scheduling...\n"); | ||
86 | |||
87 | /* | ||
88 | * We calculate the load of each dest server as follows: | ||
89 | * (server expected overhead) / dest->weight | ||
90 | * | ||
91 | * Remember -- no floats in kernel mode!!! | ||
92 | * The comparison of h1*w2 > h2*w1 is equivalent to that of | ||
93 | * h1/w1 > h2/w2 | ||
94 | * if every weight is larger than zero. | ||
95 | * | ||
96 | * The server with weight=0 is quiesced and will not receive any | ||
97 | * new connections. | ||
98 | */ | ||
99 | |||
100 | list_for_each_entry(dest, &svc->destinations, n_list) { | ||
101 | if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && | ||
102 | atomic_read(&dest->weight) > 0) { | ||
103 | least = dest; | ||
104 | loh = ip_vs_sed_dest_overhead(least); | ||
105 | goto nextstage; | ||
106 | } | ||
107 | } | ||
108 | return NULL; | ||
109 | |||
110 | /* | ||
111 | * Find the destination with the least load. | ||
112 | */ | ||
113 | nextstage: | ||
114 | list_for_each_entry_continue(dest, &svc->destinations, n_list) { | ||
115 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) | ||
116 | continue; | ||
117 | doh = ip_vs_sed_dest_overhead(dest); | ||
118 | if (loh * atomic_read(&dest->weight) > | ||
119 | doh * atomic_read(&least->weight)) { | ||
120 | least = dest; | ||
121 | loh = doh; | ||
122 | } | ||
123 | } | ||
124 | |||
125 | IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u " | ||
126 | "activeconns %d refcnt %d weight %d overhead %d\n", | ||
127 | NIPQUAD(least->addr), ntohs(least->port), | ||
128 | atomic_read(&least->activeconns), | ||
129 | atomic_read(&least->refcnt), | ||
130 | atomic_read(&least->weight), loh); | ||
131 | |||
132 | return least; | ||
133 | } | ||
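
The loop above selects the server minimising (activeconns+1)/weight without kernel floating point: ip_vs_sed_dest_overhead() already adds the +1, and the division is replaced by cross-multiplication, valid while all weights are positive. A minimal sketch of that test:

	/* Sketch only: true when "dest" has strictly lower expected delay
	 * than the current "least", i.e. loh/wl > doh/wd rewritten as
	 * loh*wd > doh*wl for positive weights. */
	static inline int sed_dest_is_better_example(unsigned int loh, int wl,
						     unsigned int doh, int wd)
	{
		return loh * (unsigned int)wd > doh * (unsigned int)wl;
	}
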
134 | |||
135 | |||
136 | static struct ip_vs_scheduler ip_vs_sed_scheduler = | ||
137 | { | ||
138 | .name = "sed", | ||
139 | .refcnt = ATOMIC_INIT(0), | ||
140 | .module = THIS_MODULE, | ||
141 | .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list), | ||
142 | .init_service = ip_vs_sed_init_svc, | ||
143 | .done_service = ip_vs_sed_done_svc, | ||
144 | .update_service = ip_vs_sed_update_svc, | ||
145 | .schedule = ip_vs_sed_schedule, | ||
146 | }; | ||
147 | |||
148 | |||
149 | static int __init ip_vs_sed_init(void) | ||
150 | { | ||
151 | return register_ip_vs_scheduler(&ip_vs_sed_scheduler); | ||
152 | } | ||
153 | |||
154 | static void __exit ip_vs_sed_cleanup(void) | ||
155 | { | ||
156 | unregister_ip_vs_scheduler(&ip_vs_sed_scheduler); | ||
157 | } | ||
158 | |||
159 | module_init(ip_vs_sed_init); | ||
160 | module_exit(ip_vs_sed_cleanup); | ||
161 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c deleted file mode 100644 index 7b979e228056..000000000000 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ /dev/null | |||
@@ -1,255 +0,0 @@ | |||
1 | /* | ||
2 | * IPVS: Source Hashing scheduling module | ||
3 | * | ||
4 | * Authors: Wensong Zhang <wensong@gnuchina.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * Changes: | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | /* | ||
16 | * The sh algorithm is to select server by the hash key of source IP | ||
17 | * address. The pseudo code is as follows: | ||
18 | * | ||
19 | * n <- servernode[src_ip]; | ||
20 | * if (n is dead) OR | ||
21 | * (n is overloaded) or (n.weight <= 0) then | ||
22 | * return NULL; | ||
23 | * | ||
24 | * return n; | ||
25 | * | ||
26 | * Note that servernode is a 256-bucket hash table that maps the hash | ||
27 | * index derived from packet source IP address to the current server | ||
28 | * array. If the sh scheduler is used in a cache cluster, it is good to | ||
29 | * combine it with the cache_bypass feature. When the statically assigned | ||
30 | * server is dead or overloaded, the load balancer can bypass the cache | ||
31 | * server and send requests to the original server directly. | ||
32 | * | ||
33 | */ | ||
34 | |||
35 | #include <linux/ip.h> | ||
36 | #include <linux/module.h> | ||
37 | #include <linux/kernel.h> | ||
38 | #include <linux/skbuff.h> | ||
39 | |||
40 | #include <net/ip_vs.h> | ||
41 | |||
42 | |||
43 | /* | ||
44 | * IPVS SH bucket | ||
45 | */ | ||
46 | struct ip_vs_sh_bucket { | ||
47 | struct ip_vs_dest *dest; /* real server (cache) */ | ||
48 | }; | ||
49 | |||
50 | /* | ||
51 | * for IPVS SH entry hash table | ||
52 | */ | ||
53 | #ifndef CONFIG_IP_VS_SH_TAB_BITS | ||
54 | #define CONFIG_IP_VS_SH_TAB_BITS 8 | ||
55 | #endif | ||
56 | #define IP_VS_SH_TAB_BITS CONFIG_IP_VS_SH_TAB_BITS | ||
57 | #define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS) | ||
58 | #define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1) | ||
59 | |||
60 | |||
61 | /* | ||
62 | * Returns hash value for IPVS SH entry | ||
63 | */ | ||
64 | static inline unsigned ip_vs_sh_hashkey(__be32 addr) | ||
65 | { | ||
66 | return (ntohl(addr)*2654435761UL) & IP_VS_SH_TAB_MASK; | ||
67 | } | ||
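
The multiplier 2654435761 is a prime close to 2^32/phi (the golden ratio), the classic Knuth multiplicative-hash constant, so nearby client addresses scatter across the 256 buckets. The same computation with the default table constants spelled out, as a sketch:

	/* Sketch only: multiplicative hash of a host-order IPv4 address
	 * into one of 2^8 buckets (IP_VS_SH_TAB_BITS = 8, MASK = 255). */
	static inline unsigned int sh_hashkey_example(unsigned int addr_host)
	{
		return (addr_host * 2654435761UL) & 255;
	}
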
68 | |||
69 | |||
70 | /* | ||
71 | * Get ip_vs_dest associated with supplied parameters. | ||
72 | */ | ||
73 | static inline struct ip_vs_dest * | ||
74 | ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __be32 addr) | ||
75 | { | ||
76 | return (tbl[ip_vs_sh_hashkey(addr)]).dest; | ||
77 | } | ||
78 | |||
79 | |||
80 | /* | ||
81 | * Assign all the hash buckets of the specified table with the service. | ||
82 | */ | ||
83 | static int | ||
84 | ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc) | ||
85 | { | ||
86 | int i; | ||
87 | struct ip_vs_sh_bucket *b; | ||
88 | struct list_head *p; | ||
89 | struct ip_vs_dest *dest; | ||
90 | |||
91 | b = tbl; | ||
92 | p = &svc->destinations; | ||
93 | for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { | ||
94 | if (list_empty(p)) { | ||
95 | b->dest = NULL; | ||
96 | } else { | ||
97 | if (p == &svc->destinations) | ||
98 | p = p->next; | ||
99 | |||
100 | dest = list_entry(p, struct ip_vs_dest, n_list); | ||
101 | atomic_inc(&dest->refcnt); | ||
102 | b->dest = dest; | ||
103 | |||
104 | p = p->next; | ||
105 | } | ||
106 | b++; | ||
107 | } | ||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | |||
112 | /* | ||
113 | * Flush all the hash buckets of the specified table. | ||
114 | */ | ||
115 | static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl) | ||
116 | { | ||
117 | int i; | ||
118 | struct ip_vs_sh_bucket *b; | ||
119 | |||
120 | b = tbl; | ||
121 | for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { | ||
122 | if (b->dest) { | ||
123 | atomic_dec(&b->dest->refcnt); | ||
124 | b->dest = NULL; | ||
125 | } | ||
126 | b++; | ||
127 | } | ||
128 | } | ||
129 | |||
130 | |||
131 | static int ip_vs_sh_init_svc(struct ip_vs_service *svc) | ||
132 | { | ||
133 | struct ip_vs_sh_bucket *tbl; | ||
134 | |||
135 | /* allocate the SH table for this service */ | ||
136 | tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE, | ||
137 | GFP_ATOMIC); | ||
138 | if (tbl == NULL) { | ||
139 | IP_VS_ERR("ip_vs_sh_init_svc(): no memory\n"); | ||
140 | return -ENOMEM; | ||
141 | } | ||
142 | svc->sched_data = tbl; | ||
143 | IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for " | ||
144 | "current service\n", | ||
145 | sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); | ||
146 | |||
147 | /* assign the hash buckets with the updated service */ | ||
148 | ip_vs_sh_assign(tbl, svc); | ||
149 | |||
150 | return 0; | ||
151 | } | ||
152 | |||
153 | |||
154 | static int ip_vs_sh_done_svc(struct ip_vs_service *svc) | ||
155 | { | ||
156 | struct ip_vs_sh_bucket *tbl = svc->sched_data; | ||
157 | |||
158 | /* got to clean up hash buckets here */ | ||
159 | ip_vs_sh_flush(tbl); | ||
160 | |||
161 | /* release the table itself */ | ||
162 | kfree(svc->sched_data); | ||
163 | IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n", | ||
164 | sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); | ||
165 | |||
166 | return 0; | ||
167 | } | ||
168 | |||
169 | |||
170 | static int ip_vs_sh_update_svc(struct ip_vs_service *svc) | ||
171 | { | ||
172 | struct ip_vs_sh_bucket *tbl = svc->sched_data; | ||
173 | |||
174 | /* got to clean up hash buckets here */ | ||
175 | ip_vs_sh_flush(tbl); | ||
176 | |||
177 | /* assign the hash buckets with the updated service */ | ||
178 | ip_vs_sh_assign(tbl, svc); | ||
179 | |||
180 | return 0; | ||
181 | } | ||
182 | |||
183 | |||
184 | /* | ||
185 | * If the dest flags is set with IP_VS_DEST_F_OVERLOAD, | ||
186 | * consider that the server is overloaded here. | ||
187 | */ | ||
188 | static inline int is_overloaded(struct ip_vs_dest *dest) | ||
189 | { | ||
190 | return dest->flags & IP_VS_DEST_F_OVERLOAD; | ||
191 | } | ||
192 | |||
193 | |||
194 | /* | ||
195 | * Source Hashing scheduling | ||
196 | */ | ||
197 | static struct ip_vs_dest * | ||
198 | ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | ||
199 | { | ||
200 | struct ip_vs_dest *dest; | ||
201 | struct ip_vs_sh_bucket *tbl; | ||
202 | struct iphdr *iph = ip_hdr(skb); | ||
203 | |||
204 | IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); | ||
205 | |||
206 | tbl = (struct ip_vs_sh_bucket *)svc->sched_data; | ||
207 | dest = ip_vs_sh_get(tbl, iph->saddr); | ||
208 | if (!dest | ||
209 | || !(dest->flags & IP_VS_DEST_F_AVAILABLE) | ||
210 | || atomic_read(&dest->weight) <= 0 | ||
211 | || is_overloaded(dest)) { | ||
212 | return NULL; | ||
213 | } | ||
214 | |||
215 | IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u " | ||
216 | "--> server %u.%u.%u.%u:%d\n", | ||
217 | NIPQUAD(iph->saddr), | ||
218 | NIPQUAD(dest->addr), | ||
219 | ntohs(dest->port)); | ||
220 | |||
221 | return dest; | ||
222 | } | ||
223 | |||
224 | |||
225 | /* | ||
226 | * IPVS SH Scheduler structure | ||
227 | */ | ||
228 | static struct ip_vs_scheduler ip_vs_sh_scheduler = | ||
229 | { | ||
230 | .name = "sh", | ||
231 | .refcnt = ATOMIC_INIT(0), | ||
232 | .module = THIS_MODULE, | ||
233 | .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), | ||
234 | .init_service = ip_vs_sh_init_svc, | ||
235 | .done_service = ip_vs_sh_done_svc, | ||
236 | .update_service = ip_vs_sh_update_svc, | ||
237 | .schedule = ip_vs_sh_schedule, | ||
238 | }; | ||
239 | |||
240 | |||
241 | static int __init ip_vs_sh_init(void) | ||
242 | { | ||
243 | return register_ip_vs_scheduler(&ip_vs_sh_scheduler); | ||
244 | } | ||
245 | |||
246 | |||
247 | static void __exit ip_vs_sh_cleanup(void) | ||
248 | { | ||
249 | unregister_ip_vs_scheduler(&ip_vs_sh_scheduler); | ||
250 | } | ||
251 | |||
252 | |||
253 | module_init(ip_vs_sh_init); | ||
254 | module_exit(ip_vs_sh_cleanup); | ||
255 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c deleted file mode 100644 index a652da2c3200..000000000000 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ /dev/null | |||
@@ -1,930 +0,0 @@ | |||
1 | /* | ||
2 | * IPVS An implementation of the IP virtual server support for the | ||
3 | * LINUX operating system. IPVS is now implemented as a module | ||
4 | * over the NetFilter framework. IPVS can be used to build a | ||
5 | * high-performance and highly available server based on a | ||
6 | * cluster of servers. | ||
7 | * | ||
8 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
9 | * | ||
10 | * ip_vs_sync: sync connection info from master load balancer to backups | ||
11 | * through multicast | ||
12 | * | ||
13 | * Changes: | ||
14 | * Alexandre Cassen : Added master & backup support at a time. | ||
15 | * Alexandre Cassen : Added SyncID support for incoming sync | ||
16 | * messages filtering. | ||
17 | * Justin Ossevoort : Fix endian problem on sync message size. | ||
18 | */ | ||
19 | |||
20 | #include <linux/module.h> | ||
21 | #include <linux/slab.h> | ||
22 | #include <linux/inetdevice.h> | ||
23 | #include <linux/net.h> | ||
24 | #include <linux/completion.h> | ||
25 | #include <linux/delay.h> | ||
26 | #include <linux/skbuff.h> | ||
27 | #include <linux/in.h> | ||
28 | #include <linux/igmp.h> /* for ip_mc_join_group */ | ||
29 | #include <linux/udp.h> | ||
30 | #include <linux/err.h> | ||
31 | #include <linux/kthread.h> | ||
32 | #include <linux/wait.h> | ||
33 | |||
34 | #include <net/ip.h> | ||
35 | #include <net/sock.h> | ||
36 | |||
37 | #include <net/ip_vs.h> | ||
38 | |||
39 | #define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */ | ||
40 | #define IP_VS_SYNC_PORT 8848 /* multicast port */ | ||
41 | |||
42 | |||
43 | /* | ||
44 | * IPVS sync connection entry | ||
45 | */ | ||
46 | struct ip_vs_sync_conn { | ||
47 | __u8 reserved; | ||
48 | |||
49 | /* Protocol, addresses and port numbers */ | ||
50 | __u8 protocol; /* Which protocol (TCP/UDP) */ | ||
51 | __be16 cport; | ||
52 | __be16 vport; | ||
53 | __be16 dport; | ||
54 | __be32 caddr; /* client address */ | ||
55 | __be32 vaddr; /* virtual address */ | ||
56 | __be32 daddr; /* destination address */ | ||
57 | |||
58 | /* Flags and state transition */ | ||
59 | __be16 flags; /* status flags */ | ||
60 | __be16 state; /* state info */ | ||
61 | |||
62 | /* The sequence options start here */ | ||
63 | }; | ||
64 | |||
65 | struct ip_vs_sync_conn_options { | ||
66 | struct ip_vs_seq in_seq; /* incoming seq. struct */ | ||
67 | struct ip_vs_seq out_seq; /* outgoing seq. struct */ | ||
68 | }; | ||
69 | |||
70 | struct ip_vs_sync_thread_data { | ||
71 | struct socket *sock; | ||
72 | char *buf; | ||
73 | }; | ||
74 | |||
75 | #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) | ||
76 | #define FULL_CONN_SIZE \ | ||
77 | (sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options)) | ||
78 | |||
79 | |||
80 | /* | ||
81 | The master multicasts messages to the backup load balancers in the | ||
82 | following format. | ||
83 | |||
84 | 0 1 2 3 | ||
85 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
86 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
87 | | Count Conns | SyncID | Size | | ||
88 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
89 | | | | ||
90 | | IPVS Sync Connection (1) | | ||
91 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
92 | | . | | ||
93 | | . | | ||
94 | | . | | ||
95 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
96 | | | | ||
97 | | IPVS Sync Connection (n) | | ||
98 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
99 | */ | ||
100 | |||
101 | #define SYNC_MESG_HEADER_LEN 4 | ||
102 | |||
103 | struct ip_vs_sync_mesg { | ||
104 | __u8 nr_conns; | ||
105 | __u8 syncid; | ||
106 | __u16 size; | ||
107 | |||
108 | /* ip_vs_sync_conn entries start here */ | ||
109 | }; | ||
110 | |||
111 | /* the maximum length of sync (sending/receiving) message */ | ||
112 | static int sync_send_mesg_maxlen; | ||
113 | static int sync_recv_mesg_maxlen; | ||
114 | |||
115 | struct ip_vs_sync_buff { | ||
116 | struct list_head list; | ||
117 | unsigned long firstuse; | ||
118 | |||
119 | /* pointers for the message data */ | ||
120 | struct ip_vs_sync_mesg *mesg; | ||
121 | unsigned char *head; | ||
122 | unsigned char *end; | ||
123 | }; | ||
124 | |||
125 | |||
126 | /* the sync_buff list head and the lock */ | ||
127 | static LIST_HEAD(ip_vs_sync_queue); | ||
128 | static DEFINE_SPINLOCK(ip_vs_sync_lock); | ||
129 | |||
130 | /* current sync_buff for accepting new conn entries */ | ||
131 | static struct ip_vs_sync_buff *curr_sb = NULL; | ||
132 | static DEFINE_SPINLOCK(curr_sb_lock); | ||
133 | |||
134 | /* ipvs sync daemon state */ | ||
135 | volatile int ip_vs_sync_state = IP_VS_STATE_NONE; | ||
136 | volatile int ip_vs_master_syncid = 0; | ||
137 | volatile int ip_vs_backup_syncid = 0; | ||
138 | |||
139 | /* multicast interface name */ | ||
140 | char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | ||
141 | char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | ||
142 | |||
143 | /* sync daemon tasks */ | ||
144 | static struct task_struct *sync_master_thread; | ||
145 | static struct task_struct *sync_backup_thread; | ||
146 | |||
147 | /* multicast addr */ | ||
148 | static struct sockaddr_in mcast_addr = { | ||
149 | .sin_family = AF_INET, | ||
150 | .sin_port = __constant_htons(IP_VS_SYNC_PORT), | ||
151 | .sin_addr.s_addr = __constant_htonl(IP_VS_SYNC_GROUP), | ||
152 | }; | ||
153 | |||
154 | |||
155 | static inline struct ip_vs_sync_buff *sb_dequeue(void) | ||
156 | { | ||
157 | struct ip_vs_sync_buff *sb; | ||
158 | |||
159 | spin_lock_bh(&ip_vs_sync_lock); | ||
160 | if (list_empty(&ip_vs_sync_queue)) { | ||
161 | sb = NULL; | ||
162 | } else { | ||
163 | sb = list_entry(ip_vs_sync_queue.next, | ||
164 | struct ip_vs_sync_buff, | ||
165 | list); | ||
166 | list_del(&sb->list); | ||
167 | } | ||
168 | spin_unlock_bh(&ip_vs_sync_lock); | ||
169 | |||
170 | return sb; | ||
171 | } | ||
172 | |||
173 | static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void) | ||
174 | { | ||
175 | struct ip_vs_sync_buff *sb; | ||
176 | |||
177 | if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) | ||
178 | return NULL; | ||
179 | |||
180 | if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) { | ||
181 | kfree(sb); | ||
182 | return NULL; | ||
183 | } | ||
184 | sb->mesg->nr_conns = 0; | ||
185 | sb->mesg->syncid = ip_vs_master_syncid; | ||
186 | sb->mesg->size = 4; | ||
187 | sb->head = (unsigned char *)sb->mesg + 4; | ||
188 | sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen; | ||
189 | sb->firstuse = jiffies; | ||
190 | return sb; | ||
191 | } | ||
192 | |||
193 | static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) | ||
194 | { | ||
195 | kfree(sb->mesg); | ||
196 | kfree(sb); | ||
197 | } | ||
198 | |||
199 | static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) | ||
200 | { | ||
201 | spin_lock(&ip_vs_sync_lock); | ||
202 | if (ip_vs_sync_state & IP_VS_STATE_MASTER) | ||
203 | list_add_tail(&sb->list, &ip_vs_sync_queue); | ||
204 | else | ||
205 | ip_vs_sync_buff_release(sb); | ||
206 | spin_unlock(&ip_vs_sync_lock); | ||
207 | } | ||
208 | |||
209 | /* | ||
210 | * Get the current sync buffer if it has been created for more | ||
211 | * than the specified time or the specified time is zero. | ||
212 | */ | ||
213 | static inline struct ip_vs_sync_buff * | ||
214 | get_curr_sync_buff(unsigned long time) | ||
215 | { | ||
216 | struct ip_vs_sync_buff *sb; | ||
217 | |||
218 | spin_lock_bh(&curr_sb_lock); | ||
219 | if (curr_sb && (time == 0 || | ||
220 | time_before(jiffies - curr_sb->firstuse, time))) { | ||
221 | sb = curr_sb; | ||
222 | curr_sb = NULL; | ||
223 | } else | ||
224 | sb = NULL; | ||
225 | spin_unlock_bh(&curr_sb_lock); | ||
226 | return sb; | ||
227 | } | ||
228 | |||
229 | |||
230 | /* | ||
231 | * Add an ip_vs_conn information into the current sync_buff. | ||
232 | * Called by ip_vs_in. | ||
233 | */ | ||
234 | void ip_vs_sync_conn(struct ip_vs_conn *cp) | ||
235 | { | ||
236 | struct ip_vs_sync_mesg *m; | ||
237 | struct ip_vs_sync_conn *s; | ||
238 | int len; | ||
239 | |||
240 | spin_lock(&curr_sb_lock); | ||
241 | if (!curr_sb) { | ||
242 | if (!(curr_sb=ip_vs_sync_buff_create())) { | ||
243 | spin_unlock(&curr_sb_lock); | ||
244 | IP_VS_ERR("ip_vs_sync_buff_create failed.\n"); | ||
245 | return; | ||
246 | } | ||
247 | } | ||
248 | |||
249 | len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : | ||
250 | SIMPLE_CONN_SIZE; | ||
251 | m = curr_sb->mesg; | ||
252 | s = (struct ip_vs_sync_conn *)curr_sb->head; | ||
253 | |||
254 | /* copy members */ | ||
255 | s->protocol = cp->protocol; | ||
256 | s->cport = cp->cport; | ||
257 | s->vport = cp->vport; | ||
258 | s->dport = cp->dport; | ||
259 | s->caddr = cp->caddr; | ||
260 | s->vaddr = cp->vaddr; | ||
261 | s->daddr = cp->daddr; | ||
262 | s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED); | ||
263 | s->state = htons(cp->state); | ||
264 | if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { | ||
265 | struct ip_vs_sync_conn_options *opt = | ||
266 | (struct ip_vs_sync_conn_options *)&s[1]; | ||
267 | memcpy(opt, &cp->in_seq, sizeof(*opt)); | ||
268 | } | ||
269 | |||
270 | m->nr_conns++; | ||
271 | m->size += len; | ||
272 | curr_sb->head += len; | ||
273 | |||
274 | /* check if there is space for the next entry */ | ||
275 | if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) { | ||
276 | sb_queue_tail(curr_sb); | ||
277 | curr_sb = NULL; | ||
278 | } | ||
279 | spin_unlock(&curr_sb_lock); | ||
280 | |||
281 | /* synchronize its controller if it has one */ | ||
282 | if (cp->control) | ||
283 | ip_vs_sync_conn(cp->control); | ||
284 | } | ||
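
A worked example of the conservative full-buffer check, assuming SIMPLE_CONN_SIZE is 24 bytes and FULL_CONN_SIZE is 48 (sizeof(struct ip_vs_sync_conn) without and with the options block — inferred sizes, not stated in this hunk): with sync_send_mesg_maxlen = 1444 (the value a 1500-byte MTU yields, as computed after set_sync_mesg_maxlen() below), after 59 simple entries head sits at 4 + 59*24 = 1420, and 1420 + 48 = 1468 > 1444, so the buffer is queued even though a 60th simple entry would have fit exactly. Testing against FULL_CONN_SIZE guarantees the next entry always has room, whichever size it turns out to be.
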
285 | |||
286 | |||
287 | /* | ||
288 | * Process received multicast message and create the corresponding | ||
289 | * ip_vs_conn entries. | ||
290 | */ | ||
291 | static void ip_vs_process_message(const char *buffer, const size_t buflen) | ||
292 | { | ||
293 | struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer; | ||
294 | struct ip_vs_sync_conn *s; | ||
295 | struct ip_vs_sync_conn_options *opt; | ||
296 | struct ip_vs_conn *cp; | ||
297 | struct ip_vs_protocol *pp; | ||
298 | struct ip_vs_dest *dest; | ||
299 | char *p; | ||
300 | int i; | ||
301 | |||
302 | if (buflen < sizeof(struct ip_vs_sync_mesg)) { | ||
303 | IP_VS_ERR_RL("sync message header too short\n"); | ||
304 | return; | ||
305 | } | ||
306 | |||
307 | /* Convert size back to host byte order */ | ||
308 | m->size = ntohs(m->size); | ||
309 | |||
310 | if (buflen != m->size) { | ||
311 | IP_VS_ERR_RL("bogus sync message size\n"); | ||
312 | return; | ||
313 | } | ||
314 | |||
315 | /* SyncID sanity check */ | ||
316 | if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) { | ||
317 | IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n", | ||
318 | m->syncid); | ||
319 | return; | ||
320 | } | ||
321 | |||
322 | p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); | ||
323 | for (i=0; i<m->nr_conns; i++) { | ||
324 | unsigned flags, state; | ||
325 | |||
326 | if (p + SIMPLE_CONN_SIZE > buffer+buflen) { | ||
327 | IP_VS_ERR_RL("bogus conn in sync message\n"); | ||
328 | return; | ||
329 | } | ||
330 | s = (struct ip_vs_sync_conn *) p; | ||
331 | flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC; | ||
332 | flags &= ~IP_VS_CONN_F_HASHED; | ||
333 | if (flags & IP_VS_CONN_F_SEQ_MASK) { | ||
334 | opt = (struct ip_vs_sync_conn_options *)&s[1]; | ||
335 | p += FULL_CONN_SIZE; | ||
336 | if (p > buffer+buflen) { | ||
337 | IP_VS_ERR_RL("bogus conn options in sync message\n"); | ||
338 | return; | ||
339 | } | ||
340 | } else { | ||
341 | opt = NULL; | ||
342 | p += SIMPLE_CONN_SIZE; | ||
343 | } | ||
344 | |||
345 | state = ntohs(s->state); | ||
346 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) { | ||
347 | pp = ip_vs_proto_get(s->protocol); | ||
348 | if (!pp) { | ||
349 | IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n", | ||
350 | s->protocol); | ||
351 | continue; | ||
352 | } | ||
353 | if (state >= pp->num_states) { | ||
354 | IP_VS_DBG(2, "Invalid %s state %u in sync msg\n", | ||
355 | pp->name, state); | ||
356 | continue; | ||
357 | } | ||
358 | } else { | ||
359 | /* protocol in templates is not used for state/timeout */ | ||
360 | pp = NULL; | ||
361 | if (state > 0) { | ||
362 | IP_VS_DBG(2, "Invalid template state %u in sync msg\n", | ||
363 | state); | ||
364 | state = 0; | ||
365 | } | ||
366 | } | ||
367 | |||
368 | if (!(flags & IP_VS_CONN_F_TEMPLATE)) | ||
369 | cp = ip_vs_conn_in_get(s->protocol, | ||
370 | s->caddr, s->cport, | ||
371 | s->vaddr, s->vport); | ||
372 | else | ||
373 | cp = ip_vs_ct_in_get(s->protocol, | ||
374 | s->caddr, s->cport, | ||
375 | s->vaddr, s->vport); | ||
376 | if (!cp) { | ||
377 | /* | ||
378 | * Find the appropriate destination for the connection. | ||
379 | * If it is not found the connection will remain unbound | ||
380 | * but still handled. | ||
381 | */ | ||
382 | dest = ip_vs_find_dest(s->daddr, s->dport, | ||
383 | s->vaddr, s->vport, | ||
384 | s->protocol); | ||
385 | /* Set the appropriate activity flag */ | ||
386 | if (s->protocol == IPPROTO_TCP) { | ||
387 | if (state != IP_VS_TCP_S_ESTABLISHED) | ||
388 | flags |= IP_VS_CONN_F_INACTIVE; | ||
389 | else | ||
390 | flags &= ~IP_VS_CONN_F_INACTIVE; | ||
391 | } | ||
392 | cp = ip_vs_conn_new(s->protocol, | ||
393 | s->caddr, s->cport, | ||
394 | s->vaddr, s->vport, | ||
395 | s->daddr, s->dport, | ||
396 | flags, dest); | ||
397 | if (dest) | ||
398 | atomic_dec(&dest->refcnt); | ||
399 | if (!cp) { | ||
400 | IP_VS_ERR("ip_vs_conn_new failed\n"); | ||
401 | return; | ||
402 | } | ||
403 | } else if (!cp->dest) { | ||
404 | dest = ip_vs_try_bind_dest(cp); | ||
405 | if (dest) | ||
406 | atomic_dec(&dest->refcnt); | ||
407 | } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) && | ||
408 | (cp->state != state)) { | ||
409 | /* update active/inactive flag for the connection */ | ||
410 | dest = cp->dest; | ||
411 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
412 | (state != IP_VS_TCP_S_ESTABLISHED)) { | ||
413 | atomic_dec(&dest->activeconns); | ||
414 | atomic_inc(&dest->inactconns); | ||
415 | cp->flags |= IP_VS_CONN_F_INACTIVE; | ||
416 | } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && | ||
417 | (state == IP_VS_TCP_S_ESTABLISHED)) { | ||
418 | atomic_inc(&dest->activeconns); | ||
419 | atomic_dec(&dest->inactconns); | ||
420 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | ||
421 | } | ||
422 | } | ||
423 | |||
424 | if (opt) | ||
425 | memcpy(&cp->in_seq, opt, sizeof(*opt)); | ||
426 | atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); | ||
427 | cp->state = state; | ||
428 | cp->old_state = cp->state; | ||
429 | /* | ||
430 | * We cannot recover the right timeout for templates | ||
431 | * in all cases because we cannot find the right fwmark | ||
432 | * virtual service. If needed, we can do it for | ||
433 | * non-fwmark persistent services. | ||
434 | */ | ||
435 | if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table) | ||
436 | cp->timeout = pp->timeout_table[state]; | ||
437 | else | ||
438 | cp->timeout = (3*60*HZ); | ||
439 | ip_vs_conn_put(cp); | ||
440 | } | ||
441 | } | ||
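
For example (hypothetical numbers), a datagram whose header advertises nr_conns = 10 but whose size only covers two simple entries passes the header checks, yet on the third iteration p + SIMPLE_CONN_SIZE overruns buffer + buflen and the loop bails out with "bogus conn in sync message" — so a malformed or truncated packet can never walk the parser past the end of the receive buffer.
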
442 | |||
443 | |||
444 | /* | ||
445 | * Setup loopback of outgoing multicasts on a sending socket | ||
446 | */ | ||
447 | static void set_mcast_loop(struct sock *sk, u_char loop) | ||
448 | { | ||
449 | struct inet_sock *inet = inet_sk(sk); | ||
450 | |||
451 | /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */ | ||
452 | lock_sock(sk); | ||
453 | inet->mc_loop = loop ? 1 : 0; | ||
454 | release_sock(sk); | ||
455 | } | ||
456 | |||
457 | /* | ||
458 | * Specify TTL for outgoing multicasts on a sending socket | ||
459 | */ | ||
460 | static void set_mcast_ttl(struct sock *sk, u_char ttl) | ||
461 | { | ||
462 | struct inet_sock *inet = inet_sk(sk); | ||
463 | |||
464 | /* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */ | ||
465 | lock_sock(sk); | ||
466 | inet->mc_ttl = ttl; | ||
467 | release_sock(sk); | ||
468 | } | ||
469 | |||
470 | /* | ||
471 | * Specify the default interface for outgoing multicasts | ||
472 | */ | ||
473 | static int set_mcast_if(struct sock *sk, char *ifname) | ||
474 | { | ||
475 | struct net_device *dev; | ||
476 | struct inet_sock *inet = inet_sk(sk); | ||
477 | |||
478 | if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) | ||
479 | return -ENODEV; | ||
480 | |||
481 | if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) | ||
482 | return -EINVAL; | ||
483 | |||
484 | lock_sock(sk); | ||
485 | inet->mc_index = dev->ifindex; | ||
486 | /* inet->mc_addr = 0; */ | ||
487 | release_sock(sk); | ||
488 | |||
489 | return 0; | ||
490 | } | ||
491 | |||
492 | |||
493 | /* | ||
494 | * Set the maximum length of sync message according to the | ||
495 | * specified interface's MTU. | ||
496 | */ | ||
497 | static int set_sync_mesg_maxlen(int sync_state) | ||
498 | { | ||
499 | struct net_device *dev; | ||
500 | int num; | ||
501 | |||
502 | if (sync_state == IP_VS_STATE_MASTER) { | ||
503 | if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL) | ||
504 | return -ENODEV; | ||
505 | |||
506 | num = (dev->mtu - sizeof(struct iphdr) - | ||
507 | sizeof(struct udphdr) - | ||
508 | SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; | ||
509 | sync_send_mesg_maxlen = | ||
510 | SYNC_MESG_HEADER_LEN + SIMPLE_CONN_SIZE * num; | ||
511 | IP_VS_DBG(7, "setting the maximum length of sync sending " | ||
512 | "message %d.\n", sync_send_mesg_maxlen); | ||
513 | } else if (sync_state == IP_VS_STATE_BACKUP) { | ||
514 | if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL) | ||
515 | return -ENODEV; | ||
516 | |||
517 | sync_recv_mesg_maxlen = dev->mtu - | ||
518 | sizeof(struct iphdr) - sizeof(struct udphdr); | ||
519 | IP_VS_DBG(7, "setting the maximum length of sync receiving " | ||
520 | "message %d.\n", sync_recv_mesg_maxlen); | ||
521 | } | ||
522 | |||
523 | return 0; | ||
524 | } | ||
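
A worked example of the sizing arithmetic, assuming SIMPLE_CONN_SIZE is 24 bytes, i.e. sizeof(struct ip_vs_sync_conn) (an inference, not stated in this hunk): for a 1500-byte MTU, num = (1500 - 20 - 8 - 4 - 20) / 24 = 60, so sync_send_mesg_maxlen = 4 + 24 * 60 = 1444 bytes. The 20-byte iphdr and 8-byte udphdr terms keep the datagram under the MTU; the extra 20 subtracted appears to be slack, e.g. for IP options.
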
525 | |||
526 | |||
527 | /* | ||
528 | * Join a multicast group. | ||
529 | * The group is specified by a class D multicast address (224.0.0.0/4) | ||
530 | * in the in_addr structure passed in as a parameter. | ||
531 | */ | ||
532 | static int | ||
533 | join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) | ||
534 | { | ||
535 | struct ip_mreqn mreq; | ||
536 | struct net_device *dev; | ||
537 | int ret; | ||
538 | |||
539 | memset(&mreq, 0, sizeof(mreq)); | ||
540 | memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); | ||
541 | |||
542 | if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) | ||
543 | return -ENODEV; | ||
544 | if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) | ||
545 | return -EINVAL; | ||
546 | |||
547 | mreq.imr_ifindex = dev->ifindex; | ||
548 | |||
549 | lock_sock(sk); | ||
550 | ret = ip_mc_join_group(sk, &mreq); | ||
551 | release_sock(sk); | ||
552 | |||
553 | return ret; | ||
554 | } | ||
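
The helpers above mirror well-known userspace socket options (the commented-out setsockopt() lines in set_mcast_loop() and set_mcast_ttl() say as much). A hedged userspace sketch of the equivalent setup — the function name and error handling are illustrative, and it assumes glibc exposes struct ip_mreqn:

    #include <arpa/inet.h>
    #include <net/if.h>         /* if_nametoindex */
    #include <netinet/in.h>     /* struct ip_mreqn */
    #include <string.h>
    #include <sys/socket.h>

    static int setup_sync_mcast(int fd, const char *ifname, const char *group)
    {
            struct ip_mreqn mreq;
            unsigned char loop = 0, ttl = 1;

            memset(&mreq, 0, sizeof(mreq));
            if (inet_pton(AF_INET, group, &mreq.imr_multiaddr) != 1)
                    return -1;
            mreq.imr_ifindex = if_nametoindex(ifname);

            /* loop = 0 and ttl = 1 match make_send_sock() below */
            if (setsockopt(fd, IPPROTO_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)) ||
                setsockopt(fd, IPPROTO_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)) ||
                setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)))
                    return -1;
            return 0;
    }
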
555 | |||
556 | |||
557 | static int bind_mcastif_addr(struct socket *sock, char *ifname) | ||
558 | { | ||
559 | struct net_device *dev; | ||
560 | __be32 addr; | ||
561 | struct sockaddr_in sin; | ||
562 | |||
563 | if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) | ||
564 | return -ENODEV; | ||
565 | |||
566 | addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); | ||
567 | if (!addr) | ||
568 | IP_VS_ERR("You probably need to specify IP address on " | ||
569 | "multicast interface.\n"); | ||
570 | |||
571 | IP_VS_DBG(7, "binding socket with (%s) %u.%u.%u.%u\n", | ||
572 | ifname, NIPQUAD(addr)); | ||
573 | |||
574 | /* Now bind the socket with the address of multicast interface */ | ||
575 | sin.sin_family = AF_INET; | ||
576 | sin.sin_addr.s_addr = addr; | ||
577 | sin.sin_port = 0; | ||
578 | |||
579 | return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin)); | ||
580 | } | ||
581 | |||
582 | /* | ||
583 | * Set up sending multicast socket over UDP | ||
584 | */ | ||
585 | static struct socket * make_send_sock(void) | ||
586 | { | ||
587 | struct socket *sock; | ||
588 | int result; | ||
589 | |||
590 | /* First create a socket */ | ||
591 | result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); | ||
592 | if (result < 0) { | ||
593 | IP_VS_ERR("Error during creation of socket; terminating\n"); | ||
594 | return ERR_PTR(result); | ||
595 | } | ||
596 | |||
597 | result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn); | ||
598 | if (result < 0) { | ||
599 | IP_VS_ERR("Error setting outbound mcast interface\n"); | ||
600 | goto error; | ||
601 | } | ||
602 | |||
603 | set_mcast_loop(sock->sk, 0); | ||
604 | set_mcast_ttl(sock->sk, 1); | ||
605 | |||
606 | result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn); | ||
607 | if (result < 0) { | ||
608 | IP_VS_ERR("Error binding address of the mcast interface\n"); | ||
609 | goto error; | ||
610 | } | ||
611 | |||
612 | result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr, | ||
613 | sizeof(struct sockaddr), 0); | ||
614 | if (result < 0) { | ||
615 | IP_VS_ERR("Error connecting to the multicast addr\n"); | ||
616 | goto error; | ||
617 | } | ||
618 | |||
619 | return sock; | ||
620 | |||
621 | error: | ||
622 | sock_release(sock); | ||
623 | return ERR_PTR(result); | ||
624 | } | ||
625 | |||
626 | |||
627 | /* | ||
628 | * Set up receiving multicast socket over UDP | ||
629 | */ | ||
630 | static struct socket * make_receive_sock(void) | ||
631 | { | ||
632 | struct socket *sock; | ||
633 | int result; | ||
634 | |||
635 | /* First create a socket */ | ||
636 | result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); | ||
637 | if (result < 0) { | ||
638 | IP_VS_ERR("Error during creation of socket; terminating\n"); | ||
639 | return ERR_PTR(result); | ||
640 | } | ||
641 | |||
642 | /* this is equivalent to the SO_REUSEADDR option in user space */ | ||
643 | sock->sk->sk_reuse = 1; | ||
644 | |||
645 | result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr, | ||
646 | sizeof(struct sockaddr)); | ||
647 | if (result < 0) { | ||
648 | IP_VS_ERR("Error binding to the multicast addr\n"); | ||
649 | goto error; | ||
650 | } | ||
651 | |||
652 | /* join the multicast group */ | ||
653 | result = join_mcast_group(sock->sk, | ||
654 | (struct in_addr *) &mcast_addr.sin_addr, | ||
655 | ip_vs_backup_mcast_ifn); | ||
656 | if (result < 0) { | ||
657 | IP_VS_ERR("Error joining to the multicast group\n"); | ||
658 | goto error; | ||
659 | } | ||
660 | |||
661 | return sock; | ||
662 | |||
663 | error: | ||
664 | sock_release(sock); | ||
665 | return ERR_PTR(result); | ||
666 | } | ||
667 | |||
668 | |||
669 | static int | ||
670 | ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length) | ||
671 | { | ||
672 | struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL}; | ||
673 | struct kvec iov; | ||
674 | int len; | ||
675 | |||
676 | EnterFunction(7); | ||
677 | iov.iov_base = (void *)buffer; | ||
678 | iov.iov_len = length; | ||
679 | |||
680 | len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length)); | ||
681 | |||
682 | LeaveFunction(7); | ||
683 | return len; | ||
684 | } | ||
685 | |||
686 | static void | ||
687 | ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg) | ||
688 | { | ||
689 | int msize; | ||
690 | |||
691 | msize = msg->size; | ||
692 | |||
693 | /* Put size in network byte order */ | ||
694 | msg->size = htons(msg->size); | ||
695 | |||
696 | if (ip_vs_send_async(sock, (char *)msg, msize) != msize) | ||
697 | IP_VS_ERR("ip_vs_send_async error\n"); | ||
698 | } | ||
699 | |||
700 | static int | ||
701 | ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) | ||
702 | { | ||
703 | struct msghdr msg = {NULL,}; | ||
704 | struct kvec iov; | ||
705 | int len; | ||
706 | |||
707 | EnterFunction(7); | ||
708 | |||
709 | /* Receive a packet */ | ||
710 | iov.iov_base = buffer; | ||
711 | iov.iov_len = (size_t)buflen; | ||
712 | |||
713 | len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, 0); | ||
714 | |||
715 | if (len < 0) | ||
716 | return -1; | ||
717 | |||
718 | LeaveFunction(7); | ||
719 | return len; | ||
720 | } | ||
721 | |||
722 | |||
723 | static int sync_thread_master(void *data) | ||
724 | { | ||
725 | struct ip_vs_sync_thread_data *tinfo = data; | ||
726 | struct ip_vs_sync_buff *sb; | ||
727 | |||
728 | IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, " | ||
729 | "syncid = %d\n", | ||
730 | ip_vs_master_mcast_ifn, ip_vs_master_syncid); | ||
731 | |||
732 | while (!kthread_should_stop()) { | ||
733 | while ((sb = sb_dequeue())) { | ||
734 | ip_vs_send_sync_msg(tinfo->sock, sb->mesg); | ||
735 | ip_vs_sync_buff_release(sb); | ||
736 | } | ||
737 | |||
738 | /* check if entries stay in curr_sb for 2 seconds */ | ||
739 | sb = get_curr_sync_buff(2 * HZ); | ||
740 | if (sb) { | ||
741 | ip_vs_send_sync_msg(tinfo->sock, sb->mesg); | ||
742 | ip_vs_sync_buff_release(sb); | ||
743 | } | ||
744 | |||
745 | schedule_timeout_interruptible(HZ); | ||
746 | } | ||
747 | |||
748 | /* clean up the sync_buff queue */ | ||
749 | while ((sb=sb_dequeue())) { | ||
750 | ip_vs_sync_buff_release(sb); | ||
751 | } | ||
752 | |||
753 | /* clean up the current sync_buff */ | ||
754 | if ((sb = get_curr_sync_buff(0))) { | ||
755 | ip_vs_sync_buff_release(sb); | ||
756 | } | ||
757 | |||
758 | /* release the sending multicast socket */ | ||
759 | sock_release(tinfo->sock); | ||
760 | kfree(tinfo); | ||
761 | |||
762 | return 0; | ||
763 | } | ||
764 | |||
765 | |||
766 | static int sync_thread_backup(void *data) | ||
767 | { | ||
768 | struct ip_vs_sync_thread_data *tinfo = data; | ||
769 | int len; | ||
770 | |||
771 | IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, " | ||
772 | "syncid = %d\n", | ||
773 | ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); | ||
774 | |||
775 | while (!kthread_should_stop()) { | ||
776 | wait_event_interruptible(*tinfo->sock->sk->sk_sleep, | ||
777 | !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue) | ||
778 | || kthread_should_stop()); | ||
779 | |||
780 | /* do we have data now? */ | ||
781 | while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { | ||
782 | len = ip_vs_receive(tinfo->sock, tinfo->buf, | ||
783 | sync_recv_mesg_maxlen); | ||
784 | if (len <= 0) { | ||
785 | IP_VS_ERR("receiving message error\n"); | ||
786 | break; | ||
787 | } | ||
788 | |||
789 | /* disable bottom halves, because getting/creating conns | ||
790 | accesses data shared with softirq context */ | ||
791 | local_bh_disable(); | ||
792 | ip_vs_process_message(tinfo->buf, len); | ||
793 | local_bh_enable(); | ||
794 | } | ||
795 | } | ||
796 | |||
797 | /* release the receiving multicast socket */ | ||
798 | sock_release(tinfo->sock); | ||
799 | kfree(tinfo->buf); | ||
800 | kfree(tinfo); | ||
801 | |||
802 | return 0; | ||
803 | } | ||
804 | |||
805 | |||
806 | int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) | ||
807 | { | ||
808 | struct ip_vs_sync_thread_data *tinfo; | ||
809 | struct task_struct **realtask, *task; | ||
810 | struct socket *sock; | ||
811 | char *name, *buf = NULL; | ||
812 | int (*threadfn)(void *data); | ||
813 | int result = -ENOMEM; | ||
814 | |||
815 | IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); | ||
816 | IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", | ||
817 | sizeof(struct ip_vs_sync_conn)); | ||
818 | |||
819 | if (state == IP_VS_STATE_MASTER) { | ||
820 | if (sync_master_thread) | ||
821 | return -EEXIST; | ||
822 | |||
823 | strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, | ||
824 | sizeof(ip_vs_master_mcast_ifn)); | ||
825 | ip_vs_master_syncid = syncid; | ||
826 | realtask = &sync_master_thread; | ||
827 | name = "ipvs_syncmaster"; | ||
828 | threadfn = sync_thread_master; | ||
829 | sock = make_send_sock(); | ||
830 | } else if (state == IP_VS_STATE_BACKUP) { | ||
831 | if (sync_backup_thread) | ||
832 | return -EEXIST; | ||
833 | |||
834 | strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, | ||
835 | sizeof(ip_vs_backup_mcast_ifn)); | ||
836 | ip_vs_backup_syncid = syncid; | ||
837 | realtask = &sync_backup_thread; | ||
838 | name = "ipvs_syncbackup"; | ||
839 | threadfn = sync_thread_backup; | ||
840 | sock = make_receive_sock(); | ||
841 | } else { | ||
842 | return -EINVAL; | ||
843 | } | ||
844 | |||
845 | if (IS_ERR(sock)) { | ||
846 | result = PTR_ERR(sock); | ||
847 | goto out; | ||
848 | } | ||
849 | |||
850 | set_sync_mesg_maxlen(state); | ||
851 | if (state == IP_VS_STATE_BACKUP) { | ||
852 | buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL); | ||
853 | if (!buf) | ||
854 | goto outsocket; | ||
855 | } | ||
856 | |||
857 | tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); | ||
858 | if (!tinfo) | ||
859 | goto outbuf; | ||
860 | |||
861 | tinfo->sock = sock; | ||
862 | tinfo->buf = buf; | ||
863 | |||
864 | task = kthread_run(threadfn, tinfo, name); | ||
865 | if (IS_ERR(task)) { | ||
866 | result = PTR_ERR(task); | ||
867 | goto outtinfo; | ||
868 | } | ||
869 | |||
870 | /* mark as active */ | ||
871 | *realtask = task; | ||
872 | ip_vs_sync_state |= state; | ||
873 | |||
874 | /* increase the module use count */ | ||
875 | ip_vs_use_count_inc(); | ||
876 | |||
877 | return 0; | ||
878 | |||
879 | outtinfo: | ||
880 | kfree(tinfo); | ||
881 | outbuf: | ||
882 | kfree(buf); | ||
883 | outsocket: | ||
884 | sock_release(sock); | ||
885 | out: | ||
886 | return result; | ||
887 | } | ||
888 | |||
889 | |||
890 | int stop_sync_thread(int state) | ||
891 | { | ||
892 | IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); | ||
893 | |||
894 | if (state == IP_VS_STATE_MASTER) { | ||
895 | if (!sync_master_thread) | ||
896 | return -ESRCH; | ||
897 | |||
898 | IP_VS_INFO("stopping master sync thread %d ...\n", | ||
899 | task_pid_nr(sync_master_thread)); | ||
900 | |||
901 | /* | ||
902 | * The lock synchronizes with sb_queue_tail(), so that we don't | ||
903 | * add sync buffers to the queue, when we are already in | ||
904 | * progress of stopping the master sync daemon. | ||
905 | */ | ||
906 | |||
907 | spin_lock_bh(&ip_vs_sync_lock); | ||
908 | ip_vs_sync_state &= ~IP_VS_STATE_MASTER; | ||
909 | spin_unlock_bh(&ip_vs_sync_lock); | ||
910 | kthread_stop(sync_master_thread); | ||
911 | sync_master_thread = NULL; | ||
912 | } else if (state == IP_VS_STATE_BACKUP) { | ||
913 | if (!sync_backup_thread) | ||
914 | return -ESRCH; | ||
915 | |||
916 | IP_VS_INFO("stopping backup sync thread %d ...\n", | ||
917 | task_pid_nr(sync_backup_thread)); | ||
918 | |||
919 | ip_vs_sync_state &= ~IP_VS_STATE_BACKUP; | ||
920 | kthread_stop(sync_backup_thread); | ||
921 | sync_backup_thread = NULL; | ||
922 | } else { | ||
923 | return -EINVAL; | ||
924 | } | ||
925 | |||
926 | /* decrease the module use count */ | ||
927 | ip_vs_use_count_dec(); | ||
928 | |||
929 | return 0; | ||
930 | } | ||
diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c deleted file mode 100644 index 9b0ef86bb1f7..000000000000 --- a/net/ipv4/ipvs/ip_vs_wlc.c +++ /dev/null | |||
@@ -1,149 +0,0 @@ | |||
1 | /* | ||
2 | * IPVS: Weighted Least-Connection Scheduling module | ||
3 | * | ||
4 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
5 | * Peter Kese <peter.kese@ijs.si> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | * | ||
12 | * Changes: | ||
13 | * Wensong Zhang : changed the ip_vs_wlc_schedule to return dest | ||
14 | * Wensong Zhang : changed to use the inactconns in scheduling | ||
15 | * Wensong Zhang : changed some cosmetic things for debugging | ||
16 | * Wensong Zhang : changed for the d-linked destination list | ||
17 | * Wensong Zhang : added the ip_vs_wlc_update_svc | ||
18 | * Wensong Zhang : added any dest with weight=0 is quiesced | ||
19 | * | ||
20 | */ | ||
21 | |||
22 | #include <linux/module.h> | ||
23 | #include <linux/kernel.h> | ||
24 | |||
25 | #include <net/ip_vs.h> | ||
26 | |||
27 | |||
28 | static int | ||
29 | ip_vs_wlc_init_svc(struct ip_vs_service *svc) | ||
30 | { | ||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | |||
35 | static int | ||
36 | ip_vs_wlc_done_svc(struct ip_vs_service *svc) | ||
37 | { | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | |||
42 | static int | ||
43 | ip_vs_wlc_update_svc(struct ip_vs_service *svc) | ||
44 | { | ||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | |||
49 | static inline unsigned int | ||
50 | ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest) | ||
51 | { | ||
52 | /* | ||
53 | * We think the overhead of processing active connections is 256 | ||
54 | * times higher than that of inactive connections on average. (This | ||
55 | * factor of 256 may not be accurate; we may change it later.) We | ||
56 | * use the following formula to estimate the overhead now: | ||
57 | * dest->activeconns*256 + dest->inactconns | ||
58 | */ | ||
59 | return (atomic_read(&dest->activeconns) << 8) + | ||
60 | atomic_read(&dest->inactconns); | ||
61 | } | ||
62 | |||
63 | |||
64 | /* | ||
65 | * Weighted Least Connection scheduling | ||
66 | */ | ||
67 | static struct ip_vs_dest * | ||
68 | ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | ||
69 | { | ||
70 | struct ip_vs_dest *dest, *least; | ||
71 | unsigned int loh, doh; | ||
72 | |||
73 | IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n"); | ||
74 | |||
75 | /* | ||
76 | * We calculate the load of each dest server as follows: | ||
77 | * (dest overhead) / dest->weight | ||
78 | * | ||
79 | * Remember -- no floats in kernel mode!!! | ||
80 | * The comparison of h1*w2 > h2*w1 is equivalent to that of | ||
81 | * h1/w1 > h2/w2 | ||
82 | * if every weight is larger than zero. | ||
83 | * | ||
84 | * The server with weight=0 is quiesced and will not receive any | ||
85 | * new connections. | ||
86 | */ | ||
87 | |||
88 | list_for_each_entry(dest, &svc->destinations, n_list) { | ||
89 | if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && | ||
90 | atomic_read(&dest->weight) > 0) { | ||
91 | least = dest; | ||
92 | loh = ip_vs_wlc_dest_overhead(least); | ||
93 | goto nextstage; | ||
94 | } | ||
95 | } | ||
96 | return NULL; | ||
97 | |||
98 | /* | ||
99 | * Find the destination with the least load. | ||
100 | */ | ||
101 | nextstage: | ||
102 | list_for_each_entry_continue(dest, &svc->destinations, n_list) { | ||
103 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) | ||
104 | continue; | ||
105 | doh = ip_vs_wlc_dest_overhead(dest); | ||
106 | if (loh * atomic_read(&dest->weight) > | ||
107 | doh * atomic_read(&least->weight)) { | ||
108 | least = dest; | ||
109 | loh = doh; | ||
110 | } | ||
111 | } | ||
112 | |||
113 | IP_VS_DBG(6, "WLC: server %u.%u.%u.%u:%u " | ||
114 | "activeconns %d refcnt %d weight %d overhead %d\n", | ||
115 | NIPQUAD(least->addr), ntohs(least->port), | ||
116 | atomic_read(&least->activeconns), | ||
117 | atomic_read(&least->refcnt), | ||
118 | atomic_read(&least->weight), loh); | ||
119 | |||
120 | return least; | ||
121 | } | ||
122 | |||
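
A quick worked example of the cross-multiplied comparison (hypothetical numbers): with least = A (overhead loh = 900, weight 3) and candidate B (overhead doh = 200, weight 1), the test loh * weight(B) > doh * weight(A) gives 900 * 1 > 200 * 3, i.e. 900 > 600, so B replaces A — matching the floating-point comparison 900/3 = 300 > 200/1 = 200 without any kernel-mode floats.
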
123 | |||
124 | static struct ip_vs_scheduler ip_vs_wlc_scheduler = | ||
125 | { | ||
126 | .name = "wlc", | ||
127 | .refcnt = ATOMIC_INIT(0), | ||
128 | .module = THIS_MODULE, | ||
129 | .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list), | ||
130 | .init_service = ip_vs_wlc_init_svc, | ||
131 | .done_service = ip_vs_wlc_done_svc, | ||
132 | .update_service = ip_vs_wlc_update_svc, | ||
133 | .schedule = ip_vs_wlc_schedule, | ||
134 | }; | ||
135 | |||
136 | |||
137 | static int __init ip_vs_wlc_init(void) | ||
138 | { | ||
139 | return register_ip_vs_scheduler(&ip_vs_wlc_scheduler); | ||
140 | } | ||
141 | |||
142 | static void __exit ip_vs_wlc_cleanup(void) | ||
143 | { | ||
144 | unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler); | ||
145 | } | ||
146 | |||
147 | module_init(ip_vs_wlc_init); | ||
148 | module_exit(ip_vs_wlc_cleanup); | ||
149 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c deleted file mode 100644 index 0d86a79b87b5..000000000000 --- a/net/ipv4/ipvs/ip_vs_wrr.c +++ /dev/null | |||
@@ -1,234 +0,0 @@ | |||
1 | /* | ||
2 | * IPVS: Weighted Round-Robin Scheduling module | ||
3 | * | ||
4 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * Changes: | ||
12 | * Wensong Zhang : changed the ip_vs_wrr_schedule to return dest | ||
13 | * Wensong Zhang : changed some cosmetic things for debugging | ||
14 | * Wensong Zhang : changed for the d-linked destination list | ||
15 | * Wensong Zhang : added the ip_vs_wrr_update_svc | ||
16 | * Julian Anastasov : fixed the bug of returning destination | ||
17 | * with weight 0 when all weights are zero | ||
18 | * | ||
19 | */ | ||
20 | |||
21 | #include <linux/module.h> | ||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/net.h> | ||
24 | |||
25 | #include <net/ip_vs.h> | ||
26 | |||
27 | /* | ||
28 | * current destination pointer for weighted round-robin scheduling | ||
29 | */ | ||
30 | struct ip_vs_wrr_mark { | ||
31 | struct list_head *cl; /* current list head */ | ||
32 | int cw; /* current weight */ | ||
33 | int mw; /* maximum weight */ | ||
34 | int di; /* decreasing interval */ | ||
35 | }; | ||
36 | |||
37 | |||
38 | /* | ||
39 | * Get the gcd of server weights | ||
40 | */ | ||
41 | static int gcd(int a, int b) | ||
42 | { | ||
43 | int c; | ||
44 | |||
45 | while ((c = a % b)) { | ||
46 | a = b; | ||
47 | b = c; | ||
48 | } | ||
49 | return b; | ||
50 | } | ||
51 | |||
52 | static int ip_vs_wrr_gcd_weight(struct ip_vs_service *svc) | ||
53 | { | ||
54 | struct ip_vs_dest *dest; | ||
55 | int weight; | ||
56 | int g = 0; | ||
57 | |||
58 | list_for_each_entry(dest, &svc->destinations, n_list) { | ||
59 | weight = atomic_read(&dest->weight); | ||
60 | if (weight > 0) { | ||
61 | if (g > 0) | ||
62 | g = gcd(weight, g); | ||
63 | else | ||
64 | g = weight; | ||
65 | } | ||
66 | } | ||
67 | return g ? g : 1; | ||
68 | } | ||
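
For example, with weights {300, 200, 100} the gcd is 100, so the WRR pass below can decrement the current weight in steps of 100 instead of 1, visiting exactly the same sequence of servers with far fewer loop iterations.
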
69 | |||
70 | |||
71 | /* | ||
72 | * Get the maximum weight of the service destinations. | ||
73 | */ | ||
74 | static int ip_vs_wrr_max_weight(struct ip_vs_service *svc) | ||
75 | { | ||
76 | struct ip_vs_dest *dest; | ||
77 | int weight = 0; | ||
78 | |||
79 | list_for_each_entry(dest, &svc->destinations, n_list) { | ||
80 | if (atomic_read(&dest->weight) > weight) | ||
81 | weight = atomic_read(&dest->weight); | ||
82 | } | ||
83 | |||
84 | return weight; | ||
85 | } | ||
86 | |||
87 | |||
88 | static int ip_vs_wrr_init_svc(struct ip_vs_service *svc) | ||
89 | { | ||
90 | struct ip_vs_wrr_mark *mark; | ||
91 | |||
92 | /* | ||
93 | * Allocate the mark variable for WRR scheduling | ||
94 | */ | ||
95 | mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC); | ||
96 | if (mark == NULL) { | ||
97 | IP_VS_ERR("ip_vs_wrr_init_svc(): no memory\n"); | ||
98 | return -ENOMEM; | ||
99 | } | ||
100 | mark->cl = &svc->destinations; | ||
101 | mark->cw = 0; | ||
102 | mark->mw = ip_vs_wrr_max_weight(svc); | ||
103 | mark->di = ip_vs_wrr_gcd_weight(svc); | ||
104 | svc->sched_data = mark; | ||
105 | |||
106 | return 0; | ||
107 | } | ||
108 | |||
109 | |||
110 | static int ip_vs_wrr_done_svc(struct ip_vs_service *svc) | ||
111 | { | ||
112 | /* | ||
113 | * Release the mark variable | ||
114 | */ | ||
115 | kfree(svc->sched_data); | ||
116 | |||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | |||
121 | static int ip_vs_wrr_update_svc(struct ip_vs_service *svc) | ||
122 | { | ||
123 | struct ip_vs_wrr_mark *mark = svc->sched_data; | ||
124 | |||
125 | mark->cl = &svc->destinations; | ||
126 | mark->mw = ip_vs_wrr_max_weight(svc); | ||
127 | mark->di = ip_vs_wrr_gcd_weight(svc); | ||
128 | if (mark->cw > mark->mw) | ||
129 | mark->cw = 0; | ||
130 | return 0; | ||
131 | } | ||
132 | |||
133 | |||
134 | /* | ||
135 | * Weighted Round-Robin Scheduling | ||
136 | */ | ||
137 | static struct ip_vs_dest * | ||
138 | ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | ||
139 | { | ||
140 | struct ip_vs_dest *dest; | ||
141 | struct ip_vs_wrr_mark *mark = svc->sched_data; | ||
142 | struct list_head *p; | ||
143 | |||
144 | IP_VS_DBG(6, "ip_vs_wrr_schedule(): Scheduling...\n"); | ||
145 | |||
146 | /* | ||
147 | * This loop always terminates, because mark->cw is in (0, max_weight] | ||
148 | * and at least one server has its weight equal to max_weight. | ||
149 | */ | ||
150 | write_lock(&svc->sched_lock); | ||
151 | p = mark->cl; | ||
152 | while (1) { | ||
153 | if (mark->cl == &svc->destinations) { | ||
154 | /* it is at the head of the destination list */ | ||
155 | |||
156 | if (mark->cl == mark->cl->next) { | ||
157 | /* no dest entry */ | ||
158 | dest = NULL; | ||
159 | goto out; | ||
160 | } | ||
161 | |||
162 | mark->cl = svc->destinations.next; | ||
163 | mark->cw -= mark->di; | ||
164 | if (mark->cw <= 0) { | ||
165 | mark->cw = mark->mw; | ||
166 | /* | ||
167 | * Still zero, which means no available servers. | ||
168 | */ | ||
169 | if (mark->cw == 0) { | ||
170 | mark->cl = &svc->destinations; | ||
171 | IP_VS_ERR_RL("ip_vs_wrr_schedule(): " | ||
172 | "no available servers\n"); | ||
173 | dest = NULL; | ||
174 | goto out; | ||
175 | } | ||
176 | } | ||
177 | } else | ||
178 | mark->cl = mark->cl->next; | ||
179 | |||
180 | if (mark->cl != &svc->destinations) { | ||
181 | /* not at the head of the list */ | ||
182 | dest = list_entry(mark->cl, struct ip_vs_dest, n_list); | ||
183 | if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && | ||
184 | atomic_read(&dest->weight) >= mark->cw) { | ||
185 | /* got it */ | ||
186 | break; | ||
187 | } | ||
188 | } | ||
189 | |||
190 | if (mark->cl == p && mark->cw == mark->di) { | ||
191 | /* back to the start, and no dest is found. | ||
192 | It is only possible when all dests are OVERLOADED */ | ||
193 | dest = NULL; | ||
194 | goto out; | ||
195 | } | ||
196 | } | ||
197 | |||
198 | IP_VS_DBG(6, "WRR: server %u.%u.%u.%u:%u " | ||
199 | "activeconns %d refcnt %d weight %d\n", | ||
200 | NIPQUAD(dest->addr), ntohs(dest->port), | ||
201 | atomic_read(&dest->activeconns), | ||
202 | atomic_read(&dest->refcnt), | ||
203 | atomic_read(&dest->weight)); | ||
204 | |||
205 | out: | ||
206 | write_unlock(&svc->sched_lock); | ||
207 | return dest; | ||
208 | } | ||
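
A worked trace with hypothetical weights: for destinations A, B, C with weights 4, 3, 2, mark->di = gcd = 1 and mark->mw = 4. The loop walks the list with cw stepping through 4, 3, 2, 1, selecting every server whose weight is >= cw, which yields the repeating 9-pick cycle A, A B, A B C, A B C — 4 picks of A, 3 of B and 2 of C per cycle, exactly as the weights demand.
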
209 | |||
210 | |||
211 | static struct ip_vs_scheduler ip_vs_wrr_scheduler = { | ||
212 | .name = "wrr", | ||
213 | .refcnt = ATOMIC_INIT(0), | ||
214 | .module = THIS_MODULE, | ||
215 | .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list), | ||
216 | .init_service = ip_vs_wrr_init_svc, | ||
217 | .done_service = ip_vs_wrr_done_svc, | ||
218 | .update_service = ip_vs_wrr_update_svc, | ||
219 | .schedule = ip_vs_wrr_schedule, | ||
220 | }; | ||
221 | |||
222 | static int __init ip_vs_wrr_init(void) | ||
223 | { | ||
224 | return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ; | ||
225 | } | ||
226 | |||
227 | static void __exit ip_vs_wrr_cleanup(void) | ||
228 | { | ||
229 | unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler); | ||
230 | } | ||
231 | |||
232 | module_init(ip_vs_wrr_init); | ||
233 | module_exit(ip_vs_wrr_cleanup); | ||
234 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c deleted file mode 100644 index 9892d4aca42e..000000000000 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ /dev/null | |||
@@ -1,559 +0,0 @@ | |||
1 | /* | ||
2 | * ip_vs_xmit.c: various packet transmitters for IPVS | ||
3 | * | ||
4 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
5 | * Julian Anastasov <ja@ssi.bg> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | * | ||
12 | * Changes: | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/tcp.h> /* for tcphdr */ | ||
18 | #include <net/ip.h> | ||
19 | #include <net/tcp.h> /* for csum_tcpudp_magic */ | ||
20 | #include <net/udp.h> | ||
21 | #include <net/icmp.h> /* for icmp_send */ | ||
22 | #include <net/route.h> /* for ip_route_output */ | ||
23 | #include <linux/netfilter.h> | ||
24 | #include <linux/netfilter_ipv4.h> | ||
25 | |||
26 | #include <net/ip_vs.h> | ||
27 | |||
28 | |||
29 | /* | ||
30 | * Destination cache to speed up outgoing route lookup | ||
31 | */ | ||
32 | static inline void | ||
33 | __ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst) | ||
34 | { | ||
35 | struct dst_entry *old_dst; | ||
36 | |||
37 | old_dst = dest->dst_cache; | ||
38 | dest->dst_cache = dst; | ||
39 | dest->dst_rtos = rtos; | ||
40 | dst_release(old_dst); | ||
41 | } | ||
42 | |||
43 | static inline struct dst_entry * | ||
44 | __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie) | ||
45 | { | ||
46 | struct dst_entry *dst = dest->dst_cache; | ||
47 | |||
48 | if (!dst) | ||
49 | return NULL; | ||
50 | if ((dst->obsolete || rtos != dest->dst_rtos) && | ||
51 | dst->ops->check(dst, cookie) == NULL) { | ||
52 | dest->dst_cache = NULL; | ||
53 | dst_release(dst); | ||
54 | return NULL; | ||
55 | } | ||
56 | dst_hold(dst); | ||
57 | return dst; | ||
58 | } | ||
59 | |||
60 | static struct rtable * | ||
61 | __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) | ||
62 | { | ||
63 | struct rtable *rt; /* Route to the other host */ | ||
64 | struct ip_vs_dest *dest = cp->dest; | ||
65 | |||
66 | if (dest) { | ||
67 | spin_lock(&dest->dst_lock); | ||
68 | if (!(rt = (struct rtable *) | ||
69 | __ip_vs_dst_check(dest, rtos, 0))) { | ||
70 | struct flowi fl = { | ||
71 | .oif = 0, | ||
72 | .nl_u = { | ||
73 | .ip4_u = { | ||
74 | .daddr = dest->addr, | ||
75 | .saddr = 0, | ||
76 | .tos = rtos, } }, | ||
77 | }; | ||
78 | |||
79 | if (ip_route_output_key(&init_net, &rt, &fl)) { | ||
80 | spin_unlock(&dest->dst_lock); | ||
81 | IP_VS_DBG_RL("ip_route_output error, " | ||
82 | "dest: %u.%u.%u.%u\n", | ||
83 | NIPQUAD(dest->addr)); | ||
84 | return NULL; | ||
85 | } | ||
86 | __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst)); | ||
87 | IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n", | ||
88 | NIPQUAD(dest->addr), | ||
89 | atomic_read(&rt->u.dst.__refcnt), rtos); | ||
90 | } | ||
91 | spin_unlock(&dest->dst_lock); | ||
92 | } else { | ||
93 | struct flowi fl = { | ||
94 | .oif = 0, | ||
95 | .nl_u = { | ||
96 | .ip4_u = { | ||
97 | .daddr = cp->daddr, | ||
98 | .saddr = 0, | ||
99 | .tos = rtos, } }, | ||
100 | }; | ||
101 | |||
102 | if (ip_route_output_key(&init_net, &rt, &fl)) { | ||
103 | IP_VS_DBG_RL("ip_route_output error, dest: " | ||
104 | "%u.%u.%u.%u\n", NIPQUAD(cp->daddr)); | ||
105 | return NULL; | ||
106 | } | ||
107 | } | ||
108 | |||
109 | return rt; | ||
110 | } | ||
111 | |||
112 | |||
113 | /* | ||
114 | * Release dest->dst_cache before a dest is removed | ||
115 | */ | ||
116 | void | ||
117 | ip_vs_dst_reset(struct ip_vs_dest *dest) | ||
118 | { | ||
119 | struct dst_entry *old_dst; | ||
120 | |||
121 | old_dst = dest->dst_cache; | ||
122 | dest->dst_cache = NULL; | ||
123 | dst_release(old_dst); | ||
124 | } | ||
125 | |||
126 | #define IP_VS_XMIT(skb, rt) \ | ||
127 | do { \ | ||
128 | (skb)->ipvs_property = 1; \ | ||
129 | skb_forward_csum(skb); \ | ||
130 | NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, (skb), NULL, \ | ||
131 | (rt)->u.dst.dev, dst_output); \ | ||
132 | } while (0) | ||
133 | |||
134 | |||
135 | /* | ||
136 | * NULL transmitter (do nothing except return NF_ACCEPT) | ||
137 | */ | ||
138 | int | ||
139 | ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | ||
140 | struct ip_vs_protocol *pp) | ||
141 | { | ||
142 | /* we do not touch skb and do not need pskb ptr */ | ||
143 | return NF_ACCEPT; | ||
144 | } | ||
145 | |||
146 | |||
147 | /* | ||
148 | * Bypass transmitter | ||
149 | * Lets packets bypass the destination when the destination is not | ||
150 | * available; it may only be used in a transparent cache cluster. | ||
151 | */ | ||
152 | int | ||
153 | ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | ||
154 | struct ip_vs_protocol *pp) | ||
155 | { | ||
156 | struct rtable *rt; /* Route to the other host */ | ||
157 | struct iphdr *iph = ip_hdr(skb); | ||
158 | u8 tos = iph->tos; | ||
159 | int mtu; | ||
160 | struct flowi fl = { | ||
161 | .oif = 0, | ||
162 | .nl_u = { | ||
163 | .ip4_u = { | ||
164 | .daddr = iph->daddr, | ||
165 | .saddr = 0, | ||
166 | .tos = RT_TOS(tos), } }, | ||
167 | }; | ||
168 | |||
169 | EnterFunction(10); | ||
170 | |||
171 | if (ip_route_output_key(&init_net, &rt, &fl)) { | ||
172 | IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, " | ||
173 | "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr)); | ||
174 | goto tx_error_icmp; | ||
175 | } | ||
176 | |||
177 | /* MTU checking */ | ||
178 | mtu = dst_mtu(&rt->u.dst); | ||
179 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { | ||
180 | ip_rt_put(rt); | ||
181 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | ||
182 | IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n"); | ||
183 | goto tx_error; | ||
184 | } | ||
185 | |||
186 | /* | ||
187 | * Call ip_send_check because we are not sure it is called | ||
188 | * after ip_defrag. Is copy-on-write needed? | ||
189 | */ | ||
190 | if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) { | ||
191 | ip_rt_put(rt); | ||
192 | return NF_STOLEN; | ||
193 | } | ||
194 | ip_send_check(ip_hdr(skb)); | ||
195 | |||
196 | /* drop old route */ | ||
197 | dst_release(skb->dst); | ||
198 | skb->dst = &rt->u.dst; | ||
199 | |||
200 | /* Another hack: avoid icmp_send in ip_fragment */ | ||
201 | skb->local_df = 1; | ||
202 | |||
203 | IP_VS_XMIT(skb, rt); | ||
204 | |||
205 | LeaveFunction(10); | ||
206 | return NF_STOLEN; | ||
207 | |||
208 | tx_error_icmp: | ||
209 | dst_link_failure(skb); | ||
210 | tx_error: | ||
211 | kfree_skb(skb); | ||
212 | LeaveFunction(10); | ||
213 | return NF_STOLEN; | ||
214 | } | ||
215 | |||
216 | |||
217 | /* | ||
218 | * NAT transmitter (only for outside-to-inside nat forwarding) | ||
219 | * Not used for related ICMP | ||
220 | */ | ||
221 | int | ||
222 | ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | ||
223 | struct ip_vs_protocol *pp) | ||
224 | { | ||
225 | struct rtable *rt; /* Route to the other host */ | ||
226 | int mtu; | ||
227 | struct iphdr *iph = ip_hdr(skb); | ||
228 | |||
229 | EnterFunction(10); | ||
230 | |||
231 | /* check if it is a connection of no-client-port */ | ||
232 | if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { | ||
233 | __be16 _pt, *p; | ||
234 | p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt); | ||
235 | if (p == NULL) | ||
236 | goto tx_error; | ||
237 | ip_vs_conn_fill_cport(cp, *p); | ||
238 | IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); | ||
239 | } | ||
240 | |||
241 | if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos)))) | ||
242 | goto tx_error_icmp; | ||
243 | |||
244 | /* MTU checking */ | ||
245 | mtu = dst_mtu(&rt->u.dst); | ||
246 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { | ||
247 | ip_rt_put(rt); | ||
248 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | ||
249 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); | ||
250 | goto tx_error; | ||
251 | } | ||
252 | |||
253 | /* copy-on-write the packet before mangling it */ | ||
254 | if (!skb_make_writable(skb, sizeof(struct iphdr))) | ||
255 | goto tx_error_put; | ||
256 | |||
257 | if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) | ||
258 | goto tx_error_put; | ||
259 | |||
260 | /* drop old route */ | ||
261 | dst_release(skb->dst); | ||
262 | skb->dst = &rt->u.dst; | ||
263 | |||
264 | /* mangle the packet */ | ||
265 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) | ||
266 | goto tx_error; | ||
267 | ip_hdr(skb)->daddr = cp->daddr; | ||
268 | ip_send_check(ip_hdr(skb)); | ||
269 | |||
270 | IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); | ||
271 | |||
272 | /* FIXME: when an application helper enlarges the packet and the | ||
273 | length exceeds the MTU of the outgoing device, there will | ||
274 | still be an MTU problem. */ | ||
275 | |||
276 | /* Another hack: avoid icmp_send in ip_fragment */ | ||
277 | skb->local_df = 1; | ||
278 | |||
279 | IP_VS_XMIT(skb, rt); | ||
280 | |||
281 | LeaveFunction(10); | ||
282 | return NF_STOLEN; | ||
283 | |||
284 | tx_error_icmp: | ||
285 | dst_link_failure(skb); | ||
286 | tx_error: | ||
287 | LeaveFunction(10); | ||
288 | kfree_skb(skb); | ||
289 | return NF_STOLEN; | ||
290 | tx_error_put: | ||
291 | ip_rt_put(rt); | ||
292 | goto tx_error; | ||
293 | } | ||
294 | |||
295 | |||
296 | /* | ||
297 | * IP Tunneling transmitter | ||
298 | * | ||
299 | * This function encapsulates the packet in a new IP packet, its | ||
300 | * destination will be set to cp->daddr. Most of the code in this | ||
301 | * function is taken from ipip.c. | ||
302 | * | ||
303 | * It is used in VS/TUN cluster. The load balancer selects a real | ||
304 | * server from a cluster based on a scheduling algorithm, | ||
305 | * encapsulates the request packet and forwards it to the selected | ||
306 | * server. For example, all real servers are configured with | ||
307 | * "ifconfig tunl0 <Virtual IP Address> up". When the server receives | ||
308 | * the encapsulated packet, it will decapsulate the packet, process | ||
309 | * the request and return the response packets directly to the client | ||
310 | * without passing through the load balancer. This can greatly | ||
311 | * increase the scalability of a virtual server. | ||
312 | * | ||
313 | * Used for ANY protocol | ||
314 | */ | ||
315 | int | ||
316 | ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | ||
317 | struct ip_vs_protocol *pp) | ||
318 | { | ||
319 | struct rtable *rt; /* Route to the other host */ | ||
320 | struct net_device *tdev; /* Device to other host */ | ||
321 | struct iphdr *old_iph = ip_hdr(skb); | ||
322 | u8 tos = old_iph->tos; | ||
323 | __be16 df = old_iph->frag_off; | ||
324 | sk_buff_data_t old_transport_header = skb->transport_header; | ||
325 | struct iphdr *iph; /* Our new IP header */ | ||
326 | unsigned int max_headroom; /* The extra header space needed */ | ||
327 | int mtu; | ||
328 | |||
329 | EnterFunction(10); | ||
330 | |||
331 | if (skb->protocol != htons(ETH_P_IP)) { | ||
332 | IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, " | ||
333 | "ETH_P_IP: %d, skb protocol: %d\n", | ||
334 | htons(ETH_P_IP), skb->protocol); | ||
335 | goto tx_error; | ||
336 | } | ||
337 | |||
338 | if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos)))) | ||
339 | goto tx_error_icmp; | ||
340 | |||
341 | tdev = rt->u.dst.dev; | ||
342 | |||
343 | mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); | ||
344 | if (mtu < 68) { | ||
345 | ip_rt_put(rt); | ||
346 | IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n"); | ||
347 | goto tx_error; | ||
348 | } | ||
349 | if (skb->dst) | ||
350 | skb->dst->ops->update_pmtu(skb->dst, mtu); | ||
351 | |||
352 | df |= (old_iph->frag_off & htons(IP_DF)); | ||
353 | |||
354 | if ((old_iph->frag_off & htons(IP_DF)) | ||
355 | && mtu < ntohs(old_iph->tot_len)) { | ||
356 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | ||
357 | ip_rt_put(rt); | ||
358 | IP_VS_DBG_RL("ip_vs_tunnel_xmit(): frag needed\n"); | ||
359 | goto tx_error; | ||
360 | } | ||
361 | |||
362 | /* | ||
363 | * Okay, now see if we can stuff it in the buffer as-is. | ||
364 | */ | ||
365 | max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); | ||
366 | |||
367 | if (skb_headroom(skb) < max_headroom | ||
368 | || skb_cloned(skb) || skb_shared(skb)) { | ||
369 | struct sk_buff *new_skb = | ||
370 | skb_realloc_headroom(skb, max_headroom); | ||
371 | if (!new_skb) { | ||
372 | ip_rt_put(rt); | ||
373 | kfree_skb(skb); | ||
374 | IP_VS_ERR_RL("ip_vs_tunnel_xmit(): no memory\n"); | ||
375 | return NF_STOLEN; | ||
376 | } | ||
377 | kfree_skb(skb); | ||
378 | skb = new_skb; | ||
379 | old_iph = ip_hdr(skb); | ||
380 | } | ||
381 | |||
382 | skb->transport_header = old_transport_header; | ||
383 | |||
384 | /* fix old IP header checksum */ | ||
385 | ip_send_check(old_iph); | ||
386 | |||
387 | skb_push(skb, sizeof(struct iphdr)); | ||
388 | skb_reset_network_header(skb); | ||
389 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | ||
390 | |||
391 | /* drop old route */ | ||
392 | dst_release(skb->dst); | ||
393 | skb->dst = &rt->u.dst; | ||
394 | |||
395 | /* | ||
396 | * Push down and install the IPIP header. | ||
397 | */ | ||
398 | iph = ip_hdr(skb); | ||
399 | iph->version = 4; | ||
400 | iph->ihl = sizeof(struct iphdr)>>2; | ||
401 | iph->frag_off = df; | ||
402 | iph->protocol = IPPROTO_IPIP; | ||
403 | iph->tos = tos; | ||
404 | iph->daddr = rt->rt_dst; | ||
405 | iph->saddr = rt->rt_src; | ||
406 | iph->ttl = old_iph->ttl; | ||
407 | ip_select_ident(iph, &rt->u.dst, NULL); | ||
408 | |||
409 | /* Another hack: avoid icmp_send in ip_fragment */ | ||
410 | skb->local_df = 1; | ||
411 | |||
412 | ip_local_out(skb); | ||
413 | |||
414 | LeaveFunction(10); | ||
415 | |||
416 | return NF_STOLEN; | ||
417 | |||
418 | tx_error_icmp: | ||
419 | dst_link_failure(skb); | ||
420 | tx_error: | ||
421 | kfree_skb(skb); | ||
422 | LeaveFunction(10); | ||
423 | return NF_STOLEN; | ||
424 | } | ||
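
A quick worked consequence of the encapsulation: the outer IPv4 header adds sizeof(struct iphdr) = 20 bytes, so with a 1500-byte route MTU the tunnel accepts inner packets of at most 1480 bytes; a larger packet with DF set takes the ICMP_FRAG_NEEDED branch above, advertising the reduced MTU so the sender can shrink its path-MTU estimate.
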
425 | |||
426 | |||
427 | /* | ||
428 | * Direct Routing transmitter | ||
429 | * Used for ANY protocol | ||
430 | */ | ||
431 | int | ||
432 | ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | ||
433 | struct ip_vs_protocol *pp) | ||
434 | { | ||
435 | struct rtable *rt; /* Route to the other host */ | ||
436 | struct iphdr *iph = ip_hdr(skb); | ||
437 | int mtu; | ||
438 | |||
439 | EnterFunction(10); | ||
440 | |||
441 | if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos)))) | ||
442 | goto tx_error_icmp; | ||
443 | |||
444 | /* MTU checking */ | ||
445 | mtu = dst_mtu(&rt->u.dst); | ||
446 | if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { | ||
447 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | ||
448 | ip_rt_put(rt); | ||
449 | IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n"); | ||
450 | goto tx_error; | ||
451 | } | ||
452 | |||
453 | /* | ||
454 | * Call ip_send_check because we are not sure it is called | ||
455 | * after ip_defrag. Is copy-on-write needed? | ||
456 | */ | ||
457 | if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) { | ||
458 | ip_rt_put(rt); | ||
459 | return NF_STOLEN; | ||
460 | } | ||
461 | ip_send_check(ip_hdr(skb)); | ||
462 | |||
463 | /* drop old route */ | ||
464 | dst_release(skb->dst); | ||
465 | skb->dst = &rt->u.dst; | ||
466 | |||
467 | /* Another hack: avoid icmp_send in ip_fragment */ | ||
468 | skb->local_df = 1; | ||
469 | |||
470 | IP_VS_XMIT(skb, rt); | ||
471 | |||
472 | LeaveFunction(10); | ||
473 | return NF_STOLEN; | ||
474 | |||
475 | tx_error_icmp: | ||
476 | dst_link_failure(skb); | ||
477 | tx_error: | ||
478 | kfree_skb(skb); | ||
479 | LeaveFunction(10); | ||
480 | return NF_STOLEN; | ||
481 | } | ||
482 | |||
483 | |||
484 | /* | ||
485 | * ICMP packet transmitter | ||
486 | * called by the ip_vs_in_icmp | ||
487 | */ | ||
488 | int | ||
489 | ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | ||
490 | struct ip_vs_protocol *pp, int offset) | ||
491 | { | ||
492 | struct rtable *rt; /* Route to the other host */ | ||
493 | int mtu; | ||
494 | int rc; | ||
495 | |||
496 | EnterFunction(10); | ||
497 | |||
498 | /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be | ||
499 | forwarded directly here, because there is no need to | ||
500 | translate address/port back */ | ||
501 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { | ||
502 | if (cp->packet_xmit) | ||
503 | rc = cp->packet_xmit(skb, cp, pp); | ||
504 | else | ||
505 | rc = NF_ACCEPT; | ||
506 | /* do not touch skb anymore */ | ||
507 | atomic_inc(&cp->in_pkts); | ||
508 | goto out; | ||
509 | } | ||
510 | |||
511 | /* | ||
512 | * mangle and send the packet here (only for VS/NAT) | ||
513 | */ | ||
514 | |||
515 | if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos)))) | ||
516 | goto tx_error_icmp; | ||
517 | |||
518 | /* MTU checking */ | ||
519 | mtu = dst_mtu(&rt->u.dst); | ||
520 | if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { | ||
521 | ip_rt_put(rt); | ||
522 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); | ||
523 | IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n"); | ||
524 | goto tx_error; | ||
525 | } | ||
526 | |||
527 | /* copy-on-write the packet before mangling it */ | ||
528 | if (!skb_make_writable(skb, offset)) | ||
529 | goto tx_error_put; | ||
530 | |||
531 | if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) | ||
532 | goto tx_error_put; | ||
533 | |||
534 | /* drop the old route when skb is not shared */ | ||
535 | dst_release(skb->dst); | ||
536 | skb->dst = &rt->u.dst; | ||
537 | |||
538 | ip_vs_nat_icmp(skb, pp, cp, 0); | ||
539 | |||
540 | /* Another hack: avoid icmp_send in ip_fragment */ | ||
541 | skb->local_df = 1; | ||
542 | |||
543 | IP_VS_XMIT(skb, rt); | ||
544 | |||
545 | rc = NF_STOLEN; | ||
546 | goto out; | ||
547 | |||
548 | tx_error_icmp: | ||
549 | dst_link_failure(skb); | ||
550 | tx_error: | ||
551 | dev_kfree_skb(skb); | ||
552 | rc = NF_STOLEN; | ||
553 | out: | ||
554 | LeaveFunction(10); | ||
555 | return rc; | ||
556 | tx_error_put: | ||
557 | ip_rt_put(rt); | ||
558 | goto tx_error; | ||
559 | } | ||
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index f8edacdf991d..6efdb70b3eb2 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
@@ -12,6 +12,7 @@ | |||
12 | /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ | 12 | /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ |
13 | int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | 13 | int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) |
14 | { | 14 | { |
15 | struct net *net = dev_net(skb->dst->dev); | ||
15 | const struct iphdr *iph = ip_hdr(skb); | 16 | const struct iphdr *iph = ip_hdr(skb); |
16 | struct rtable *rt; | 17 | struct rtable *rt; |
17 | struct flowi fl = {}; | 18 | struct flowi fl = {}; |
@@ -19,7 +20,9 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
19 | unsigned int hh_len; | 20 | unsigned int hh_len; |
20 | unsigned int type; | 21 | unsigned int type; |
21 | 22 | ||
22 | type = inet_addr_type(&init_net, iph->saddr); | 23 | type = inet_addr_type(net, iph->saddr); |
24 | if (skb->sk && inet_sk(skb->sk)->transparent) | ||
25 | type = RTN_LOCAL; | ||
23 | if (addr_type == RTN_UNSPEC) | 26 | if (addr_type == RTN_UNSPEC) |
24 | addr_type = type; | 27 | addr_type = type; |
25 | 28 | ||
@@ -33,7 +36,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
33 | fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); | 36 | fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); |
34 | fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; | 37 | fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; |
35 | fl.mark = skb->mark; | 38 | fl.mark = skb->mark; |
36 | if (ip_route_output_key(&init_net, &rt, &fl) != 0) | 39 | fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; |
40 | if (ip_route_output_key(net, &rt, &fl) != 0) | ||
37 | return -1; | 41 | return -1; |
38 | 42 | ||
39 | /* Drop old route. */ | 43 | /* Drop old route. */ |
@@ -43,7 +47,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
43 | /* non-local src, find valid iif to satisfy | 47 | /* non-local src, find valid iif to satisfy |
44 | * rp-filter when calling ip_route_input. */ | 48 | * rp-filter when calling ip_route_input. */ |
45 | fl.nl_u.ip4_u.daddr = iph->saddr; | 49 | fl.nl_u.ip4_u.daddr = iph->saddr; |
46 | if (ip_route_output_key(&init_net, &rt, &fl) != 0) | 50 | if (ip_route_output_key(net, &rt, &fl) != 0) |
47 | return -1; | 51 | return -1; |
48 | 52 | ||
49 | odst = skb->dst; | 53 | odst = skb->dst; |
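Two things change in this hunk: the route lookups become per-namespace (dev_net(skb->dst->dev) rather than the global init_net), and packets owned by a transparent socket are re-routed as if their source address were local, with the socket's flow flags copied into the lookup key. inet_sk_flowi_flags() is presumably a one-line helper along these lines (a sketch of the assumed definition, not code from this patch; FLOWI_FLAG_ANYSRC is the flag name assumed here):

#include <net/inet_sock.h>	/* inet_sk() */

/* Assumed helper: allow a route lookup done on behalf of a
 * transparent socket to use a non-local source address. */
static inline int inet_sk_flowi_flags(const struct sock *sk)
{
	return inet_sk(sk)->transparent ? FLOWI_FLAG_ANYSRC : 0;
}

Without such a flag, ip_route_output_key() would refuse the spoofed source addresses that transparent proxying relies on.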
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 90eb7cb47e77..3816e1dc9295 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
@@ -5,10 +5,15 @@ | |||
5 | menu "IP: Netfilter Configuration" | 5 | menu "IP: Netfilter Configuration" |
6 | depends on INET && NETFILTER | 6 | depends on INET && NETFILTER |
7 | 7 | ||
8 | config NF_DEFRAG_IPV4 | ||
9 | tristate | ||
10 | default n | ||
11 | |||
8 | config NF_CONNTRACK_IPV4 | 12 | config NF_CONNTRACK_IPV4 |
9 | tristate "IPv4 connection tracking support (required for NAT)" | 13 | tristate "IPv4 connection tracking support (required for NAT)" |
10 | depends on NF_CONNTRACK | 14 | depends on NF_CONNTRACK |
11 | default m if NETFILTER_ADVANCED=n | 15 | default m if NETFILTER_ADVANCED=n |
16 | select NF_DEFRAG_IPV4 | ||
12 | ---help--- | 17 | ---help--- |
13 | Connection tracking keeps a record of what packets have passed | 18 | Connection tracking keeps a record of what packets have passed |
14 | through your machine, in order to figure out how they are related | 19 | through your machine, in order to figure out how they are related |
@@ -56,23 +61,30 @@ config IP_NF_IPTABLES | |||
56 | 61 | ||
57 | To compile it as a module, choose M here. If unsure, say N. | 62 | To compile it as a module, choose M here. If unsure, say N. |
58 | 63 | ||
64 | if IP_NF_IPTABLES | ||
65 | |||
59 | # The matches. | 66 | # The matches. |
60 | config IP_NF_MATCH_RECENT | 67 | config IP_NF_MATCH_ADDRTYPE |
61 | tristate '"recent" match support' | 68 | tristate '"addrtype" address type match support' |
62 | depends on IP_NF_IPTABLES | ||
63 | depends on NETFILTER_ADVANCED | 69 | depends on NETFILTER_ADVANCED |
64 | help | 70 | help |
65 | This match is used for creating one or many lists of recently | 71 | This option allows you to match what routing thinks of an address, |
66 | used addresses and then matching against that/those list(s). | 72 | e.g. UNICAST, LOCAL, BROADCAST, ... |
67 | 73 | ||
68 | Short options are available by using 'iptables -m recent -h' | 74 | If you want to compile it as a module, say M here and read |
69 | Official Website: <http://snowman.net/projects/ipt_recent/> | 75 | <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. |
76 | |||
77 | config IP_NF_MATCH_AH | ||
78 | tristate '"ah" match support' | ||
79 | depends on NETFILTER_ADVANCED | ||
80 | help | ||
81 | This match extension allows you to match a range of SPIs | ||
82 | inside AH header of IPSec packets. | ||
70 | 83 | ||
71 | To compile it as a module, choose M here. If unsure, say N. | 84 | To compile it as a module, choose M here. If unsure, say N. |
72 | 85 | ||
73 | config IP_NF_MATCH_ECN | 86 | config IP_NF_MATCH_ECN |
74 | tristate '"ecn" match support' | 87 | tristate '"ecn" match support' |
75 | depends on IP_NF_IPTABLES | ||
76 | depends on NETFILTER_ADVANCED | 88 | depends on NETFILTER_ADVANCED |
77 | help | 89 | help |
78 | This option adds an `ECN' match, which allows you to match against | 90 | This option adds an `ECN' match, which allows you to match against |
@@ -80,19 +92,8 @@ config IP_NF_MATCH_ECN | |||
80 | 92 | ||
81 | To compile it as a module, choose M here. If unsure, say N. | 93 | To compile it as a module, choose M here. If unsure, say N. |
82 | 94 | ||
83 | config IP_NF_MATCH_AH | ||
84 | tristate '"ah" match support' | ||
85 | depends on IP_NF_IPTABLES | ||
86 | depends on NETFILTER_ADVANCED | ||
87 | help | ||
88 | This match extension allows you to match a range of SPIs | ||
89 | inside AH header of IPSec packets. | ||
90 | |||
91 | To compile it as a module, choose M here. If unsure, say N. | ||
92 | |||
93 | config IP_NF_MATCH_TTL | 95 | config IP_NF_MATCH_TTL |
94 | tristate '"ttl" match support' | 96 | tristate '"ttl" match support' |
95 | depends on IP_NF_IPTABLES | ||
96 | depends on NETFILTER_ADVANCED | 97 | depends on NETFILTER_ADVANCED |
97 | help | 98 | help |
98 | This adds CONFIG_IP_NF_MATCH_TTL option, which enables the user | 99 | This adds CONFIG_IP_NF_MATCH_TTL option, which enables the user |
@@ -100,21 +101,9 @@ config IP_NF_MATCH_TTL | |||
100 | 101 | ||
101 | To compile it as a module, choose M here. If unsure, say N. | 102 | To compile it as a module, choose M here. If unsure, say N. |
102 | 103 | ||
103 | config IP_NF_MATCH_ADDRTYPE | ||
104 | tristate '"addrtype" address type match support' | ||
105 | depends on IP_NF_IPTABLES | ||
106 | depends on NETFILTER_ADVANCED | ||
107 | help | ||
108 | This option allows you to match what routing thinks of an address, | ||
109 | eg. UNICAST, LOCAL, BROADCAST, ... | ||
110 | |||
111 | If you want to compile it as a module, say M here and read | ||
112 | <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. | ||
113 | |||
114 | # `filter', generic and specific targets | 104 | # `filter', generic and specific targets |
115 | config IP_NF_FILTER | 105 | config IP_NF_FILTER |
116 | tristate "Packet filtering" | 106 | tristate "Packet filtering" |
117 | depends on IP_NF_IPTABLES | ||
118 | default m if NETFILTER_ADVANCED=n | 107 | default m if NETFILTER_ADVANCED=n |
119 | help | 108 | help |
120 | Packet filtering defines a table `filter', which has a series of | 109 | Packet filtering defines a table `filter', which has a series of |
@@ -136,7 +125,6 @@ config IP_NF_TARGET_REJECT | |||
136 | 125 | ||
137 | config IP_NF_TARGET_LOG | 126 | config IP_NF_TARGET_LOG |
138 | tristate "LOG target support" | 127 | tristate "LOG target support" |
139 | depends on IP_NF_IPTABLES | ||
140 | default m if NETFILTER_ADVANCED=n | 128 | default m if NETFILTER_ADVANCED=n |
141 | help | 129 | help |
142 | This option adds a `LOG' target, which allows you to create rules in | 130 | This option adds a `LOG' target, which allows you to create rules in |
@@ -146,7 +134,6 @@ config IP_NF_TARGET_LOG | |||
146 | 134 | ||
147 | config IP_NF_TARGET_ULOG | 135 | config IP_NF_TARGET_ULOG |
148 | tristate "ULOG target support" | 136 | tristate "ULOG target support" |
149 | depends on IP_NF_IPTABLES | ||
150 | default m if NETFILTER_ADVANCED=n | 137 | default m if NETFILTER_ADVANCED=n |
151 | ---help--- | 138 | ---help--- |
152 | 139 | ||
@@ -167,7 +154,7 @@ config IP_NF_TARGET_ULOG | |||
167 | # NAT + specific targets: nf_conntrack | 154 | # NAT + specific targets: nf_conntrack |
168 | config NF_NAT | 155 | config NF_NAT |
169 | tristate "Full NAT" | 156 | tristate "Full NAT" |
170 | depends on IP_NF_IPTABLES && NF_CONNTRACK_IPV4 | 157 | depends on NF_CONNTRACK_IPV4 |
171 | default m if NETFILTER_ADVANCED=n | 158 | default m if NETFILTER_ADVANCED=n |
172 | help | 159 | help |
173 | The Full NAT option allows masquerading, port forwarding and other | 160 | The Full NAT option allows masquerading, port forwarding and other |
@@ -194,26 +181,26 @@ config IP_NF_TARGET_MASQUERADE | |||
194 | 181 | ||
195 | To compile it as a module, choose M here. If unsure, say N. | 182 | To compile it as a module, choose M here. If unsure, say N. |
196 | 183 | ||
197 | config IP_NF_TARGET_REDIRECT | 184 | config IP_NF_TARGET_NETMAP |
198 | tristate "REDIRECT target support" | 185 | tristate "NETMAP target support" |
199 | depends on NF_NAT | 186 | depends on NF_NAT |
200 | depends on NETFILTER_ADVANCED | 187 | depends on NETFILTER_ADVANCED |
201 | help | 188 | help |
202 | REDIRECT is a special case of NAT: all incoming connections are | 189 | NETMAP is an implementation of static 1:1 NAT mapping of network |
203 | mapped onto the incoming interface's address, causing the packets to | 190 | addresses. It maps the network address part, while keeping the host |
204 | come to the local machine instead of passing through. This is | 191 | address part intact. |
205 | useful for transparent proxies. | ||
206 | 192 | ||
207 | To compile it as a module, choose M here. If unsure, say N. | 193 | To compile it as a module, choose M here. If unsure, say N. |
208 | 194 | ||
209 | config IP_NF_TARGET_NETMAP | 195 | config IP_NF_TARGET_REDIRECT |
210 | tristate "NETMAP target support" | 196 | tristate "REDIRECT target support" |
211 | depends on NF_NAT | 197 | depends on NF_NAT |
212 | depends on NETFILTER_ADVANCED | 198 | depends on NETFILTER_ADVANCED |
213 | help | 199 | help |
214 | NETMAP is an implementation of static 1:1 NAT mapping of network | 200 | REDIRECT is a special case of NAT: all incoming connections are |
215 | addresses. It maps the network address part, while keeping the host | 201 | mapped onto the incoming interface's address, causing the packets to |
216 | address part intact. | 202 | come to the local machine instead of passing through. This is |
203 | useful for transparent proxies. | ||
217 | 204 | ||
218 | To compile it as a module, choose M here. If unsure, say N. | 205 | To compile it as a module, choose M here. If unsure, say N. |
219 | 206 | ||
@@ -262,44 +249,43 @@ config NF_NAT_PROTO_SCTP | |||
262 | 249 | ||
263 | config NF_NAT_FTP | 250 | config NF_NAT_FTP |
264 | tristate | 251 | tristate |
265 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT | 252 | depends on NF_CONNTRACK && NF_NAT |
266 | default NF_NAT && NF_CONNTRACK_FTP | 253 | default NF_NAT && NF_CONNTRACK_FTP |
267 | 254 | ||
268 | config NF_NAT_IRC | 255 | config NF_NAT_IRC |
269 | tristate | 256 | tristate |
270 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT | 257 | depends on NF_CONNTRACK && NF_NAT |
271 | default NF_NAT && NF_CONNTRACK_IRC | 258 | default NF_NAT && NF_CONNTRACK_IRC |
272 | 259 | ||
273 | config NF_NAT_TFTP | 260 | config NF_NAT_TFTP |
274 | tristate | 261 | tristate |
275 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT | 262 | depends on NF_CONNTRACK && NF_NAT |
276 | default NF_NAT && NF_CONNTRACK_TFTP | 263 | default NF_NAT && NF_CONNTRACK_TFTP |
277 | 264 | ||
278 | config NF_NAT_AMANDA | 265 | config NF_NAT_AMANDA |
279 | tristate | 266 | tristate |
280 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT | 267 | depends on NF_CONNTRACK && NF_NAT |
281 | default NF_NAT && NF_CONNTRACK_AMANDA | 268 | default NF_NAT && NF_CONNTRACK_AMANDA |
282 | 269 | ||
283 | config NF_NAT_PPTP | 270 | config NF_NAT_PPTP |
284 | tristate | 271 | tristate |
285 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT | 272 | depends on NF_CONNTRACK && NF_NAT |
286 | default NF_NAT && NF_CONNTRACK_PPTP | 273 | default NF_NAT && NF_CONNTRACK_PPTP |
287 | select NF_NAT_PROTO_GRE | 274 | select NF_NAT_PROTO_GRE |
288 | 275 | ||
289 | config NF_NAT_H323 | 276 | config NF_NAT_H323 |
290 | tristate | 277 | tristate |
291 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT | 278 | depends on NF_CONNTRACK && NF_NAT |
292 | default NF_NAT && NF_CONNTRACK_H323 | 279 | default NF_NAT && NF_CONNTRACK_H323 |
293 | 280 | ||
294 | config NF_NAT_SIP | 281 | config NF_NAT_SIP |
295 | tristate | 282 | tristate |
296 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT | 283 | depends on NF_CONNTRACK && NF_NAT |
297 | default NF_NAT && NF_CONNTRACK_SIP | 284 | default NF_NAT && NF_CONNTRACK_SIP |
298 | 285 | ||
299 | # mangle + specific targets | 286 | # mangle + specific targets |
300 | config IP_NF_MANGLE | 287 | config IP_NF_MANGLE |
301 | tristate "Packet mangling" | 288 | tristate "Packet mangling" |
302 | depends on IP_NF_IPTABLES | ||
303 | default m if NETFILTER_ADVANCED=n | 289 | default m if NETFILTER_ADVANCED=n |
304 | help | 290 | help |
305 | This option adds a `mangle' table to iptables: see the man page for | 291 | This option adds a `mangle' table to iptables: see the man page for |
@@ -308,6 +294,19 @@ config IP_NF_MANGLE | |||
308 | 294 | ||
309 | To compile it as a module, choose M here. If unsure, say N. | 295 | To compile it as a module, choose M here. If unsure, say N. |
310 | 296 | ||
297 | config IP_NF_TARGET_CLUSTERIP | ||
298 | tristate "CLUSTERIP target support (EXPERIMENTAL)" | ||
299 | depends on IP_NF_MANGLE && EXPERIMENTAL | ||
300 | depends on NF_CONNTRACK_IPV4 | ||
301 | depends on NETFILTER_ADVANCED | ||
302 | select NF_CONNTRACK_MARK | ||
303 | help | ||
304 | The CLUSTERIP target allows you to build load-balancing clusters of | ||
305 | network servers without having a dedicated load-balancing | ||
306 | router/server/switch. | ||
307 | |||
308 | To compile it as a module, choose M here. If unsure, say N. | ||
309 | |||
311 | config IP_NF_TARGET_ECN | 310 | config IP_NF_TARGET_ECN |
312 | tristate "ECN target support" | 311 | tristate "ECN target support" |
313 | depends on IP_NF_MANGLE | 312 | depends on IP_NF_MANGLE |
@@ -338,23 +337,9 @@ config IP_NF_TARGET_TTL | |||
338 | 337 | ||
339 | To compile it as a module, choose M here. If unsure, say N. | 338 | To compile it as a module, choose M here. If unsure, say N. |
340 | 339 | ||
341 | config IP_NF_TARGET_CLUSTERIP | ||
342 | tristate "CLUSTERIP target support (EXPERIMENTAL)" | ||
343 | depends on IP_NF_MANGLE && EXPERIMENTAL | ||
344 | depends on NF_CONNTRACK_IPV4 | ||
345 | depends on NETFILTER_ADVANCED | ||
346 | select NF_CONNTRACK_MARK | ||
347 | help | ||
348 | The CLUSTERIP target allows you to build load-balancing clusters of | ||
349 | network servers without having a dedicated load-balancing | ||
350 | router/server/switch. | ||
351 | |||
352 | To compile it as a module, choose M here. If unsure, say N. | ||
353 | |||
354 | # raw + specific targets | 340 | # raw + specific targets |
355 | config IP_NF_RAW | 341 | config IP_NF_RAW |
356 | tristate 'raw table support (required for NOTRACK/TRACE)' | 342 | tristate 'raw table support (required for NOTRACK/TRACE)' |
357 | depends on IP_NF_IPTABLES | ||
358 | depends on NETFILTER_ADVANCED | 343 | depends on NETFILTER_ADVANCED |
359 | help | 344 | help |
360 | This option adds a `raw' table to iptables. This table is the very | 345 | This option adds a `raw' table to iptables. This table is the very |
@@ -367,7 +352,6 @@ config IP_NF_RAW | |||
367 | # security table for MAC policy | 352 | # security table for MAC policy |
368 | config IP_NF_SECURITY | 353 | config IP_NF_SECURITY |
369 | tristate "Security table" | 354 | tristate "Security table" |
370 | depends on IP_NF_IPTABLES | ||
371 | depends on SECURITY | 355 | depends on SECURITY |
372 | depends on NETFILTER_ADVANCED | 356 | depends on NETFILTER_ADVANCED |
373 | help | 357 | help |
@@ -376,6 +360,8 @@ config IP_NF_SECURITY | |||
376 | 360 | ||
377 | If unsure, say N. | 361 | If unsure, say N. |
378 | 362 | ||
363 | endif # IP_NF_IPTABLES | ||
364 | |||
379 | # ARP tables | 365 | # ARP tables |
380 | config IP_NF_ARPTABLES | 366 | config IP_NF_ARPTABLES |
381 | tristate "ARP tables support" | 367 | tristate "ARP tables support" |
@@ -388,9 +374,10 @@ config IP_NF_ARPTABLES | |||
388 | 374 | ||
389 | To compile it as a module, choose M here. If unsure, say N. | 375 | To compile it as a module, choose M here. If unsure, say N. |
390 | 376 | ||
377 | if IP_NF_ARPTABLES | ||
378 | |||
391 | config IP_NF_ARPFILTER | 379 | config IP_NF_ARPFILTER |
392 | tristate "ARP packet filtering" | 380 | tristate "ARP packet filtering" |
393 | depends on IP_NF_ARPTABLES | ||
394 | help | 381 | help |
395 | ARP packet filtering defines a table `filter', which has a series of | 382 | ARP packet filtering defines a table `filter', which has a series of |
396 | rules for simple ARP packet filtering at local input and | 383 | rules for simple ARP packet filtering at local input and |
@@ -401,10 +388,11 @@ config IP_NF_ARPFILTER | |||
401 | 388 | ||
402 | config IP_NF_ARP_MANGLE | 389 | config IP_NF_ARP_MANGLE |
403 | tristate "ARP payload mangling" | 390 | tristate "ARP payload mangling" |
404 | depends on IP_NF_ARPTABLES | ||
405 | help | 391 | help |
406 | Allows altering the ARP packet payload: source and destination | 392 | Allows altering the ARP packet payload: source and destination |
407 | hardware and network addresses. | 393 | hardware and network addresses. |
408 | 394 | ||
395 | endif # IP_NF_ARPTABLES | ||
396 | |||
409 | endmenu | 397 | endmenu |
410 | 398 | ||
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 3f31291f37ce..5f9b650d90fc 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
@@ -18,6 +18,9 @@ obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o | |||
18 | 18 | ||
19 | obj-$(CONFIG_NF_NAT) += nf_nat.o | 19 | obj-$(CONFIG_NF_NAT) += nf_nat.o |
20 | 20 | ||
21 | # defrag | ||
22 | obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o | ||
23 | |||
21 | # NAT helpers (nf_conntrack) | 24 | # NAT helpers (nf_conntrack) |
22 | obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o | 25 | obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o |
23 | obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o | 26 | obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o |
@@ -48,7 +51,6 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o | |||
48 | obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o | 51 | obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o |
49 | obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o | 52 | obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o |
50 | obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o | 53 | obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o |
51 | obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o | ||
52 | obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o | 54 | obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o |
53 | 55 | ||
54 | # targets | 56 | # targets |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 03e83a65aec5..8d70d29f1ccf 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -200,15 +200,12 @@ static inline int arp_checkentry(const struct arpt_arp *arp) | |||
200 | return 1; | 200 | return 1; |
201 | } | 201 | } |
202 | 202 | ||
203 | static unsigned int arpt_error(struct sk_buff *skb, | 203 | static unsigned int |
204 | const struct net_device *in, | 204 | arpt_error(struct sk_buff *skb, const struct xt_target_param *par) |
205 | const struct net_device *out, | ||
206 | unsigned int hooknum, | ||
207 | const struct xt_target *target, | ||
208 | const void *targinfo) | ||
209 | { | 205 | { |
210 | if (net_ratelimit()) | 206 | if (net_ratelimit()) |
211 | printk("arp_tables: error: '%s'\n", (char *)targinfo); | 207 | printk("arp_tables: error: '%s'\n", |
208 | (const char *)par->targinfo); | ||
212 | 209 | ||
213 | return NF_DROP; | 210 | return NF_DROP; |
214 | } | 211 | } |
@@ -232,6 +229,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
232 | const char *indev, *outdev; | 229 | const char *indev, *outdev; |
233 | void *table_base; | 230 | void *table_base; |
234 | const struct xt_table_info *private; | 231 | const struct xt_table_info *private; |
232 | struct xt_target_param tgpar; | ||
235 | 233 | ||
236 | if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) | 234 | if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) |
237 | return NF_DROP; | 235 | return NF_DROP; |
@@ -245,6 +243,11 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
245 | e = get_entry(table_base, private->hook_entry[hook]); | 243 | e = get_entry(table_base, private->hook_entry[hook]); |
246 | back = get_entry(table_base, private->underflow[hook]); | 244 | back = get_entry(table_base, private->underflow[hook]); |
247 | 245 | ||
246 | tgpar.in = in; | ||
247 | tgpar.out = out; | ||
248 | tgpar.hooknum = hook; | ||
249 | tgpar.family = NFPROTO_ARP; | ||
250 | |||
248 | arp = arp_hdr(skb); | 251 | arp = arp_hdr(skb); |
249 | do { | 252 | do { |
250 | if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { | 253 | if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { |
@@ -290,11 +293,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
290 | /* Targets which reenter must return | 293 | /* Targets which reenter must return |
291 | * abs. verdicts | 294 | * abs. verdicts |
292 | */ | 295 | */ |
296 | tgpar.target = t->u.kernel.target; | ||
297 | tgpar.targinfo = t->data; | ||
293 | verdict = t->u.kernel.target->target(skb, | 298 | verdict = t->u.kernel.target->target(skb, |
294 | in, out, | 299 | &tgpar); |
295 | hook, | ||
296 | t->u.kernel.target, | ||
297 | t->data); | ||
298 | 300 | ||
299 | /* Target might have changed stuff. */ | 301 | /* Target might have changed stuff. */ |
300 | arp = arp_hdr(skb); | 302 | arp = arp_hdr(skb); |
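This hunk is the heart of the series: the target hook's six-argument call collapses into a single struct xt_target_param. The per-packet fields are filled once before the rule walk, and only the per-rule fields are updated inside the loop. A sketch of the structure's shape as inferred from the call sites in this diff (the authoritative definition lives in include/linux/netfilter/x_tables.h):

/* Field set inferred from the hunks above, not quoted from the header. */
struct xt_target_param {
	const struct net_device *in, *out;	/* constant per packet */
	unsigned int hooknum;			/* constant per packet */
	u_int8_t family;			/* NFPROTO_* constant  */
	const struct xt_target *target;		/* updated per rule    */
	const void *targinfo;			/* updated per rule    */
};

Besides shortening every call site, the compound parameter lets later patches add fields without touching each extension's prototype again.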
@@ -456,23 +458,24 @@ static inline int check_entry(struct arpt_entry *e, const char *name) | |||
456 | 458 | ||
457 | static inline int check_target(struct arpt_entry *e, const char *name) | 459 | static inline int check_target(struct arpt_entry *e, const char *name) |
458 | { | 460 | { |
459 | struct arpt_entry_target *t; | 461 | struct arpt_entry_target *t = arpt_get_target(e); |
460 | struct xt_target *target; | ||
461 | int ret; | 462 | int ret; |
462 | 463 | struct xt_tgchk_param par = { | |
463 | t = arpt_get_target(e); | 464 | .table = name, |
464 | target = t->u.kernel.target; | 465 | .entryinfo = e, |
465 | 466 | .target = t->u.kernel.target, | |
466 | ret = xt_check_target(target, NF_ARP, t->u.target_size - sizeof(*t), | 467 | .targinfo = t->data, |
467 | name, e->comefrom, 0, 0); | 468 | .hook_mask = e->comefrom, |
468 | if (!ret && t->u.kernel.target->checkentry | 469 | .family = NFPROTO_ARP, |
469 | && !t->u.kernel.target->checkentry(name, e, target, t->data, | 470 | }; |
470 | e->comefrom)) { | 471 | |
472 | ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); | ||
473 | if (ret < 0) { | ||
471 | duprintf("arp_tables: check failed for `%s'.\n", | 474 | duprintf("arp_tables: check failed for `%s'.\n", |
472 | t->u.kernel.target->name); | 475 | t->u.kernel.target->name); |
473 | ret = -EINVAL; | 476 | return ret; |
474 | } | 477 | } |
475 | return ret; | 478 | return 0; |
476 | } | 479 | } |
477 | 480 | ||
478 | static inline int | 481 | static inline int |
@@ -488,7 +491,8 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, | |||
488 | return ret; | 491 | return ret; |
489 | 492 | ||
490 | t = arpt_get_target(e); | 493 | t = arpt_get_target(e); |
491 | target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name, | 494 | target = try_then_request_module(xt_find_target(NFPROTO_ARP, |
495 | t->u.user.name, | ||
492 | t->u.user.revision), | 496 | t->u.user.revision), |
493 | "arpt_%s", t->u.user.name); | 497 | "arpt_%s", t->u.user.name); |
494 | if (IS_ERR(target) || !target) { | 498 | if (IS_ERR(target) || !target) { |
@@ -554,15 +558,19 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, | |||
554 | 558 | ||
555 | static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) | 559 | static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) |
556 | { | 560 | { |
561 | struct xt_tgdtor_param par; | ||
557 | struct arpt_entry_target *t; | 562 | struct arpt_entry_target *t; |
558 | 563 | ||
559 | if (i && (*i)-- == 0) | 564 | if (i && (*i)-- == 0) |
560 | return 1; | 565 | return 1; |
561 | 566 | ||
562 | t = arpt_get_target(e); | 567 | t = arpt_get_target(e); |
563 | if (t->u.kernel.target->destroy) | 568 | par.target = t->u.kernel.target; |
564 | t->u.kernel.target->destroy(t->u.kernel.target, t->data); | 569 | par.targinfo = t->data; |
565 | module_put(t->u.kernel.target->me); | 570 | par.family = NFPROTO_ARP; |
571 | if (par.target->destroy != NULL) | ||
572 | par.target->destroy(&par); | ||
573 | module_put(par.target->me); | ||
566 | return 0; | 574 | return 0; |
567 | } | 575 | } |
568 | 576 | ||
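The destructor path gets a matching parameter block. Note the asymmetry with check_target() above: here par is a stack variable filled field by field, so the par.target spelling is correct. Its assumed shape, again inferred from the call sites rather than quoted from the header:

/* Sketch of the inferred destructor parameters. */
struct xt_tgdtor_param {
	const struct xt_target *target;
	void *targinfo;
	u_int8_t family;
};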
@@ -788,7 +796,7 @@ static void compat_standard_from_user(void *dst, void *src) | |||
788 | int v = *(compat_int_t *)src; | 796 | int v = *(compat_int_t *)src; |
789 | 797 | ||
790 | if (v > 0) | 798 | if (v > 0) |
791 | v += xt_compat_calc_jump(NF_ARP, v); | 799 | v += xt_compat_calc_jump(NFPROTO_ARP, v); |
792 | memcpy(dst, &v, sizeof(v)); | 800 | memcpy(dst, &v, sizeof(v)); |
793 | } | 801 | } |
794 | 802 | ||
@@ -797,7 +805,7 @@ static int compat_standard_to_user(void __user *dst, void *src) | |||
797 | compat_int_t cv = *(int *)src; | 805 | compat_int_t cv = *(int *)src; |
798 | 806 | ||
799 | if (cv > 0) | 807 | if (cv > 0) |
800 | cv -= xt_compat_calc_jump(NF_ARP, cv); | 808 | cv -= xt_compat_calc_jump(NFPROTO_ARP, cv); |
801 | return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; | 809 | return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; |
802 | } | 810 | } |
803 | 811 | ||
@@ -815,7 +823,7 @@ static int compat_calc_entry(struct arpt_entry *e, | |||
815 | t = arpt_get_target(e); | 823 | t = arpt_get_target(e); |
816 | off += xt_compat_target_offset(t->u.kernel.target); | 824 | off += xt_compat_target_offset(t->u.kernel.target); |
817 | newinfo->size -= off; | 825 | newinfo->size -= off; |
818 | ret = xt_compat_add_offset(NF_ARP, entry_offset, off); | 826 | ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); |
819 | if (ret) | 827 | if (ret) |
820 | return ret; | 828 | return ret; |
821 | 829 | ||
@@ -866,9 +874,9 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) | |||
866 | name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; | 874 | name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; |
867 | #ifdef CONFIG_COMPAT | 875 | #ifdef CONFIG_COMPAT |
868 | if (compat) | 876 | if (compat) |
869 | xt_compat_lock(NF_ARP); | 877 | xt_compat_lock(NFPROTO_ARP); |
870 | #endif | 878 | #endif |
871 | t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name), | 879 | t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), |
872 | "arptable_%s", name); | 880 | "arptable_%s", name); |
873 | if (t && !IS_ERR(t)) { | 881 | if (t && !IS_ERR(t)) { |
874 | struct arpt_getinfo info; | 882 | struct arpt_getinfo info; |
@@ -878,7 +886,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) | |||
878 | if (compat) { | 886 | if (compat) { |
879 | struct xt_table_info tmp; | 887 | struct xt_table_info tmp; |
880 | ret = compat_table_info(private, &tmp); | 888 | ret = compat_table_info(private, &tmp); |
881 | xt_compat_flush_offsets(NF_ARP); | 889 | xt_compat_flush_offsets(NFPROTO_ARP); |
882 | private = &tmp; | 890 | private = &tmp; |
883 | } | 891 | } |
884 | #endif | 892 | #endif |
@@ -901,7 +909,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) | |||
901 | ret = t ? PTR_ERR(t) : -ENOENT; | 909 | ret = t ? PTR_ERR(t) : -ENOENT; |
902 | #ifdef CONFIG_COMPAT | 910 | #ifdef CONFIG_COMPAT |
903 | if (compat) | 911 | if (compat) |
904 | xt_compat_unlock(NF_ARP); | 912 | xt_compat_unlock(NFPROTO_ARP); |
905 | #endif | 913 | #endif |
906 | return ret; | 914 | return ret; |
907 | } | 915 | } |
@@ -925,7 +933,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, | |||
925 | return -EINVAL; | 933 | return -EINVAL; |
926 | } | 934 | } |
927 | 935 | ||
928 | t = xt_find_table_lock(net, NF_ARP, get.name); | 936 | t = xt_find_table_lock(net, NFPROTO_ARP, get.name); |
929 | if (t && !IS_ERR(t)) { | 937 | if (t && !IS_ERR(t)) { |
930 | const struct xt_table_info *private = t->private; | 938 | const struct xt_table_info *private = t->private; |
931 | 939 | ||
@@ -967,7 +975,7 @@ static int __do_replace(struct net *net, const char *name, | |||
967 | goto out; | 975 | goto out; |
968 | } | 976 | } |
969 | 977 | ||
970 | t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name), | 978 | t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), |
971 | "arptable_%s", name); | 979 | "arptable_%s", name); |
972 | if (!t || IS_ERR(t)) { | 980 | if (!t || IS_ERR(t)) { |
973 | ret = t ? PTR_ERR(t) : -ENOENT; | 981 | ret = t ? PTR_ERR(t) : -ENOENT; |
@@ -1134,7 +1142,7 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, | |||
1134 | goto free; | 1142 | goto free; |
1135 | } | 1143 | } |
1136 | 1144 | ||
1137 | t = xt_find_table_lock(net, NF_ARP, name); | 1145 | t = xt_find_table_lock(net, NFPROTO_ARP, name); |
1138 | if (!t || IS_ERR(t)) { | 1146 | if (!t || IS_ERR(t)) { |
1139 | ret = t ? PTR_ERR(t) : -ENOENT; | 1147 | ret = t ? PTR_ERR(t) : -ENOENT; |
1140 | goto free; | 1148 | goto free; |
@@ -1218,7 +1226,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, | |||
1218 | entry_offset = (void *)e - (void *)base; | 1226 | entry_offset = (void *)e - (void *)base; |
1219 | 1227 | ||
1220 | t = compat_arpt_get_target(e); | 1228 | t = compat_arpt_get_target(e); |
1221 | target = try_then_request_module(xt_find_target(NF_ARP, | 1229 | target = try_then_request_module(xt_find_target(NFPROTO_ARP, |
1222 | t->u.user.name, | 1230 | t->u.user.name, |
1223 | t->u.user.revision), | 1231 | t->u.user.revision), |
1224 | "arpt_%s", t->u.user.name); | 1232 | "arpt_%s", t->u.user.name); |
@@ -1232,7 +1240,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, | |||
1232 | 1240 | ||
1233 | off += xt_compat_target_offset(target); | 1241 | off += xt_compat_target_offset(target); |
1234 | *size += off; | 1242 | *size += off; |
1235 | ret = xt_compat_add_offset(NF_ARP, entry_offset, off); | 1243 | ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); |
1236 | if (ret) | 1244 | if (ret) |
1237 | goto release_target; | 1245 | goto release_target; |
1238 | 1246 | ||
@@ -1333,7 +1341,7 @@ static int translate_compat_table(const char *name, | |||
1333 | 1341 | ||
1334 | duprintf("translate_compat_table: size %u\n", info->size); | 1342 | duprintf("translate_compat_table: size %u\n", info->size); |
1335 | j = 0; | 1343 | j = 0; |
1336 | xt_compat_lock(NF_ARP); | 1344 | xt_compat_lock(NFPROTO_ARP); |
1337 | /* Walk through entries, checking offsets. */ | 1345 | /* Walk through entries, checking offsets. */ |
1338 | ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, | 1346 | ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, |
1339 | check_compat_entry_size_and_hooks, | 1347 | check_compat_entry_size_and_hooks, |
@@ -1383,8 +1391,8 @@ static int translate_compat_table(const char *name, | |||
1383 | ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, | 1391 | ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, |
1384 | compat_copy_entry_from_user, | 1392 | compat_copy_entry_from_user, |
1385 | &pos, &size, name, newinfo, entry1); | 1393 | &pos, &size, name, newinfo, entry1); |
1386 | xt_compat_flush_offsets(NF_ARP); | 1394 | xt_compat_flush_offsets(NFPROTO_ARP); |
1387 | xt_compat_unlock(NF_ARP); | 1395 | xt_compat_unlock(NFPROTO_ARP); |
1388 | if (ret) | 1396 | if (ret) |
1389 | goto free_newinfo; | 1397 | goto free_newinfo; |
1390 | 1398 | ||
@@ -1420,8 +1428,8 @@ out: | |||
1420 | COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); | 1428 | COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); |
1421 | return ret; | 1429 | return ret; |
1422 | out_unlock: | 1430 | out_unlock: |
1423 | xt_compat_flush_offsets(NF_ARP); | 1431 | xt_compat_flush_offsets(NFPROTO_ARP); |
1424 | xt_compat_unlock(NF_ARP); | 1432 | xt_compat_unlock(NFPROTO_ARP); |
1425 | goto out; | 1433 | goto out; |
1426 | } | 1434 | } |
1427 | 1435 | ||
@@ -1607,8 +1615,8 @@ static int compat_get_entries(struct net *net, | |||
1607 | return -EINVAL; | 1615 | return -EINVAL; |
1608 | } | 1616 | } |
1609 | 1617 | ||
1610 | xt_compat_lock(NF_ARP); | 1618 | xt_compat_lock(NFPROTO_ARP); |
1611 | t = xt_find_table_lock(net, NF_ARP, get.name); | 1619 | t = xt_find_table_lock(net, NFPROTO_ARP, get.name); |
1612 | if (t && !IS_ERR(t)) { | 1620 | if (t && !IS_ERR(t)) { |
1613 | const struct xt_table_info *private = t->private; | 1621 | const struct xt_table_info *private = t->private; |
1614 | struct xt_table_info info; | 1622 | struct xt_table_info info; |
@@ -1623,13 +1631,13 @@ static int compat_get_entries(struct net *net, | |||
1623 | private->size, get.size); | 1631 | private->size, get.size); |
1624 | ret = -EAGAIN; | 1632 | ret = -EAGAIN; |
1625 | } | 1633 | } |
1626 | xt_compat_flush_offsets(NF_ARP); | 1634 | xt_compat_flush_offsets(NFPROTO_ARP); |
1627 | module_put(t->me); | 1635 | module_put(t->me); |
1628 | xt_table_unlock(t); | 1636 | xt_table_unlock(t); |
1629 | } else | 1637 | } else |
1630 | ret = t ? PTR_ERR(t) : -ENOENT; | 1638 | ret = t ? PTR_ERR(t) : -ENOENT; |
1631 | 1639 | ||
1632 | xt_compat_unlock(NF_ARP); | 1640 | xt_compat_unlock(NFPROTO_ARP); |
1633 | return ret; | 1641 | return ret; |
1634 | } | 1642 | } |
1635 | 1643 | ||
@@ -1709,7 +1717,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len | |||
1709 | break; | 1717 | break; |
1710 | } | 1718 | } |
1711 | 1719 | ||
1712 | try_then_request_module(xt_find_revision(NF_ARP, rev.name, | 1720 | try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, |
1713 | rev.revision, 1, &ret), | 1721 | rev.revision, 1, &ret), |
1714 | "arpt_%s", rev.name); | 1722 | "arpt_%s", rev.name); |
1715 | break; | 1723 | break; |
@@ -1787,7 +1795,7 @@ void arpt_unregister_table(struct xt_table *table) | |||
1787 | static struct xt_target arpt_standard_target __read_mostly = { | 1795 | static struct xt_target arpt_standard_target __read_mostly = { |
1788 | .name = ARPT_STANDARD_TARGET, | 1796 | .name = ARPT_STANDARD_TARGET, |
1789 | .targetsize = sizeof(int), | 1797 | .targetsize = sizeof(int), |
1790 | .family = NF_ARP, | 1798 | .family = NFPROTO_ARP, |
1791 | #ifdef CONFIG_COMPAT | 1799 | #ifdef CONFIG_COMPAT |
1792 | .compatsize = sizeof(compat_int_t), | 1800 | .compatsize = sizeof(compat_int_t), |
1793 | .compat_from_user = compat_standard_from_user, | 1801 | .compat_from_user = compat_standard_from_user, |
@@ -1799,7 +1807,7 @@ static struct xt_target arpt_error_target __read_mostly = { | |||
1799 | .name = ARPT_ERROR_TARGET, | 1807 | .name = ARPT_ERROR_TARGET, |
1800 | .target = arpt_error, | 1808 | .target = arpt_error, |
1801 | .targetsize = ARPT_FUNCTION_MAXNAMELEN, | 1809 | .targetsize = ARPT_FUNCTION_MAXNAMELEN, |
1802 | .family = NF_ARP, | 1810 | .family = NFPROTO_ARP, |
1803 | }; | 1811 | }; |
1804 | 1812 | ||
1805 | static struct nf_sockopt_ops arpt_sockopts = { | 1813 | static struct nf_sockopt_ops arpt_sockopts = { |
@@ -1821,12 +1829,12 @@ static struct nf_sockopt_ops arpt_sockopts = { | |||
1821 | 1829 | ||
1822 | static int __net_init arp_tables_net_init(struct net *net) | 1830 | static int __net_init arp_tables_net_init(struct net *net) |
1823 | { | 1831 | { |
1824 | return xt_proto_init(net, NF_ARP); | 1832 | return xt_proto_init(net, NFPROTO_ARP); |
1825 | } | 1833 | } |
1826 | 1834 | ||
1827 | static void __net_exit arp_tables_net_exit(struct net *net) | 1835 | static void __net_exit arp_tables_net_exit(struct net *net) |
1828 | { | 1836 | { |
1829 | xt_proto_fini(net, NF_ARP); | 1837 | xt_proto_fini(net, NFPROTO_ARP); |
1830 | } | 1838 | } |
1831 | 1839 | ||
1832 | static struct pernet_operations arp_tables_net_ops = { | 1840 | static struct pernet_operations arp_tables_net_ops = { |
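The remaining churn in this file is the mechanical NF_ARP to NFPROTO_ARP rename: the series introduces a single NFPROTO_* numbering space for every xtables family so ARP, bridge and IP tables can share generic code. The constants are chosen to coincide with the families they replace; a sketch of the assumed enum (values quoted from memory of that era's include/linux/netfilter.h, so treat them as illustrative):

enum {
	NFPROTO_UNSPEC =  0,
	NFPROTO_IPV4   =  2,	/* == AF_INET   */
	NFPROTO_ARP    =  3,	/* == NF_ARP    */
	NFPROTO_BRIDGE =  7,	/* == PF_BRIDGE */
	NFPROTO_IPV6   = 10,	/* == AF_INET6  */
	NFPROTO_DECNET = 12,	/* == AF_DECnet */
};

Because the values line up with the old ones, the rename is source-level only and changes no binary layout.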
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index a385959d2655..b0d5b1d0a769 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c | |||
@@ -9,12 +9,9 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); | |||
9 | MODULE_DESCRIPTION("arptables arp payload mangle target"); | 9 | MODULE_DESCRIPTION("arptables arp payload mangle target"); |
10 | 10 | ||
11 | static unsigned int | 11 | static unsigned int |
12 | target(struct sk_buff *skb, | 12 | target(struct sk_buff *skb, const struct xt_target_param *par) |
13 | const struct net_device *in, const struct net_device *out, | ||
14 | unsigned int hooknum, const struct xt_target *target, | ||
15 | const void *targinfo) | ||
16 | { | 13 | { |
17 | const struct arpt_mangle *mangle = targinfo; | 14 | const struct arpt_mangle *mangle = par->targinfo; |
18 | const struct arphdr *arp; | 15 | const struct arphdr *arp; |
19 | unsigned char *arpptr; | 16 | unsigned char *arpptr; |
20 | int pln, hln; | 17 | int pln, hln; |
@@ -57,11 +54,9 @@ target(struct sk_buff *skb, | |||
57 | return mangle->target; | 54 | return mangle->target; |
58 | } | 55 | } |
59 | 56 | ||
60 | static bool | 57 | static bool checkentry(const struct xt_tgchk_param *par) |
61 | checkentry(const char *tablename, const void *e, const struct xt_target *target, | ||
62 | void *targinfo, unsigned int hook_mask) | ||
63 | { | 58 | { |
64 | const struct arpt_mangle *mangle = targinfo; | 59 | const struct arpt_mangle *mangle = par->targinfo; |
65 | 60 | ||
66 | if (mangle->flags & ~ARPT_MANGLE_MASK || | 61 | if (mangle->flags & ~ARPT_MANGLE_MASK || |
67 | !(mangle->flags & ARPT_MANGLE_MASK)) | 62 | !(mangle->flags & ARPT_MANGLE_MASK)) |
@@ -75,7 +70,7 @@ checkentry(const char *tablename, const void *e, const struct xt_target *target, | |||
75 | 70 | ||
76 | static struct xt_target arpt_mangle_reg __read_mostly = { | 71 | static struct xt_target arpt_mangle_reg __read_mostly = { |
77 | .name = "mangle", | 72 | .name = "mangle", |
78 | .family = NF_ARP, | 73 | .family = NFPROTO_ARP, |
79 | .target = target, | 74 | .target = target, |
80 | .targetsize = sizeof(struct arpt_mangle), | 75 | .targetsize = sizeof(struct arpt_mangle), |
81 | .checkentry = checkentry, | 76 | .checkentry = checkentry, |
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 082f5dd3156c..bee3d117661a 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c | |||
@@ -51,7 +51,7 @@ static struct xt_table packet_filter = { | |||
51 | .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), | 51 | .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), |
52 | .private = NULL, | 52 | .private = NULL, |
53 | .me = THIS_MODULE, | 53 | .me = THIS_MODULE, |
54 | .af = NF_ARP, | 54 | .af = NFPROTO_ARP, |
55 | }; | 55 | }; |
56 | 56 | ||
57 | /* The work comes in here from netfilter.c */ | 57 | /* The work comes in here from netfilter.c */ |
@@ -89,21 +89,21 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = { | |||
89 | { | 89 | { |
90 | .hook = arpt_in_hook, | 90 | .hook = arpt_in_hook, |
91 | .owner = THIS_MODULE, | 91 | .owner = THIS_MODULE, |
92 | .pf = NF_ARP, | 92 | .pf = NFPROTO_ARP, |
93 | .hooknum = NF_ARP_IN, | 93 | .hooknum = NF_ARP_IN, |
94 | .priority = NF_IP_PRI_FILTER, | 94 | .priority = NF_IP_PRI_FILTER, |
95 | }, | 95 | }, |
96 | { | 96 | { |
97 | .hook = arpt_out_hook, | 97 | .hook = arpt_out_hook, |
98 | .owner = THIS_MODULE, | 98 | .owner = THIS_MODULE, |
99 | .pf = NF_ARP, | 99 | .pf = NFPROTO_ARP, |
100 | .hooknum = NF_ARP_OUT, | 100 | .hooknum = NF_ARP_OUT, |
101 | .priority = NF_IP_PRI_FILTER, | 101 | .priority = NF_IP_PRI_FILTER, |
102 | }, | 102 | }, |
103 | { | 103 | { |
104 | .hook = arpt_forward_hook, | 104 | .hook = arpt_forward_hook, |
105 | .owner = THIS_MODULE, | 105 | .owner = THIS_MODULE, |
106 | .pf = NF_ARP, | 106 | .pf = NFPROTO_ARP, |
107 | .hooknum = NF_ARP_FORWARD, | 107 | .hooknum = NF_ARP_FORWARD, |
108 | .priority = NF_IP_PRI_FILTER, | 108 | .priority = NF_IP_PRI_FILTER, |
109 | }, | 109 | }, |
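For orientation, a hook array like arpt_ops is registered in one call at module init; a minimal sketch of the surrounding boilerplate, which this hunk does not show (error handling trimmed):

/* Sketch: batch (un)registration of the hook array above. */
static int __init arptable_filter_hook_init(void)
{
	return nf_register_hooks(arpt_ops, ARRAY_SIZE(arpt_ops));
}

static void __exit arptable_filter_hook_exit(void)
{
	nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops));
}

nf_register_hooks() unwinds any already-registered entries when a later one fails, so no manual rollback loop is needed.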
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 4e7c719445c2..213fb27debc1 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -171,31 +171,25 @@ ip_checkentry(const struct ipt_ip *ip) | |||
171 | } | 171 | } |
172 | 172 | ||
173 | static unsigned int | 173 | static unsigned int |
174 | ipt_error(struct sk_buff *skb, | 174 | ipt_error(struct sk_buff *skb, const struct xt_target_param *par) |
175 | const struct net_device *in, | ||
176 | const struct net_device *out, | ||
177 | unsigned int hooknum, | ||
178 | const struct xt_target *target, | ||
179 | const void *targinfo) | ||
180 | { | 175 | { |
181 | if (net_ratelimit()) | 176 | if (net_ratelimit()) |
182 | printk("ip_tables: error: `%s'\n", (char *)targinfo); | 177 | printk("ip_tables: error: `%s'\n", |
178 | (const char *)par->targinfo); | ||
183 | 179 | ||
184 | return NF_DROP; | 180 | return NF_DROP; |
185 | } | 181 | } |
186 | 182 | ||
187 | /* Performance critical - called for every packet */ | 183 | /* Performance critical - called for every packet */ |
188 | static inline bool | 184 | static inline bool |
189 | do_match(struct ipt_entry_match *m, | 185 | do_match(struct ipt_entry_match *m, const struct sk_buff *skb, |
190 | const struct sk_buff *skb, | 186 | struct xt_match_param *par) |
191 | const struct net_device *in, | ||
192 | const struct net_device *out, | ||
193 | int offset, | ||
194 | bool *hotdrop) | ||
195 | { | 187 | { |
188 | par->match = m->u.kernel.match; | ||
189 | par->matchinfo = m->data; | ||
190 | |||
196 | /* Stop iteration if it doesn't match */ | 191 | /* Stop iteration if it doesn't match */ |
197 | if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data, | 192 | if (!m->u.kernel.match->match(skb, par)) |
198 | offset, ip_hdrlen(skb), hotdrop)) | ||
199 | return true; | 193 | return true; |
200 | else | 194 | else |
201 | return false; | 195 | return false; |
@@ -326,7 +320,6 @@ ipt_do_table(struct sk_buff *skb, | |||
326 | struct xt_table *table) | 320 | struct xt_table *table) |
327 | { | 321 | { |
328 | static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); | 322 | static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); |
329 | u_int16_t offset; | ||
330 | const struct iphdr *ip; | 323 | const struct iphdr *ip; |
331 | u_int16_t datalen; | 324 | u_int16_t datalen; |
332 | bool hotdrop = false; | 325 | bool hotdrop = false; |
@@ -336,6 +329,8 @@ ipt_do_table(struct sk_buff *skb, | |||
336 | void *table_base; | 329 | void *table_base; |
337 | struct ipt_entry *e, *back; | 330 | struct ipt_entry *e, *back; |
338 | struct xt_table_info *private; | 331 | struct xt_table_info *private; |
332 | struct xt_match_param mtpar; | ||
333 | struct xt_target_param tgpar; | ||
339 | 334 | ||
340 | /* Initialization */ | 335 | /* Initialization */ |
341 | ip = ip_hdr(skb); | 336 | ip = ip_hdr(skb); |
@@ -348,7 +343,13 @@ ipt_do_table(struct sk_buff *skb, | |||
348 | * things we don't know, ie. tcp syn flag or ports). If the | 343 | * things we don't know, ie. tcp syn flag or ports). If the |
349 | * rule is also a fragment-specific rule, non-fragments won't | 344 | * rule is also a fragment-specific rule, non-fragments won't |
350 | * match it. */ | 345 | * match it. */ |
351 | offset = ntohs(ip->frag_off) & IP_OFFSET; | 346 | mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; |
347 | mtpar.thoff = ip_hdrlen(skb); | ||
348 | mtpar.hotdrop = &hotdrop; | ||
349 | mtpar.in = tgpar.in = in; | ||
350 | mtpar.out = tgpar.out = out; | ||
351 | mtpar.family = tgpar.family = NFPROTO_IPV4; | ||
352 | tgpar.hooknum = hook; | ||
352 | 353 | ||
353 | read_lock_bh(&table->lock); | 354 | read_lock_bh(&table->lock); |
354 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); | 355 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); |
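ipt_do_table() mirrors the arptables conversion, but IPv4 needs both halves of the refactor: an xt_match_param for the match walk and an xt_target_param for the verdict. Both are filled once per packet here; do_match() only plugs in the per-rule match and matchinfo. The match-side structure, once more inferred from the call sites rather than quoted from the header:

/* Inferred per-packet match parameters. */
struct xt_match_param {
	const struct net_device *in, *out;
	const struct xt_match *match;	/* set per rule in do_match() */
	const void *matchinfo;		/* set per rule in do_match() */
	int fragoff;			/* IPv4 fragment offset       */
	unsigned int thoff;		/* transport header offset    */
	bool *hotdrop;			/* extension requests a drop  */
	u_int8_t family;
};

Folding the old offset/protoff/hotdrop argument triple into the struct also explains the icmp_match() edits further down: the match now reads par->fragoff and par->thoff and writes *par->hotdrop.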
@@ -362,12 +363,11 @@ ipt_do_table(struct sk_buff *skb, | |||
362 | do { | 363 | do { |
363 | IP_NF_ASSERT(e); | 364 | IP_NF_ASSERT(e); |
364 | IP_NF_ASSERT(back); | 365 | IP_NF_ASSERT(back); |
365 | if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { | 366 | if (ip_packet_match(ip, indev, outdev, |
367 | &e->ip, mtpar.fragoff)) { | ||
366 | struct ipt_entry_target *t; | 368 | struct ipt_entry_target *t; |
367 | 369 | ||
368 | if (IPT_MATCH_ITERATE(e, do_match, | 370 | if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) |
369 | skb, in, out, | ||
370 | offset, &hotdrop) != 0) | ||
371 | goto no_match; | 371 | goto no_match; |
372 | 372 | ||
373 | ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); | 373 | ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); |
@@ -413,16 +413,14 @@ ipt_do_table(struct sk_buff *skb, | |||
413 | } else { | 413 | } else { |
414 | /* Targets which reenter must return | 414 | /* Targets which reenter must return |
415 | abs. verdicts */ | 415 | abs. verdicts */ |
416 | tgpar.target = t->u.kernel.target; | ||
417 | tgpar.targinfo = t->data; | ||
416 | #ifdef CONFIG_NETFILTER_DEBUG | 418 | #ifdef CONFIG_NETFILTER_DEBUG |
417 | ((struct ipt_entry *)table_base)->comefrom | 419 | ((struct ipt_entry *)table_base)->comefrom |
418 | = 0xeeeeeeec; | 420 | = 0xeeeeeeec; |
419 | #endif | 421 | #endif |
420 | verdict = t->u.kernel.target->target(skb, | 422 | verdict = t->u.kernel.target->target(skb, |
421 | in, out, | 423 | &tgpar); |
422 | hook, | ||
423 | t->u.kernel.target, | ||
424 | t->data); | ||
425 | |||
426 | #ifdef CONFIG_NETFILTER_DEBUG | 424 | #ifdef CONFIG_NETFILTER_DEBUG |
427 | if (((struct ipt_entry *)table_base)->comefrom | 425 | if (((struct ipt_entry *)table_base)->comefrom |
428 | != 0xeeeeeeec | 426 | != 0xeeeeeeec |
@@ -575,12 +573,17 @@ mark_source_chains(struct xt_table_info *newinfo, | |||
575 | static int | 573 | static int |
576 | cleanup_match(struct ipt_entry_match *m, unsigned int *i) | 574 | cleanup_match(struct ipt_entry_match *m, unsigned int *i) |
577 | { | 575 | { |
576 | struct xt_mtdtor_param par; | ||
577 | |||
578 | if (i && (*i)-- == 0) | 578 | if (i && (*i)-- == 0) |
579 | return 1; | 579 | return 1; |
580 | 580 | ||
581 | if (m->u.kernel.match->destroy) | 581 | par.match = m->u.kernel.match; |
582 | m->u.kernel.match->destroy(m->u.kernel.match, m->data); | 582 | par.matchinfo = m->data; |
583 | module_put(m->u.kernel.match->me); | 583 | par.family = NFPROTO_IPV4; |
584 | if (par.match->destroy != NULL) | ||
585 | par.match->destroy(&par); | ||
586 | module_put(par.match->me); | ||
584 | return 0; | 587 | return 0; |
585 | } | 588 | } |
586 | 589 | ||
@@ -606,34 +609,28 @@ check_entry(struct ipt_entry *e, const char *name) | |||
606 | } | 609 | } |
607 | 610 | ||
608 | static int | 611 | static int |
609 | check_match(struct ipt_entry_match *m, const char *name, | 612 | check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, |
610 | const struct ipt_ip *ip, | 613 | unsigned int *i) |
611 | unsigned int hookmask, unsigned int *i) | ||
612 | { | 614 | { |
613 | struct xt_match *match; | 615 | const struct ipt_ip *ip = par->entryinfo; |
614 | int ret; | 616 | int ret; |
615 | 617 | ||
616 | match = m->u.kernel.match; | 618 | par->match = m->u.kernel.match; |
617 | ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m), | 619 | par->matchinfo = m->data; |
618 | name, hookmask, ip->proto, | 620 | |
619 | ip->invflags & IPT_INV_PROTO); | 621 | ret = xt_check_match(par, m->u.match_size - sizeof(*m), |
620 | if (!ret && m->u.kernel.match->checkentry | 622 | ip->proto, ip->invflags & IPT_INV_PROTO); |
621 | && !m->u.kernel.match->checkentry(name, ip, match, m->data, | 623 | if (ret < 0) { |
622 | hookmask)) { | ||
623 | duprintf("ip_tables: check failed for `%s'.\n", | 624 | duprintf("ip_tables: check failed for `%s'.\n", |
624 | m->u.kernel.match->name); | 625 | par->match->name); |
625 | ret = -EINVAL; | 626 | return ret; |
626 | } | 627 | } |
627 | if (!ret) | 628 | ++*i; |
628 | (*i)++; | 629 | return 0; |
629 | return ret; | ||
630 | } | 630 | } |
631 | 631 | ||
632 | static int | 632 | static int |
633 | find_check_match(struct ipt_entry_match *m, | 633 | find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, |
634 | const char *name, | ||
635 | const struct ipt_ip *ip, | ||
636 | unsigned int hookmask, | ||
637 | unsigned int *i) | 634 | unsigned int *i) |
638 | { | 635 | { |
639 | struct xt_match *match; | 636 | struct xt_match *match; |
@@ -648,7 +645,7 @@ find_check_match(struct ipt_entry_match *m, | |||
648 | } | 645 | } |
649 | m->u.kernel.match = match; | 646 | m->u.kernel.match = match; |
650 | 647 | ||
651 | ret = check_match(m, name, ip, hookmask, i); | 648 | ret = check_match(m, par, i); |
652 | if (ret) | 649 | if (ret) |
653 | goto err; | 650 | goto err; |
654 | 651 | ||
@@ -660,23 +657,25 @@ err: | |||
660 | 657 | ||
661 | static int check_target(struct ipt_entry *e, const char *name) | 658 | static int check_target(struct ipt_entry *e, const char *name) |
662 | { | 659 | { |
663 | struct ipt_entry_target *t; | 660 | struct ipt_entry_target *t = ipt_get_target(e); |
664 | struct xt_target *target; | 661 | struct xt_tgchk_param par = { |
662 | .table = name, | ||
663 | .entryinfo = e, | ||
664 | .target = t->u.kernel.target, | ||
665 | .targinfo = t->data, | ||
666 | .hook_mask = e->comefrom, | ||
667 | .family = NFPROTO_IPV4, | ||
668 | }; | ||
665 | int ret; | 669 | int ret; |
666 | 670 | ||
667 | t = ipt_get_target(e); | 671 | ret = xt_check_target(&par, t->u.target_size - sizeof(*t), |
668 | target = t->u.kernel.target; | 672 | e->ip.proto, e->ip.invflags & IPT_INV_PROTO); |
669 | ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), | 673 | if (ret < 0) { |
670 | name, e->comefrom, e->ip.proto, | ||
671 | e->ip.invflags & IPT_INV_PROTO); | ||
672 | if (!ret && t->u.kernel.target->checkentry | ||
673 | && !t->u.kernel.target->checkentry(name, e, target, t->data, | ||
674 | e->comefrom)) { | ||
675 | duprintf("ip_tables: check failed for `%s'.\n", | 674 | duprintf("ip_tables: check failed for `%s'.\n", |
676 | t->u.kernel.target->name); | 675 | t->u.kernel.target->name); |
677 | ret = -EINVAL; | 676 | return ret; |
678 | } | 677 | } |
679 | return ret; | 678 | return 0; |
680 | } | 679 | } |
681 | 680 | ||
682 | static int | 681 | static int |
@@ -687,14 +686,18 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, | |||
687 | struct xt_target *target; | 686 | struct xt_target *target; |
688 | int ret; | 687 | int ret; |
689 | unsigned int j; | 688 | unsigned int j; |
689 | struct xt_mtchk_param mtpar; | ||
690 | 690 | ||
691 | ret = check_entry(e, name); | 691 | ret = check_entry(e, name); |
692 | if (ret) | 692 | if (ret) |
693 | return ret; | 693 | return ret; |
694 | 694 | ||
695 | j = 0; | 695 | j = 0; |
696 | ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip, | 696 | mtpar.table = name; |
697 | e->comefrom, &j); | 697 | mtpar.entryinfo = &e->ip; |
698 | mtpar.hook_mask = e->comefrom; | ||
699 | mtpar.family = NFPROTO_IPV4; | ||
700 | ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j); | ||
698 | if (ret != 0) | 701 | if (ret != 0) |
699 | goto cleanup_matches; | 702 | goto cleanup_matches; |
700 | 703 | ||
@@ -769,6 +772,7 @@ check_entry_size_and_hooks(struct ipt_entry *e, | |||
769 | static int | 772 | static int |
770 | cleanup_entry(struct ipt_entry *e, unsigned int *i) | 773 | cleanup_entry(struct ipt_entry *e, unsigned int *i) |
771 | { | 774 | { |
775 | struct xt_tgdtor_param par; | ||
772 | struct ipt_entry_target *t; | 776 | struct ipt_entry_target *t; |
773 | 777 | ||
774 | if (i && (*i)-- == 0) | 778 | if (i && (*i)-- == 0) |
@@ -777,9 +781,13 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) | |||
777 | /* Cleanup all matches */ | 781 | /* Cleanup all matches */ |
778 | IPT_MATCH_ITERATE(e, cleanup_match, NULL); | 782 | IPT_MATCH_ITERATE(e, cleanup_match, NULL); |
779 | t = ipt_get_target(e); | 783 | t = ipt_get_target(e); |
780 | if (t->u.kernel.target->destroy) | 784 | |
781 | t->u.kernel.target->destroy(t->u.kernel.target, t->data); | 785 | par.target = t->u.kernel.target; |
782 | module_put(t->u.kernel.target->me); | 786 | par.targinfo = t->data; |
787 | par.family = NFPROTO_IPV4; | ||
788 | if (par.target->destroy != NULL) | ||
789 | par.target->destroy(&par); | ||
790 | module_put(par.target->me); | ||
783 | return 0; | 791 | return 0; |
784 | } | 792 | } |
785 | 793 | ||
@@ -1648,12 +1656,16 @@ static int | |||
1648 | compat_check_entry(struct ipt_entry *e, const char *name, | 1656 | compat_check_entry(struct ipt_entry *e, const char *name, |
1649 | unsigned int *i) | 1657 | unsigned int *i) |
1650 | { | 1658 | { |
1659 | struct xt_mtchk_param mtpar; | ||
1651 | unsigned int j; | 1660 | unsigned int j; |
1652 | int ret; | 1661 | int ret; |
1653 | 1662 | ||
1654 | j = 0; | 1663 | j = 0; |
1655 | ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, | 1664 | mtpar.table = name; |
1656 | e->comefrom, &j); | 1665 | mtpar.entryinfo = &e->ip; |
1666 | mtpar.hook_mask = e->comefrom; | ||
1667 | mtpar.family = NFPROTO_IPV4; | ||
1668 | ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j); | ||
1657 | if (ret) | 1669 | if (ret) |
1658 | goto cleanup_matches; | 1670 | goto cleanup_matches; |
1659 | 1671 | ||
@@ -2121,30 +2133,23 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, | |||
2121 | } | 2133 | } |
2122 | 2134 | ||
2123 | static bool | 2135 | static bool |
2124 | icmp_match(const struct sk_buff *skb, | 2136 | icmp_match(const struct sk_buff *skb, const struct xt_match_param *par) |
2125 | const struct net_device *in, | ||
2126 | const struct net_device *out, | ||
2127 | const struct xt_match *match, | ||
2128 | const void *matchinfo, | ||
2129 | int offset, | ||
2130 | unsigned int protoff, | ||
2131 | bool *hotdrop) | ||
2132 | { | 2137 | { |
2133 | const struct icmphdr *ic; | 2138 | const struct icmphdr *ic; |
2134 | struct icmphdr _icmph; | 2139 | struct icmphdr _icmph; |
2135 | const struct ipt_icmp *icmpinfo = matchinfo; | 2140 | const struct ipt_icmp *icmpinfo = par->matchinfo; |
2136 | 2141 | ||
2137 | /* Must not be a fragment. */ | 2142 | /* Must not be a fragment. */ |
2138 | if (offset) | 2143 | if (par->fragoff != 0) |
2139 | return false; | 2144 | return false; |
2140 | 2145 | ||
2141 | ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph); | 2146 | ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); |
2142 | if (ic == NULL) { | 2147 | if (ic == NULL) { |
2143 | /* We've been asked to examine this packet, and we | 2148 | /* We've been asked to examine this packet, and we |
2144 | * can't. Hence, no choice but to drop. | 2149 | * can't. Hence, no choice but to drop. |
2145 | */ | 2150 | */ |
2146 | duprintf("Dropping evil ICMP tinygram.\n"); | 2151 | duprintf("Dropping evil ICMP tinygram.\n"); |
2147 | *hotdrop = true; | 2152 | *par->hotdrop = true; |
2148 | return false; | 2153 | return false; |
2149 | } | 2154 | } |
2150 | 2155 | ||
@@ -2155,15 +2160,9 @@ icmp_match(const struct sk_buff *skb, | |||
2155 | !!(icmpinfo->invflags&IPT_ICMP_INV)); | 2160 | !!(icmpinfo->invflags&IPT_ICMP_INV)); |
2156 | } | 2161 | } |
2157 | 2162 | ||
2158 | /* Called when user tries to insert an entry of this type. */ | 2163 | static bool icmp_checkentry(const struct xt_mtchk_param *par) |
2159 | static bool | ||
2160 | icmp_checkentry(const char *tablename, | ||
2161 | const void *entry, | ||
2162 | const struct xt_match *match, | ||
2163 | void *matchinfo, | ||
2164 | unsigned int hook_mask) | ||
2165 | { | 2164 | { |
2166 | const struct ipt_icmp *icmpinfo = matchinfo; | 2165 | const struct ipt_icmp *icmpinfo = par->matchinfo; |
2167 | 2166 | ||
2168 | /* Must specify no unknown invflags */ | 2167 | /* Must specify no unknown invflags */ |
2169 | return !(icmpinfo->invflags & ~IPT_ICMP_INV); | 2168 | return !(icmpinfo->invflags & ~IPT_ICMP_INV); |
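Put together, the new prototypes make a minimal match extension look like the following skeleton (an illustrative out-of-tree module written against the refactored API, not code from this patch):

#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>

/* Toy match that accepts every packet; it exists only to show the
 * new callback shapes match(skb, par) and checkentry(par). */
static bool
example_mt(const struct sk_buff *skb, const struct xt_match_param *par)
{
	/* par->matchinfo, par->fragoff and par->thoff are available
	 * here; setting *par->hotdrop = true drops the packet. */
	return true;
}

static bool example_mt_check(const struct xt_mtchk_param *par)
{
	/* Validate par->matchinfo; returning false rejects the rule. */
	return true;
}

static struct xt_match example_mt_reg __read_mostly = {
	.name       = "example",
	.family     = NFPROTO_IPV4,
	.match      = example_mt,
	.checkentry = example_mt_check,
	.me         = THIS_MODULE,
};

static int __init example_mt_init(void)
{
	return xt_register_match(&example_mt_reg);
}

static void __exit example_mt_exit(void)
{
	xt_unregister_match(&example_mt_reg);
}

module_init(example_mt_init);
module_exit(example_mt_exit);
MODULE_LICENSE("GPL");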
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index fafe8ebb4c55..7ac1677419a9 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -281,11 +281,9 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) | |||
281 | ***********************************************************************/ | 281 | ***********************************************************************/ |
282 | 282 | ||
283 | static unsigned int | 283 | static unsigned int |
284 | clusterip_tg(struct sk_buff *skb, const struct net_device *in, | 284 | clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) |
285 | const struct net_device *out, unsigned int hooknum, | ||
286 | const struct xt_target *target, const void *targinfo) | ||
287 | { | 285 | { |
288 | const struct ipt_clusterip_tgt_info *cipinfo = targinfo; | 286 | const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; |
289 | struct nf_conn *ct; | 287 | struct nf_conn *ct; |
290 | enum ip_conntrack_info ctinfo; | 288 | enum ip_conntrack_info ctinfo; |
291 | u_int32_t hash; | 289 | u_int32_t hash; |
@@ -349,13 +347,10 @@ clusterip_tg(struct sk_buff *skb, const struct net_device *in, | |||
349 | return XT_CONTINUE; | 347 | return XT_CONTINUE; |
350 | } | 348 | } |
351 | 349 | ||
352 | static bool | 350 | static bool clusterip_tg_check(const struct xt_tgchk_param *par) |
353 | clusterip_tg_check(const char *tablename, const void *e_void, | ||
354 | const struct xt_target *target, void *targinfo, | ||
355 | unsigned int hook_mask) | ||
356 | { | 351 | { |
357 | struct ipt_clusterip_tgt_info *cipinfo = targinfo; | 352 | struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; |
358 | const struct ipt_entry *e = e_void; | 353 | const struct ipt_entry *e = par->entryinfo; |
359 | 354 | ||
360 | struct clusterip_config *config; | 355 | struct clusterip_config *config; |
361 | 356 | ||
@@ -406,9 +401,9 @@ clusterip_tg_check(const char *tablename, const void *e_void, | |||
406 | } | 401 | } |
407 | cipinfo->config = config; | 402 | cipinfo->config = config; |
408 | 403 | ||
409 | if (nf_ct_l3proto_try_module_get(target->family) < 0) { | 404 | if (nf_ct_l3proto_try_module_get(par->target->family) < 0) { |
410 | printk(KERN_WARNING "can't load conntrack support for " | 405 | printk(KERN_WARNING "can't load conntrack support for " |
411 | "proto=%u\n", target->family); | 406 | "proto=%u\n", par->target->family); |
412 | return false; | 407 | return false; |
413 | } | 408 | } |
414 | 409 | ||
@@ -416,9 +411,9 @@ clusterip_tg_check(const char *tablename, const void *e_void, | |||
416 | } | 411 | } |
417 | 412 | ||
418 | /* drop reference count of cluster config when rule is deleted */ | 413 | /* drop reference count of cluster config when rule is deleted */ |
419 | static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo) | 414 | static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) |
420 | { | 415 | { |
421 | const struct ipt_clusterip_tgt_info *cipinfo = targinfo; | 416 | const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; |
422 | 417 | ||
423 | /* if no more entries are referencing the config, remove it | 418 | /* if no more entries are referencing the config, remove it |
424 | * from the list and destroy the proc entry */ | 419 | * from the list and destroy the proc entry */ |
@@ -426,7 +421,7 @@ static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo) | |||
426 | 421 | ||
427 | clusterip_config_put(cipinfo->config); | 422 | clusterip_config_put(cipinfo->config); |
428 | 423 | ||
429 | nf_ct_l3proto_module_put(target->family); | 424 | nf_ct_l3proto_module_put(par->target->family); |
430 | } | 425 | } |
431 | 426 | ||
432 | #ifdef CONFIG_COMPAT | 427 | #ifdef CONFIG_COMPAT |
@@ -445,7 +440,7 @@ struct compat_ipt_clusterip_tgt_info | |||
445 | 440 | ||
446 | static struct xt_target clusterip_tg_reg __read_mostly = { | 441 | static struct xt_target clusterip_tg_reg __read_mostly = { |
447 | .name = "CLUSTERIP", | 442 | .name = "CLUSTERIP", |
448 | .family = AF_INET, | 443 | .family = NFPROTO_IPV4, |
449 | .target = clusterip_tg, | 444 | .target = clusterip_tg, |
450 | .checkentry = clusterip_tg_check, | 445 | .checkentry = clusterip_tg_check, |
451 | .destroy = clusterip_tg_destroy, | 446 | .destroy = clusterip_tg_destroy, |
@@ -546,7 +541,7 @@ arp_mangle(unsigned int hook, | |||
546 | 541 | ||
547 | static struct nf_hook_ops cip_arp_ops __read_mostly = { | 542 | static struct nf_hook_ops cip_arp_ops __read_mostly = { |
548 | .hook = arp_mangle, | 543 | .hook = arp_mangle, |
549 | .pf = NF_ARP, | 544 | .pf = NFPROTO_ARP, |
550 | .hooknum = NF_ARP_OUT, | 545 | .hooknum = NF_ARP_OUT, |
551 | .priority = -1 | 546 | .priority = -1 |
552 | }; | 547 | }; |
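
CLUSTERIP exercises all three target-side parameter blocks in one file: xt_target_param on the packet path, xt_tgchk_param in checkentry (par->entryinfo replaces the untyped e_void), and xt_tgdtor_param in destroy. Sketched under the same caveat as the match-side blocks above:

    struct xt_target_param {
            const struct net_device *in, *out;
            unsigned int hooknum;
            const struct xt_target *target;
            const void *targinfo;
    };

    struct xt_tgchk_param {
            const char *table;
            const void *entryinfo;   /* a struct ipt_entry here */
            const struct xt_target *target;
            void *targinfo;
            unsigned int hook_mask;
    };

    struct xt_tgdtor_param {
            const struct xt_target *target;
            void *targinfo;
    };

The .family and .pf renames travel with this series: for IPv4 the switch from AF_INET to NFPROTO_IPV4 is value-preserving, while the ARP hook moves from the legacy NF_ARP constant onto NFPROTO_ARP, which, if memory serves, has a different numeric value that the core hook code was taught to accept.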
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index d60139c134ca..f7e2fa0974dc 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c | |||
@@ -77,11 +77,9 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) | |||
77 | } | 77 | } |
78 | 78 | ||
79 | static unsigned int | 79 | static unsigned int |
80 | ecn_tg(struct sk_buff *skb, const struct net_device *in, | 80 | ecn_tg(struct sk_buff *skb, const struct xt_target_param *par) |
81 | const struct net_device *out, unsigned int hooknum, | ||
82 | const struct xt_target *target, const void *targinfo) | ||
83 | { | 81 | { |
84 | const struct ipt_ECN_info *einfo = targinfo; | 82 | const struct ipt_ECN_info *einfo = par->targinfo; |
85 | 83 | ||
86 | if (einfo->operation & IPT_ECN_OP_SET_IP) | 84 | if (einfo->operation & IPT_ECN_OP_SET_IP) |
87 | if (!set_ect_ip(skb, einfo)) | 85 | if (!set_ect_ip(skb, einfo)) |
@@ -95,13 +93,10 @@ ecn_tg(struct sk_buff *skb, const struct net_device *in, | |||
95 | return XT_CONTINUE; | 93 | return XT_CONTINUE; |
96 | } | 94 | } |
97 | 95 | ||
98 | static bool | 96 | static bool ecn_tg_check(const struct xt_tgchk_param *par) |
99 | ecn_tg_check(const char *tablename, const void *e_void, | ||
100 | const struct xt_target *target, void *targinfo, | ||
101 | unsigned int hook_mask) | ||
102 | { | 97 | { |
103 | const struct ipt_ECN_info *einfo = targinfo; | 98 | const struct ipt_ECN_info *einfo = par->targinfo; |
104 | const struct ipt_entry *e = e_void; | 99 | const struct ipt_entry *e = par->entryinfo; |
105 | 100 | ||
106 | if (einfo->operation & IPT_ECN_OP_MASK) { | 101 | if (einfo->operation & IPT_ECN_OP_MASK) { |
107 | printk(KERN_WARNING "ECN: unsupported ECN operation %x\n", | 102 | printk(KERN_WARNING "ECN: unsupported ECN operation %x\n", |
@@ -124,7 +119,7 @@ ecn_tg_check(const char *tablename, const void *e_void, | |||
124 | 119 | ||
125 | static struct xt_target ecn_tg_reg __read_mostly = { | 120 | static struct xt_target ecn_tg_reg __read_mostly = { |
126 | .name = "ECN", | 121 | .name = "ECN", |
127 | .family = AF_INET, | 122 | .family = NFPROTO_IPV4, |
128 | .target = ecn_tg, | 123 | .target = ecn_tg, |
129 | .targetsize = sizeof(struct ipt_ECN_info), | 124 | .targetsize = sizeof(struct ipt_ECN_info), |
130 | .table = "mangle", | 125 | .table = "mangle", |
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 0af14137137b..fc6ce04a3e35 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c | |||
@@ -375,7 +375,7 @@ static struct nf_loginfo default_loginfo = { | |||
375 | }; | 375 | }; |
376 | 376 | ||
377 | static void | 377 | static void |
378 | ipt_log_packet(unsigned int pf, | 378 | ipt_log_packet(u_int8_t pf, |
379 | unsigned int hooknum, | 379 | unsigned int hooknum, |
380 | const struct sk_buff *skb, | 380 | const struct sk_buff *skb, |
381 | const struct net_device *in, | 381 | const struct net_device *in, |
@@ -426,28 +426,23 @@ ipt_log_packet(unsigned int pf, | |||
426 | } | 426 | } |
427 | 427 | ||
428 | static unsigned int | 428 | static unsigned int |
429 | log_tg(struct sk_buff *skb, const struct net_device *in, | 429 | log_tg(struct sk_buff *skb, const struct xt_target_param *par) |
430 | const struct net_device *out, unsigned int hooknum, | ||
431 | const struct xt_target *target, const void *targinfo) | ||
432 | { | 430 | { |
433 | const struct ipt_log_info *loginfo = targinfo; | 431 | const struct ipt_log_info *loginfo = par->targinfo; |
434 | struct nf_loginfo li; | 432 | struct nf_loginfo li; |
435 | 433 | ||
436 | li.type = NF_LOG_TYPE_LOG; | 434 | li.type = NF_LOG_TYPE_LOG; |
437 | li.u.log.level = loginfo->level; | 435 | li.u.log.level = loginfo->level; |
438 | li.u.log.logflags = loginfo->logflags; | 436 | li.u.log.logflags = loginfo->logflags; |
439 | 437 | ||
440 | ipt_log_packet(PF_INET, hooknum, skb, in, out, &li, | 438 | ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, par->out, &li, |
441 | loginfo->prefix); | 439 | loginfo->prefix); |
442 | return XT_CONTINUE; | 440 | return XT_CONTINUE; |
443 | } | 441 | } |
444 | 442 | ||
445 | static bool | 443 | static bool log_tg_check(const struct xt_tgchk_param *par) |
446 | log_tg_check(const char *tablename, const void *e, | ||
447 | const struct xt_target *target, void *targinfo, | ||
448 | unsigned int hook_mask) | ||
449 | { | 444 | { |
450 | const struct ipt_log_info *loginfo = targinfo; | 445 | const struct ipt_log_info *loginfo = par->targinfo; |
451 | 446 | ||
452 | if (loginfo->level >= 8) { | 447 | if (loginfo->level >= 8) { |
453 | pr_debug("LOG: level %u >= 8\n", loginfo->level); | 448 | pr_debug("LOG: level %u >= 8\n", loginfo->level); |
@@ -463,7 +458,7 @@ log_tg_check(const char *tablename, const void *e, | |||
463 | 458 | ||
464 | static struct xt_target log_tg_reg __read_mostly = { | 459 | static struct xt_target log_tg_reg __read_mostly = { |
465 | .name = "LOG", | 460 | .name = "LOG", |
466 | .family = AF_INET, | 461 | .family = NFPROTO_IPV4, |
467 | .target = log_tg, | 462 | .target = log_tg, |
468 | .targetsize = sizeof(struct ipt_log_info), | 463 | .targetsize = sizeof(struct ipt_log_info), |
469 | .checkentry = log_tg_check, | 464 | .checkentry = log_tg_check, |
@@ -483,7 +478,7 @@ static int __init log_tg_init(void) | |||
483 | ret = xt_register_target(&log_tg_reg); | 478 | ret = xt_register_target(&log_tg_reg); |
484 | if (ret < 0) | 479 | if (ret < 0) |
485 | return ret; | 480 | return ret; |
486 | nf_log_register(PF_INET, &ipt_log_logger); | 481 | nf_log_register(NFPROTO_IPV4, &ipt_log_logger); |
487 | return 0; | 482 | return 0; |
488 | } | 483 | } |
489 | 484 | ||
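
ipt_LOG picks up a second, orthogonal cleanup besides the target conversion: the protocol-family argument of the logging path is narrowed from unsigned int to u_int8_t (families fit in a byte), and nf_log_register() is called with NFPROTO_IPV4. The logger hook this file implements should now match the following — a sketch, assuming the nf_logfn typedef of this series:

    typedef void nf_logfn(u_int8_t pf, unsigned int hooknum,
                          const struct sk_buff *skb,
                          const struct net_device *in,
                          const struct net_device *out,
                          const struct nf_loginfo *li,
                          const char *prefix);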
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 0841aefaa503..f389f60cb105 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c | |||
@@ -31,12 +31,9 @@ MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); | |||
31 | static DEFINE_RWLOCK(masq_lock); | 31 | static DEFINE_RWLOCK(masq_lock); |
32 | 32 | ||
33 | /* FIXME: Multiple targets. --RR */ | 33 | /* FIXME: Multiple targets. --RR */ |
34 | static bool | 34 | static bool masquerade_tg_check(const struct xt_tgchk_param *par) |
35 | masquerade_tg_check(const char *tablename, const void *e, | ||
36 | const struct xt_target *target, void *targinfo, | ||
37 | unsigned int hook_mask) | ||
38 | { | 35 | { |
39 | const struct nf_nat_multi_range_compat *mr = targinfo; | 36 | const struct nf_nat_multi_range_compat *mr = par->targinfo; |
40 | 37 | ||
41 | if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { | 38 | if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { |
42 | pr_debug("masquerade_check: bad MAP_IPS.\n"); | 39 | pr_debug("masquerade_check: bad MAP_IPS.\n"); |
@@ -50,9 +47,7 @@ masquerade_tg_check(const char *tablename, const void *e, | |||
50 | } | 47 | } |
51 | 48 | ||
52 | static unsigned int | 49 | static unsigned int |
53 | masquerade_tg(struct sk_buff *skb, const struct net_device *in, | 50 | masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par) |
54 | const struct net_device *out, unsigned int hooknum, | ||
55 | const struct xt_target *target, const void *targinfo) | ||
56 | { | 51 | { |
57 | struct nf_conn *ct; | 52 | struct nf_conn *ct; |
58 | struct nf_conn_nat *nat; | 53 | struct nf_conn_nat *nat; |
@@ -62,7 +57,7 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in, | |||
62 | const struct rtable *rt; | 57 | const struct rtable *rt; |
63 | __be32 newsrc; | 58 | __be32 newsrc; |
64 | 59 | ||
65 | NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING); | 60 | NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); |
66 | 61 | ||
67 | ct = nf_ct_get(skb, &ctinfo); | 62 | ct = nf_ct_get(skb, &ctinfo); |
68 | nat = nfct_nat(ct); | 63 | nat = nfct_nat(ct); |
@@ -76,16 +71,16 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in, | |||
76 | if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0) | 71 | if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0) |
77 | return NF_ACCEPT; | 72 | return NF_ACCEPT; |
78 | 73 | ||
79 | mr = targinfo; | 74 | mr = par->targinfo; |
80 | rt = skb->rtable; | 75 | rt = skb->rtable; |
81 | newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); | 76 | newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE); |
82 | if (!newsrc) { | 77 | if (!newsrc) { |
83 | printk("MASQUERADE: %s ate my IP address\n", out->name); | 78 | printk("MASQUERADE: %s ate my IP address\n", par->out->name); |
84 | return NF_DROP; | 79 | return NF_DROP; |
85 | } | 80 | } |
86 | 81 | ||
87 | write_lock_bh(&masq_lock); | 82 | write_lock_bh(&masq_lock); |
88 | nat->masq_index = out->ifindex; | 83 | nat->masq_index = par->out->ifindex; |
89 | write_unlock_bh(&masq_lock); | 84 | write_unlock_bh(&masq_lock); |
90 | 85 | ||
91 | /* Transfer from original range. */ | 86 | /* Transfer from original range. */ |
@@ -119,9 +114,7 @@ static int masq_device_event(struct notifier_block *this, | |||
119 | void *ptr) | 114 | void *ptr) |
120 | { | 115 | { |
121 | const struct net_device *dev = ptr; | 116 | const struct net_device *dev = ptr; |
122 | 117 | struct net *net = dev_net(dev); | |
123 | if (!net_eq(dev_net(dev), &init_net)) | ||
124 | return NOTIFY_DONE; | ||
125 | 118 | ||
126 | if (event == NETDEV_DOWN) { | 119 | if (event == NETDEV_DOWN) { |
127 | /* Device was downed. Search entire table for | 120 | /* Device was downed. Search entire table for |
@@ -129,7 +122,8 @@ static int masq_device_event(struct notifier_block *this, | |||
129 | and forget them. */ | 122 | and forget them. */ |
130 | NF_CT_ASSERT(dev->ifindex != 0); | 123 | NF_CT_ASSERT(dev->ifindex != 0); |
131 | 124 | ||
132 | nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex); | 125 | nf_ct_iterate_cleanup(net, device_cmp, |
126 | (void *)(long)dev->ifindex); | ||
133 | } | 127 | } |
134 | 128 | ||
135 | return NOTIFY_DONE; | 129 | return NOTIFY_DONE; |
@@ -153,7 +147,7 @@ static struct notifier_block masq_inet_notifier = { | |||
153 | 147 | ||
154 | static struct xt_target masquerade_tg_reg __read_mostly = { | 148 | static struct xt_target masquerade_tg_reg __read_mostly = { |
155 | .name = "MASQUERADE", | 149 | .name = "MASQUERADE", |
156 | .family = AF_INET, | 150 | .family = NFPROTO_IPV4, |
157 | .target = masquerade_tg, | 151 | .target = masquerade_tg, |
158 | .targetsize = sizeof(struct nf_nat_multi_range_compat), | 152 | .targetsize = sizeof(struct nf_nat_multi_range_compat), |
159 | .table = "nat", | 153 | .table = "nat", |
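
Besides the target conversion, MASQUERADE gets a netns fix: instead of bailing out for devices outside init_net, the notifier resolves the device's own namespace and hands it to nf_ct_iterate_cleanup(), whose signature grew a struct net * in this series. The iterator callback is outside the hunk; presumably it keys on the masq_index recorded on the packet path, roughly as follows (locking elided, shape assumed):

    /* Assumed shape of the unshown callback: flag any conntrack that
     * was masqueraded out of the interface that just went down. */
    static int device_cmp(struct nf_conn *ct, void *ifindex)
    {
            const struct nf_conn_nat *nat = nfct_nat(ct);

            if (nat == NULL)
                    return 0;
            return nat->masq_index == (int)(long)ifindex;
    }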
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 6739abfd1521..7c29582d4ec8 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c | |||
@@ -22,12 +22,9 @@ MODULE_LICENSE("GPL"); | |||
22 | MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>"); | 22 | MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>"); |
23 | MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets"); | 23 | MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets"); |
24 | 24 | ||
25 | static bool | 25 | static bool netmap_tg_check(const struct xt_tgchk_param *par) |
26 | netmap_tg_check(const char *tablename, const void *e, | ||
27 | const struct xt_target *target, void *targinfo, | ||
28 | unsigned int hook_mask) | ||
29 | { | 26 | { |
30 | const struct nf_nat_multi_range_compat *mr = targinfo; | 27 | const struct nf_nat_multi_range_compat *mr = par->targinfo; |
31 | 28 | ||
32 | if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { | 29 | if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { |
33 | pr_debug("NETMAP:check: bad MAP_IPS.\n"); | 30 | pr_debug("NETMAP:check: bad MAP_IPS.\n"); |
@@ -41,24 +38,23 @@ netmap_tg_check(const char *tablename, const void *e, | |||
41 | } | 38 | } |
42 | 39 | ||
43 | static unsigned int | 40 | static unsigned int |
44 | netmap_tg(struct sk_buff *skb, const struct net_device *in, | 41 | netmap_tg(struct sk_buff *skb, const struct xt_target_param *par) |
45 | const struct net_device *out, unsigned int hooknum, | ||
46 | const struct xt_target *target, const void *targinfo) | ||
47 | { | 42 | { |
48 | struct nf_conn *ct; | 43 | struct nf_conn *ct; |
49 | enum ip_conntrack_info ctinfo; | 44 | enum ip_conntrack_info ctinfo; |
50 | __be32 new_ip, netmask; | 45 | __be32 new_ip, netmask; |
51 | const struct nf_nat_multi_range_compat *mr = targinfo; | 46 | const struct nf_nat_multi_range_compat *mr = par->targinfo; |
52 | struct nf_nat_range newrange; | 47 | struct nf_nat_range newrange; |
53 | 48 | ||
54 | NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING | 49 | NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || |
55 | || hooknum == NF_INET_POST_ROUTING | 50 | par->hooknum == NF_INET_POST_ROUTING || |
56 | || hooknum == NF_INET_LOCAL_OUT); | 51 | par->hooknum == NF_INET_LOCAL_OUT); |
57 | ct = nf_ct_get(skb, &ctinfo); | 52 | ct = nf_ct_get(skb, &ctinfo); |
58 | 53 | ||
59 | netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); | 54 | netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); |
60 | 55 | ||
61 | if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_OUT) | 56 | if (par->hooknum == NF_INET_PRE_ROUTING || |
57 | par->hooknum == NF_INET_LOCAL_OUT) | ||
62 | new_ip = ip_hdr(skb)->daddr & ~netmask; | 58 | new_ip = ip_hdr(skb)->daddr & ~netmask; |
63 | else | 59 | else |
64 | new_ip = ip_hdr(skb)->saddr & ~netmask; | 60 | new_ip = ip_hdr(skb)->saddr & ~netmask; |
@@ -70,12 +66,12 @@ netmap_tg(struct sk_buff *skb, const struct net_device *in, | |||
70 | mr->range[0].min, mr->range[0].max }); | 66 | mr->range[0].min, mr->range[0].max }); |
71 | 67 | ||
72 | /* Hand modified range to generic setup. */ | 68 | /* Hand modified range to generic setup. */ |
73 | return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(hooknum)); | 69 | return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); |
74 | } | 70 | } |
75 | 71 | ||
76 | static struct xt_target netmap_tg_reg __read_mostly = { | 72 | static struct xt_target netmap_tg_reg __read_mostly = { |
77 | .name = "NETMAP", | 73 | .name = "NETMAP", |
78 | .family = AF_INET, | 74 | .family = NFPROTO_IPV4, |
79 | .target = netmap_tg, | 75 | .target = netmap_tg, |
80 | .targetsize = sizeof(struct nf_nat_multi_range_compat), | 76 | .targetsize = sizeof(struct nf_nat_multi_range_compat), |
81 | .table = "nat", | 77 | .table = "nat", |
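
The NETMAP netmask trick deserves a worked example. For a rule mapping onto 192.168.1.0-192.168.1.255, min_ip ^ max_ip leaves only the host bits set, so the complement is the network mask; the rewritten address keeps the host bits of the packet and takes the network bits from the configured range (the OR with the range's network bits sits in the lines elided between these two hunks). Values shown in host byte order for readability; the module itself operates on __be32:

    u32 min_ip  = 0xc0a80100;            /* 192.168.1.0   */
    u32 max_ip  = 0xc0a801ff;            /* 192.168.1.255 */
    u32 netmask = ~(min_ip ^ max_ip);    /* 0xffffff00    */
    u32 daddr   = 0x0a000007;            /* 10.0.0.7      */
    u32 new_ip  = (daddr & ~netmask)     /* host bits: .7 */
                | (min_ip & netmask);    /* -> 192.168.1.7 */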
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 5c6292449d13..698e5e78685b 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c | |||
@@ -26,12 +26,9 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); | |||
26 | MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); | 26 | MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); |
27 | 27 | ||
28 | /* FIXME: Take multiple ranges --RR */ | 28 | /* FIXME: Take multiple ranges --RR */ |
29 | static bool | 29 | static bool redirect_tg_check(const struct xt_tgchk_param *par) |
30 | redirect_tg_check(const char *tablename, const void *e, | ||
31 | const struct xt_target *target, void *targinfo, | ||
32 | unsigned int hook_mask) | ||
33 | { | 30 | { |
34 | const struct nf_nat_multi_range_compat *mr = targinfo; | 31 | const struct nf_nat_multi_range_compat *mr = par->targinfo; |
35 | 32 | ||
36 | if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { | 33 | if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { |
37 | pr_debug("redirect_check: bad MAP_IPS.\n"); | 34 | pr_debug("redirect_check: bad MAP_IPS.\n"); |
@@ -45,24 +42,22 @@ redirect_tg_check(const char *tablename, const void *e, | |||
45 | } | 42 | } |
46 | 43 | ||
47 | static unsigned int | 44 | static unsigned int |
48 | redirect_tg(struct sk_buff *skb, const struct net_device *in, | 45 | redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) |
49 | const struct net_device *out, unsigned int hooknum, | ||
50 | const struct xt_target *target, const void *targinfo) | ||
51 | { | 46 | { |
52 | struct nf_conn *ct; | 47 | struct nf_conn *ct; |
53 | enum ip_conntrack_info ctinfo; | 48 | enum ip_conntrack_info ctinfo; |
54 | __be32 newdst; | 49 | __be32 newdst; |
55 | const struct nf_nat_multi_range_compat *mr = targinfo; | 50 | const struct nf_nat_multi_range_compat *mr = par->targinfo; |
56 | struct nf_nat_range newrange; | 51 | struct nf_nat_range newrange; |
57 | 52 | ||
58 | NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING | 53 | NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || |
59 | || hooknum == NF_INET_LOCAL_OUT); | 54 | par->hooknum == NF_INET_LOCAL_OUT); |
60 | 55 | ||
61 | ct = nf_ct_get(skb, &ctinfo); | 56 | ct = nf_ct_get(skb, &ctinfo); |
62 | NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); | 57 | NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); |
63 | 58 | ||
64 | /* Local packets: make them go to loopback */ | 59 | /* Local packets: make them go to loopback */ |
65 | if (hooknum == NF_INET_LOCAL_OUT) | 60 | if (par->hooknum == NF_INET_LOCAL_OUT) |
66 | newdst = htonl(0x7F000001); | 61 | newdst = htonl(0x7F000001); |
67 | else { | 62 | else { |
68 | struct in_device *indev; | 63 | struct in_device *indev; |
@@ -92,7 +87,7 @@ redirect_tg(struct sk_buff *skb, const struct net_device *in, | |||
92 | 87 | ||
93 | static struct xt_target redirect_tg_reg __read_mostly = { | 88 | static struct xt_target redirect_tg_reg __read_mostly = { |
94 | .name = "REDIRECT", | 89 | .name = "REDIRECT", |
95 | .family = AF_INET, | 90 | .family = NFPROTO_IPV4, |
96 | .target = redirect_tg, | 91 | .target = redirect_tg, |
97 | .targetsize = sizeof(struct nf_nat_multi_range_compat), | 92 | .targetsize = sizeof(struct nf_nat_multi_range_compat), |
98 | .table = "nat", | 93 | .table = "nat", |
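
In REDIRECT, htonl(0x7F000001) is 127.0.0.1, covering locally generated packets; the non-local branch is elided by the hunk, but from context it picks the primary address of the receiving interface. A reconstruction under that assumption:

    /* Sketch of the elided else-branch: redirect to the primary
     * address of the interface the packet arrived on. */
    struct in_device *indev;
    struct in_ifaddr *ifa;

    newdst = 0;
    rcu_read_lock();
    indev = __in_dev_get_rcu(skb->dev);
    if (indev != NULL && (ifa = indev->ifa_list) != NULL)
            newdst = ifa->ifa_local;
    rcu_read_unlock();
    if (!newdst)
            return NF_DROP;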
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 2639872849da..0b4b6e0ff2b9 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c | |||
@@ -136,11 +136,9 @@ static inline void send_unreach(struct sk_buff *skb_in, int code) | |||
136 | } | 136 | } |
137 | 137 | ||
138 | static unsigned int | 138 | static unsigned int |
139 | reject_tg(struct sk_buff *skb, const struct net_device *in, | 139 | reject_tg(struct sk_buff *skb, const struct xt_target_param *par) |
140 | const struct net_device *out, unsigned int hooknum, | ||
141 | const struct xt_target *target, const void *targinfo) | ||
142 | { | 140 | { |
143 | const struct ipt_reject_info *reject = targinfo; | 141 | const struct ipt_reject_info *reject = par->targinfo; |
144 | 142 | ||
145 | /* WARNING: This code causes reentry within iptables. | 143 | /* WARNING: This code causes reentry within iptables. |
146 | This means that the iptables jump stack is now crap. We | 144 | This means that the iptables jump stack is now crap. We |
@@ -168,7 +166,7 @@ reject_tg(struct sk_buff *skb, const struct net_device *in, | |||
168 | send_unreach(skb, ICMP_PKT_FILTERED); | 166 | send_unreach(skb, ICMP_PKT_FILTERED); |
169 | break; | 167 | break; |
170 | case IPT_TCP_RESET: | 168 | case IPT_TCP_RESET: |
171 | send_reset(skb, hooknum); | 169 | send_reset(skb, par->hooknum); |
172 | case IPT_ICMP_ECHOREPLY: | 170 | case IPT_ICMP_ECHOREPLY: |
173 | /* Doesn't happen. */ | 171 | /* Doesn't happen. */ |
174 | break; | 172 | break; |
@@ -177,13 +175,10 @@ reject_tg(struct sk_buff *skb, const struct net_device *in, | |||
177 | return NF_DROP; | 175 | return NF_DROP; |
178 | } | 176 | } |
179 | 177 | ||
180 | static bool | 178 | static bool reject_tg_check(const struct xt_tgchk_param *par) |
181 | reject_tg_check(const char *tablename, const void *e_void, | ||
182 | const struct xt_target *target, void *targinfo, | ||
183 | unsigned int hook_mask) | ||
184 | { | 179 | { |
185 | const struct ipt_reject_info *rejinfo = targinfo; | 180 | const struct ipt_reject_info *rejinfo = par->targinfo; |
186 | const struct ipt_entry *e = e_void; | 181 | const struct ipt_entry *e = par->entryinfo; |
187 | 182 | ||
188 | if (rejinfo->with == IPT_ICMP_ECHOREPLY) { | 183 | if (rejinfo->with == IPT_ICMP_ECHOREPLY) { |
189 | printk("ipt_REJECT: ECHOREPLY no longer supported.\n"); | 184 | printk("ipt_REJECT: ECHOREPLY no longer supported.\n"); |
@@ -201,7 +196,7 @@ reject_tg_check(const char *tablename, const void *e_void, | |||
201 | 196 | ||
202 | static struct xt_target reject_tg_reg __read_mostly = { | 197 | static struct xt_target reject_tg_reg __read_mostly = { |
203 | .name = "REJECT", | 198 | .name = "REJECT", |
204 | .family = AF_INET, | 199 | .family = NFPROTO_IPV4, |
205 | .target = reject_tg, | 200 | .target = reject_tg, |
206 | .targetsize = sizeof(struct ipt_reject_info), | 201 | .targetsize = sizeof(struct ipt_reject_info), |
207 | .table = "filter", | 202 | .table = "filter", |
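
One pre-existing quirk survives the conversion untouched: there is no break after send_reset(), so IPT_TCP_RESET falls through into the IPT_ICMP_ECHOREPLY arm. That arm only breaks, so the fallthrough is harmless, but it is worth flagging:

    case IPT_TCP_RESET:
            send_reset(skb, par->hooknum);
            /* deliberate(?) fall through -- the next arm is a no-op,
             * but an explicit break would make the intent clearer */
    case IPT_ICMP_ECHOREPLY:
            /* Doesn't happen. */
            break;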
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index 30eed65e7338..6d76aae90cc0 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c | |||
@@ -20,12 +20,10 @@ MODULE_DESCRIPTION("Xtables: IPv4 TTL field modification target"); | |||
20 | MODULE_LICENSE("GPL"); | 20 | MODULE_LICENSE("GPL"); |
21 | 21 | ||
22 | static unsigned int | 22 | static unsigned int |
23 | ttl_tg(struct sk_buff *skb, const struct net_device *in, | 23 | ttl_tg(struct sk_buff *skb, const struct xt_target_param *par) |
24 | const struct net_device *out, unsigned int hooknum, | ||
25 | const struct xt_target *target, const void *targinfo) | ||
26 | { | 24 | { |
27 | struct iphdr *iph; | 25 | struct iphdr *iph; |
28 | const struct ipt_TTL_info *info = targinfo; | 26 | const struct ipt_TTL_info *info = par->targinfo; |
29 | int new_ttl; | 27 | int new_ttl; |
30 | 28 | ||
31 | if (!skb_make_writable(skb, skb->len)) | 29 | if (!skb_make_writable(skb, skb->len)) |
@@ -61,12 +59,9 @@ ttl_tg(struct sk_buff *skb, const struct net_device *in, | |||
61 | return XT_CONTINUE; | 59 | return XT_CONTINUE; |
62 | } | 60 | } |
63 | 61 | ||
64 | static bool | 62 | static bool ttl_tg_check(const struct xt_tgchk_param *par) |
65 | ttl_tg_check(const char *tablename, const void *e, | ||
66 | const struct xt_target *target, void *targinfo, | ||
67 | unsigned int hook_mask) | ||
68 | { | 63 | { |
69 | const struct ipt_TTL_info *info = targinfo; | 64 | const struct ipt_TTL_info *info = par->targinfo; |
70 | 65 | ||
71 | if (info->mode > IPT_TTL_MAXMODE) { | 66 | if (info->mode > IPT_TTL_MAXMODE) { |
72 | printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", | 67 | printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", |
@@ -80,7 +75,7 @@ ttl_tg_check(const char *tablename, const void *e, | |||
80 | 75 | ||
81 | static struct xt_target ttl_tg_reg __read_mostly = { | 76 | static struct xt_target ttl_tg_reg __read_mostly = { |
82 | .name = "TTL", | 77 | .name = "TTL", |
83 | .family = AF_INET, | 78 | .family = NFPROTO_IPV4, |
84 | .target = ttl_tg, | 79 | .target = ttl_tg, |
85 | .targetsize = sizeof(struct ipt_TTL_info), | 80 | .targetsize = sizeof(struct ipt_TTL_info), |
86 | .table = "mangle", | 81 | .table = "mangle", |
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index b192756c6d0d..18a2826b57c6 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c | |||
@@ -281,18 +281,14 @@ alloc_failure: | |||
281 | } | 281 | } |
282 | 282 | ||
283 | static unsigned int | 283 | static unsigned int |
284 | ulog_tg(struct sk_buff *skb, const struct net_device *in, | 284 | ulog_tg(struct sk_buff *skb, const struct xt_target_param *par) |
285 | const struct net_device *out, unsigned int hooknum, | ||
286 | const struct xt_target *target, const void *targinfo) | ||
287 | { | 285 | { |
288 | struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; | 286 | ipt_ulog_packet(par->hooknum, skb, par->in, par->out, |
289 | 287 | par->targinfo, NULL); | |
290 | ipt_ulog_packet(hooknum, skb, in, out, loginfo, NULL); | ||
291 | |||
292 | return XT_CONTINUE; | 288 | return XT_CONTINUE; |
293 | } | 289 | } |
294 | 290 | ||
295 | static void ipt_logfn(unsigned int pf, | 291 | static void ipt_logfn(u_int8_t pf, |
296 | unsigned int hooknum, | 292 | unsigned int hooknum, |
297 | const struct sk_buff *skb, | 293 | const struct sk_buff *skb, |
298 | const struct net_device *in, | 294 | const struct net_device *in, |
@@ -317,12 +313,9 @@ static void ipt_logfn(unsigned int pf, | |||
317 | ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); | 313 | ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); |
318 | } | 314 | } |
319 | 315 | ||
320 | static bool | 316 | static bool ulog_tg_check(const struct xt_tgchk_param *par) |
321 | ulog_tg_check(const char *tablename, const void *e, | ||
322 | const struct xt_target *target, void *targinfo, | ||
323 | unsigned int hookmask) | ||
324 | { | 317 | { |
325 | const struct ipt_ulog_info *loginfo = targinfo; | 318 | const struct ipt_ulog_info *loginfo = par->targinfo; |
326 | 319 | ||
327 | if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { | 320 | if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { |
328 | pr_debug("ipt_ULOG: prefix term %i\n", | 321 | pr_debug("ipt_ULOG: prefix term %i\n", |
@@ -374,7 +367,7 @@ static int ulog_tg_compat_to_user(void __user *dst, void *src) | |||
374 | 367 | ||
375 | static struct xt_target ulog_tg_reg __read_mostly = { | 368 | static struct xt_target ulog_tg_reg __read_mostly = { |
376 | .name = "ULOG", | 369 | .name = "ULOG", |
377 | .family = AF_INET, | 370 | .family = NFPROTO_IPV4, |
378 | .target = ulog_tg, | 371 | .target = ulog_tg, |
379 | .targetsize = sizeof(struct ipt_ulog_info), | 372 | .targetsize = sizeof(struct ipt_ulog_info), |
380 | .checkentry = ulog_tg_check, | 373 | .checkentry = ulog_tg_check, |
@@ -419,7 +412,7 @@ static int __init ulog_tg_init(void) | |||
419 | return ret; | 412 | return ret; |
420 | } | 413 | } |
421 | if (nflog) | 414 | if (nflog) |
422 | nf_log_register(PF_INET, &ipt_ulog_logger); | 415 | nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); |
423 | 416 | ||
424 | return 0; | 417 | return 0; |
425 | } | 418 | } |
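
The rewritten ulog_tg() also drops the old const-removing cast ((struct ipt_ulog_info *) targinfo) and feeds par->targinfo straight through, which only compiles if the helper takes a const pointer. Assumed prototype — the helper itself is outside this hunk:

    static void ipt_ulog_packet(unsigned int hooknum,
                                const struct sk_buff *skb,
                                const struct net_device *in,
                                const struct net_device *out,
                                const struct ipt_ulog_info *loginfo,
                                const char *prefix);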
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 462a22c97877..88762f02779d 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c | |||
@@ -30,12 +30,9 @@ static inline bool match_type(const struct net_device *dev, __be32 addr, | |||
30 | } | 30 | } |
31 | 31 | ||
32 | static bool | 32 | static bool |
33 | addrtype_mt_v0(const struct sk_buff *skb, const struct net_device *in, | 33 | addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) |
34 | const struct net_device *out, const struct xt_match *match, | ||
35 | const void *matchinfo, int offset, unsigned int protoff, | ||
36 | bool *hotdrop) | ||
37 | { | 34 | { |
38 | const struct ipt_addrtype_info *info = matchinfo; | 35 | const struct ipt_addrtype_info *info = par->matchinfo; |
39 | const struct iphdr *iph = ip_hdr(skb); | 36 | const struct iphdr *iph = ip_hdr(skb); |
40 | bool ret = true; | 37 | bool ret = true; |
41 | 38 | ||
@@ -50,20 +47,17 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct net_device *in, | |||
50 | } | 47 | } |
51 | 48 | ||
52 | static bool | 49 | static bool |
53 | addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in, | 50 | addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) |
54 | const struct net_device *out, const struct xt_match *match, | ||
55 | const void *matchinfo, int offset, unsigned int protoff, | ||
56 | bool *hotdrop) | ||
57 | { | 51 | { |
58 | const struct ipt_addrtype_info_v1 *info = matchinfo; | 52 | const struct ipt_addrtype_info_v1 *info = par->matchinfo; |
59 | const struct iphdr *iph = ip_hdr(skb); | 53 | const struct iphdr *iph = ip_hdr(skb); |
60 | const struct net_device *dev = NULL; | 54 | const struct net_device *dev = NULL; |
61 | bool ret = true; | 55 | bool ret = true; |
62 | 56 | ||
63 | if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) | 57 | if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) |
64 | dev = in; | 58 | dev = par->in; |
65 | else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) | 59 | else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) |
66 | dev = out; | 60 | dev = par->out; |
67 | 61 | ||
68 | if (info->source) | 62 | if (info->source) |
69 | ret &= match_type(dev, iph->saddr, info->source) ^ | 63 | ret &= match_type(dev, iph->saddr, info->source) ^ |
@@ -74,12 +68,9 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in, | |||
74 | return ret; | 68 | return ret; |
75 | } | 69 | } |
76 | 70 | ||
77 | static bool | 71 | static bool addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) |
78 | addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void, | ||
79 | const struct xt_match *match, void *matchinfo, | ||
80 | unsigned int hook_mask) | ||
81 | { | 72 | { |
82 | struct ipt_addrtype_info_v1 *info = matchinfo; | 73 | struct ipt_addrtype_info_v1 *info = par->matchinfo; |
83 | 74 | ||
84 | if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN && | 75 | if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN && |
85 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { | 76 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { |
@@ -88,14 +79,16 @@ addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void, | |||
88 | return false; | 79 | return false; |
89 | } | 80 | } |
90 | 81 | ||
91 | if (hook_mask & (1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN) && | 82 | if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | |
83 | (1 << NF_INET_LOCAL_IN)) && | ||
92 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { | 84 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { |
93 | printk(KERN_ERR "ipt_addrtype: output interface limitation " | 85 | printk(KERN_ERR "ipt_addrtype: output interface limitation " |
94 | "not valid in PRE_ROUTING and INPUT\n"); | 86 | "not valid in PRE_ROUTING and INPUT\n"); |
95 | return false; | 87 | return false; |
96 | } | 88 | } |
97 | 89 | ||
98 | if (hook_mask & (1 << NF_INET_POST_ROUTING | 1 << NF_INET_LOCAL_OUT) && | 90 | if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | |
91 | (1 << NF_INET_LOCAL_OUT)) && | ||
99 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) { | 92 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) { |
100 | printk(KERN_ERR "ipt_addrtype: input interface limitation " | 93 | printk(KERN_ERR "ipt_addrtype: input interface limitation " |
101 | "not valid in POST_ROUTING and OUTPUT\n"); | 94 | "not valid in POST_ROUTING and OUTPUT\n"); |
@@ -108,14 +101,14 @@ addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void, | |||
108 | static struct xt_match addrtype_mt_reg[] __read_mostly = { | 101 | static struct xt_match addrtype_mt_reg[] __read_mostly = { |
109 | { | 102 | { |
110 | .name = "addrtype", | 103 | .name = "addrtype", |
111 | .family = AF_INET, | 104 | .family = NFPROTO_IPV4, |
112 | .match = addrtype_mt_v0, | 105 | .match = addrtype_mt_v0, |
113 | .matchsize = sizeof(struct ipt_addrtype_info), | 106 | .matchsize = sizeof(struct ipt_addrtype_info), |
114 | .me = THIS_MODULE | 107 | .me = THIS_MODULE |
115 | }, | 108 | }, |
116 | { | 109 | { |
117 | .name = "addrtype", | 110 | .name = "addrtype", |
118 | .family = AF_INET, | 111 | .family = NFPROTO_IPV4, |
119 | .revision = 1, | 112 | .revision = 1, |
120 | .match = addrtype_mt_v1, | 113 | .match = addrtype_mt_v1, |
121 | .checkentry = addrtype_mt_checkentry_v1, | 114 | .checkentry = addrtype_mt_checkentry_v1, |
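
The reworked hook-mask checks in addrtype are a readability change only: "<<" binds tighter than "|", so the added parentheses do not alter the mask. hook_mask carries one bit per hook point the rule is attached to, which is what lets checkentry reject interface constraints that cannot be satisfied at that hook:

    /* Both spellings build the same mask -- the parentheses are
     * cosmetic, since "<<" has higher precedence than "|". */
    unsigned int old_style = 1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN;
    unsigned int new_style = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN);
    /* old_style == new_style */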
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index e977989629c7..0104c0b399de 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c | |||
@@ -36,27 +36,23 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) | |||
36 | return r; | 36 | return r; |
37 | } | 37 | } |
38 | 38 | ||
39 | static bool | 39 | static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par) |
40 | ah_mt(const struct sk_buff *skb, const struct net_device *in, | ||
41 | const struct net_device *out, const struct xt_match *match, | ||
42 | const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) | ||
43 | { | 40 | { |
44 | struct ip_auth_hdr _ahdr; | 41 | struct ip_auth_hdr _ahdr; |
45 | const struct ip_auth_hdr *ah; | 42 | const struct ip_auth_hdr *ah; |
46 | const struct ipt_ah *ahinfo = matchinfo; | 43 | const struct ipt_ah *ahinfo = par->matchinfo; |
47 | 44 | ||
48 | /* Must not be a fragment. */ | 45 | /* Must not be a fragment. */ |
49 | if (offset) | 46 | if (par->fragoff != 0) |
50 | return false; | 47 | return false; |
51 | 48 | ||
52 | ah = skb_header_pointer(skb, protoff, | 49 | ah = skb_header_pointer(skb, par->thoff, sizeof(_ahdr), &_ahdr); |
53 | sizeof(_ahdr), &_ahdr); | ||
54 | if (ah == NULL) { | 50 | if (ah == NULL) { |
55 | /* We've been asked to examine this packet, and we | 51 | /* We've been asked to examine this packet, and we |
56 | * can't. Hence, no choice but to drop. | 52 | * can't. Hence, no choice but to drop. |
57 | */ | 53 | */ |
58 | duprintf("Dropping evil AH tinygram.\n"); | 54 | duprintf("Dropping evil AH tinygram.\n"); |
59 | *hotdrop = true; | 55 | *par->hotdrop = true; |
60 | return 0; | 56 | return 0; |
61 | } | 57 | } |
62 | 58 | ||
@@ -65,13 +61,9 @@ ah_mt(const struct sk_buff *skb, const struct net_device *in, | |||
65 | !!(ahinfo->invflags & IPT_AH_INV_SPI)); | 61 | !!(ahinfo->invflags & IPT_AH_INV_SPI)); |
66 | } | 62 | } |
67 | 63 | ||
68 | /* Called when user tries to insert an entry of this type. */ | 64 | static bool ah_mt_check(const struct xt_mtchk_param *par) |
69 | static bool | ||
70 | ah_mt_check(const char *tablename, const void *ip_void, | ||
71 | const struct xt_match *match, void *matchinfo, | ||
72 | unsigned int hook_mask) | ||
73 | { | 65 | { |
74 | const struct ipt_ah *ahinfo = matchinfo; | 66 | const struct ipt_ah *ahinfo = par->matchinfo; |
75 | 67 | ||
76 | /* Must specify no unknown invflags */ | 68 | /* Must specify no unknown invflags */ |
77 | if (ahinfo->invflags & ~IPT_AH_INV_MASK) { | 69 | if (ahinfo->invflags & ~IPT_AH_INV_MASK) { |
@@ -83,7 +75,7 @@ ah_mt_check(const char *tablename, const void *ip_void, | |||
83 | 75 | ||
84 | static struct xt_match ah_mt_reg __read_mostly = { | 76 | static struct xt_match ah_mt_reg __read_mostly = { |
85 | .name = "ah", | 77 | .name = "ah", |
86 | .family = AF_INET, | 78 | .family = NFPROTO_IPV4, |
87 | .match = ah_mt, | 79 | .match = ah_mt, |
88 | .matchsize = sizeof(struct ipt_ah), | 80 | .matchsize = sizeof(struct ipt_ah), |
89 | .proto = IPPROTO_AH, | 81 | .proto = IPPROTO_AH, |
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 749de8284ce5..6289b64144c6 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c | |||
@@ -67,12 +67,9 @@ static inline bool match_tcp(const struct sk_buff *skb, | |||
67 | return true; | 67 | return true; |
68 | } | 68 | } |
69 | 69 | ||
70 | static bool | 70 | static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par) |
71 | ecn_mt(const struct sk_buff *skb, const struct net_device *in, | ||
72 | const struct net_device *out, const struct xt_match *match, | ||
73 | const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) | ||
74 | { | 71 | { |
75 | const struct ipt_ecn_info *info = matchinfo; | 72 | const struct ipt_ecn_info *info = par->matchinfo; |
76 | 73 | ||
77 | if (info->operation & IPT_ECN_OP_MATCH_IP) | 74 | if (info->operation & IPT_ECN_OP_MATCH_IP) |
78 | if (!match_ip(skb, info)) | 75 | if (!match_ip(skb, info)) |
@@ -81,20 +78,17 @@ ecn_mt(const struct sk_buff *skb, const struct net_device *in, | |||
81 | if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { | 78 | if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { |
82 | if (ip_hdr(skb)->protocol != IPPROTO_TCP) | 79 | if (ip_hdr(skb)->protocol != IPPROTO_TCP) |
83 | return false; | 80 | return false; |
84 | if (!match_tcp(skb, info, hotdrop)) | 81 | if (!match_tcp(skb, info, par->hotdrop)) |
85 | return false; | 82 | return false; |
86 | } | 83 | } |
87 | 84 | ||
88 | return true; | 85 | return true; |
89 | } | 86 | } |
90 | 87 | ||
91 | static bool | 88 | static bool ecn_mt_check(const struct xt_mtchk_param *par) |
92 | ecn_mt_check(const char *tablename, const void *ip_void, | ||
93 | const struct xt_match *match, void *matchinfo, | ||
94 | unsigned int hook_mask) | ||
95 | { | 89 | { |
96 | const struct ipt_ecn_info *info = matchinfo; | 90 | const struct ipt_ecn_info *info = par->matchinfo; |
97 | const struct ipt_ip *ip = ip_void; | 91 | const struct ipt_ip *ip = par->entryinfo; |
98 | 92 | ||
99 | if (info->operation & IPT_ECN_OP_MATCH_MASK) | 93 | if (info->operation & IPT_ECN_OP_MATCH_MASK) |
100 | return false; | 94 | return false; |
@@ -114,7 +108,7 @@ ecn_mt_check(const char *tablename, const void *ip_void, | |||
114 | 108 | ||
115 | static struct xt_match ecn_mt_reg __read_mostly = { | 109 | static struct xt_match ecn_mt_reg __read_mostly = { |
116 | .name = "ecn", | 110 | .name = "ecn", |
117 | .family = AF_INET, | 111 | .family = NFPROTO_IPV4, |
118 | .match = ecn_mt, | 112 | .match = ecn_mt, |
119 | .matchsize = sizeof(struct ipt_ecn_info), | 113 | .matchsize = sizeof(struct ipt_ecn_info), |
120 | .checkentry = ecn_mt_check, | 114 | .checkentry = ecn_mt_check, |
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c deleted file mode 100644 index 3974d7cae5c0..000000000000 --- a/net/ipv4/netfilter/ipt_recent.c +++ /dev/null | |||
@@ -1,501 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Patrick McHardy <kaber@trash.net> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | * This is a replacement of the old ipt_recent module, which carried the | ||
9 | * following copyright notice: | ||
10 | * | ||
11 | * Author: Stephen Frost <sfrost@snowman.net> | ||
12 | * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org | ||
13 | */ | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/ip.h> | ||
16 | #include <linux/moduleparam.h> | ||
17 | #include <linux/proc_fs.h> | ||
18 | #include <linux/seq_file.h> | ||
19 | #include <linux/string.h> | ||
20 | #include <linux/ctype.h> | ||
21 | #include <linux/list.h> | ||
22 | #include <linux/random.h> | ||
23 | #include <linux/jhash.h> | ||
24 | #include <linux/bitops.h> | ||
25 | #include <linux/skbuff.h> | ||
26 | #include <linux/inet.h> | ||
27 | #include <net/net_namespace.h> | ||
28 | |||
29 | #include <linux/netfilter/x_tables.h> | ||
30 | #include <linux/netfilter_ipv4/ipt_recent.h> | ||
31 | |||
32 | MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); | ||
33 | MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4"); | ||
34 | MODULE_LICENSE("GPL"); | ||
35 | |||
36 | static unsigned int ip_list_tot = 100; | ||
37 | static unsigned int ip_pkt_list_tot = 20; | ||
38 | static unsigned int ip_list_hash_size = 0; | ||
39 | static unsigned int ip_list_perms = 0644; | ||
40 | static unsigned int ip_list_uid = 0; | ||
41 | static unsigned int ip_list_gid = 0; | ||
42 | module_param(ip_list_tot, uint, 0400); | ||
43 | module_param(ip_pkt_list_tot, uint, 0400); | ||
44 | module_param(ip_list_hash_size, uint, 0400); | ||
45 | module_param(ip_list_perms, uint, 0400); | ||
46 | module_param(ip_list_uid, uint, 0400); | ||
47 | module_param(ip_list_gid, uint, 0400); | ||
48 | MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); | ||
49 | MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)"); | ||
50 | MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); | ||
51 | MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files"); | ||
52 | MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files"); | ||
53 | MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files"); | ||
54 | |||
55 | struct recent_entry { | ||
56 | struct list_head list; | ||
57 | struct list_head lru_list; | ||
58 | __be32 addr; | ||
59 | u_int8_t ttl; | ||
60 | u_int8_t index; | ||
61 | u_int16_t nstamps; | ||
62 | unsigned long stamps[0]; | ||
63 | }; | ||
64 | |||
65 | struct recent_table { | ||
66 | struct list_head list; | ||
67 | char name[IPT_RECENT_NAME_LEN]; | ||
68 | #ifdef CONFIG_PROC_FS | ||
69 | struct proc_dir_entry *proc; | ||
70 | #endif | ||
71 | unsigned int refcnt; | ||
72 | unsigned int entries; | ||
73 | struct list_head lru_list; | ||
74 | struct list_head iphash[0]; | ||
75 | }; | ||
76 | |||
77 | static LIST_HEAD(tables); | ||
78 | static DEFINE_SPINLOCK(recent_lock); | ||
79 | static DEFINE_MUTEX(recent_mutex); | ||
80 | |||
81 | #ifdef CONFIG_PROC_FS | ||
82 | static struct proc_dir_entry *proc_dir; | ||
83 | static const struct file_operations recent_fops; | ||
84 | #endif | ||
85 | |||
86 | static u_int32_t hash_rnd; | ||
87 | static int hash_rnd_initted; | ||
88 | |||
89 | static unsigned int recent_entry_hash(__be32 addr) | ||
90 | { | ||
91 | if (!hash_rnd_initted) { | ||
92 | get_random_bytes(&hash_rnd, 4); | ||
93 | hash_rnd_initted = 1; | ||
94 | } | ||
95 | return jhash_1word((__force u32)addr, hash_rnd) & (ip_list_hash_size - 1); | ||
96 | } | ||
97 | |||
98 | static struct recent_entry * | ||
99 | recent_entry_lookup(const struct recent_table *table, __be32 addr, u_int8_t ttl) | ||
100 | { | ||
101 | struct recent_entry *e; | ||
102 | unsigned int h; | ||
103 | |||
104 | h = recent_entry_hash(addr); | ||
105 | list_for_each_entry(e, &table->iphash[h], list) | ||
106 | if (e->addr == addr && (ttl == e->ttl || !ttl || !e->ttl)) | ||
107 | return e; | ||
108 | return NULL; | ||
109 | } | ||
110 | |||
111 | static void recent_entry_remove(struct recent_table *t, struct recent_entry *e) | ||
112 | { | ||
113 | list_del(&e->list); | ||
114 | list_del(&e->lru_list); | ||
115 | kfree(e); | ||
116 | t->entries--; | ||
117 | } | ||
118 | |||
119 | static struct recent_entry * | ||
120 | recent_entry_init(struct recent_table *t, __be32 addr, u_int8_t ttl) | ||
121 | { | ||
122 | struct recent_entry *e; | ||
123 | |||
124 | if (t->entries >= ip_list_tot) { | ||
125 | e = list_entry(t->lru_list.next, struct recent_entry, lru_list); | ||
126 | recent_entry_remove(t, e); | ||
127 | } | ||
128 | e = kmalloc(sizeof(*e) + sizeof(e->stamps[0]) * ip_pkt_list_tot, | ||
129 | GFP_ATOMIC); | ||
130 | if (e == NULL) | ||
131 | return NULL; | ||
132 | e->addr = addr; | ||
133 | e->ttl = ttl; | ||
134 | e->stamps[0] = jiffies; | ||
135 | e->nstamps = 1; | ||
136 | e->index = 1; | ||
137 | list_add_tail(&e->list, &t->iphash[recent_entry_hash(addr)]); | ||
138 | list_add_tail(&e->lru_list, &t->lru_list); | ||
139 | t->entries++; | ||
140 | return e; | ||
141 | } | ||
142 | |||
143 | static void recent_entry_update(struct recent_table *t, struct recent_entry *e) | ||
144 | { | ||
145 | e->stamps[e->index++] = jiffies; | ||
146 | if (e->index > e->nstamps) | ||
147 | e->nstamps = e->index; | ||
148 | e->index %= ip_pkt_list_tot; | ||
149 | list_move_tail(&e->lru_list, &t->lru_list); | ||
150 | } | ||
151 | |||
152 | static struct recent_table *recent_table_lookup(const char *name) | ||
153 | { | ||
154 | struct recent_table *t; | ||
155 | |||
156 | list_for_each_entry(t, &tables, list) | ||
157 | if (!strcmp(t->name, name)) | ||
158 | return t; | ||
159 | return NULL; | ||
160 | } | ||
161 | |||
162 | static void recent_table_flush(struct recent_table *t) | ||
163 | { | ||
164 | struct recent_entry *e, *next; | ||
165 | unsigned int i; | ||
166 | |||
167 | for (i = 0; i < ip_list_hash_size; i++) | ||
168 | list_for_each_entry_safe(e, next, &t->iphash[i], list) | ||
169 | recent_entry_remove(t, e); | ||
170 | } | ||
171 | |||
172 | static bool | ||
173 | recent_mt(const struct sk_buff *skb, const struct net_device *in, | ||
174 | const struct net_device *out, const struct xt_match *match, | ||
175 | const void *matchinfo, int offset, unsigned int protoff, | ||
176 | bool *hotdrop) | ||
177 | { | ||
178 | const struct ipt_recent_info *info = matchinfo; | ||
179 | struct recent_table *t; | ||
180 | struct recent_entry *e; | ||
181 | __be32 addr; | ||
182 | u_int8_t ttl; | ||
183 | bool ret = info->invert; | ||
184 | |||
185 | if (info->side == IPT_RECENT_DEST) | ||
186 | addr = ip_hdr(skb)->daddr; | ||
187 | else | ||
188 | addr = ip_hdr(skb)->saddr; | ||
189 | |||
190 | ttl = ip_hdr(skb)->ttl; | ||
191 | /* use TTL as seen before forwarding */ | ||
192 | if (out && !skb->sk) | ||
193 | ttl++; | ||
194 | |||
195 | spin_lock_bh(&recent_lock); | ||
196 | t = recent_table_lookup(info->name); | ||
197 | e = recent_entry_lookup(t, addr, | ||
198 | info->check_set & IPT_RECENT_TTL ? ttl : 0); | ||
199 | if (e == NULL) { | ||
200 | if (!(info->check_set & IPT_RECENT_SET)) | ||
201 | goto out; | ||
202 | e = recent_entry_init(t, addr, ttl); | ||
203 | if (e == NULL) | ||
204 | *hotdrop = true; | ||
205 | ret = !ret; | ||
206 | goto out; | ||
207 | } | ||
208 | |||
209 | if (info->check_set & IPT_RECENT_SET) | ||
210 | ret = !ret; | ||
211 | else if (info->check_set & IPT_RECENT_REMOVE) { | ||
212 | recent_entry_remove(t, e); | ||
213 | ret = !ret; | ||
214 | } else if (info->check_set & (IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) { | ||
215 | unsigned long time = jiffies - info->seconds * HZ; | ||
216 | unsigned int i, hits = 0; | ||
217 | |||
218 | for (i = 0; i < e->nstamps; i++) { | ||
219 | if (info->seconds && time_after(time, e->stamps[i])) | ||
220 | continue; | ||
221 | if (++hits >= info->hit_count) { | ||
222 | ret = !ret; | ||
223 | break; | ||
224 | } | ||
225 | } | ||
226 | } | ||
227 | |||
228 | if (info->check_set & IPT_RECENT_SET || | ||
229 | (info->check_set & IPT_RECENT_UPDATE && ret)) { | ||
230 | recent_entry_update(t, e); | ||
231 | e->ttl = ttl; | ||
232 | } | ||
233 | out: | ||
234 | spin_unlock_bh(&recent_lock); | ||
235 | return ret; | ||
236 | } | ||
237 | |||
238 | static bool | ||
239 | recent_mt_check(const char *tablename, const void *ip, | ||
240 | const struct xt_match *match, void *matchinfo, | ||
241 | unsigned int hook_mask) | ||
242 | { | ||
243 | const struct ipt_recent_info *info = matchinfo; | ||
244 | struct recent_table *t; | ||
245 | unsigned i; | ||
246 | bool ret = false; | ||
247 | |||
248 | if (hweight8(info->check_set & | ||
249 | (IPT_RECENT_SET | IPT_RECENT_REMOVE | | ||
250 | IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) != 1) | ||
251 | return false; | ||
252 | if ((info->check_set & (IPT_RECENT_SET | IPT_RECENT_REMOVE)) && | ||
253 | (info->seconds || info->hit_count)) | ||
254 | return false; | ||
255 | if (info->hit_count > ip_pkt_list_tot) | ||
256 | return false; | ||
257 | if (info->name[0] == '\0' || | ||
258 | strnlen(info->name, IPT_RECENT_NAME_LEN) == IPT_RECENT_NAME_LEN) | ||
259 | return false; | ||
260 | |||
261 | mutex_lock(&recent_mutex); | ||
262 | t = recent_table_lookup(info->name); | ||
263 | if (t != NULL) { | ||
264 | t->refcnt++; | ||
265 | ret = true; | ||
266 | goto out; | ||
267 | } | ||
268 | |||
269 | t = kzalloc(sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size, | ||
270 | GFP_KERNEL); | ||
271 | if (t == NULL) | ||
272 | goto out; | ||
273 | t->refcnt = 1; | ||
274 | strcpy(t->name, info->name); | ||
275 | INIT_LIST_HEAD(&t->lru_list); | ||
276 | for (i = 0; i < ip_list_hash_size; i++) | ||
277 | INIT_LIST_HEAD(&t->iphash[i]); | ||
278 | #ifdef CONFIG_PROC_FS | ||
279 | t->proc = proc_create(t->name, ip_list_perms, proc_dir, &recent_fops); | ||
280 | if (t->proc == NULL) { | ||
281 | kfree(t); | ||
282 | goto out; | ||
283 | } | ||
284 | t->proc->uid = ip_list_uid; | ||
285 | t->proc->gid = ip_list_gid; | ||
286 | t->proc->data = t; | ||
287 | #endif | ||
288 | spin_lock_bh(&recent_lock); | ||
289 | list_add_tail(&t->list, &tables); | ||
290 | spin_unlock_bh(&recent_lock); | ||
291 | ret = true; | ||
292 | out: | ||
293 | mutex_unlock(&recent_mutex); | ||
294 | return ret; | ||
295 | } | ||
296 | |||
297 | static void recent_mt_destroy(const struct xt_match *match, void *matchinfo) | ||
298 | { | ||
299 | const struct ipt_recent_info *info = matchinfo; | ||
300 | struct recent_table *t; | ||
301 | |||
302 | mutex_lock(&recent_mutex); | ||
303 | t = recent_table_lookup(info->name); | ||
304 | if (--t->refcnt == 0) { | ||
305 | spin_lock_bh(&recent_lock); | ||
306 | list_del(&t->list); | ||
307 | spin_unlock_bh(&recent_lock); | ||
308 | #ifdef CONFIG_PROC_FS | ||
309 | remove_proc_entry(t->name, proc_dir); | ||
310 | #endif | ||
311 | recent_table_flush(t); | ||
312 | kfree(t); | ||
313 | } | ||
314 | mutex_unlock(&recent_mutex); | ||
315 | } | ||
316 | |||
317 | #ifdef CONFIG_PROC_FS | ||
318 | struct recent_iter_state { | ||
319 | struct recent_table *table; | ||
320 | unsigned int bucket; | ||
321 | }; | ||
322 | |||
323 | static void *recent_seq_start(struct seq_file *seq, loff_t *pos) | ||
324 | __acquires(recent_lock) | ||
325 | { | ||
326 | struct recent_iter_state *st = seq->private; | ||
327 | const struct recent_table *t = st->table; | ||
328 | struct recent_entry *e; | ||
329 | loff_t p = *pos; | ||
330 | |||
331 | spin_lock_bh(&recent_lock); | ||
332 | |||
333 | for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++) | ||
334 | list_for_each_entry(e, &t->iphash[st->bucket], list) | ||
335 | if (p-- == 0) | ||
336 | return e; | ||
337 | return NULL; | ||
338 | } | ||
339 | |||
340 | static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
341 | { | ||
342 | struct recent_iter_state *st = seq->private; | ||
343 | const struct recent_table *t = st->table; | ||
344 | struct recent_entry *e = v; | ||
345 | struct list_head *head = e->list.next; | ||
346 | |||
347 | while (head == &t->iphash[st->bucket]) { | ||
348 | if (++st->bucket >= ip_list_hash_size) | ||
349 | return NULL; | ||
350 | head = t->iphash[st->bucket].next; | ||
351 | } | ||
352 | (*pos)++; | ||
353 | return list_entry(head, struct recent_entry, list); | ||
354 | } | ||
355 | |||
356 | static void recent_seq_stop(struct seq_file *s, void *v) | ||
357 | __releases(recent_lock) | ||
358 | { | ||
359 | spin_unlock_bh(&recent_lock); | ||
360 | } | ||
361 | |||
362 | static int recent_seq_show(struct seq_file *seq, void *v) | ||
363 | { | ||
364 | const struct recent_entry *e = v; | ||
365 | unsigned int i; | ||
366 | |||
367 | i = (e->index - 1) % ip_pkt_list_tot; | ||
368 | seq_printf(seq, "src=%u.%u.%u.%u ttl: %u last_seen: %lu oldest_pkt: %u", | ||
369 | NIPQUAD(e->addr), e->ttl, e->stamps[i], e->index); | ||
370 | for (i = 0; i < e->nstamps; i++) | ||
371 | seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]); | ||
372 | seq_printf(seq, "\n"); | ||
373 | return 0; | ||
374 | } | ||
375 | |||
376 | static const struct seq_operations recent_seq_ops = { | ||
377 | .start = recent_seq_start, | ||
378 | .next = recent_seq_next, | ||
379 | .stop = recent_seq_stop, | ||
380 | .show = recent_seq_show, | ||
381 | }; | ||
382 | |||
383 | static int recent_seq_open(struct inode *inode, struct file *file) | ||
384 | { | ||
385 | struct proc_dir_entry *pde = PDE(inode); | ||
386 | struct recent_iter_state *st; | ||
387 | |||
388 | st = __seq_open_private(file, &recent_seq_ops, sizeof(*st)); | ||
389 | if (st == NULL) | ||
390 | return -ENOMEM; | ||
391 | |||
392 | st->table = pde->data; | ||
393 | return 0; | ||
394 | } | ||
395 | |||
396 | static ssize_t recent_proc_write(struct file *file, const char __user *input, | ||
397 | size_t size, loff_t *loff) | ||
398 | { | ||
399 | const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | ||
400 | struct recent_table *t = pde->data; | ||
401 | struct recent_entry *e; | ||
402 | char buf[sizeof("+255.255.255.255")], *c = buf; | ||
403 | __be32 addr; | ||
404 | int add; | ||
405 | |||
406 | if (size > sizeof(buf)) | ||
407 | size = sizeof(buf); | ||
408 | if (copy_from_user(buf, input, size)) | ||
409 | return -EFAULT; | ||
410 | while (isspace(*c)) | ||
411 | c++; | ||
412 | |||
413 | if (size - (c - buf) < 5) | ||
414 | return c - buf; | ||
415 | if (!strncmp(c, "clear", 5)) { | ||
416 | c += 5; | ||
417 | spin_lock_bh(&recent_lock); | ||
418 | recent_table_flush(t); | ||
419 | spin_unlock_bh(&recent_lock); | ||
420 | return c - buf; | ||
421 | } | ||
422 | |||
423 | switch (*c) { | ||
424 | case '-': | ||
425 | add = 0; | ||
426 | c++; | ||
427 | break; | ||
428 | case '+': | ||
429 | c++; | ||
430 | default: | ||
431 | add = 1; | ||
432 | break; | ||
433 | } | ||
434 | addr = in_aton(c); | ||
435 | |||
436 | spin_lock_bh(&recent_lock); | ||
437 | e = recent_entry_lookup(t, addr, 0); | ||
438 | if (e == NULL) { | ||
439 | if (add) | ||
440 | recent_entry_init(t, addr, 0); | ||
441 | } else { | ||
442 | if (add) | ||
443 | recent_entry_update(t, e); | ||
444 | else | ||
445 | recent_entry_remove(t, e); | ||
446 | } | ||
447 | spin_unlock_bh(&recent_lock); | ||
448 | return size; | ||
449 | } | ||
450 | |||
451 | static const struct file_operations recent_fops = { | ||
452 | .open = recent_seq_open, | ||
453 | .read = seq_read, | ||
454 | .write = recent_proc_write, | ||
455 | .release = seq_release_private, | ||
456 | .owner = THIS_MODULE, | ||
457 | }; | ||
458 | #endif /* CONFIG_PROC_FS */ | ||
459 | |||
460 | static struct xt_match recent_mt_reg __read_mostly = { | ||
461 | .name = "recent", | ||
462 | .family = AF_INET, | ||
463 | .match = recent_mt, | ||
464 | .matchsize = sizeof(struct ipt_recent_info), | ||
465 | .checkentry = recent_mt_check, | ||
466 | .destroy = recent_mt_destroy, | ||
467 | .me = THIS_MODULE, | ||
468 | }; | ||
469 | |||
470 | static int __init recent_mt_init(void) | ||
471 | { | ||
472 | int err; | ||
473 | |||
474 | if (!ip_list_tot || !ip_pkt_list_tot || ip_pkt_list_tot > 255) | ||
475 | return -EINVAL; | ||
476 | ip_list_hash_size = 1 << fls(ip_list_tot); | ||
477 | |||
478 | err = xt_register_match(&recent_mt_reg); | ||
479 | #ifdef CONFIG_PROC_FS | ||
480 | if (err) | ||
481 | return err; | ||
482 | proc_dir = proc_mkdir("ipt_recent", init_net.proc_net); | ||
483 | if (proc_dir == NULL) { | ||
484 | xt_unregister_match(&recent_mt_reg); | ||
485 | err = -ENOMEM; | ||
486 | } | ||
487 | #endif | ||
488 | return err; | ||
489 | } | ||
490 | |||
491 | static void __exit recent_mt_exit(void) | ||
492 | { | ||
493 | BUG_ON(!list_empty(&tables)); | ||
494 | xt_unregister_match(&recent_mt_reg); | ||
495 | #ifdef CONFIG_PROC_FS | ||
496 | remove_proc_entry("ipt_recent", init_net.proc_net); | ||
497 | #endif | ||
498 | } | ||
499 | |||
500 | module_init(recent_mt_init); | ||
501 | module_exit(recent_mt_exit); | ||
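For reference, the write handler above parses exactly one command per write(2): "clear" flushes the table, while an address prefixed with '+' (or unprefixed, the default) adds or refreshes an entry and '-' removes it. A minimal userspace sketch of driving the interface; the table name "DEFAULT" is hypothetical, it is whatever --name the iptables rule created:

    /* Sketch: one command per write(); the handler re-parses each buffer. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/proc/net/ipt_recent/DEFAULT", O_WRONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            write(fd, "+192.168.0.1", strlen("+192.168.0.1")); /* add or refresh */
            write(fd, "-192.168.0.1", strlen("-192.168.0.1")); /* remove */
            write(fd, "clear", strlen("clear"));               /* flush the table */
            close(fd);
            return 0;
    }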
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c index e0b8caeb710c..297f1cbf4ff5 100644 --- a/net/ipv4/netfilter/ipt_ttl.c +++ b/net/ipv4/netfilter/ipt_ttl.c | |||
@@ -18,12 +18,9 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | |||
18 | MODULE_DESCRIPTION("Xtables: IPv4 TTL field match"); | 18 | MODULE_DESCRIPTION("Xtables: IPv4 TTL field match"); |
19 | MODULE_LICENSE("GPL"); | 19 | MODULE_LICENSE("GPL"); |
20 | 20 | ||
21 | static bool | 21 | static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) |
22 | ttl_mt(const struct sk_buff *skb, const struct net_device *in, | ||
23 | const struct net_device *out, const struct xt_match *match, | ||
24 | const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) | ||
25 | { | 22 | { |
26 | const struct ipt_ttl_info *info = matchinfo; | 23 | const struct ipt_ttl_info *info = par->matchinfo; |
27 | const u8 ttl = ip_hdr(skb)->ttl; | 24 | const u8 ttl = ip_hdr(skb)->ttl; |
28 | 25 | ||
29 | switch (info->mode) { | 26 | switch (info->mode) { |
@@ -46,7 +43,7 @@ ttl_mt(const struct sk_buff *skb, const struct net_device *in, | |||
46 | 43 | ||
47 | static struct xt_match ttl_mt_reg __read_mostly = { | 44 | static struct xt_match ttl_mt_reg __read_mostly = { |
48 | .name = "ttl", | 45 | .name = "ttl", |
49 | .family = AF_INET, | 46 | .family = NFPROTO_IPV4, |
50 | .match = ttl_mt, | 47 | .match = ttl_mt, |
51 | .matchsize = sizeof(struct ipt_ttl_info), | 48 | .matchsize = sizeof(struct ipt_ttl_info), |
52 | .me = THIS_MODULE, | 49 | .me = THIS_MODULE, |
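The hunk above elides the body of ttl_mt(); only the signature and the matchinfo access change, since the old in/out/match/matchinfo/offset/protoff/hotdrop arguments were folded into a single struct xt_match_param. For context, a sketch of the unchanged comparison logic, reconstructed from the IPT_TTL_* mode constants in linux/netfilter_ipv4/ipt_ttl.h:

    /* Sketch: the match body kept by the conversion above. */
    static bool ttl_mt_sketch(const struct sk_buff *skb,
                              const struct xt_match_param *par)
    {
            const struct ipt_ttl_info *info = par->matchinfo;
            const u8 ttl = ip_hdr(skb)->ttl;

            switch (info->mode) {
            case IPT_TTL_EQ:
                    return ttl == info->ttl;
            case IPT_TTL_NE:
                    return ttl != info->ttl;
            case IPT_TTL_LT:
                    return ttl < info->ttl;
            case IPT_TTL_GT:
                    return ttl > info->ttl;
            }
            return false;
    }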
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 1ea677dcf845..c9224310ebae 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c | |||
@@ -70,7 +70,7 @@ ipt_local_in_hook(unsigned int hook, | |||
70 | int (*okfn)(struct sk_buff *)) | 70 | int (*okfn)(struct sk_buff *)) |
71 | { | 71 | { |
72 | return ipt_do_table(skb, hook, in, out, | 72 | return ipt_do_table(skb, hook, in, out, |
73 | nf_local_in_net(in, out)->ipv4.iptable_filter); | 73 | dev_net(in)->ipv4.iptable_filter); |
74 | } | 74 | } |
75 | 75 | ||
76 | static unsigned int | 76 | static unsigned int |
@@ -81,7 +81,7 @@ ipt_hook(unsigned int hook, | |||
81 | int (*okfn)(struct sk_buff *)) | 81 | int (*okfn)(struct sk_buff *)) |
82 | { | 82 | { |
83 | return ipt_do_table(skb, hook, in, out, | 83 | return ipt_do_table(skb, hook, in, out, |
84 | nf_forward_net(in, out)->ipv4.iptable_filter); | 84 | dev_net(in)->ipv4.iptable_filter); |
85 | } | 85 | } |
86 | 86 | ||
87 | static unsigned int | 87 | static unsigned int |
@@ -101,7 +101,7 @@ ipt_local_out_hook(unsigned int hook, | |||
101 | } | 101 | } |
102 | 102 | ||
103 | return ipt_do_table(skb, hook, in, out, | 103 | return ipt_do_table(skb, hook, in, out, |
104 | nf_local_out_net(in, out)->ipv4.iptable_filter); | 104 | dev_net(out)->ipv4.iptable_filter); |
105 | } | 105 | } |
106 | 106 | ||
107 | static struct nf_hook_ops ipt_ops[] __read_mostly = { | 107 | static struct nf_hook_ops ipt_ops[] __read_mostly = { |
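The pattern in this file (and in the mangle, raw and security tables below) replaces the nf_*_net(in, out) guessing helpers with dev_net() on whichever device is guaranteed non-NULL at that hook point: "in" for PRE_ROUTING, LOCAL_IN and FORWARD, "out" for LOCAL_OUT and POST_ROUTING. Schematically, with illustrative names:

    /* Sketch: namespace resolution per hook point after the conversion. */
    static unsigned int input_side_hook(unsigned int hook, struct sk_buff *skb,
                                        const struct net_device *in,
                                        const struct net_device *out,
                                        int (*okfn)(struct sk_buff *))
    {
            /* in != NULL on PRE_ROUTING/LOCAL_IN/FORWARD */
            return ipt_do_table(skb, hook, in, out,
                                dev_net(in)->ipv4.iptable_filter);
    }

    static unsigned int output_side_hook(unsigned int hook, struct sk_buff *skb,
                                         const struct net_device *in,
                                         const struct net_device *out,
                                         int (*okfn)(struct sk_buff *))
    {
            /* out != NULL on LOCAL_OUT/POST_ROUTING */
            return ipt_do_table(skb, hook, in, out,
                                dev_net(out)->ipv4.iptable_filter);
    }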
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index da59182f2226..69f2c4287146 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c | |||
@@ -81,7 +81,7 @@ ipt_pre_routing_hook(unsigned int hook, | |||
81 | int (*okfn)(struct sk_buff *)) | 81 | int (*okfn)(struct sk_buff *)) |
82 | { | 82 | { |
83 | return ipt_do_table(skb, hook, in, out, | 83 | return ipt_do_table(skb, hook, in, out, |
84 | nf_pre_routing_net(in, out)->ipv4.iptable_mangle); | 84 | dev_net(in)->ipv4.iptable_mangle); |
85 | } | 85 | } |
86 | 86 | ||
87 | static unsigned int | 87 | static unsigned int |
@@ -92,7 +92,7 @@ ipt_post_routing_hook(unsigned int hook, | |||
92 | int (*okfn)(struct sk_buff *)) | 92 | int (*okfn)(struct sk_buff *)) |
93 | { | 93 | { |
94 | return ipt_do_table(skb, hook, in, out, | 94 | return ipt_do_table(skb, hook, in, out, |
95 | nf_post_routing_net(in, out)->ipv4.iptable_mangle); | 95 | dev_net(out)->ipv4.iptable_mangle); |
96 | } | 96 | } |
97 | 97 | ||
98 | static unsigned int | 98 | static unsigned int |
@@ -103,7 +103,7 @@ ipt_local_in_hook(unsigned int hook, | |||
103 | int (*okfn)(struct sk_buff *)) | 103 | int (*okfn)(struct sk_buff *)) |
104 | { | 104 | { |
105 | return ipt_do_table(skb, hook, in, out, | 105 | return ipt_do_table(skb, hook, in, out, |
106 | nf_local_in_net(in, out)->ipv4.iptable_mangle); | 106 | dev_net(in)->ipv4.iptable_mangle); |
107 | } | 107 | } |
108 | 108 | ||
109 | static unsigned int | 109 | static unsigned int |
@@ -114,7 +114,7 @@ ipt_forward_hook(unsigned int hook, | |||
114 | int (*okfn)(struct sk_buff *)) | 114 | int (*okfn)(struct sk_buff *)) |
115 | { | 115 | { |
116 | return ipt_do_table(skb, hook, in, out, | 116 | return ipt_do_table(skb, hook, in, out, |
117 | nf_forward_net(in, out)->ipv4.iptable_mangle); | 117 | dev_net(in)->ipv4.iptable_mangle); |
118 | } | 118 | } |
119 | 119 | ||
120 | static unsigned int | 120 | static unsigned int |
@@ -147,7 +147,7 @@ ipt_local_hook(unsigned int hook, | |||
147 | tos = iph->tos; | 147 | tos = iph->tos; |
148 | 148 | ||
149 | ret = ipt_do_table(skb, hook, in, out, | 149 | ret = ipt_do_table(skb, hook, in, out, |
150 | nf_local_out_net(in, out)->ipv4.iptable_mangle); | 150 | dev_net(out)->ipv4.iptable_mangle); |
151 | /* Reroute for ANY change. */ | 151 | /* Reroute for ANY change. */ |
152 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { | 152 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { |
153 | iph = ip_hdr(skb); | 153 | iph = ip_hdr(skb); |
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index fddce7754b72..8faebfe638f1 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c | |||
@@ -53,7 +53,7 @@ ipt_hook(unsigned int hook, | |||
53 | int (*okfn)(struct sk_buff *)) | 53 | int (*okfn)(struct sk_buff *)) |
54 | { | 54 | { |
55 | return ipt_do_table(skb, hook, in, out, | 55 | return ipt_do_table(skb, hook, in, out, |
56 | nf_pre_routing_net(in, out)->ipv4.iptable_raw); | 56 | dev_net(in)->ipv4.iptable_raw); |
57 | } | 57 | } |
58 | 58 | ||
59 | static unsigned int | 59 | static unsigned int |
@@ -72,7 +72,7 @@ ipt_local_hook(unsigned int hook, | |||
72 | return NF_ACCEPT; | 72 | return NF_ACCEPT; |
73 | } | 73 | } |
74 | return ipt_do_table(skb, hook, in, out, | 74 | return ipt_do_table(skb, hook, in, out, |
75 | nf_local_out_net(in, out)->ipv4.iptable_raw); | 75 | dev_net(out)->ipv4.iptable_raw); |
76 | } | 76 | } |
77 | 77 | ||
78 | /* 'raw' is the very first table. */ | 78 | /* 'raw' is the very first table. */ |
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index db6d312128e1..36f3be3cc428 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c | |||
@@ -73,7 +73,7 @@ ipt_local_in_hook(unsigned int hook, | |||
73 | int (*okfn)(struct sk_buff *)) | 73 | int (*okfn)(struct sk_buff *)) |
74 | { | 74 | { |
75 | return ipt_do_table(skb, hook, in, out, | 75 | return ipt_do_table(skb, hook, in, out, |
76 | nf_local_in_net(in, out)->ipv4.iptable_security); | 76 | dev_net(in)->ipv4.iptable_security); |
77 | } | 77 | } |
78 | 78 | ||
79 | static unsigned int | 79 | static unsigned int |
@@ -84,7 +84,7 @@ ipt_forward_hook(unsigned int hook, | |||
84 | int (*okfn)(struct sk_buff *)) | 84 | int (*okfn)(struct sk_buff *)) |
85 | { | 85 | { |
86 | return ipt_do_table(skb, hook, in, out, | 86 | return ipt_do_table(skb, hook, in, out, |
87 | nf_forward_net(in, out)->ipv4.iptable_security); | 87 | dev_net(in)->ipv4.iptable_security); |
88 | } | 88 | } |
89 | 89 | ||
90 | static unsigned int | 90 | static unsigned int |
@@ -103,7 +103,7 @@ ipt_local_out_hook(unsigned int hook, | |||
103 | return NF_ACCEPT; | 103 | return NF_ACCEPT; |
104 | } | 104 | } |
105 | return ipt_do_table(skb, hook, in, out, | 105 | return ipt_do_table(skb, hook, in, out, |
106 | nf_local_out_net(in, out)->ipv4.iptable_security); | 106 | dev_net(out)->ipv4.iptable_security); |
107 | } | 107 | } |
108 | 108 | ||
109 | static struct nf_hook_ops ipt_ops[] __read_mostly = { | 109 | static struct nf_hook_ops ipt_ops[] __read_mostly = { |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5a955c440364..4a7c35275396 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |||
@@ -1,3 +1,4 @@ | |||
1 | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | 2 | /* (C) 1999-2001 Paul `Rusty' Russell |
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | 3 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> |
3 | * | 4 | * |
@@ -24,6 +25,7 @@ | |||
24 | #include <net/netfilter/nf_conntrack_core.h> | 25 | #include <net/netfilter/nf_conntrack_core.h> |
25 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> | 26 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> |
26 | #include <net/netfilter/nf_nat_helper.h> | 27 | #include <net/netfilter/nf_nat_helper.h> |
28 | #include <net/netfilter/ipv4/nf_defrag_ipv4.h> | ||
27 | 29 | ||
28 | int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, | 30 | int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, |
29 | struct nf_conn *ct, | 31 | struct nf_conn *ct, |
@@ -63,23 +65,6 @@ static int ipv4_print_tuple(struct seq_file *s, | |||
63 | NIPQUAD(tuple->dst.u3.ip)); | 65 | NIPQUAD(tuple->dst.u3.ip)); |
64 | } | 66 | } |
65 | 67 | ||
66 | /* Returns new sk_buff, or NULL */ | ||
67 | static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) | ||
68 | { | ||
69 | int err; | ||
70 | |||
71 | skb_orphan(skb); | ||
72 | |||
73 | local_bh_disable(); | ||
74 | err = ip_defrag(skb, user); | ||
75 | local_bh_enable(); | ||
76 | |||
77 | if (!err) | ||
78 | ip_send_check(ip_hdr(skb)); | ||
79 | |||
80 | return err; | ||
81 | } | ||
82 | |||
83 | static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, | 68 | static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, |
84 | unsigned int *dataoff, u_int8_t *protonum) | 69 | unsigned int *dataoff, u_int8_t *protonum) |
85 | { | 70 | { |
@@ -144,35 +129,13 @@ out: | |||
144 | return nf_conntrack_confirm(skb); | 129 | return nf_conntrack_confirm(skb); |
145 | } | 130 | } |
146 | 131 | ||
147 | static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, | ||
148 | struct sk_buff *skb, | ||
149 | const struct net_device *in, | ||
150 | const struct net_device *out, | ||
151 | int (*okfn)(struct sk_buff *)) | ||
152 | { | ||
153 | /* Previously seen (loopback)? Ignore. Do this before | ||
154 | fragment check. */ | ||
155 | if (skb->nfct) | ||
156 | return NF_ACCEPT; | ||
157 | |||
158 | /* Gather fragments. */ | ||
159 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { | ||
160 | if (nf_ct_ipv4_gather_frags(skb, | ||
161 | hooknum == NF_INET_PRE_ROUTING ? | ||
162 | IP_DEFRAG_CONNTRACK_IN : | ||
163 | IP_DEFRAG_CONNTRACK_OUT)) | ||
164 | return NF_STOLEN; | ||
165 | } | ||
166 | return NF_ACCEPT; | ||
167 | } | ||
168 | |||
169 | static unsigned int ipv4_conntrack_in(unsigned int hooknum, | 132 | static unsigned int ipv4_conntrack_in(unsigned int hooknum, |
170 | struct sk_buff *skb, | 133 | struct sk_buff *skb, |
171 | const struct net_device *in, | 134 | const struct net_device *in, |
172 | const struct net_device *out, | 135 | const struct net_device *out, |
173 | int (*okfn)(struct sk_buff *)) | 136 | int (*okfn)(struct sk_buff *)) |
174 | { | 137 | { |
175 | return nf_conntrack_in(PF_INET, hooknum, skb); | 138 | return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb); |
176 | } | 139 | } |
177 | 140 | ||
178 | static unsigned int ipv4_conntrack_local(unsigned int hooknum, | 141 | static unsigned int ipv4_conntrack_local(unsigned int hooknum, |
@@ -188,20 +151,13 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum, | |||
188 | printk("ipt_hook: happy cracking.\n"); | 151 | printk("ipt_hook: happy cracking.\n"); |
189 | return NF_ACCEPT; | 152 | return NF_ACCEPT; |
190 | } | 153 | } |
191 | return nf_conntrack_in(PF_INET, hooknum, skb); | 154 | return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb); |
192 | } | 155 | } |
193 | 156 | ||
194 | /* Connection tracking may drop packets, but never alters them, so | 157 | /* Connection tracking may drop packets, but never alters them, so |
195 | make it the first hook. */ | 158 | make it the first hook. */ |
196 | static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { | 159 | static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { |
197 | { | 160 | { |
198 | .hook = ipv4_conntrack_defrag, | ||
199 | .owner = THIS_MODULE, | ||
200 | .pf = PF_INET, | ||
201 | .hooknum = NF_INET_PRE_ROUTING, | ||
202 | .priority = NF_IP_PRI_CONNTRACK_DEFRAG, | ||
203 | }, | ||
204 | { | ||
205 | .hook = ipv4_conntrack_in, | 161 | .hook = ipv4_conntrack_in, |
206 | .owner = THIS_MODULE, | 162 | .owner = THIS_MODULE, |
207 | .pf = PF_INET, | 163 | .pf = PF_INET, |
@@ -209,13 +165,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { | |||
209 | .priority = NF_IP_PRI_CONNTRACK, | 165 | .priority = NF_IP_PRI_CONNTRACK, |
210 | }, | 166 | }, |
211 | { | 167 | { |
212 | .hook = ipv4_conntrack_defrag, | ||
213 | .owner = THIS_MODULE, | ||
214 | .pf = PF_INET, | ||
215 | .hooknum = NF_INET_LOCAL_OUT, | ||
216 | .priority = NF_IP_PRI_CONNTRACK_DEFRAG, | ||
217 | }, | ||
218 | { | ||
219 | .hook = ipv4_conntrack_local, | 168 | .hook = ipv4_conntrack_local, |
220 | .owner = THIS_MODULE, | 169 | .owner = THIS_MODULE, |
221 | .pf = PF_INET, | 170 | .pf = PF_INET, |
@@ -254,7 +203,7 @@ static ctl_table ip_ct_sysctl_table[] = { | |||
254 | { | 203 | { |
255 | .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, | 204 | .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, |
256 | .procname = "ip_conntrack_count", | 205 | .procname = "ip_conntrack_count", |
257 | .data = &nf_conntrack_count, | 206 | .data = &init_net.ct.count, |
258 | .maxlen = sizeof(int), | 207 | .maxlen = sizeof(int), |
259 | .mode = 0444, | 208 | .mode = 0444, |
260 | .proc_handler = &proc_dointvec, | 209 | .proc_handler = &proc_dointvec, |
@@ -270,7 +219,7 @@ static ctl_table ip_ct_sysctl_table[] = { | |||
270 | { | 219 | { |
271 | .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, | 220 | .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, |
272 | .procname = "ip_conntrack_checksum", | 221 | .procname = "ip_conntrack_checksum", |
273 | .data = &nf_conntrack_checksum, | 222 | .data = &init_net.ct.sysctl_checksum, |
274 | .maxlen = sizeof(int), | 223 | .maxlen = sizeof(int), |
275 | .mode = 0644, | 224 | .mode = 0644, |
276 | .proc_handler = &proc_dointvec, | 225 | .proc_handler = &proc_dointvec, |
@@ -278,7 +227,7 @@ static ctl_table ip_ct_sysctl_table[] = { | |||
278 | { | 227 | { |
279 | .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, | 228 | .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, |
280 | .procname = "ip_conntrack_log_invalid", | 229 | .procname = "ip_conntrack_log_invalid", |
281 | .data = &nf_ct_log_invalid, | 230 | .data = &init_net.ct.sysctl_log_invalid, |
282 | .maxlen = sizeof(unsigned int), | 231 | .maxlen = sizeof(unsigned int), |
283 | .mode = 0644, | 232 | .mode = 0644, |
284 | .proc_handler = &proc_dointvec_minmax, | 233 | .proc_handler = &proc_dointvec_minmax, |
@@ -323,7 +272,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) | |||
323 | return -EINVAL; | 272 | return -EINVAL; |
324 | } | 273 | } |
325 | 274 | ||
326 | h = nf_conntrack_find_get(&tuple); | 275 | h = nf_conntrack_find_get(sock_net(sk), &tuple); |
327 | if (h) { | 276 | if (h) { |
328 | struct sockaddr_in sin; | 277 | struct sockaddr_in sin; |
329 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); | 278 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); |
@@ -422,6 +371,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) | |||
422 | int ret = 0; | 371 | int ret = 0; |
423 | 372 | ||
424 | need_conntrack(); | 373 | need_conntrack(); |
374 | nf_defrag_ipv4_enable(); | ||
425 | 375 | ||
426 | ret = nf_register_sockopt(&so_getorigdst); | 376 | ret = nf_register_sockopt(&so_getorigdst); |
427 | if (ret < 0) { | 377 | if (ret < 0) { |
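Moving defragmentation into its own module (nf_defrag_ipv4.c, added below) does not reorder anything at runtime: netfilter sorts the hooks on a given hook point by priority, not by registration order, so defrag still runs before conntrack. The relevant values, excerpted from include/linux/netfilter_ipv4.h in this tree (lower runs first):

    NF_IP_PRI_CONNTRACK_DEFRAG = -400,  /* reassemble fragments first */
    NF_IP_PRI_RAW              = -300,
    NF_IP_PRI_CONNTRACK        = -200,  /* then track the full datagram */
    NF_IP_PRI_MANGLE           = -150,
    NF_IP_PRI_NAT_DST          = -100,
    NF_IP_PRI_FILTER           =    0,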
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 3a020720e40b..313ebf00ee36 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | |||
@@ -21,18 +21,20 @@ | |||
21 | #include <net/netfilter/nf_conntrack_acct.h> | 21 | #include <net/netfilter/nf_conntrack_acct.h> |
22 | 22 | ||
23 | struct ct_iter_state { | 23 | struct ct_iter_state { |
24 | struct seq_net_private p; | ||
24 | unsigned int bucket; | 25 | unsigned int bucket; |
25 | }; | 26 | }; |
26 | 27 | ||
27 | static struct hlist_node *ct_get_first(struct seq_file *seq) | 28 | static struct hlist_node *ct_get_first(struct seq_file *seq) |
28 | { | 29 | { |
30 | struct net *net = seq_file_net(seq); | ||
29 | struct ct_iter_state *st = seq->private; | 31 | struct ct_iter_state *st = seq->private; |
30 | struct hlist_node *n; | 32 | struct hlist_node *n; |
31 | 33 | ||
32 | for (st->bucket = 0; | 34 | for (st->bucket = 0; |
33 | st->bucket < nf_conntrack_htable_size; | 35 | st->bucket < nf_conntrack_htable_size; |
34 | st->bucket++) { | 36 | st->bucket++) { |
35 | n = rcu_dereference(nf_conntrack_hash[st->bucket].first); | 37 | n = rcu_dereference(net->ct.hash[st->bucket].first); |
36 | if (n) | 38 | if (n) |
37 | return n; | 39 | return n; |
38 | } | 40 | } |
@@ -42,13 +44,14 @@ static struct hlist_node *ct_get_first(struct seq_file *seq) | |||
42 | static struct hlist_node *ct_get_next(struct seq_file *seq, | 44 | static struct hlist_node *ct_get_next(struct seq_file *seq, |
43 | struct hlist_node *head) | 45 | struct hlist_node *head) |
44 | { | 46 | { |
47 | struct net *net = seq_file_net(seq); | ||
45 | struct ct_iter_state *st = seq->private; | 48 | struct ct_iter_state *st = seq->private; |
46 | 49 | ||
47 | head = rcu_dereference(head->next); | 50 | head = rcu_dereference(head->next); |
48 | while (head == NULL) { | 51 | while (head == NULL) { |
49 | if (++st->bucket >= nf_conntrack_htable_size) | 52 | if (++st->bucket >= nf_conntrack_htable_size) |
50 | return NULL; | 53 | return NULL; |
51 | head = rcu_dereference(nf_conntrack_hash[st->bucket].first); | 54 | head = rcu_dereference(net->ct.hash[st->bucket].first); |
52 | } | 55 | } |
53 | return head; | 56 | return head; |
54 | } | 57 | } |
@@ -158,8 +161,8 @@ static const struct seq_operations ct_seq_ops = { | |||
158 | 161 | ||
159 | static int ct_open(struct inode *inode, struct file *file) | 162 | static int ct_open(struct inode *inode, struct file *file) |
160 | { | 163 | { |
161 | return seq_open_private(file, &ct_seq_ops, | 164 | return seq_open_net(inode, file, &ct_seq_ops, |
162 | sizeof(struct ct_iter_state)); | 165 | sizeof(struct ct_iter_state)); |
163 | } | 166 | } |
164 | 167 | ||
165 | static const struct file_operations ct_file_ops = { | 168 | static const struct file_operations ct_file_ops = { |
@@ -167,21 +170,23 @@ static const struct file_operations ct_file_ops = { | |||
167 | .open = ct_open, | 170 | .open = ct_open, |
168 | .read = seq_read, | 171 | .read = seq_read, |
169 | .llseek = seq_lseek, | 172 | .llseek = seq_lseek, |
170 | .release = seq_release_private, | 173 | .release = seq_release_net, |
171 | }; | 174 | }; |
172 | 175 | ||
173 | /* expects */ | 176 | /* expects */ |
174 | struct ct_expect_iter_state { | 177 | struct ct_expect_iter_state { |
178 | struct seq_net_private p; | ||
175 | unsigned int bucket; | 179 | unsigned int bucket; |
176 | }; | 180 | }; |
177 | 181 | ||
178 | static struct hlist_node *ct_expect_get_first(struct seq_file *seq) | 182 | static struct hlist_node *ct_expect_get_first(struct seq_file *seq) |
179 | { | 183 | { |
184 | struct net *net = seq_file_net(seq); | ||
180 | struct ct_expect_iter_state *st = seq->private; | 185 | struct ct_expect_iter_state *st = seq->private; |
181 | struct hlist_node *n; | 186 | struct hlist_node *n; |
182 | 187 | ||
183 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { | 188 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { |
184 | n = rcu_dereference(nf_ct_expect_hash[st->bucket].first); | 189 | n = rcu_dereference(net->ct.expect_hash[st->bucket].first); |
185 | if (n) | 190 | if (n) |
186 | return n; | 191 | return n; |
187 | } | 192 | } |
@@ -191,13 +196,14 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) | |||
191 | static struct hlist_node *ct_expect_get_next(struct seq_file *seq, | 196 | static struct hlist_node *ct_expect_get_next(struct seq_file *seq, |
192 | struct hlist_node *head) | 197 | struct hlist_node *head) |
193 | { | 198 | { |
199 | struct net *net = seq_file_net(seq); | ||
194 | struct ct_expect_iter_state *st = seq->private; | 200 | struct ct_expect_iter_state *st = seq->private; |
195 | 201 | ||
196 | head = rcu_dereference(head->next); | 202 | head = rcu_dereference(head->next); |
197 | while (head == NULL) { | 203 | while (head == NULL) { |
198 | if (++st->bucket >= nf_ct_expect_hsize) | 204 | if (++st->bucket >= nf_ct_expect_hsize) |
199 | return NULL; | 205 | return NULL; |
200 | head = rcu_dereference(nf_ct_expect_hash[st->bucket].first); | 206 | head = rcu_dereference(net->ct.expect_hash[st->bucket].first); |
201 | } | 207 | } |
202 | return head; | 208 | return head; |
203 | } | 209 | } |
@@ -265,8 +271,8 @@ static const struct seq_operations exp_seq_ops = { | |||
265 | 271 | ||
266 | static int exp_open(struct inode *inode, struct file *file) | 272 | static int exp_open(struct inode *inode, struct file *file) |
267 | { | 273 | { |
268 | return seq_open_private(file, &exp_seq_ops, | 274 | return seq_open_net(inode, file, &exp_seq_ops, |
269 | sizeof(struct ct_expect_iter_state)); | 275 | sizeof(struct ct_expect_iter_state)); |
270 | } | 276 | } |
271 | 277 | ||
272 | static const struct file_operations ip_exp_file_ops = { | 278 | static const struct file_operations ip_exp_file_ops = { |
@@ -274,11 +280,12 @@ static const struct file_operations ip_exp_file_ops = { | |||
274 | .open = exp_open, | 280 | .open = exp_open, |
275 | .read = seq_read, | 281 | .read = seq_read, |
276 | .llseek = seq_lseek, | 282 | .llseek = seq_lseek, |
277 | .release = seq_release_private, | 283 | .release = seq_release_net, |
278 | }; | 284 | }; |
279 | 285 | ||
280 | static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) | 286 | static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) |
281 | { | 287 | { |
288 | struct net *net = seq_file_net(seq); | ||
282 | int cpu; | 289 | int cpu; |
283 | 290 | ||
284 | if (*pos == 0) | 291 | if (*pos == 0) |
@@ -288,7 +295,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) | |||
288 | if (!cpu_possible(cpu)) | 295 | if (!cpu_possible(cpu)) |
289 | continue; | 296 | continue; |
290 | *pos = cpu+1; | 297 | *pos = cpu+1; |
291 | return &per_cpu(nf_conntrack_stat, cpu); | 298 | return per_cpu_ptr(net->ct.stat, cpu); |
292 | } | 299 | } |
293 | 300 | ||
294 | return NULL; | 301 | return NULL; |
@@ -296,13 +303,14 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) | |||
296 | 303 | ||
297 | static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 304 | static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
298 | { | 305 | { |
306 | struct net *net = seq_file_net(seq); | ||
299 | int cpu; | 307 | int cpu; |
300 | 308 | ||
301 | for (cpu = *pos; cpu < NR_CPUS; ++cpu) { | 309 | for (cpu = *pos; cpu < NR_CPUS; ++cpu) { |
302 | if (!cpu_possible(cpu)) | 310 | if (!cpu_possible(cpu)) |
303 | continue; | 311 | continue; |
304 | *pos = cpu+1; | 312 | *pos = cpu+1; |
305 | return &per_cpu(nf_conntrack_stat, cpu); | 313 | return per_cpu_ptr(net->ct.stat, cpu); |
306 | } | 314 | } |
307 | 315 | ||
308 | return NULL; | 316 | return NULL; |
@@ -314,7 +322,8 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v) | |||
314 | 322 | ||
315 | static int ct_cpu_seq_show(struct seq_file *seq, void *v) | 323 | static int ct_cpu_seq_show(struct seq_file *seq, void *v) |
316 | { | 324 | { |
317 | unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); | 325 | struct net *net = seq_file_net(seq); |
326 | unsigned int nr_conntracks = atomic_read(&net->ct.count); | ||
318 | const struct ip_conntrack_stat *st = v; | 327 | const struct ip_conntrack_stat *st = v; |
319 | 328 | ||
320 | if (v == SEQ_START_TOKEN) { | 329 | if (v == SEQ_START_TOKEN) { |
@@ -354,7 +363,8 @@ static const struct seq_operations ct_cpu_seq_ops = { | |||
354 | 363 | ||
355 | static int ct_cpu_seq_open(struct inode *inode, struct file *file) | 364 | static int ct_cpu_seq_open(struct inode *inode, struct file *file) |
356 | { | 365 | { |
357 | return seq_open(file, &ct_cpu_seq_ops); | 366 | return seq_open_net(inode, file, &ct_cpu_seq_ops, |
367 | sizeof(struct seq_net_private)); | ||
358 | } | 368 | } |
359 | 369 | ||
360 | static const struct file_operations ct_cpu_seq_fops = { | 370 | static const struct file_operations ct_cpu_seq_fops = { |
@@ -362,39 +372,54 @@ static const struct file_operations ct_cpu_seq_fops = { | |||
362 | .open = ct_cpu_seq_open, | 372 | .open = ct_cpu_seq_open, |
363 | .read = seq_read, | 373 | .read = seq_read, |
364 | .llseek = seq_lseek, | 374 | .llseek = seq_lseek, |
365 | .release = seq_release, | 375 | .release = seq_release_net, |
366 | }; | 376 | }; |
367 | 377 | ||
368 | int __init nf_conntrack_ipv4_compat_init(void) | 378 | static int __net_init ip_conntrack_net_init(struct net *net) |
369 | { | 379 | { |
370 | struct proc_dir_entry *proc, *proc_exp, *proc_stat; | 380 | struct proc_dir_entry *proc, *proc_exp, *proc_stat; |
371 | 381 | ||
372 | proc = proc_net_fops_create(&init_net, "ip_conntrack", 0440, &ct_file_ops); | 382 | proc = proc_net_fops_create(net, "ip_conntrack", 0440, &ct_file_ops); |
373 | if (!proc) | 383 | if (!proc) |
374 | goto err1; | 384 | goto err1; |
375 | 385 | ||
376 | proc_exp = proc_net_fops_create(&init_net, "ip_conntrack_expect", 0440, | 386 | proc_exp = proc_net_fops_create(net, "ip_conntrack_expect", 0440, |
377 | &ip_exp_file_ops); | 387 | &ip_exp_file_ops); |
378 | if (!proc_exp) | 388 | if (!proc_exp) |
379 | goto err2; | 389 | goto err2; |
380 | 390 | ||
381 | proc_stat = proc_create("ip_conntrack", S_IRUGO, | 391 | proc_stat = proc_create("ip_conntrack", S_IRUGO, |
382 | init_net.proc_net_stat, &ct_cpu_seq_fops); | 392 | net->proc_net_stat, &ct_cpu_seq_fops); |
383 | if (!proc_stat) | 393 | if (!proc_stat) |
384 | goto err3; | 394 | goto err3; |
385 | return 0; | 395 | return 0; |
386 | 396 | ||
387 | err3: | 397 | err3: |
388 | proc_net_remove(&init_net, "ip_conntrack_expect"); | 398 | proc_net_remove(net, "ip_conntrack_expect"); |
389 | err2: | 399 | err2: |
390 | proc_net_remove(&init_net, "ip_conntrack"); | 400 | proc_net_remove(net, "ip_conntrack"); |
391 | err1: | 401 | err1: |
392 | return -ENOMEM; | 402 | return -ENOMEM; |
393 | } | 403 | } |
394 | 404 | ||
405 | static void __net_exit ip_conntrack_net_exit(struct net *net) | ||
406 | { | ||
407 | remove_proc_entry("ip_conntrack", net->proc_net_stat); | ||
408 | proc_net_remove(net, "ip_conntrack_expect"); | ||
409 | proc_net_remove(net, "ip_conntrack"); | ||
410 | } | ||
411 | |||
412 | static struct pernet_operations ip_conntrack_net_ops = { | ||
413 | .init = ip_conntrack_net_init, | ||
414 | .exit = ip_conntrack_net_exit, | ||
415 | }; | ||
416 | |||
417 | int __init nf_conntrack_ipv4_compat_init(void) | ||
418 | { | ||
419 | return register_pernet_subsys(&ip_conntrack_net_ops); | ||
420 | } | ||
421 | |||
395 | void __exit nf_conntrack_ipv4_compat_fini(void) | 422 | void __exit nf_conntrack_ipv4_compat_fini(void) |
396 | { | 423 | { |
397 | remove_proc_entry("ip_conntrack", init_net.proc_net_stat); | 424 | unregister_pernet_subsys(&ip_conntrack_net_ops); |
398 | proc_net_remove(&init_net, "ip_conntrack_expect"); | ||
399 | proc_net_remove(&init_net, "ip_conntrack"); | ||
400 | } | 425 | } |
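The proc conversion above follows the standard per-namespace seq_file recipe: embed a struct seq_net_private first in the iterator state, open with seq_open_net() and release with seq_release_net(), then recover the opener's namespace with seq_file_net() instead of touching init_net. The minimal shape, with illustrative names:

    struct example_iter_state {
            struct seq_net_private p;   /* must be first: seq_file_net() relies on it */
            unsigned int bucket;
    };

    static const struct seq_operations example_seq_ops;  /* start/next/stop/show */

    static int example_open(struct inode *inode, struct file *file)
    {
            return seq_open_net(inode, file, &example_seq_ops,
                                sizeof(struct example_iter_state));
    }

    static int example_show(struct seq_file *seq, void *v)
    {
            struct net *net = seq_file_net(seq);  /* the opener's netns */

            return seq_printf(seq, "%u\n", atomic_read(&net->ct.count));
    }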
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 97791048fa9b..4e8879220222 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c | |||
@@ -79,7 +79,7 @@ static int icmp_packet(struct nf_conn *ct, | |||
79 | const struct sk_buff *skb, | 79 | const struct sk_buff *skb, |
80 | unsigned int dataoff, | 80 | unsigned int dataoff, |
81 | enum ip_conntrack_info ctinfo, | 81 | enum ip_conntrack_info ctinfo, |
82 | int pf, | 82 | u_int8_t pf, |
83 | unsigned int hooknum) | 83 | unsigned int hooknum) |
84 | { | 84 | { |
85 | /* Try to delete connection immediately after all replies: | 85 | /* Try to delete connection immediately after all replies: |
@@ -91,7 +91,7 @@ static int icmp_packet(struct nf_conn *ct, | |||
91 | nf_ct_kill_acct(ct, ctinfo, skb); | 91 | nf_ct_kill_acct(ct, ctinfo, skb); |
92 | } else { | 92 | } else { |
93 | atomic_inc(&ct->proto.icmp.count); | 93 | atomic_inc(&ct->proto.icmp.count); |
94 | nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); | 94 | nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); |
95 | nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); | 95 | nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); |
96 | } | 96 | } |
97 | 97 | ||
@@ -123,7 +123,7 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, | |||
123 | 123 | ||
124 | /* Returns a verdict; fills in the skb's conntrack fields if it dealt with ICMP */ | 124 | /* Returns a verdict; fills in the skb's conntrack fields if it dealt with ICMP */ |
125 | static int | 125 | static int |
126 | icmp_error_message(struct sk_buff *skb, | 126 | icmp_error_message(struct net *net, struct sk_buff *skb, |
127 | enum ip_conntrack_info *ctinfo, | 127 | enum ip_conntrack_info *ctinfo, |
128 | unsigned int hooknum) | 128 | unsigned int hooknum) |
129 | { | 129 | { |
@@ -155,7 +155,7 @@ icmp_error_message(struct sk_buff *skb, | |||
155 | 155 | ||
156 | *ctinfo = IP_CT_RELATED; | 156 | *ctinfo = IP_CT_RELATED; |
157 | 157 | ||
158 | h = nf_conntrack_find_get(&innertuple); | 158 | h = nf_conntrack_find_get(net, &innertuple); |
159 | if (!h) { | 159 | if (!h) { |
160 | pr_debug("icmp_error_message: no match\n"); | 160 | pr_debug("icmp_error_message: no match\n"); |
161 | return -NF_ACCEPT; | 161 | return -NF_ACCEPT; |
@@ -172,8 +172,8 @@ icmp_error_message(struct sk_buff *skb, | |||
172 | 172 | ||
173 | /* Small and modified version of icmp_rcv */ | 173 | /* Small and modified version of icmp_rcv */ |
174 | static int | 174 | static int |
175 | icmp_error(struct sk_buff *skb, unsigned int dataoff, | 175 | icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, |
176 | enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) | 176 | enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) |
177 | { | 177 | { |
178 | const struct icmphdr *icmph; | 178 | const struct icmphdr *icmph; |
179 | struct icmphdr _ih; | 179 | struct icmphdr _ih; |
@@ -181,16 +181,16 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, | |||
181 | /* Not enough header? */ | 181 | /* Not enough header? */ |
182 | icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); | 182 | icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); |
183 | if (icmph == NULL) { | 183 | if (icmph == NULL) { |
184 | if (LOG_INVALID(IPPROTO_ICMP)) | 184 | if (LOG_INVALID(net, IPPROTO_ICMP)) |
185 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | 185 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
186 | "nf_ct_icmp: short packet "); | 186 | "nf_ct_icmp: short packet "); |
187 | return -NF_ACCEPT; | 187 | return -NF_ACCEPT; |
188 | } | 188 | } |
189 | 189 | ||
190 | /* See ip_conntrack_proto_tcp.c */ | 190 | /* See ip_conntrack_proto_tcp.c */ |
191 | if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && | 191 | if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && |
192 | nf_ip_checksum(skb, hooknum, dataoff, 0)) { | 192 | nf_ip_checksum(skb, hooknum, dataoff, 0)) { |
193 | if (LOG_INVALID(IPPROTO_ICMP)) | 193 | if (LOG_INVALID(net, IPPROTO_ICMP)) |
194 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | 194 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
195 | "nf_ct_icmp: bad HW ICMP checksum "); | 195 | "nf_ct_icmp: bad HW ICMP checksum "); |
196 | return -NF_ACCEPT; | 196 | return -NF_ACCEPT; |
@@ -203,7 +203,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, | |||
203 | * discarded. | 203 | * discarded. |
204 | */ | 204 | */ |
205 | if (icmph->type > NR_ICMP_TYPES) { | 205 | if (icmph->type > NR_ICMP_TYPES) { |
206 | if (LOG_INVALID(IPPROTO_ICMP)) | 206 | if (LOG_INVALID(net, IPPROTO_ICMP)) |
207 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | 207 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, |
208 | "nf_ct_icmp: invalid ICMP type "); | 208 | "nf_ct_icmp: invalid ICMP type "); |
209 | return -NF_ACCEPT; | 209 | return -NF_ACCEPT; |
@@ -217,7 +217,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, | |||
217 | && icmph->type != ICMP_REDIRECT) | 217 | && icmph->type != ICMP_REDIRECT) |
218 | return NF_ACCEPT; | 218 | return NF_ACCEPT; |
219 | 219 | ||
220 | return icmp_error_message(skb, ctinfo, hooknum); | 220 | return icmp_error_message(net, skb, ctinfo, hooknum); |
221 | } | 221 | } |
222 | 222 | ||
223 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 223 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
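LOG_INVALID() is a file-local macro in the l4proto trackers; after this change it tests the per-namespace sysctl rather than the old global nf_ct_log_invalid. Its per-net form reads approximately:

    /* Approximate per-net form of the macro used above: */
    #define LOG_INVALID(net, proto)                       \
            ((net)->ct.sysctl_log_invalid == (proto) ||   \
             (net)->ct.sysctl_log_invalid == IPPROTO_RAW)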
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c new file mode 100644 index 000000000000..aa2c50a180f7 --- /dev/null +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c | |||
@@ -0,0 +1,96 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/ip.h> | ||
11 | #include <linux/netfilter.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/skbuff.h> | ||
14 | #include <net/route.h> | ||
15 | #include <net/ip.h> | ||
16 | |||
17 | #include <linux/netfilter_ipv4.h> | ||
18 | #include <net/netfilter/ipv4/nf_defrag_ipv4.h> | ||
19 | |||
20 | /* Returns 0 when skb holds the complete datagram; nonzero otherwise (ip_defrag may have consumed the skb) */ | ||
21 | static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) | ||
22 | { | ||
23 | int err; | ||
24 | |||
25 | skb_orphan(skb); | ||
26 | |||
27 | local_bh_disable(); | ||
28 | err = ip_defrag(skb, user); | ||
29 | local_bh_enable(); | ||
30 | |||
31 | if (!err) | ||
32 | ip_send_check(ip_hdr(skb)); | ||
33 | |||
34 | return err; | ||
35 | } | ||
36 | |||
37 | static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, | ||
38 | struct sk_buff *skb, | ||
39 | const struct net_device *in, | ||
40 | const struct net_device *out, | ||
41 | int (*okfn)(struct sk_buff *)) | ||
42 | { | ||
43 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
44 | /* Previously seen (loopback)? Ignore. Do this before | ||
45 | fragment check. */ | ||
46 | if (skb->nfct) | ||
47 | return NF_ACCEPT; | ||
48 | #endif | ||
49 | |||
50 | /* Gather fragments. */ | ||
51 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { | ||
52 | if (nf_ct_ipv4_gather_frags(skb, | ||
53 | hooknum == NF_INET_PRE_ROUTING ? | ||
54 | IP_DEFRAG_CONNTRACK_IN : | ||
55 | IP_DEFRAG_CONNTRACK_OUT)) | ||
56 | return NF_STOLEN; | ||
57 | } | ||
58 | return NF_ACCEPT; | ||
59 | } | ||
60 | |||
61 | static struct nf_hook_ops ipv4_defrag_ops[] = { | ||
62 | { | ||
63 | .hook = ipv4_conntrack_defrag, | ||
64 | .owner = THIS_MODULE, | ||
65 | .pf = PF_INET, | ||
66 | .hooknum = NF_INET_PRE_ROUTING, | ||
67 | .priority = NF_IP_PRI_CONNTRACK_DEFRAG, | ||
68 | }, | ||
69 | { | ||
70 | .hook = ipv4_conntrack_defrag, | ||
71 | .owner = THIS_MODULE, | ||
72 | .pf = PF_INET, | ||
73 | .hooknum = NF_INET_LOCAL_OUT, | ||
74 | .priority = NF_IP_PRI_CONNTRACK_DEFRAG, | ||
75 | }, | ||
76 | }; | ||
77 | |||
78 | static int __init nf_defrag_init(void) | ||
79 | { | ||
80 | return nf_register_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); | ||
81 | } | ||
82 | |||
83 | static void __exit nf_defrag_fini(void) | ||
84 | { | ||
85 | nf_unregister_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); | ||
86 | } | ||
87 | |||
88 | void nf_defrag_ipv4_enable(void) | ||
89 | { | ||
90 | } | ||
91 | EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable); | ||
92 | |||
93 | module_init(nf_defrag_init); | ||
94 | module_exit(nf_defrag_fini); | ||
95 | |||
96 | MODULE_LICENSE("GPL"); | ||
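Note that nf_defrag_ipv4_enable() above deliberately has an empty body: calling it does nothing at runtime, but the symbol reference gives its users (conntrack above; the split lets other hook users share defragmentation too) a hard module dependency, so this module is loaded, and its hooks registered, before theirs. A dependent module's init looks roughly like:

    #include <net/netfilter/ipv4/nf_defrag_ipv4.h>

    static int __init example_user_init(void)
    {
            /* No-op call: the symbol dependency alone pulls in
             * nf_defrag_ipv4 and thereby registers the defrag hooks. */
            nf_defrag_ipv4_enable();

            /* ... register this module's own hooks here ... */
            return 0;
    }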
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 6c6a3cba8d50..2ac9eaf1a8c9 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
@@ -37,9 +37,6 @@ static struct nf_conntrack_l3proto *l3proto __read_mostly; | |||
37 | 37 | ||
38 | /* Calculated at init based on memory size */ | 38 | /* Calculated at init based on memory size */ |
39 | static unsigned int nf_nat_htable_size __read_mostly; | 39 | static unsigned int nf_nat_htable_size __read_mostly; |
40 | static int nf_nat_vmalloced; | ||
41 | |||
42 | static struct hlist_head *bysource __read_mostly; | ||
43 | 40 | ||
44 | #define MAX_IP_NAT_PROTO 256 | 41 | #define MAX_IP_NAT_PROTO 256 |
45 | static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] | 42 | static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] |
@@ -145,7 +142,8 @@ same_src(const struct nf_conn *ct, | |||
145 | 142 | ||
146 | /* Only called for SRC manip */ | 143 | /* Only called for SRC manip */ |
147 | static int | 144 | static int |
148 | find_appropriate_src(const struct nf_conntrack_tuple *tuple, | 145 | find_appropriate_src(struct net *net, |
146 | const struct nf_conntrack_tuple *tuple, | ||
149 | struct nf_conntrack_tuple *result, | 147 | struct nf_conntrack_tuple *result, |
150 | const struct nf_nat_range *range) | 148 | const struct nf_nat_range *range) |
151 | { | 149 | { |
@@ -155,7 +153,7 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple, | |||
155 | const struct hlist_node *n; | 153 | const struct hlist_node *n; |
156 | 154 | ||
157 | rcu_read_lock(); | 155 | rcu_read_lock(); |
158 | hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) { | 156 | hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { |
159 | ct = nat->ct; | 157 | ct = nat->ct; |
160 | if (same_src(ct, tuple)) { | 158 | if (same_src(ct, tuple)) { |
161 | /* Copy source part from reply tuple. */ | 159 | /* Copy source part from reply tuple. */ |
@@ -231,6 +229,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
231 | struct nf_conn *ct, | 229 | struct nf_conn *ct, |
232 | enum nf_nat_manip_type maniptype) | 230 | enum nf_nat_manip_type maniptype) |
233 | { | 231 | { |
232 | struct net *net = nf_ct_net(ct); | ||
234 | const struct nf_nat_protocol *proto; | 233 | const struct nf_nat_protocol *proto; |
235 | 234 | ||
236 | /* 1) If this srcip/proto/src-proto-part is currently mapped, | 235 | /* 1) If this srcip/proto/src-proto-part is currently mapped, |
@@ -242,7 +241,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
242 | manips not an issue. */ | 241 | manips not an issue. */ |
243 | if (maniptype == IP_NAT_MANIP_SRC && | 242 | if (maniptype == IP_NAT_MANIP_SRC && |
244 | !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { | 243 | !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { |
245 | if (find_appropriate_src(orig_tuple, tuple, range)) { | 244 | if (find_appropriate_src(net, orig_tuple, tuple, range)) { |
246 | pr_debug("get_unique_tuple: Found current src map\n"); | 245 | pr_debug("get_unique_tuple: Found current src map\n"); |
247 | if (!nf_nat_used_tuple(tuple, ct)) | 246 | if (!nf_nat_used_tuple(tuple, ct)) |
248 | return; | 247 | return; |
@@ -283,6 +282,7 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
283 | const struct nf_nat_range *range, | 282 | const struct nf_nat_range *range, |
284 | enum nf_nat_manip_type maniptype) | 283 | enum nf_nat_manip_type maniptype) |
285 | { | 284 | { |
285 | struct net *net = nf_ct_net(ct); | ||
286 | struct nf_conntrack_tuple curr_tuple, new_tuple; | 286 | struct nf_conntrack_tuple curr_tuple, new_tuple; |
287 | struct nf_conn_nat *nat; | 287 | struct nf_conn_nat *nat; |
288 | int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); | 288 | int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); |
@@ -334,7 +334,8 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
334 | /* nf_conntrack_alter_reply might re-allocate extension area */ | 334 | /* nf_conntrack_alter_reply might re-allocate extension area */ |
335 | nat = nfct_nat(ct); | 335 | nat = nfct_nat(ct); |
336 | nat->ct = ct; | 336 | nat->ct = ct; |
337 | hlist_add_head_rcu(&nat->bysource, &bysource[srchash]); | 337 | hlist_add_head_rcu(&nat->bysource, |
338 | &net->ipv4.nat_bysource[srchash]); | ||
338 | spin_unlock_bh(&nf_nat_lock); | 339 | spin_unlock_bh(&nf_nat_lock); |
339 | } | 340 | } |
340 | 341 | ||
@@ -583,6 +584,40 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { | |||
583 | .flags = NF_CT_EXT_F_PREALLOC, | 584 | .flags = NF_CT_EXT_F_PREALLOC, |
584 | }; | 585 | }; |
585 | 586 | ||
587 | static int __net_init nf_nat_net_init(struct net *net) | ||
588 | { | ||
589 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, | ||
590 | &net->ipv4.nat_vmalloced); | ||
591 | if (!net->ipv4.nat_bysource) | ||
592 | return -ENOMEM; | ||
593 | return 0; | ||
594 | } | ||
595 | |||
596 | /* Clear NAT section of all conntracks, in case we're loaded again. */ | ||
597 | static int clean_nat(struct nf_conn *i, void *data) | ||
598 | { | ||
599 | struct nf_conn_nat *nat = nfct_nat(i); | ||
600 | |||
601 | if (!nat) | ||
602 | return 0; | ||
603 | memset(nat, 0, sizeof(*nat)); | ||
604 | i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); | ||
605 | return 0; | ||
606 | } | ||
607 | |||
608 | static void __net_exit nf_nat_net_exit(struct net *net) | ||
609 | { | ||
610 | nf_ct_iterate_cleanup(net, &clean_nat, NULL); | ||
611 | synchronize_rcu(); | ||
612 | nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, | ||
613 | nf_nat_htable_size); | ||
614 | } | ||
615 | |||
616 | static struct pernet_operations nf_nat_net_ops = { | ||
617 | .init = nf_nat_net_init, | ||
618 | .exit = nf_nat_net_exit, | ||
619 | }; | ||
620 | |||
586 | static int __init nf_nat_init(void) | 621 | static int __init nf_nat_init(void) |
587 | { | 622 | { |
588 | size_t i; | 623 | size_t i; |
@@ -599,12 +634,9 @@ static int __init nf_nat_init(void) | |||
599 | /* Leave them the same for the moment. */ | 634 | /* Leave them the same for the moment. */ |
600 | nf_nat_htable_size = nf_conntrack_htable_size; | 635 | nf_nat_htable_size = nf_conntrack_htable_size; |
601 | 636 | ||
602 | bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, | 637 | ret = register_pernet_subsys(&nf_nat_net_ops); |
603 | &nf_nat_vmalloced); | 638 | if (ret < 0) |
604 | if (!bysource) { | ||
605 | ret = -ENOMEM; | ||
606 | goto cleanup_extend; | 639 | goto cleanup_extend; |
607 | } | ||
608 | 640 | ||
609 | /* Sew in builtin protocols. */ | 641 | /* Sew in builtin protocols. */ |
610 | spin_lock_bh(&nf_nat_lock); | 642 | spin_lock_bh(&nf_nat_lock); |
@@ -629,23 +661,9 @@ static int __init nf_nat_init(void) | |||
629 | return ret; | 661 | return ret; |
630 | } | 662 | } |
631 | 663 | ||
632 | /* Clear NAT section of all conntracks, in case we're loaded again. */ | ||
633 | static int clean_nat(struct nf_conn *i, void *data) | ||
634 | { | ||
635 | struct nf_conn_nat *nat = nfct_nat(i); | ||
636 | |||
637 | if (!nat) | ||
638 | return 0; | ||
639 | memset(nat, 0, sizeof(*nat)); | ||
640 | i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); | ||
641 | return 0; | ||
642 | } | ||
643 | |||
644 | static void __exit nf_nat_cleanup(void) | 664 | static void __exit nf_nat_cleanup(void) |
645 | { | 665 | { |
646 | nf_ct_iterate_cleanup(&clean_nat, NULL); | 666 | unregister_pernet_subsys(&nf_nat_net_ops); |
647 | synchronize_rcu(); | ||
648 | nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size); | ||
649 | nf_ct_l3proto_put(l3proto); | 667 | nf_ct_l3proto_put(l3proto); |
650 | nf_ct_extend_unregister(&nat_extend); | 668 | nf_ct_extend_unregister(&nat_extend); |
651 | rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); | 669 | rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); |
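The conversion above is the canonical pernet pattern: the module-global bysource hash becomes per-namespace state reached through struct net, allocated in .init and freed in .exit. register_pernet_subsys() runs .init immediately for init_net and again for every namespace created later; unregister_pernet_subsys() runs .exit for each namespace still alive. In outline, with illustrative names:

    static int __net_init example_net_init(struct net *net)
    {
            net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
                                                           &net->ipv4.nat_vmalloced);
            return net->ipv4.nat_bysource ? 0 : -ENOMEM;
    }

    static void __net_exit example_net_exit(struct net *net)
    {
            nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
                                 nf_nat_htable_size);
    }

    static struct pernet_operations example_net_ops = {
            .init = example_net_init,   /* init_net now, each new netns later */
            .exit = example_net_exit,   /* every live netns at unregister time */
    };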
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 11976ea29884..cf7a42bf9820 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/udp.h> | 16 | #include <linux/udp.h> |
17 | #include <net/checksum.h> | 17 | #include <net/checksum.h> |
18 | #include <net/tcp.h> | 18 | #include <net/tcp.h> |
19 | #include <net/route.h> | ||
19 | 20 | ||
20 | #include <linux/netfilter_ipv4.h> | 21 | #include <linux/netfilter_ipv4.h> |
21 | #include <net/netfilter/nf_conntrack.h> | 22 | #include <net/netfilter/nf_conntrack.h> |
@@ -192,7 +193,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, | |||
192 | nf_conntrack_tcp_update(skb, ip_hdrlen(skb), | 193 | nf_conntrack_tcp_update(skb, ip_hdrlen(skb), |
193 | ct, CTINFO2DIR(ctinfo)); | 194 | ct, CTINFO2DIR(ctinfo)); |
194 | 195 | ||
195 | nf_conntrack_event_cache(IPCT_NATSEQADJ, skb); | 196 | nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); |
196 | } | 197 | } |
197 | return 1; | 198 | return 1; |
198 | } | 199 | } |
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index da3d91a5ef5c..9eb171056c63 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c | |||
@@ -40,6 +40,7 @@ MODULE_ALIAS("ip_nat_pptp"); | |||
40 | static void pptp_nat_expected(struct nf_conn *ct, | 40 | static void pptp_nat_expected(struct nf_conn *ct, |
41 | struct nf_conntrack_expect *exp) | 41 | struct nf_conntrack_expect *exp) |
42 | { | 42 | { |
43 | struct net *net = nf_ct_net(ct); | ||
43 | const struct nf_conn *master = ct->master; | 44 | const struct nf_conn *master = ct->master; |
44 | struct nf_conntrack_expect *other_exp; | 45 | struct nf_conntrack_expect *other_exp; |
45 | struct nf_conntrack_tuple t; | 46 | struct nf_conntrack_tuple t; |
@@ -73,7 +74,7 @@ static void pptp_nat_expected(struct nf_conn *ct, | |||
73 | 74 | ||
74 | pr_debug("trying to unexpect other dir: "); | 75 | pr_debug("trying to unexpect other dir: "); |
75 | nf_ct_dump_tuple_ip(&t); | 76 | nf_ct_dump_tuple_ip(&t); |
76 | other_exp = nf_ct_expect_find_get(&t); | 77 | other_exp = nf_ct_expect_find_get(net, &t); |
77 | if (other_exp) { | 78 | if (other_exp) { |
78 | nf_ct_unexpect_related(other_exp); | 79 | nf_ct_unexpect_related(other_exp); |
79 | nf_ct_expect_put(other_exp); | 80 | nf_ct_expect_put(other_exp); |
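Helper code like this has no device to derive a namespace from, so it takes it from the conntrack itself: nf_ct_net(ct) returns the namespace the entry was created in, which then feeds the now namespace-aware lookup functions. A sketch of the pattern:

    /* Sketch: namespace-aware expectation lookup from a conntrack entry. */
    static void example_unexpect_other_dir(struct nf_conn *ct,
                                           const struct nf_conntrack_tuple *t)
    {
            struct net *net = nf_ct_net(ct);
            struct nf_conntrack_expect *exp = nf_ct_expect_find_get(net, t);

            if (exp) {
                    nf_ct_unexpect_related(exp);
                    nf_ct_expect_put(exp);
            }
    }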
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index e8b4d0d4439e..bea54a685109 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c | |||
@@ -33,7 +33,7 @@ static struct | |||
33 | struct ipt_replace repl; | 33 | struct ipt_replace repl; |
34 | struct ipt_standard entries[3]; | 34 | struct ipt_standard entries[3]; |
35 | struct ipt_error term; | 35 | struct ipt_error term; |
36 | } nat_initial_table __initdata = { | 36 | } nat_initial_table __net_initdata = { |
37 | .repl = { | 37 | .repl = { |
38 | .name = "nat", | 38 | .name = "nat", |
39 | .valid_hooks = NAT_VALID_HOOKS, | 39 | .valid_hooks = NAT_VALID_HOOKS, |
@@ -58,47 +58,42 @@ static struct | |||
58 | .term = IPT_ERROR_INIT, /* ERROR */ | 58 | .term = IPT_ERROR_INIT, /* ERROR */ |
59 | }; | 59 | }; |
60 | 60 | ||
61 | static struct xt_table __nat_table = { | 61 | static struct xt_table nat_table = { |
62 | .name = "nat", | 62 | .name = "nat", |
63 | .valid_hooks = NAT_VALID_HOOKS, | 63 | .valid_hooks = NAT_VALID_HOOKS, |
64 | .lock = __RW_LOCK_UNLOCKED(__nat_table.lock), | 64 | .lock = __RW_LOCK_UNLOCKED(__nat_table.lock), |
65 | .me = THIS_MODULE, | 65 | .me = THIS_MODULE, |
66 | .af = AF_INET, | 66 | .af = AF_INET, |
67 | }; | 67 | }; |
68 | static struct xt_table *nat_table; | ||
69 | 68 | ||
70 | /* Source NAT */ | 69 | /* Source NAT */ |
71 | static unsigned int ipt_snat_target(struct sk_buff *skb, | 70 | static unsigned int |
72 | const struct net_device *in, | 71 | ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par) |
73 | const struct net_device *out, | ||
74 | unsigned int hooknum, | ||
75 | const struct xt_target *target, | ||
76 | const void *targinfo) | ||
77 | { | 72 | { |
78 | struct nf_conn *ct; | 73 | struct nf_conn *ct; |
79 | enum ip_conntrack_info ctinfo; | 74 | enum ip_conntrack_info ctinfo; |
80 | const struct nf_nat_multi_range_compat *mr = targinfo; | 75 | const struct nf_nat_multi_range_compat *mr = par->targinfo; |
81 | 76 | ||
82 | NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING); | 77 | NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); |
83 | 78 | ||
84 | ct = nf_ct_get(skb, &ctinfo); | 79 | ct = nf_ct_get(skb, &ctinfo); |
85 | 80 | ||
86 | /* Connection must be valid and new. */ | 81 | /* Connection must be valid and new. */ |
87 | NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || | 82 | NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || |
88 | ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); | 83 | ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); |
89 | NF_CT_ASSERT(out); | 84 | NF_CT_ASSERT(par->out != NULL); |
90 | 85 | ||
91 | return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); | 86 | return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); |
92 | } | 87 | } |
93 | 88 | ||
94 | /* Before 2.6.11 we did implicit source NAT if required. Warn about change. */ | 89 | /* Before 2.6.11 we did implicit source NAT if required. Warn about change. */ |
95 | static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) | 90 | static void warn_if_extra_mangle(struct net *net, __be32 dstip, __be32 srcip) |
96 | { | 91 | { |
97 | static int warned = 0; | 92 | static int warned = 0; |
98 | struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; | 93 | struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; |
99 | struct rtable *rt; | 94 | struct rtable *rt; |
100 | 95 | ||
101 | if (ip_route_output_key(&init_net, &rt, &fl) != 0) | 96 | if (ip_route_output_key(net, &rt, &fl) != 0) |
102 | return; | 97 | return; |
103 | 98 | ||
104 | if (rt->rt_src != srcip && !warned) { | 99 | if (rt->rt_src != srcip && !warned) { |
@@ -110,40 +105,32 @@ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) | |||
110 | ip_rt_put(rt); | 105 | ip_rt_put(rt); |
111 | } | 106 | } |
112 | 107 | ||
113 | static unsigned int ipt_dnat_target(struct sk_buff *skb, | 108 | static unsigned int |
114 | const struct net_device *in, | 109 | ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par) |
115 | const struct net_device *out, | ||
116 | unsigned int hooknum, | ||
117 | const struct xt_target *target, | ||
118 | const void *targinfo) | ||
119 | { | 110 | { |
120 | struct nf_conn *ct; | 111 | struct nf_conn *ct; |
121 | enum ip_conntrack_info ctinfo; | 112 | enum ip_conntrack_info ctinfo; |
122 | const struct nf_nat_multi_range_compat *mr = targinfo; | 113 | const struct nf_nat_multi_range_compat *mr = par->targinfo; |
123 | 114 | ||
124 | NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING || | 115 | NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || |
125 | hooknum == NF_INET_LOCAL_OUT); | 116 | par->hooknum == NF_INET_LOCAL_OUT); |
126 | 117 | ||
127 | ct = nf_ct_get(skb, &ctinfo); | 118 | ct = nf_ct_get(skb, &ctinfo); |
128 | 119 | ||
129 | /* Connection must be valid and new. */ | 120 | /* Connection must be valid and new. */ |
130 | NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); | 121 | NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); |
131 | 122 | ||
132 | if (hooknum == NF_INET_LOCAL_OUT && | 123 | if (par->hooknum == NF_INET_LOCAL_OUT && |
133 | mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) | 124 | mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) |
134 | warn_if_extra_mangle(ip_hdr(skb)->daddr, | 125 | warn_if_extra_mangle(dev_net(par->out), ip_hdr(skb)->daddr, |
135 | mr->range[0].min_ip); | 126 | mr->range[0].min_ip); |
136 | 127 | ||
137 | return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST); | 128 | return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST); |
138 | } | 129 | } |
139 | 130 | ||
140 | static bool ipt_snat_checkentry(const char *tablename, | 131 | static bool ipt_snat_checkentry(const struct xt_tgchk_param *par) |
141 | const void *entry, | ||
142 | const struct xt_target *target, | ||
143 | void *targinfo, | ||
144 | unsigned int hook_mask) | ||
145 | { | 132 | { |
146 | const struct nf_nat_multi_range_compat *mr = targinfo; | 133 | const struct nf_nat_multi_range_compat *mr = par->targinfo; |
147 | 134 | ||
148 | /* Must be a valid range */ | 135 | /* Must be a valid range */ |
149 | if (mr->rangesize != 1) { | 136 | if (mr->rangesize != 1) { |
@@ -153,13 +140,9 @@ static bool ipt_snat_checkentry(const char *tablename, | |||
153 | return true; | 140 | return true; |
154 | } | 141 | } |
155 | 142 | ||
156 | static bool ipt_dnat_checkentry(const char *tablename, | 143 | static bool ipt_dnat_checkentry(const struct xt_tgchk_param *par) |
157 | const void *entry, | ||
158 | const struct xt_target *target, | ||
159 | void *targinfo, | ||
160 | unsigned int hook_mask) | ||
161 | { | 144 | { |
162 | const struct nf_nat_multi_range_compat *mr = targinfo; | 145 | const struct nf_nat_multi_range_compat *mr = par->targinfo; |
163 | 146 | ||
164 | /* Must be a valid range */ | 147 | /* Must be a valid range */ |
165 | if (mr->rangesize != 1) { | 148 | if (mr->rangesize != 1) { |
@@ -194,9 +177,10 @@ int nf_nat_rule_find(struct sk_buff *skb, | |||
194 | const struct net_device *out, | 177 | const struct net_device *out, |
195 | struct nf_conn *ct) | 178 | struct nf_conn *ct) |
196 | { | 179 | { |
180 | struct net *net = nf_ct_net(ct); | ||
197 | int ret; | 181 | int ret; |
198 | 182 | ||
199 | ret = ipt_do_table(skb, hooknum, in, out, nat_table); | 183 | ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); |
200 | 184 | ||
201 | if (ret == NF_ACCEPT) { | 185 | if (ret == NF_ACCEPT) { |
202 | if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) | 186 | if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) |
@@ -226,14 +210,32 @@ static struct xt_target ipt_dnat_reg __read_mostly = { | |||
226 | .family = AF_INET, | 210 | .family = AF_INET, |
227 | }; | 211 | }; |
228 | 212 | ||
213 | static int __net_init nf_nat_rule_net_init(struct net *net) | ||
214 | { | ||
215 | net->ipv4.nat_table = ipt_register_table(net, &nat_table, | ||
216 | &nat_initial_table.repl); | ||
217 | if (IS_ERR(net->ipv4.nat_table)) | ||
218 | return PTR_ERR(net->ipv4.nat_table); | ||
219 | return 0; | ||
220 | } | ||
221 | |||
222 | static void __net_exit nf_nat_rule_net_exit(struct net *net) | ||
223 | { | ||
224 | ipt_unregister_table(net->ipv4.nat_table); | ||
225 | } | ||
226 | |||
227 | static struct pernet_operations nf_nat_rule_net_ops = { | ||
228 | .init = nf_nat_rule_net_init, | ||
229 | .exit = nf_nat_rule_net_exit, | ||
230 | }; | ||
231 | |||
229 | int __init nf_nat_rule_init(void) | 232 | int __init nf_nat_rule_init(void) |
230 | { | 233 | { |
231 | int ret; | 234 | int ret; |
232 | 235 | ||
233 | nat_table = ipt_register_table(&init_net, &__nat_table, | 236 | ret = register_pernet_subsys(&nf_nat_rule_net_ops); |
234 | &nat_initial_table.repl); | 237 | if (ret != 0) |
235 | if (IS_ERR(nat_table)) | 238 | goto out; |
236 | return PTR_ERR(nat_table); | ||
237 | ret = xt_register_target(&ipt_snat_reg); | 239 | ret = xt_register_target(&ipt_snat_reg); |
238 | if (ret != 0) | 240 | if (ret != 0) |
239 | goto unregister_table; | 241 | goto unregister_table; |
@@ -247,8 +249,8 @@ int __init nf_nat_rule_init(void) | |||
247 | unregister_snat: | 249 | unregister_snat: |
248 | xt_unregister_target(&ipt_snat_reg); | 250 | xt_unregister_target(&ipt_snat_reg); |
249 | unregister_table: | 251 | unregister_table: |
250 | ipt_unregister_table(nat_table); | 252 | unregister_pernet_subsys(&nf_nat_rule_net_ops); |
251 | 253 | out: | |
252 | return ret; | 254 | return ret; |
253 | } | 255 | } |
254 | 256 | ||
@@ -256,5 +258,5 @@ void nf_nat_rule_cleanup(void) | |||
256 | { | 258 | { |
257 | xt_unregister_target(&ipt_dnat_reg); | 259 | xt_unregister_target(&ipt_dnat_reg); |
258 | xt_unregister_target(&ipt_snat_reg); | 260 | xt_unregister_target(&ipt_snat_reg); |
259 | ipt_unregister_table(nat_table); | 261 | unregister_pernet_subsys(&nf_nat_rule_net_ops); |
260 | } | 262 | } |
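
The nf_nat_rule.c conversion above moves the NAT table from a single global into per-namespace state registered through pernet_operations, and the module init path now unwinds whatever already succeeded when a later step fails. A minimal userspace sketch of that goto-unwind idiom follows; the register_*/unregister_* names are stand-ins for the real steps, not kernel APIs:

    #include <stdio.h>

    static int register_pernet(void) { return 0; }
    static int register_snat(void)   { return 0; }
    static int register_dnat(void)   { return -1; /* simulate a failure */ }

    static void unregister_pernet(void) { puts("pernet ops unregistered"); }
    static void unregister_snat(void)   { puts("snat target unregistered"); }

    /* Mirror of the init ordering: on any failure, tear down in the
     * reverse of the order the earlier steps were set up. */
    static int init_all(void)
    {
        int ret;

        ret = register_pernet();
        if (ret != 0)
            goto out;
        ret = register_snat();
        if (ret != 0)
            goto err_pernet;
        ret = register_dnat();
        if (ret != 0)
            goto err_snat;
        return 0;

    err_snat:
        unregister_snat();
    err_pernet:
        unregister_pernet();
    out:
        return ret;
    }

    int main(void)
    {
        printf("init_all() = %d\n", init_all());
        return 0;
    }

With the pernet_operations registered first, ipt_register_table()/ipt_unregister_table() then run once per namespace via the .init/.exit callbacks rather than once globally.
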
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6ee5354c9aa1..a6d7c584f53b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -282,6 +282,8 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) | |||
282 | struct rtable *r = NULL; | 282 | struct rtable *r = NULL; |
283 | 283 | ||
284 | for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { | 284 | for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { |
285 | if (!rt_hash_table[st->bucket].chain) | ||
286 | continue; | ||
285 | rcu_read_lock_bh(); | 287 | rcu_read_lock_bh(); |
286 | r = rcu_dereference(rt_hash_table[st->bucket].chain); | 288 | r = rcu_dereference(rt_hash_table[st->bucket].chain); |
287 | while (r) { | 289 | while (r) { |
@@ -299,11 +301,14 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, | |||
299 | struct rtable *r) | 301 | struct rtable *r) |
300 | { | 302 | { |
301 | struct rt_cache_iter_state *st = seq->private; | 303 | struct rt_cache_iter_state *st = seq->private; |
304 | |||
302 | r = r->u.dst.rt_next; | 305 | r = r->u.dst.rt_next; |
303 | while (!r) { | 306 | while (!r) { |
304 | rcu_read_unlock_bh(); | 307 | rcu_read_unlock_bh(); |
305 | if (--st->bucket < 0) | 308 | do { |
306 | break; | 309 | if (--st->bucket < 0) |
310 | return NULL; | ||
311 | } while (!rt_hash_table[st->bucket].chain); | ||
307 | rcu_read_lock_bh(); | 312 | rcu_read_lock_bh(); |
308 | r = rt_hash_table[st->bucket].chain; | 313 | r = rt_hash_table[st->bucket].chain; |
309 | } | 314 | } |
@@ -2356,11 +2361,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2356 | ipv4_is_zeronet(oldflp->fl4_src)) | 2361 | ipv4_is_zeronet(oldflp->fl4_src)) |
2357 | goto out; | 2362 | goto out; |
2358 | 2363 | ||
2359 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | ||
2360 | dev_out = ip_dev_find(net, oldflp->fl4_src); | ||
2361 | if (dev_out == NULL) | ||
2362 | goto out; | ||
2363 | |||
2364 | /* I removed check for oif == dev_out->oif here. | 2364 | /* I removed check for oif == dev_out->oif here. |
2365 | It was wrong for two reasons: | 2365 | It was wrong for two reasons: |
2366 | 1. ip_dev_find(net, saddr) can return wrong iface, if saddr | 2366 | 1. ip_dev_find(net, saddr) can return wrong iface, if saddr |
@@ -2372,6 +2372,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2372 | if (oldflp->oif == 0 | 2372 | if (oldflp->oif == 0 |
2373 | && (ipv4_is_multicast(oldflp->fl4_dst) || | 2373 | && (ipv4_is_multicast(oldflp->fl4_dst) || |
2374 | oldflp->fl4_dst == htonl(0xFFFFFFFF))) { | 2374 | oldflp->fl4_dst == htonl(0xFFFFFFFF))) { |
2375 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | ||
2376 | dev_out = ip_dev_find(net, oldflp->fl4_src); | ||
2377 | if (dev_out == NULL) | ||
2378 | goto out; | ||
2379 | |||
2375 | /* Special hack: user can direct multicasts | 2380 | /* Special hack: user can direct multicasts |
2376 | and limited broadcast via necessary interface | 2381 | and limited broadcast via necessary interface |
2377 | without fiddling with IP_MULTICAST_IF or IP_PKTINFO. | 2382 | without fiddling with IP_MULTICAST_IF or IP_PKTINFO. |
@@ -2390,9 +2395,15 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2390 | fl.oif = dev_out->ifindex; | 2395 | fl.oif = dev_out->ifindex; |
2391 | goto make_route; | 2396 | goto make_route; |
2392 | } | 2397 | } |
2393 | if (dev_out) | 2398 | |
2399 | if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { | ||
2400 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | ||
2401 | dev_out = ip_dev_find(net, oldflp->fl4_src); | ||
2402 | if (dev_out == NULL) | ||
2403 | goto out; | ||
2394 | dev_put(dev_out); | 2404 | dev_put(dev_out); |
2395 | dev_out = NULL; | 2405 | dev_out = NULL; |
2406 | } | ||
2396 | } | 2407 | } |
2397 | 2408 | ||
2398 | 2409 | ||
@@ -2840,7 +2851,9 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
2840 | if (s_h < 0) | 2851 | if (s_h < 0) |
2841 | s_h = 0; | 2852 | s_h = 0; |
2842 | s_idx = idx = cb->args[1]; | 2853 | s_idx = idx = cb->args[1]; |
2843 | for (h = s_h; h <= rt_hash_mask; h++) { | 2854 | for (h = s_h; h <= rt_hash_mask; h++, s_idx = 0) { |
2855 | if (!rt_hash_table[h].chain) | ||
2856 | continue; | ||
2844 | rcu_read_lock_bh(); | 2857 | rcu_read_lock_bh(); |
2845 | for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; | 2858 | for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; |
2846 | rt = rcu_dereference(rt->u.dst.rt_next), idx++) { | 2859 | rt = rcu_dereference(rt->u.dst.rt_next), idx++) { |
@@ -2859,7 +2872,6 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
2859 | dst_release(xchg(&skb->dst, NULL)); | 2872 | dst_release(xchg(&skb->dst, NULL)); |
2860 | } | 2873 | } |
2861 | rcu_read_unlock_bh(); | 2874 | rcu_read_unlock_bh(); |
2862 | s_idx = 0; | ||
2863 | } | 2875 | } |
2864 | 2876 | ||
2865 | done: | 2877 | done: |
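
Both route.c iterator hunks apply the same two ideas: test the bucket's chain pointer before paying for the RCU read lock, and reset the intra-bucket resume index in the for-loop increment so only the resumed bucket starts mid-chain. A rough userspace model of the resumable dump (plain pointers in place of RCU hash chains; all names illustrative):

    #include <stdio.h>

    struct rt_node { int val; struct rt_node *next; };

    #define NBUCKETS 8
    static struct rt_node *hash[NBUCKETS];

    /* Resumable dump: (*s_h, *s_idx) records where the last call stopped. */
    static void dump(int *s_h, int *s_idx)
    {
        int h, idx;

        /* s_idx is zeroed in the increment clause, so only the resumed
         * bucket starts mid-chain; later buckets start from index 0. */
        for (h = *s_h; h < NBUCKETS; h++, *s_idx = 0) {
            struct rt_node *r;

            if (!hash[h])        /* cheap test before any locking */
                continue;
            for (r = hash[h], idx = 0; r; r = r->next, idx++) {
                if (idx < *s_idx)
                    continue;
                printf("bucket %d idx %d val %d\n", h, idx, r->val);
            }
        }
        *s_h = h;
    }

    int main(void)
    {
        static struct rt_node a = { 1, 0 }, b = { 2, &a };
        int s_h = 0, s_idx = 0;

        hash[3] = &b;
        dump(&s_h, &s_idx);
        return 0;
    }
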
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 9d38005abbac..d346c22aa6ae 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/cryptohash.h> | 16 | #include <linux/cryptohash.h> |
17 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
18 | #include <net/tcp.h> | 18 | #include <net/tcp.h> |
19 | #include <net/route.h> | ||
19 | 20 | ||
20 | /* Timestamps: lowest 9 bits store TCP options */ | 21 | /* Timestamps: lowest 9 bits store TCP options */ |
21 | #define TSBITS 9 | 22 | #define TSBITS 9 |
@@ -296,6 +297,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
296 | treq->rcv_isn = ntohl(th->seq) - 1; | 297 | treq->rcv_isn = ntohl(th->seq) - 1; |
297 | treq->snt_isn = cookie; | 298 | treq->snt_isn = cookie; |
298 | req->mss = mss; | 299 | req->mss = mss; |
300 | ireq->loc_port = th->dest; | ||
299 | ireq->rmt_port = th->source; | 301 | ireq->rmt_port = th->source; |
300 | ireq->loc_addr = ip_hdr(skb)->daddr; | 302 | ireq->loc_addr = ip_hdr(skb)->daddr; |
301 | ireq->rmt_addr = ip_hdr(skb)->saddr; | 303 | ireq->rmt_addr = ip_hdr(skb)->saddr; |
@@ -337,6 +339,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
337 | .saddr = ireq->loc_addr, | 339 | .saddr = ireq->loc_addr, |
338 | .tos = RT_CONN_FLAGS(sk) } }, | 340 | .tos = RT_CONN_FLAGS(sk) } }, |
339 | .proto = IPPROTO_TCP, | 341 | .proto = IPPROTO_TCP, |
342 | .flags = inet_sk_flowi_flags(sk), | ||
340 | .uli_u = { .ports = | 343 | .uli_u = { .ports = |
341 | { .sport = th->dest, | 344 | { .sport = th->dest, |
342 | .dport = th->source } } }; | 345 | .dport = th->source } } }; |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e0689fd7b798..276d047fb85a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -26,16 +26,13 @@ static int tcp_retr1_max = 255; | |||
26 | static int ip_local_port_range_min[] = { 1, 1 }; | 26 | static int ip_local_port_range_min[] = { 1, 1 }; |
27 | static int ip_local_port_range_max[] = { 65535, 65535 }; | 27 | static int ip_local_port_range_max[] = { 65535, 65535 }; |
28 | 28 | ||
29 | extern seqlock_t sysctl_port_range_lock; | ||
30 | extern int sysctl_local_port_range[2]; | ||
31 | |||
32 | /* Update system visible IP port range */ | 29 | /* Update system visible IP port range */ |
33 | static void set_local_port_range(int range[2]) | 30 | static void set_local_port_range(int range[2]) |
34 | { | 31 | { |
35 | write_seqlock(&sysctl_port_range_lock); | 32 | write_seqlock(&sysctl_local_ports.lock); |
36 | sysctl_local_port_range[0] = range[0]; | 33 | sysctl_local_ports.range[0] = range[0]; |
37 | sysctl_local_port_range[1] = range[1]; | 34 | sysctl_local_ports.range[1] = range[1]; |
38 | write_sequnlock(&sysctl_port_range_lock); | 35 | write_sequnlock(&sysctl_local_ports.lock); |
39 | } | 36 | } |
40 | 37 | ||
41 | /* Validate changes from /proc interface. */ | 38 | /* Validate changes from /proc interface. */ |
@@ -44,8 +41,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, | |||
44 | size_t *lenp, loff_t *ppos) | 41 | size_t *lenp, loff_t *ppos) |
45 | { | 42 | { |
46 | int ret; | 43 | int ret; |
47 | int range[2] = { sysctl_local_port_range[0], | 44 | int range[2]; |
48 | sysctl_local_port_range[1] }; | ||
49 | ctl_table tmp = { | 45 | ctl_table tmp = { |
50 | .data = &range, | 46 | .data = &range, |
51 | .maxlen = sizeof(range), | 47 | .maxlen = sizeof(range), |
@@ -54,6 +50,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, | |||
54 | .extra2 = &ip_local_port_range_max, | 50 | .extra2 = &ip_local_port_range_max, |
55 | }; | 51 | }; |
56 | 52 | ||
53 | inet_get_local_port_range(range, range + 1); | ||
57 | ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); | 54 | ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); |
58 | 55 | ||
59 | if (write && ret == 0) { | 56 | if (write && ret == 0) { |
@@ -73,8 +70,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, | |||
73 | void __user *newval, size_t newlen) | 70 | void __user *newval, size_t newlen) |
74 | { | 71 | { |
75 | int ret; | 72 | int ret; |
76 | int range[2] = { sysctl_local_port_range[0], | 73 | int range[2]; |
77 | sysctl_local_port_range[1] }; | ||
78 | ctl_table tmp = { | 74 | ctl_table tmp = { |
79 | .data = &range, | 75 | .data = &range, |
80 | .maxlen = sizeof(range), | 76 | .maxlen = sizeof(range), |
@@ -83,6 +79,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, | |||
83 | .extra2 = &ip_local_port_range_max, | 79 | .extra2 = &ip_local_port_range_max, |
84 | }; | 80 | }; |
85 | 81 | ||
82 | inet_get_local_port_range(range, range + 1); | ||
86 | ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen); | 83 | ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen); |
87 | if (ret == 0 && newval && newlen) { | 84 | if (ret == 0 && newval && newlen) { |
88 | if (range[1] < range[0]) | 85 | if (range[1] < range[0]) |
@@ -396,8 +393,8 @@ static struct ctl_table ipv4_table[] = { | |||
396 | { | 393 | { |
397 | .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, | 394 | .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, |
398 | .procname = "ip_local_port_range", | 395 | .procname = "ip_local_port_range", |
399 | .data = &sysctl_local_port_range, | 396 | .data = &sysctl_local_ports.range, |
400 | .maxlen = sizeof(sysctl_local_port_range), | 397 | .maxlen = sizeof(sysctl_local_ports.range), |
401 | .mode = 0644, | 398 | .mode = 0644, |
402 | .proc_handler = &ipv4_local_port_range, | 399 | .proc_handler = &ipv4_local_port_range, |
403 | .strategy = &ipv4_sysctl_local_port_range, | 400 | .strategy = &ipv4_sysctl_local_port_range, |
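
With the port range now owned by sysctl_local_ports, both sysctl handlers snapshot it via inet_get_local_port_range(), which reads the pair under the seqlock so a concurrent writer can never expose a torn range. The retry semantics can be sketched with a bare sequence counter; this single-writer model deliberately ignores the memory barriers a real seqlock needs:

    #include <stdio.h>

    static unsigned int seq;   /* even = stable, odd = write in progress */
    static int range[2] = { 32768, 61000 };

    static void set_local_port_range(int lo, int hi)
    {
        seq++;                 /* becomes odd: readers will retry */
        range[0] = lo;
        range[1] = hi;
        seq++;                 /* even again: snapshot is consistent */
    }

    static void get_local_port_range(int *lo, int *hi)
    {
        unsigned int start;

        do {
            start = seq;       /* retry while a write is in flight or
                                * the counter moved under us */
            *lo = range[0];
            *hi = range[1];
        } while ((start & 1) || seq != start);
    }

    int main(void)
    {
        int lo, hi;

        set_local_port_range(1024, 65535);
        get_local_port_range(&lo, &hi);
        printf("%d-%d\n", lo, hi);
        return 0;
    }

The kernel helper follows the same shape with read_seqbegin()/read_seqretry() on sysctl_local_ports.lock.
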
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1ab341e5d3e0..eccb7165a80c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -384,13 +384,17 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
384 | 384 | ||
385 | /* Connected? */ | 385 | /* Connected? */ |
386 | if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) { | 386 | if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) { |
387 | int target = sock_rcvlowat(sk, 0, INT_MAX); | ||
388 | |||
389 | if (tp->urg_seq == tp->copied_seq && | ||
390 | !sock_flag(sk, SOCK_URGINLINE) && | ||
391 | tp->urg_data) | ||
392 | target--; | ||
393 | |||
387 | /* Potential race condition. If the read of tp below | 394 | /* Potential race condition. If the read of tp below
388 | * escapes above sk->sk_state, we can be illegally awakened | 395 | * escapes above sk->sk_state, we can be illegally awakened
389 | * in SYN_* states. */ | 396 | * in SYN_* states. */ |
390 | if ((tp->rcv_nxt != tp->copied_seq) && | 397 | if (tp->rcv_nxt - tp->copied_seq >= target) |
391 | (tp->urg_seq != tp->copied_seq || | ||
392 | tp->rcv_nxt != tp->copied_seq + 1 || | ||
393 | sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data)) | ||
394 | mask |= POLLIN | POLLRDNORM; | 398 | mask |= POLLIN | POLLRDNORM; |
395 | 399 | ||
396 | if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { | 400 | if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { |
@@ -493,10 +497,8 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) | |||
493 | static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, | 497 | static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, |
494 | struct sk_buff *skb) | 498 | struct sk_buff *skb) |
495 | { | 499 | { |
496 | if (flags & MSG_OOB) { | 500 | if (flags & MSG_OOB) |
497 | tp->urg_mode = 1; | ||
498 | tp->snd_up = tp->write_seq; | 501 | tp->snd_up = tp->write_seq; |
499 | } | ||
500 | } | 502 | } |
501 | 503 | ||
502 | static inline void tcp_push(struct sock *sk, int flags, int mss_now, | 504 | static inline void tcp_push(struct sock *sk, int flags, int mss_now, |
@@ -1157,7 +1159,7 @@ static void tcp_prequeue_process(struct sock *sk) | |||
1157 | * necessary */ | 1159 | * necessary */ |
1158 | local_bh_disable(); | 1160 | local_bh_disable(); |
1159 | while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) | 1161 | while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) |
1160 | sk->sk_backlog_rcv(sk, skb); | 1162 | sk_backlog_rcv(sk, skb); |
1161 | local_bh_enable(); | 1163 | local_bh_enable(); |
1162 | 1164 | ||
1163 | /* Clear memory counter. */ | 1165 | /* Clear memory counter. */ |
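
The tcp_poll() rewrite above replaces the old hard-to-read compound condition with an explicit byte target: start from SO_RCVLOWAT, treat a pending urgent byte at the read head as non-data when URG-inline is off, and signal POLLIN once enough in-band bytes are queued. A standalone model of the intended predicate follows; the struct is illustrative, and the sketch discounts the urgent byte from the available count, a slightly different but easier-to-read arrangement than the target adjustment in the hunk:

    #include <stdio.h>

    struct tp_state {
        unsigned int rcv_nxt;     /* next sequence expected */
        unsigned int copied_seq;  /* head of yet-unread data */
        unsigned int urg_seq;     /* sequence of the urgent byte, if any */
        int urg_data;             /* non-zero while an urgent byte is pending */
        int urg_inline;           /* SOCK_URGINLINE equivalent */
    };

    static int tcp_readable(const struct tp_state *tp, int rcvlowat)
    {
        unsigned int avail = tp->rcv_nxt - tp->copied_seq;
        int target = rcvlowat > 0 ? rcvlowat : 1;

        /* A pending urgent byte at the read head is skipped by a normal
         * read when URG-inline is off, so it must not count as data. */
        if (tp->urg_data && !tp->urg_inline &&
            tp->urg_seq == tp->copied_seq)
            avail--;

        return avail >= (unsigned int)target;
    }

    int main(void)
    {
        struct tp_state tp = { .rcv_nxt = 101, .copied_seq = 100,
                               .urg_seq = 100, .urg_data = 1,
                               .urg_inline = 0 };

        /* Only the urgent byte has arrived: not readable at lowat 1. */
        printf("readable: %d\n", tcp_readable(&tp, 1));
        return 0;
    }
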
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7abc6b80d47d..d77c0d29e239 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -979,6 +979,39 @@ static void tcp_update_reordering(struct sock *sk, const int metric, | |||
979 | } | 979 | } |
980 | } | 980 | } |
981 | 981 | ||
982 | /* This must be called before lost_out is incremented */ | ||
983 | static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) | ||
984 | { | ||
985 | if ((tp->retransmit_skb_hint == NULL) || | ||
986 | before(TCP_SKB_CB(skb)->seq, | ||
987 | TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) | ||
988 | tp->retransmit_skb_hint = skb; | ||
989 | |||
990 | if (!tp->lost_out || | ||
991 | after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high)) | ||
992 | tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; | ||
993 | } | ||
994 | |||
995 | static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) | ||
996 | { | ||
997 | if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { | ||
998 | tcp_verify_retransmit_hint(tp, skb); | ||
999 | |||
1000 | tp->lost_out += tcp_skb_pcount(skb); | ||
1001 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
1002 | } | ||
1003 | } | ||
1004 | |||
1005 | void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) | ||
1006 | { | ||
1007 | tcp_verify_retransmit_hint(tp, skb); | ||
1008 | |||
1009 | if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { | ||
1010 | tp->lost_out += tcp_skb_pcount(skb); | ||
1011 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
1012 | } | ||
1013 | } | ||
1014 | |||
982 | /* This procedure tags the retransmission queue when SACKs arrive. | 1015 | /* This procedure tags the retransmission queue when SACKs arrive. |
983 | * | 1016 | * |
984 | * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L). | 1017 | * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L). |
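
The new helpers above keep retransmit_skb_hint at the lowest-sequence lost segment and retransmit_high at the highest end_seq marked lost, and both lean on the kernel's wraparound-safe before()/after() comparisons. That serial-number idiom is worth spelling out in isolation; this is a standalone reimplementation of the standard 32-bit comparison, not kernel code:

    #include <stdio.h>
    #include <stdint.h>

    /* Wraparound-safe sequence comparison: the difference is computed
     * modulo 2^32 and interpreted as signed, so 0x00000001 counts as
     * "after" 0xfffffffe even though it is numerically smaller. */
    static int before(uint32_t seq1, uint32_t seq2)
    {
        return (int32_t)(seq1 - seq2) < 0;
    }

    static int after(uint32_t seq1, uint32_t seq2)
    {
        return before(seq2, seq1);
    }

    int main(void)
    {
        printf("%d\n", before(0xfffffffeu, 0x00000001u)); /* 1: wrapped */
        printf("%d\n", after(0x00000001u, 0xfffffffeu));  /* 1 */
        printf("%d\n", before(10, 5));                    /* 0 */
        return 0;
    }
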
@@ -1155,13 +1188,7 @@ static void tcp_mark_lost_retrans(struct sock *sk) | |||
1155 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1188 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; |
1156 | tp->retrans_out -= tcp_skb_pcount(skb); | 1189 | tp->retrans_out -= tcp_skb_pcount(skb); |
1157 | 1190 | ||
1158 | /* clear lost hint */ | 1191 | tcp_skb_mark_lost_uncond_verify(tp, skb); |
1159 | tp->retransmit_skb_hint = NULL; | ||
1160 | |||
1161 | if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { | ||
1162 | tp->lost_out += tcp_skb_pcount(skb); | ||
1163 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
1164 | } | ||
1165 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); | 1192 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); |
1166 | } else { | 1193 | } else { |
1167 | if (before(ack_seq, new_low_seq)) | 1194 | if (before(ack_seq, new_low_seq)) |
@@ -1271,9 +1298,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, | |||
1271 | ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); | 1298 | ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); |
1272 | tp->lost_out -= tcp_skb_pcount(skb); | 1299 | tp->lost_out -= tcp_skb_pcount(skb); |
1273 | tp->retrans_out -= tcp_skb_pcount(skb); | 1300 | tp->retrans_out -= tcp_skb_pcount(skb); |
1274 | |||
1275 | /* clear lost hint */ | ||
1276 | tp->retransmit_skb_hint = NULL; | ||
1277 | } | 1301 | } |
1278 | } else { | 1302 | } else { |
1279 | if (!(sacked & TCPCB_RETRANS)) { | 1303 | if (!(sacked & TCPCB_RETRANS)) { |
@@ -1292,9 +1316,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, | |||
1292 | if (sacked & TCPCB_LOST) { | 1316 | if (sacked & TCPCB_LOST) { |
1293 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; | 1317 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; |
1294 | tp->lost_out -= tcp_skb_pcount(skb); | 1318 | tp->lost_out -= tcp_skb_pcount(skb); |
1295 | |||
1296 | /* clear lost hint */ | ||
1297 | tp->retransmit_skb_hint = NULL; | ||
1298 | } | 1319 | } |
1299 | } | 1320 | } |
1300 | 1321 | ||
@@ -1324,7 +1345,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, | |||
1324 | if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) { | 1345 | if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) { |
1325 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1346 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; |
1326 | tp->retrans_out -= tcp_skb_pcount(skb); | 1347 | tp->retrans_out -= tcp_skb_pcount(skb); |
1327 | tp->retransmit_skb_hint = NULL; | ||
1328 | } | 1348 | } |
1329 | 1349 | ||
1330 | return flag; | 1350 | return flag; |
@@ -1726,6 +1746,8 @@ int tcp_use_frto(struct sock *sk) | |||
1726 | return 0; | 1746 | return 0; |
1727 | 1747 | ||
1728 | skb = tcp_write_queue_head(sk); | 1748 | skb = tcp_write_queue_head(sk); |
1749 | if (tcp_skb_is_last(sk, skb)) | ||
1750 | return 1; | ||
1729 | skb = tcp_write_queue_next(sk, skb); /* Skips head */ | 1751 | skb = tcp_write_queue_next(sk, skb); /* Skips head */ |
1730 | tcp_for_write_queue_from(skb, sk) { | 1752 | tcp_for_write_queue_from(skb, sk) { |
1731 | if (skb == tcp_send_head(sk)) | 1753 | if (skb == tcp_send_head(sk)) |
@@ -1867,6 +1889,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) | |||
1867 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { | 1889 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { |
1868 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1890 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
1869 | tp->lost_out += tcp_skb_pcount(skb); | 1891 | tp->lost_out += tcp_skb_pcount(skb); |
1892 | tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; | ||
1870 | } | 1893 | } |
1871 | } | 1894 | } |
1872 | tcp_verify_left_out(tp); | 1895 | tcp_verify_left_out(tp); |
@@ -1883,7 +1906,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) | |||
1883 | tp->high_seq = tp->snd_nxt; | 1906 | tp->high_seq = tp->snd_nxt; |
1884 | TCP_ECN_queue_cwr(tp); | 1907 | TCP_ECN_queue_cwr(tp); |
1885 | 1908 | ||
1886 | tcp_clear_retrans_hints_partial(tp); | 1909 | tcp_clear_all_retrans_hints(tp); |
1887 | } | 1910 | } |
1888 | 1911 | ||
1889 | static void tcp_clear_retrans_partial(struct tcp_sock *tp) | 1912 | static void tcp_clear_retrans_partial(struct tcp_sock *tp) |
@@ -1934,12 +1957,11 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
1934 | /* Push undo marker, if it was plain RTO and nothing | 1957 | /* Push undo marker, if it was plain RTO and nothing |
1935 | * was retransmitted. */ | 1958 | * was retransmitted. */ |
1936 | tp->undo_marker = tp->snd_una; | 1959 | tp->undo_marker = tp->snd_una; |
1937 | tcp_clear_retrans_hints_partial(tp); | ||
1938 | } else { | 1960 | } else { |
1939 | tp->sacked_out = 0; | 1961 | tp->sacked_out = 0; |
1940 | tp->fackets_out = 0; | 1962 | tp->fackets_out = 0; |
1941 | tcp_clear_all_retrans_hints(tp); | ||
1942 | } | 1963 | } |
1964 | tcp_clear_all_retrans_hints(tp); | ||
1943 | 1965 | ||
1944 | tcp_for_write_queue(skb, sk) { | 1966 | tcp_for_write_queue(skb, sk) { |
1945 | if (skb == tcp_send_head(sk)) | 1967 | if (skb == tcp_send_head(sk)) |
@@ -1952,6 +1974,7 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
1952 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; | 1974 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; |
1953 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1975 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
1954 | tp->lost_out += tcp_skb_pcount(skb); | 1976 | tp->lost_out += tcp_skb_pcount(skb); |
1977 | tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; | ||
1955 | } | 1978 | } |
1956 | } | 1979 | } |
1957 | tcp_verify_left_out(tp); | 1980 | tcp_verify_left_out(tp); |
@@ -2157,19 +2180,6 @@ static int tcp_time_to_recover(struct sock *sk) | |||
2157 | return 0; | 2180 | return 0; |
2158 | } | 2181 | } |
2159 | 2182 | ||
2160 | /* RFC: This is from the original, I doubt that this is necessary at all: | ||
2161 | * clear xmit_retrans hint if seq of this skb is beyond hint. How could we | ||
2162 | * retransmitted past LOST markings in the first place? I'm not fully sure | ||
2163 | * about undo and end of connection cases, which can cause R without L? | ||
2164 | */ | ||
2165 | static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) | ||
2166 | { | ||
2167 | if ((tp->retransmit_skb_hint != NULL) && | ||
2168 | before(TCP_SKB_CB(skb)->seq, | ||
2169 | TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) | ||
2170 | tp->retransmit_skb_hint = NULL; | ||
2171 | } | ||
2172 | |||
2173 | /* Mark head of queue up as lost. With RFC3517 SACK, the packet count | 2183 | /* Mark head of queue up as lost. With RFC3517 SACK, the packet count
2174 | * is checked against sacked "cnt", otherwise against facked "cnt" | 2184 | * is checked against sacked "cnt", otherwise against facked "cnt"
2175 | */ | 2185 | */ |
@@ -2217,11 +2227,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) | |||
2217 | cnt = packets; | 2227 | cnt = packets; |
2218 | } | 2228 | } |
2219 | 2229 | ||
2220 | if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { | 2230 | tcp_skb_mark_lost(tp, skb); |
2221 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
2222 | tp->lost_out += tcp_skb_pcount(skb); | ||
2223 | tcp_verify_retransmit_hint(tp, skb); | ||
2224 | } | ||
2225 | } | 2231 | } |
2226 | tcp_verify_left_out(tp); | 2232 | tcp_verify_left_out(tp); |
2227 | } | 2233 | } |
@@ -2263,11 +2269,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) | |||
2263 | if (!tcp_skb_timedout(sk, skb)) | 2269 | if (!tcp_skb_timedout(sk, skb)) |
2264 | break; | 2270 | break; |
2265 | 2271 | ||
2266 | if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { | 2272 | tcp_skb_mark_lost(tp, skb); |
2267 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
2268 | tp->lost_out += tcp_skb_pcount(skb); | ||
2269 | tcp_verify_retransmit_hint(tp, skb); | ||
2270 | } | ||
2271 | } | 2273 | } |
2272 | 2274 | ||
2273 | tp->scoreboard_skb_hint = skb; | 2275 | tp->scoreboard_skb_hint = skb; |
@@ -2378,10 +2380,6 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) | |||
2378 | } | 2380 | } |
2379 | tcp_moderate_cwnd(tp); | 2381 | tcp_moderate_cwnd(tp); |
2380 | tp->snd_cwnd_stamp = tcp_time_stamp; | 2382 | tp->snd_cwnd_stamp = tcp_time_stamp; |
2381 | |||
2382 | /* There is something screwy going on with the retrans hints after | ||
2383 | an undo */ | ||
2384 | tcp_clear_all_retrans_hints(tp); | ||
2385 | } | 2383 | } |
2386 | 2384 | ||
2387 | static inline int tcp_may_undo(struct tcp_sock *tp) | 2385 | static inline int tcp_may_undo(struct tcp_sock *tp) |
@@ -2838,7 +2836,8 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) | |||
2838 | * is before the ack sequence we can discard it as it's confirmed to have | 2836 | * is before the ack sequence we can discard it as it's confirmed to have |
2839 | * arrived at the other end. | 2837 | * arrived at the other end. |
2840 | */ | 2838 | */ |
2841 | static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) | 2839 | static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, |
2840 | u32 prior_snd_una) | ||
2842 | { | 2841 | { |
2843 | struct tcp_sock *tp = tcp_sk(sk); | 2842 | struct tcp_sock *tp = tcp_sk(sk); |
2844 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2843 | const struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -2848,6 +2847,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) | |||
2848 | int flag = 0; | 2847 | int flag = 0; |
2849 | u32 pkts_acked = 0; | 2848 | u32 pkts_acked = 0; |
2850 | u32 reord = tp->packets_out; | 2849 | u32 reord = tp->packets_out; |
2850 | u32 prior_sacked = tp->sacked_out; | ||
2851 | s32 seq_rtt = -1; | 2851 | s32 seq_rtt = -1; |
2852 | s32 ca_seq_rtt = -1; | 2852 | s32 ca_seq_rtt = -1; |
2853 | ktime_t last_ackt = net_invalid_timestamp(); | 2853 | ktime_t last_ackt = net_invalid_timestamp(); |
@@ -2904,9 +2904,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) | |||
2904 | if (sacked & TCPCB_LOST) | 2904 | if (sacked & TCPCB_LOST) |
2905 | tp->lost_out -= acked_pcount; | 2905 | tp->lost_out -= acked_pcount; |
2906 | 2906 | ||
2907 | if (unlikely(tp->urg_mode && !before(end_seq, tp->snd_up))) | ||
2908 | tp->urg_mode = 0; | ||
2909 | |||
2910 | tp->packets_out -= acked_pcount; | 2907 | tp->packets_out -= acked_pcount; |
2911 | pkts_acked += acked_pcount; | 2908 | pkts_acked += acked_pcount; |
2912 | 2909 | ||
@@ -2929,9 +2926,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) | |||
2929 | 2926 | ||
2930 | tcp_unlink_write_queue(skb, sk); | 2927 | tcp_unlink_write_queue(skb, sk); |
2931 | sk_wmem_free_skb(sk, skb); | 2928 | sk_wmem_free_skb(sk, skb); |
2932 | tcp_clear_all_retrans_hints(tp); | 2929 | tp->scoreboard_skb_hint = NULL; |
2930 | if (skb == tp->retransmit_skb_hint) | ||
2931 | tp->retransmit_skb_hint = NULL; | ||
2932 | if (skb == tp->lost_skb_hint) | ||
2933 | tp->lost_skb_hint = NULL; | ||
2933 | } | 2934 | } |
2934 | 2935 | ||
2936 | if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una))) | ||
2937 | tp->snd_up = tp->snd_una; | ||
2938 | |||
2935 | if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) | 2939 | if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) |
2936 | flag |= FLAG_SACK_RENEGING; | 2940 | flag |= FLAG_SACK_RENEGING; |
2937 | 2941 | ||
@@ -2948,6 +2952,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) | |||
2948 | /* Non-retransmitted hole got filled? That's reordering */ | 2952 | /* Non-retransmitted hole got filled? That's reordering */ |
2949 | if (reord < prior_fackets) | 2953 | if (reord < prior_fackets) |
2950 | tcp_update_reordering(sk, tp->fackets_out - reord, 0); | 2954 | tcp_update_reordering(sk, tp->fackets_out - reord, 0); |
2955 | |||
2956 | /* No need to care for underflows here because | ||
2957 | * the lost_skb_hint gets NULLed if we're past it | ||
2958 | * (or something non-trivial happened) | ||
2959 | */ | ||
2960 | if (tcp_is_fack(tp)) | ||
2961 | tp->lost_cnt_hint -= pkts_acked; | ||
2962 | else | ||
2963 | tp->lost_cnt_hint -= prior_sacked - tp->sacked_out; | ||
2951 | } | 2964 | } |
2952 | 2965 | ||
2953 | tp->fackets_out -= min(pkts_acked, tp->fackets_out); | 2966 | tp->fackets_out -= min(pkts_acked, tp->fackets_out); |
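
Instead of wiping every cached queue hint whenever a fully ACKed skb is freed, tcp_clean_rtx_queue() now clears only the hints that actually pointed at that skb and adjusts lost_cnt_hint arithmetically, so later scans can resume where they left off. The pointer-invalidation pattern on its own (illustrative cache struct, not the kernel's tcp_sock):

    #include <stdio.h>
    #include <stdlib.h>

    struct node { int seq; struct node *next; };

    struct hints {
        struct node *retransmit_hint;  /* cached scan positions */
        struct node *lost_hint;
    };

    /* Free 'n' and drop only the cached pointers that referenced it;
     * unrelated hints survive, keeping later scans cheap to resume. */
    static void free_node(struct hints *h, struct node *n)
    {
        if (h->retransmit_hint == n)
            h->retransmit_hint = NULL;
        if (h->lost_hint == n)
            h->lost_hint = NULL;
        free(n);
    }

    int main(void)
    {
        struct node *a = malloc(sizeof(*a));
        struct node *b = malloc(sizeof(*b));
        struct hints h = { .retransmit_hint = a, .lost_hint = b };

        a->seq = 1; a->next = b;
        b->seq = 2; b->next = NULL;

        free_node(&h, a);
        printf("retransmit hint kept? %s\n", h.retransmit_hint ? "yes" : "no");
        printf("lost hint kept?       %s\n", h.lost_hint ? "yes" : "no");
        free(b);
        return 0;
    }
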
@@ -3299,7 +3312,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
3299 | goto no_queue; | 3312 | goto no_queue; |
3300 | 3313 | ||
3301 | /* See if we can take anything off of the retransmit queue. */ | 3314 | /* See if we can take anything off of the retransmit queue. */ |
3302 | flag |= tcp_clean_rtx_queue(sk, prior_fackets); | 3315 | flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); |
3303 | 3316 | ||
3304 | if (tp->frto_counter) | 3317 | if (tp->frto_counter) |
3305 | frto_cwnd = tcp_process_frto(sk, flag); | 3318 | frto_cwnd = tcp_process_frto(sk, flag); |
@@ -3442,6 +3455,22 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | |||
3442 | } | 3455 | } |
3443 | } | 3456 | } |
3444 | 3457 | ||
3458 | static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) | ||
3459 | { | ||
3460 | __be32 *ptr = (__be32 *)(th + 1); | ||
3461 | |||
3462 | if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | ||
3463 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { | ||
3464 | tp->rx_opt.saw_tstamp = 1; | ||
3465 | ++ptr; | ||
3466 | tp->rx_opt.rcv_tsval = ntohl(*ptr); | ||
3467 | ++ptr; | ||
3468 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); | ||
3469 | return 1; | ||
3470 | } | ||
3471 | return 0; | ||
3472 | } | ||
3473 | |||
3445 | /* Fast parse options. This hopes to only see timestamps. | 3474 | /* Fast parse options. This hopes to only see timestamps. |
3446 | * If it is wrong it falls back on tcp_parse_options(). | 3475 | * If it is wrong it falls back on tcp_parse_options(). |
3447 | */ | 3476 | */ |
@@ -3453,16 +3482,8 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, | |||
3453 | return 0; | 3482 | return 0; |
3454 | } else if (tp->rx_opt.tstamp_ok && | 3483 | } else if (tp->rx_opt.tstamp_ok && |
3455 | th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { | 3484 | th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { |
3456 | __be32 *ptr = (__be32 *)(th + 1); | 3485 | if (tcp_parse_aligned_timestamp(tp, th)) |
3457 | if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | ||
3458 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { | ||
3459 | tp->rx_opt.saw_tstamp = 1; | ||
3460 | ++ptr; | ||
3461 | tp->rx_opt.rcv_tsval = ntohl(*ptr); | ||
3462 | ++ptr; | ||
3463 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); | ||
3464 | return 1; | 3486 | return 1; |
3465 | } | ||
3466 | } | 3487 | } |
3467 | tcp_parse_options(skb, &tp->rx_opt, 1); | 3488 | tcp_parse_options(skb, &tp->rx_opt, 1); |
3468 | return 1; | 3489 | return 1; |
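
tcp_parse_aligned_timestamp(), now shared by the fast-parse path above and the header-prediction path further down, compares the first option word against the one value a padded timestamp option always encodes (NOP, NOP, kind 8, length 10), turning the common case into a single comparison. A standalone version of that fast path, with local copies of the option constants:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <arpa/inet.h>

    #define TCPOPT_NOP        1
    #define TCPOPT_TIMESTAMP  8
    #define TCPOLEN_TIMESTAMP 10

    struct ts_opt { uint32_t tsval, tsecr; };

    /* buf points at the option area right after the 20-byte header;
     * returns 1 and fills out on the predicted NOP,NOP,TS layout. */
    static int parse_aligned_timestamp(const unsigned char *buf,
                                       struct ts_opt *out)
    {
        uint32_t word;
        uint32_t predicted = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                   (TCPOPT_TIMESTAMP << 8) |
                                   TCPOLEN_TIMESTAMP);

        memcpy(&word, buf, 4);
        if (word != predicted)
            return 0;          /* fall back to the full option walk */
        memcpy(&out->tsval, buf + 4, 4);
        memcpy(&out->tsecr, buf + 8, 4);
        out->tsval = ntohl(out->tsval);
        out->tsecr = ntohl(out->tsecr);
        return 1;
    }

    int main(void)
    {
        unsigned char opts[12] = { 1, 1, 8, 10, 0, 0, 0, 42, 0, 0, 0, 7 };
        struct ts_opt ts;

        if (parse_aligned_timestamp(opts, &ts))
            printf("tsval=%u tsecr=%u\n", ts.tsval, ts.tsecr);
        return 0;
    }
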
@@ -4138,7 +4159,7 @@ drop: | |||
4138 | skb1 = skb1->prev; | 4159 | skb1 = skb1->prev; |
4139 | } | 4160 | } |
4140 | } | 4161 | } |
4141 | __skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue); | 4162 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); |
4142 | 4163 | ||
4143 | /* And clean segments covered by new one as whole. */ | 4164 | /* And clean segments covered by new one as whole. */ |
4144 | while ((skb1 = skb->next) != | 4165 | while ((skb1 = skb->next) != |
@@ -4161,6 +4182,18 @@ add_sack: | |||
4161 | } | 4182 | } |
4162 | } | 4183 | } |
4163 | 4184 | ||
4185 | static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, | ||
4186 | struct sk_buff_head *list) | ||
4187 | { | ||
4188 | struct sk_buff *next = skb->next; | ||
4189 | |||
4190 | __skb_unlink(skb, list); | ||
4191 | __kfree_skb(skb); | ||
4192 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); | ||
4193 | |||
4194 | return next; | ||
4195 | } | ||
4196 | |||
4164 | /* Collapse contiguous sequence of skbs head..tail with | 4197 | /* Collapse contiguous sequence of skbs head..tail with |
4165 | * sequence numbers start..end. | 4198 | * sequence numbers start..end. |
4166 | * Segments with FIN/SYN are not collapsed (only because this | 4199 | * Segments with FIN/SYN are not collapsed (only because this |
@@ -4178,11 +4211,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, | |||
4178 | for (skb = head; skb != tail;) { | 4211 | for (skb = head; skb != tail;) { |
4179 | /* No new bits? It is possible on ofo queue. */ | 4212 | /* No new bits? It is possible on ofo queue. */ |
4180 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { | 4213 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { |
4181 | struct sk_buff *next = skb->next; | 4214 | skb = tcp_collapse_one(sk, skb, list); |
4182 | __skb_unlink(skb, list); | ||
4183 | __kfree_skb(skb); | ||
4184 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); | ||
4185 | skb = next; | ||
4186 | continue; | 4215 | continue; |
4187 | } | 4216 | } |
4188 | 4217 | ||
@@ -4228,7 +4257,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, | |||
4228 | memcpy(nskb->head, skb->head, header); | 4257 | memcpy(nskb->head, skb->head, header); |
4229 | memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); | 4258 | memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); |
4230 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; | 4259 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; |
4231 | __skb_insert(nskb, skb->prev, skb, list); | 4260 | __skb_queue_before(list, skb, nskb); |
4232 | skb_set_owner_r(nskb, sk); | 4261 | skb_set_owner_r(nskb, sk); |
4233 | 4262 | ||
4234 | /* Copy data, releasing collapsed skbs. */ | 4263 | /* Copy data, releasing collapsed skbs. */ |
@@ -4246,11 +4275,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, | |||
4246 | start += size; | 4275 | start += size; |
4247 | } | 4276 | } |
4248 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { | 4277 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { |
4249 | struct sk_buff *next = skb->next; | 4278 | skb = tcp_collapse_one(sk, skb, list); |
4250 | __skb_unlink(skb, list); | ||
4251 | __kfree_skb(skb); | ||
4252 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); | ||
4253 | skb = next; | ||
4254 | if (skb == tail || | 4279 | if (skb == tail || |
4255 | tcp_hdr(skb)->syn || | 4280 | tcp_hdr(skb)->syn || |
4256 | tcp_hdr(skb)->fin) | 4281 | tcp_hdr(skb)->fin) |
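
tcp_collapse_one() factors the unlink/free/advance sequence out of both call sites in tcp_collapse(), returning the successor so the caller's cursor stays valid after the node is gone. The shape of that helper on a plain singly linked list (illustrative types, not sk_buff):

    #include <stdio.h>
    #include <stdlib.h>

    struct node { int seq; struct node *next; };

    /* Unlink skb wherever it sits in *list, free it, and hand back its
     * successor so the caller's walk continues seamlessly. */
    static struct node *collapse_one(struct node **list, struct node *skb)
    {
        struct node **pp = list;
        struct node *next = skb->next;

        while (*pp && *pp != skb)
            pp = &(*pp)->next;
        if (*pp)
            *pp = next;
        free(skb);
        return next;
    }

    int main(void)
    {
        struct node *c = malloc(sizeof(*c));
        struct node *b = malloc(sizeof(*b));
        struct node *a = malloc(sizeof(*a));
        struct node *list, *cur;

        a->seq = 1; a->next = b;
        b->seq = 2; b->next = c;
        c->seq = 3; c->next = NULL;
        list = a;

        cur = list->next;               /* cursor at seq 2 */
        cur = collapse_one(&list, cur); /* drop it, cursor moves to seq 3 */
        printf("cursor now at seq %d\n", cur->seq);

        free(list->next);
        free(list);
        return 0;
    }
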
@@ -4436,8 +4461,8 @@ static void tcp_new_space(struct sock *sk) | |||
4436 | 4461 | ||
4437 | if (tcp_should_expand_sndbuf(sk)) { | 4462 | if (tcp_should_expand_sndbuf(sk)) { |
4438 | int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + | 4463 | int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + |
4439 | MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), | 4464 | MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); |
4440 | demanded = max_t(unsigned int, tp->snd_cwnd, | 4465 | int demanded = max_t(unsigned int, tp->snd_cwnd, |
4441 | tp->reordering + 1); | 4466 | tp->reordering + 1); |
4442 | sndmem *= 2 * demanded; | 4467 | sndmem *= 2 * demanded; |
4443 | if (sndmem > sk->sk_sndbuf) | 4468 | if (sndmem > sk->sk_sndbuf) |
@@ -4691,6 +4716,67 @@ out: | |||
4691 | } | 4716 | } |
4692 | #endif /* CONFIG_NET_DMA */ | 4717 | #endif /* CONFIG_NET_DMA */ |
4693 | 4718 | ||
4719 | /* Does PAWS and seqno-based validation of an incoming segment; flags will | ||
4720 | * play a significant role here. | ||
4721 | */ | ||
4722 | static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, | ||
4723 | struct tcphdr *th, int syn_inerr) | ||
4724 | { | ||
4725 | struct tcp_sock *tp = tcp_sk(sk); | ||
4726 | |||
4727 | /* RFC1323: H1. Apply PAWS check first. */ | ||
4728 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && | ||
4729 | tcp_paws_discard(sk, skb)) { | ||
4730 | if (!th->rst) { | ||
4731 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); | ||
4732 | tcp_send_dupack(sk, skb); | ||
4733 | goto discard; | ||
4734 | } | ||
4735 | /* Reset is accepted even if it did not pass PAWS. */ | ||
4736 | } | ||
4737 | |||
4738 | /* Step 1: check sequence number */ | ||
4739 | if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { | ||
4740 | /* RFC793, page 37: "In all states except SYN-SENT, all reset | ||
4741 | * (RST) segments are validated by checking their SEQ-fields." | ||
4742 | * And page 69: "If an incoming segment is not acceptable, | ||
4743 | * an acknowledgment should be sent in reply (unless the RST | ||
4744 | * bit is set, if so drop the segment and return)". | ||
4745 | */ | ||
4746 | if (!th->rst) | ||
4747 | tcp_send_dupack(sk, skb); | ||
4748 | goto discard; | ||
4749 | } | ||
4750 | |||
4751 | /* Step 2: check RST bit */ | ||
4752 | if (th->rst) { | ||
4753 | tcp_reset(sk); | ||
4754 | goto discard; | ||
4755 | } | ||
4756 | |||
4757 | /* ts_recent update must be made after we are sure that the packet | ||
4758 | * is in window. | ||
4759 | */ | ||
4760 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
4761 | |||
4762 | /* step 3: check security and precedence [ignored] */ | ||
4763 | |||
4764 | /* step 4: Check for a SYN in window. */ | ||
4765 | if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { | ||
4766 | if (syn_inerr) | ||
4767 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); | ||
4768 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); | ||
4769 | tcp_reset(sk); | ||
4770 | return -1; | ||
4771 | } | ||
4772 | |||
4773 | return 1; | ||
4774 | |||
4775 | discard: | ||
4776 | __kfree_skb(skb); | ||
4777 | return 0; | ||
4778 | } | ||
4779 | |||
4694 | /* | 4780 | /* |
4695 | * TCP receive function for the ESTABLISHED state. | 4781 | * TCP receive function for the ESTABLISHED state. |
4696 | * | 4782 | * |
@@ -4718,6 +4804,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
4718 | struct tcphdr *th, unsigned len) | 4804 | struct tcphdr *th, unsigned len) |
4719 | { | 4805 | { |
4720 | struct tcp_sock *tp = tcp_sk(sk); | 4806 | struct tcp_sock *tp = tcp_sk(sk); |
4807 | int res; | ||
4721 | 4808 | ||
4722 | /* | 4809 | /* |
4723 | * Header prediction. | 4810 | * Header prediction. |
@@ -4756,19 +4843,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
4756 | 4843 | ||
4757 | /* Check timestamp */ | 4844 | /* Check timestamp */ |
4758 | if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { | 4845 | if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { |
4759 | __be32 *ptr = (__be32 *)(th + 1); | ||
4760 | |||
4761 | /* No? Slow path! */ | 4846 | /* No? Slow path! */ |
4762 | if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 4847 | if (!tcp_parse_aligned_timestamp(tp, th)) |
4763 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) | ||
4764 | goto slow_path; | 4848 | goto slow_path; |
4765 | 4849 | ||
4766 | tp->rx_opt.saw_tstamp = 1; | ||
4767 | ++ptr; | ||
4768 | tp->rx_opt.rcv_tsval = ntohl(*ptr); | ||
4769 | ++ptr; | ||
4770 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); | ||
4771 | |||
4772 | /* If PAWS failed, check it more carefully in slow path */ | 4850 | /* If PAWS failed, check it more carefully in slow path */ |
4773 | if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) | 4851 | if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) |
4774 | goto slow_path; | 4852 | goto slow_path; |
@@ -4900,51 +4978,12 @@ slow_path: | |||
4900 | goto csum_error; | 4978 | goto csum_error; |
4901 | 4979 | ||
4902 | /* | 4980 | /* |
4903 | * RFC1323: H1. Apply PAWS check first. | ||
4904 | */ | ||
4905 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && | ||
4906 | tcp_paws_discard(sk, skb)) { | ||
4907 | if (!th->rst) { | ||
4908 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); | ||
4909 | tcp_send_dupack(sk, skb); | ||
4910 | goto discard; | ||
4911 | } | ||
4912 | /* Resets are accepted even if PAWS failed. | ||
4913 | |||
4914 | ts_recent update must be made after we are sure | ||
4915 | that the packet is in window. | ||
4916 | */ | ||
4917 | } | ||
4918 | |||
4919 | /* | ||
4920 | * Standard slow path. | 4981 | * Standard slow path. |
4921 | */ | 4982 | */ |
4922 | 4983 | ||
4923 | if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { | 4984 | res = tcp_validate_incoming(sk, skb, th, 1); |
4924 | /* RFC793, page 37: "In all states except SYN-SENT, all reset | 4985 | if (res <= 0) |
4925 | * (RST) segments are validated by checking their SEQ-fields." | 4986 | return -res; |
4926 | * And page 69: "If an incoming segment is not acceptable, | ||
4927 | * an acknowledgment should be sent in reply (unless the RST bit | ||
4928 | * is set, if so drop the segment and return)". | ||
4929 | */ | ||
4930 | if (!th->rst) | ||
4931 | tcp_send_dupack(sk, skb); | ||
4932 | goto discard; | ||
4933 | } | ||
4934 | |||
4935 | if (th->rst) { | ||
4936 | tcp_reset(sk); | ||
4937 | goto discard; | ||
4938 | } | ||
4939 | |||
4940 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
4941 | |||
4942 | if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { | ||
4943 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); | ||
4944 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); | ||
4945 | tcp_reset(sk); | ||
4946 | return 1; | ||
4947 | } | ||
4948 | 4987 | ||
4949 | step5: | 4988 | step5: |
4950 | if (th->ack) | 4989 | if (th->ack) |
@@ -5226,6 +5265,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5226 | struct tcp_sock *tp = tcp_sk(sk); | 5265 | struct tcp_sock *tp = tcp_sk(sk); |
5227 | struct inet_connection_sock *icsk = inet_csk(sk); | 5266 | struct inet_connection_sock *icsk = inet_csk(sk); |
5228 | int queued = 0; | 5267 | int queued = 0; |
5268 | int res; | ||
5229 | 5269 | ||
5230 | tp->rx_opt.saw_tstamp = 0; | 5270 | tp->rx_opt.saw_tstamp = 0; |
5231 | 5271 | ||
@@ -5278,42 +5318,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5278 | return 0; | 5318 | return 0; |
5279 | } | 5319 | } |
5280 | 5320 | ||
5281 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && | 5321 | res = tcp_validate_incoming(sk, skb, th, 0); |
5282 | tcp_paws_discard(sk, skb)) { | 5322 | if (res <= 0) |
5283 | if (!th->rst) { | 5323 | return -res; |
5284 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); | ||
5285 | tcp_send_dupack(sk, skb); | ||
5286 | goto discard; | ||
5287 | } | ||
5288 | /* Reset is accepted even if it did not pass PAWS. */ | ||
5289 | } | ||
5290 | |||
5291 | /* step 1: check sequence number */ | ||
5292 | if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { | ||
5293 | if (!th->rst) | ||
5294 | tcp_send_dupack(sk, skb); | ||
5295 | goto discard; | ||
5296 | } | ||
5297 | |||
5298 | /* step 2: check RST bit */ | ||
5299 | if (th->rst) { | ||
5300 | tcp_reset(sk); | ||
5301 | goto discard; | ||
5302 | } | ||
5303 | |||
5304 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
5305 | |||
5306 | /* step 3: check security and precedence [ignored] */ | ||
5307 | |||
5308 | /* step 4: | ||
5309 | * | ||
5310 | * Check for a SYN in window. | ||
5311 | */ | ||
5312 | if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { | ||
5313 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); | ||
5314 | tcp_reset(sk); | ||
5315 | return 1; | ||
5316 | } | ||
5317 | 5324 | ||
5318 | /* step 5: check the ACK field */ | 5325 | /* step 5: check the ACK field */ |
5319 | if (th->ack) { | 5326 | if (th->ack) { |
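
tcp_validate_incoming() gives the PAWS/sequence/RST/SYN checks shared by the established path and the state machine a three-way result: 1 to continue processing, 0 when the segment was consumed (dropped or dup-ACKed), -1 when the connection was reset. Both callers then reduce the old duplicated blocks to a guard. A schematic of that convention, with boolean parameters standing in for the real checks:

    #include <stdio.h>

    /* 1 = proceed, 0 = segment consumed, -1 = fatal (connection reset) */
    static int validate_incoming(int seq_ok, int rst, int syn_in_window)
    {
        if (!seq_ok)
            return 0;          /* dup-ACK sent, segment dropped */
        if (rst)
            return 0;          /* reset processed, nothing more to do */
        if (syn_in_window)
            return -1;         /* abort: caller must report an error */
        return 1;
    }

    static int rcv_established(int seq_ok, int rst, int syn_in_window)
    {
        int res = validate_incoming(seq_ok, rst, syn_in_window);

        /* -res maps {0, -1} onto the caller's {0, 1} return convention. */
        if (res <= 0)
            return -res;

        /* ... normal step 5+ processing would continue here ... */
        return 0;
    }

    int main(void)
    {
        printf("%d\n", rcv_established(1, 0, 0)); /* 0: processed */
        printf("%d\n", rcv_established(1, 0, 1)); /* 1: aborted */
        return 0;
    }
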
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 011478e46c40..5c8fa7f1e327 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -583,14 +583,15 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) | |||
583 | rep.th.doff = arg.iov[0].iov_len / 4; | 583 | rep.th.doff = arg.iov[0].iov_len / 4; |
584 | 584 | ||
585 | tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1], | 585 | tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1], |
586 | key, ip_hdr(skb)->daddr, | 586 | key, ip_hdr(skb)->saddr, |
587 | ip_hdr(skb)->saddr, &rep.th); | 587 | ip_hdr(skb)->daddr, &rep.th); |
588 | } | 588 | } |
589 | #endif | 589 | #endif |
590 | arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, | 590 | arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, |
591 | ip_hdr(skb)->saddr, /* XXX */ | 591 | ip_hdr(skb)->saddr, /* XXX */ |
592 | sizeof(struct tcphdr), IPPROTO_TCP, 0); | 592 | arg.iov[0].iov_len, IPPROTO_TCP, 0); |
593 | arg.csumoffset = offsetof(struct tcphdr, check) / 2; | 593 | arg.csumoffset = offsetof(struct tcphdr, check) / 2; |
594 | arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; | ||
594 | 595 | ||
595 | net = dev_net(skb->dst->dev); | 596 | net = dev_net(skb->dst->dev); |
596 | ip_send_reply(net->ipv4.tcp_sock, skb, | 597 | ip_send_reply(net->ipv4.tcp_sock, skb, |
@@ -606,7 +607,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) | |||
606 | 607 | ||
607 | static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, | 608 | static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, |
608 | u32 win, u32 ts, int oif, | 609 | u32 win, u32 ts, int oif, |
609 | struct tcp_md5sig_key *key) | 610 | struct tcp_md5sig_key *key, |
611 | int reply_flags) | ||
610 | { | 612 | { |
611 | struct tcphdr *th = tcp_hdr(skb); | 613 | struct tcphdr *th = tcp_hdr(skb); |
612 | struct { | 614 | struct { |
@@ -659,6 +661,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, | |||
659 | ip_hdr(skb)->daddr, &rep.th); | 661 | ip_hdr(skb)->daddr, &rep.th); |
660 | } | 662 | } |
661 | #endif | 663 | #endif |
664 | arg.flags = reply_flags; | ||
662 | arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, | 665 | arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, |
663 | ip_hdr(skb)->saddr, /* XXX */ | 666 | ip_hdr(skb)->saddr, /* XXX */ |
664 | arg.iov[0].iov_len, IPPROTO_TCP, 0); | 667 | arg.iov[0].iov_len, IPPROTO_TCP, 0); |
@@ -681,7 +684,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) | |||
681 | tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, | 684 | tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, |
682 | tcptw->tw_ts_recent, | 685 | tcptw->tw_ts_recent, |
683 | tw->tw_bound_dev_if, | 686 | tw->tw_bound_dev_if, |
684 | tcp_twsk_md5_key(tcptw) | 687 | tcp_twsk_md5_key(tcptw), |
688 | tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0 | ||
685 | ); | 689 | ); |
686 | 690 | ||
687 | inet_twsk_put(tw); | 691 | inet_twsk_put(tw); |
@@ -694,7 +698,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, | |||
694 | tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, | 698 | tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, |
695 | req->ts_recent, | 699 | req->ts_recent, |
696 | 0, | 700 | 0, |
697 | tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr)); | 701 | tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr), |
702 | inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0); | ||
698 | } | 703 | } |
699 | 704 | ||
700 | /* | 705 | /* |
@@ -1244,6 +1249,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1244 | ireq = inet_rsk(req); | 1249 | ireq = inet_rsk(req); |
1245 | ireq->loc_addr = daddr; | 1250 | ireq->loc_addr = daddr; |
1246 | ireq->rmt_addr = saddr; | 1251 | ireq->rmt_addr = saddr; |
1252 | ireq->no_srccheck = inet_sk(sk)->transparent; | ||
1247 | ireq->opt = tcp_v4_save_options(sk, skb); | 1253 | ireq->opt = tcp_v4_save_options(sk, skb); |
1248 | if (!want_cookie) | 1254 | if (!want_cookie) |
1249 | TCP_ECN_create_request(req, tcp_hdr(skb)); | 1255 | TCP_ECN_create_request(req, tcp_hdr(skb)); |
@@ -1364,6 +1370,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1364 | tcp_mtup_init(newsk); | 1370 | tcp_mtup_init(newsk); |
1365 | tcp_sync_mss(newsk, dst_mtu(dst)); | 1371 | tcp_sync_mss(newsk, dst_mtu(dst)); |
1366 | newtp->advmss = dst_metric(dst, RTAX_ADVMSS); | 1372 | newtp->advmss = dst_metric(dst, RTAX_ADVMSS); |
1373 | if (tcp_sk(sk)->rx_opt.user_mss && | ||
1374 | tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) | ||
1375 | newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; | ||
1376 | |||
1367 | tcp_initialize_rcv_mss(newsk); | 1377 | tcp_initialize_rcv_mss(newsk); |
1368 | 1378 | ||
1369 | #ifdef CONFIG_TCP_MD5SIG | 1379 | #ifdef CONFIG_TCP_MD5SIG |
@@ -1567,8 +1577,7 @@ int tcp_v4_rcv(struct sk_buff *skb) | |||
1567 | TCP_SKB_CB(skb)->flags = iph->tos; | 1577 | TCP_SKB_CB(skb)->flags = iph->tos; |
1568 | TCP_SKB_CB(skb)->sacked = 0; | 1578 | TCP_SKB_CB(skb)->sacked = 0; |
1569 | 1579 | ||
1570 | sk = __inet_lookup(net, &tcp_hashinfo, iph->saddr, | 1580 | sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); |
1571 | th->source, iph->daddr, th->dest, inet_iif(skb)); | ||
1572 | if (!sk) | 1581 | if (!sk) |
1573 | goto no_tcp_socket; | 1582 | goto no_tcp_socket; |
1574 | 1583 | ||
@@ -1946,6 +1955,12 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) | |||
1946 | return rc; | 1955 | return rc; |
1947 | } | 1956 | } |
1948 | 1957 | ||
1958 | static inline int empty_bucket(struct tcp_iter_state *st) | ||
1959 | { | ||
1960 | return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) && | ||
1961 | hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain); | ||
1962 | } | ||
1963 | |||
1949 | static void *established_get_first(struct seq_file *seq) | 1964 | static void *established_get_first(struct seq_file *seq) |
1950 | { | 1965 | { |
1951 | struct tcp_iter_state* st = seq->private; | 1966 | struct tcp_iter_state* st = seq->private; |
@@ -1958,6 +1973,10 @@ static void *established_get_first(struct seq_file *seq) | |||
1958 | struct inet_timewait_sock *tw; | 1973 | struct inet_timewait_sock *tw; |
1959 | rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); | 1974 | rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); |
1960 | 1975 | ||
1976 | /* Lockless fast path for the common case of empty buckets */ | ||
1977 | if (empty_bucket(st)) | ||
1978 | continue; | ||
1979 | |||
1961 | read_lock_bh(lock); | 1980 | read_lock_bh(lock); |
1962 | sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { | 1981 | sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { |
1963 | if (sk->sk_family != st->family || | 1982 | if (sk->sk_family != st->family || |
@@ -2008,13 +2027,15 @@ get_tw: | |||
2008 | read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); | 2027 | read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); |
2009 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 2028 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2010 | 2029 | ||
2011 | if (++st->bucket < tcp_hashinfo.ehash_size) { | 2030 | /* Look for next non-empty bucket */
2012 | read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); | 2031 | while (++st->bucket < tcp_hashinfo.ehash_size && |
2013 | sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); | 2032 | empty_bucket(st)) |
2014 | } else { | 2033 | ; |
2015 | cur = NULL; | 2034 | if (st->bucket >= tcp_hashinfo.ehash_size) |
2016 | goto out; | 2035 | return NULL; |
2017 | } | 2036 | |
2037 | read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); | ||
2038 | sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); | ||
2018 | } else | 2039 | } else |
2019 | sk = sk_next(sk); | 2040 | sk = sk_next(sk); |
2020 | 2041 | ||
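
The /proc/net/tcp walker gets the same optimization as the route dump: empty_bucket() checks both the established and time-wait chains locklessly, and the next-bucket loop spins past empty slots with an empty body before any lock is taken. The skip loop in isolation, with an array of flags standing in for the hash chains:

    #include <stdio.h>

    #define EHASH_SIZE 16
    static int occupied[EHASH_SIZE] = { [3] = 1, [9] = 1 };

    static int empty_bucket(int b)
    {
        return !occupied[b];
    }

    /* Return the next non-empty bucket after 'bucket', or -1 at the end;
     * the lock (here just a printf) is only taken for buckets with work. */
    static int next_bucket(int bucket)
    {
        while (++bucket < EHASH_SIZE && empty_bucket(bucket))
            ;
        if (bucket >= EHASH_SIZE)
            return -1;
        printf("lock bucket %d\n", bucket);
        return bucket;
    }

    int main(void)
    {
        int b = -1;

        while ((b = next_bucket(b)) >= 0)
            ;
        return 0;
    }
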
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f976fc57892c..779f2e9d0689 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -395,6 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
395 | newtp->pred_flags = 0; | 395 | newtp->pred_flags = 0; |
396 | newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; | 396 | newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; |
397 | newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1; | 397 | newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1; |
398 | newtp->snd_up = treq->snt_isn + 1; | ||
398 | 399 | ||
399 | tcp_prequeue_init(newtp); | 400 | tcp_prequeue_init(newtp); |
400 | 401 | ||
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8165f5aa8c71..990a58493235 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -345,6 +345,11 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) | |||
345 | TCP_SKB_CB(skb)->end_seq = seq; | 345 | TCP_SKB_CB(skb)->end_seq = seq; |
346 | } | 346 | } |
347 | 347 | ||
348 | static inline int tcp_urg_mode(const struct tcp_sock *tp) | ||
349 | { | ||
350 | return tp->snd_una != tp->snd_up; | ||
351 | } | ||
352 | |||
348 | #define OPTION_SACK_ADVERTISE (1 << 0) | 353 | #define OPTION_SACK_ADVERTISE (1 << 0) |
349 | #define OPTION_TS (1 << 1) | 354 | #define OPTION_TS (1 << 1) |
350 | #define OPTION_MD5 (1 << 2) | 355 | #define OPTION_MD5 (1 << 2) |
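
The tcp_output.c side of the urgent-pointer rework deletes the tp->urg_mode flag: urgent mode is now derived on demand from sequence state, since snd_up only runs ahead of snd_una while an urgent point is outstanding (tcp_minisocks.c accordingly seeds snd_up at connection creation, and tcp_clean_rtx_queue() above advances it with snd_una). A tiny model of deriving the mode instead of caching it, using an illustrative struct:

    #include <stdio.h>
    #include <stdint.h>

    struct tp { uint32_t snd_una, snd_up, write_seq; };

    /* Urgent mode holds exactly while the urgent point sits ahead of the
     * lowest unacknowledged byte; there is no flag to set or clear. */
    static int tcp_urg_mode(const struct tp *tp)
    {
        return tp->snd_una != tp->snd_up;
    }

    static void mark_urg(struct tp *tp)
    {
        tp->snd_up = tp->write_seq;   /* MSG_OOB: urgent point = queue tail */
    }

    int main(void)
    {
        struct tp tp = { .snd_una = 100, .snd_up = 100, .write_seq = 150 };

        printf("urg before: %d\n", tcp_urg_mode(&tp)); /* 0 */
        mark_urg(&tp);
        printf("urg after:  %d\n", tcp_urg_mode(&tp)); /* 1 */
        tp.snd_una = 150;             /* everything up to snd_up ACKed... */
        tp.snd_up  = tp.snd_una;      /* ...so snd_up advances with it */
        printf("urg acked:  %d\n", tcp_urg_mode(&tp)); /* 0 */
        return 0;
    }
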
@@ -646,7 +651,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
646 | th->check = 0; | 651 | th->check = 0; |
647 | th->urg_ptr = 0; | 652 | th->urg_ptr = 0; |
648 | 653 | ||
649 | if (unlikely(tp->urg_mode && | 654 | /* The urg_mode check is necessary during a window probe below snd_una */
655 | if (unlikely(tcp_urg_mode(tp) && | ||
650 | between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) { | 656 | between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) { |
651 | th->urg_ptr = htons(tp->snd_up - tcb->seq); | 657 | th->urg_ptr = htons(tp->snd_up - tcb->seq); |
652 | th->urg = 1; | 658 | th->urg = 1; |
@@ -1012,7 +1018,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) | |||
1012 | /* Compute the current effective MSS, taking SACKs and IP options, | 1018 | /* Compute the current effective MSS, taking SACKs and IP options, |
1013 | * and even PMTU discovery events into account. | 1019 | * and even PMTU discovery events into account. |
1014 | * | 1020 | * |
1015 | * LARGESEND note: !urg_mode is overkill, only frames up to snd_up | 1021 | * LARGESEND note: !tcp_urg_mode is overkill, only frames up to snd_up |
1016 | * cannot be large. However, taking into account rare use of URG, this | 1022 | * cannot be large. However, taking into account rare use of URG, this |
1017 | * is not a big flaw. | 1023 | * is not a big flaw. |
1018 | */ | 1024 | */ |
@@ -1029,7 +1035,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) | |||
1029 | 1035 | ||
1030 | mss_now = tp->mss_cache; | 1036 | mss_now = tp->mss_cache; |
1031 | 1037 | ||
1032 | if (large_allowed && sk_can_gso(sk) && !tp->urg_mode) | 1038 | if (large_allowed && sk_can_gso(sk) && !tcp_urg_mode(tp)) |
1033 | doing_tso = 1; | 1039 | doing_tso = 1; |
1034 | 1040 | ||
1035 | if (dst) { | 1041 | if (dst) { |
@@ -1193,7 +1199,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, | |||
1193 | /* Don't use the nagle rule for urgent data (or for the final FIN). | 1199 | /* Don't use the nagle rule for urgent data (or for the final FIN). |
1194 | * Nagle can be ignored during F-RTO too (see RFC4138). | 1200 | * Nagle can be ignored during F-RTO too (see RFC4138). |
1195 | */ | 1201 | */ |
1196 | if (tp->urg_mode || (tp->frto_counter == 2) || | 1202 | if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || |
1197 | (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) | 1203 | (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) |
1198 | return 1; | 1204 | return 1; |
1199 | 1205 | ||
@@ -1824,6 +1830,8 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, | |||
1824 | 1830 | ||
1825 | /* changed transmit queue under us so clear hints */ | 1831 | /* changed transmit queue under us so clear hints */ |
1826 | tcp_clear_retrans_hints_partial(tp); | 1832 | tcp_clear_retrans_hints_partial(tp); |
1833 | if (next_skb == tp->retransmit_skb_hint) | ||
1834 | tp->retransmit_skb_hint = skb; | ||
1827 | 1835 | ||
1828 | sk_wmem_free_skb(sk, next_skb); | 1836 | sk_wmem_free_skb(sk, next_skb); |
1829 | } | 1837 | } |
@@ -1838,7 +1846,7 @@ void tcp_simple_retransmit(struct sock *sk) | |||
1838 | struct tcp_sock *tp = tcp_sk(sk); | 1846 | struct tcp_sock *tp = tcp_sk(sk); |
1839 | struct sk_buff *skb; | 1847 | struct sk_buff *skb; |
1840 | unsigned int mss = tcp_current_mss(sk, 0); | 1848 | unsigned int mss = tcp_current_mss(sk, 0); |
1841 | int lost = 0; | 1849 | u32 prior_lost = tp->lost_out; |
1842 | 1850 | ||
1843 | tcp_for_write_queue(skb, sk) { | 1851 | tcp_for_write_queue(skb, sk) { |
1844 | if (skb == tcp_send_head(sk)) | 1852 | if (skb == tcp_send_head(sk)) |
@@ -1849,17 +1857,13 @@ void tcp_simple_retransmit(struct sock *sk) | |||
1849 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1857 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; |
1850 | tp->retrans_out -= tcp_skb_pcount(skb); | 1858 | tp->retrans_out -= tcp_skb_pcount(skb); |
1851 | } | 1859 | } |
1852 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) { | 1860 | tcp_skb_mark_lost_uncond_verify(tp, skb); |
1853 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | ||
1854 | tp->lost_out += tcp_skb_pcount(skb); | ||
1855 | lost = 1; | ||
1856 | } | ||
1857 | } | 1861 | } |
1858 | } | 1862 | } |
1859 | 1863 | ||
1860 | tcp_clear_all_retrans_hints(tp); | 1864 | tcp_clear_retrans_hints_partial(tp); |
1861 | 1865 | ||
1862 | if (!lost) | 1866 | if (prior_lost == tp->lost_out) |
1863 | return; | 1867 | return; |
1864 | 1868 | ||
1865 | if (tcp_is_reno(tp)) | 1869 | if (tcp_is_reno(tp)) |
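tcp_simple_retransmit() now detects whether anything was newly marked lost by snapshotting lost_out, and the marking itself moves into tcp_skb_mark_lost_uncond_verify(). A rough userspace model of that pattern (field and flag names are stand-ins; the real helper lives in the TCP input path and also repairs the retransmit hint):

#include <stdint.h>

enum { TCPCB_LOST = 0x1, TCPCB_SACKED_ACKED = 0x2 };

struct tp_stub  { uint32_t lost_out; };
struct skb_stub { unsigned sacked; unsigned pcount; };

/* Idempotent loss marking: count a segment once, never one already SACKed. */
static void mark_lost_uncond(struct tp_stub *tp, struct skb_stub *skb)
{
    if (!(skb->sacked & (TCPCB_LOST | TCPCB_SACKED_ACKED))) {
        tp->lost_out += skb->pcount;
        skb->sacked |= TCPCB_LOST;
    }
}

/* Caller pattern, as in the hunk above:
 *
 *    uint32_t prior_lost = tp->lost_out;
 *    ...mark every too-large candidate via mark_lost_uncond()...
 *    if (prior_lost == tp->lost_out)
 *        return;    // nothing newly lost, skip the recovery work
 */

Because marking is idempotent and keeps lost_out consistent, comparing counters replaces the old ad-hoc "lost" local flag.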
@@ -1934,8 +1938,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1934 | /* Collapse two adjacent packets if worthwhile and we can. */ | 1938 | /* Collapse two adjacent packets if worthwhile and we can. */ |
1935 | if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && | 1939 | if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && |
1936 | (skb->len < (cur_mss >> 1)) && | 1940 | (skb->len < (cur_mss >> 1)) && |
1937 | (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && | ||
1938 | (!tcp_skb_is_last(sk, skb)) && | 1941 | (!tcp_skb_is_last(sk, skb)) && |
1942 | (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && | ||
1939 | (skb_shinfo(skb)->nr_frags == 0 && | 1943 | (skb_shinfo(skb)->nr_frags == 0 && |
1940 | skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && | 1944 | skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && |
1941 | (tcp_skb_pcount(skb) == 1 && | 1945 | (tcp_skb_pcount(skb) == 1 && |
@@ -1996,86 +2000,18 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1996 | return err; | 2000 | return err; |
1997 | } | 2001 | } |
1998 | 2002 | ||
1999 | /* This gets called after a retransmit timeout, and the initially | 2003 | static int tcp_can_forward_retransmit(struct sock *sk) |
2000 | * retransmitted data is acknowledged. It tries to continue | ||
2001 | * resending the rest of the retransmit queue, until either | ||
2002 | * we've sent it all or the congestion window limit is reached. | ||
2003 | * If doing SACK, the first ACK which comes back for a timeout | ||
2004 | * based retransmit packet might feed us FACK information again. | ||
2005 | * If so, we use it to avoid unnecessary retransmissions. | ||
2006 | */ | ||
2007 | void tcp_xmit_retransmit_queue(struct sock *sk) | ||
2008 | { | 2004 | { |
2009 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2005 | const struct inet_connection_sock *icsk = inet_csk(sk); |
2010 | struct tcp_sock *tp = tcp_sk(sk); | 2006 | struct tcp_sock *tp = tcp_sk(sk); |
2011 | struct sk_buff *skb; | ||
2012 | int packet_cnt; | ||
2013 | |||
2014 | if (tp->retransmit_skb_hint) { | ||
2015 | skb = tp->retransmit_skb_hint; | ||
2016 | packet_cnt = tp->retransmit_cnt_hint; | ||
2017 | } else { | ||
2018 | skb = tcp_write_queue_head(sk); | ||
2019 | packet_cnt = 0; | ||
2020 | } | ||
2021 | |||
2022 | /* First pass: retransmit lost packets. */ | ||
2023 | if (tp->lost_out) { | ||
2024 | tcp_for_write_queue_from(skb, sk) { | ||
2025 | __u8 sacked = TCP_SKB_CB(skb)->sacked; | ||
2026 | |||
2027 | if (skb == tcp_send_head(sk)) | ||
2028 | break; | ||
2029 | /* we could do better than to assign each time */ | ||
2030 | tp->retransmit_skb_hint = skb; | ||
2031 | tp->retransmit_cnt_hint = packet_cnt; | ||
2032 | |||
2033 | /* Assume this retransmit will generate | ||
2034 | * only one packet for congestion window | ||
2035 | * calculation purposes. This works because | ||
2036 | * tcp_retransmit_skb() will chop up the | ||
2037 | * packet to be MSS sized and all the | ||
2038 | * packet counting works out. | ||
2039 | */ | ||
2040 | if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) | ||
2041 | return; | ||
2042 | |||
2043 | if (sacked & TCPCB_LOST) { | ||
2044 | if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { | ||
2045 | int mib_idx; | ||
2046 | |||
2047 | if (tcp_retransmit_skb(sk, skb)) { | ||
2048 | tp->retransmit_skb_hint = NULL; | ||
2049 | return; | ||
2050 | } | ||
2051 | if (icsk->icsk_ca_state != TCP_CA_Loss) | ||
2052 | mib_idx = LINUX_MIB_TCPFASTRETRANS; | ||
2053 | else | ||
2054 | mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS; | ||
2055 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | ||
2056 | |||
2057 | if (skb == tcp_write_queue_head(sk)) | ||
2058 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
2059 | inet_csk(sk)->icsk_rto, | ||
2060 | TCP_RTO_MAX); | ||
2061 | } | ||
2062 | |||
2063 | packet_cnt += tcp_skb_pcount(skb); | ||
2064 | if (packet_cnt >= tp->lost_out) | ||
2065 | break; | ||
2066 | } | ||
2067 | } | ||
2068 | } | ||
2069 | |||
2070 | /* OK, demanded retransmission is finished. */ | ||
2071 | 2007 | ||
2072 | /* Forward retransmissions are possible only during Recovery. */ | 2008 | /* Forward retransmissions are possible only during Recovery. */ |
2073 | if (icsk->icsk_ca_state != TCP_CA_Recovery) | 2009 | if (icsk->icsk_ca_state != TCP_CA_Recovery) |
2074 | return; | 2010 | return 0; |
2075 | 2011 | ||
2076 | /* No forward retransmissions in Reno are possible. */ | 2012 | /* No forward retransmissions in Reno are possible. */ |
2077 | if (tcp_is_reno(tp)) | 2013 | if (tcp_is_reno(tp)) |
2078 | return; | 2014 | return 0; |
2079 | 2015 | ||
2080 | /* Yeah, we have to make a difficult choice between forward transmission | 2016 | /* Yeah, we have to make a difficult choice between forward transmission |
2081 | * and retransmission... Both ways have their merits... | 2017 | * and retransmission... Both ways have their merits... |
@@ -2086,43 +2022,104 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
2086 | */ | 2022 | */ |
2087 | 2023 | ||
2088 | if (tcp_may_send_now(sk)) | 2024 | if (tcp_may_send_now(sk)) |
2089 | return; | 2025 | return 0; |
2090 | 2026 | ||
2091 | /* If nothing is SACKed, highest_sack in the loop won't be valid */ | 2027 | return 1; |
2092 | if (!tp->sacked_out) | 2028 | } |
2093 | return; | ||
2094 | 2029 | ||
2095 | if (tp->forward_skb_hint) | 2030 | /* This gets called after a retransmit timeout, and the initially |
2096 | skb = tp->forward_skb_hint; | 2031 | * retransmitted data is acknowledged. It tries to continue |
2097 | else | 2032 | * resending the rest of the retransmit queue, until either |
2033 | * we've sent it all or the congestion window limit is reached. | ||
2034 | * If doing SACK, the first ACK which comes back for a timeout | ||
2035 | * based retransmit packet might feed us FACK information again. | ||
2036 | * If so, we use it to avoid unnecessary retransmissions. | ||
2037 | */ | ||
2038 | void tcp_xmit_retransmit_queue(struct sock *sk) | ||
2039 | { | ||
2040 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
2041 | struct tcp_sock *tp = tcp_sk(sk); | ||
2042 | struct sk_buff *skb; | ||
2043 | struct sk_buff *hole = NULL; | ||
2044 | u32 last_lost; | ||
2045 | int mib_idx; | ||
2046 | int fwd_rexmitting = 0; | ||
2047 | |||
2048 | if (!tp->lost_out) | ||
2049 | tp->retransmit_high = tp->snd_una; | ||
2050 | |||
2051 | if (tp->retransmit_skb_hint) { | ||
2052 | skb = tp->retransmit_skb_hint; | ||
2053 | last_lost = TCP_SKB_CB(skb)->end_seq; | ||
2054 | if (after(last_lost, tp->retransmit_high)) | ||
2055 | last_lost = tp->retransmit_high; | ||
2056 | } else { | ||
2098 | skb = tcp_write_queue_head(sk); | 2057 | skb = tcp_write_queue_head(sk); |
2058 | last_lost = tp->snd_una; | ||
2059 | } | ||
2099 | 2060 | ||
2061 | /* First pass: retransmit lost packets. */ | ||
2100 | tcp_for_write_queue_from(skb, sk) { | 2062 | tcp_for_write_queue_from(skb, sk) { |
2101 | if (skb == tcp_send_head(sk)) | 2063 | __u8 sacked = TCP_SKB_CB(skb)->sacked; |
2102 | break; | ||
2103 | tp->forward_skb_hint = skb; | ||
2104 | 2064 | ||
2105 | if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) | 2065 | if (skb == tcp_send_head(sk)) |
2106 | break; | 2066 | break; |
2067 | /* we could do better than to assign each time */ | ||
2068 | if (hole == NULL) | ||
2069 | tp->retransmit_skb_hint = skb; | ||
2107 | 2070 | ||
2071 | /* Assume this retransmit will generate | ||
2072 | * only one packet for congestion window | ||
2073 | * calculation purposes. This works because | ||
2074 | * tcp_retransmit_skb() will chop up the | ||
2075 | * packet to be MSS sized and all the | ||
2076 | * packet counting works out. | ||
2077 | */ | ||
2108 | if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) | 2078 | if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) |
2109 | break; | 2079 | return; |
2080 | |||
2081 | if (fwd_rexmitting) { | ||
2082 | begin_fwd: | ||
2083 | if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) | ||
2084 | break; | ||
2085 | mib_idx = LINUX_MIB_TCPFORWARDRETRANS; | ||
2086 | |||
2087 | } else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) { | ||
2088 | tp->retransmit_high = last_lost; | ||
2089 | if (!tcp_can_forward_retransmit(sk)) | ||
2090 | break; | ||
2091 | /* Backtrack if necessary to an skb not tagged lost */ | ||
2092 | if (hole != NULL) { | ||
2093 | skb = hole; | ||
2094 | hole = NULL; | ||
2095 | } | ||
2096 | fwd_rexmitting = 1; | ||
2097 | goto begin_fwd; | ||
2110 | 2098 | ||
2111 | if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) | 2099 | } else if (!(sacked & TCPCB_LOST)) { |
2100 | if (hole == NULL && !(sacked & TCPCB_SACKED_RETRANS)) | ||
2101 | hole = skb; | ||
2112 | continue; | 2102 | continue; |
2113 | 2103 | ||
2114 | /* Ok, retransmit it. */ | 2104 | } else { |
2115 | if (tcp_retransmit_skb(sk, skb)) { | 2105 | last_lost = TCP_SKB_CB(skb)->end_seq; |
2116 | tp->forward_skb_hint = NULL; | 2106 | if (icsk->icsk_ca_state != TCP_CA_Loss) |
2117 | break; | 2107 | mib_idx = LINUX_MIB_TCPFASTRETRANS; |
2108 | else | ||
2109 | mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS; | ||
2118 | } | 2110 | } |
2119 | 2111 | ||
2112 | if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) | ||
2113 | continue; | ||
2114 | |||
2115 | if (tcp_retransmit_skb(sk, skb)) | ||
2116 | return; | ||
2117 | NET_INC_STATS_BH(sock_net(sk), mib_idx); | ||
2118 | |||
2120 | if (skb == tcp_write_queue_head(sk)) | 2119 | if (skb == tcp_write_queue_head(sk)) |
2121 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 2120 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
2122 | inet_csk(sk)->icsk_rto, | 2121 | inet_csk(sk)->icsk_rto, |
2123 | TCP_RTO_MAX); | 2122 | TCP_RTO_MAX); |
2124 | |||
2125 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFORWARDRETRANS); | ||
2126 | } | 2123 | } |
2127 | } | 2124 | } |
2128 | 2125 | ||
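The rewritten tcp_xmit_retransmit_queue() folds the old two-pass walk (lost segments first, then forward retransmissions) into a single pass: it retransmits LOST segments below retransmit_high, remembers the first unmarked segment as a hole, and on leaving the lost range backtracks to that hole to forward-retransmit up to the highest SACKed sequence. A simplified, runnable model over an array of segments; the cwnd check, RTO-timer re-arm, MIB counters and the tcp_can_forward_retransmit() gate are omitted, and the flag names are stand-ins:

#include <stdio.h>

enum { F_LOST = 0x1, F_SACKED = 0x2, F_RETRANS = 0x4 };

struct seg { unsigned seq; unsigned flags; };

static void xmit_walk(struct seg *q, int n,
                      unsigned retransmit_high, unsigned highest_sack_seq)
{
    int hole = -1;      /* first candidate for forward retransmission */
    int fwd = 0;

    for (int i = 0; i < n; i++) {
        struct seg *s = &q[i];

        if (fwd) {
            if (s->seq >= highest_sack_seq)
                break;              /* nothing SACKed beyond this point */
        } else if (s->seq >= retransmit_high) {
            fwd = 1;                /* lost range done: switch modes */
            if (hole >= 0) {
                i = hole - 1;       /* backtrack the walk to the hole */
                hole = -1;
            } else {
                i--;                /* revisit this segment in fwd mode */
            }
            continue;
        } else if (!(s->flags & F_LOST)) {
            if (hole < 0 && !(s->flags & F_RETRANS))
                hole = i;           /* remember the first unmarked skb */
            continue;
        }

        if (s->flags & (F_SACKED | F_RETRANS))
            continue;               /* already delivered or in flight */

        printf("retransmit %u (%s)\n", s->seq, fwd ? "forward" : "lost");
        s->flags |= F_RETRANS;
    }
}

int main(void)
{
    struct seg q[] = {
        { 1000, F_LOST }, { 2000, 0 }, { 3000, F_LOST | F_RETRANS },
        { 4000, F_SACKED }, { 5000, 0 }, { 6000, F_SACKED },
    };

    /* lost range ends at 4000; SACK scoreboard reaches 6000 */
    xmit_walk(q, 6, 4000, 6000);
    return 0;
}

With this sample scoreboard the walk retransmits 1000 as lost, then backtracks to the hole and sends 2000 and 5000 as forward retransmissions, skipping everything SACKed or already in flight.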
@@ -2241,6 +2238,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2241 | struct sk_buff *skb; | 2238 | struct sk_buff *skb; |
2242 | struct tcp_md5sig_key *md5; | 2239 | struct tcp_md5sig_key *md5; |
2243 | __u8 *md5_hash_location; | 2240 | __u8 *md5_hash_location; |
2241 | int mss; | ||
2244 | 2242 | ||
2245 | skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); | 2243 | skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); |
2246 | if (skb == NULL) | 2244 | if (skb == NULL) |
@@ -2251,13 +2249,17 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2251 | 2249 | ||
2252 | skb->dst = dst_clone(dst); | 2250 | skb->dst = dst_clone(dst); |
2253 | 2251 | ||
2252 | mss = dst_metric(dst, RTAX_ADVMSS); | ||
2253 | if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) | ||
2254 | mss = tp->rx_opt.user_mss; | ||
2255 | |||
2254 | if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ | 2256 | if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ |
2255 | __u8 rcv_wscale; | 2257 | __u8 rcv_wscale; |
2256 | /* Set this up on the first call only */ | 2258 | /* Set this up on the first call only */ |
2257 | req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); | 2259 | req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); |
2258 | /* tcp_full_space because it is guaranteed to be the first packet */ | 2260 | /* tcp_full_space because it is guaranteed to be the first packet */ |
2259 | tcp_select_initial_window(tcp_full_space(sk), | 2261 | tcp_select_initial_window(tcp_full_space(sk), |
2260 | dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), | 2262 | mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), |
2261 | &req->rcv_wnd, | 2263 | &req->rcv_wnd, |
2262 | &req->window_clamp, | 2264 | &req->window_clamp, |
2263 | ireq->wscale_ok, | 2265 | ireq->wscale_ok, |
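tcp_make_synack() now computes the advertised MSS once, clamped to any user-configured value, and feeds the same number to both the initial-window calculation and option generation. A small sketch of the clamp, as a stand-alone function with placeholder parameter names:

/* Advertised MSS for a SYN-ACK: start from the route's ADVMSS metric and
 * lower it if the listener set a smaller TCP_MAXSEG (rx_opt.user_mss). */
static unsigned int synack_advertised_mss(unsigned int route_advmss,
                                          unsigned int user_mss)
{
    unsigned int mss = route_advmss;

    if (user_mss && user_mss < mss)
        mss = user_mss;
    return mss;
}

The tcp_connect_init() hunk below applies the same clamp to tp->advmss on the active-open side.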
@@ -2267,8 +2269,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2267 | 2269 | ||
2268 | memset(&opts, 0, sizeof(opts)); | 2270 | memset(&opts, 0, sizeof(opts)); |
2269 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2271 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
2270 | tcp_header_size = tcp_synack_options(sk, req, | 2272 | tcp_header_size = tcp_synack_options(sk, req, mss, |
2271 | dst_metric(dst, RTAX_ADVMSS), | ||
2272 | skb, &opts, &md5) + | 2273 | skb, &opts, &md5) + |
2273 | sizeof(struct tcphdr); | 2274 | sizeof(struct tcphdr); |
2274 | 2275 | ||
@@ -2280,7 +2281,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2280 | th->syn = 1; | 2281 | th->syn = 1; |
2281 | th->ack = 1; | 2282 | th->ack = 1; |
2282 | TCP_ECN_make_synack(req, th); | 2283 | TCP_ECN_make_synack(req, th); |
2283 | th->source = inet_sk(sk)->sport; | 2284 | th->source = ireq->loc_port; |
2284 | th->dest = ireq->rmt_port; | 2285 | th->dest = ireq->rmt_port; |
2285 | /* Setting of flags is superfluous here for callers (and ECE is | 2286 | /* Setting of flags is superfluous here for callers (and ECE is |
2286 | * not even correctly set) | 2287 | * not even correctly set) |
@@ -2342,6 +2343,9 @@ static void tcp_connect_init(struct sock *sk) | |||
2342 | if (!tp->window_clamp) | 2343 | if (!tp->window_clamp) |
2343 | tp->window_clamp = dst_metric(dst, RTAX_WINDOW); | 2344 | tp->window_clamp = dst_metric(dst, RTAX_WINDOW); |
2344 | tp->advmss = dst_metric(dst, RTAX_ADVMSS); | 2345 | tp->advmss = dst_metric(dst, RTAX_ADVMSS); |
2346 | if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss) | ||
2347 | tp->advmss = tp->rx_opt.user_mss; | ||
2348 | |||
2345 | tcp_initialize_rcv_mss(sk); | 2349 | tcp_initialize_rcv_mss(sk); |
2346 | 2350 | ||
2347 | tcp_select_initial_window(tcp_full_space(sk), | 2351 | tcp_select_initial_window(tcp_full_space(sk), |
@@ -2360,6 +2364,7 @@ static void tcp_connect_init(struct sock *sk) | |||
2360 | tcp_init_wl(tp, tp->write_seq, 0); | 2364 | tcp_init_wl(tp, tp->write_seq, 0); |
2361 | tp->snd_una = tp->write_seq; | 2365 | tp->snd_una = tp->write_seq; |
2362 | tp->snd_sml = tp->write_seq; | 2366 | tp->snd_sml = tp->write_seq; |
2367 | tp->snd_up = tp->write_seq; | ||
2363 | tp->rcv_nxt = 0; | 2368 | tp->rcv_nxt = 0; |
2364 | tp->rcv_wup = 0; | 2369 | tp->rcv_wup = 0; |
2365 | tp->copied_seq = 0; | 2370 | tp->copied_seq = 0; |
@@ -2569,8 +2574,7 @@ int tcp_write_wakeup(struct sock *sk) | |||
2569 | tcp_event_new_data_sent(sk, skb); | 2574 | tcp_event_new_data_sent(sk, skb); |
2570 | return err; | 2575 | return err; |
2571 | } else { | 2576 | } else { |
2572 | if (tp->urg_mode && | 2577 | if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF)) |
2573 | between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF)) | ||
2574 | tcp_xmit_probe_skb(sk, 1); | 2578 | tcp_xmit_probe_skb(sk, 1); |
2575 | return tcp_xmit_probe_skb(sk, 0); | 2579 | return tcp_xmit_probe_skb(sk, 0); |
2576 | } | 2580 | } |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 5ab6ba19c3ce..6b6dff1164b9 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -201,7 +201,7 @@ static void tcp_delack_timer(unsigned long data) | |||
201 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED); | 201 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED); |
202 | 202 | ||
203 | while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) | 203 | while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) |
204 | sk->sk_backlog_rcv(sk, skb); | 204 | sk_backlog_rcv(sk, skb); |
205 | 205 | ||
206 | tp->ucopy.memory = 0; | 206 | tp->ucopy.memory = 0; |
207 | } | 207 | } |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 57e26fa66185..eacf4cfef146 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -108,9 +108,6 @@ | |||
108 | * Snmp MIB for the UDP layer | 108 | * Snmp MIB for the UDP layer |
109 | */ | 109 | */ |
110 | 110 | ||
111 | DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; | ||
112 | EXPORT_SYMBOL(udp_stats_in6); | ||
113 | |||
114 | struct hlist_head udp_hash[UDP_HTABLE_SIZE]; | 111 | struct hlist_head udp_hash[UDP_HTABLE_SIZE]; |
115 | DEFINE_RWLOCK(udp_hash_lock); | 112 | DEFINE_RWLOCK(udp_hash_lock); |
116 | 113 | ||
@@ -125,14 +122,23 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); | |||
125 | atomic_t udp_memory_allocated; | 122 | atomic_t udp_memory_allocated; |
126 | EXPORT_SYMBOL(udp_memory_allocated); | 123 | EXPORT_SYMBOL(udp_memory_allocated); |
127 | 124 | ||
128 | static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, | 125 | static int udp_lib_lport_inuse(struct net *net, __u16 num, |
129 | const struct hlist_head udptable[]) | 126 | const struct hlist_head udptable[], |
127 | struct sock *sk, | ||
128 | int (*saddr_comp)(const struct sock *sk1, | ||
129 | const struct sock *sk2)) | ||
130 | { | 130 | { |
131 | struct sock *sk; | 131 | struct sock *sk2; |
132 | struct hlist_node *node; | 132 | struct hlist_node *node; |
133 | 133 | ||
134 | sk_for_each(sk, node, &udptable[udp_hashfn(net, num)]) | 134 | sk_for_each(sk2, node, &udptable[udp_hashfn(net, num)]) |
135 | if (net_eq(sock_net(sk), net) && sk->sk_hash == num) | 135 | if (net_eq(sock_net(sk2), net) && |
136 | sk2 != sk && | ||
137 | sk2->sk_hash == num && | ||
138 | (!sk2->sk_reuse || !sk->sk_reuse) && | ||
139 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if | ||
140 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && | ||
141 | (*saddr_comp)(sk, sk2)) | ||
136 | return 1; | 142 | return 1; |
137 | return 0; | 143 | return 0; |
138 | } | 144 | } |
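The reworked udp_lib_lport_inuse() now takes the binding socket plus a protocol-supplied address comparator, so a single helper implements the whole conflict rule: same port, not both SO_REUSEADDR, overlapping bound devices, and local addresses that compare equal under saddr_comp. A compact restatement of that predicate with stub fields (a sketch, not the kernel types):

#include <stdbool.h>

struct sk_stub {
    unsigned hash;          /* bound port */
    bool     reuse;         /* SO_REUSEADDR */
    int      bound_dev_if;  /* 0 = any device */
};

/* saddr_comp is protocol-specific; for IPv4 an INADDR_ANY binding
 * compares equal to any address on the same port. */
static bool ports_clash(const struct sk_stub *sk, const struct sk_stub *sk2,
                        unsigned snum,
                        bool (*saddr_comp)(const struct sk_stub *,
                                           const struct sk_stub *))
{
    return sk2 != sk &&
           sk2->hash == snum &&
           (!sk2->reuse || !sk->reuse) &&
           (!sk2->bound_dev_if || !sk->bound_dev_if ||
            sk2->bound_dev_if == sk->bound_dev_if) &&
           saddr_comp(sk, sk2);
}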
@@ -149,83 +155,37 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
149 | const struct sock *sk2 ) ) | 155 | const struct sock *sk2 ) ) |
150 | { | 156 | { |
151 | struct hlist_head *udptable = sk->sk_prot->h.udp_hash; | 157 | struct hlist_head *udptable = sk->sk_prot->h.udp_hash; |
152 | struct hlist_node *node; | ||
153 | struct hlist_head *head; | ||
154 | struct sock *sk2; | ||
155 | int error = 1; | 158 | int error = 1; |
156 | struct net *net = sock_net(sk); | 159 | struct net *net = sock_net(sk); |
157 | 160 | ||
158 | write_lock_bh(&udp_hash_lock); | 161 | write_lock_bh(&udp_hash_lock); |
159 | 162 | ||
160 | if (!snum) { | 163 | if (!snum) { |
161 | int i, low, high, remaining; | 164 | int low, high, remaining; |
162 | unsigned rover, best, best_size_so_far; | 165 | unsigned rand; |
166 | unsigned short first; | ||
163 | 167 | ||
164 | inet_get_local_port_range(&low, &high); | 168 | inet_get_local_port_range(&low, &high); |
165 | remaining = (high - low) + 1; | 169 | remaining = (high - low) + 1; |
166 | 170 | ||
167 | best_size_so_far = UINT_MAX; | 171 | rand = net_random(); |
168 | best = rover = net_random() % remaining + low; | 172 | snum = first = rand % remaining + low; |
169 | 173 | rand |= 1; | |
170 | /* 1st pass: look for empty (or shortest) hash chain */ | 174 | while (udp_lib_lport_inuse(net, snum, udptable, sk, |
171 | for (i = 0; i < UDP_HTABLE_SIZE; i++) { | 175 | saddr_comp)) { |
172 | int size = 0; | 176 | do { |
173 | 177 | snum = snum + rand; | |
174 | head = &udptable[udp_hashfn(net, rover)]; | 178 | } while (snum < low || snum > high); |
175 | if (hlist_empty(head)) | 179 | if (snum == first) |
176 | goto gotit; | 180 | goto fail; |
177 | |||
178 | sk_for_each(sk2, node, head) { | ||
179 | if (++size >= best_size_so_far) | ||
180 | goto next; | ||
181 | } | ||
182 | best_size_so_far = size; | ||
183 | best = rover; | ||
184 | next: | ||
185 | /* fold back if end of range */ | ||
186 | if (++rover > high) | ||
187 | rover = low + ((rover - low) | ||
188 | & (UDP_HTABLE_SIZE - 1)); | ||
189 | |||
190 | |||
191 | } | ||
192 | |||
193 | /* 2nd pass: find hole in shortest hash chain */ | ||
194 | rover = best; | ||
195 | for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) { | ||
196 | if (! __udp_lib_lport_inuse(net, rover, udptable)) | ||
197 | goto gotit; | ||
198 | rover += UDP_HTABLE_SIZE; | ||
199 | if (rover > high) | ||
200 | rover = low + ((rover - low) | ||
201 | & (UDP_HTABLE_SIZE - 1)); | ||
202 | } | 181 | } |
203 | 182 | } else if (udp_lib_lport_inuse(net, snum, udptable, sk, saddr_comp)) |
204 | |||
205 | /* All ports in use! */ | ||
206 | goto fail; | 183 | goto fail; |
207 | 184 | ||
208 | gotit: | ||
209 | snum = rover; | ||
210 | } else { | ||
211 | head = &udptable[udp_hashfn(net, snum)]; | ||
212 | |||
213 | sk_for_each(sk2, node, head) | ||
214 | if (sk2->sk_hash == snum && | ||
215 | sk2 != sk && | ||
216 | net_eq(sock_net(sk2), net) && | ||
217 | (!sk2->sk_reuse || !sk->sk_reuse) && | ||
218 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if | ||
219 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && | ||
220 | (*saddr_comp)(sk, sk2) ) | ||
221 | goto fail; | ||
222 | } | ||
223 | |||
224 | inet_sk(sk)->num = snum; | 185 | inet_sk(sk)->num = snum; |
225 | sk->sk_hash = snum; | 186 | sk->sk_hash = snum; |
226 | if (sk_unhashed(sk)) { | 187 | if (sk_unhashed(sk)) { |
227 | head = &udptable[udp_hashfn(net, snum)]; | 188 | sk_add_node(sk, &udptable[udp_hashfn(net, snum)]); |
228 | sk_add_node(sk, head); | ||
229 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 189 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
230 | } | 190 | } |
231 | error = 0; | 191 | error = 0; |
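The rewritten udp_lib_get_port() drops the old two-pass "shortest hash chain" search in favour of a randomized probe: start at a random port in the local range and step by a random odd stride. Because ports are 16-bit values, an odd stride cycles through the whole space, so every in-range port is visited exactly once before the walk returns to its starting point. A standalone model with a stub in-use check:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Stub: pretend everything below 40010 is already bound. */
static int port_in_use(uint16_t port)
{
    return port < 40010;
}

static int pick_port(uint16_t low, uint16_t high)
{
    unsigned int remaining = (unsigned int)(high - low) + 1;
    uint16_t stride = (uint16_t)rand() | 1;  /* odd => full 2^16 cycle */
    uint16_t first, snum;

    snum = first = rand() % remaining + low;
    while (port_in_use(snum)) {
        do {
            snum += stride;                  /* wraps modulo 2^16 */
        } while (snum < low || snum > high);
        if (snum == first)
            return -1;                       /* whole range is in use */
    }
    return snum;
}

int main(void)
{
    srand(1);
    printf("picked %d\n", pick_port(32768, 61000));
    return 0;
}

The kernel variant differs only in drawing its randomness from net_random() and in checking the hash tables via udp_lib_lport_inuse(); hitting the starting port again means every port in the range is taken.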
@@ -302,6 +262,28 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | |||
302 | return result; | 262 | return result; |
303 | } | 263 | } |
304 | 264 | ||
265 | static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, | ||
266 | __be16 sport, __be16 dport, | ||
267 | struct hlist_head udptable[]) | ||
268 | { | ||
269 | struct sock *sk; | ||
270 | const struct iphdr *iph = ip_hdr(skb); | ||
271 | |||
272 | if (unlikely(sk = skb_steal_sock(skb))) | ||
273 | return sk; | ||
274 | else | ||
275 | return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport, | ||
276 | iph->daddr, dport, inet_iif(skb), | ||
277 | udptable); | ||
278 | } | ||
279 | |||
280 | struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, | ||
281 | __be32 daddr, __be16 dport, int dif) | ||
282 | { | ||
283 | return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, udp_hash); | ||
284 | } | ||
285 | EXPORT_SYMBOL_GPL(udp4_lib_lookup); | ||
286 | |||
305 | static inline struct sock *udp_v4_mcast_next(struct sock *sk, | 287 | static inline struct sock *udp_v4_mcast_next(struct sock *sk, |
306 | __be16 loc_port, __be32 loc_addr, | 288 | __be16 loc_port, __be32 loc_addr, |
307 | __be16 rmt_port, __be32 rmt_addr, | 289 | __be16 rmt_port, __be32 rmt_addr, |
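The new __udp4_lib_lookup_skb() prefers a socket already attached to the skb (via skb_steal_sock(), set for instance by transparent-proxy interception) and only falls back to the four-tuple hash lookup when none is present. A stub-typed sketch of that prefer-attached pattern (the names here are placeholders, not kernel types):

#include <stddef.h>

struct sock_stub { int id; };
struct sk_buff_stub {
    struct sock_stub *sk;   /* socket attached earlier, e.g. by TProxy */
};

/* Take over the reference to a socket someone attached to the skb. */
static struct sock_stub *steal_sock(struct sk_buff_stub *skb)
{
    struct sock_stub *sk = skb->sk;

    skb->sk = NULL;         /* the caller now owns the reference */
    return sk;
}

static struct sock_stub *lookup_from_skb(struct sk_buff_stub *skb,
                                         struct sock_stub *(*hash_lookup)(void))
{
    struct sock_stub *sk = steal_sock(skb);

    return sk ? sk : hash_lookup();  /* fall back to the hash tables */
}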
@@ -1201,8 +1183,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | |||
1201 | return __udp4_lib_mcast_deliver(net, skb, uh, | 1183 | return __udp4_lib_mcast_deliver(net, skb, uh, |
1202 | saddr, daddr, udptable); | 1184 | saddr, daddr, udptable); |
1203 | 1185 | ||
1204 | sk = __udp4_lib_lookup(net, saddr, uh->source, daddr, | 1186 | sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); |
1205 | uh->dest, inet_iif(skb), udptable); | ||
1206 | 1187 | ||
1207 | if (sk != NULL) { | 1188 | if (sk != NULL) { |
1208 | int ret = udp_queue_rcv_skb(sk, skb); | 1189 | int ret = udp_queue_rcv_skb(sk, skb); |