Diffstat (limited to 'net')
 net/802/fc.c | 2
 net/802/fddi.c | 8
 net/802/hippi.c | 6
 net/802/p8022.c | 3
 net/802/p8023.c | 1
 net/802/psnap.c | 7
 net/802/sysctl_net_802.c | 3
 net/802/tr.c | 22
 net/8021q/Kconfig | 19
 net/8021q/vlan.c | 8
 net/8021q/vlan.h | 2
 net/8021q/vlan_dev.c | 2
 net/Kconfig | 476
 net/Makefile | 3
 net/appletalk/aarp.c | 2
 net/appletalk/ddp.c | 8
 net/atm/Kconfig | 74
 net/atm/br2684.c | 3
 net/atm/ioctl.c | 1
 net/atm/ipcommon.c | 3
 net/atm/svc.c | 4
 net/ax25/af_ax25.c | 29
 net/ax25/ax25_ds_in.c | 3
 net/ax25/ax25_ds_timer.c | 2
 net/ax25/ax25_in.c | 17
 net/ax25/ax25_route.c | 12
 net/ax25/ax25_std_in.c | 3
 net/ax25/ax25_std_timer.c | 2
 net/ax25/ax25_subr.c | 4
 net/ax25/ax25_uid.c | 83
 net/bluetooth/hci_core.c | 20
 net/bluetooth/hci_event.c | 83
 net/bluetooth/hci_sock.c | 26
 net/bluetooth/l2cap.c | 2
 net/bluetooth/lib.c | 25
 net/bluetooth/rfcomm/core.c | 77
 net/bluetooth/rfcomm/sock.c | 2
 net/bluetooth/rfcomm/tty.c | 206
 net/bluetooth/sco.c | 2
 net/bridge/Kconfig | 31
 net/bridge/br_fdb.c | 2
 net/bridge/netfilter/Kconfig | 2
 net/bridge/netfilter/ebt_mark.c | 5
 net/bridge/netfilter/ebt_ulog.c | 9
 net/compat.c | 9
 net/core/Makefile | 4
 net/core/datagram.c | 6
 net/core/dev.c | 91
 net/core/dst.c | 15
 net/core/ethtool.c | 49
 net/core/filter.c | 6
 net/core/flow.c | 2
 net/core/neighbour.c | 15
 net/core/netfilter.c | 648
 net/core/netpoll.c | 63
 net/core/pktgen.c | 2
 net/core/request_sock.c | 28
 net/core/rtnetlink.c | 9
 net/core/skbuff.c | 162
 net/core/sock.c | 148
 net/core/sysctl_net_core.c | 9
 net/core/utils.c | 39
 net/core/wireless.c | 8
 net/dccp/Kconfig | 50
 net/dccp/Makefile | 10
 net/dccp/ccid.c | 139
 net/dccp/ccid.h | 180
 net/dccp/ccids/Kconfig | 29
 net/dccp/ccids/Makefile | 5
 net/dccp/ccids/ccid3.c | 1221
 net/dccp/ccids/ccid3.h | 137
 net/dccp/ccids/lib/Makefile | 3
 net/dccp/ccids/lib/loss_interval.c | 144
 net/dccp/ccids/lib/loss_interval.h | 61
 net/dccp/ccids/lib/packet_history.c | 398
 net/dccp/ccids/lib/packet_history.h | 199
 net/dccp/ccids/lib/tfrc.h | 22
 net/dccp/ccids/lib/tfrc_equation.c | 644
 net/dccp/dccp.h | 493
 net/dccp/diag.c | 71
 net/dccp/input.c | 600
 net/dccp/ipv4.c | 1356
 net/dccp/minisocks.c | 264
 net/dccp/options.c | 855
 net/dccp/output.c | 528
 net/dccp/proto.c | 826
 net/dccp/timer.c | 255
 net/decnet/Kconfig | 23
 net/decnet/af_decnet.c | 51
 net/decnet/dn_dev.c | 8
 net/decnet/dn_neigh.c | 2
 net/decnet/dn_nsp_in.c | 2
 net/decnet/dn_nsp_out.c | 65
 net/decnet/dn_route.c | 2
 net/decnet/dn_table.c | 6
 net/decnet/netfilter/dn_rtmsg.c | 11
 net/econet/Kconfig | 36
 net/econet/af_econet.c | 8
 net/ethernet/eth.c | 5
 net/ethernet/sysctl_net_ether.c | 1
 net/ieee80211/Kconfig | 69
 net/ieee80211/Makefile | 11
 net/ieee80211/ieee80211_crypt.c | 259
 net/ieee80211/ieee80211_crypt_ccmp.c | 470
 net/ieee80211/ieee80211_crypt_tkip.c | 708
 net/ieee80211/ieee80211_crypt_wep.c | 272
 net/ieee80211/ieee80211_module.c | 299
 net/ieee80211/ieee80211_rx.c | 1189
 net/ieee80211/ieee80211_tx.c | 438
 net/ieee80211/ieee80211_wx.c | 471
 net/ipv4/Kconfig | 56
 net/ipv4/Makefile | 10
 net/ipv4/af_inet.c | 181
 net/ipv4/ah4.c | 18
 net/ipv4/arp.c | 8
 net/ipv4/datagram.c | 3
 net/ipv4/devinet.c | 7
 net/ipv4/esp4.c | 36
 net/ipv4/fib_frontend.c | 6
 net/ipv4/fib_hash.c | 4
 net/ipv4/fib_lookup.h | 1
 net/ipv4/fib_semantics.c | 16
 net/ipv4/fib_trie.c | 1968
 net/ipv4/icmp.c | 27
 net/ipv4/igmp.c | 2
 net/ipv4/inet_connection_sock.c | 641
 net/ipv4/inet_diag.c | 868
 net/ipv4/inet_hashtables.c | 165
 net/ipv4/inet_timewait_sock.c | 384
 net/ipv4/inetpeer.c | 16
 net/ipv4/ip_forward.c | 6
 net/ipv4/ip_fragment.c | 12
 net/ipv4/ip_gre.c | 21
 net/ipv4/ip_input.c | 141
 net/ipv4/ip_options.c | 52
 net/ipv4/ip_output.c | 33
 net/ipv4/ip_sockglue.c | 11
 net/ipv4/ipcomp.c | 9
 net/ipv4/ipconfig.c | 9
 net/ipv4/ipip.c | 56
 net/ipv4/ipmr.c | 8
 net/ipv4/ipvs/Kconfig | 4
 net/ipv4/ipvs/ip_vs_app.c | 1
 net/ipv4/ipvs/ip_vs_conn.c | 8
 net/ipv4/ipvs/ip_vs_core.c | 9
 net/ipv4/ipvs/ip_vs_ctl.c | 13
 net/ipv4/ipvs/ip_vs_lblc.c | 4
 net/ipv4/ipvs/ip_vs_lblcr.c | 4
 net/ipv4/ipvs/ip_vs_proto_tcp.c | 8
 net/ipv4/ipvs/ip_vs_xmit.c | 2
 net/ipv4/multipath_drr.c | 2
 net/ipv4/netfilter.c | 139
 net/ipv4/netfilter/Kconfig | 70
 net/ipv4/netfilter/Makefile | 9
 net/ipv4/netfilter/ip_conntrack_amanda.c | 26
 net/ipv4/netfilter/ip_conntrack_core.c | 427
 net/ipv4/netfilter/ip_conntrack_ftp.c | 35
 net/ipv4/netfilter/ip_conntrack_irc.c | 15
 net/ipv4/netfilter/ip_conntrack_netlink.c | 1579
 net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 73
 net/ipv4/netfilter/ip_conntrack_proto_sctp.c | 9
 net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 48
 net/ipv4/netfilter/ip_conntrack_proto_udp.c | 14
 net/ipv4/netfilter/ip_conntrack_standalone.c | 58
 net/ipv4/netfilter/ip_conntrack_tftp.c | 8
 net/ipv4/netfilter/ip_nat_amanda.c | 4
 net/ipv4/netfilter/ip_nat_core.c | 104
 net/ipv4/netfilter/ip_nat_ftp.c | 4
 net/ipv4/netfilter/ip_nat_helper.c | 8
 net/ipv4/netfilter/ip_nat_irc.c | 4
 net/ipv4/netfilter/ip_nat_proto_icmp.c | 30
 net/ipv4/netfilter/ip_nat_proto_tcp.c | 27
 net/ipv4/netfilter/ip_nat_proto_udp.c | 26
 net/ipv4/netfilter/ip_nat_proto_unknown.c | 13
 net/ipv4/netfilter/ip_nat_snmp_basic.c | 2
 net/ipv4/netfilter/ip_nat_standalone.c | 8
 net/ipv4/netfilter/ip_nat_tftp.c | 4
 net/ipv4/netfilter/ip_queue.c | 58
 net/ipv4/netfilter/ip_tables.c | 5
 net/ipv4/netfilter/ipt_CLASSIFY.c | 4
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 4
 net/ipv4/netfilter/ipt_CONNMARK.c | 15
 net/ipv4/netfilter/ipt_DSCP.c | 3
 net/ipv4/netfilter/ipt_ECN.c | 23
 net/ipv4/netfilter/ipt_LOG.c | 86
 net/ipv4/netfilter/ipt_MARK.c | 22
 net/ipv4/netfilter/ipt_MASQUERADE.c | 5
 net/ipv4/netfilter/ipt_NETMAP.c | 8
 net/ipv4/netfilter/ipt_NFQUEUE.c | 70
 net/ipv4/netfilter/ipt_REJECT.c | 1
 net/ipv4/netfilter/ipt_TCPMSS.c | 10
 net/ipv4/netfilter/ipt_TOS.c | 3
 net/ipv4/netfilter/ipt_TTL.c | 119
 net/ipv4/netfilter/ipt_ULOG.c | 53
 net/ipv4/netfilter/ipt_connbytes.c | 162
 net/ipv4/netfilter/ipt_connmark.c | 7
 net/ipv4/netfilter/ipt_dccp.c | 176
 net/ipv4/netfilter/ipt_hashlimit.c | 2
 net/ipv4/netfilter/ipt_mark.c | 7
 net/ipv4/netfilter/ipt_owner.c | 132
 net/ipv4/netfilter/ipt_string.c | 91
 net/ipv4/proc.c | 5
 net/ipv4/protocol.c | 1
 net/ipv4/raw.c | 7
 net/ipv4/route.c | 10
 net/ipv4/syncookies.c | 4
 net/ipv4/sysctl_net_ipv4.c | 47
 net/ipv4/tcp.c | 421
 net/ipv4/tcp_bic.c | 46
 net/ipv4/tcp_cong.c | 44
 net/ipv4/tcp_diag.c | 784
 net/ipv4/tcp_highspeed.c | 17
 net/ipv4/tcp_htcp.c | 53
 net/ipv4/tcp_hybla.c | 31
 net/ipv4/tcp_input.c | 549
 net/ipv4/tcp_ipv4.c | 941
 net/ipv4/tcp_minisocks.c | 605
 net/ipv4/tcp_output.c | 313
 net/ipv4/tcp_scalable.c | 6
 net/ipv4/tcp_timer.c | 253
 net/ipv4/tcp_vegas.c | 50
 net/ipv4/tcp_westwood.c | 64
 net/ipv4/udp.c | 39
 net/ipv4/utils.c | 59
 net/ipv4/xfrm4_state.c | 2
 net/ipv4/xfrm4_tunnel.c | 3
 net/ipv6/Kconfig | 23
 net/ipv6/Makefile | 4
 net/ipv6/addrconf.c | 38
 net/ipv6/af_inet6.c | 62
 net/ipv6/ah6.c | 31
 net/ipv6/datagram.c | 5
 net/ipv6/esp6.c | 27
 net/ipv6/exthdrs.c | 8
 net/ipv6/icmp.c | 25
 net/ipv6/inet6_hashtables.c | 81
 net/ipv6/ip6_fib.c | 2
 net/ipv6/ip6_input.c | 15
 net/ipv6/ip6_output.c | 61
 net/ipv6/ip6_tunnel.c | 38
 net/ipv6/ipcomp6.c | 5
 net/ipv6/ipv6_sockglue.c | 28
 net/ipv6/ipv6_syms.c | 3
 net/ipv6/ndisc.c | 4
 net/ipv6/netfilter.c | 104
 net/ipv6/netfilter/Kconfig | 37
 net/ipv6/netfilter/Makefile | 3
 net/ipv6/netfilter/ip6_queue.c | 57
 net/ipv6/netfilter/ip6_tables.c | 5
 net/ipv6/netfilter/ip6t_HL.c | 118
 net/ipv6/netfilter/ip6t_LOG.c | 104
 net/ipv6/netfilter/ip6t_MARK.c | 5
 net/ipv6/netfilter/ip6t_NFQUEUE.c | 70
 net/ipv6/netfilter/ip6t_REJECT.c | 284
 net/ipv6/netfilter/ip6t_owner.c | 90
 net/ipv6/raw.c | 22
 net/ipv6/reassembly.c | 4
 net/ipv6/route.c | 14
 net/ipv6/sit.c | 23
 net/ipv6/sysctl_net_ipv6.c | 3
 net/ipv6/tcp_ipv6.c | 448
 net/ipv6/udp.c | 12
 net/ipv6/xfrm6_tunnel.c | 2
 net/ipx/Kconfig | 33
 net/ipx/af_ipx.c | 10
 net/ipx/ipx_proc.c | 2
 net/irda/af_irda.c | 2
 net/irda/irlan/irlan_filter.c | 1
 net/irda/irlap_frame.c | 8
 net/irda/irlmp.c | 3
 net/irda/irmod.c | 2
 net/irda/irnet/irnet.h | 3
 net/irda/irnet/irnet_ppp.c | 2
 net/irda/irqueue.c | 1
 net/irda/qos.c | 1
 net/lapb/Kconfig | 22
 net/lapb/lapb_subr.c | 2
 net/llc/af_llc.c | 4
 net/llc/llc_conn.c | 8
 net/llc/llc_core.c | 3
 net/llc/llc_if.c | 2
 net/llc/llc_input.c | 4
 net/llc/llc_sap.c | 2
 net/netfilter/Kconfig | 24
 net/netfilter/Makefile | 7
 net/netfilter/core.c | 216
 net/netfilter/nf_internals.h | 39
 net/netfilter/nf_log.c | 178
 net/netfilter/nf_queue.c | 343
 net/netfilter/nf_sockopt.c | 132
 net/netfilter/nfnetlink.c | 376
 net/netfilter/nfnetlink_log.c | 1055
 net/netfilter/nfnetlink_queue.c | 1121
 net/netlink/af_netlink.c | 317
 net/netrom/af_netrom.c | 38
 net/netrom/nr_dev.c | 5
 net/netrom/nr_in.c | 3
 net/netrom/nr_subr.c | 4
 net/netrom/nr_timer.c | 2
 net/packet/Kconfig | 26
 net/packet/af_packet.c | 20
 net/rose/af_rose.c | 29
 net/rose/rose_in.c | 3
 net/rose/rose_route.c | 8
 net/rose/rose_subr.c | 4
 net/rose/rose_timer.c | 2
 net/rxrpc/transport.c | 2
 net/sched/Kconfig | 38
 net/sched/act_api.c | 15
 net/sched/cls_api.c | 2
 net/sched/em_meta.c | 62
 net/sched/em_text.c | 3
 net/sched/gact.c | 2
 net/sched/ipt.c | 2
 net/sched/mirred.c | 2
 net/sched/pedit.c | 2
 net/sched/police.c | 3
 net/sched/sch_api.c | 4
 net/sched/sch_generic.c | 24
 net/sched/simple.c | 2
 net/sctp/associola.c | 13
 net/sctp/bind_addr.c | 16
 net/sctp/chunk.c | 2
 net/sctp/endpointola.c | 9
 net/sctp/input.c | 49
 net/sctp/ipv6.c | 14
 net/sctp/objcnt.c | 6
 net/sctp/proc.c | 1
 net/sctp/protocol.c | 16
 net/sctp/sm_make_chunk.c | 24
 net/sctp/sm_sideeffect.c | 13
 net/sctp/socket.c | 7
 net/sctp/ssnmap.c | 3
 net/sctp/sysctl.c | 1
 net/sctp/transport.c | 5
 net/sctp/ulpevent.c | 19
 net/sctp/ulpqueue.c | 72
 net/socket.c | 19
 net/sunrpc/auth_gss/gss_krb5_crypto.c | 7
 net/sunrpc/auth_gss/gss_krb5_mech.c | 9
 net/sunrpc/auth_gss/gss_spkm3_mech.c | 12
 net/sunrpc/rpc_pipe.c | 6
 net/sunrpc/sched.c | 8
 net/sunrpc/svcsock.c | 13
 net/sunrpc/xdr.c | 1
 net/sysctl_net.c | 8
 net/unix/Kconfig | 21
 net/unix/af_unix.c | 10
 net/unix/garbage.c | 14
 net/unix/sysctl_net_unix.c | 2
 net/wanrouter/Kconfig | 29
 net/wanrouter/af_wanpipe.c | 2
 net/wanrouter/wanmain.c | 6
 net/x25/Kconfig | 36
 net/x25/af_x25.c | 2
 net/x25/x25_dev.c | 2
 net/x25/x25_in.c | 2
 net/x25/x25_subr.c | 4
 net/x25/x25_timer.c | 2
 net/xfrm/Kconfig | 15
 net/xfrm/xfrm_input.c | 2
 net/xfrm/xfrm_policy.c | 2
 net/xfrm/xfrm_user.c | 34
 363 files changed, 27251 insertions(+), 7889 deletions(-)
diff --git a/net/802/fc.c b/net/802/fc.c
index 640d34e026c2..282c4ab1abe6 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -87,7 +87,7 @@ static int fc_rebuild_header(struct sk_buff *skb)
 	struct fch_hdr *fch=(struct fch_hdr *)skb->data;
 	struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr));
 	if(fcllc->ethertype != htons(ETH_P_IP)) {
-		printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n",(unsigned int)htons(fcllc->ethertype));
+		printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(fcllc->ethertype));
 		return 0;
 	}
 #ifdef CONFIG_INET
diff --git a/net/802/fddi.c b/net/802/fddi.c
index ebcf4830d6f1..ac242a4bc346 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -108,8 +108,8 @@ static int fddi_rebuild_header(struct sk_buff *skb)
 	else
 #endif
 	{
-		printk("%s: Don't know how to resolve type %02X addresses.\n",
-		       skb->dev->name, htons(fddi->hdr.llc_snap.ethertype));
+		printk("%s: Don't know how to resolve type %04X addresses.\n",
+		       skb->dev->name, ntohs(fddi->hdr.llc_snap.ethertype));
 		return(0);
 	}
 }
@@ -122,10 +122,10 @@ static int fddi_rebuild_header(struct sk_buff *skb)
  *	the proper pointer to the start of packet data (skb->data).
  */
 
-unsigned short fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
+__be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	struct fddihdr *fddi = (struct fddihdr *)skb->data;
-	unsigned short type;
+	__be16 type;
 
 	/*
 	 * Set mac.raw field to point to FC byte, set data field to point
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 051e8af56a77..6d7fed3dd99a 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -51,6 +51,7 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev,
 			unsigned len)
 {
 	struct hippi_hdr *hip = (struct hippi_hdr *)skb_push(skb, HIPPI_HLEN);
+	struct hippi_cb *hcb = (struct hippi_cb *) skb->cb;
 
 	if (!len){
 		len = skb->len - HIPPI_HLEN;
@@ -84,9 +85,10 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev,
 	if (daddr)
 	{
 		memcpy(hip->le.dest_switch_addr, daddr + 3, 3);
-		memcpy(&skb->private.ifield, daddr + 2, 4);
+		memcpy(&hcb->ifield, daddr + 2, 4);
 		return HIPPI_HLEN;
 	}
+	hcb->ifield = 0;
 	return -((int)HIPPI_HLEN);
 }
 
@@ -122,7 +124,7 @@ static int hippi_rebuild_header(struct sk_buff *skb)
  *	Determine the packet's protocol ID.
  */
 
-unsigned short hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
+__be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	struct hippi_hdr *hip;
 
diff --git a/net/802/p8022.c b/net/802/p8022.c
index 5ae63416df6d..b24817c63ca8 100644
--- a/net/802/p8022.c
+++ b/net/802/p8022.c
@@ -35,7 +35,8 @@ static int p8022_request(struct datalink_proto *dl, struct sk_buff *skb,
 struct datalink_proto *register_8022_client(unsigned char type,
 					    int (*func)(struct sk_buff *skb,
 							struct net_device *dev,
-							struct packet_type *pt))
+							struct packet_type *pt,
+							struct net_device *orig_dev))
 {
 	struct datalink_proto *proto;
 
diff --git a/net/802/p8023.c b/net/802/p8023.c
index a0b61b40225f..6368d3dce444 100644
--- a/net/802/p8023.c
+++ b/net/802/p8023.c
@@ -20,6 +20,7 @@
 #include <linux/skbuff.h>
 
 #include <net/datalink.h>
+#include <net/p8022.h>
 
 /*
  * Place an 802.3 header on a packet. The driver will do the mac
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 1053821ddf93..ab80b1fab53c 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -47,7 +47,7 @@ static struct datalink_proto *find_snap_client(unsigned char *desc)
  *	A SNAP packet has arrived
  */
 static int snap_rcv(struct sk_buff *skb, struct net_device *dev,
-		    struct packet_type *pt)
+		    struct packet_type *pt, struct net_device *orig_dev)
 {
 	int rc = 1;
 	struct datalink_proto *proto;
@@ -61,7 +61,7 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev,
 		/* Pass the frame on. */
 		skb->h.raw += 5;
 		skb_pull(skb, 5);
-		rc = proto->rcvfunc(skb, dev, &snap_packet_type);
+		rc = proto->rcvfunc(skb, dev, &snap_packet_type, orig_dev);
 	} else {
 		skb->sk = NULL;
 		kfree_skb(skb);
@@ -118,7 +118,8 @@ module_exit(snap_exit);
 struct datalink_proto *register_snap_client(unsigned char *desc,
 					    int (*rcvfunc)(struct sk_buff *,
 							   struct net_device *,
-							   struct packet_type *))
+							   struct packet_type *,
+							   struct net_device *))
 {
 	struct datalink_proto *proto = NULL;
 
diff --git a/net/802/sysctl_net_802.c b/net/802/sysctl_net_802.c
index 36079630c49f..700129556c13 100644
--- a/net/802/sysctl_net_802.c
+++ b/net/802/sysctl_net_802.c
@@ -10,9 +10,10 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/config.h>
 #include <linux/mm.h>
+#include <linux/if_tr.h>
 #include <linux/sysctl.h>
-#include <linux/config.h>
 
 #ifdef CONFIG_TR
 extern int sysctl_tr_rif_timeout;
diff --git a/net/802/tr.c b/net/802/tr.c
index a755e880f4ba..1bb7dc1b85cd 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -251,10 +251,11 @@ void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,struct net_device *
 	unsigned int hash;
 	struct rif_cache *entry;
 	unsigned char *olddata;
+	unsigned long flags;
 	static const unsigned char mcast_func_addr[]
 		= {0xC0,0x00,0x00,0x04,0x00,0x00};
 
-	spin_lock_bh(&rif_lock);
+	spin_lock_irqsave(&rif_lock, flags);
 
 	/*
 	 *	Broadcasts are single route as stated in RFC 1042
@@ -323,7 +324,7 @@ printk("source routing for %02X:%02X:%02X:%02X:%02X:%02X\n",trh->daddr[0],
 	else
 		slack = 18 - ((ntohs(trh->rcf) & TR_RCF_LEN_MASK)>>8);
 	olddata = skb->data;
-	spin_unlock_bh(&rif_lock);
+	spin_unlock_irqrestore(&rif_lock, flags);
 
 	skb_pull(skb, slack);
 	memmove(skb->data, olddata, sizeof(struct trh_hdr) - slack);
@@ -337,10 +338,11 @@ printk("source routing for %02X:%02X:%02X:%02X:%02X:%02X\n",trh->daddr[0],
 static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev)
 {
 	unsigned int hash, rii_p = 0;
+	unsigned long flags;
 	struct rif_cache *entry;
 
 
-	spin_lock_bh(&rif_lock);
+	spin_lock_irqsave(&rif_lock, flags);
 
 	/*
 	 *	Firstly see if the entry exists
@@ -378,7 +380,7 @@ printk("adding rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n",
 		if(!entry)
 		{
 			printk(KERN_DEBUG "tr.c: Couldn't malloc rif cache entry !\n");
-			spin_unlock_bh(&rif_lock);
+			spin_unlock_irqrestore(&rif_lock, flags);
 			return;
 		}
 
@@ -420,7 +422,7 @@ printk("updating rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n",
 	   }
 	   entry->last_used=jiffies;
 	}
-	spin_unlock_bh(&rif_lock);
+	spin_unlock_irqrestore(&rif_lock, flags);
 }
 
 /*
@@ -430,9 +432,9 @@ printk("updating rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n",
 static void rif_check_expire(unsigned long dummy)
 {
 	int i;
-	unsigned long next_interval = jiffies + sysctl_tr_rif_timeout/2;
+	unsigned long flags, next_interval = jiffies + sysctl_tr_rif_timeout/2;
 
-	spin_lock_bh(&rif_lock);
+	spin_lock_irqsave(&rif_lock, flags);
 
 	for(i =0; i < RIF_TABLE_SIZE; i++) {
 		struct rif_cache *entry, **pentry;
@@ -454,7 +456,7 @@ static void rif_check_expire(unsigned long dummy)
 		}
 	}
 
-	spin_unlock_bh(&rif_lock);
+	spin_unlock_irqrestore(&rif_lock, flags);
 
 	mod_timer(&rif_timer, next_interval);
 
@@ -485,7 +487,7 @@ static struct rif_cache *rif_get_idx(loff_t pos)
 
 static void *rif_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	spin_lock_bh(&rif_lock);
+	spin_lock_irq(&rif_lock);
 
 	return *pos ? rif_get_idx(*pos - 1) : SEQ_START_TOKEN;
 }
@@ -516,7 +518,7 @@ static void *rif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 static void rif_seq_stop(struct seq_file *seq, void *v)
 {
-	spin_unlock_bh(&rif_lock);
+	spin_unlock_irq(&rif_lock);
 }
 
 static int rif_seq_show(struct seq_file *seq, void *v)
diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig
new file mode 100644
index 000000000000..c4a382e450e2
--- /dev/null
+++ b/net/8021q/Kconfig
@@ -0,0 +1,19 @@
+#
+# Configuration for 802.1Q VLAN support
+#
+
+config VLAN_8021Q
+	tristate "802.1Q VLAN Support"
+	---help---
+	  Select this and you will be able to create 802.1Q VLAN interfaces
+	  on your ethernet interfaces. 802.1Q VLAN supports almost
+	  everything a regular ethernet interface does, including
+	  firewalling, bridging, and of course IP traffic. You will need
+	  the 'vconfig' tool from the VLAN project in order to effectively
+	  use VLANs. See the VLAN web page for more information:
+	  <http://www.candelatech.com/~greear/vlan.html>
+
+	  To compile this code as a module, choose M here: the module
+	  will be called 8021q.
+
+	  If unsure, say N.
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 1f6d31670bc7..91e412b0ab00 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -578,6 +578,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		if (!vlandev)
 			continue;
 
+		if (netif_carrier_ok(dev)) {
+			if (!netif_carrier_ok(vlandev))
+				netif_carrier_on(vlandev);
+		} else {
+			if (netif_carrier_ok(vlandev))
+				netif_carrier_off(vlandev);
+		}
+
 		if ((vlandev->state & VLAN_LINK_STATE_MASK) != flgs) {
 			vlandev->state = (vlandev->state &~ VLAN_LINK_STATE_MASK)
 				| flgs;
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 508b1fa14546..9ae3a14dd016 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -51,7 +51,7 @@ struct net_device *__find_vlan_dev(struct net_device* real_dev,
 /* found in vlan_dev.c */
 int vlan_dev_rebuild_header(struct sk_buff *skb);
 int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
-                  struct packet_type* ptype);
+                  struct packet_type *ptype, struct net_device *orig_dev);
 int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
                          unsigned short type, void *daddr, void *saddr,
                          unsigned len);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 49c487413518..145f5cde96cf 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -113,7 +113,7 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
  *
  */
 int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
-		  struct packet_type* ptype)
+		  struct packet_type* ptype, struct net_device *orig_dev)
 {
 	unsigned char *rawp = NULL;
 	struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data);
diff --git a/net/Kconfig b/net/Kconfig
index 9251b28e8d5d..2bdd5623fdd5 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -2,7 +2,7 @@
 # Network configuration
 #
 
-menu "Networking support"
+menu "Networking"
 
 config NET
 	bool "Networking support"
@@ -10,7 +10,9 @@ config NET
 	  Unless you really know what you are doing, you should say Y here.
 	  The reason is that some programs need kernel networking support even
 	  when running on a stand-alone machine that isn't connected to any
-	  other computer. If you are upgrading from an older kernel, you
+	  other computer.
+
+	  If you are upgrading from an older kernel, you
 	  should consider updating your networking tools too because changes
 	  in the kernel and the tools often go hand in hand. The tools are
 	  contained in the package net-tools, the location and version number
@@ -20,57 +22,14 @@ config NET
 	  recommended to read the NET-HOWTO, available from
 	  <http://www.tldp.org/docs.html#howto>.
 
-menu "Networking options"
-	depends on NET
-
-config PACKET
-	tristate "Packet socket"
-	---help---
-	  The Packet protocol is used by applications which communicate
-	  directly with network devices without an intermediate network
-	  protocol implemented in the kernel, e.g. tcpdump. If you want them
-	  to work, choose Y.
-
-	  To compile this driver as a module, choose M here: the module will
-	  be called af_packet.
-
-	  If unsure, say Y.
-
-config PACKET_MMAP
-	bool "Packet socket: mmapped IO"
-	depends on PACKET
-	help
-	  If you say Y here, the Packet protocol driver will use an IO
-	  mechanism that results in faster communication.
-
-	  If unsure, say N.
-
-config UNIX
-	tristate "Unix domain sockets"
-	---help---
-	  If you say Y here, you will include support for Unix domain sockets;
-	  sockets are the standard Unix mechanism for establishing and
-	  accessing network connections. Many commonly used programs such as
-	  the X Window system and syslog use these sockets even if your
-	  machine is not connected to any network. Unless you are working on
-	  an embedded system or something similar, you therefore definitely
-	  want to say Y here.
-
-	  To compile this driver as a module, choose M here: the module will be
-	  called unix. Note that several important services won't work
-	  correctly if you say M here and then neglect to load the module.
-
-	  Say Y unless you know what you are doing.
-
-config NET_KEY
-	tristate "PF_KEY sockets"
-	select XFRM
-	---help---
-	  PF_KEYv2 socket family, compatible to KAME ones.
-	  They are required if you are going to use IPsec tools ported
-	  from KAME.
-
-	  Say Y unless you know what you are doing.
+# Make sure that all config symbols are dependent on NET
+if NET
+
+menu "Networking options"
+
+source "net/packet/Kconfig"
+source "net/unix/Kconfig"
+source "net/xfrm/Kconfig"
 
 config INET
 	bool "TCP/IP networking"
@@ -94,30 +53,12 @@ config INET
 
 	  Short answer: say Y.
 
+if INET
 source "net/ipv4/Kconfig"
-
-# IPv6 as module will cause a CRASH if you try to unload it
-config IPV6
-	tristate "The IPv6 protocol"
-	depends on INET
-	default m
-	select CRYPTO if IPV6_PRIVACY
-	select CRYPTO_MD5 if IPV6_PRIVACY
-	---help---
-	  This is complemental support for the IP version 6.
-	  You will still be able to do traditional IPv4 networking as well.
-
-	  For general information about IPv6, see
-	  <http://playground.sun.com/pub/ipng/html/ipng-main.html>.
-	  For Linux IPv6 development information, see <http://www.linux-ipv6.org>.
-	  For specific information about IPv6 under Linux, read the HOWTO at
-	  <http://www.bieringer.de/linux/IPv6/>.
-
-	  To compile this protocol support as a module, choose M here: the
-	  module will be called ipv6.
-
 source "net/ipv6/Kconfig"
 
+endif # if INET
+
 menuconfig NETFILTER
 	bool "Network packet filtering (replaces ipchains)"
 	---help---
@@ -206,269 +147,17 @@ source "net/bridge/netfilter/Kconfig"
 
 endif
 
-config XFRM
-	bool
-	depends on NET
-
-source "net/xfrm/Kconfig"
-
+source "net/dccp/Kconfig"
 source "net/sctp/Kconfig"
-
-config ATM
-	tristate "Asynchronous Transfer Mode (ATM) (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
-	---help---
-	  ATM is a high-speed networking technology for Local Area Networks
-	  and Wide Area Networks.  It uses a fixed packet size and is
-	  connection oriented, allowing for the negotiation of minimum
-	  bandwidth requirements.
-
-	  In order to participate in an ATM network, your Linux box needs an
-	  ATM networking card. If you have that, say Y here and to the driver
-	  of your ATM card below.
-
-	  Note that you need a set of user-space programs to actually make use
-	  of ATM. See the file <file:Documentation/networking/atm.txt> for
-	  further details.
-
-config ATM_CLIP
-	tristate "Classical IP over ATM (EXPERIMENTAL)"
-	depends on ATM && INET
-	help
-	  Classical IP over ATM for PVCs and SVCs, supporting InARP and
-	  ATMARP. If you want to communication with other IP hosts on your ATM
-	  network, you will typically either say Y here or to "LAN Emulation
-	  (LANE)" below.
-
-config ATM_CLIP_NO_ICMP
-	bool "Do NOT send ICMP if no neighbour (EXPERIMENTAL)"
-	depends on ATM_CLIP
-	help
-	  Normally, an "ICMP host unreachable" message is sent if a neighbour
-	  cannot be reached because there is no VC to it in the kernel's
-	  ATMARP table. This may cause problems when ATMARP table entries are
-	  briefly removed during revalidation. If you say Y here, packets to
-	  such neighbours are silently discarded instead.
-
-config ATM_LANE
-	tristate "LAN Emulation (LANE) support (EXPERIMENTAL)"
-	depends on ATM
-	help
-	  LAN Emulation emulates services of existing LANs across an ATM
-	  network. Besides operating as a normal ATM end station client, Linux
-	  LANE client can also act as an proxy client bridging packets between
-	  ELAN and Ethernet segments. You need LANE if you want to try MPOA.
-
-config ATM_MPOA
-	tristate "Multi-Protocol Over ATM (MPOA) support (EXPERIMENTAL)"
-	depends on ATM && INET && ATM_LANE!=n
-	help
-	  Multi-Protocol Over ATM allows ATM edge devices such as routers,
-	  bridges and ATM attached hosts establish direct ATM VCs across
-	  subnetwork boundaries. These shortcut connections bypass routers
-	  enhancing overall network performance.
-
-config ATM_BR2684
-	tristate "RFC1483/2684 Bridged protocols"
-	depends on ATM && INET
-	help
-	  ATM PVCs can carry ethernet PDUs according to rfc2684 (formerly 1483)
-	  This device will act like an ethernet from the kernels point of view,
-	  with the traffic being carried by ATM PVCs (currently 1 PVC/device).
-	  This is sometimes used over DSL lines. If in doubt, say N.
-
-config ATM_BR2684_IPFILTER
-	bool "Per-VC IP filter kludge"
-	depends on ATM_BR2684
-	help
-	  This is an experimental mechanism for users who need to terminating a
-	  large number of IP-only vcc's. Do not enable this unless you are sure
-	  you know what you are doing.
-
-config BRIDGE
-	tristate "802.1d Ethernet Bridging"
-	---help---
-	  If you say Y here, then your Linux box will be able to act as an
-	  Ethernet bridge, which means that the different Ethernet segments it
-	  is connected to will appear as one Ethernet to the participants.
-	  Several such bridges can work together to create even larger
-	  networks of Ethernets using the IEEE 802.1 spanning tree algorithm.
-	  As this is a standard, Linux bridges will cooperate properly with
-	  other third party bridge products.
-
-	  In order to use the Ethernet bridge, you'll need the bridge
-	  configuration tools; see <file:Documentation/networking/bridge.txt>
-	  for location. Please read the Bridge mini-HOWTO for more
-	  information.
-
-	  If you enable iptables support along with the bridge support then you
-	  turn your bridge into a bridging IP firewall.
-	  iptables will then see the IP packets being bridged, so you need to
-	  take this into account when setting up your firewall rules.
-	  Enabling arptables support when bridging will let arptables see
-	  bridged ARP traffic in the arptables FORWARD chain.
-
-	  To compile this code as a module, choose M here: the module
-	  will be called bridge.
-
-	  If unsure, say N.
-
-config VLAN_8021Q
-	tristate "802.1Q VLAN Support"
-	---help---
-	  Select this and you will be able to create 802.1Q VLAN interfaces
-	  on your ethernet interfaces. 802.1Q VLAN supports almost
-	  everything a regular ethernet interface does, including
-	  firewalling, bridging, and of course IP traffic. You will need
-	  the 'vconfig' tool from the VLAN project in order to effectively
-	  use VLANs. See the VLAN web page for more information:
-	  <http://www.candelatech.com/~greear/vlan.html>
-
-	  To compile this code as a module, choose M here: the module
-	  will be called 8021q.
-
-	  If unsure, say N.
-
-config DECNET
-	tristate "DECnet Support"
-	---help---
-	  The DECnet networking protocol was used in many products made by
-	  Digital (now Compaq).  It provides reliable stream and sequenced
-	  packet communications over which run a variety of services similar
-	  to those which run over TCP/IP.
-
-	  To find some tools to use with the kernel layer support, please
-	  look at Patrick Caulfield's web site:
-	  <http://linux-decnet.sourceforge.net/>.
-
-	  More detailed documentation is available in
-	  <file:Documentation/networking/decnet.txt>.
-
-	  Be sure to say Y to "/proc file system support" and "Sysctl support"
-	  below when using DECnet, since you will need sysctl support to aid
-	  in configuration at run time.
-
-	  The DECnet code is also available as a module ( = code which can be
-	  inserted in and removed from the running kernel whenever you want).
-	  The module is called decnet.
-
+source "net/atm/Kconfig"
+source "net/bridge/Kconfig"
+source "net/8021q/Kconfig"
 source "net/decnet/Kconfig"
-
 source "net/llc/Kconfig"
-
-config IPX
-	tristate "The IPX protocol"
-	select LLC
-	---help---
-	  This is support for the Novell networking protocol, IPX, commonly
-	  used for local networks of Windows machines.  You need it if you
-	  want to access Novell NetWare file or print servers using the Linux
-	  Novell client ncpfs (available from
-	  <ftp://platan.vc.cvut.cz/pub/linux/ncpfs/>) or from
-	  within the Linux DOS emulator DOSEMU (read the DOSEMU-HOWTO,
-	  available from <http://www.tldp.org/docs.html#howto>).  In order
-	  to do the former, you'll also have to say Y to "NCP file system
-	  support", below.
-
-	  IPX is similar in scope to IP, while SPX, which runs on top of IPX,
-	  is similar to TCP.  There is also experimental support for SPX in
-	  Linux (see "SPX networking", below).
-
-	  To turn your Linux box into a fully featured NetWare file server and
-	  IPX router, say Y here and fetch either lwared from
-	  <ftp://ibiblio.org/pub/Linux/system/network/daemons/> or
-	  mars_nwe from <ftp://www.compu-art.de/mars_nwe/>. For more
-	  information, read the IPX-HOWTO available from
-	  <http://www.tldp.org/docs.html#howto>.
-
-	  General information about how to connect Linux, Windows machines and
-	  Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>.
-
-	  The IPX driver would enlarge your kernel by about 16 KB. To compile
-	  this driver as a module, choose M here: the module will be called ipx.
-	  Unless you want to integrate your Linux box with a local Novell
-	  network, say N.
-
 source "net/ipx/Kconfig"
-
-config ATALK
-	tristate "Appletalk protocol support"
-	select LLC
-	---help---
-	  AppleTalk is the protocol that Apple computers can use to communicate
-	  on a network.  If your Linux box is connected to such a network and you
-	  wish to connect to it, say Y.  You will need to use the netatalk package
-	  so that your Linux box can act as a print and file server for Macs as
-	  well as access AppleTalk printers.  Check out
-	  <http://www.zettabyte.net/netatalk/> on the WWW for details.
-	  EtherTalk is the name used for AppleTalk over Ethernet and the
-	  cheaper and slower LocalTalk is AppleTalk over a proprietary Apple
-	  network using serial links.  EtherTalk and LocalTalk are fully
-	  supported by Linux.
-
-	  General information about how to connect Linux, Windows machines and
-	  Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>.  The
-	  NET-3-HOWTO, available from
-	  <http://www.tldp.org/docs.html#howto>, contains valuable
-	  information as well.
-
-	  To compile this driver as a module, choose M here: the module will be
-	  called appletalk. You almost certainly want to compile it as a
-	  module so you can restart your AppleTalk stack without rebooting
-	  your machine. I hear that the GNU boycott of Apple is over, so
-	  even politically correct people are allowed to say Y here.
-
 source "drivers/net/appletalk/Kconfig"
-
-config X25
-	tristate "CCITT X.25 Packet Layer (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
-	---help---
-	  X.25 is a set of standardized network protocols, similar in scope to
-	  frame relay; the one physical line from your box to the X.25 network
-	  entry point can carry several logical point-to-point connections
-	  (called "virtual circuits") to other computers connected to the X.25
-	  network. Governments, banks, and other organizations tend to use it
-	  to connect to each other or to form Wide Area Networks (WANs). Many
-	  countries have public X.25 networks. X.25 consists of two
-	  protocols: the higher level Packet Layer Protocol (PLP) (say Y here
-	  if you want that) and the lower level data link layer protocol LAPB
-	  (say Y to "LAPB Data Link Driver" below if you want that).
-
-	  You can read more about X.25 at <http://www.sangoma.com/x25.htm> and
-	  <http://www.cisco.com/univercd/cc/td/doc/product/software/ios11/cbook/cx25.htm>.
-	  Information about X.25 for Linux is contained in the files
-	  <file:Documentation/networking/x25.txt> and
-	  <file:Documentation/networking/x25-iface.txt>.
-
-	  One connects to an X.25 network either with a dedicated network card
-	  using the X.21 protocol (not yet supported by Linux) or one can do
-	  X.25 over a standard telephone line using an ordinary modem (say Y
-	  to "X.25 async driver" below) or over Ethernet using an ordinary
-	  Ethernet card and the LAPB over Ethernet (say Y to "LAPB Data Link
-	  Driver" and "LAPB over Ethernet driver" below).
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called x25. If unsure, say N.
-
-config LAPB
-	tristate "LAPB Data Link Driver (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
-	---help---
-	  Link Access Procedure, Balanced (LAPB) is the data link layer (i.e.
-	  the lower) part of the X.25 protocol. It offers a reliable
-	  connection service to exchange data frames with one other host, and
-	  it is used to transport higher level protocols (mostly X.25 Packet
-	  Layer, the higher part of X.25, but others are possible as well).
-	  Usually, LAPB is used with specialized X.21 network cards, but Linux
-	  currently supports LAPB only over Ethernet connections. If you want
-	  to use LAPB connections over Ethernet, say Y here and to "LAPB over
-	  Ethernet driver" below. Read
-	  <file:Documentation/networking/lapb-module.txt> for technical
-	  details.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called lapb. If unsure, say N.
+source "net/x25/Kconfig"
+source "net/lapb/Kconfig"
 
 config NET_DIVERT
 	bool "Frame Diverter (EXPERIMENTAL)"
@@ -496,107 +185,10 @@ config NET_DIVERT
 
 	  If unsure, say N.
 
-config ECONET
-	tristate "Acorn Econet/AUN protocols (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && INET
-	---help---
-	  Econet is a fairly old and slow networking protocol mainly used by
-	  Acorn computers to access file and print servers. It uses native
-	  Econet network cards. AUN is an implementation of the higher level
-	  parts of Econet that runs over ordinary Ethernet connections, on
-	  top of the UDP packet protocol, which in turn runs on top of the
-	  Internet protocol IP.
-
-	  If you say Y here, you can choose with the next two options whether
-	  to send Econet/AUN traffic over a UDP Ethernet connection or over
-	  a native Econet network card.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called econet.
-
-config ECONET_AUNUDP
-	bool "AUN over UDP"
-	depends on ECONET
-	help
-	  Say Y here if you want to send Econet/AUN traffic over a UDP
-	  connection (UDP is a packet based protocol that runs on top of the
-	  Internet protocol IP) using an ordinary Ethernet network card.
-
-config ECONET_NATIVE
-	bool "Native Econet"
-	depends on ECONET
-	help
-	  Say Y here if you have a native Econet network card installed in
-	  your computer.
-
-config WAN_ROUTER
-	tristate "WAN router"
-	depends on EXPERIMENTAL
-	---help---
-	  Wide Area Networks (WANs), such as X.25, frame relay and leased
-	  lines, are used to interconnect Local Area Networks (LANs) over vast
-	  distances with data transfer rates significantly higher than those
-	  achievable with commonly used asynchronous modem connections.
-	  Usually, a quite expensive external device called a `WAN router' is
-	  needed to connect to a WAN.
-
-	  As an alternative, WAN routing can be built into the Linux kernel.
-	  With relatively inexpensive WAN interface cards available on the
-	  market, a perfectly usable router can be built for less than half
-	  the price of an external router. If you have one of those cards and
-	  wish to use your Linux box as a WAN router, say Y here and also to
-	  the WAN driver for your card, below. You will then need the
-	  wan-tools package which is available from <ftp://ftp.sangoma.com/>.
-	  Read <file:Documentation/networking/wan-router.txt> for more
-	  information.
-
-	  To compile WAN routing support as a module, choose M here: the
-	  module will be called wanrouter.
-
-	  If unsure, say N.
-
-menu "QoS and/or fair queueing"
-
-config NET_SCHED
-	bool "QoS and/or fair queueing"
-	---help---
-	  When the kernel has several packets to send out over a network
-	  device, it has to decide which ones to send first, which ones to
-	  delay, and which ones to drop. This is the job of the packet
-	  scheduler, and several different algorithms for how to do this
-	  "fairly" have been proposed.
-
-	  If you say N here, you will get the standard packet scheduler, which
-	  is a FIFO (first come, first served). If you say Y here, you will be
-	  able to choose from among several alternative algorithms which can
-	  then be attached to different network devices. This is useful for
-	  example if some of your network devices are real time devices that
-	  need a certain minimum data flow rate, or if you need to limit the
-	  maximum data flow rate for traffic which matches specified criteria.
-	  This code is considered to be experimental.
-
-	  To administer these schedulers, you'll need the user-level utilities
-	  from the package iproute2+tc at <ftp://ftp.tux.org/pub/net/ip-routing/>.
-	  That package also contains some documentation; for more, check out
-	  <http://snafu.freedom.org/linux2.2/iproute-notes.html>.
-
-	  This Quality of Service (QoS) support will enable you to use
-	  Differentiated Services (diffserv) and Resource Reservation Protocol
-	  (RSVP) on your Linux router if you also say Y to "QoS support",
-	  "Packet classifier API" and to some classifiers below.  Documentation
-	  and software is at <http://diffserv.sourceforge.net/>.
-
-	  If you say Y here and to "/proc file system" below, you will be able
-	  to read status information about packet schedulers from the file
-	  /proc/net/psched.
-
-	  The available schedulers are listed in the following questions; you
-	  can say Y to as many as you like. If unsure, say N now.
-
+source "net/econet/Kconfig"
+source "net/wanrouter/Kconfig"
 source "net/sched/Kconfig"
 
-endmenu
-
 menu "Network testing"
 
 config NET_PKTGEN
@@ -614,33 +206,17 @@ config NET_PKTGEN
 	  To compile this code as a module, choose M here: the
 	  module will be called pktgen.
 
-endmenu
+source "net/netfilter/Kconfig"
 
 endmenu
 
-config NETPOLL
-	def_bool NETCONSOLE
-
-config NETPOLL_RX
-	bool "Netpoll support for trapping incoming packets"
-	default n
-	depends on NETPOLL
-
-config NETPOLL_TRAP
-	bool "Netpoll traffic trapping"
-	default n
-	depends on NETPOLL
-
-config NET_POLL_CONTROLLER
-	def_bool NETPOLL
+endmenu
 
 source "net/ax25/Kconfig"
-
 source "net/irda/Kconfig"
-
 source "net/bluetooth/Kconfig"
+source "net/ieee80211/Kconfig"
 
-source "drivers/net/Kconfig"
-
-endmenu
+endif # if NET
+endmenu # Networking
 
diff --git a/net/Makefile b/net/Makefile
index 8e2bdc025ab8..4aa2f46d2a56 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_NET) += $(tmp-y)
 obj-$(CONFIG_LLC)		+= llc/
 obj-$(CONFIG_NET)		+= ethernet/ 802/ sched/ netlink/
 obj-$(CONFIG_INET)		+= ipv4/
+obj-$(CONFIG_NETFILTER)		+= netfilter/
 obj-$(CONFIG_XFRM)		+= xfrm/
 obj-$(CONFIG_UNIX)		+= unix/
 ifneq ($(CONFIG_IPV6),)
@@ -41,7 +42,9 @@ obj-$(CONFIG_ATM) += atm/
 obj-$(CONFIG_DECNET)		+= decnet/
 obj-$(CONFIG_ECONET)		+= econet/
 obj-$(CONFIG_VLAN_8021Q)	+= 8021q/
+obj-$(CONFIG_IP_DCCP)		+= dccp/
 obj-$(CONFIG_IP_SCTP)		+= sctp/
+obj-$(CONFIG_IEEE80211)		+= ieee80211/
 
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index c34614ea5fce..7076097debc2 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -698,7 +698,7 @@ static void __aarp_resolved(struct aarp_entry **list, struct aarp_entry *a,
  *	frame. We currently only support Ethernet.
  */
 static int aarp_rcv(struct sk_buff *skb, struct net_device *dev,
-		    struct packet_type *pt)
+		    struct packet_type *pt, struct net_device *orig_dev)
 {
 	struct elapaarp *ea = aarp_hdr(skb);
 	int hash, ret = 0;
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 192b529f86a4..1d31b3a3f1e5 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -53,12 +53,12 @@
 
 #include <linux/config.h>
 #include <linux/module.h>
-#include <linux/tcp.h>
 #include <linux/if_arp.h>
 #include <linux/termios.h>	/* For TIOCOUTQ/INQ */
 #include <net/datalink.h>
 #include <net/psnap.h>
 #include <net/sock.h>
+#include <net/tcp_states.h>
 #include <net/route.h>
 #include <linux/atalk.h>
 
@@ -1390,7 +1390,7 @@ free_it:
  *	[ie ARPHRD_ETHERTALK]
  */
 static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
-		     struct packet_type *pt)
+		     struct packet_type *pt, struct net_device *orig_dev)
 {
 	struct ddpehdr *ddp;
 	struct sock *sock;
@@ -1482,7 +1482,7 @@ freeit:
  *	header and append a long one.
  */
 static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
-		     struct packet_type *pt)
+		     struct packet_type *pt, struct net_device *orig_dev)
 {
 	/* Expand any short form frames */
 	if (skb->mac.raw[2] == 1) {
@@ -1528,7 +1528,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
 	}
 	skb->h.raw = skb->data;
 
-	return atalk_rcv(skb, dev, pt);
+	return atalk_rcv(skb, dev, pt, orig_dev);
 freeit:
 	kfree_skb(skb);
 	return 0;
diff --git a/net/atm/Kconfig b/net/atm/Kconfig
new file mode 100644
index 000000000000..21ff276b2d80
--- /dev/null
+++ b/net/atm/Kconfig
@@ -0,0 +1,74 @@
+#
+# Asynchronous Transfer Mode (ATM) (EXPERIMENTAL)
+#
+
+config ATM
+	tristate "Asynchronous Transfer Mode (ATM) (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	---help---
+	  ATM is a high-speed networking technology for Local Area Networks
+	  and Wide Area Networks.  It uses a fixed packet size and is
+	  connection oriented, allowing for the negotiation of minimum
+	  bandwidth requirements.
+
+	  In order to participate in an ATM network, your Linux box needs an
+	  ATM networking card. If you have that, say Y here and to the driver
+	  of your ATM card below.
+
+	  Note that you need a set of user-space programs to actually make use
+	  of ATM. See the file <file:Documentation/networking/atm.txt> for
+	  further details.
+
+config ATM_CLIP
+	tristate "Classical IP over ATM (EXPERIMENTAL)"
+	depends on ATM && INET
+	help
+	  Classical IP over ATM for PVCs and SVCs, supporting InARP and
+	  ATMARP. If you want to communication with other IP hosts on your ATM
+	  network, you will typically either say Y here or to "LAN Emulation
+	  (LANE)" below.
+
+config ATM_CLIP_NO_ICMP
+	bool "Do NOT send ICMP if no neighbour (EXPERIMENTAL)"
+	depends on ATM_CLIP
+	help
+	  Normally, an "ICMP host unreachable" message is sent if a neighbour
+	  cannot be reached because there is no VC to it in the kernel's
+	  ATMARP table. This may cause problems when ATMARP table entries are
+	  briefly removed during revalidation. If you say Y here, packets to
+	  such neighbours are silently discarded instead.
+
+config ATM_LANE
+	tristate "LAN Emulation (LANE) support (EXPERIMENTAL)"
+	depends on ATM
+	help
+	  LAN Emulation emulates services of existing LANs across an ATM
+	  network. Besides operating as a normal ATM end station client, Linux
+	  LANE client can also act as an proxy client bridging packets between
+	  ELAN and Ethernet segments. You need LANE if you want to try MPOA.
+
+config ATM_MPOA
+	tristate "Multi-Protocol Over ATM (MPOA) support (EXPERIMENTAL)"
+	depends on ATM && INET && ATM_LANE!=n
+	help
+	  Multi-Protocol Over ATM allows ATM edge devices such as routers,
+	  bridges and ATM attached hosts establish direct ATM VCs across
+	  subnetwork boundaries. These shortcut connections bypass routers
+	  enhancing overall network performance.
+
+config ATM_BR2684
+	tristate "RFC1483/2684 Bridged protocols"
+	depends on ATM && INET
+	help
+	  ATM PVCs can carry ethernet PDUs according to RFC2684 (formerly 1483)
+	  This device will act like an ethernet from the kernels point of view,
+	  with the traffic being carried by ATM PVCs (currently 1 PVC/device).
+	  This is sometimes used over DSL lines. If in doubt, say N.
+
+config ATM_BR2684_IPFILTER
+	bool "Per-VC IP filter kludge"
+	depends on ATM_BR2684
+	help
+	  This is an experimental mechanism for users who need to terminate a
+	  large number of IP-only vcc's. Do not enable this unless you are sure
+	  you know what you are doing.
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index e6954cf1459d..289956c4dd3e 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -289,8 +289,7 @@ xmit will add the additional header part in that case */
289 * This is similar to eth_type_trans, which cannot be used because of 289 * This is similar to eth_type_trans, which cannot be used because of
290 * our dev->hard_header_len 290 * our dev->hard_header_len
291 */ 291 */
292static inline unsigned short br_type_trans(struct sk_buff *skb, 292static inline __be16 br_type_trans(struct sk_buff *skb, struct net_device *dev)
293 struct net_device *dev)
294{ 293{
295 struct ethhdr *eth; 294 struct ethhdr *eth;
296 unsigned char *rawp; 295 unsigned char *rawp;
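[Editorial note] The substantive change in this hunk is the return type: EtherType values travel in network byte order, and the __be16 annotation lets sparse flag accidental host-order arithmetic on them. Below is a minimal userspace sketch of that discipline, with a plain typedef standing in for the kernel's __be16 and a hypothetical classify() helper.

/* Sketch under stated assumptions: be16 stands in for __be16, and
 * classify() is an illustrative analog of br_type_trans(). */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

typedef uint16_t be16;   /* stand-in for the kernel's sparse-checked __be16 */

/* Hypothetical classifier: return the on-wire EtherType unmodified. */
static be16 classify(const uint8_t hdr[2])
{
    be16 proto;
    memcpy(&proto, hdr, sizeof(proto));  /* value stays big-endian */
    return proto;
}

int main(void)
{
    const uint8_t ip_hdr[2] = { 0x08, 0x00 };  /* ETH_P_IP on the wire */
    be16 proto = classify(ip_hdr);
    /* Convert exactly once, at the comparison boundary. */
    printf("IPv4? %s\n", ntohs(proto) == 0x0800 ? "yes" : "no");
    return 0;
}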
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index 4dbb5af34a5e..d89056ec44d4 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -21,6 +21,7 @@
21 21
22#include "resources.h" 22#include "resources.h"
23#include "signaling.h" /* for WAITING and sigd_attach */ 23#include "signaling.h" /* for WAITING and sigd_attach */
24#include "common.h"
24 25
25 26
26static DECLARE_MUTEX(ioctl_mutex); 27static DECLARE_MUTEX(ioctl_mutex);
diff --git a/net/atm/ipcommon.c b/net/atm/ipcommon.c
index 181a3002d8ad..4b1faca5013f 100644
--- a/net/atm/ipcommon.c
+++ b/net/atm/ipcommon.c
@@ -34,7 +34,6 @@
34 34
35void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to) 35void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to)
36{ 36{
37 struct sk_buff *skb;
38 unsigned long flags; 37 unsigned long flags;
39 struct sk_buff *skb_from = (struct sk_buff *) from; 38 struct sk_buff *skb_from = (struct sk_buff *) from;
40 struct sk_buff *skb_to = (struct sk_buff *) to; 39 struct sk_buff *skb_to = (struct sk_buff *) to;
@@ -47,8 +46,6 @@ void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to)
47 prev->next = skb_to; 46 prev->next = skb_to;
48 to->prev->next = from->next; 47 to->prev->next = from->next;
49 to->prev = from->prev; 48 to->prev = from->prev;
50 for (skb = from->next; skb != skb_to; skb = skb->next)
51 skb->list = to;
52 to->qlen += from->qlen; 49 to->qlen += from->qlen;
53 spin_unlock(&to->lock); 50 spin_unlock(&to->lock);
54 from->prev = skb_from; 51 from->prev = skb_from;
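[Editorial note] The deleted loop walked every queued skb just to repoint its list back-reference; with that field gone from sk_buff, the splice becomes constant-time pointer surgery (and it is also why helpers like skb_append() take the queue as an explicit argument in the ax25_subr.c hunk further down). A hedged userspace sketch of the same O(1) move, with illustrative node/list types rather than the kernel's:

/* Toy model of the splice skb_migrate() now performs: four pointer
 * writes plus a length update, no per-node loop. The head doubles as a
 * sentinel node, as sk_buff_head does. */
#include <stdio.h>

struct node { struct node *next, *prev; int val; };
struct list { struct node *next, *prev; int qlen; };

static void list_init(struct list *l)
{
    l->next = l->prev = (struct node *)l;   /* sentinel trick */
    l->qlen = 0;
}

static void push(struct list *l, struct node *n)
{
    n->next = (struct node *)l;
    n->prev = l->prev;
    l->prev->next = n;
    l->prev = n;
    l->qlen++;
}

/* Move everything from 'from' to the tail of 'to' in O(1). */
static void migrate(struct list *from, struct list *to)
{
    if (from->qlen == 0)
        return;
    to->prev->next = from->next;
    from->next->prev = to->prev;
    to->prev = from->prev;
    from->prev->next = (struct node *)to;
    to->qlen += from->qlen;
    list_init(from);                 /* leave the source empty but valid */
}

int main(void)
{
    struct list a, b;
    struct node n1 = { .val = 1 }, n2 = { .val = 2 };
    list_init(&a); list_init(&b);
    push(&a, &n1); push(&a, &n2);
    migrate(&a, &b);
    printf("b.qlen=%d first=%d\n", b.qlen, b.next->val);  /* 2 and 1 */
    return 0;
}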
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 02f5374a51f2..08e46052a3e4 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -118,10 +118,6 @@ static int svc_bind(struct socket *sock,struct sockaddr *sockaddr,
118 goto out; 118 goto out;
119 } 119 }
120 vcc = ATM_SD(sock); 120 vcc = ATM_SD(sock);
121 if (test_bit(ATM_VF_SESSION, &vcc->flags)) {
122 error = -EINVAL;
123 goto out;
124 }
125 addr = (struct sockaddr_atmsvc *) sockaddr; 121 addr = (struct sockaddr_atmsvc *) sockaddr;
126 if (addr->sas_family != AF_ATMSVC) { 122 if (addr->sas_family != AF_ATMSVC) {
127 error = -EAFNOSUPPORT; 123 error = -EAFNOSUPPORT;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 707097deac3d..ea43dfb774e2 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -45,7 +45,7 @@
45#include <linux/sysctl.h> 45#include <linux/sysctl.h>
46#include <linux/init.h> 46#include <linux/init.h>
47#include <linux/spinlock.h> 47#include <linux/spinlock.h>
48#include <net/tcp.h> 48#include <net/tcp_states.h>
49#include <net/ip.h> 49#include <net/ip.h>
50#include <net/arp.h> 50#include <net/arp.h>
51 51
@@ -875,12 +875,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
875 sk->sk_sndbuf = osk->sk_sndbuf; 875 sk->sk_sndbuf = osk->sk_sndbuf;
876 sk->sk_state = TCP_ESTABLISHED; 876 sk->sk_state = TCP_ESTABLISHED;
877 sk->sk_sleep = osk->sk_sleep; 877 sk->sk_sleep = osk->sk_sleep;
878 878 sock_copy_flags(sk, osk);
879 if (sock_flag(osk, SOCK_DBG))
880 sock_set_flag(sk, SOCK_DBG);
881
882 if (sock_flag(osk, SOCK_ZAPPED))
883 sock_set_flag(sk, SOCK_ZAPPED);
884 879
885 oax25 = ax25_sk(osk); 880 oax25 = ax25_sk(osk);
886 881
@@ -1007,7 +1002,8 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1007 struct sock *sk = sock->sk; 1002 struct sock *sk = sock->sk;
1008 struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr; 1003 struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr;
1009 ax25_dev *ax25_dev = NULL; 1004 ax25_dev *ax25_dev = NULL;
1010 ax25_address *call; 1005 ax25_uid_assoc *user;
1006 ax25_address call;
1011 ax25_cb *ax25; 1007 ax25_cb *ax25;
1012 int err = 0; 1008 int err = 0;
1013 1009
@@ -1026,9 +1022,15 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1026 if (addr->fsa_ax25.sax25_family != AF_AX25) 1022 if (addr->fsa_ax25.sax25_family != AF_AX25)
1027 return -EINVAL; 1023 return -EINVAL;
1028 1024
1029 call = ax25_findbyuid(current->euid); 1025 user = ax25_findbyuid(current->euid);
1030 if (call == NULL && ax25_uid_policy && !capable(CAP_NET_ADMIN)) { 1026 if (user) {
1031 return -EACCES; 1027 call = user->call;
1028 ax25_uid_put(user);
1029 } else {
1030 if (ax25_uid_policy && !capable(CAP_NET_ADMIN))
1031 return -EACCES;
1032
1033 call = addr->fsa_ax25.sax25_call;
1032 } 1034 }
1033 1035
1034 lock_sock(sk); 1036 lock_sock(sk);
@@ -1039,10 +1041,7 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1039 goto out; 1041 goto out;
1040 } 1042 }
1041 1043
1042 if (call == NULL) 1044 ax25->source_addr = call;
1043 ax25->source_addr = addr->fsa_ax25.sax25_call;
1044 else
1045 ax25->source_addr = *call;
1046 1045
1047 /* 1046 /*
1048 * User already set interface with SO_BINDTODEVICE 1047 * User already set interface with SO_BINDTODEVICE
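[Editorial note] Both this hunk and the ax25_route.c change below follow the same new contract: ax25_findbyuid() now returns a held (refcounted) ax25_uid_assoc, so the caller copies the callsign out and drops the reference, instead of keeping a bare pointer into the table. A toy userspace rendition of that lookup-hold-copy-put pattern; names and types are illustrative, not the kernel's:

/* Hedged sketch of the get/put discipline around the new lookup. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct assoc {
    int refcount;
    unsigned uid;
    char call[8];
};

static struct assoc *table[4];          /* toy table, NULL-terminated */

static struct assoc *find_by_uid_hold(unsigned uid)
{
    for (int i = 0; table[i]; i++)
        if (table[i]->uid == uid) {
            table[i]->refcount++;       /* kernel: ax25_uid_hold() */
            return table[i];
        }
    return NULL;
}

static void assoc_put(struct assoc *a)
{
    if (--a->refcount == 0)             /* kernel: ax25_uid_put() */
        free(a);
}

int main(void)
{
    struct assoc *a = malloc(sizeof(*a));
    *a = (struct assoc){ .refcount = 1, .uid = 1000 };
    strcpy(a->call, "N0CALL");
    table[0] = a;

    struct assoc *user = find_by_uid_hold(1000);
    char call[8] = "DEFAULT";
    if (user) {
        strcpy(call, user->call);       /* copy while the ref is held */
        assoc_put(user);                /* then drop it */
    }
    printf("bound call: %s\n", call);
    return 0;
}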
diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c
index 8adc0022cf58..edcaa897027c 100644
--- a/net/ax25/ax25_ds_in.c
+++ b/net/ax25/ax25_ds_in.c
@@ -22,8 +22,7 @@
22#include <linux/netdevice.h> 22#include <linux/netdevice.h>
23#include <linux/skbuff.h> 23#include <linux/skbuff.h>
24#include <net/sock.h> 24#include <net/sock.h>
25#include <net/ip.h> /* For ip_rcv */ 25#include <net/tcp_states.h>
26#include <net/tcp.h>
27#include <asm/uaccess.h> 26#include <asm/uaccess.h>
28#include <asm/system.h> 27#include <asm/system.h>
29#include <linux/fcntl.h> 28#include <linux/fcntl.h>
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 3a8b67316fc3..061083efc1dc 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -18,7 +18,7 @@
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/sockios.h> 19#include <linux/sockios.h>
20#include <linux/net.h> 20#include <linux/net.h>
21#include <net/tcp.h> 21#include <net/tcp_states.h>
22#include <net/ax25.h> 22#include <net/ax25.h>
23#include <linux/inet.h> 23#include <linux/inet.h>
24#include <linux/netdevice.h> 24#include <linux/netdevice.h>
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 3dc808fde33f..810c9c76c2e0 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -9,7 +9,6 @@
9 * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) 9 * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de)
10 * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de) 10 * Copyright (C) Hans-Joachim Hetscher DD8NE (dd8ne@bnv-bamberg.de)
11 */ 11 */
12#include <linux/config.h>
13#include <linux/errno.h> 12#include <linux/errno.h>
14#include <linux/types.h> 13#include <linux/types.h>
15#include <linux/socket.h> 14#include <linux/socket.h>
@@ -26,9 +25,7 @@
26#include <linux/skbuff.h> 25#include <linux/skbuff.h>
27#include <linux/netfilter.h> 26#include <linux/netfilter.h>
28#include <net/sock.h> 27#include <net/sock.h>
29#include <net/ip.h> /* For ip_rcv */ 28#include <net/tcp_states.h>
30#include <net/tcp.h>
31#include <net/arp.h> /* For arp_rcv */
32#include <asm/uaccess.h> 29#include <asm/uaccess.h>
33#include <asm/system.h> 30#include <asm/system.h>
34#include <linux/fcntl.h> 31#include <linux/fcntl.h>
@@ -114,7 +111,6 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb)
114 111
115 pid = *skb->data; 112 pid = *skb->data;
116 113
117#ifdef CONFIG_INET
118 if (pid == AX25_P_IP) { 114 if (pid == AX25_P_IP) {
119 /* working around a TCP bug to keep additional listeners 115 /* working around a TCP bug to keep additional listeners
120 * happy. TCP re-uses the buffer and destroys the original 116 * happy. TCP re-uses the buffer and destroys the original
@@ -132,10 +128,9 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb)
132 skb->dev = ax25->ax25_dev->dev; 128 skb->dev = ax25->ax25_dev->dev;
133 skb->pkt_type = PACKET_HOST; 129 skb->pkt_type = PACKET_HOST;
134 skb->protocol = htons(ETH_P_IP); 130 skb->protocol = htons(ETH_P_IP);
135 ip_rcv(skb, skb->dev, NULL); /* Wrong ptype */ 131 netif_rx(skb);
136 return 1; 132 return 1;
137 } 133 }
138#endif
139 if (pid == AX25_P_SEGMENT) { 134 if (pid == AX25_P_SEGMENT) {
140 skb_pull(skb, 1); /* Remove PID */ 135 skb_pull(skb, 1); /* Remove PID */
141 return ax25_rx_fragment(ax25, skb); 136 return ax25_rx_fragment(ax25, skb);
@@ -250,7 +245,6 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
250 245
251 /* Now we are pointing at the pid byte */ 246 /* Now we are pointing at the pid byte */
252 switch (skb->data[1]) { 247 switch (skb->data[1]) {
253#ifdef CONFIG_INET
254 case AX25_P_IP: 248 case AX25_P_IP:
255 skb_pull(skb,2); /* drop PID/CTRL */ 249 skb_pull(skb,2); /* drop PID/CTRL */
256 skb->h.raw = skb->data; 250 skb->h.raw = skb->data;
@@ -258,7 +252,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
258 skb->dev = dev; 252 skb->dev = dev;
259 skb->pkt_type = PACKET_HOST; 253 skb->pkt_type = PACKET_HOST;
260 skb->protocol = htons(ETH_P_IP); 254 skb->protocol = htons(ETH_P_IP);
261 ip_rcv(skb, dev, ptype); /* Note ptype here is the wrong one, fix me later */ 255 netif_rx(skb);
262 break; 256 break;
263 257
264 case AX25_P_ARP: 258 case AX25_P_ARP:
@@ -268,9 +262,8 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
268 skb->dev = dev; 262 skb->dev = dev;
269 skb->pkt_type = PACKET_HOST; 263 skb->pkt_type = PACKET_HOST;
270 skb->protocol = htons(ETH_P_ARP); 264 skb->protocol = htons(ETH_P_ARP);
271 arp_rcv(skb, dev, ptype); /* Note ptype here is wrong... */ 265 netif_rx(skb);
272 break; 266 break;
273#endif
274 case AX25_P_TEXT: 267 case AX25_P_TEXT:
275 /* Now find a suitable dgram socket */ 268 /* Now find a suitable dgram socket */
276 sk = ax25_get_socket(&dest, &src, SOCK_DGRAM); 269 sk = ax25_get_socket(&dest, &src, SOCK_DGRAM);
@@ -454,7 +447,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
454 * Receive an AX.25 frame via a SLIP interface. 447 * Receive an AX.25 frame via a SLIP interface.
455 */ 448 */
456int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, 449int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev,
457 struct packet_type *ptype) 450 struct packet_type *ptype, struct net_device *orig_dev)
458{ 451{
459 skb->sk = NULL; /* Initially we don't know who it's for */ 452 skb->sk = NULL; /* Initially we don't know who it's for */
460 skb->destructor = NULL; /* Who initializes this, dammit?! */ 453 skb->destructor = NULL; /* Who initializes this, dammit?! */
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 44b99b1ff9f8..c288526da4ce 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -422,8 +422,8 @@ static inline void ax25_adjust_path(ax25_address *addr, ax25_digi *digipeat)
422 */ 422 */
423int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr) 423int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
424{ 424{
425 ax25_uid_assoc *user;
425 ax25_route *ax25_rt; 426 ax25_route *ax25_rt;
426 ax25_address *call;
427 int err; 427 int err;
428 428
429 if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL) 429 if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL)
@@ -434,16 +434,18 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
434 goto put; 434 goto put;
435 } 435 }
436 436
437 if ((call = ax25_findbyuid(current->euid)) == NULL) { 437 user = ax25_findbyuid(current->euid);
438 if (user) {
439 ax25->source_addr = user->call;
440 ax25_uid_put(user);
441 } else {
438 if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) { 442 if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) {
439 err = -EPERM; 443 err = -EPERM;
440 goto put; 444 goto put;
441 } 445 }
442 call = (ax25_address *)ax25->ax25_dev->dev->dev_addr; 446 ax25->source_addr = *(ax25_address *)ax25->ax25_dev->dev->dev_addr;
443 } 447 }
444 448
445 ax25->source_addr = *call;
446
447 if (ax25_rt->digipeat != NULL) { 449 if (ax25_rt->digipeat != NULL) {
448 if ((ax25->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { 450 if ((ax25->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
449 err = -ENOMEM; 451 err = -ENOMEM;
diff --git a/net/ax25/ax25_std_in.c b/net/ax25/ax25_std_in.c
index 7131873322c4..f6ed283e9de8 100644
--- a/net/ax25/ax25_std_in.c
+++ b/net/ax25/ax25_std_in.c
@@ -29,8 +29,7 @@
29#include <linux/netdevice.h> 29#include <linux/netdevice.h>
30#include <linux/skbuff.h> 30#include <linux/skbuff.h>
31#include <net/sock.h> 31#include <net/sock.h>
32#include <net/ip.h> /* For ip_rcv */ 32#include <net/tcp_states.h>
33#include <net/tcp.h>
34#include <asm/uaccess.h> 33#include <asm/uaccess.h>
35#include <asm/system.h> 34#include <asm/system.h>
36#include <linux/fcntl.h> 35#include <linux/fcntl.h>
diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
index 066897bc0749..a29c480a4dc1 100644
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c
@@ -24,7 +24,7 @@
24#include <linux/netdevice.h> 24#include <linux/netdevice.h>
25#include <linux/skbuff.h> 25#include <linux/skbuff.h>
26#include <net/sock.h> 26#include <net/sock.h>
27#include <net/tcp.h> 27#include <net/tcp_states.h>
28#include <asm/uaccess.h> 28#include <asm/uaccess.h>
29#include <asm/system.h> 29#include <asm/system.h>
30#include <linux/fcntl.h> 30#include <linux/fcntl.h>
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 99694b57f6f5..c41dbe5fadee 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -24,7 +24,7 @@
24#include <linux/netdevice.h> 24#include <linux/netdevice.h>
25#include <linux/skbuff.h> 25#include <linux/skbuff.h>
26#include <net/sock.h> 26#include <net/sock.h>
27#include <net/tcp.h> 27#include <net/tcp_states.h>
28#include <asm/uaccess.h> 28#include <asm/uaccess.h>
29#include <asm/system.h> 29#include <asm/system.h>
30#include <linux/fcntl.h> 30#include <linux/fcntl.h>
@@ -76,7 +76,7 @@ void ax25_requeue_frames(ax25_cb *ax25)
76 if (skb_prev == NULL) 76 if (skb_prev == NULL)
77 skb_queue_head(&ax25->write_queue, skb); 77 skb_queue_head(&ax25->write_queue, skb);
78 else 78 else
79 skb_append(skb_prev, skb); 79 skb_append(skb_prev, skb, &ax25->write_queue);
80 skb_prev = skb; 80 skb_prev = skb;
81 } 81 }
82} 82}
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index cea6b7d19729..a8b3822f3ee4 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -28,6 +28,7 @@
28#include <linux/fcntl.h> 28#include <linux/fcntl.h>
29#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/interrupt.h> 30#include <linux/interrupt.h>
31#include <linux/list.h>
31#include <linux/notifier.h> 32#include <linux/notifier.h>
32#include <linux/proc_fs.h> 33#include <linux/proc_fs.h>
33#include <linux/seq_file.h> 34#include <linux/seq_file.h>
@@ -41,38 +42,41 @@
41 * Callsign/UID mapper. This is in kernel space for security on multi-amateur machines. 42 * Callsign/UID mapper. This is in kernel space for security on multi-amateur machines.
42 */ 43 */
43 44
44static ax25_uid_assoc *ax25_uid_list; 45HLIST_HEAD(ax25_uid_list);
45static DEFINE_RWLOCK(ax25_uid_lock); 46static DEFINE_RWLOCK(ax25_uid_lock);
46 47
47int ax25_uid_policy = 0; 48int ax25_uid_policy = 0;
48 49
49ax25_address *ax25_findbyuid(uid_t uid) 50ax25_uid_assoc *ax25_findbyuid(uid_t uid)
50{ 51{
51 ax25_uid_assoc *ax25_uid; 52 ax25_uid_assoc *ax25_uid, *res = NULL;
52 ax25_address *res = NULL; 53 struct hlist_node *node;
53 54
54 read_lock(&ax25_uid_lock); 55 read_lock(&ax25_uid_lock);
55 for (ax25_uid = ax25_uid_list; ax25_uid != NULL; ax25_uid = ax25_uid->next) { 56 ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
56 if (ax25_uid->uid == uid) { 57 if (ax25_uid->uid == uid) {
57 res = &ax25_uid->call; 58 ax25_uid_hold(ax25_uid);
59 res = ax25_uid;
58 break; 60 break;
59 } 61 }
60 } 62 }
61 read_unlock(&ax25_uid_lock); 63 read_unlock(&ax25_uid_lock);
62 64
63 return NULL; 65 return res;
64} 66}
65 67
66int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) 68int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
67{ 69{
68 ax25_uid_assoc *s, *ax25_uid; 70 ax25_uid_assoc *ax25_uid;
71 struct hlist_node *node;
72 ax25_uid_assoc *user;
69 unsigned long res; 73 unsigned long res;
70 74
71 switch (cmd) { 75 switch (cmd) {
72 case SIOCAX25GETUID: 76 case SIOCAX25GETUID:
73 res = -ENOENT; 77 res = -ENOENT;
74 read_lock(&ax25_uid_lock); 78 read_lock(&ax25_uid_lock);
75 for (ax25_uid = ax25_uid_list; ax25_uid != NULL; ax25_uid = ax25_uid->next) { 79 ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
76 if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) { 80 if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) {
77 res = ax25_uid->uid; 81 res = ax25_uid->uid;
78 break; 82 break;
@@ -85,19 +89,22 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
85 case SIOCAX25ADDUID: 89 case SIOCAX25ADDUID:
86 if (!capable(CAP_NET_ADMIN)) 90 if (!capable(CAP_NET_ADMIN))
87 return -EPERM; 91 return -EPERM;
88 if (ax25_findbyuid(sax->sax25_uid)) 92 user = ax25_findbyuid(sax->sax25_uid);
93 if (user) {
94 ax25_uid_put(user);
89 return -EEXIST; 95 return -EEXIST;
96 }
90 if (sax->sax25_uid == 0) 97 if (sax->sax25_uid == 0)
91 return -EINVAL; 98 return -EINVAL;
92 if ((ax25_uid = kmalloc(sizeof(*ax25_uid), GFP_KERNEL)) == NULL) 99 if ((ax25_uid = kmalloc(sizeof(*ax25_uid), GFP_KERNEL)) == NULL)
93 return -ENOMEM; 100 return -ENOMEM;
94 101
102 atomic_set(&ax25_uid->refcount, 1);
95 ax25_uid->uid = sax->sax25_uid; 103 ax25_uid->uid = sax->sax25_uid;
96 ax25_uid->call = sax->sax25_call; 104 ax25_uid->call = sax->sax25_call;
97 105
98 write_lock(&ax25_uid_lock); 106 write_lock(&ax25_uid_lock);
99 ax25_uid->next = ax25_uid_list; 107 hlist_add_head(&ax25_uid->uid_node, &ax25_uid_list);
100 ax25_uid_list = ax25_uid;
101 write_unlock(&ax25_uid_lock); 108 write_unlock(&ax25_uid_lock);
102 109
103 return 0; 110 return 0;
@@ -106,34 +113,21 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
106 if (!capable(CAP_NET_ADMIN)) 113 if (!capable(CAP_NET_ADMIN))
107 return -EPERM; 114 return -EPERM;
108 115
116 ax25_uid = NULL;
109 write_lock(&ax25_uid_lock); 117 write_lock(&ax25_uid_lock);
110 for (ax25_uid = ax25_uid_list; ax25_uid != NULL; ax25_uid = ax25_uid->next) { 118 ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
111 if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) { 119 if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0)
112 break; 120 break;
113 }
114 } 121 }
115 if (ax25_uid == NULL) { 122 if (ax25_uid == NULL) {
116 write_unlock(&ax25_uid_lock); 123 write_unlock(&ax25_uid_lock);
117 return -ENOENT; 124 return -ENOENT;
118 } 125 }
119 if ((s = ax25_uid_list) == ax25_uid) { 126 hlist_del_init(&ax25_uid->uid_node);
120 ax25_uid_list = s->next; 127 ax25_uid_put(ax25_uid);
121 write_unlock(&ax25_uid_lock);
122 kfree(ax25_uid);
123 return 0;
124 }
125 while (s != NULL && s->next != NULL) {
126 if (s->next == ax25_uid) {
127 s->next = ax25_uid->next;
128 write_unlock(&ax25_uid_lock);
129 kfree(ax25_uid);
130 return 0;
131 }
132 s = s->next;
133 }
134 write_unlock(&ax25_uid_lock); 128 write_unlock(&ax25_uid_lock);
135 129
136 return -ENOENT; 130 return 0;
137 131
138 default: 132 default:
139 return -EINVAL; 133 return -EINVAL;
@@ -147,13 +141,11 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
147static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos) 141static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos)
148{ 142{
149 struct ax25_uid_assoc *pt; 143 struct ax25_uid_assoc *pt;
150 int i = 1; 144 struct hlist_node *node;
145 int i = 0;
151 146
152 read_lock(&ax25_uid_lock); 147 read_lock(&ax25_uid_lock);
153 if (*pos == 0) 148 ax25_uid_for_each(pt, node, &ax25_uid_list) {
154 return SEQ_START_TOKEN;
155
156 for (pt = ax25_uid_list; pt != NULL; pt = pt->next) {
157 if (i == *pos) 149 if (i == *pos)
158 return pt; 150 return pt;
159 ++i; 151 ++i;
@@ -164,8 +156,9 @@ static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos)
164static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos) 156static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos)
165{ 157{
166 ++*pos; 158 ++*pos;
167 return (v == SEQ_START_TOKEN) ? ax25_uid_list : 159
168 ((struct ax25_uid_assoc *) v)->next; 160 return hlist_entry(((ax25_uid_assoc *)v)->uid_node.next,
161 ax25_uid_assoc, uid_node);
169} 162}
170 163
171static void ax25_uid_seq_stop(struct seq_file *seq, void *v) 164static void ax25_uid_seq_stop(struct seq_file *seq, void *v)
@@ -179,7 +172,6 @@ static int ax25_uid_seq_show(struct seq_file *seq, void *v)
179 seq_printf(seq, "Policy: %d\n", ax25_uid_policy); 172 seq_printf(seq, "Policy: %d\n", ax25_uid_policy);
180 else { 173 else {
181 struct ax25_uid_assoc *pt = v; 174 struct ax25_uid_assoc *pt = v;
182
183 175
184 seq_printf(seq, "%6d %s\n", pt->uid, ax2asc(&pt->call)); 176 seq_printf(seq, "%6d %s\n", pt->uid, ax2asc(&pt->call));
185 } 177 }
@@ -213,16 +205,13 @@ struct file_operations ax25_uid_fops = {
213 */ 205 */
214void __exit ax25_uid_free(void) 206void __exit ax25_uid_free(void)
215{ 207{
216 ax25_uid_assoc *s, *ax25_uid; 208 ax25_uid_assoc *ax25_uid;
209 struct hlist_node *node;
217 210
218 write_lock(&ax25_uid_lock); 211 write_lock(&ax25_uid_lock);
219 ax25_uid = ax25_uid_list; 212 ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
220 while (ax25_uid != NULL) { 213 hlist_del_init(&ax25_uid->uid_node);
221 s = ax25_uid; 214 ax25_uid_put(ax25_uid);
222 ax25_uid = ax25_uid->next;
223
224 kfree(s);
225 } 215 }
226 ax25_uid_list = NULL;
227 write_unlock(&ax25_uid_lock); 216 write_unlock(&ax25_uid_lock);
228} 217}
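[Editorial note] The conversion above swaps the hand-rolled singly linked list for a kernel hlist: the head is a single pointer, and each node's pprev back-link makes unlinking O(1), which is what lets this patch delete the list-walking removal loop. A miniature userspace model of those semantics, simplified from <linux/list.h>:

/* Hedged sketch of hlist mechanics; real kernel nodes embed the link
 * inside a containing struct and use hlist_entry() to recover it. */
#include <stdio.h>

struct hnode { struct hnode *next, **pprev; int uid; };
struct hhead { struct hnode *first; };

static void hlist_add_head(struct hnode *n, struct hhead *h)
{
    n->next = h->first;
    if (h->first)
        h->first->pprev = &n->next;
    h->first = n;
    n->pprev = &h->first;
}

static void hlist_del(struct hnode *n)
{
    *n->pprev = n->next;            /* unlink in O(1), no head needed */
    if (n->next)
        n->next->pprev = n->pprev;
}

int main(void)
{
    struct hhead head = { 0 };
    struct hnode a = { .uid = 1 }, b = { .uid = 2 };
    hlist_add_head(&a, &head);
    hlist_add_head(&b, &head);      /* list: b -> a */
    hlist_del(&b);                  /* list: a */
    for (struct hnode *p = head.first; p; p = p->next)
        printf("uid %d\n", p->uid);
    return 0;
}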
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index fb5524365bc2..55dc42eac92c 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -191,7 +191,7 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
191 191
192 /* Special commands */ 192 /* Special commands */
193 while ((skb = skb_dequeue(&hdev->driver_init))) { 193 while ((skb = skb_dequeue(&hdev->driver_init))) {
194 skb->pkt_type = HCI_COMMAND_PKT; 194 bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
195 skb->dev = (void *) hdev; 195 skb->dev = (void *) hdev;
196 skb_queue_tail(&hdev->cmd_q, skb); 196 skb_queue_tail(&hdev->cmd_q, skb);
197 hci_sched_cmd(hdev); 197 hci_sched_cmd(hdev);
@@ -299,7 +299,6 @@ struct hci_dev *hci_dev_get(int index)
299 read_unlock(&hci_dev_list_lock); 299 read_unlock(&hci_dev_list_lock);
300 return hdev; 300 return hdev;
301} 301}
302EXPORT_SYMBOL(hci_dev_get);
303 302
304/* ---- Inquiry support ---- */ 303/* ---- Inquiry support ---- */
305static void inquiry_cache_flush(struct hci_dev *hdev) 304static void inquiry_cache_flush(struct hci_dev *hdev)
@@ -996,11 +995,11 @@ static int hci_send_frame(struct sk_buff *skb)
996 return -ENODEV; 995 return -ENODEV;
997 } 996 }
998 997
999 BT_DBG("%s type %d len %d", hdev->name, skb->pkt_type, skb->len); 998 BT_DBG("%s type %d len %d", hdev->name, bt_cb(skb)->pkt_type, skb->len);
1000 999
1001 if (atomic_read(&hdev->promisc)) { 1000 if (atomic_read(&hdev->promisc)) {
1002 /* Time stamp */ 1001 /* Time stamp */
1003 do_gettimeofday(&skb->stamp); 1002 __net_timestamp(skb);
1004 1003
1005 hci_send_to_sock(hdev, skb); 1004 hci_send_to_sock(hdev, skb);
1006 } 1005 }
@@ -1035,14 +1034,13 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *p
1035 1034
1036 BT_DBG("skb len %d", skb->len); 1035 BT_DBG("skb len %d", skb->len);
1037 1036
1038 skb->pkt_type = HCI_COMMAND_PKT; 1037 bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
1039 skb->dev = (void *) hdev; 1038 skb->dev = (void *) hdev;
1040 skb_queue_tail(&hdev->cmd_q, skb); 1039 skb_queue_tail(&hdev->cmd_q, skb);
1041 hci_sched_cmd(hdev); 1040 hci_sched_cmd(hdev);
1042 1041
1043 return 0; 1042 return 0;
1044} 1043}
1045EXPORT_SYMBOL(hci_send_cmd);
1046 1044
1047/* Get data from the previously sent command */ 1045/* Get data from the previously sent command */
1048void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 ogf, __u16 ocf) 1046void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 ogf, __u16 ocf)
@@ -1083,7 +1081,7 @@ int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
1083 BT_DBG("%s conn %p flags 0x%x", hdev->name, conn, flags); 1081 BT_DBG("%s conn %p flags 0x%x", hdev->name, conn, flags);
1084 1082
1085 skb->dev = (void *) hdev; 1083 skb->dev = (void *) hdev;
1086 skb->pkt_type = HCI_ACLDATA_PKT; 1084 bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT;
1087 hci_add_acl_hdr(skb, conn->handle, flags | ACL_START); 1085 hci_add_acl_hdr(skb, conn->handle, flags | ACL_START);
1088 1086
1089 if (!(list = skb_shinfo(skb)->frag_list)) { 1087 if (!(list = skb_shinfo(skb)->frag_list)) {
@@ -1105,7 +1103,7 @@ int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
1105 skb = list; list = list->next; 1103 skb = list; list = list->next;
1106 1104
1107 skb->dev = (void *) hdev; 1105 skb->dev = (void *) hdev;
1108 skb->pkt_type = HCI_ACLDATA_PKT; 1106 bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT;
1109 hci_add_acl_hdr(skb, conn->handle, flags | ACL_CONT); 1107 hci_add_acl_hdr(skb, conn->handle, flags | ACL_CONT);
1110 1108
1111 BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); 1109 BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len);
@@ -1141,7 +1139,7 @@ int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
1141 memcpy(skb->h.raw, &hdr, HCI_SCO_HDR_SIZE); 1139 memcpy(skb->h.raw, &hdr, HCI_SCO_HDR_SIZE);
1142 1140
1143 skb->dev = (void *) hdev; 1141 skb->dev = (void *) hdev;
1144 skb->pkt_type = HCI_SCODATA_PKT; 1142 bt_cb(skb)->pkt_type = HCI_SCODATA_PKT;
1145 skb_queue_tail(&conn->data_q, skb); 1143 skb_queue_tail(&conn->data_q, skb);
1146 hci_sched_tx(hdev); 1144 hci_sched_tx(hdev);
1147 return 0; 1145 return 0;
@@ -1371,7 +1369,7 @@ void hci_rx_task(unsigned long arg)
1371 1369
1372 if (test_bit(HCI_INIT, &hdev->flags)) { 1370 if (test_bit(HCI_INIT, &hdev->flags)) {
1373 /* Don't process data packets in this state. */ 1371
1374 switch (skb->pkt_type) { 1372 switch (bt_cb(skb)->pkt_type) {
1375 case HCI_ACLDATA_PKT: 1373 case HCI_ACLDATA_PKT:
1376 case HCI_SCODATA_PKT: 1374 case HCI_SCODATA_PKT:
1377 kfree_skb(skb); 1375 kfree_skb(skb);
@@ -1380,7 +1378,7 @@ void hci_rx_task(unsigned long arg)
1380 } 1378 }
1381 1379
1382 /* Process frame */ 1380 /* Process frame */
1383 switch (skb->pkt_type) { 1381 switch (bt_cb(skb)->pkt_type) {
1384 case HCI_EVENT_PKT: 1382 case HCI_EVENT_PKT:
1385 hci_event_packet(hdev, skb); 1383 hci_event_packet(hdev, skb);
1386 break; 1384 break;
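[Editorial note] Every hunk in this file moves pkt_type off the shared sk_buff and into the Bluetooth-private control buffer behind the bt_cb() macro. A hedged sketch of that mechanism, with a stripped-down stand-in for sk_buff:

/* Per-skb protocol metadata lives in the skb's cb[] scratch area,
 * reached through a casting macro. Structures here are simplified. */
#include <stdio.h>

struct sk_buff_lite { char cb[48]; int len; };  /* stand-in for sk_buff */

struct bt_skb_cb { unsigned char pkt_type; unsigned char incoming; };
#define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb))

int main(void)
{
    struct sk_buff_lite skb = { .len = 3 };
    bt_cb(&skb)->pkt_type = 0x01;       /* e.g. HCI_COMMAND_PKT */
    printf("type %u len %d\n", bt_cb(&skb)->pkt_type, skb.len);
    return 0;
}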
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index c4b592b4ef10..d6da0939216d 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -484,14 +484,18 @@ static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff
484/* Inquiry Result */ 484/* Inquiry Result */
485static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) 485static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb)
486{ 486{
487 struct inquiry_data data;
487 struct inquiry_info *info = (struct inquiry_info *) (skb->data + 1); 488 struct inquiry_info *info = (struct inquiry_info *) (skb->data + 1);
488 int num_rsp = *((__u8 *) skb->data); 489 int num_rsp = *((__u8 *) skb->data);
489 490
490 BT_DBG("%s num_rsp %d", hdev->name, num_rsp); 491 BT_DBG("%s num_rsp %d", hdev->name, num_rsp);
491 492
493 if (!num_rsp)
494 return;
495
492 hci_dev_lock(hdev); 496 hci_dev_lock(hdev);
497
493 for (; num_rsp; num_rsp--) { 498 for (; num_rsp; num_rsp--) {
494 struct inquiry_data data;
495 bacpy(&data.bdaddr, &info->bdaddr); 499 bacpy(&data.bdaddr, &info->bdaddr);
496 data.pscan_rep_mode = info->pscan_rep_mode; 500 data.pscan_rep_mode = info->pscan_rep_mode;
497 data.pscan_period_mode = info->pscan_period_mode; 501 data.pscan_period_mode = info->pscan_period_mode;
@@ -502,30 +506,55 @@ static inline void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *
502 info++; 506 info++;
503 hci_inquiry_cache_update(hdev, &data); 507 hci_inquiry_cache_update(hdev, &data);
504 } 508 }
509
505 hci_dev_unlock(hdev); 510 hci_dev_unlock(hdev);
506} 511}
507 512
508/* Inquiry Result With RSSI */ 513/* Inquiry Result With RSSI */
509static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct sk_buff *skb) 514static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct sk_buff *skb)
510{ 515{
511 struct inquiry_info_with_rssi *info = (struct inquiry_info_with_rssi *) (skb->data + 1); 516 struct inquiry_data data;
512 int num_rsp = *((__u8 *) skb->data); 517 int num_rsp = *((__u8 *) skb->data);
513 518
514 BT_DBG("%s num_rsp %d", hdev->name, num_rsp); 519 BT_DBG("%s num_rsp %d", hdev->name, num_rsp);
515 520
521 if (!num_rsp)
522 return;
523
516 hci_dev_lock(hdev); 524 hci_dev_lock(hdev);
517 for (; num_rsp; num_rsp--) { 525
518 struct inquiry_data data; 526 if ((skb->len - 1) / num_rsp != sizeof(struct inquiry_info_with_rssi)) {
519 bacpy(&data.bdaddr, &info->bdaddr); 527 struct inquiry_info_with_rssi_and_pscan_mode *info =
520 data.pscan_rep_mode = info->pscan_rep_mode; 528 (struct inquiry_info_with_rssi_and_pscan_mode *) (skb->data + 1);
521 data.pscan_period_mode = info->pscan_period_mode; 529
522 data.pscan_mode = 0x00; 530 for (; num_rsp; num_rsp--) {
523 memcpy(data.dev_class, info->dev_class, 3); 531 bacpy(&data.bdaddr, &info->bdaddr);
524 data.clock_offset = info->clock_offset; 532 data.pscan_rep_mode = info->pscan_rep_mode;
525 data.rssi = info->rssi; 533 data.pscan_period_mode = info->pscan_period_mode;
526 info++; 534 data.pscan_mode = info->pscan_mode;
527 hci_inquiry_cache_update(hdev, &data); 535 memcpy(data.dev_class, info->dev_class, 3);
536 data.clock_offset = info->clock_offset;
537 data.rssi = info->rssi;
538 info++;
539 hci_inquiry_cache_update(hdev, &data);
540 }
541 } else {
542 struct inquiry_info_with_rssi *info =
543 (struct inquiry_info_with_rssi *) (skb->data + 1);
544
545 for (; num_rsp; num_rsp--) {
546 bacpy(&data.bdaddr, &info->bdaddr);
547 data.pscan_rep_mode = info->pscan_rep_mode;
548 data.pscan_period_mode = info->pscan_period_mode;
549 data.pscan_mode = 0x00;
550 memcpy(data.dev_class, info->dev_class, 3);
551 data.clock_offset = info->clock_offset;
552 data.rssi = info->rssi;
553 info++;
554 hci_inquiry_cache_update(hdev, &data);
555 }
528 } 556 }
557
529 hci_dev_unlock(hdev); 558 hci_dev_unlock(hdev);
530} 559}
531 560
@@ -865,6 +894,24 @@ static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *sk
865 hci_dev_unlock(hdev); 894 hci_dev_unlock(hdev);
866} 895}
867 896
897/* Page Scan Repetition Mode */
898static inline void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff *skb)
899{
900 struct hci_ev_pscan_rep_mode *ev = (struct hci_ev_pscan_rep_mode *) skb->data;
901 struct inquiry_entry *ie;
902
903 BT_DBG("%s", hdev->name);
904
905 hci_dev_lock(hdev);
906
907 if ((ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr))) {
908 ie->data.pscan_rep_mode = ev->pscan_rep_mode;
909 ie->timestamp = jiffies;
910 }
911
912 hci_dev_unlock(hdev);
913}
914
868void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) 915void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
869{ 916{
870 struct hci_event_hdr *hdr = (struct hci_event_hdr *) skb->data; 917 struct hci_event_hdr *hdr = (struct hci_event_hdr *) skb->data;
@@ -937,6 +984,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
937 hci_clock_offset_evt(hdev, skb); 984 hci_clock_offset_evt(hdev, skb);
938 break; 985 break;
939 986
987 case HCI_EV_PSCAN_REP_MODE:
988 hci_pscan_rep_mode_evt(hdev, skb);
989 break;
990
940 case HCI_EV_CMD_STATUS: 991 case HCI_EV_CMD_STATUS:
941 cs = (struct hci_ev_cmd_status *) skb->data; 992 cs = (struct hci_ev_cmd_status *) skb->data;
942 skb_pull(skb, sizeof(cs)); 993 skb_pull(skb, sizeof(cs));
@@ -1035,9 +1086,11 @@ void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data)
1035 ev->type = type; 1086 ev->type = type;
1036 memcpy(ev->data, data, dlen); 1087 memcpy(ev->data, data, dlen);
1037 1088
1038 skb->pkt_type = HCI_EVENT_PKT; 1089 bt_cb(skb)->incoming = 1;
1090 __net_timestamp(skb);
1091
1092 bt_cb(skb)->pkt_type = HCI_EVENT_PKT;
1039 skb->dev = (void *) hdev; 1093 skb->dev = (void *) hdev;
1040 hci_send_to_sock(hdev, skb); 1094 hci_send_to_sock(hdev, skb);
1041 kfree_skb(skb); 1095 kfree_skb(skb);
1042} 1096}
1043EXPORT_SYMBOL(hci_si_event);
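[Editorial note] The interesting part of the rewritten RSSI handler is how it tells the two event layouts apart: the payload is one count byte followed by num_rsp fixed-size records, so the per-record size falls out of a division. A compilable sketch of that check, with illustrative packed stand-ins for the two inquiry_info structs:

/* Hedged sketch: struct layouts approximate the two HCI event record
 * variants only in size, which is all the check depends on. */
#include <stdio.h>
#include <stdint.h>

#pragma pack(push, 1)
struct info_rssi {                 /* stand-in for inquiry_info_with_rssi */
    uint8_t  bdaddr[6];
    uint8_t  pscan_rep_mode, pscan_period_mode;
    uint8_t  dev_class[3];
    uint16_t clock_offset;
    int8_t   rssi;
};
struct info_rssi_pscan {           /* ...with the extra pscan_mode byte */
    uint8_t  bdaddr[6];
    uint8_t  pscan_rep_mode, pscan_period_mode, pscan_mode;
    uint8_t  dev_class[3];
    uint16_t clock_offset;
    int8_t   rssi;
};
#pragma pack(pop)

int main(void)
{
    /* Fake event payload: one count byte plus two larger-variant records. */
    size_t skb_len = 1 + 2 * sizeof(struct info_rssi_pscan);
    int num_rsp = 2;

    if ((skb_len - 1) / num_rsp != sizeof(struct info_rssi))
        printf("parse pscan_mode variant (%zu bytes/record)\n",
               (skb_len - 1) / (size_t)num_rsp);
    else
        printf("parse plain RSSI variant\n");
    return 0;
}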
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index ebdcce5e7ca0..32ef7975a139 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -110,11 +110,11 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
110 /* Apply filter */ 110 /* Apply filter */
111 flt = &hci_pi(sk)->filter; 111 flt = &hci_pi(sk)->filter;
112 112
113 if (!test_bit((skb->pkt_type == HCI_VENDOR_PKT) ? 113 if (!test_bit((bt_cb(skb)->pkt_type == HCI_VENDOR_PKT) ?
114 0 : (skb->pkt_type & HCI_FLT_TYPE_BITS), &flt->type_mask)) 114 0 : (bt_cb(skb)->pkt_type & HCI_FLT_TYPE_BITS), &flt->type_mask))
115 continue; 115 continue;
116 116
117 if (skb->pkt_type == HCI_EVENT_PKT) { 117 if (bt_cb(skb)->pkt_type == HCI_EVENT_PKT) {
118 register int evt = (*(__u8 *)skb->data & HCI_FLT_EVENT_BITS); 118 register int evt = (*(__u8 *)skb->data & HCI_FLT_EVENT_BITS);
119 119
120 if (!hci_test_bit(evt, &flt->event_mask)) 120 if (!hci_test_bit(evt, &flt->event_mask))
@@ -131,7 +131,7 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
131 continue; 131 continue;
132 132
133 /* Put type byte before the data */ 133 /* Put type byte before the data */
134 memcpy(skb_push(nskb, 1), &nskb->pkt_type, 1); 134 memcpy(skb_push(nskb, 1), &bt_cb(nskb)->pkt_type, 1);
135 135
136 if (sock_queue_rcv_skb(sk, nskb)) 136 if (sock_queue_rcv_skb(sk, nskb))
137 kfree_skb(nskb); 137 kfree_skb(nskb);
@@ -327,11 +327,17 @@ static inline void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_
327{ 327{
328 __u32 mask = hci_pi(sk)->cmsg_mask; 328 __u32 mask = hci_pi(sk)->cmsg_mask;
329 329
330 if (mask & HCI_CMSG_DIR) 330 if (mask & HCI_CMSG_DIR) {
331 put_cmsg(msg, SOL_HCI, HCI_CMSG_DIR, sizeof(int), &bt_cb(skb)->incoming); 331 int incoming = bt_cb(skb)->incoming;
332 put_cmsg(msg, SOL_HCI, HCI_CMSG_DIR, sizeof(incoming), &incoming);
333 }
334
335 if (mask & HCI_CMSG_TSTAMP) {
336 struct timeval tv;
332 337
333 if (mask & HCI_CMSG_TSTAMP) 338 skb_get_timestamp(skb, &tv);
334 put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, sizeof(skb->stamp), &skb->stamp); 339 put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, sizeof(tv), &tv);
340 }
335} 341}
336 342
337static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, 343static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
@@ -405,11 +411,11 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
405 goto drop; 411 goto drop;
406 } 412 }
407 413
408 skb->pkt_type = *((unsigned char *) skb->data); 414 bt_cb(skb)->pkt_type = *((unsigned char *) skb->data);
409 skb_pull(skb, 1); 415 skb_pull(skb, 1);
410 skb->dev = (void *) hdev; 416 skb->dev = (void *) hdev;
411 417
412 if (skb->pkt_type == HCI_COMMAND_PKT) { 418 if (bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) {
413 u16 opcode = __le16_to_cpu(get_unaligned((u16 *)skb->data)); 419 u16 opcode = __le16_to_cpu(get_unaligned((u16 *)skb->data));
414 u16 ogf = hci_opcode_ogf(opcode); 420 u16 ogf = hci_opcode_ogf(opcode);
415 u16 ocf = hci_opcode_ocf(opcode); 421 u16 ocf = hci_opcode_ocf(opcode);
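[Editorial note] Copying the timestamp into a local struct timeval before put_cmsg() gives the ancillary payload a stable userspace-visible layout instead of exposing skb internals. The userspace sketch below builds and decodes an equivalent cmsg by hand; the level/type constants are stand-ins, not the real SOL_HCI values.

/* Hedged analog of put_cmsg()/recvmsg ancillary-data handling. */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/time.h>

int main(void)
{
    union {                               /* keep the buffer cmsghdr-aligned */
        char buf[CMSG_SPACE(sizeof(struct timeval))];
        struct cmsghdr align;
    } ctl;
    struct msghdr msg = { .msg_control = ctl.buf,
                          .msg_controllen = sizeof(ctl.buf) };
    struct timeval tv = { .tv_sec = 12, .tv_usec = 34 };

    /* "Kernel" side (put_cmsg analog): append one level/type/payload item. */
    struct cmsghdr *c = CMSG_FIRSTHDR(&msg);
    c->cmsg_level = SOL_SOCKET;           /* the real code uses SOL_HCI */
    c->cmsg_type  = 2;                    /* stand-in for HCI_CMSG_TSTAMP */
    c->cmsg_len   = CMSG_LEN(sizeof(tv));
    memcpy(CMSG_DATA(c), &tv, sizeof(tv));

    /* User side: walk the ancillary data back out of the msghdr. */
    for (c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c)) {
        struct timeval out;
        memcpy(&out, CMSG_DATA(c), sizeof(out));
        printf("stamp %ld.%06ld\n", (long)out.tv_sec, (long)out.tv_usec);
    }
    return 0;
}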
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 32fccfb5bfa5..d3d6bc547212 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -372,7 +372,7 @@ static struct proto l2cap_proto = {
372 .obj_size = sizeof(struct l2cap_pinfo) 372 .obj_size = sizeof(struct l2cap_pinfo)
373}; 373};
374 374
375static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, int prio) 375static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio)
376{ 376{
377 struct sock *sk; 377 struct sock *sk;
378 378
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index 9efb0a093612..ee6a66979913 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -34,31 +34,6 @@
34 34
35#include <net/bluetooth/bluetooth.h> 35#include <net/bluetooth/bluetooth.h>
36 36
37void bt_dump(char *pref, __u8 *buf, int count)
38{
39 char *ptr;
40 char line[100];
41 unsigned int i;
42
43 printk(KERN_INFO "%s: dump, len %d\n", pref, count);
44
45 ptr = line;
46 *ptr = 0;
47 for (i = 0; i < count; i++) {
48 ptr += sprintf(ptr, " %2.2X", buf[i]);
49
50 if (i && !((i + 1) % 20)) {
51 printk(KERN_INFO "%s:%s\n", pref, line);
52 ptr = line;
53 *ptr = 0;
54 }
55 }
56
57 if (line[0])
58 printk(KERN_INFO "%s:%s\n", pref, line);
59}
60EXPORT_SYMBOL(bt_dump);
61
62void baswap(bdaddr_t *dst, bdaddr_t *src) 37void baswap(bdaddr_t *dst, bdaddr_t *src)
63{ 38{
64 unsigned char *d = (unsigned char *) dst; 39 unsigned char *d = (unsigned char *) dst;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index e9e6fda66f1a..173f46e8cdae 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -21,10 +21,6 @@
21 SOFTWARE IS DISCLAIMED. 21 SOFTWARE IS DISCLAIMED.
22*/ 22*/
23 23
24/*
25 RPN support - Dirk Husemann <hud@zurich.ibm.com>
26*/
27
28/* 24/*
29 * Bluetooth RFCOMM core. 25 * Bluetooth RFCOMM core.
30 * 26 *
@@ -115,10 +111,10 @@ static void rfcomm_session_del(struct rfcomm_session *s);
115#define __get_mcc_len(b) ((b & 0xfe) >> 1) 111#define __get_mcc_len(b) ((b & 0xfe) >> 1)
116 112
117/* RPN macros */ 113/* RPN macros */
118#define __rpn_line_settings(data, stop, parity) ((data & 0x3) | ((stop & 0x1) << 2) | ((parity & 0x3) << 3)) 114#define __rpn_line_settings(data, stop, parity) ((data & 0x3) | ((stop & 0x1) << 2) | ((parity & 0x7) << 3))
119#define __get_rpn_data_bits(line) ((line) & 0x3) 115#define __get_rpn_data_bits(line) ((line) & 0x3)
120#define __get_rpn_stop_bits(line) (((line) >> 2) & 0x1) 116#define __get_rpn_stop_bits(line) (((line) >> 2) & 0x1)
121#define __get_rpn_parity(line) (((line) >> 3) & 0x3) 117#define __get_rpn_parity(line) (((line) >> 3) & 0x7)
122 118
123static inline void rfcomm_schedule(uint event) 119static inline void rfcomm_schedule(uint event)
124{ 120{
@@ -233,7 +229,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d)
233 d->rx_credits = RFCOMM_DEFAULT_CREDITS; 229 d->rx_credits = RFCOMM_DEFAULT_CREDITS;
234} 230}
235 231
236struct rfcomm_dlc *rfcomm_dlc_alloc(int prio) 232struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio)
237{ 233{
238 struct rfcomm_dlc *d = kmalloc(sizeof(*d), prio); 234 struct rfcomm_dlc *d = kmalloc(sizeof(*d), prio);
239 if (!d) 235 if (!d)
@@ -389,8 +385,6 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
389 rfcomm_dlc_unlock(d); 385 rfcomm_dlc_unlock(d);
390 386
391 skb_queue_purge(&d->tx_queue); 387 skb_queue_purge(&d->tx_queue);
392 rfcomm_session_put(s);
393
394 rfcomm_dlc_unlink(d); 388 rfcomm_dlc_unlink(d);
395 } 389 }
396 390
@@ -600,8 +594,6 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst
600 goto failed; 594 goto failed;
601 } 595 }
602 596
603 rfcomm_session_hold(s);
604
605 s->initiator = 1; 597 s->initiator = 1;
606 598
607 bacpy(&addr.l2_bdaddr, dst); 599 bacpy(&addr.l2_bdaddr, dst);
@@ -784,10 +776,10 @@ static int rfcomm_send_pn(struct rfcomm_session *s, int cr, struct rfcomm_dlc *d
784 return rfcomm_send_frame(s, buf, ptr - buf); 776 return rfcomm_send_frame(s, buf, ptr - buf);
785} 777}
786 778
787static int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, 779int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci,
788 u8 bit_rate, u8 data_bits, u8 stop_bits, 780 u8 bit_rate, u8 data_bits, u8 stop_bits,
789 u8 parity, u8 flow_ctrl_settings, 781 u8 parity, u8 flow_ctrl_settings,
790 u8 xon_char, u8 xoff_char, u16 param_mask) 782 u8 xon_char, u8 xoff_char, u16 param_mask)
791{ 783{
792 struct rfcomm_hdr *hdr; 784 struct rfcomm_hdr *hdr;
793 struct rfcomm_mcc *mcc; 785 struct rfcomm_mcc *mcc;
@@ -795,9 +787,9 @@ static int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci,
795 u8 buf[16], *ptr = buf; 787 u8 buf[16], *ptr = buf;
796 788
797 BT_DBG("%p cr %d dlci %d bit_r 0x%x data_b 0x%x stop_b 0x%x parity 0x%x" 789 BT_DBG("%p cr %d dlci %d bit_r 0x%x data_b 0x%x stop_b 0x%x parity 0x%x"
798 "flwc_s 0x%x xon_c 0x%x xoff_c 0x%x p_mask 0x%x", 790 " flwc_s 0x%x xon_c 0x%x xoff_c 0x%x p_mask 0x%x",
799 s, cr, dlci, bit_rate, data_bits, stop_bits, parity, 791 s, cr, dlci, bit_rate, data_bits, stop_bits, parity,
800 flow_ctrl_settings, xon_char, xoff_char, param_mask); 792 flow_ctrl_settings, xon_char, xoff_char, param_mask);
801 793
802 hdr = (void *) ptr; ptr += sizeof(*hdr); 794 hdr = (void *) ptr; ptr += sizeof(*hdr);
803 hdr->addr = __addr(s->initiator, 0); 795 hdr->addr = __addr(s->initiator, 0);
@@ -1269,16 +1261,16 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
1269 u8 xon_char = 0; 1261 u8 xon_char = 0;
1270 u8 xoff_char = 0; 1262 u8 xoff_char = 0;
1271 u16 rpn_mask = RFCOMM_RPN_PM_ALL; 1263 u16 rpn_mask = RFCOMM_RPN_PM_ALL;
1272 1264
1273 BT_DBG("dlci %d cr %d len 0x%x bitr 0x%x line 0x%x flow 0x%x xonc 0x%x xoffc 0x%x pm 0x%x", 1265 BT_DBG("dlci %d cr %d len 0x%x bitr 0x%x line 0x%x flow 0x%x xonc 0x%x xoffc 0x%x pm 0x%x",
1274 dlci, cr, len, rpn->bit_rate, rpn->line_settings, rpn->flow_ctrl, 1266 dlci, cr, len, rpn->bit_rate, rpn->line_settings, rpn->flow_ctrl,
1275 rpn->xon_char, rpn->xoff_char, rpn->param_mask); 1267 rpn->xon_char, rpn->xoff_char, rpn->param_mask);
1276 1268
1277 if (!cr) 1269 if (!cr)
1278 return 0; 1270 return 0;
1279 1271
1280 if (len == 1) { 1272 if (len == 1) {
1281 /* request: return default setting */ 1273 /* This is a request, return default settings */
1282 bit_rate = RFCOMM_RPN_BR_115200; 1274 bit_rate = RFCOMM_RPN_BR_115200;
1283 data_bits = RFCOMM_RPN_DATA_8; 1275 data_bits = RFCOMM_RPN_DATA_8;
1284 stop_bits = RFCOMM_RPN_STOP_1; 1276 stop_bits = RFCOMM_RPN_STOP_1;
@@ -1286,11 +1278,12 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
1286 flow_ctrl = RFCOMM_RPN_FLOW_NONE; 1278 flow_ctrl = RFCOMM_RPN_FLOW_NONE;
1287 xon_char = RFCOMM_RPN_XON_CHAR; 1279 xon_char = RFCOMM_RPN_XON_CHAR;
1288 xoff_char = RFCOMM_RPN_XOFF_CHAR; 1280 xoff_char = RFCOMM_RPN_XOFF_CHAR;
1289
1290 goto rpn_out; 1281 goto rpn_out;
1291 } 1282 }
1292 /* check for sane values: ignore/accept bit_rate, 8 bits, 1 stop bit, no parity, 1283
1293 no flow control lines, normal XON/XOFF chars */ 1284 /* Check for sane values, ignore/accept bit_rate, 8 bits, 1 stop bit,
1285 * no parity, no flow control lines, normal XON/XOFF chars */
1286
1294 if (rpn->param_mask & RFCOMM_RPN_PM_BITRATE) { 1287 if (rpn->param_mask & RFCOMM_RPN_PM_BITRATE) {
1295 bit_rate = rpn->bit_rate; 1288 bit_rate = rpn->bit_rate;
1296 if (bit_rate != RFCOMM_RPN_BR_115200) { 1289 if (bit_rate != RFCOMM_RPN_BR_115200) {
@@ -1299,6 +1292,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
1299 rpn_mask ^= RFCOMM_RPN_PM_BITRATE; 1292 rpn_mask ^= RFCOMM_RPN_PM_BITRATE;
1300 } 1293 }
1301 } 1294 }
1295
1302 if (rpn->param_mask & RFCOMM_RPN_PM_DATA) { 1296 if (rpn->param_mask & RFCOMM_RPN_PM_DATA) {
1303 data_bits = __get_rpn_data_bits(rpn->line_settings); 1297 data_bits = __get_rpn_data_bits(rpn->line_settings);
1304 if (data_bits != RFCOMM_RPN_DATA_8) { 1298 if (data_bits != RFCOMM_RPN_DATA_8) {
@@ -1307,6 +1301,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
1307 rpn_mask ^= RFCOMM_RPN_PM_DATA; 1301 rpn_mask ^= RFCOMM_RPN_PM_DATA;
1308 } 1302 }
1309 } 1303 }
1304
1310 if (rpn->param_mask & RFCOMM_RPN_PM_STOP) { 1305 if (rpn->param_mask & RFCOMM_RPN_PM_STOP) {
1311 stop_bits = __get_rpn_stop_bits(rpn->line_settings); 1306 stop_bits = __get_rpn_stop_bits(rpn->line_settings);
1312 if (stop_bits != RFCOMM_RPN_STOP_1) { 1307 if (stop_bits != RFCOMM_RPN_STOP_1) {
@@ -1315,6 +1310,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
1315 rpn_mask ^= RFCOMM_RPN_PM_STOP; 1310 rpn_mask ^= RFCOMM_RPN_PM_STOP;
1316 } 1311 }
1317 } 1312 }
1313
1318 if (rpn->param_mask & RFCOMM_RPN_PM_PARITY) { 1314 if (rpn->param_mask & RFCOMM_RPN_PM_PARITY) {
1319 parity = __get_rpn_parity(rpn->line_settings); 1315 parity = __get_rpn_parity(rpn->line_settings);
1320 if (parity != RFCOMM_RPN_PARITY_NONE) { 1316 if (parity != RFCOMM_RPN_PARITY_NONE) {
@@ -1323,6 +1319,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
1323 rpn_mask ^= RFCOMM_RPN_PM_PARITY; 1319 rpn_mask ^= RFCOMM_RPN_PM_PARITY;
1324 } 1320 }
1325 } 1321 }
1322
1326 if (rpn->param_mask & RFCOMM_RPN_PM_FLOW) { 1323 if (rpn->param_mask & RFCOMM_RPN_PM_FLOW) {
1327 flow_ctrl = rpn->flow_ctrl; 1324 flow_ctrl = rpn->flow_ctrl;
1328 if (flow_ctrl != RFCOMM_RPN_FLOW_NONE) { 1325 if (flow_ctrl != RFCOMM_RPN_FLOW_NONE) {
@@ -1331,6 +1328,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
1331 rpn_mask ^= RFCOMM_RPN_PM_FLOW; 1328 rpn_mask ^= RFCOMM_RPN_PM_FLOW;
1332 } 1329 }
1333 } 1330 }
1331
1334 if (rpn->param_mask & RFCOMM_RPN_PM_XON) { 1332 if (rpn->param_mask & RFCOMM_RPN_PM_XON) {
1335 xon_char = rpn->xon_char; 1333 xon_char = rpn->xon_char;
1336 if (xon_char != RFCOMM_RPN_XON_CHAR) { 1334 if (xon_char != RFCOMM_RPN_XON_CHAR) {
@@ -1339,6 +1337,7 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
1339 rpn_mask ^= RFCOMM_RPN_PM_XON; 1337 rpn_mask ^= RFCOMM_RPN_PM_XON;
1340 } 1338 }
1341 } 1339 }
1340
1342 if (rpn->param_mask & RFCOMM_RPN_PM_XOFF) { 1341 if (rpn->param_mask & RFCOMM_RPN_PM_XOFF) {
1343 xoff_char = rpn->xoff_char; 1342 xoff_char = rpn->xoff_char;
1344 if (xoff_char != RFCOMM_RPN_XOFF_CHAR) { 1343 if (xoff_char != RFCOMM_RPN_XOFF_CHAR) {
@@ -1349,9 +1348,8 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
1349 } 1348 }
1350 1349
1351rpn_out: 1350rpn_out:
1352 rfcomm_send_rpn(s, 0, dlci, 1351 rfcomm_send_rpn(s, 0, dlci, bit_rate, data_bits, stop_bits,
1353 bit_rate, data_bits, stop_bits, parity, flow_ctrl, 1352 parity, flow_ctrl, xon_char, xoff_char, rpn_mask);
1354 xon_char, xoff_char, rpn_mask);
1355 1353
1356 return 0; 1354 return 0;
1357} 1355}
@@ -1362,14 +1360,13 @@ static int rfcomm_recv_rls(struct rfcomm_session *s, int cr, struct sk_buff *skb
1362 u8 dlci = __get_dlci(rls->dlci); 1360 u8 dlci = __get_dlci(rls->dlci);
1363 1361
1364 BT_DBG("dlci %d cr %d status 0x%x", dlci, cr, rls->status); 1362 BT_DBG("dlci %d cr %d status 0x%x", dlci, cr, rls->status);
1365 1363
1366 if (!cr) 1364 if (!cr)
1367 return 0; 1365 return 0;
1368 1366
1369 /* FIXME: We should probably do something with this 1367 /* We should probably do something with this information here. But
1370 information here. But for now it's sufficient just 1368 * for now it's sufficient just to reply -- Bluetooth 1.1 says it's
1371 to reply -- Bluetooth 1.1 says it's mandatory to 1369 * mandatory to recognise and respond to RLS */
1372 recognise and respond to RLS */
1373 1370
1374 rfcomm_send_rls(s, 0, dlci, rls->status); 1371 rfcomm_send_rls(s, 0, dlci, rls->status);
1375 1372
@@ -1385,7 +1382,7 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb
1385 BT_DBG("dlci %d cr %d v24 0x%x", dlci, cr, msc->v24_sig); 1382 BT_DBG("dlci %d cr %d v24 0x%x", dlci, cr, msc->v24_sig);
1386 1383
1387 d = rfcomm_dlc_get(s, dlci); 1384 d = rfcomm_dlc_get(s, dlci);
1388 if (!d) 1385 if (!d)
1389 return 0; 1386 return 0;
1390 1387
1391 if (cr) { 1388 if (cr) {
@@ -1393,7 +1390,7 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb
1393 set_bit(RFCOMM_TX_THROTTLED, &d->flags); 1390 set_bit(RFCOMM_TX_THROTTLED, &d->flags);
1394 else 1391 else
1395 clear_bit(RFCOMM_TX_THROTTLED, &d->flags); 1392 clear_bit(RFCOMM_TX_THROTTLED, &d->flags);
1396 1393
1397 rfcomm_dlc_lock(d); 1394 rfcomm_dlc_lock(d);
1398 if (d->modem_status) 1395 if (d->modem_status)
1399 d->modem_status(d, msc->v24_sig); 1396 d->modem_status(d, msc->v24_sig);
@@ -1402,7 +1399,7 @@ static int rfcomm_recv_msc(struct rfcomm_session *s, int cr, struct sk_buff *skb
1402 rfcomm_send_msc(s, 0, dlci, msc->v24_sig); 1399 rfcomm_send_msc(s, 0, dlci, msc->v24_sig);
1403 1400
1404 d->mscex |= RFCOMM_MSCEX_RX; 1401 d->mscex |= RFCOMM_MSCEX_RX;
1405 } else 1402 } else
1406 d->mscex |= RFCOMM_MSCEX_TX; 1403 d->mscex |= RFCOMM_MSCEX_TX;
1407 1404
1408 return 0; 1405 return 0;
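[Editorial note] The macro fix at the top of this file widens the RPN parity mask from two bits to three: parity codes above 3 (the mark/space parity types) set the third bit, which the old 0x3 mask silently truncated. A small demonstration of the corrected packing; macro names and the example code value are illustrative:

/* Hedged sketch of the fixed RPN line-settings layout: data bits in
 * bits 0-1, stop bits in bit 2, parity in bits 3-5 (values 0..7). */
#include <stdio.h>
#include <stdint.h>

#define RPN_LINE(data, stop, parity) \
    (((data) & 0x3) | (((stop) & 0x1) << 2) | (((parity) & 0x7) << 3))
#define RPN_PARITY(line) (((line) >> 3) & 0x7)

int main(void)
{
    /* An illustrative parity code of 5 (0b101): the old 2-bit mask would
     * have read it back as 1, silently changing the setting. */
    uint8_t line = RPN_LINE(/*data*/ 3, /*stop*/ 0, /*parity*/ 5);
    printf("parity field: %u (expect 5)\n", RPN_PARITY(line));
    return 0;
}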
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 63a123c5c41b..90e19eb6d3cc 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -284,7 +284,7 @@ static struct proto rfcomm_proto = {
284 .obj_size = sizeof(struct rfcomm_pinfo) 284 .obj_size = sizeof(struct rfcomm_pinfo)
285}; 285};
286 286
287static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, int prio) 287static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio)
288{ 288{
289 struct rfcomm_dlc *d; 289 struct rfcomm_dlc *d;
290 struct sock *sk; 290 struct sock *sk;
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 6304590fd36a..1bca860a6109 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -286,7 +286,7 @@ static inline void rfcomm_set_owner_w(struct sk_buff *skb, struct rfcomm_dev *de
286 skb->destructor = rfcomm_wfree; 286 skb->destructor = rfcomm_wfree;
287} 287}
288 288
289static struct sk_buff *rfcomm_wmalloc(struct rfcomm_dev *dev, unsigned long size, int priority) 289static struct sk_buff *rfcomm_wmalloc(struct rfcomm_dev *dev, unsigned long size, unsigned int __nocast priority)
290{ 290{
291 if (atomic_read(&dev->wmem_alloc) < rfcomm_room(dev->dlc)) { 291 if (atomic_read(&dev->wmem_alloc) < rfcomm_room(dev->dlc)) {
292 struct sk_buff *skb = alloc_skb(size, priority); 292 struct sk_buff *skb = alloc_skb(size, priority);
@@ -528,9 +528,14 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig)
528 struct rfcomm_dev *dev = dlc->owner; 528 struct rfcomm_dev *dev = dlc->owner;
529 if (!dev) 529 if (!dev)
530 return; 530 return;
531 531
532 BT_DBG("dlc %p dev %p v24_sig 0x%02x", dlc, dev, v24_sig); 532 BT_DBG("dlc %p dev %p v24_sig 0x%02x", dlc, dev, v24_sig);
533 533
534 if ((dev->modem_status & TIOCM_CD) && !(v24_sig & RFCOMM_V24_DV)) {
535 if (dev->tty && !C_CLOCAL(dev->tty))
536 tty_hangup(dev->tty);
537 }
538
534 dev->modem_status = 539 dev->modem_status =
535 ((v24_sig & RFCOMM_V24_RTC) ? (TIOCM_DSR | TIOCM_DTR) : 0) | 540 ((v24_sig & RFCOMM_V24_RTC) ? (TIOCM_DSR | TIOCM_DTR) : 0) |
536 ((v24_sig & RFCOMM_V24_RTR) ? (TIOCM_RTS | TIOCM_CTS) : 0) | 541 ((v24_sig & RFCOMM_V24_RTR) ? (TIOCM_RTS | TIOCM_CTS) : 0) |
@@ -740,20 +745,143 @@ static int rfcomm_tty_ioctl(struct tty_struct *tty, struct file *filp, unsigned
740 return -ENOIOCTLCMD; 745 return -ENOIOCTLCMD;
741} 746}
742 747
743#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
744
745static void rfcomm_tty_set_termios(struct tty_struct *tty, struct termios *old) 748static void rfcomm_tty_set_termios(struct tty_struct *tty, struct termios *old)
746{ 749{
747 BT_DBG("tty %p", tty); 750 struct termios *new = (struct termios *) tty->termios;
751 int old_baud_rate = tty_termios_baud_rate(old);
752 int new_baud_rate = tty_termios_baud_rate(new);
748 753
749 if ((tty->termios->c_cflag == old->c_cflag) && 754 u8 baud, data_bits, stop_bits, parity, x_on, x_off;
750 (RELEVANT_IFLAG(tty->termios->c_iflag) == RELEVANT_IFLAG(old->c_iflag))) 755 u16 changes = 0;
751 return; 756
757 struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
758
759 BT_DBG("tty %p termios %p", tty, old);
760
761 /* Handle turning off CRTSCTS */
762 if ((old->c_cflag & CRTSCTS) && !(new->c_cflag & CRTSCTS))
763 BT_DBG("Turning off CRTSCTS unsupported");
764
765 /* Parity on/off and when on, odd/even */
766 if (((old->c_cflag & PARENB) != (new->c_cflag & PARENB)) ||
767 ((old->c_cflag & PARODD) != (new->c_cflag & PARODD)) ) {
768 changes |= RFCOMM_RPN_PM_PARITY;
769 BT_DBG("Parity change detected.");
770 }
771
772 /* Mark and space parity are not supported! */
773 if (new->c_cflag & PARENB) {
774 if (new->c_cflag & PARODD) {
775 BT_DBG("Parity is ODD");
776 parity = RFCOMM_RPN_PARITY_ODD;
777 } else {
778 BT_DBG("Parity is EVEN");
779 parity = RFCOMM_RPN_PARITY_EVEN;
780 }
781 } else {
782 BT_DBG("Parity is OFF");
783 parity = RFCOMM_RPN_PARITY_NONE;
784 }
785
786 /* Setting the x_on / x_off characters */
787 if (old->c_cc[VSTOP] != new->c_cc[VSTOP]) {
788 BT_DBG("XOFF custom");
789 x_on = new->c_cc[VSTOP];
790 changes |= RFCOMM_RPN_PM_XON;
791 } else {
792 BT_DBG("XOFF default");
793 x_on = RFCOMM_RPN_XON_CHAR;
794 }
795
796 if (old->c_cc[VSTART] != new->c_cc[VSTART]) {
797 BT_DBG("XON custom");
798 x_off = new->c_cc[VSTART];
799 changes |= RFCOMM_RPN_PM_XOFF;
800 } else {
801 BT_DBG("XON default");
802 x_off = RFCOMM_RPN_XOFF_CHAR;
803 }
804
805 /* Handle setting of stop bits */
806 if ((old->c_cflag & CSTOPB) != (new->c_cflag & CSTOPB))
807 changes |= RFCOMM_RPN_PM_STOP;
808
809 /* POSIX does not support 1.5 stop bits and RFCOMM does not
810 * support 2 stop bits. So a request for 2 stop bits gets
811 * translated to 1.5 stop bits */
812 if (new->c_cflag & CSTOPB) {
813 stop_bits = RFCOMM_RPN_STOP_15;
814 } else {
815 stop_bits = RFCOMM_RPN_STOP_1;
816 }
817
818 /* Handle number of data bits [5-8] */
819 if ((old->c_cflag & CSIZE) != (new->c_cflag & CSIZE))
820 changes |= RFCOMM_RPN_PM_DATA;
821
822 switch (new->c_cflag & CSIZE) {
823 case CS5:
824 data_bits = RFCOMM_RPN_DATA_5;
825 break;
826 case CS6:
827 data_bits = RFCOMM_RPN_DATA_6;
828 break;
829 case CS7:
830 data_bits = RFCOMM_RPN_DATA_7;
831 break;
832 case CS8:
833 data_bits = RFCOMM_RPN_DATA_8;
834 break;
835 default:
836 data_bits = RFCOMM_RPN_DATA_8;
837 break;
838 }
839
840 /* Handle baudrate settings */
841 if (old_baud_rate != new_baud_rate)
842 changes |= RFCOMM_RPN_PM_BITRATE;
752 843
753 /* handle turning off CRTSCTS */ 844 switch (new_baud_rate) {
754 if ((old->c_cflag & CRTSCTS) && !(tty->termios->c_cflag & CRTSCTS)) { 845 case 2400:
755 BT_DBG("turning off CRTSCTS"); 846 baud = RFCOMM_RPN_BR_2400;
847 break;
848 case 4800:
849 baud = RFCOMM_RPN_BR_4800;
850 break;
851 case 7200:
852 baud = RFCOMM_RPN_BR_7200;
853 break;
854 case 9600:
855 baud = RFCOMM_RPN_BR_9600;
856 break;
857 case 19200:
858 baud = RFCOMM_RPN_BR_19200;
859 break;
860 case 38400:
861 baud = RFCOMM_RPN_BR_38400;
862 break;
863 case 57600:
864 baud = RFCOMM_RPN_BR_57600;
865 break;
866 case 115200:
867 baud = RFCOMM_RPN_BR_115200;
868 break;
869 case 230400:
870 baud = RFCOMM_RPN_BR_230400;
871 break;
872 default:
 873 /* 9600 is standard according to the RFCOMM specification */
874 baud = RFCOMM_RPN_BR_9600;
875 break;
876
756 } 877 }
878
879 if (changes)
880 rfcomm_send_rpn(dev->dlc->session, 1, dev->dlc->dlci, baud,
881 data_bits, stop_bits, parity,
882 RFCOMM_RPN_FLOW_NONE, x_on, x_off, changes);
883
884 return;
757} 885}
758 886
759static void rfcomm_tty_throttle(struct tty_struct *tty) 887static void rfcomm_tty_throttle(struct tty_struct *tty)
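[Editorial note] The rewritten set_termios handler above leans on tty_termios_baud_rate() plus a switch to turn POSIX speeds into RFCOMM RPN codes, defaulting to 9600 per the specification. A hedged userspace analog using cfgetospeed(); the RPN_* values are made-up stand-ins:

/* Sketch of the baud-rate mapping step only, under the assumptions
 * stated above. */
#include <stdio.h>
#include <termios.h>

enum { RPN_BR_9600 = 0x03, RPN_BR_115200 = 0x07 };  /* illustrative codes */

static int baud_to_rpn(speed_t s)
{
    switch (s) {
    case B115200: return RPN_BR_115200;
    case B9600:                       /* fall through to the default */
    default:      return RPN_BR_9600; /* 9600 is the RFCOMM standard rate */
    }
}

int main(void)
{
    struct termios t = { 0 };
    cfsetospeed(&t, B115200);         /* userspace analog of the tty layer */
    printf("rpn code 0x%02x\n", baud_to_rpn(cfgetospeed(&t)));
    return 0;
}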
@@ -761,7 +889,7 @@ static void rfcomm_tty_throttle(struct tty_struct *tty)
761 struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; 889 struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
762 890
763 BT_DBG("tty %p dev %p", tty, dev); 891 BT_DBG("tty %p dev %p", tty, dev);
764 892
765 rfcomm_dlc_throttle(dev->dlc); 893 rfcomm_dlc_throttle(dev->dlc);
766} 894}
767 895
@@ -770,7 +898,7 @@ static void rfcomm_tty_unthrottle(struct tty_struct *tty)
770 struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; 898 struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
771 899
772 BT_DBG("tty %p dev %p", tty, dev); 900 BT_DBG("tty %p dev %p", tty, dev);
773 901
774 rfcomm_dlc_unthrottle(dev->dlc); 902 rfcomm_dlc_unthrottle(dev->dlc);
775} 903}
776 904
@@ -841,35 +969,35 @@ static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp)
841 969
842static int rfcomm_tty_tiocmset(struct tty_struct *tty, struct file *filp, unsigned int set, unsigned int clear) 970static int rfcomm_tty_tiocmset(struct tty_struct *tty, struct file *filp, unsigned int set, unsigned int clear)
843{ 971{
844 struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; 972 struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
845 struct rfcomm_dlc *dlc = dev->dlc; 973 struct rfcomm_dlc *dlc = dev->dlc;
846 u8 v24_sig; 974 u8 v24_sig;
847 975
848 BT_DBG("tty %p dev %p set 0x%02x clear 0x%02x", tty, dev, set, clear); 976 BT_DBG("tty %p dev %p set 0x%02x clear 0x%02x", tty, dev, set, clear);
849 977
850 rfcomm_dlc_get_modem_status(dlc, &v24_sig); 978 rfcomm_dlc_get_modem_status(dlc, &v24_sig);
851 979
852 if (set & TIOCM_DSR || set & TIOCM_DTR) 980 if (set & TIOCM_DSR || set & TIOCM_DTR)
853 v24_sig |= RFCOMM_V24_RTC; 981 v24_sig |= RFCOMM_V24_RTC;
854 if (set & TIOCM_RTS || set & TIOCM_CTS) 982 if (set & TIOCM_RTS || set & TIOCM_CTS)
855 v24_sig |= RFCOMM_V24_RTR; 983 v24_sig |= RFCOMM_V24_RTR;
856 if (set & TIOCM_RI) 984 if (set & TIOCM_RI)
857 v24_sig |= RFCOMM_V24_IC; 985 v24_sig |= RFCOMM_V24_IC;
858 if (set & TIOCM_CD) 986 if (set & TIOCM_CD)
859 v24_sig |= RFCOMM_V24_DV; 987 v24_sig |= RFCOMM_V24_DV;
860 988
861 if (clear & TIOCM_DSR || clear & TIOCM_DTR) 989 if (clear & TIOCM_DSR || clear & TIOCM_DTR)
862 v24_sig &= ~RFCOMM_V24_RTC; 990 v24_sig &= ~RFCOMM_V24_RTC;
863 if (clear & TIOCM_RTS || clear & TIOCM_CTS) 991 if (clear & TIOCM_RTS || clear & TIOCM_CTS)
864 v24_sig &= ~RFCOMM_V24_RTR; 992 v24_sig &= ~RFCOMM_V24_RTR;
865 if (clear & TIOCM_RI) 993 if (clear & TIOCM_RI)
866 v24_sig &= ~RFCOMM_V24_IC; 994 v24_sig &= ~RFCOMM_V24_IC;
867 if (clear & TIOCM_CD) 995 if (clear & TIOCM_CD)
868 v24_sig &= ~RFCOMM_V24_DV; 996 v24_sig &= ~RFCOMM_V24_DV;
869 997
870 rfcomm_dlc_set_modem_status(dlc, v24_sig); 998 rfcomm_dlc_set_modem_status(dlc, v24_sig);
871 999
872 return 0; 1000 return 0;
873} 1001}
874 1002
875/* ---- TTY structure ---- */ 1003/* ---- TTY structure ---- */
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 746c11fc017e..ce7ab7dfa0b2 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -418,7 +418,7 @@ static struct proto sco_proto = {
418 .obj_size = sizeof(struct sco_pinfo) 418 .obj_size = sizeof(struct sco_pinfo)
419}; 419};
420 420
421static struct sock *sco_sock_alloc(struct socket *sock, int proto, int prio) 421static struct sock *sco_sock_alloc(struct socket *sock, int proto, unsigned int __nocast prio)
422{ 422{
423 struct sock *sk; 423 struct sock *sk;
424 424
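The __nocast annotation added above recurs throughout this series on allocation masks. As a sketch of what it buys, here is the declaration the skbuff.c hunk later in this diff adds; gfp_mask carries __nocast so sparse can flag suspicious implicit conversions of the allocation-mask argument:

	/* Declaration per the skbuff.c hunk below; __nocast lets sparse
	 * warn when a caller silently converts a value into gfp_mask
	 * instead of passing a proper GFP_* constant. */
	struct sk_buff *__alloc_skb(unsigned int size,
				    unsigned int __nocast gfp_mask,
				    int fclone);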
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
new file mode 100644
index 000000000000..db23d59746cf
--- /dev/null
+++ b/net/bridge/Kconfig
@@ -0,0 +1,31 @@
1#
2# 802.1d Ethernet Bridging
3#
4
5config BRIDGE
6 tristate "802.1d Ethernet Bridging"
7 ---help---
8 If you say Y here, then your Linux box will be able to act as an
9 Ethernet bridge, which means that the different Ethernet segments it
10 is connected to will appear as one Ethernet to the participants.
11 Several such bridges can work together to create even larger
12 networks of Ethernets using the IEEE 802.1 spanning tree algorithm.
13 As this is a standard, Linux bridges will cooperate properly with
14 other third party bridge products.
15
16 In order to use the Ethernet bridge, you'll need the bridge
17 configuration tools; see <file:Documentation/networking/bridge.txt>
18 for location. Please read the Bridge mini-HOWTO for more
19 information.
20
21 If you enable iptables support along with the bridge support then you
22 turn your bridge into a bridging IP firewall.
23 iptables will then see the IP packets being bridged, so you need to
24 take this into account when setting up your firewall rules.
25 Enabling arptables support when bridging will let arptables see
26 bridged ARP traffic in the arptables FORWARD chain.
27
28 To compile this code as a module, choose M here: the module
29 will be called bridge.
30
31 If unsure, say N.
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index e6c2200b7ca3..24396b914d11 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -23,7 +23,7 @@
23#include <asm/atomic.h> 23#include <asm/atomic.h>
24#include "br_private.h" 24#include "br_private.h"
25 25
26static kmem_cache_t *br_fdb_cache; 26static kmem_cache_t *br_fdb_cache __read_mostly;
27static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, 27static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
28 const unsigned char *addr); 28 const unsigned char *addr);
29 29
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 68ccef507b49..c70b3be23026 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -138,7 +138,7 @@ config BRIDGE_EBT_VLAN
138# 138#
139config BRIDGE_EBT_ARPREPLY 139config BRIDGE_EBT_ARPREPLY
140 tristate "ebt: arp reply target support" 140 tristate "ebt: arp reply target support"
141 depends on BRIDGE_NF_EBTABLES 141 depends on BRIDGE_NF_EBTABLES && INET
142 help 142 help
143 This option adds the arp reply target, which allows 143 This option adds the arp reply target, which allows
144 automatically sending arp replies to arp requests. 144 automatically sending arp replies to arp requests.
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 02c632b4d325..c93d35ab95c0 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -23,10 +23,9 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr,
23{ 23{
24 struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; 24 struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data;
25 25
26 if ((*pskb)->nfmark != info->mark) { 26 if ((*pskb)->nfmark != info->mark)
27 (*pskb)->nfmark = info->mark; 27 (*pskb)->nfmark = info->mark;
28 (*pskb)->nfcache |= NFC_ALTERED; 28
29 }
30 return info->target; 29 return info->target;
31} 30}
32 31
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 01af4fcef26d..aae26ae2e61f 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -78,8 +78,8 @@ static void ulog_send(unsigned int nlgroup)
78 if (ub->qlen > 1) 78 if (ub->qlen > 1)
79 ub->lastnlh->nlmsg_type = NLMSG_DONE; 79 ub->lastnlh->nlmsg_type = NLMSG_DONE;
80 80
81 NETLINK_CB(ub->skb).dst_groups = 1 << nlgroup; 81 NETLINK_CB(ub->skb).dst_group = nlgroup + 1;
82 netlink_broadcast(ebtulognl, ub->skb, 0, 1 << nlgroup, GFP_ATOMIC); 82 netlink_broadcast(ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC);
83 83
84 ub->qlen = 0; 84 ub->qlen = 0;
85 ub->skb = NULL; 85 ub->skb = NULL;
@@ -162,7 +162,7 @@ static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr,
162 pm->version = EBT_ULOG_VERSION; 162 pm->version = EBT_ULOG_VERSION;
163 do_gettimeofday(&pm->stamp); 163 do_gettimeofday(&pm->stamp);
164 if (ub->qlen == 1) 164 if (ub->qlen == 1)
165 ub->skb->stamp = pm->stamp; 165 skb_set_timestamp(ub->skb, &pm->stamp);
166 pm->data_len = copy_len; 166 pm->data_len = copy_len;
167 pm->mark = skb->nfmark; 167 pm->mark = skb->nfmark;
168 pm->hook = hooknr; 168 pm->hook = hooknr;
@@ -258,7 +258,8 @@ static int __init init(void)
258 spin_lock_init(&ulog_buffers[i].lock); 258 spin_lock_init(&ulog_buffers[i].lock);
259 } 259 }
260 260
261 ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL); 261 ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS,
262 NULL, THIS_MODULE);
262 if (!ebtulognl) 263 if (!ebtulognl)
263 ret = -ENOMEM; 264 ret = -ENOMEM;
264 else if ((ret = ebt_register_watcher(&ulog))) 265 else if ((ret = ebt_register_watcher(&ulog)))
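The calls above reflect a signature change to netlink_kernel_create(): callers now pass the number of multicast groups the socket serves and the owning module (the rtnetlink.c hunk below makes the same adjustment). A hypothetical minimal caller under the new signature, with the group count following the ebt_ulog usage above:

	/* Hypothetical module init under the widened API. */
	static struct sock *nlsk;

	static int __init example_init(void)
	{
		nlsk = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS,
					     NULL /* no input callback */,
					     THIS_MODULE);
		return nlsk ? 0 : -ENOMEM;
	}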
diff --git a/net/compat.c b/net/compat.c
index be5d936dc423..d99ab9695893 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -91,20 +91,11 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
91 } else 91 } else
92 kern_msg->msg_name = NULL; 92 kern_msg->msg_name = NULL;
93 93
94 if(kern_msg->msg_iovlen > UIO_FASTIOV) {
95 kern_iov = kmalloc(kern_msg->msg_iovlen * sizeof(struct iovec),
96 GFP_KERNEL);
97 if(!kern_iov)
98 return -ENOMEM;
99 }
100
101 tot_len = iov_from_user_compat_to_kern(kern_iov, 94 tot_len = iov_from_user_compat_to_kern(kern_iov,
102 (struct compat_iovec __user *)kern_msg->msg_iov, 95 (struct compat_iovec __user *)kern_msg->msg_iov,
103 kern_msg->msg_iovlen); 96 kern_msg->msg_iovlen);
104 if(tot_len >= 0) 97 if(tot_len >= 0)
105 kern_msg->msg_iov = kern_iov; 98 kern_msg->msg_iov = kern_iov;
106 else if(kern_msg->msg_iovlen > UIO_FASTIOV)
107 kfree(kern_iov);
108 99
109 return tot_len; 100 return tot_len;
110} 101}
diff --git a/net/core/Makefile b/net/core/Makefile
index 5e0c56b7f607..630da0f0579e 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -7,11 +7,11 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
7 7
8obj-$(CONFIG_SYSCTL) += sysctl_net_core.o 8obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
9 9
10obj-y += flow.o dev.o ethtool.o dev_mcast.o dst.o \ 10obj-y += dev.o ethtool.o dev_mcast.o dst.o \
11 neighbour.o rtnetlink.o utils.o link_watch.o filter.o 11 neighbour.o rtnetlink.o utils.o link_watch.o filter.o
12 12
13obj-$(CONFIG_XFRM) += flow.o
13obj-$(CONFIG_SYSFS) += net-sysfs.o 14obj-$(CONFIG_SYSFS) += net-sysfs.o
14obj-$(CONFIG_NETFILTER) += netfilter.o
15obj-$(CONFIG_NET_DIVERT) += dv.o 15obj-$(CONFIG_NET_DIVERT) += dv.o
16obj-$(CONFIG_NET_PKTGEN) += pktgen.o 16obj-$(CONFIG_NET_PKTGEN) += pktgen.o
17obj-$(CONFIG_NET_RADIO) += wireless.o 17obj-$(CONFIG_NET_RADIO) += wireless.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index fcee054b6f75..da9bf71421a7 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -43,7 +43,6 @@
43#include <linux/errno.h> 43#include <linux/errno.h>
44#include <linux/sched.h> 44#include <linux/sched.h>
45#include <linux/inet.h> 45#include <linux/inet.h>
46#include <linux/tcp.h>
47#include <linux/netdevice.h> 46#include <linux/netdevice.h>
48#include <linux/rtnetlink.h> 47#include <linux/rtnetlink.h>
49#include <linux/poll.h> 48#include <linux/poll.h>
@@ -51,9 +50,10 @@
51 50
52#include <net/protocol.h> 51#include <net/protocol.h>
53#include <linux/skbuff.h> 52#include <linux/skbuff.h>
54#include <net/sock.h>
55#include <net/checksum.h>
56 53
54#include <net/checksum.h>
55#include <net/sock.h>
56#include <net/tcp_states.h>
57 57
58/* 58/*
59 * Is a socket 'connection oriented' ? 59 * Is a socket 'connection oriented' ?
diff --git a/net/core/dev.c b/net/core/dev.c
index ff9dc029233a..c01511e3d0c1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -267,10 +267,6 @@ void dev_add_pack(struct packet_type *pt)
267 spin_unlock_bh(&ptype_lock); 267 spin_unlock_bh(&ptype_lock);
268} 268}
269 269
270extern void linkwatch_run_queue(void);
271
272
273
274/** 270/**
275 * __dev_remove_pack - remove packet handler 271 * __dev_remove_pack - remove packet handler
276 * @pt: packet type declaration 272 * @pt: packet type declaration
@@ -901,8 +897,7 @@ int dev_close(struct net_device *dev)
901 smp_mb__after_clear_bit(); /* Commit netif_running(). */ 897 smp_mb__after_clear_bit(); /* Commit netif_running(). */
902 while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) { 898 while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
903 /* No hurry. */ 899 /* No hurry. */
904 current->state = TASK_INTERRUPTIBLE; 900 msleep(1);
905 schedule_timeout(1);
906 } 901 }
907 902
908 /* 903 /*
@@ -1010,13 +1005,22 @@ void net_disable_timestamp(void)
1010 atomic_dec(&netstamp_needed); 1005 atomic_dec(&netstamp_needed);
1011} 1006}
1012 1007
1013static inline void net_timestamp(struct timeval *stamp) 1008void __net_timestamp(struct sk_buff *skb)
1009{
1010 struct timeval tv;
1011
1012 do_gettimeofday(&tv);
1013 skb_set_timestamp(skb, &tv);
1014}
1015EXPORT_SYMBOL(__net_timestamp);
1016
1017static inline void net_timestamp(struct sk_buff *skb)
1014{ 1018{
1015 if (atomic_read(&netstamp_needed)) 1019 if (atomic_read(&netstamp_needed))
1016 do_gettimeofday(stamp); 1020 __net_timestamp(skb);
1017 else { 1021 else {
1018 stamp->tv_sec = 0; 1022 skb->tstamp.off_sec = 0;
1019 stamp->tv_usec = 0; 1023 skb->tstamp.off_usec = 0;
1020 } 1024 }
1021} 1025}
1022 1026
@@ -1028,7 +1032,8 @@ static inline void net_timestamp(struct timeval *stamp)
1028void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) 1032void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1029{ 1033{
1030 struct packet_type *ptype; 1034 struct packet_type *ptype;
1031 net_timestamp(&skb->stamp); 1035
1036 net_timestamp(skb);
1032 1037
1033 rcu_read_lock(); 1038 rcu_read_lock();
1034 list_for_each_entry_rcu(ptype, &ptype_all, list) { 1039 list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -1059,7 +1064,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1059 1064
1060 skb2->h.raw = skb2->nh.raw; 1065 skb2->h.raw = skb2->nh.raw;
1061 skb2->pkt_type = PACKET_OUTGOING; 1066 skb2->pkt_type = PACKET_OUTGOING;
1062 ptype->func(skb2, skb->dev, ptype); 1067 ptype->func(skb2, skb->dev, ptype, skb->dev);
1063 } 1068 }
1064 } 1069 }
1065 rcu_read_unlock(); 1070 rcu_read_unlock();
@@ -1124,8 +1129,6 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1124#define illegal_highdma(dev, skb) (0) 1129#define illegal_highdma(dev, skb) (0)
1125#endif 1130#endif
1126 1131
1127extern void skb_release_data(struct sk_buff *);
1128
1129/* Keep head the same: replace data */ 1132/* Keep head the same: replace data */
1130int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask) 1133int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask)
1131{ 1134{
@@ -1380,8 +1383,8 @@ int netif_rx(struct sk_buff *skb)
1380 if (netpoll_rx(skb)) 1383 if (netpoll_rx(skb))
1381 return NET_RX_DROP; 1384 return NET_RX_DROP;
1382 1385
1383 if (!skb->stamp.tv_sec) 1386 if (!skb->tstamp.off_sec)
1384 net_timestamp(&skb->stamp); 1387 net_timestamp(skb);
1385 1388
1386 /* 1389 /*
1387 * The code is rearranged so that the path is the most 1390 * The code is rearranged so that the path is the most
@@ -1426,14 +1429,14 @@ int netif_rx_ni(struct sk_buff *skb)
1426 1429
1427EXPORT_SYMBOL(netif_rx_ni); 1430EXPORT_SYMBOL(netif_rx_ni);
1428 1431
1429static __inline__ void skb_bond(struct sk_buff *skb) 1432static inline struct net_device *skb_bond(struct sk_buff *skb)
1430{ 1433{
1431 struct net_device *dev = skb->dev; 1434 struct net_device *dev = skb->dev;
1432 1435
1433 if (dev->master) { 1436 if (dev->master)
1434 skb->real_dev = skb->dev;
1435 skb->dev = dev->master; 1437 skb->dev = dev->master;
1436 } 1438
1439 return dev;
1437} 1440}
1438 1441
1439static void net_tx_action(struct softirq_action *h) 1442static void net_tx_action(struct softirq_action *h)
@@ -1483,10 +1486,11 @@ static void net_tx_action(struct softirq_action *h)
1483} 1486}
1484 1487
1485static __inline__ int deliver_skb(struct sk_buff *skb, 1488static __inline__ int deliver_skb(struct sk_buff *skb,
1486 struct packet_type *pt_prev) 1489 struct packet_type *pt_prev,
1490 struct net_device *orig_dev)
1487{ 1491{
1488 atomic_inc(&skb->users); 1492 atomic_inc(&skb->users);
1489 return pt_prev->func(skb, skb->dev, pt_prev); 1493 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1490} 1494}
1491 1495
1492#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) 1496#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
@@ -1497,7 +1501,8 @@ struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1497void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); 1501void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
1498 1502
1499static __inline__ int handle_bridge(struct sk_buff **pskb, 1503static __inline__ int handle_bridge(struct sk_buff **pskb,
1500 struct packet_type **pt_prev, int *ret) 1504 struct packet_type **pt_prev, int *ret,
1505 struct net_device *orig_dev)
1501{ 1506{
1502 struct net_bridge_port *port; 1507 struct net_bridge_port *port;
1503 1508
@@ -1506,14 +1511,14 @@ static __inline__ int handle_bridge(struct sk_buff **pskb,
1506 return 0; 1511 return 0;
1507 1512
1508 if (*pt_prev) { 1513 if (*pt_prev) {
1509 *ret = deliver_skb(*pskb, *pt_prev); 1514 *ret = deliver_skb(*pskb, *pt_prev, orig_dev);
1510 *pt_prev = NULL; 1515 *pt_prev = NULL;
1511 } 1516 }
1512 1517
1513 return br_handle_frame_hook(port, pskb); 1518 return br_handle_frame_hook(port, pskb);
1514} 1519}
1515#else 1520#else
1516#define handle_bridge(skb, pt_prev, ret) (0) 1521#define handle_bridge(skb, pt_prev, ret, orig_dev) (0)
1517#endif 1522#endif
1518 1523
1519#ifdef CONFIG_NET_CLS_ACT 1524#ifdef CONFIG_NET_CLS_ACT
@@ -1535,17 +1540,14 @@ static int ing_filter(struct sk_buff *skb)
1535 __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); 1540 __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
1536 if (MAX_RED_LOOP < ttl++) { 1541 if (MAX_RED_LOOP < ttl++) {
1537 printk("Redir loop detected Dropping packet (%s->%s)\n", 1542 printk("Redir loop detected Dropping packet (%s->%s)\n",
1538 skb->input_dev?skb->input_dev->name:"??",skb->dev->name); 1543 skb->input_dev->name, skb->dev->name);
1539 return TC_ACT_SHOT; 1544 return TC_ACT_SHOT;
1540 } 1545 }
1541 1546
1542 skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl); 1547 skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
1543 1548
1544 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); 1549 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
1545 if (NULL == skb->input_dev) { 1550
1546 skb->input_dev = skb->dev;
1547 printk("ing_filter: fixed %s out %s\n",skb->input_dev->name,skb->dev->name);
1548 }
1549 spin_lock(&dev->ingress_lock); 1551 spin_lock(&dev->ingress_lock);
1550 if ((q = dev->qdisc_ingress) != NULL) 1552 if ((q = dev->qdisc_ingress) != NULL)
1551 result = q->enqueue(skb, q); 1553 result = q->enqueue(skb, q);
@@ -1560,6 +1562,7 @@ static int ing_filter(struct sk_buff *skb)
1560int netif_receive_skb(struct sk_buff *skb) 1562int netif_receive_skb(struct sk_buff *skb)
1561{ 1563{
1562 struct packet_type *ptype, *pt_prev; 1564 struct packet_type *ptype, *pt_prev;
1565 struct net_device *orig_dev;
1563 int ret = NET_RX_DROP; 1566 int ret = NET_RX_DROP;
1564 unsigned short type; 1567 unsigned short type;
1565 1568
@@ -1567,10 +1570,13 @@ int netif_receive_skb(struct sk_buff *skb)
1567 if (skb->dev->poll && netpoll_rx(skb)) 1570 if (skb->dev->poll && netpoll_rx(skb))
1568 return NET_RX_DROP; 1571 return NET_RX_DROP;
1569 1572
1570 if (!skb->stamp.tv_sec) 1573 if (!skb->tstamp.off_sec)
1571 net_timestamp(&skb->stamp); 1574 net_timestamp(skb);
1575
1576 if (!skb->input_dev)
1577 skb->input_dev = skb->dev;
1572 1578
1573 skb_bond(skb); 1579 orig_dev = skb_bond(skb);
1574 1580
1575 __get_cpu_var(netdev_rx_stat).total++; 1581 __get_cpu_var(netdev_rx_stat).total++;
1576 1582
@@ -1591,14 +1597,14 @@ int netif_receive_skb(struct sk_buff *skb)
1591 list_for_each_entry_rcu(ptype, &ptype_all, list) { 1597 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1592 if (!ptype->dev || ptype->dev == skb->dev) { 1598 if (!ptype->dev || ptype->dev == skb->dev) {
1593 if (pt_prev) 1599 if (pt_prev)
1594 ret = deliver_skb(skb, pt_prev); 1600 ret = deliver_skb(skb, pt_prev, orig_dev);
1595 pt_prev = ptype; 1601 pt_prev = ptype;
1596 } 1602 }
1597 } 1603 }
1598 1604
1599#ifdef CONFIG_NET_CLS_ACT 1605#ifdef CONFIG_NET_CLS_ACT
1600 if (pt_prev) { 1606 if (pt_prev) {
1601 ret = deliver_skb(skb, pt_prev); 1607 ret = deliver_skb(skb, pt_prev, orig_dev);
1602 			pt_prev = NULL; /* no one else should process this after */ 1608
1603 } else { 1609 } else {
1604 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); 1610 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
@@ -1617,7 +1623,7 @@ ncls:
1617 1623
1618 handle_diverter(skb); 1624 handle_diverter(skb);
1619 1625
1620 if (handle_bridge(&skb, &pt_prev, &ret)) 1626 if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
1621 goto out; 1627 goto out;
1622 1628
1623 type = skb->protocol; 1629 type = skb->protocol;
@@ -1625,13 +1631,13 @@ ncls:
1625 if (ptype->type == type && 1631 if (ptype->type == type &&
1626 (!ptype->dev || ptype->dev == skb->dev)) { 1632 (!ptype->dev || ptype->dev == skb->dev)) {
1627 if (pt_prev) 1633 if (pt_prev)
1628 ret = deliver_skb(skb, pt_prev); 1634 ret = deliver_skb(skb, pt_prev, orig_dev);
1629 pt_prev = ptype; 1635 pt_prev = ptype;
1630 } 1636 }
1631 } 1637 }
1632 1638
1633 if (pt_prev) { 1639 if (pt_prev) {
1634 ret = pt_prev->func(skb, skb->dev, pt_prev); 1640 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1635 } else { 1641 } else {
1636 kfree_skb(skb); 1642 kfree_skb(skb);
1637 		/* Jamal, now you will not be able to escape explaining 1643
@@ -1697,7 +1703,8 @@ static void net_rx_action(struct softirq_action *h)
1697 struct softnet_data *queue = &__get_cpu_var(softnet_data); 1703 struct softnet_data *queue = &__get_cpu_var(softnet_data);
1698 unsigned long start_time = jiffies; 1704 unsigned long start_time = jiffies;
1699 int budget = netdev_budget; 1705 int budget = netdev_budget;
1700 1706 void *have;
1707
1701 local_irq_disable(); 1708 local_irq_disable();
1702 1709
1703 while (!list_empty(&queue->poll_list)) { 1710 while (!list_empty(&queue->poll_list)) {
@@ -1710,10 +1717,10 @@ static void net_rx_action(struct softirq_action *h)
1710 1717
1711 dev = list_entry(queue->poll_list.next, 1718 dev = list_entry(queue->poll_list.next,
1712 struct net_device, poll_list); 1719 struct net_device, poll_list);
1713 netpoll_poll_lock(dev); 1720 have = netpoll_poll_lock(dev);
1714 1721
1715 if (dev->quota <= 0 || dev->poll(dev, &budget)) { 1722 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1716 netpoll_poll_unlock(dev); 1723 netpoll_poll_unlock(have);
1717 local_irq_disable(); 1724 local_irq_disable();
1718 list_del(&dev->poll_list); 1725 list_del(&dev->poll_list);
1719 list_add_tail(&dev->poll_list, &queue->poll_list); 1726 list_add_tail(&dev->poll_list, &queue->poll_list);
@@ -1722,7 +1729,7 @@ static void net_rx_action(struct softirq_action *h)
1722 else 1729 else
1723 dev->quota = dev->weight; 1730 dev->quota = dev->weight;
1724 } else { 1731 } else {
1725 netpoll_poll_unlock(dev); 1732 netpoll_poll_unlock(have);
1726 dev_put(dev); 1733 dev_put(dev);
1727 local_irq_disable(); 1734 local_irq_disable();
1728 } 1735 }
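The deliver_skb()/ptype->func() changes in this file all thread a fourth argument through to protocol handlers: the device the frame actually arrived on, captured by skb_bond() before skb->dev is rewritten to the bonding master. The handler prototype this implies, as a sketch inferred from the call sites above rather than a verbatim header quote:

	int (*func)(struct sk_buff *skb,
		    struct net_device *dev,       /* may be the bond master */
		    struct packet_type *pt,
		    struct net_device *orig_dev); /* pre-bonding ingress device */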
diff --git a/net/core/dst.c b/net/core/dst.c
index fc434ade5270..334790da9f16 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -45,6 +45,7 @@ static struct timer_list dst_gc_timer =
45static void dst_run_gc(unsigned long dummy) 45static void dst_run_gc(unsigned long dummy)
46{ 46{
47 int delayed = 0; 47 int delayed = 0;
48 int work_performed;
48 struct dst_entry * dst, **dstp; 49 struct dst_entry * dst, **dstp;
49 50
50 if (!spin_trylock(&dst_lock)) { 51 if (!spin_trylock(&dst_lock)) {
@@ -52,9 +53,9 @@ static void dst_run_gc(unsigned long dummy)
52 return; 53 return;
53 } 54 }
54 55
55
56 del_timer(&dst_gc_timer); 56 del_timer(&dst_gc_timer);
57 dstp = &dst_garbage_list; 57 dstp = &dst_garbage_list;
58 work_performed = 0;
58 while ((dst = *dstp) != NULL) { 59 while ((dst = *dstp) != NULL) {
59 if (atomic_read(&dst->__refcnt)) { 60 if (atomic_read(&dst->__refcnt)) {
60 dstp = &dst->next; 61 dstp = &dst->next;
@@ -62,6 +63,7 @@ static void dst_run_gc(unsigned long dummy)
62 continue; 63 continue;
63 } 64 }
64 *dstp = dst->next; 65 *dstp = dst->next;
66 work_performed = 1;
65 67
66 dst = dst_destroy(dst); 68 dst = dst_destroy(dst);
67 if (dst) { 69 if (dst) {
@@ -86,9 +88,14 @@ static void dst_run_gc(unsigned long dummy)
86 dst_gc_timer_inc = DST_GC_MAX; 88 dst_gc_timer_inc = DST_GC_MAX;
87 goto out; 89 goto out;
88 } 90 }
89 if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX) 91 if (!work_performed) {
90 dst_gc_timer_expires = DST_GC_MAX; 92 if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
91 dst_gc_timer_inc += DST_GC_INC; 93 dst_gc_timer_expires = DST_GC_MAX;
94 dst_gc_timer_inc += DST_GC_INC;
95 } else {
96 dst_gc_timer_inc = DST_GC_INC;
97 dst_gc_timer_expires = DST_GC_MIN;
98 }
92 dst_gc_timer.expires = jiffies + dst_gc_timer_expires; 99 dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
93#if RT_CACHE_DEBUG >= 2 100#if RT_CACHE_DEBUG >= 2
94 printk("dst_total: %d/%d %ld\n", 101 printk("dst_total: %d/%d %ld\n",
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a3eeb88e1c81..289c1b5a8e4a 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -81,6 +81,18 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data)
81 return 0; 81 return 0;
82} 82}
83 83
84int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *addr, u8 *data)
85{
86 unsigned char len = dev->addr_len;
 87	if (addr->size < len)
88 return -ETOOSMALL;
89
90 addr->size = len;
91 memcpy(data, dev->perm_addr, len);
92 return 0;
93}
94
95
84/* Handlers for each ethtool command */ 96/* Handlers for each ethtool command */
85 97
86static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) 98static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
@@ -683,6 +695,39 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
683 return ret; 695 return ret;
684} 696}
685 697
 698static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr)
699{
700 struct ethtool_perm_addr epaddr;
701 u8 *data;
702 int ret;
703
704 if (!dev->ethtool_ops->get_perm_addr)
705 return -EOPNOTSUPP;
706
 707	if (copy_from_user(&epaddr, useraddr, sizeof(epaddr)))
708 return -EFAULT;
709
710 data = kmalloc(epaddr.size, GFP_USER);
711 if (!data)
712 return -ENOMEM;
713
 714	ret = dev->ethtool_ops->get_perm_addr(dev, &epaddr, data);
 715	if (ret)
 716		goto out;
717
718 ret = -EFAULT;
719 if (copy_to_user(useraddr, &epaddr, sizeof(epaddr)))
720 goto out;
721 useraddr += sizeof(epaddr);
722 if (copy_to_user(useraddr, data, epaddr.size))
723 goto out;
724 ret = 0;
725
726 out:
727 kfree(data);
728 return ret;
729}
730
686/* The main entry point in this file. Called from net/core/dev.c */ 731/* The main entry point in this file. Called from net/core/dev.c */
687 732
688int dev_ethtool(struct ifreq *ifr) 733int dev_ethtool(struct ifreq *ifr)
@@ -806,6 +851,9 @@ int dev_ethtool(struct ifreq *ifr)
806 case ETHTOOL_GSTATS: 851 case ETHTOOL_GSTATS:
807 rc = ethtool_get_stats(dev, useraddr); 852 rc = ethtool_get_stats(dev, useraddr);
808 break; 853 break;
854 case ETHTOOL_GPERMADDR:
855 rc = ethtool_get_perm_addr(dev, useraddr);
856 break;
809 default: 857 default:
810 rc = -EOPNOTSUPP; 858 rc = -EOPNOTSUPP;
811 } 859 }
@@ -826,6 +874,7 @@ int dev_ethtool(struct ifreq *ifr)
826 874
827EXPORT_SYMBOL(dev_ethtool); 875EXPORT_SYMBOL(dev_ethtool);
828EXPORT_SYMBOL(ethtool_op_get_link); 876EXPORT_SYMBOL(ethtool_op_get_link);
877EXPORT_SYMBOL_GPL(ethtool_op_get_perm_addr);
829EXPORT_SYMBOL(ethtool_op_get_sg); 878EXPORT_SYMBOL(ethtool_op_get_sg);
830EXPORT_SYMBOL(ethtool_op_get_tso); 879EXPORT_SYMBOL(ethtool_op_get_tso);
831EXPORT_SYMBOL(ethtool_op_get_tx_csum); 880EXPORT_SYMBOL(ethtool_op_get_tx_csum);
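A sketch of a userspace consumer of the new ETHTOOL_GPERMADDR command, assuming a kernel with this patch and a <linux/ethtool.h> that exports struct ethtool_perm_addr; the function name and buffer size below are illustrative:

	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <linux/ethtool.h>
	#include <linux/sockios.h>

	static int print_perm_addr(int fd, const char *ifname)
	{
		struct {
			struct ethtool_perm_addr epa;
			unsigned char buf[32];	/* backing for epa.data[] */
		} req;
		struct ifreq ifr;
		unsigned int i;

		memset(&req, 0, sizeof(req));
		req.epa.cmd  = ETHTOOL_GPERMADDR;
		req.epa.size = sizeof(req.buf);

		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
		ifr.ifr_data = (char *)&req;

		if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
			return -1;
		for (i = 0; i < req.epa.size; i++)	/* kernel clamps size to addr_len */
			printf("%02x%c", req.epa.data[i],
			       i + 1 == req.epa.size ? '\n' : ':');
		return 0;
	}

fd is any socket, e.g. socket(AF_INET, SOCK_DGRAM, 0). Per the handler above, the kernel rewrites epa.size down to dev->addr_len on success, or fails with -ETOOSMALL if the supplied buffer is smaller than the address.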
diff --git a/net/core/filter.c b/net/core/filter.c
index cd91a24f9720..079c2edff789 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -182,7 +182,7 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
182 A = ntohl(*(u32 *)ptr); 182 A = ntohl(*(u32 *)ptr);
183 continue; 183 continue;
184 } 184 }
185 return 0; 185 break;
186 case BPF_LD|BPF_H|BPF_ABS: 186 case BPF_LD|BPF_H|BPF_ABS:
187 k = fentry->k; 187 k = fentry->k;
188 load_h: 188 load_h:
@@ -191,7 +191,7 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
191 A = ntohs(*(u16 *)ptr); 191 A = ntohs(*(u16 *)ptr);
192 continue; 192 continue;
193 } 193 }
194 return 0; 194 break;
195 case BPF_LD|BPF_B|BPF_ABS: 195 case BPF_LD|BPF_B|BPF_ABS:
196 k = fentry->k; 196 k = fentry->k;
197load_b: 197load_b:
@@ -200,7 +200,7 @@ load_b:
200 A = *(u8 *)ptr; 200 A = *(u8 *)ptr;
201 continue; 201 continue;
202 } 202 }
203 return 0; 203 break;
204 case BPF_LD|BPF_W|BPF_LEN: 204 case BPF_LD|BPF_W|BPF_LEN:
205 A = skb->len; 205 A = skb->len;
206 continue; 206 continue;
diff --git a/net/core/flow.c b/net/core/flow.c
index f289570b15a3..7e95b39de9fd 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -42,7 +42,7 @@ static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
42 42
43#define flow_table(cpu) (per_cpu(flow_tables, cpu)) 43#define flow_table(cpu) (per_cpu(flow_tables, cpu))
44 44
45static kmem_cache_t *flow_cachep; 45static kmem_cache_t *flow_cachep __read_mostly;
46 46
47static int flow_lwm, flow_hwm; 47static int flow_lwm, flow_hwm;
48 48
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1beb782ac41b..39fc55edf691 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1217,7 +1217,7 @@ static void neigh_proxy_process(unsigned long arg)
1217 1217
1218 while (skb != (struct sk_buff *)&tbl->proxy_queue) { 1218 while (skb != (struct sk_buff *)&tbl->proxy_queue) {
1219 struct sk_buff *back = skb; 1219 struct sk_buff *back = skb;
1220 long tdif = back->stamp.tv_usec - now; 1220 long tdif = NEIGH_CB(back)->sched_next - now;
1221 1221
1222 skb = skb->next; 1222 skb = skb->next;
1223 if (tdif <= 0) { 1223 if (tdif <= 0) {
@@ -1248,8 +1248,9 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1248 kfree_skb(skb); 1248 kfree_skb(skb);
1249 return; 1249 return;
1250 } 1250 }
1251 skb->stamp.tv_sec = LOCALLY_ENQUEUED; 1251
1252 skb->stamp.tv_usec = sched_next; 1252 NEIGH_CB(skb)->sched_next = sched_next;
1253 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1253 1254
1254 spin_lock(&tbl->proxy_queue.lock); 1255 spin_lock(&tbl->proxy_queue.lock);
1255 if (del_timer(&tbl->proxy_timer)) { 1256 if (del_timer(&tbl->proxy_timer)) {
@@ -2342,8 +2343,8 @@ void neigh_app_ns(struct neighbour *n)
2342 } 2343 }
2343 nlh = (struct nlmsghdr *)skb->data; 2344 nlh = (struct nlmsghdr *)skb->data;
2344 nlh->nlmsg_flags = NLM_F_REQUEST; 2345 nlh->nlmsg_flags = NLM_F_REQUEST;
2345 NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; 2346 NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH;
2346 netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); 2347 netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
2347} 2348}
2348 2349
2349static void neigh_app_notify(struct neighbour *n) 2350static void neigh_app_notify(struct neighbour *n)
@@ -2360,8 +2361,8 @@ static void neigh_app_notify(struct neighbour *n)
2360 return; 2361 return;
2361 } 2362 }
2362 nlh = (struct nlmsghdr *)skb->data; 2363 nlh = (struct nlmsghdr *)skb->data;
2363 NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; 2364 NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH;
2364 netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); 2365 netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
2365} 2366}
2366 2367
2367#endif /* CONFIG_ARPD */ 2368#endif /* CONFIG_ARPD */
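The NEIGH_CB() accessor used in the hunks above parks proxy-queue scheduling state in the skb control block instead of overloading skb->stamp. Its shape in this series is approximately the following; see net/core/neighbour.c after the patch for the authoritative definition:

	/* Per-skb state while a proxied packet waits on tbl->proxy_queue. */
	struct neighbour_cb {
		unsigned long sched_next;	/* jiffies to process at */
		unsigned int flags;		/* e.g. LOCALLY_ENQUEUED */
	};

	#define NEIGH_CB(skb)	((struct neighbour_cb *)(skb)->cb)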
diff --git a/net/core/netfilter.c b/net/core/netfilter.c
deleted file mode 100644
index 076c156d5eda..000000000000
--- a/net/core/netfilter.c
+++ /dev/null
@@ -1,648 +0,0 @@
1/* netfilter.c: look after the filters for various protocols.
2 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
3 *
4 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
5 * way.
6 *
7 * Rusty Russell (C)2000 -- This code is GPL.
8 *
9 * February 2000: Modified by James Morris to have 1 queue per protocol.
10 * 15-Mar-2000: Added NF_REPEAT --RR.
11 * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik.
12 */
13#include <linux/config.h>
14#include <linux/kernel.h>
15#include <linux/netfilter.h>
16#include <net/protocol.h>
17#include <linux/init.h>
18#include <linux/skbuff.h>
19#include <linux/wait.h>
20#include <linux/module.h>
21#include <linux/interrupt.h>
22#include <linux/if.h>
23#include <linux/netdevice.h>
24#include <linux/inetdevice.h>
25#include <linux/tcp.h>
26#include <linux/udp.h>
27#include <linux/icmp.h>
28#include <net/sock.h>
29#include <net/route.h>
30#include <linux/ip.h>
31
32/* In this code, we can be waiting indefinitely for userspace to
33 * service a packet if a hook returns NF_QUEUE. We could keep a count
34 * of skbuffs queued for userspace, and not deregister a hook unless
35 * this is zero, but that sucks. Now, we simply check when the
36 * packets come back: if the hook is gone, the packet is discarded. */
37#ifdef CONFIG_NETFILTER_DEBUG
38#define NFDEBUG(format, args...) printk(format , ## args)
39#else
40#define NFDEBUG(format, args...)
41#endif
42
43/* Sockopts only registered and called from user context, so
44 net locking would be overkill. Also, [gs]etsockopt calls may
45 sleep. */
46static DECLARE_MUTEX(nf_sockopt_mutex);
47
48struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
49static LIST_HEAD(nf_sockopts);
50static DEFINE_SPINLOCK(nf_hook_lock);
51
52/*
53 * A queue handler may be registered for each protocol. Each is protected by
 54 * long term mutex. The handler must provide an outfn() to accept packets
55 * for queueing and must reinject all packets it receives, no matter what.
56 */
57static struct nf_queue_handler_t {
58 nf_queue_outfn_t outfn;
59 void *data;
60} queue_handler[NPROTO];
61static DEFINE_RWLOCK(queue_handler_lock);
62
63int nf_register_hook(struct nf_hook_ops *reg)
64{
65 struct list_head *i;
66
67 spin_lock_bh(&nf_hook_lock);
68 list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
69 if (reg->priority < ((struct nf_hook_ops *)i)->priority)
70 break;
71 }
72 list_add_rcu(&reg->list, i->prev);
73 spin_unlock_bh(&nf_hook_lock);
74
75 synchronize_net();
76 return 0;
77}
78
79void nf_unregister_hook(struct nf_hook_ops *reg)
80{
81 spin_lock_bh(&nf_hook_lock);
82 list_del_rcu(&reg->list);
83 spin_unlock_bh(&nf_hook_lock);
84
85 synchronize_net();
86}
87
88/* Do exclusive ranges overlap? */
89static inline int overlap(int min1, int max1, int min2, int max2)
90{
91 return max1 > min2 && min1 < max2;
92}
93
94/* Functions to register sockopt ranges (exclusive). */
95int nf_register_sockopt(struct nf_sockopt_ops *reg)
96{
97 struct list_head *i;
98 int ret = 0;
99
100 if (down_interruptible(&nf_sockopt_mutex) != 0)
101 return -EINTR;
102
103 list_for_each(i, &nf_sockopts) {
104 struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
105 if (ops->pf == reg->pf
106 && (overlap(ops->set_optmin, ops->set_optmax,
107 reg->set_optmin, reg->set_optmax)
108 || overlap(ops->get_optmin, ops->get_optmax,
109 reg->get_optmin, reg->get_optmax))) {
110 NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
111 ops->set_optmin, ops->set_optmax,
112 ops->get_optmin, ops->get_optmax,
113 reg->set_optmin, reg->set_optmax,
114 reg->get_optmin, reg->get_optmax);
115 ret = -EBUSY;
116 goto out;
117 }
118 }
119
120 list_add(&reg->list, &nf_sockopts);
121out:
122 up(&nf_sockopt_mutex);
123 return ret;
124}
125
126void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
127{
128 /* No point being interruptible: we're probably in cleanup_module() */
129 restart:
130 down(&nf_sockopt_mutex);
131 if (reg->use != 0) {
132 /* To be woken by nf_sockopt call... */
133 /* FIXME: Stuart Young's name appears gratuitously. */
134 set_current_state(TASK_UNINTERRUPTIBLE);
135 reg->cleanup_task = current;
136 up(&nf_sockopt_mutex);
137 schedule();
138 goto restart;
139 }
140 list_del(&reg->list);
141 up(&nf_sockopt_mutex);
142}
143
144/* Call get/setsockopt() */
145static int nf_sockopt(struct sock *sk, int pf, int val,
146 char __user *opt, int *len, int get)
147{
148 struct list_head *i;
149 struct nf_sockopt_ops *ops;
150 int ret;
151
152 if (down_interruptible(&nf_sockopt_mutex) != 0)
153 return -EINTR;
154
155 list_for_each(i, &nf_sockopts) {
156 ops = (struct nf_sockopt_ops *)i;
157 if (ops->pf == pf) {
158 if (get) {
159 if (val >= ops->get_optmin
160 && val < ops->get_optmax) {
161 ops->use++;
162 up(&nf_sockopt_mutex);
163 ret = ops->get(sk, val, opt, len);
164 goto out;
165 }
166 } else {
167 if (val >= ops->set_optmin
168 && val < ops->set_optmax) {
169 ops->use++;
170 up(&nf_sockopt_mutex);
171 ret = ops->set(sk, val, opt, *len);
172 goto out;
173 }
174 }
175 }
176 }
177 up(&nf_sockopt_mutex);
178 return -ENOPROTOOPT;
179
180 out:
181 down(&nf_sockopt_mutex);
182 ops->use--;
183 if (ops->cleanup_task)
184 wake_up_process(ops->cleanup_task);
185 up(&nf_sockopt_mutex);
186 return ret;
187}
188
189int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
190 int len)
191{
192 return nf_sockopt(sk, pf, val, opt, &len, 0);
193}
194
195int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
196{
197 return nf_sockopt(sk, pf, val, opt, len, 1);
198}
199
200static unsigned int nf_iterate(struct list_head *head,
201 struct sk_buff **skb,
202 int hook,
203 const struct net_device *indev,
204 const struct net_device *outdev,
205 struct list_head **i,
206 int (*okfn)(struct sk_buff *),
207 int hook_thresh)
208{
209 unsigned int verdict;
210
211 /*
212 * The caller must not block between calls to this
213 * function because of risk of continuing from deleted element.
214 */
215 list_for_each_continue_rcu(*i, head) {
216 struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
217
218 if (hook_thresh > elem->priority)
219 continue;
220
221 /* Optimization: we don't need to hold module
222 reference here, since function can't sleep. --RR */
223 verdict = elem->hook(hook, skb, indev, outdev, okfn);
224 if (verdict != NF_ACCEPT) {
225#ifdef CONFIG_NETFILTER_DEBUG
226 if (unlikely(verdict > NF_MAX_VERDICT)) {
227 NFDEBUG("Evil return from %p(%u).\n",
228 elem->hook, hook);
229 continue;
230 }
231#endif
232 if (verdict != NF_REPEAT)
233 return verdict;
234 *i = (*i)->prev;
235 }
236 }
237 return NF_ACCEPT;
238}
239
240int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
241{
242 int ret;
243
244 write_lock_bh(&queue_handler_lock);
245 if (queue_handler[pf].outfn)
246 ret = -EBUSY;
247 else {
248 queue_handler[pf].outfn = outfn;
249 queue_handler[pf].data = data;
250 ret = 0;
251 }
252 write_unlock_bh(&queue_handler_lock);
253
254 return ret;
255}
256
257/* The caller must flush their queue before this */
258int nf_unregister_queue_handler(int pf)
259{
260 write_lock_bh(&queue_handler_lock);
261 queue_handler[pf].outfn = NULL;
262 queue_handler[pf].data = NULL;
263 write_unlock_bh(&queue_handler_lock);
264
265 return 0;
266}
267
268/*
269 * Any packet that leaves via this function must come back
270 * through nf_reinject().
271 */
272static int nf_queue(struct sk_buff *skb,
273 struct list_head *elem,
274 int pf, unsigned int hook,
275 struct net_device *indev,
276 struct net_device *outdev,
277 int (*okfn)(struct sk_buff *))
278{
279 int status;
280 struct nf_info *info;
281#ifdef CONFIG_BRIDGE_NETFILTER
282 struct net_device *physindev = NULL;
283 struct net_device *physoutdev = NULL;
284#endif
285
 286	/* QUEUE == DROP if no one is waiting, to be safe. */
287 read_lock(&queue_handler_lock);
288 if (!queue_handler[pf].outfn) {
289 read_unlock(&queue_handler_lock);
290 kfree_skb(skb);
291 return 1;
292 }
293
294 info = kmalloc(sizeof(*info), GFP_ATOMIC);
295 if (!info) {
296 if (net_ratelimit())
297 printk(KERN_ERR "OOM queueing packet %p\n",
298 skb);
299 read_unlock(&queue_handler_lock);
300 kfree_skb(skb);
301 return 1;
302 }
303
304 *info = (struct nf_info) {
305 (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
306
307 /* If it's going away, ignore hook. */
308 if (!try_module_get(info->elem->owner)) {
309 read_unlock(&queue_handler_lock);
310 kfree(info);
311 return 0;
312 }
313
314 /* Bump dev refs so they don't vanish while packet is out */
315 if (indev) dev_hold(indev);
316 if (outdev) dev_hold(outdev);
317
318#ifdef CONFIG_BRIDGE_NETFILTER
319 if (skb->nf_bridge) {
320 physindev = skb->nf_bridge->physindev;
321 if (physindev) dev_hold(physindev);
322 physoutdev = skb->nf_bridge->physoutdev;
323 if (physoutdev) dev_hold(physoutdev);
324 }
325#endif
326
327 status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
328 read_unlock(&queue_handler_lock);
329
330 if (status < 0) {
331 /* James M doesn't say fuck enough. */
332 if (indev) dev_put(indev);
333 if (outdev) dev_put(outdev);
334#ifdef CONFIG_BRIDGE_NETFILTER
335 if (physindev) dev_put(physindev);
336 if (physoutdev) dev_put(physoutdev);
337#endif
338 module_put(info->elem->owner);
339 kfree(info);
340 kfree_skb(skb);
341 return 1;
342 }
343 return 1;
344}
345
346/* Returns 1 if okfn() needs to be executed by the caller,
347 * -EPERM for NF_DROP, 0 otherwise. */
348int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
349 struct net_device *indev,
350 struct net_device *outdev,
351 int (*okfn)(struct sk_buff *),
352 int hook_thresh)
353{
354 struct list_head *elem;
355 unsigned int verdict;
356 int ret = 0;
357
358 /* We may already have this, but read-locks nest anyway */
359 rcu_read_lock();
360
361 elem = &nf_hooks[pf][hook];
362next_hook:
363 verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
364 outdev, &elem, okfn, hook_thresh);
365 if (verdict == NF_ACCEPT || verdict == NF_STOP) {
366 ret = 1;
367 goto unlock;
368 } else if (verdict == NF_DROP) {
369 kfree_skb(*pskb);
370 ret = -EPERM;
371 } else if (verdict == NF_QUEUE) {
372 NFDEBUG("nf_hook: Verdict = QUEUE.\n");
373 if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn))
374 goto next_hook;
375 }
376unlock:
377 rcu_read_unlock();
378 return ret;
379}
380
381void nf_reinject(struct sk_buff *skb, struct nf_info *info,
382 unsigned int verdict)
383{
384 struct list_head *elem = &info->elem->list;
385 struct list_head *i;
386
387 rcu_read_lock();
388
389 /* Release those devices we held, or Alexey will kill me. */
390 if (info->indev) dev_put(info->indev);
391 if (info->outdev) dev_put(info->outdev);
392#ifdef CONFIG_BRIDGE_NETFILTER
393 if (skb->nf_bridge) {
394 if (skb->nf_bridge->physindev)
395 dev_put(skb->nf_bridge->physindev);
396 if (skb->nf_bridge->physoutdev)
397 dev_put(skb->nf_bridge->physoutdev);
398 }
399#endif
400
401 /* Drop reference to owner of hook which queued us. */
402 module_put(info->elem->owner);
403
404 list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
405 if (i == elem)
406 break;
407 }
408
409 if (elem == &nf_hooks[info->pf][info->hook]) {
410 /* The module which sent it to userspace is gone. */
411 NFDEBUG("%s: module disappeared, dropping packet.\n",
412 __FUNCTION__);
413 verdict = NF_DROP;
414 }
415
416 /* Continue traversal iff userspace said ok... */
417 if (verdict == NF_REPEAT) {
418 elem = elem->prev;
419 verdict = NF_ACCEPT;
420 }
421
422 if (verdict == NF_ACCEPT) {
423 next_hook:
424 verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
425 &skb, info->hook,
426 info->indev, info->outdev, &elem,
427 info->okfn, INT_MIN);
428 }
429
430 switch (verdict) {
431 case NF_ACCEPT:
432 info->okfn(skb);
433 break;
434
435 case NF_QUEUE:
436 if (!nf_queue(skb, elem, info->pf, info->hook,
437 info->indev, info->outdev, info->okfn))
438 goto next_hook;
439 break;
440 }
441 rcu_read_unlock();
442
443 if (verdict == NF_DROP)
444 kfree_skb(skb);
445
446 kfree(info);
447 return;
448}
449
450#ifdef CONFIG_INET
451/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
452int ip_route_me_harder(struct sk_buff **pskb)
453{
454 struct iphdr *iph = (*pskb)->nh.iph;
455 struct rtable *rt;
456 struct flowi fl = {};
457 struct dst_entry *odst;
458 unsigned int hh_len;
459
460 /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
461 * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
462 */
463 if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
464 fl.nl_u.ip4_u.daddr = iph->daddr;
465 fl.nl_u.ip4_u.saddr = iph->saddr;
466 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
467 fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
468#ifdef CONFIG_IP_ROUTE_FWMARK
469 fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
470#endif
471 fl.proto = iph->protocol;
472 if (ip_route_output_key(&rt, &fl) != 0)
473 return -1;
474
475 /* Drop old route. */
476 dst_release((*pskb)->dst);
477 (*pskb)->dst = &rt->u.dst;
478 } else {
479 /* non-local src, find valid iif to satisfy
480 * rp-filter when calling ip_route_input. */
481 fl.nl_u.ip4_u.daddr = iph->saddr;
482 if (ip_route_output_key(&rt, &fl) != 0)
483 return -1;
484
485 odst = (*pskb)->dst;
486 if (ip_route_input(*pskb, iph->daddr, iph->saddr,
487 RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
488 dst_release(&rt->u.dst);
489 return -1;
490 }
491 dst_release(&rt->u.dst);
492 dst_release(odst);
493 }
494
495 if ((*pskb)->dst->error)
496 return -1;
497
498 /* Change in oif may mean change in hh_len. */
499 hh_len = (*pskb)->dst->dev->hard_header_len;
500 if (skb_headroom(*pskb) < hh_len) {
501 struct sk_buff *nskb;
502
503 nskb = skb_realloc_headroom(*pskb, hh_len);
504 if (!nskb)
505 return -1;
506 if ((*pskb)->sk)
507 skb_set_owner_w(nskb, (*pskb)->sk);
508 kfree_skb(*pskb);
509 *pskb = nskb;
510 }
511
512 return 0;
513}
514EXPORT_SYMBOL(ip_route_me_harder);
515
516int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
517{
518 struct sk_buff *nskb;
519
520 if (writable_len > (*pskb)->len)
521 return 0;
522
523 /* Not exclusive use of packet? Must copy. */
524 if (skb_shared(*pskb) || skb_cloned(*pskb))
525 goto copy_skb;
526
527 return pskb_may_pull(*pskb, writable_len);
528
529copy_skb:
530 nskb = skb_copy(*pskb, GFP_ATOMIC);
531 if (!nskb)
532 return 0;
533 BUG_ON(skb_is_nonlinear(nskb));
534
535 /* Rest of kernel will get very unhappy if we pass it a
536 suddenly-orphaned skbuff */
537 if ((*pskb)->sk)
538 skb_set_owner_w(nskb, (*pskb)->sk);
539 kfree_skb(*pskb);
540 *pskb = nskb;
541 return 1;
542}
543EXPORT_SYMBOL(skb_ip_make_writable);
544#endif /*CONFIG_INET*/
545
546/* Internal logging interface, which relies on the real
547 LOG target modules */
548
549#define NF_LOG_PREFIXLEN 128
550
551static nf_logfn *nf_logging[NPROTO]; /* = NULL */
552static int reported = 0;
553static DEFINE_SPINLOCK(nf_log_lock);
554
555int nf_log_register(int pf, nf_logfn *logfn)
556{
557 int ret = -EBUSY;
558
559 /* Any setup of logging members must be done before
560 * substituting pointer. */
561 spin_lock(&nf_log_lock);
562 if (!nf_logging[pf]) {
563 rcu_assign_pointer(nf_logging[pf], logfn);
564 ret = 0;
565 }
566 spin_unlock(&nf_log_lock);
567 return ret;
568}
569
570void nf_log_unregister(int pf, nf_logfn *logfn)
571{
572 spin_lock(&nf_log_lock);
573 if (nf_logging[pf] == logfn)
574 nf_logging[pf] = NULL;
575 spin_unlock(&nf_log_lock);
576
577 /* Give time to concurrent readers. */
578 synchronize_net();
579}
580
581void nf_log_packet(int pf,
582 unsigned int hooknum,
583 const struct sk_buff *skb,
584 const struct net_device *in,
585 const struct net_device *out,
586 const char *fmt, ...)
587{
588 va_list args;
589 char prefix[NF_LOG_PREFIXLEN];
590 nf_logfn *logfn;
591
592 rcu_read_lock();
593 logfn = rcu_dereference(nf_logging[pf]);
594 if (logfn) {
595 va_start(args, fmt);
596 vsnprintf(prefix, sizeof(prefix), fmt, args);
597 va_end(args);
598 /* We must read logging before nf_logfn[pf] */
599 logfn(hooknum, skb, in, out, prefix);
600 } else if (!reported) {
601 printk(KERN_WARNING "nf_log_packet: can\'t log yet, "
602 "no backend logging module loaded in!\n");
603 reported++;
604 }
605 rcu_read_unlock();
606}
607EXPORT_SYMBOL(nf_log_register);
608EXPORT_SYMBOL(nf_log_unregister);
609EXPORT_SYMBOL(nf_log_packet);
610
611/* This does not belong here, but locally generated errors need it if connection
 612 tracking is in use: without this, the connection may not be in the hash table,
 613 and hence manufactured ICMP or RST packets will not be associated with it. */
614void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
615
616void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
617{
618 void (*attach)(struct sk_buff *, struct sk_buff *);
619
620 if (skb->nfct && (attach = ip_ct_attach) != NULL) {
621 mb(); /* Just to be sure: must be read before executing this */
622 attach(new, skb);
623 }
624}
625
626void __init netfilter_init(void)
627{
628 int i, h;
629
630 for (i = 0; i < NPROTO; i++) {
631 for (h = 0; h < NF_MAX_HOOKS; h++)
632 INIT_LIST_HEAD(&nf_hooks[i][h]);
633 }
634}
635
636EXPORT_SYMBOL(ip_ct_attach);
637EXPORT_SYMBOL(nf_ct_attach);
638EXPORT_SYMBOL(nf_getsockopt);
639EXPORT_SYMBOL(nf_hook_slow);
640EXPORT_SYMBOL(nf_hooks);
641EXPORT_SYMBOL(nf_register_hook);
642EXPORT_SYMBOL(nf_register_queue_handler);
643EXPORT_SYMBOL(nf_register_sockopt);
644EXPORT_SYMBOL(nf_reinject);
645EXPORT_SYMBOL(nf_setsockopt);
646EXPORT_SYMBOL(nf_unregister_hook);
647EXPORT_SYMBOL(nf_unregister_queue_handler);
648EXPORT_SYMBOL(nf_unregister_sockopt);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c327c9edadc5..a1a9a7abff50 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -33,6 +33,7 @@
33#define MAX_UDP_CHUNK 1460 33#define MAX_UDP_CHUNK 1460
34#define MAX_SKBS 32 34#define MAX_SKBS 32
35#define MAX_QUEUE_DEPTH (MAX_SKBS / 2) 35#define MAX_QUEUE_DEPTH (MAX_SKBS / 2)
36#define MAX_RETRIES 20000
36 37
37static DEFINE_SPINLOCK(skb_list_lock); 38static DEFINE_SPINLOCK(skb_list_lock);
38static int nr_skbs; 39static int nr_skbs;
@@ -248,14 +249,14 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
248 int status; 249 int status;
249 struct netpoll_info *npinfo; 250 struct netpoll_info *npinfo;
250 251
251repeat: 252 if (!np || !np->dev || !netif_running(np->dev)) {
252 if(!np || !np->dev || !netif_running(np->dev)) {
253 __kfree_skb(skb); 253 __kfree_skb(skb);
254 return; 254 return;
255 } 255 }
256 256
257 /* avoid recursion */
258 npinfo = np->dev->npinfo; 257 npinfo = np->dev->npinfo;
258
259 /* avoid recursion */
259 if (npinfo->poll_owner == smp_processor_id() || 260 if (npinfo->poll_owner == smp_processor_id() ||
260 np->dev->xmit_lock_owner == smp_processor_id()) { 261 np->dev->xmit_lock_owner == smp_processor_id()) {
261 if (np->drop) 262 if (np->drop)
@@ -265,30 +266,37 @@ repeat:
265 return; 266 return;
266 } 267 }
267 268
268 spin_lock(&np->dev->xmit_lock); 269 do {
269 np->dev->xmit_lock_owner = smp_processor_id(); 270 npinfo->tries--;
271 spin_lock(&np->dev->xmit_lock);
272 np->dev->xmit_lock_owner = smp_processor_id();
270 273
271 /* 274 /*
272 * network drivers do not expect to be called if the queue is 275 * network drivers do not expect to be called if the queue is
273 * stopped. 276 * stopped.
274 */ 277 */
275 if (netif_queue_stopped(np->dev)) { 278 if (netif_queue_stopped(np->dev)) {
279 np->dev->xmit_lock_owner = -1;
280 spin_unlock(&np->dev->xmit_lock);
281 netpoll_poll(np);
282 udelay(50);
283 continue;
284 }
285
286 status = np->dev->hard_start_xmit(skb, np->dev);
276 np->dev->xmit_lock_owner = -1; 287 np->dev->xmit_lock_owner = -1;
277 spin_unlock(&np->dev->xmit_lock); 288 spin_unlock(&np->dev->xmit_lock);
278 289
279 netpoll_poll(np); 290 /* success */
280 goto repeat; 291 if(!status) {
281 } 292 npinfo->tries = MAX_RETRIES; /* reset */
282 293 return;
283 status = np->dev->hard_start_xmit(skb, np->dev); 294 }
284 np->dev->xmit_lock_owner = -1;
285 spin_unlock(&np->dev->xmit_lock);
286 295
287 /* transmit busy */ 296 /* transmit busy */
288 if(status) {
289 netpoll_poll(np); 297 netpoll_poll(np);
290 goto repeat; 298 udelay(50);
291 } 299 } while (npinfo->tries > 0);
292} 300}
293 301
294void netpoll_send_udp(struct netpoll *np, const char *msg, int len) 302void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
@@ -349,15 +357,11 @@ static void arp_reply(struct sk_buff *skb)
349 unsigned char *arp_ptr; 357 unsigned char *arp_ptr;
350 int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; 358 int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
351 u32 sip, tip; 359 u32 sip, tip;
352 unsigned long flags;
353 struct sk_buff *send_skb; 360 struct sk_buff *send_skb;
354 struct netpoll *np = NULL; 361 struct netpoll *np = NULL;
355 362
356 spin_lock_irqsave(&npinfo->rx_lock, flags);
357 if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev) 363 if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev)
358 np = npinfo->rx_np; 364 np = npinfo->rx_np;
359 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
360
361 if (!np) 365 if (!np)
362 return; 366 return;
363 367
@@ -639,9 +643,11 @@ int netpoll_setup(struct netpoll *np)
639 if (!npinfo) 643 if (!npinfo)
640 goto release; 644 goto release;
641 645
646 npinfo->rx_flags = 0;
642 npinfo->rx_np = NULL; 647 npinfo->rx_np = NULL;
643 npinfo->poll_lock = SPIN_LOCK_UNLOCKED; 648 npinfo->poll_lock = SPIN_LOCK_UNLOCKED;
644 npinfo->poll_owner = -1; 649 npinfo->poll_owner = -1;
650 npinfo->tries = MAX_RETRIES;
645 npinfo->rx_lock = SPIN_LOCK_UNLOCKED; 651 npinfo->rx_lock = SPIN_LOCK_UNLOCKED;
646 } else 652 } else
647 npinfo = ndev->npinfo; 653 npinfo = ndev->npinfo;
@@ -718,9 +724,16 @@ int netpoll_setup(struct netpoll *np)
718 npinfo->rx_np = np; 724 npinfo->rx_np = np;
719 spin_unlock_irqrestore(&npinfo->rx_lock, flags); 725 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
720 } 726 }
727
728 /* fill up the skb queue */
729 refill_skbs();
730
721 /* last thing to do is link it to the net device structure */ 731 /* last thing to do is link it to the net device structure */
722 ndev->npinfo = npinfo; 732 ndev->npinfo = npinfo;
723 733
734 /* avoid racing with NAPI reading npinfo */
735 synchronize_rcu();
736
724 return 0; 737 return 0;
725 738
726 release: 739 release:
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 975d651312dc..8eb083b6041a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -363,7 +363,7 @@ struct pktgen_thread {
363 * All Rights Reserved. 363 * All Rights Reserved.
364 * 364 *
365 */ 365 */
366inline static s64 divremdi3(s64 x, s64 y, int type) 366static inline s64 divremdi3(s64 x, s64 y, int type)
367{ 367{
368 u64 a = (x < 0) ? -x : x; 368 u64 a = (x < 0) ? -x : x;
369 u64 b = (y < 0) ? -y : y; 369 u64 b = (y < 0) ? -y : y;
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index bb55675f0685..b8203de5ff07 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -32,7 +32,6 @@
32 * Further increasing requires to change hash table size. 32 * Further increasing requires to change hash table size.
33 */ 33 */
34int sysctl_max_syn_backlog = 256; 34int sysctl_max_syn_backlog = 256;
35EXPORT_SYMBOL(sysctl_max_syn_backlog);
36 35
37int reqsk_queue_alloc(struct request_sock_queue *queue, 36int reqsk_queue_alloc(struct request_sock_queue *queue,
38 const int nr_table_entries) 37 const int nr_table_entries)
@@ -53,6 +52,8 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
53 get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); 52 get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
54 rwlock_init(&queue->syn_wait_lock); 53 rwlock_init(&queue->syn_wait_lock);
 55	queue->rskq_accept_head = NULL; 54
55 queue->rskq_defer_accept = 0;
56 lopt->nr_table_entries = nr_table_entries;
56 57
57 write_lock_bh(&queue->syn_wait_lock); 58 write_lock_bh(&queue->syn_wait_lock);
58 queue->listen_opt = lopt; 59 queue->listen_opt = lopt;
@@ -62,3 +63,28 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
62} 63}
63 64
64EXPORT_SYMBOL(reqsk_queue_alloc); 65EXPORT_SYMBOL(reqsk_queue_alloc);
66
67void reqsk_queue_destroy(struct request_sock_queue *queue)
68{
69 /* make all the listen_opt local to us */
70 struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
71
72 if (lopt->qlen != 0) {
73 int i;
74
75 for (i = 0; i < lopt->nr_table_entries; i++) {
76 struct request_sock *req;
77
78 while ((req = lopt->syn_table[i]) != NULL) {
79 lopt->syn_table[i] = req->dl_next;
80 lopt->qlen--;
81 reqsk_free(req);
82 }
83 }
84 }
85
86 BUG_TRAP(lopt->qlen == 0);
87 kfree(lopt);
88}
89
90EXPORT_SYMBOL(reqsk_queue_destroy);
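A hypothetical teardown path using the new destructor; the surrounding function is illustrative, not from this patch:

	static void example_listen_close(struct request_sock_queue *queue)
	{
		/* Drops every request still parked in the SYN table,
		 * then frees the listen_sock itself. */
		reqsk_queue_destroy(queue);
	}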
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4b1bb30e6381..9bed7569ce3f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -148,7 +148,7 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
148{ 148{
149 int err = 0; 149 int err = 0;
150 150
151 NETLINK_CB(skb).dst_groups = group; 151 NETLINK_CB(skb).dst_group = group;
152 if (echo) 152 if (echo)
153 atomic_inc(&skb->users); 153 atomic_inc(&skb->users);
154 netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL); 154 netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
@@ -458,8 +458,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
458 kfree_skb(skb); 458 kfree_skb(skb);
459 return; 459 return;
460 } 460 }
461 NETLINK_CB(skb).dst_groups = RTMGRP_LINK; 461 NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
462 netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL); 462 netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL);
463} 463}
464 464
465static int rtnetlink_done(struct netlink_callback *cb) 465static int rtnetlink_done(struct netlink_callback *cb)
@@ -708,7 +708,8 @@ void __init rtnetlink_init(void)
708 if (!rta_buf) 708 if (!rta_buf)
709 panic("rtnetlink_init: cannot allocate rta_buf\n"); 709 panic("rtnetlink_init: cannot allocate rta_buf\n");
710 710
711 rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv); 711 rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
712 THIS_MODULE);
712 if (rtnl == NULL) 713 if (rtnl == NULL)
713 panic("rtnetlink_init: cannot initialize rtnetlink\n"); 714 panic("rtnetlink_init: cannot initialize rtnetlink\n");
714 netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); 715 netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d9f7b06fe886..f80a28785610 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -68,7 +68,10 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 
-static kmem_cache_t *skbuff_head_cache;
+static kmem_cache_t *skbuff_head_cache __read_mostly;
+static kmem_cache_t *skbuff_fclone_cache __read_mostly;
+
+struct timeval __read_mostly skb_tv_base;
 
 /*
  *	Keep out-of-line to prevent kernel bloat.
@@ -118,7 +121,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  */
 
 /**
- *	alloc_skb	-	allocate a network buffer
+ *	__alloc_skb	-	allocate a network buffer
  *	@size: size to allocate
  *	@gfp_mask: allocation mask
  *
@@ -129,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  *	Buffers may only be allocated from interrupts using a @gfp_mask of
  *	%GFP_ATOMIC.
  */
-struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
+struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask,
+			    int fclone)
 {
 	struct sk_buff *skb;
 	u8 *data;
 
 	/* Get the HEAD */
-	skb = kmem_cache_alloc(skbuff_head_cache,
-			       gfp_mask & ~__GFP_DMA);
+	if (fclone)
+		skb = kmem_cache_alloc(skbuff_fclone_cache,
+				       gfp_mask & ~__GFP_DMA);
+	else
+		skb = kmem_cache_alloc(skbuff_head_cache,
+				       gfp_mask & ~__GFP_DMA);
+
 	if (!skb)
 		goto out;
 
@@ -153,7 +162,15 @@ struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
 	skb->data = data;
 	skb->tail = data;
 	skb->end  = data + size;
+	if (fclone) {
+		struct sk_buff *child = skb + 1;
+		atomic_t *fclone_ref = (atomic_t *) (child + 1);
 
+		skb->fclone = SKB_FCLONE_ORIG;
+		atomic_set(fclone_ref, 1);
+
+		child->fclone = SKB_FCLONE_UNAVAILABLE;
+	}
 	atomic_set(&(skb_shinfo(skb)->dataref), 1);
 	skb_shinfo(skb)->nr_frags = 0;
 	skb_shinfo(skb)->tso_size = 0;
@@ -266,8 +283,34 @@ void skb_release_data(struct sk_buff *skb)
  */
 void kfree_skbmem(struct sk_buff *skb)
 {
+	struct sk_buff *other;
+	atomic_t *fclone_ref;
+
 	skb_release_data(skb);
-	kmem_cache_free(skbuff_head_cache, skb);
+	switch (skb->fclone) {
+	case SKB_FCLONE_UNAVAILABLE:
+		kmem_cache_free(skbuff_head_cache, skb);
+		break;
+
+	case SKB_FCLONE_ORIG:
+		fclone_ref = (atomic_t *) (skb + 2);
+		if (atomic_dec_and_test(fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, skb);
+		break;
+
+	case SKB_FCLONE_CLONE:
+		fclone_ref = (atomic_t *) (skb + 1);
+		other = skb - 1;
+
+		/* The clone portion is available for
+		 * fast-cloning again.
+		 */
+		skb->fclone = SKB_FCLONE_UNAVAILABLE;
+
+		if (atomic_dec_and_test(fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, other);
+		break;
+	};
 }
 
 /**
@@ -281,8 +324,6 @@ void kfree_skbmem(struct sk_buff *skb)
 
 void __kfree_skb(struct sk_buff *skb)
 {
-	BUG_ON(skb->list != NULL);
-
 	dst_release(skb->dst);
 #ifdef CONFIG_XFRM
 	secpath_put(skb->sp);
@@ -302,7 +343,6 @@ void __kfree_skb(struct sk_buff *skb)
 	skb->tc_index = 0;
 #ifdef CONFIG_NET_CLS_ACT
 	skb->tc_verd = 0;
-	skb->tc_classid = 0;
 #endif
 #endif
 
@@ -325,19 +365,27 @@ void __kfree_skb(struct sk_buff *skb)
 
 struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 {
-	struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+	struct sk_buff *n;
 
-	if (!n)
-		return NULL;
+	n = skb + 1;
+	if (skb->fclone == SKB_FCLONE_ORIG &&
+	    n->fclone == SKB_FCLONE_UNAVAILABLE) {
+		atomic_t *fclone_ref = (atomic_t *) (n + 1);
+		n->fclone = SKB_FCLONE_CLONE;
+		atomic_inc(fclone_ref);
+	} else {
+		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+		if (!n)
+			return NULL;
+		n->fclone = SKB_FCLONE_UNAVAILABLE;
+	}
 
 #define C(x) n->x = skb->x
 
 	n->next = n->prev = NULL;
-	n->list = NULL;
 	n->sk = NULL;
-	C(stamp);
+	C(tstamp);
 	C(dev);
-	C(real_dev);
 	C(h);
 	C(nh);
 	C(mac);
@@ -361,7 +409,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 	n->destructor = NULL;
 #ifdef CONFIG_NETFILTER
 	C(nfmark);
-	C(nfcache);
 	C(nfct);
 	nf_conntrack_get(skb->nfct);
 	C(nfctinfo);
@@ -370,17 +417,13 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 	nf_bridge_get(skb->nf_bridge);
 #endif
 #endif /*CONFIG_NETFILTER*/
-#if defined(CONFIG_HIPPI)
-	C(private);
-#endif
 #ifdef CONFIG_NET_SCHED
 	C(tc_index);
 #ifdef CONFIG_NET_CLS_ACT
 	n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
-	n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd);
-	n->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
+	n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
+	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
 	C(input_dev);
-	C(tc_classid);
 #endif
 
 #endif
@@ -404,10 +447,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	 */
 	unsigned long offset = new->data - old->data;
 
-	new->list	= NULL;
 	new->sk		= NULL;
 	new->dev	= old->dev;
-	new->real_dev	= old->real_dev;
 	new->priority	= old->priority;
 	new->protocol	= old->protocol;
 	new->dst	= dst_clone(old->dst);
@@ -419,12 +460,12 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->mac.raw	= old->mac.raw + offset;
 	memcpy(new->cb, old->cb, sizeof(old->cb));
 	new->local_df	= old->local_df;
+	new->fclone	= SKB_FCLONE_UNAVAILABLE;
 	new->pkt_type	= old->pkt_type;
-	new->stamp	= old->stamp;
+	new->tstamp	= old->tstamp;
 	new->destructor = NULL;
 #ifdef CONFIG_NETFILTER
 	new->nfmark	= old->nfmark;
-	new->nfcache	= old->nfcache;
 	new->nfct	= old->nfct;
 	nf_conntrack_get(old->nfct);
 	new->nfctinfo	= old->nfctinfo;
@@ -1344,50 +1385,43 @@ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
 	__skb_queue_tail(list, newsk);
 	spin_unlock_irqrestore(&list->lock, flags);
 }
+
 /**
  *	skb_unlink	-	remove a buffer from a list
  *	@skb: buffer to remove
+ *	@list: list to use
  *
- *	Place a packet after a given packet in a list. The list locks are taken
- *	and this function is atomic with respect to other list locked calls
+ *	Remove a packet from a list. The list locks are taken and this
+ *	function is atomic with respect to other list locked calls
  *
- *	Works even without knowing the list it is sitting on, which can be
- *	handy at times. It also means that THE LIST MUST EXIST when you
- *	unlink. Thus a list must have its contents unlinked before it is
- *	destroyed.
+ *	You must know what list the SKB is on.
  */
-void skb_unlink(struct sk_buff *skb)
+void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
 {
-	struct sk_buff_head *list = skb->list;
-
-	if (list) {
-		unsigned long flags;
+	unsigned long flags;
 
-		spin_lock_irqsave(&list->lock, flags);
-		if (skb->list == list)
-			__skb_unlink(skb, skb->list);
-		spin_unlock_irqrestore(&list->lock, flags);
-	}
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_unlink(skb, list);
+	spin_unlock_irqrestore(&list->lock, flags);
 }
 
-
 /**
  *	skb_append	-	append a buffer
  *	@old: buffer to insert after
  *	@newsk: buffer to insert
+ *	@list: list to use
  *
 *	Place a packet after a given packet in a list. The list locks are taken
 *	and this function is atomic with respect to other list locked calls.
 *	A buffer cannot be placed on two lists at the same time.
 */
-
-void skb_append(struct sk_buff *old, struct sk_buff *newsk)
+void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&old->list->lock, flags);
-	__skb_append(old, newsk);
-	spin_unlock_irqrestore(&old->list->lock, flags);
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_append(old, newsk, list);
+	spin_unlock_irqrestore(&list->lock, flags);
 }
 
 
@@ -1395,19 +1429,21 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk)
  *	skb_insert	-	insert a buffer
  *	@old: buffer to insert before
  *	@newsk: buffer to insert
+ *	@list: list to use
+ *
+ *	Place a packet before a given packet in a list. The list locks are
+ *	taken and this function is atomic with respect to other list locked
+ *	calls.
  *
- *	Place a packet before a given packet in a list. The list locks are taken
- *	and this function is atomic with respect to other list locked calls
 *	A buffer cannot be placed on two lists at the same time.
 */
-
-void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
+void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&old->list->lock, flags);
-	__skb_insert(newsk, old->prev, old, old->list);
-	spin_unlock_irqrestore(&old->list->lock, flags);
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_insert(newsk, old->prev, old, list);
+	spin_unlock_irqrestore(&list->lock, flags);
 }
 
 #if 0
@@ -1663,12 +1699,23 @@ void __init skb_init(void)
 					      NULL, NULL);
 	if (!skbuff_head_cache)
 		panic("cannot create skbuff cache");
+
+	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+						(2*sizeof(struct sk_buff)) +
+						sizeof(atomic_t),
+						0,
+						SLAB_HWCACHE_ALIGN,
+						NULL, NULL);
+	if (!skbuff_fclone_cache)
+		panic("cannot create skbuff cache");
+
+	do_gettimeofday(&skb_tv_base);
 }
 
 EXPORT_SYMBOL(___pskb_trim);
 EXPORT_SYMBOL(__kfree_skb);
 EXPORT_SYMBOL(__pskb_pull_tail);
-EXPORT_SYMBOL(alloc_skb);
+EXPORT_SYMBOL(__alloc_skb);
 EXPORT_SYMBOL(pskb_copy);
 EXPORT_SYMBOL(pskb_expand_head);
 EXPORT_SYMBOL(skb_checksum);
@@ -1696,3 +1743,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
 EXPORT_SYMBOL(skb_seq_read);
 EXPORT_SYMBOL(skb_abort_seq_read);
 EXPORT_SYMBOL(skb_find_text);
+EXPORT_SYMBOL(skb_tv_base);
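
The fclone ("fast clone") changes above all hang off one memory layout: a skbuff_fclone_cache object is 2 * sizeof(struct sk_buff) + sizeof(atomic_t), i.e. a parent sk_buff, a companion clone sk_buff, and a shared reference count packed back to back. That is what makes the pointer arithmetic line up in __alloc_skb() (skb + 1, child + 1), skb_clone() (skb + 1, n + 1) and kfree_skbmem() (skb + 2, skb - 1). A standalone sketch of the arithmetic with stand-in types (the real struct sk_buff is of course different):

    #include <stdio.h>

    struct sk_buff { int fclone; char rest[200]; };  /* stand-in, not the real struct */
    typedef int atomic_t;

    int main(void)
    {
        /* one skbuff_fclone_cache object: [ orig | clone | refcount ] */
        static char obj[2 * sizeof(struct sk_buff) + sizeof(atomic_t)];
        struct sk_buff *orig  = (struct sk_buff *)obj;
        struct sk_buff *child = orig + 1;                /* __alloc_skb()       */
        atomic_t *ref         = (atomic_t *)(child + 1); /* same as (orig + 2)  */

        printf("orig=%p child=%p ref=%p\n", (void *)orig, (void *)child, (void *)ref);
        printf("ref reached from the orig:  %p\n", (void *)(atomic_t *)(orig + 2));
        printf("orig reached from the clone: %p\n", (void *)(child - 1)); /* kfree_skbmem(CLONE) */
        return 0;
    }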
diff --git a/net/core/sock.c b/net/core/sock.c
index 8b35ccdc2b3b..c13594579bfb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -206,13 +206,14 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 	 */
 
 #ifdef SO_DONTLINGER		/* Compatibility item... */
-	switch (optname) {
-	case SO_DONTLINGER:
-		sock_reset_flag(sk, SOCK_LINGER);
-		return 0;
+	if (optname == SO_DONTLINGER) {
+		lock_sock(sk);
+		sock_reset_flag(sk, SOCK_LINGER);
+		release_sock(sk);
+		return 0;
 	}
 #endif
 
 	if(optlen<sizeof(int))
 		return(-EINVAL);
 
@@ -259,7 +260,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 
 		if (val > sysctl_wmem_max)
 			val = sysctl_wmem_max;
-
+set_sndbuf:
 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
 		if ((val * 2) < SOCK_MIN_SNDBUF)
 			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
@@ -273,6 +274,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 			sk->sk_write_space(sk);
 		break;
 
+	case SO_SNDBUFFORCE:
+		if (!capable(CAP_NET_ADMIN)) {
+			ret = -EPERM;
+			break;
+		}
+		goto set_sndbuf;
+
 	case SO_RCVBUF:
 		/* Don't error on this BSD doesn't and if you think
 		   about it this is right. Otherwise apps have to
@@ -281,7 +289,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 
 		if (val > sysctl_rmem_max)
 			val = sysctl_rmem_max;
-
+set_rcvbuf:
 		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
 		/* FIXME: is this lower bound the right one? */
 		if ((val * 2) < SOCK_MIN_RCVBUF)
@@ -290,6 +298,13 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 			sk->sk_rcvbuf = val * 2;
 		break;
 
+	case SO_RCVBUFFORCE:
+		if (!capable(CAP_NET_ADMIN)) {
+			ret = -EPERM;
+			break;
+		}
+		goto set_rcvbuf;
+
 	case SO_KEEPALIVE:
 #ifdef CONFIG_INET
 		if (sk->sk_protocol == IPPROTO_TCP)
@@ -685,6 +700,80 @@ void sk_free(struct sock *sk)
 	module_put(owner);
 }
 
+struct sock *sk_clone(const struct sock *sk, const unsigned int __nocast priority)
+{
+	struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
+
+	if (newsk != NULL) {
+		struct sk_filter *filter;
+
+		memcpy(newsk, sk, sk->sk_prot->obj_size);
+
+		/* SANITY */
+		sk_node_init(&newsk->sk_node);
+		sock_lock_init(newsk);
+		bh_lock_sock(newsk);
+
+		atomic_set(&newsk->sk_rmem_alloc, 0);
+		atomic_set(&newsk->sk_wmem_alloc, 0);
+		atomic_set(&newsk->sk_omem_alloc, 0);
+		skb_queue_head_init(&newsk->sk_receive_queue);
+		skb_queue_head_init(&newsk->sk_write_queue);
+
+		rwlock_init(&newsk->sk_dst_lock);
+		rwlock_init(&newsk->sk_callback_lock);
+
+		newsk->sk_dst_cache	= NULL;
+		newsk->sk_wmem_queued	= 0;
+		newsk->sk_forward_alloc = 0;
+		newsk->sk_send_head	= NULL;
+		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
+		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+
+		sock_reset_flag(newsk, SOCK_DONE);
+		skb_queue_head_init(&newsk->sk_error_queue);
+
+		filter = newsk->sk_filter;
+		if (filter != NULL)
+			sk_filter_charge(newsk, filter);
+
+		if (unlikely(xfrm_sk_clone_policy(newsk))) {
+			/* It is still raw copy of parent, so invalidate
+			 * destructor and make plain sk_free() */
+			newsk->sk_destruct = NULL;
+			sk_free(newsk);
+			newsk = NULL;
+			goto out;
+		}
+
+		newsk->sk_err	   = 0;
+		newsk->sk_priority = 0;
+		atomic_set(&newsk->sk_refcnt, 2);
+
+		/*
+		 * Increment the counter in the same struct proto as the master
+		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
+		 * is the same as sk->sk_prot->socks, as this field was copied
+		 * with memcpy).
+		 *
+		 * This _changes_ the previous behaviour, where
+		 * tcp_create_openreq_child always was incrementing the
+		 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
+		 * to be taken into account in all callers. -acme
+		 */
+		sk_refcnt_debug_inc(newsk);
+		newsk->sk_socket = NULL;
+		newsk->sk_sleep	 = NULL;
+
+		if (newsk->sk_prot->sockets_allocated)
+			atomic_inc(newsk->sk_prot->sockets_allocated);
+	}
+out:
+	return newsk;
+}
+
+EXPORT_SYMBOL_GPL(sk_clone);
+
 void __init sk_init(void)
 {
 	if (num_physpages <= 4096) {
@@ -1352,11 +1441,7 @@ void sk_common_release(struct sock *sk)
 
 	xfrm_sk_free_policy(sk);
 
-#ifdef INET_REFCNT_DEBUG
-	if (atomic_read(&sk->sk_refcnt) != 1)
-		printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n",
-		       sk, atomic_read(&sk->sk_refcnt));
-#endif
+	sk_refcnt_debug_release(sk);
 	sock_put(sk);
 }
 
@@ -1367,7 +1452,8 @@ static LIST_HEAD(proto_list);
 
 int proto_register(struct proto *prot, int alloc_slab)
 {
-	char *request_sock_slab_name;
+	char *request_sock_slab_name = NULL;
+	char *timewait_sock_slab_name;
 	int rc = -ENOBUFS;
 
 	if (alloc_slab) {
@@ -1398,6 +1484,23 @@ int proto_register(struct proto *prot, int alloc_slab)
 				goto out_free_request_sock_slab_name;
 			}
 		}
+
+		if (prot->twsk_obj_size) {
+			static const char mask[] = "tw_sock_%s";
+
+			timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
+
+			if (timewait_sock_slab_name == NULL)
+				goto out_free_request_sock_slab;
+
+			sprintf(timewait_sock_slab_name, mask, prot->name);
+			prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name,
+							    prot->twsk_obj_size,
+							    0, SLAB_HWCACHE_ALIGN,
+							    NULL, NULL);
+			if (prot->twsk_slab == NULL)
+				goto out_free_timewait_sock_slab_name;
+		}
 	}
 
 	write_lock(&proto_list_lock);
@@ -1406,6 +1509,13 @@ int proto_register(struct proto *prot, int alloc_slab)
 	rc = 0;
 out:
 	return rc;
+out_free_timewait_sock_slab_name:
+	kfree(timewait_sock_slab_name);
+out_free_request_sock_slab:
+	if (prot->rsk_prot && prot->rsk_prot->slab) {
+		kmem_cache_destroy(prot->rsk_prot->slab);
+		prot->rsk_prot->slab = NULL;
+	}
 out_free_request_sock_slab_name:
 	kfree(request_sock_slab_name);
 out_free_sock_slab:
@@ -1433,6 +1543,14 @@ void proto_unregister(struct proto *prot)
 		prot->rsk_prot->slab = NULL;
 	}
 
+	if (prot->twsk_slab != NULL) {
+		const char *name = kmem_cache_name(prot->twsk_slab);
+
+		kmem_cache_destroy(prot->twsk_slab);
+		kfree(name);
+		prot->twsk_slab = NULL;
+	}
+
 	list_del(&prot->node);
 	write_unlock(&proto_list_lock);
 }
@@ -1601,8 +1719,8 @@ EXPORT_SYMBOL(sock_wfree);
 EXPORT_SYMBOL(sock_wmalloc);
 EXPORT_SYMBOL(sock_i_uid);
 EXPORT_SYMBOL(sock_i_ino);
-#ifdef CONFIG_SYSCTL
 EXPORT_SYMBOL(sysctl_optmem_max);
+#ifdef CONFIG_SYSCTL
 EXPORT_SYMBOL(sysctl_rmem_max);
 EXPORT_SYMBOL(sysctl_wmem_max);
 #endif
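
The new SO_SNDBUFFORCE/SO_RCVBUFFORCE options give a CAP_NET_ADMIN process a way past the wmem_max/rmem_max sysctl clamps: the capability check runs first, then control jumps to the same set_sndbuf/set_rcvbuf path the unprivileged options use. From userspace this looks like the following sketch (the fallback #define matches the value used on most architectures, but that is an assumption; check your asm/socket.h):

    #include <stdio.h>
    #include <sys/socket.h>

    #ifndef SO_SNDBUFFORCE
    #define SO_SNDBUFFORCE 32   /* assumption: value on most architectures */
    #endif

    int main(void)
    {
        int fd = socket(AF_INET, SOCK_DGRAM, 0);
        int val = 8 * 1024 * 1024;  /* may exceed sysctl_wmem_max */

        if (setsockopt(fd, SOL_SOCKET, SO_SNDBUFFORCE, &val, sizeof(val)) < 0)
            perror("SO_SNDBUFFORCE (requires CAP_NET_ADMIN)");
        return 0;
    }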
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 8f817ad9f546..2f278c8e4743 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -9,23 +9,18 @@
 #include <linux/sysctl.h>
 #include <linux/config.h>
 #include <linux/module.h>
+#include <linux/socket.h>
+#include <net/sock.h>
 
 #ifdef CONFIG_SYSCTL
 
 extern int netdev_max_backlog;
-extern int netdev_budget;
 extern int weight_p;
-extern int net_msg_cost;
-extern int net_msg_burst;
 
 extern __u32 sysctl_wmem_max;
 extern __u32 sysctl_rmem_max;
-extern __u32 sysctl_wmem_default;
-extern __u32 sysctl_rmem_default;
 
 extern int sysctl_core_destroy_delay;
-extern int sysctl_optmem_max;
-extern int sysctl_somaxconn;
 
 #ifdef CONFIG_NET_DIVERT
 extern char sysctl_divert_version[];
diff --git a/net/core/utils.c b/net/core/utils.c
index e11a8654f363..7b5970fc9e40 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -16,17 +16,19 @@
 #include <linux/module.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
+#include <linux/inet.h>
 #include <linux/mm.h>
+#include <linux/net.h>
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/random.h>
 #include <linux/percpu.h>
 #include <linux/init.h>
 
+#include <asm/byteorder.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
 
-
 /*
   This is a maximally equidistributed combined Tausworthe generator
   based on code from GNU Scientific Library 1.5 (30 Jun 2004)
@@ -153,3 +155,38 @@ int net_ratelimit(void)
 EXPORT_SYMBOL(net_random);
 EXPORT_SYMBOL(net_ratelimit);
 EXPORT_SYMBOL(net_srandom);
+
+/*
+ * Convert an ASCII string to binary IP.
+ * This is outside of net/ipv4/ because various code that uses IP addresses
+ * is otherwise not dependent on the TCP/IP stack.
+ */
+
+__u32 in_aton(const char *str)
+{
+	unsigned long l;
+	unsigned int val;
+	int i;
+
+	l = 0;
+	for (i = 0; i < 4; i++)
+	{
+		l <<= 8;
+		if (*str != '\0')
+		{
+			val = 0;
+			while (*str != '\0' && *str != '.')
+			{
+				val *= 10;
+				val += *str - '0';
+				str++;
+			}
+			l |= val;
+			if (*str != '\0')
+				str++;
+		}
+	}
+	return(htonl(l));
+}
+
+EXPORT_SYMBOL(in_aton);
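
in_aton() keeps the parser deliberately dumb: there is no validation, each dotted field is accumulated as val = val * 10 + digit and OR-ed into the shifted result (so an out-of-range field bleeds into the neighbouring byte), and the value comes back in network byte order via htonl(). Compiling the same algorithm in userspace shows the behaviour:

    #include <stdio.h>
    #include <arpa/inet.h>

    /* same algorithm as in_aton() above, userspace types */
    static unsigned int my_aton(const char *str)
    {
        unsigned long l = 0;
        unsigned int val;
        int i;

        for (i = 0; i < 4; i++) {
            l <<= 8;
            if (*str != '\0') {
                val = 0;
                while (*str != '\0' && *str != '.')
                    val = val * 10 + *str++ - '0';
                l |= val;
                if (*str != '\0')
                    str++;
            }
        }
        return htonl(l);
    }

    int main(void)
    {
        printf("%08x\n", ntohl(my_aton("192.168.0.1"))); /* c0a80001 */
        printf("%08x\n", ntohl(my_aton("1.2.0.300")));   /* 0102012c: 300 bled upward */
        return 0;
    }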
diff --git a/net/core/wireless.c b/net/core/wireless.c
index 3ff5639c0b78..5caae2399f3a 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -571,10 +571,6 @@ static int wireless_seq_show(struct seq_file *seq, void *v)
 		return 0;
 }
 
-extern void *dev_seq_start(struct seq_file *seq, loff_t *pos);
-extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
-extern void dev_seq_stop(struct seq_file *seq, void *v);
-
 static struct seq_operations wireless_seq_ops = {
 	.start = dev_seq_start,
 	.next  = dev_seq_next,
@@ -1144,8 +1140,8 @@ static inline void rtmsg_iwinfo(struct net_device * dev,
 		kfree_skb(skb);
 		return;
 	}
-	NETLINK_CB(skb).dst_groups = RTMGRP_LINK;
-	netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_ATOMIC);
+	NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
+	netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC);
 }
 #endif	/* WE_EVENT_NETLINK */
 
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
new file mode 100644
index 000000000000..187ac182e24b
--- /dev/null
+++ b/net/dccp/Kconfig
@@ -0,0 +1,50 @@
+menu "DCCP Configuration (EXPERIMENTAL)"
+	depends on INET && EXPERIMENTAL
+
+config IP_DCCP
+	tristate "The DCCP Protocol (EXPERIMENTAL)"
+	---help---
+	  Datagram Congestion Control Protocol
+
+	  From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>.
+
+	  The Datagram Congestion Control Protocol (DCCP) is a transport
+	  protocol that implements bidirectional, unicast connections of
+	  congestion-controlled, unreliable datagrams. It should be suitable
+	  for use by applications such as streaming media, Internet telephony,
+	  and on-line games.
+
+	  To compile this protocol support as a module, choose M here: the
+	  module will be called dccp.
+
+	  If in doubt, say N.
+
+config INET_DCCP_DIAG
+	depends on IP_DCCP && INET_DIAG
+	def_tristate y if (IP_DCCP = y && INET_DIAG = y)
+	def_tristate m
+
+source "net/dccp/ccids/Kconfig"
+
+menu "DCCP Kernel Hacking"
+	depends on IP_DCCP && DEBUG_KERNEL=y
+
+config IP_DCCP_DEBUG
+	bool "DCCP debug messages"
+	---help---
+	  Only use this if you're hacking DCCP.
+
+	  Just say N.
+
+config IP_DCCP_UNLOAD_HACK
+	depends on IP_DCCP=m && IP_DCCP_CCID3=m
+	bool "DCCP control sock unload hack"
+	---help---
+	  Enable this to be able to unload the dccp module when it
+	  has only one refcount held, the control sock one. Just execute
+	  "rmmod dccp_ccid3 dccp".
+
+	  Just say N.
+endmenu
+
+endmenu
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
new file mode 100644
index 000000000000..fb97bb042455
--- /dev/null
+++ b/net/dccp/Makefile
@@ -0,0 +1,10 @@
+obj-$(CONFIG_IP_DCCP) += dccp.o
+
+dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \
+	  timer.o
+
+obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
+
+dccp_diag-y := diag.o
+
+obj-y += ccids/
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
new file mode 100644
index 000000000000..9d8fc0e289ea
--- /dev/null
+++ b/net/dccp/ccid.c
@@ -0,0 +1,139 @@
+/*
+ *  net/dccp/ccid.c
+ *
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *  CCID infrastructure
+ *
+ *	This program is free software; you can redistribute it and/or modify it
+ *	under the terms of the GNU General Public License version 2 as
+ *	published by the Free Software Foundation.
+ */
+
+#include "ccid.h"
+
+static struct ccid *ccids[CCID_MAX];
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
+static atomic_t ccids_lockct = ATOMIC_INIT(0);
+static DEFINE_SPINLOCK(ccids_lock);
+
+/*
+ * The strategy is: modifications to the ccids vector are short, do not
+ * sleep and are very rare, but read access should be free of any exclusive
+ * locks.
+ */
+static void ccids_write_lock(void)
+{
+	spin_lock(&ccids_lock);
+	while (atomic_read(&ccids_lockct) != 0) {
+		spin_unlock(&ccids_lock);
+		yield();
+		spin_lock(&ccids_lock);
+	}
+}
+
+static inline void ccids_write_unlock(void)
+{
+	spin_unlock(&ccids_lock);
+}
+
+static inline void ccids_read_lock(void)
+{
+	atomic_inc(&ccids_lockct);
+	spin_unlock_wait(&ccids_lock);
+}
+
+static inline void ccids_read_unlock(void)
+{
+	atomic_dec(&ccids_lockct);
+}
+
+#else
+#define ccids_write_lock() do { } while(0)
+#define ccids_write_unlock() do { } while(0)
+#define ccids_read_lock() do { } while(0)
+#define ccids_read_unlock() do { } while(0)
+#endif
+
+int ccid_register(struct ccid *ccid)
+{
+	int err;
+
+	if (ccid->ccid_init == NULL)
+		return -1;
+
+	ccids_write_lock();
+	err = -EEXIST;
+	if (ccids[ccid->ccid_id] == NULL) {
+		ccids[ccid->ccid_id] = ccid;
+		err = 0;
+	}
+	ccids_write_unlock();
+	if (err == 0)
+		pr_info("CCID: Registered CCID %d (%s)\n",
+			ccid->ccid_id, ccid->ccid_name);
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(ccid_register);
+
+int ccid_unregister(struct ccid *ccid)
+{
+	ccids_write_lock();
+	ccids[ccid->ccid_id] = NULL;
+	ccids_write_unlock();
+	pr_info("CCID: Unregistered CCID %d (%s)\n",
+		ccid->ccid_id, ccid->ccid_name);
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(ccid_unregister);
+
+struct ccid *ccid_init(unsigned char id, struct sock *sk)
+{
+	struct ccid *ccid;
+
+#ifdef CONFIG_KMOD
+	if (ccids[id] == NULL)
+		request_module("net-dccp-ccid-%d", id);
+#endif
+	ccids_read_lock();
+
+	ccid = ccids[id];
+	if (ccid == NULL)
+		goto out;
+
+	if (!try_module_get(ccid->ccid_owner))
+		goto out_err;
+
+	if (ccid->ccid_init(sk) != 0)
+		goto out_module_put;
+out:
+	ccids_read_unlock();
+	return ccid;
+out_module_put:
+	module_put(ccid->ccid_owner);
+out_err:
+	ccid = NULL;
+	goto out;
+}
+
+EXPORT_SYMBOL_GPL(ccid_init);
+
+void ccid_exit(struct ccid *ccid, struct sock *sk)
+{
+	if (ccid == NULL)
+		return;
+
+	ccids_read_lock();
+
+	if (ccids[ccid->ccid_id] != NULL) {
+		if (ccid->ccid_exit != NULL)
+			ccid->ccid_exit(sk);
+		module_put(ccid->ccid_owner);
+	}
+
+	ccids_read_unlock();
+}
+
+EXPORT_SYMBOL_GPL(ccid_exit);
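
The locking here is a hand-rolled "rare writer" scheme: readers only bump ccids_lockct and wait for any in-flight writer (spin_unlock_wait), while the writer takes ccids_lock and spins until the reader count drains to zero. A pthread/C11-atomics model of the same idea, an illustration of the pattern rather than kernel code (spin_unlock_wait() is approximated by a lock/unlock pair):

    #include <pthread.h>
    #include <sched.h>
    #include <stdatomic.h>

    static atomic_int lockct = 0;                               /* ccids_lockct */
    static pthread_mutex_t biglock = PTHREAD_MUTEX_INITIALIZER; /* ccids_lock   */

    static void vec_write_lock(void)
    {
        pthread_mutex_lock(&biglock);
        while (atomic_load(&lockct) != 0) {   /* wait out active readers */
            pthread_mutex_unlock(&biglock);
            sched_yield();                    /* the kernel code calls yield() */
            pthread_mutex_lock(&biglock);
        }
    }

    static void vec_write_unlock(void)
    {
        pthread_mutex_unlock(&biglock);
    }

    static void vec_read_lock(void)
    {
        atomic_fetch_add(&lockct, 1);
        /* approximation of spin_unlock_wait(): let a lock holder finish */
        pthread_mutex_lock(&biglock);
        pthread_mutex_unlock(&biglock);
    }

    static void vec_read_unlock(void)
    {
        atomic_fetch_sub(&lockct, 1);
    }

    int main(void)
    {
        vec_read_lock();  vec_read_unlock();
        vec_write_lock(); vec_write_unlock();
        return 0;
    }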
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
new file mode 100644
index 000000000000..962f1e9e2f7e
--- /dev/null
+++ b/net/dccp/ccid.h
@@ -0,0 +1,180 @@
+#ifndef _CCID_H
+#define _CCID_H
+/*
+ *  net/dccp/ccid.h
+ *
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *  CCID infrastructure
+ *
+ *	This program is free software; you can redistribute it and/or modify it
+ *	under the terms of the GNU General Public License version 2 as
+ *	published by the Free Software Foundation.
+ */
+
+#include <net/sock.h>
+#include <linux/dccp.h>
+#include <linux/list.h>
+#include <linux/module.h>
+
+#define CCID_MAX 255
+
+struct ccid {
+	unsigned char	ccid_id;
+	const char	*ccid_name;
+	struct module	*ccid_owner;
+	int		(*ccid_init)(struct sock *sk);
+	void		(*ccid_exit)(struct sock *sk);
+	int		(*ccid_hc_rx_init)(struct sock *sk);
+	int		(*ccid_hc_tx_init)(struct sock *sk);
+	void		(*ccid_hc_rx_exit)(struct sock *sk);
+	void		(*ccid_hc_tx_exit)(struct sock *sk);
+	void		(*ccid_hc_rx_packet_recv)(struct sock *sk,
+						  struct sk_buff *skb);
+	int		(*ccid_hc_rx_parse_options)(struct sock *sk,
+						    unsigned char option,
+						    unsigned char len, u16 idx,
+						    unsigned char* value);
+	void		(*ccid_hc_rx_insert_options)(struct sock *sk,
+						     struct sk_buff *skb);
+	void		(*ccid_hc_tx_insert_options)(struct sock *sk,
+						     struct sk_buff *skb);
+	void		(*ccid_hc_tx_packet_recv)(struct sock *sk,
+						  struct sk_buff *skb);
+	int		(*ccid_hc_tx_parse_options)(struct sock *sk,
+						    unsigned char option,
+						    unsigned char len, u16 idx,
+						    unsigned char* value);
+	int		(*ccid_hc_tx_send_packet)(struct sock *sk,
+						  struct sk_buff *skb, int len);
+	void		(*ccid_hc_tx_packet_sent)(struct sock *sk, int more,
+						  int len);
+	void		(*ccid_hc_rx_get_info)(struct sock *sk,
+					       struct tcp_info *info);
+	void		(*ccid_hc_tx_get_info)(struct sock *sk,
+					       struct tcp_info *info);
+};
+
+extern int ccid_register(struct ccid *ccid);
+extern int ccid_unregister(struct ccid *ccid);
+
+extern struct ccid *ccid_init(unsigned char id, struct sock *sk);
+extern void ccid_exit(struct ccid *ccid, struct sock *sk);
+
+static inline void __ccid_get(struct ccid *ccid)
+{
+	__module_get(ccid->ccid_owner);
+}
+
+static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
+					 struct sk_buff *skb, int len)
+{
+	int rc = 0;
+	if (ccid->ccid_hc_tx_send_packet != NULL)
+		rc = ccid->ccid_hc_tx_send_packet(sk, skb, len);
+	return rc;
+}
+
+static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
+					  int more, int len)
+{
+	if (ccid->ccid_hc_tx_packet_sent != NULL)
+		ccid->ccid_hc_tx_packet_sent(sk, more, len);
+}
+
+static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk)
+{
+	int rc = 0;
+	if (ccid->ccid_hc_rx_init != NULL)
+		rc = ccid->ccid_hc_rx_init(sk);
+	return rc;
+}
+
+static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk)
+{
+	int rc = 0;
+	if (ccid->ccid_hc_tx_init != NULL)
+		rc = ccid->ccid_hc_tx_init(sk);
+	return rc;
+}
+
+static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk)
+{
+	if (ccid->ccid_hc_rx_exit != NULL &&
+	    dccp_sk(sk)->dccps_hc_rx_ccid_private != NULL)
+		ccid->ccid_hc_rx_exit(sk);
+}
+
+static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk)
+{
+	if (ccid->ccid_hc_tx_exit != NULL &&
+	    dccp_sk(sk)->dccps_hc_tx_ccid_private != NULL)
+		ccid->ccid_hc_tx_exit(sk);
+}
+
+static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
+					  struct sk_buff *skb)
+{
+	if (ccid->ccid_hc_rx_packet_recv != NULL)
+		ccid->ccid_hc_rx_packet_recv(sk, skb);
+}
+
+static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
+					  struct sk_buff *skb)
+{
+	if (ccid->ccid_hc_tx_packet_recv != NULL)
+		ccid->ccid_hc_tx_packet_recv(sk, skb);
+}
+
+static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
+					   unsigned char option,
+					   unsigned char len, u16 idx,
+					   unsigned char* value)
+{
+	int rc = 0;
+	if (ccid->ccid_hc_tx_parse_options != NULL)
+		rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx,
+						    value);
+	return rc;
+}
+
+static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
+					   unsigned char option,
+					   unsigned char len, u16 idx,
+					   unsigned char* value)
+{
+	int rc = 0;
+	if (ccid->ccid_hc_rx_parse_options != NULL)
+		rc = ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value);
+	return rc;
+}
+
+static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk,
+					     struct sk_buff *skb)
+{
+	if (ccid->ccid_hc_tx_insert_options != NULL)
+		ccid->ccid_hc_tx_insert_options(sk, skb);
+}
+
+static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
+					     struct sk_buff *skb)
+{
+	if (ccid->ccid_hc_rx_insert_options != NULL)
+		ccid->ccid_hc_rx_insert_options(sk, skb);
+}
+
+static inline void ccid_hc_rx_get_info(struct ccid *ccid, struct sock *sk,
+				       struct tcp_info *info)
+{
+	if (ccid->ccid_hc_rx_get_info != NULL)
+		ccid->ccid_hc_rx_get_info(sk, info);
+}
+
+static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk,
+				       struct tcp_info *info)
+{
+	if (ccid->ccid_hc_tx_get_info != NULL)
+		ccid->ccid_hc_tx_get_info(sk, info);
+}
+#endif /* _CCID_H */
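
Everything above dispatches through the struct ccid ops table, and every hook is optional except ccid_init (ccid_register() rejects a NULL ccid_init). What a congestion-control module plugs in looks roughly like the following sketch; the "example" CCID, its ID and its callbacks are hypothetical, only the field names and the register/unregister entry points come from this header:

    #include "ccid.h"   /* this header; it pulls in linux/module.h */

    static int example_ccid_init(struct sock *sk)
    {
        return 0;   /* allocate per-half-connection state here */
    }

    static void example_ccid_exit(struct sock *sk)
    {
    }

    static struct ccid example_ccid = {
        .ccid_id    = 254,               /* hypothetical, not an assigned ID */
        .ccid_name  = "example",
        .ccid_owner = THIS_MODULE,
        .ccid_init  = example_ccid_init, /* mandatory */
        .ccid_exit  = example_ccid_exit, /* every other hook is optional */
    };

    static int __init example_module_init(void)
    {
        return ccid_register(&example_ccid);
    }

    static void __exit example_module_exit(void)
    {
        ccid_unregister(&example_ccid);
    }

    module_init(example_module_init);
    module_exit(example_module_exit);
    MODULE_LICENSE("GPL");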
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
new file mode 100644
index 000000000000..7684d83946a4
--- /dev/null
+++ b/net/dccp/ccids/Kconfig
@@ -0,0 +1,29 @@
+menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
+	depends on IP_DCCP && EXPERIMENTAL
+
+config IP_DCCP_CCID3
+	tristate "CCID3 (TFRC) (EXPERIMENTAL)"
+	depends on IP_DCCP
+	---help---
+	  CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
+	  rate-controlled congestion control mechanism.  TFRC is designed to
+	  be reasonably fair when competing for bandwidth with TCP-like flows,
+	  where a flow is "reasonably fair" if its sending rate is generally
+	  within a factor of two of the sending rate of a TCP flow under the
+	  same conditions.  However, TFRC has a much lower variation of
+	  throughput over time compared with TCP, which makes CCID 3 more
+	  suitable than CCID 2 for applications such as streaming media where
+	  a relatively smooth sending rate is of importance.
+
+	  CCID 3 is further described in [CCID 3 PROFILE].  The TFRC
+	  congestion control algorithms were initially described in RFC 3448.
+
+	  This text was extracted from draft-ietf-dccp-spec-11.txt.
+
+	  If in doubt, say M.
+
+config IP_DCCP_TFRC_LIB
+	depends on IP_DCCP_CCID3
+	def_tristate IP_DCCP_CCID3
+
+endmenu
diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile
new file mode 100644
index 000000000000..956f79f50743
--- /dev/null
+++ b/net/dccp/ccids/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o
+
+dccp_ccid3-y := ccid3.o
+
+obj-y += lib/
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
new file mode 100644
index 000000000000..7bf3b3a91e97
--- /dev/null
+++ b/net/dccp/ccids/ccid3.c
@@ -0,0 +1,1221 @@
+/*
+ *  net/dccp/ccids/ccid3.c
+ *
+ *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *
+ *  An implementation of the DCCP protocol
+ *
+ *  This code has been developed by the University of Waikato WAND
+ *  research group. For further information please see http://www.wand.net.nz/
+ *
+ *  This code also uses code from Lulea University, rereleased as GPL by its
+ *  authors:
+ *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ *
+ *  Changes to meet Linux coding standards, to make it meet latest ccid3 draft
+ *  and to make it work as a loadable module in the DCCP stack written by
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
+ *
+ *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/config.h>
+#include "../ccid.h"
+#include "../dccp.h"
+#include "lib/packet_history.h"
+#include "lib/loss_interval.h"
+#include "lib/tfrc.h"
+#include "ccid3.h"
+
+/*
+ * The reason for the maths with 10 here is to avoid 32 bit overflow when a
+ * is big.
+ */
+static inline u32 usecs_div(const u32 a, const u32 b)
+{
+	const u32 tmp = a * (USEC_PER_SEC / 10);
+	return b > 20 ? tmp / (b / 10) : tmp;
+}
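
The scaling trick in usecs_div() above is worth spelling out. The exact quantity is a * 1,000,000 / b, but a * 1,000,000 wraps a u32 once a exceeds roughly 4294; multiplying by 100,000 instead and dividing b by 10 computes the same ratio while tolerating a up to about 42,949 (and the b > 20 guard keeps b / 10 away from zero). A quick userspace check:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t a = 40000, b = 250000; /* 40 kB every 250 ms -> 160000 B/s */

        uint32_t naive  = (uint32_t)(a * 1000000u) / b; /* wraps: a*1e6 = 4e10 */
        uint32_t scaled = (a * 100000u) / (b / 10);     /* the usecs_div() trick */

        printf("naive=%u scaled=%u exact=%llu\n",
               naive, scaled, (unsigned long long)a * 1000000ull / b);
        return 0;
    }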
+
+static int ccid3_debug;
+
+#ifdef CCID3_DEBUG
+#define ccid3_pr_debug(format, a...) \
+	do { if (ccid3_debug) \
+		printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
+	} while (0)
+#else
+#define ccid3_pr_debug(format, a...)
+#endif
+
+static struct dccp_tx_hist *ccid3_tx_hist;
+static struct dccp_rx_hist *ccid3_rx_hist;
+static struct dccp_li_hist *ccid3_li_hist;
+
+static int ccid3_init(struct sock *sk)
+{
+	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+	return 0;
+}
+
+static void ccid3_exit(struct sock *sk)
+{
+	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+}
+
+/* TFRC sender states */
+enum ccid3_hc_tx_states {
+	TFRC_SSTATE_NO_SENT = 1,
+	TFRC_SSTATE_NO_FBACK,
+	TFRC_SSTATE_FBACK,
+	TFRC_SSTATE_TERM,
+};
+
+#ifdef CCID3_DEBUG
+static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
+{
+	static char *ccid3_state_names[] = {
+	[TFRC_SSTATE_NO_SENT]  = "NO_SENT",
+	[TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
+	[TFRC_SSTATE_FBACK]    = "FBACK",
+	[TFRC_SSTATE_TERM]     = "TERM",
+	};
+
+	return ccid3_state_names[state];
+}
+#endif
+
+static inline void ccid3_hc_tx_set_state(struct sock *sk,
+					 enum ccid3_hc_tx_states state)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state;
+
+	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
+		       dccp_role(sk), sk, ccid3_tx_state_name(oldstate),
+		       ccid3_tx_state_name(state));
+	WARN_ON(state == oldstate);
+	hctx->ccid3hctx_state = state;
+}
+
+/* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */
+static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx)
+{
+	/*
+	 * If no feedback has been received yet, the spec says t_ipi is
+	 * 1 second (set elsewhere); it then doubles after every expiry of
+	 * the no feedback timer (handled in a separate function).
+	 */
+	if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
+		hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s,
+						  hctx->ccid3hctx_x);
+}
+
+/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
+static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx)
+{
+	hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
+					   TFRC_OPSYS_HALF_TIME_GRAN);
+}
+
+/*
+ * Update X by
+ *    If (p > 0)
+ *       x_calc = calcX(s, R, p);
+ *       X = max(min(X_calc, 2 * X_recv), s / t_mbi);
+ *    Else
+ *       If (now - tld >= R)
+ *          X = max(min(2 * X, 2 * X_recv), s / R);
+ *          tld = now;
+ */
+static void ccid3_hc_tx_update_x(struct sock *sk)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+
+	/* To avoid large error in calcX */
+	if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) {
+		hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s,
+						     hctx->ccid3hctx_rtt,
+						     hctx->ccid3hctx_p);
+		hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc,
+							  2 * hctx->ccid3hctx_x_recv),
+					       (hctx->ccid3hctx_s /
+						TFRC_MAX_BACK_OFF_TIME));
+	} else {
+		struct timeval now;
+
+		do_gettimeofday(&now);
+		if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >=
+		    hctx->ccid3hctx_rtt) {
+			hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv,
+								  hctx->ccid3hctx_x) * 2,
+						       usecs_div(hctx->ccid3hctx_s,
+								 hctx->ccid3hctx_rtt));
+			hctx->ccid3hctx_t_ld = now;
+		}
+	}
+}
+
+static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
+{
+	struct sock *sk = (struct sock *)data;
+	struct dccp_sock *dp = dccp_sk(sk);
+	unsigned long next_tmout = 0;
+	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
+		/* Try again later. */
+		/* XXX: set some sensible MIB */
+		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+			       jiffies + HZ / 5);
+		goto out;
+	}
+
+	ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk,
+		       ccid3_tx_state_name(hctx->ccid3hctx_state));
+
+	switch (hctx->ccid3hctx_state) {
+	case TFRC_SSTATE_TERM:
+		goto out;
+	case TFRC_SSTATE_NO_FBACK:
+		/* Halve send rate */
+		hctx->ccid3hctx_x /= 2;
+		if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s /
+					 TFRC_MAX_BACK_OFF_TIME))
+			hctx->ccid3hctx_x = (hctx->ccid3hctx_s /
+					     TFRC_MAX_BACK_OFF_TIME);
+
+		ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d "
+			       "bytes/s\n",
+			       dccp_role(sk), sk,
+			       ccid3_tx_state_name(hctx->ccid3hctx_state),
+			       hctx->ccid3hctx_x);
+		next_tmout = max_t(u32, 2 * usecs_div(hctx->ccid3hctx_s,
+						      hctx->ccid3hctx_x),
+					TFRC_INITIAL_TIMEOUT);
+		/*
+		 * FIXME - not sure the above calculation is correct. See
+		 * section 5 of the CCID 3 draft (revision 11): we should
+		 * adjust tx_t_ipi and double that to really achieve it.
+		 */
+		break;
+	case TFRC_SSTATE_FBACK:
+		/*
+		 * Check if IDLE since last timeout and recv rate is less than
+		 * 4 packets per RTT
+		 */
+		if (!hctx->ccid3hctx_idle ||
+		    (hctx->ccid3hctx_x_recv >=
+		     4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) {
+			ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n",
+				       dccp_role(sk), sk,
+				       ccid3_tx_state_name(hctx->ccid3hctx_state));
+			/* Halve sending rate */
+
+			/*  If (X_calc > 2 * X_recv)
+			 *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
+			 *  Else
+			 *    X_recv = X_calc / 4;
+			 */
+			BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P &&
+			       hctx->ccid3hctx_x_calc == 0);
+
+			/* check also if p is zero -> x_calc is infinity? */
+			if (hctx->ccid3hctx_p < TFRC_SMALLEST_P ||
+			    hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv)
+				hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2,
+								    hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME));
+			else
+				hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4;
+
+			/* Update sending rate */
+			ccid3_hc_tx_update_x(sk);
+		}
+		/*
+		 * Schedule no feedback timer to expire in
+		 * max(4 * R, 2 * s / X)
+		 */
+		next_tmout = max_t(u32, hctx->ccid3hctx_t_rto,
+					2 * usecs_div(hctx->ccid3hctx_s,
+						      hctx->ccid3hctx_x));
+		break;
+	default:
+		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+		       __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
+		dump_stack();
+		goto out;
+	}
+
+	sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+		       jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));
+	hctx->ccid3hctx_idle = 1;
+out:
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
+
+static int ccid3_hc_tx_send_packet(struct sock *sk,
+				   struct sk_buff *skb, int len)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	struct dccp_tx_hist_entry *new_packet;
+	struct timeval now;
+	long delay;
+	int rc = -ENOTCONN;
+
+	/* Check if pure ACK or Terminating */
+
+	/*
+	 * XXX: We only call this function for DATA and DATAACK; these
+	 * packets can have zero length, but why the comment about
+	 * "pure ACK"?
+	 */
+	if (hctx == NULL || len == 0 ||
+	    hctx->ccid3hctx_state == TFRC_SSTATE_TERM)
+		goto out;
+
+	/* See if last packet allocated was not sent */
+	new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist);
+	if (new_packet == NULL || new_packet->dccphtx_sent) {
+		new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist,
+						    SLAB_ATOMIC);
+
+		rc = -ENOBUFS;
+		if (new_packet == NULL) {
+			ccid3_pr_debug("%s, sk=%p, not enough mem to add "
+				       "to history, send refused\n",
+				       dccp_role(sk), sk);
+			goto out;
+		}
+
+		dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet);
+	}
+
+	do_gettimeofday(&now);
+
+	switch (hctx->ccid3hctx_state) {
+	case TFRC_SSTATE_NO_SENT:
+		ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n",
+			       dccp_role(sk), sk, dp->dccps_gss);
+
+		hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer;
+		hctx->ccid3hctx_no_feedback_timer.data     = (unsigned long)sk;
+		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+			       jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT));
+		hctx->ccid3hctx_last_win_count	 = 0;
+		hctx->ccid3hctx_t_last_win_count = now;
+		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
+		hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT;
+
+		/* Set nominal send time for initial packet */
+		hctx->ccid3hctx_t_nom = now;
+		timeval_add_usecs(&hctx->ccid3hctx_t_nom,
+				  hctx->ccid3hctx_t_ipi);
+		ccid3_calc_new_delta(hctx);
+		rc = 0;
+		break;
+	case TFRC_SSTATE_NO_FBACK:
+	case TFRC_SSTATE_FBACK:
+		delay = (timeval_delta(&now, &hctx->ccid3hctx_t_nom) -
+			 hctx->ccid3hctx_delta);
+		ccid3_pr_debug("send_packet delay=%ld\n", delay);
+		delay /= -1000;
+		/* divide by -1000 is to convert to ms and get sign right */
+		rc = delay > 0 ? delay : 0;
+		break;
+	default:
+		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+		       __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
+		dump_stack();
+		rc = -EINVAL;
+		break;
+	}
+
+	/* Can we send? if so add options and add to packet history */
+	if (rc == 0)
+		new_packet->dccphtx_ccval =
+			DCCP_SKB_CB(skb)->dccpd_ccval =
+				hctx->ccid3hctx_last_win_count;
+out:
+	return rc;
+}
+
+static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	struct timeval now;
+
+	BUG_ON(hctx == NULL);
+
+	if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) {
+		ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n",
+			       dccp_role(sk), sk);
+		return;
+	}
+
+	do_gettimeofday(&now);
+
+	/* check if we have sent a data packet */
+	if (len > 0) {
+		unsigned long quarter_rtt;
+		struct dccp_tx_hist_entry *packet;
+
+		packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist);
+		if (packet == NULL) {
+			printk(KERN_CRIT "%s: packet doesn't exist in "
+					 "history!\n", __FUNCTION__);
+			return;
+		}
+		if (packet->dccphtx_sent) {
+			printk(KERN_CRIT "%s: no unsent packet in history!\n",
+			       __FUNCTION__);
+			return;
+		}
+		packet->dccphtx_tstamp = now;
+		packet->dccphtx_seqno  = dp->dccps_gss;
+		/*
+		 * Check if win_count has changed.
+		 * Algorithm in "8.1. Window Counter Value" in
+		 * draft-ietf-dccp-ccid3-11.txt
+		 */
+		quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count);
+		if (likely(hctx->ccid3hctx_rtt > 8))
+			quarter_rtt /= hctx->ccid3hctx_rtt / 4;
+
+		if (quarter_rtt > 0) {
+			hctx->ccid3hctx_t_last_win_count = now;
+			hctx->ccid3hctx_last_win_count	 = (hctx->ccid3hctx_last_win_count +
+							    min_t(unsigned long, quarter_rtt, 5)) % 16;
+			ccid3_pr_debug("%s, sk=%p, window changed from "
+				       "%u to %u!\n",
+				       dccp_role(sk), sk,
+				       packet->dccphtx_ccval,
+				       hctx->ccid3hctx_last_win_count);
+		}
+
+		hctx->ccid3hctx_idle = 0;
+		packet->dccphtx_rtt  = hctx->ccid3hctx_rtt;
+		packet->dccphtx_sent = 1;
+	} else
+		ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n",
+			       dccp_role(sk), sk, dp->dccps_gss);
+
+	switch (hctx->ccid3hctx_state) {
+	case TFRC_SSTATE_NO_SENT:
+		/* if first wasn't pure ack */
+		if (len != 0)
+			printk(KERN_CRIT "%s: %s, First packet sent is noted "
+					 "as a data packet\n",
+			       __FUNCTION__, dccp_role(sk));
+		return;
+	case TFRC_SSTATE_NO_FBACK:
+	case TFRC_SSTATE_FBACK:
+		if (len > 0) {
+			hctx->ccid3hctx_t_nom = now;
+			ccid3_calc_new_t_ipi(hctx);
+			ccid3_calc_new_delta(hctx);
+			timeval_add_usecs(&hctx->ccid3hctx_t_nom,
+					  hctx->ccid3hctx_t_ipi);
+		}
+		break;
+	default:
+		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+		       __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
+		dump_stack();
+		break;
+	}
+}
+
+static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	struct ccid3_options_received *opt_recv;
+	struct dccp_tx_hist_entry *packet;
+	unsigned long next_tmout;
+	u32 t_elapsed;
+	u32 pinv;
+	u32 x_recv;
+	u32 r_sample;
+
+	if (hctx == NULL)
+		return;
+
+	if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) {
+		ccid3_pr_debug("%s, sk=%p, received a packet when "
+			       "terminating!\n", dccp_role(sk), sk);
+		return;
+	}
+
+	/* we are only interested in ACKs */
+	if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
+	      DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
+		return;
+
+	opt_recv = &hctx->ccid3hctx_options_received;
+
+	t_elapsed = dp->dccps_options_received.dccpor_elapsed_time;
+	x_recv = opt_recv->ccid3or_receive_rate;
+	pinv = opt_recv->ccid3or_loss_event_rate;
+
+	switch (hctx->ccid3hctx_state) {
+	case TFRC_SSTATE_NO_SENT:
+		/* FIXME: what to do here? */
+		return;
+	case TFRC_SSTATE_NO_FBACK:
+	case TFRC_SSTATE_FBACK:
+		/* Calculate new round trip sample by
+		 * R_sample = (now - t_recvdata) - t_delay */
+		/* get t_recvdata from history */
+		packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist,
+						 DCCP_SKB_CB(skb)->dccpd_ack_seq);
+		if (packet == NULL) {
+			ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) doesn't "
+				       "exist in history!\n",
+				       dccp_role(sk), sk,
+				       DCCP_SKB_CB(skb)->dccpd_ack_seq,
+				       dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
+			return;
+		}
+
+		/* Update RTT */
+		r_sample = timeval_now_delta(&packet->dccphtx_tstamp);
+		/* FIXME: */
+		// r_sample -= usecs_to_jiffies(t_elapsed * 10);
+
+		/* Update RTT estimate by
+		 * If (No feedback recv)
+		 *    R = R_sample;
+		 * Else
+		 *    R = q * R + (1 - q) * R_sample;
+		 *
+		 * q is a constant, RFC 3448 recommends 0.9
+		 */
+		if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
+			ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
+			hctx->ccid3hctx_rtt = r_sample;
+		} else
+			hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 +
+					      r_sample / 10;
+
+		ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, "
+			       "r_sample=%us\n", dccp_role(sk), sk,
+			       hctx->ccid3hctx_rtt, r_sample);
+
+		/* Update timeout interval */
+		hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt,
+					      USEC_PER_SEC);
+
+		/* Update receive rate */
+		hctx->ccid3hctx_x_recv = x_recv;/* X_recv in bytes per sec */
+
+		/* Update loss event rate */
+		if (pinv == ~0 || pinv == 0)
+			hctx->ccid3hctx_p = 0;
+		else {
+			hctx->ccid3hctx_p = 1000000 / pinv;
+
+			if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) {
+				hctx->ccid3hctx_p = TFRC_SMALLEST_P;
+				ccid3_pr_debug("%s, sk=%p, Smallest p used!\n",
+					       dccp_role(sk), sk);
+			}
+		}
+
+		/* unschedule no feedback timer */
+		sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
+
+		/* Update sending rate */
+		ccid3_hc_tx_update_x(sk);
+
+		/* Update next send time */
+		timeval_sub_usecs(&hctx->ccid3hctx_t_nom,
+				  hctx->ccid3hctx_t_ipi);
+		ccid3_calc_new_t_ipi(hctx);
+		timeval_add_usecs(&hctx->ccid3hctx_t_nom,
+				  hctx->ccid3hctx_t_ipi);
+		ccid3_calc_new_delta(hctx);
+
+		/* remove all packets older than the one acked from history */
+		dccp_tx_hist_purge_older(ccid3_tx_hist,
+					 &hctx->ccid3hctx_hist, packet);
+		/*
+		 * As we have calculated new ipi, delta and t_nom, it is
+		 * possible that we now can send a packet, so wake up
+		 * dccp_wait_for_ccids.
+		 */
+		sk->sk_write_space(sk);
+
+		/*
+		 * Schedule no feedback timer to expire in
+		 * max(4 * R, 2 * s / X)
+		 */
+		next_tmout = max(hctx->ccid3hctx_t_rto,
+				 2 * usecs_div(hctx->ccid3hctx_s,
+					       hctx->ccid3hctx_x));
+
+		ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to "
+			       "expire in %lu jiffies (%luus)\n",
+			       dccp_role(sk), sk,
+			       usecs_to_jiffies(next_tmout), next_tmout);
+
+		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+			       jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));
+
+		/* set idle flag */
+		hctx->ccid3hctx_idle = 1;
+		break;
+	default:
+		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+		       __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
+		dump_stack();
+		break;
+	}
+}
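
The RTT update in the feedback path above is the RFC 3448 exponentially weighted moving average with q = 0.9, realized in pure integer arithmetic as rtt = (rtt * 9) / 10 + r_sample / 10. A standalone check of how the estimate tracks incoming samples:

    #include <stdio.h>

    int main(void)
    {
        unsigned int rtt = 100000;  /* current estimate, microseconds */
        unsigned int samples[] = { 120000, 80000, 100000 };
        int i;

        for (i = 0; i < 3; i++) {
            /* R = q * R + (1 - q) * R_sample, q = 0.9 */
            rtt = (rtt * 9) / 10 + samples[i] / 10;
            printf("R_sample=%u -> R=%u\n", samples[i], rtt);
        }
        return 0;
    }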
591
592static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb)
593{
594 const struct dccp_sock *dp = dccp_sk(sk);
595 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
596
597 if (hctx == NULL || !(sk->sk_state == DCCP_OPEN ||
598 sk->sk_state == DCCP_PARTOPEN))
599 return;
600
601 DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
602}
603
604static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
605 unsigned char len, u16 idx,
606 unsigned char *value)
607{
608 int rc = 0;
609 struct dccp_sock *dp = dccp_sk(sk);
610 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
611 struct ccid3_options_received *opt_recv;
612
613 if (hctx == NULL)
614 return 0;
615
616 opt_recv = &hctx->ccid3hctx_options_received;
617
618 if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
619 opt_recv->ccid3or_seqno = dp->dccps_gsr;
620 opt_recv->ccid3or_loss_event_rate = ~0;
621 opt_recv->ccid3or_loss_intervals_idx = 0;
622 opt_recv->ccid3or_loss_intervals_len = 0;
623 opt_recv->ccid3or_receive_rate = 0;
624 }
625
626 switch (option) {
627 case TFRC_OPT_LOSS_EVENT_RATE:
628 if (len != 4) {
629 ccid3_pr_debug("%s, sk=%p, invalid len for "
630 "TFRC_OPT_LOSS_EVENT_RATE\n",
631 dccp_role(sk), sk);
632 rc = -EINVAL;
633 } else {
634 opt_recv->ccid3or_loss_event_rate = ntohl(*(u32 *)value);
635 ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n",
636 dccp_role(sk), sk,
637 opt_recv->ccid3or_loss_event_rate);
638 }
639 break;
640 case TFRC_OPT_LOSS_INTERVALS:
641 opt_recv->ccid3or_loss_intervals_idx = idx;
642 opt_recv->ccid3or_loss_intervals_len = len;
643 ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n",
644 dccp_role(sk), sk,
645 opt_recv->ccid3or_loss_intervals_idx,
646 opt_recv->ccid3or_loss_intervals_len);
647 break;
648 case TFRC_OPT_RECEIVE_RATE:
649 if (len != 4) {
650 ccid3_pr_debug("%s, sk=%p, invalid len for "
651 "TFRC_OPT_RECEIVE_RATE\n",
652 dccp_role(sk), sk);
653 rc = -EINVAL;
654 } else {
655 opt_recv->ccid3or_receive_rate = ntohl(*(u32 *)value);
656 ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n",
657 dccp_role(sk), sk,
658 opt_recv->ccid3or_receive_rate);
659 }
660 break;
661 }
662
663 return rc;
664}
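
For reference, the two fixed-length options above reduce to reading a 4-byte big-endian value. A user-space sketch with a hypothetical payload; memcpy() is used instead of the kernel's pointer cast to avoid an unaligned dereference on strict architectures.

        #include <arpa/inet.h>
        #include <stdint.h>
        #include <stdio.h>
        #include <string.h>

        int main(void)
        {
                /* hypothetical 4-byte option payload: 0x000186a0 == 100000 */
                unsigned char value[4] = { 0x00, 0x01, 0x86, 0xa0 };
                uint32_t rate;

                memcpy(&rate, value, sizeof(rate)); /* no unaligned deref */
                rate = ntohl(rate);
                printf("receive rate = %u bytes/s\n", rate);
                return 0;
        }
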
665
666static int ccid3_hc_tx_init(struct sock *sk)
667{
668 struct dccp_sock *dp = dccp_sk(sk);
669 struct ccid3_hc_tx_sock *hctx;
670
671 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
672
673 hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx),
674 gfp_any());
675 if (hctx == NULL)
676 return -ENOMEM;
677
678 memset(hctx, 0, sizeof(*hctx));
679
680 if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
681 dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
682 hctx->ccid3hctx_s = dp->dccps_packet_size;
683 else
684 hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE;
685
686 /* Set transmission rate to 1 packet per second */
687 hctx->ccid3hctx_x = hctx->ccid3hctx_s;
688 hctx->ccid3hctx_t_rto = USEC_PER_SEC;
689 hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
690 INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
691 init_timer(&hctx->ccid3hctx_no_feedback_timer);
692
693 return 0;
694}
695
696static void ccid3_hc_tx_exit(struct sock *sk)
697{
698 struct dccp_sock *dp = dccp_sk(sk);
699 struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
700
701 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
702 BUG_ON(hctx == NULL);
703
704 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
705 sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
706
707 /* Empty packet history */
708 dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
709
710 kfree(dp->dccps_hc_tx_ccid_private);
711 dp->dccps_hc_tx_ccid_private = NULL;
712}
713
714/*
715 * RX Half Connection methods
716 */
717
718/* TFRC receiver states */
719enum ccid3_hc_rx_states {
720 TFRC_RSTATE_NO_DATA = 1,
721 TFRC_RSTATE_DATA,
722 TFRC_RSTATE_TERM = 127,
723};
724
725#ifdef CCID3_DEBUG
726static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
727{
728 static char *ccid3_rx_state_names[] = {
729 [TFRC_RSTATE_NO_DATA] = "NO_DATA",
730 [TFRC_RSTATE_DATA] = "DATA",
731 [TFRC_RSTATE_TERM] = "TERM",
732 };
733
734 return ccid3_rx_state_names[state];
735}
736#endif
737
738static inline void ccid3_hc_rx_set_state(struct sock *sk,
739 enum ccid3_hc_rx_states state)
740{
741 struct dccp_sock *dp = dccp_sk(sk);
742 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
743 enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state;
744
745 ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
746 dccp_role(sk), sk, ccid3_rx_state_name(oldstate),
747 ccid3_rx_state_name(state));
748 WARN_ON(state == oldstate);
749 hcrx->ccid3hcrx_state = state;
750}
751
752static void ccid3_hc_rx_send_feedback(struct sock *sk)
753{
754 struct dccp_sock *dp = dccp_sk(sk);
755 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
756 struct dccp_rx_hist_entry *packet;
757 struct timeval now;
758
759 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
760
761 do_gettimeofday(&now);
762
763 switch (hcrx->ccid3hcrx_state) {
764 case TFRC_RSTATE_NO_DATA:
765 hcrx->ccid3hcrx_x_recv = 0;
766 break;
767 case TFRC_RSTATE_DATA: {
768 const u32 delta = timeval_delta(&now,
769 &hcrx->ccid3hcrx_tstamp_last_feedback);
770
771 hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv *
772 USEC_PER_SEC);
773 if (likely(delta > 1))
774 hcrx->ccid3hcrx_x_recv /= delta;
775 }
776 break;
777 default:
778 printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
779 __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
780 dump_stack();
781 return;
782 }
783
784 packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist);
785 if (packet == NULL) {
786 printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n",
787 __FUNCTION__, dccp_role(sk), sk);
788 dump_stack();
789 return;
790 }
791
792 hcrx->ccid3hcrx_tstamp_last_feedback = now;
793 hcrx->ccid3hcrx_last_counter = packet->dccphrx_ccval;
794 hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno;
795 hcrx->ccid3hcrx_bytes_recv = 0;
796
797 /* Convert to multiples of 10us */
798 hcrx->ccid3hcrx_elapsed_time =
799 timeval_delta(&now, &packet->dccphrx_tstamp) / 10;
800 if (hcrx->ccid3hcrx_p == 0)
801 hcrx->ccid3hcrx_pinv = ~0;
802 else
803 hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
804 dccp_send_ack(sk);
805}
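
A stand-alone sketch of the TFRC_RSTATE_DATA branch above, with illustrative numbers: X_recv is the bytes received since the last feedback divided by the elapsed time. The multiply is 32-bit here, as in the kernel code, so byte counts above roughly 4294 would overflow this sketch.

        #include <stdio.h>

        #define USEC_PER_SEC 1000000u

        int main(void)
        {
                unsigned int bytes_recv = 3000;  /* since last feedback */
                unsigned int delta = 250000;     /* usecs since last feedback */
                unsigned int x_recv = bytes_recv * USEC_PER_SEC;

                if (delta > 1)                   /* as the DATA branch does */
                        x_recv /= delta;
                printf("X_recv = %u bytes/s\n", x_recv);  /* 12000 */
                return 0;
        }
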
806
807static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
808{
809 const struct dccp_sock *dp = dccp_sk(sk);
810 u32 x_recv, pinv;
811 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
812
813 if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN ||
814 sk->sk_state == DCCP_PARTOPEN))
815 return;
816
817 DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter;
818
819 if (dccp_packet_without_ack(skb))
820 return;
821
822 if (hcrx->ccid3hcrx_elapsed_time != 0)
823 dccp_insert_option_elapsed_time(sk, skb,
824 hcrx->ccid3hcrx_elapsed_time);
825 dccp_insert_option_timestamp(sk, skb);
826 x_recv = htonl(hcrx->ccid3hcrx_x_recv);
827 pinv = htonl(hcrx->ccid3hcrx_pinv);
828 dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
829 &pinv, sizeof(pinv));
830 dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
831 &x_recv, sizeof(x_recv));
832}
833
834/* calculate first loss interval
835 *
836 * returns estimated loss interval in packets (i.e. 1000000 / p) */
837
838static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
839{
840 struct dccp_sock *dp = dccp_sk(sk);
841 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
842 struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
843 u32 rtt, delta, x_recv, fval, p, tmp2;
844 struct timeval tstamp = { 0, };
845 int interval = 0;
846 int win_count = 0;
847 int step = 0;
848 u64 tmp1;
849
850 list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist,
851 dccphrx_node) {
852 if (dccp_rx_hist_entry_data_packet(entry)) {
853 tail = entry;
854
855 switch (step) {
856 case 0:
857 tstamp = entry->dccphrx_tstamp;
858 win_count = entry->dccphrx_ccval;
859 step = 1;
860 break;
861 case 1:
862 interval = win_count - entry->dccphrx_ccval;
863 if (interval < 0)
864 interval += TFRC_WIN_COUNT_LIMIT;
865 if (interval > 4)
866 goto found;
867 break;
868 }
869 }
870 }
871
872 if (step == 0) {
873 printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no "
874 "data packets!\n",
875 __FUNCTION__, dccp_role(sk), sk);
876 return ~0;
877 }
878
879 if (interval == 0) {
880 ccid3_pr_debug("%s, sk=%p, Could not find a win_count "
881 "interval > 0. Defaulting to 1\n",
882 dccp_role(sk), sk);
883 interval = 1;
884 }
885found:
886 rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval;
887 ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n",
888 dccp_role(sk), sk, rtt);
889 if (rtt == 0)
890 rtt = 1;
891
892 delta = timeval_now_delta(&hcrx->ccid3hcrx_tstamp_last_feedback);
893 x_recv = hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC;
894 if (likely(delta > 1))
895 x_recv /= delta;
896
897 tmp1 = (u64)x_recv * (u64)rtt;
898 do_div(tmp1, 10000000);
899 tmp2 = (u32)tmp1;
900 fval = (hcrx->ccid3hcrx_s * 100000) / tmp2;
901 /* do not alter order above or you will get overflow on 32 bit */
902 p = tfrc_calc_x_reverse_lookup(fval);
903 ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied "
904 "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
905
906 if (p == 0)
907 return ~0;
908 else
909 return 1000000 / p;
910}
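
The fixed-point sequence above is easier to check with concrete numbers. A sketch assuming s = 256 bytes, RTT = 100 ms and X_recv = 25600 bytes/s; it shows that fval is f(p) = s / (X * R) scaled by 10^6, consistent with TFRC_CALC_X_SPLIT (50000, "equivalent to 0.05") in tfrc_equation.c.

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                uint32_t s = 256;        /* packet size, bytes */
                uint32_t rtt = 100000;   /* approximated RTT, usecs */
                uint32_t x_recv = 25600; /* receive rate, bytes/s */
                uint64_t tmp1 = (uint64_t)x_recv * rtt;
                uint32_t tmp2 = (uint32_t)(tmp1 / 10000000);  /* = 256 */
                uint32_t fval = (s * 100000) / tmp2;          /* = 100000 */

                /* f(p) = s / (X * R) = 256 / (25600 * 0.1s) = 0.1, and
                 * fval == 100000 == 0.1 * 10^6, matching the table scaling */
                printf("tmp2=%u fval=%u\n", tmp2, fval);
                return 0;
        }
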
911
912static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
913{
914 struct dccp_sock *dp = dccp_sk(sk);
915 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
916
917 if (seq_loss != DCCP_MAX_SEQNO + 1 &&
918 list_empty(&hcrx->ccid3hcrx_li_hist)) {
919 struct dccp_li_hist_entry *li_tail;
920
921 li_tail = dccp_li_hist_interval_new(ccid3_li_hist,
922 &hcrx->ccid3hcrx_li_hist,
923 seq_loss, win_loss);
924 if (li_tail == NULL)
925 return;
926 li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
927 }
928 /* FIXME: find end of interval */
929}
930
931static void ccid3_hc_rx_detect_loss(struct sock *sk)
932{
933 struct dccp_sock *dp = dccp_sk(sk);
934 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
935 u8 win_loss;
936 const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist,
937 &hcrx->ccid3hcrx_li_hist,
938 &win_loss);
939
940 ccid3_hc_rx_update_li(sk, seq_loss, win_loss);
941}
942
943static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
944{
945 struct dccp_sock *dp = dccp_sk(sk);
946 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
947 const struct dccp_options_received *opt_recv;
948 struct dccp_rx_hist_entry *packet;
949 struct timeval now;
950 u8 win_count;
951 u32 p_prev;
952 int ins;
953
954 if (hcrx == NULL)
955 return;
956
957 BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
958 hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA));
959
960 opt_recv = &dp->dccps_options_received;
961
962 switch (DCCP_SKB_CB(skb)->dccpd_type) {
963 case DCCP_PKT_ACK:
964 if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
965 return;
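        /* fall through: an ACK also carries the timestamp echo used below */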
966 case DCCP_PKT_DATAACK:
967 if (opt_recv->dccpor_timestamp_echo == 0)
968 break;
969 p_prev = hcrx->ccid3hcrx_rtt;
970 do_gettimeofday(&now);
971 hcrx->ccid3hcrx_rtt = timeval_usecs(&now) -
972 (opt_recv->dccpor_timestamp_echo -
973 opt_recv->dccpor_elapsed_time) * 10;
974 if (p_prev != hcrx->ccid3hcrx_rtt)
975 ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n",
976 dccp_role(sk), hcrx->ccid3hcrx_rtt,
977 opt_recv->dccpor_elapsed_time);
978 break;
979 case DCCP_PKT_DATA:
980 break;
981 default:
982 ccid3_pr_debug("%s, sk=%p, not DATA/DATAACK/ACK packet(%s)\n",
983 dccp_role(sk), sk,
984 dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
985 return;
986 }
987
988 packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp,
989 skb, SLAB_ATOMIC);
990 if (packet == NULL) {
991 ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet "
992 "to history (consider it lost)!",
993 dccp_role(sk), sk);
994 return;
995 }
996
997 win_count = packet->dccphrx_ccval;
998
999 ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
1000 &hcrx->ccid3hcrx_li_hist, packet);
1001
1002 if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
1003 return;
1004
1005 switch (hcrx->ccid3hcrx_state) {
1006 case TFRC_RSTATE_NO_DATA:
1007 ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial "
1008 "feedback\n",
1009 dccp_role(sk), sk,
1010 dccp_state_name(sk->sk_state), skb);
1011 ccid3_hc_rx_send_feedback(sk);
1012 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
1013 return;
1014 case TFRC_RSTATE_DATA:
1015 hcrx->ccid3hcrx_bytes_recv += skb->len -
1016 dccp_hdr(skb)->dccph_doff * 4;
1017 if (ins != 0)
1018 break;
1019
1020 do_gettimeofday(&now);
1021 if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >=
1022 hcrx->ccid3hcrx_rtt) {
1023 hcrx->ccid3hcrx_tstamp_last_ack = now;
1024 ccid3_hc_rx_send_feedback(sk);
1025 }
1026 return;
1027 default:
1028 printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
1029 __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
1030 dump_stack();
1031 return;
1032 }
1033
1034 /* Dealing with packet loss */
1035 ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n",
1036 dccp_role(sk), sk, dccp_state_name(sk->sk_state));
1037
1038 ccid3_hc_rx_detect_loss(sk);
1039 p_prev = hcrx->ccid3hcrx_p;
1040
1041 /* Calculate loss event rate */
1042 if (!list_empty(&hcrx->ccid3hcrx_li_hist))
1043 /* Scaling up by 1000000 as fixed decimal */
1044 hcrx->ccid3hcrx_p = 1000000 / dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);
1045
1046 if (hcrx->ccid3hcrx_p > p_prev) {
1047 ccid3_hc_rx_send_feedback(sk);
1048 return;
1049 }
1050}
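
Summarizing the tail of this function with hypothetical values: p is the inverse of the mean loss interval (both scaled by 10^6), and feedback is forced only when the loss event rate has become worse.

        #include <stdio.h>

        int main(void)
        {
                unsigned int p_prev = 5000;   /* 0.5%, scaled by 10^6 */
                unsigned int i_mean = 150;    /* mean loss interval, packets */
                unsigned int p = 1000000 / i_mean;  /* 6666, ~0.67% */

                if (p > p_prev)
                        printf("loss rate rose (%u > %u): send feedback\n",
                               p, p_prev);
                return 0;
        }
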
1051
1052static int ccid3_hc_rx_init(struct sock *sk)
1053{
1054 struct dccp_sock *dp = dccp_sk(sk);
1055 struct ccid3_hc_rx_sock *hcrx;
1056
1057 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
1058
1059 hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx),
1060 gfp_any());
1061 if (hcrx == NULL)
1062 return -ENOMEM;
1063
1064 memset(hcrx, 0, sizeof(*hcrx));
1065
1066 if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
1067 dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
1068 hcrx->ccid3hcrx_s = dp->dccps_packet_size;
1069 else
1070 hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE;
1071
1072 hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
1073 INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
1074 INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
1075 /*
1076 * XXX this seems to be paranoid, need to think more about this, for
1077 * now start with something different than zero. -acme
1078 */
1079 hcrx->ccid3hcrx_rtt = USEC_PER_SEC / 5;
1080 return 0;
1081}
1082
1083static void ccid3_hc_rx_exit(struct sock *sk)
1084{
1085 struct dccp_sock *dp = dccp_sk(sk);
1086 struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
1087
1088 ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
1089
1090 if (hcrx == NULL)
1091 return;
1092
1093 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
1094
1095 /* Empty packet history */
1096 dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist);
1097
1098 /* Empty loss interval history */
1099 dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist);
1100
1101 kfree(dp->dccps_hc_rx_ccid_private);
1102 dp->dccps_hc_rx_ccid_private = NULL;
1103}
1104
1105static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
1106{
1107 const struct dccp_sock *dp = dccp_sk(sk);
1108 const struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
1109
1110 if (hcrx == NULL)
1111 return;
1112
1113 info->tcpi_ca_state = hcrx->ccid3hcrx_state;
1114 info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
1115 info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt;
1116}
1117
1118static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
1119{
1120 const struct dccp_sock *dp = dccp_sk(sk);
1121 const struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
1122
1123 if (hctx == NULL)
1124 return;
1125
1126 info->tcpi_rto = hctx->ccid3hctx_t_rto;
1127 info->tcpi_rtt = hctx->ccid3hctx_rtt;
1128}
1129
1130static struct ccid ccid3 = {
1131 .ccid_id = 3,
1132 .ccid_name = "ccid3",
1133 .ccid_owner = THIS_MODULE,
1134 .ccid_init = ccid3_init,
1135 .ccid_exit = ccid3_exit,
1136 .ccid_hc_tx_init = ccid3_hc_tx_init,
1137 .ccid_hc_tx_exit = ccid3_hc_tx_exit,
1138 .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet,
1139 .ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent,
1140 .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv,
1141 .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options,
1142 .ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options,
1143 .ccid_hc_rx_init = ccid3_hc_rx_init,
1144 .ccid_hc_rx_exit = ccid3_hc_rx_exit,
1145 .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options,
1146 .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv,
1147 .ccid_hc_rx_get_info = ccid3_hc_rx_get_info,
1148 .ccid_hc_tx_get_info = ccid3_hc_tx_get_info,
1149};
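
This ops table is what the DCCP core dispatches through. A hedged sketch of the dispatch shape only; the real wrappers live in the dccp core's ccid.h and example_ccid_hc_tx_packet_recv() is a made-up name.

        static void example_ccid_hc_tx_packet_recv(struct ccid *ccid,
                                                   struct sock *sk,
                                                   struct sk_buff *skb)
        {
                /* NULL check, then indirect call through the ops table */
                if (ccid->ccid_hc_tx_packet_recv != NULL)
                        ccid->ccid_hc_tx_packet_recv(sk, skb);
        }
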
1150
1151module_param(ccid3_debug, int, 0444);
1152MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
1153
1154static __init int ccid3_module_init(void)
1155{
1156 int rc = -ENOBUFS;
1157
1158 ccid3_rx_hist = dccp_rx_hist_new("ccid3");
1159 if (ccid3_rx_hist == NULL)
1160 goto out;
1161
1162 ccid3_tx_hist = dccp_tx_hist_new("ccid3");
1163 if (ccid3_tx_hist == NULL)
1164 goto out_free_rx;
1165
1166 ccid3_li_hist = dccp_li_hist_new("ccid3");
1167 if (ccid3_li_hist == NULL)
1168 goto out_free_tx;
1169
1170 rc = ccid_register(&ccid3);
1171 if (rc != 0)
1172 goto out_free_loss_interval_history;
1173out:
1174 return rc;
1175
1176out_free_loss_interval_history:
1177 dccp_li_hist_delete(ccid3_li_hist);
1178 ccid3_li_hist = NULL;
1179out_free_tx:
1180 dccp_tx_hist_delete(ccid3_tx_hist);
1181 ccid3_tx_hist = NULL;
1182out_free_rx:
1183 dccp_rx_hist_delete(ccid3_rx_hist);
1184 ccid3_rx_hist = NULL;
1185 goto out;
1186}
1187module_init(ccid3_module_init);
1188
1189static __exit void ccid3_module_exit(void)
1190{
1191#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
1192 /*
1193 * Hack to use while developing, so that we get rid of the control
1194 * sock, that is what keeps a refcount on dccp.ko -acme
1195 */
1196 extern void dccp_ctl_sock_exit(void);
1197
1198 dccp_ctl_sock_exit();
1199#endif
1200 ccid_unregister(&ccid3);
1201
1202 if (ccid3_tx_hist != NULL) {
1203 dccp_tx_hist_delete(ccid3_tx_hist);
1204 ccid3_tx_hist = NULL;
1205 }
1206 if (ccid3_rx_hist != NULL) {
1207 dccp_rx_hist_delete(ccid3_rx_hist);
1208 ccid3_rx_hist = NULL;
1209 }
1210 if (ccid3_li_hist != NULL) {
1211 dccp_li_hist_delete(ccid3_li_hist);
1212 ccid3_li_hist = NULL;
1213 }
1214}
1215module_exit(ccid3_module_exit);
1216
1217MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, "
1218 "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
1219MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
1220MODULE_LICENSE("GPL");
1221MODULE_ALIAS("net-dccp-ccid-3");
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
new file mode 100644
index 000000000000..ee8cbace6630
--- /dev/null
+++ b/net/dccp/ccids/ccid3.h
@@ -0,0 +1,137 @@
1/*
2 * net/dccp/ccids/ccid3.h
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 *
6 * An implementation of the DCCP protocol
7 *
8 * This code has been developed by the University of Waikato WAND
9 * research group. For further information please see http://www.wand.net.nz/
10 * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
11 *
12 * This code also uses code from Lulea University, re-released as GPL by its
13 * authors:
14 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
15 *
16 * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
17 * and to make it work as a loadable module in the DCCP stack written by
18 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
19 *
20 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
21 *
22 * This program is free software; you can redistribute it and/or modify
23 * it under the terms of the GNU General Public License as published by
24 * the Free Software Foundation; either version 2 of the License, or
25 * (at your option) any later version.
26 *
27 * This program is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU General Public License for more details.
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 */
36#ifndef _DCCP_CCID3_H_
37#define _DCCP_CCID3_H_
38
39#include <linux/config.h>
40#include <linux/list.h>
41#include <linux/time.h>
42#include <linux/types.h>
43
44#define TFRC_MIN_PACKET_SIZE 16
45#define TFRC_STD_PACKET_SIZE 256
46#define TFRC_MAX_PACKET_SIZE 65535
47
48/* Two seconds as per CCID3 spec */
49#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
50
51/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
52#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ))
53
54/* In seconds */
55#define TFRC_MAX_BACK_OFF_TIME 64
56
57#define TFRC_SMALLEST_P 40
58
59enum ccid3_options {
60 TFRC_OPT_LOSS_EVENT_RATE = 192,
61 TFRC_OPT_LOSS_INTERVALS = 193,
62 TFRC_OPT_RECEIVE_RATE = 194,
63};
64
65struct ccid3_options_received {
66 u64 ccid3or_seqno:48,
67 ccid3or_loss_intervals_idx:16;
68 u16 ccid3or_loss_intervals_len;
69 u32 ccid3or_loss_event_rate;
70 u32 ccid3or_receive_rate;
71};
72
73/** struct ccid3_hc_tx_sock - CCID3 sender half connection sock
74 *
75 * @ccid3hctx_state - Sender state
76 * @ccid3hctx_x - Current sending rate
77 * @ccid3hctx_x_recv - Receive rate
78 * @ccid3hctx_x_calc - Calculated send (?) rate
79 * @ccid3hctx_s - Packet size
80 * @ccid3hctx_rtt - Estimate of current round trip time in usecs
81 * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
82 * @ccid3hctx_last_win_count - Last window counter sent
83 * @ccid3hctx_t_last_win_count - Timestamp of earliest packet
84 * with last_win_count value sent
85 * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
86 * @ccid3hctx_idle - FIXME
87 * @ccid3hctx_t_ld - Time last doubled during slow start
88 * @ccid3hctx_t_nom - Nominal send time of next packet
89 * @ccid3hctx_t_ipi - Interpacket (send) interval
90 * @ccid3hctx_delta - Send timer delta
91 * @ccid3hctx_hist - Packet history
92 */
93struct ccid3_hc_tx_sock {
94 u32 ccid3hctx_x;
95 u32 ccid3hctx_x_recv;
96 u32 ccid3hctx_x_calc;
97 u16 ccid3hctx_s;
98 u32 ccid3hctx_rtt;
99 u32 ccid3hctx_p;
100 u8 ccid3hctx_state;
101 u8 ccid3hctx_last_win_count;
102 u8 ccid3hctx_idle;
103 struct timeval ccid3hctx_t_last_win_count;
104 struct timer_list ccid3hctx_no_feedback_timer;
105 struct timeval ccid3hctx_t_ld;
106 struct timeval ccid3hctx_t_nom;
107 u32 ccid3hctx_t_rto;
108 u32 ccid3hctx_t_ipi;
109 u32 ccid3hctx_delta;
110 struct list_head ccid3hctx_hist;
111 struct ccid3_options_received ccid3hctx_options_received;
112};
113
114struct ccid3_hc_rx_sock {
115 u64 ccid3hcrx_seqno_last_counter:48,
116 ccid3hcrx_state:8,
117 ccid3hcrx_last_counter:4;
118 unsigned long ccid3hcrx_rtt;
119 u32 ccid3hcrx_p;
120 u32 ccid3hcrx_bytes_recv;
121 struct timeval ccid3hcrx_tstamp_last_feedback;
122 struct timeval ccid3hcrx_tstamp_last_ack;
123 struct list_head ccid3hcrx_hist;
124 struct list_head ccid3hcrx_li_hist;
125 u16 ccid3hcrx_s;
126 u32 ccid3hcrx_pinv;
127 u32 ccid3hcrx_elapsed_time;
128 u32 ccid3hcrx_x_recv;
129};
130
131#define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \
132 ((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field)
133
134#define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \
135 ((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field)
136
137#endif /* _DCCP_CCID3_H_ */
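
The 48-bit fields above rely on gcc packing several bitfields into a single u64, as the kernel does throughout. A user-space sketch with hypothetical field names, showing that the packing stays within 8 bytes and round-trips values:

        #include <stdint.h>
        #include <stdio.h>

        struct seq_demo {
                uint64_t seqno:48,   /* like ccid3or_seqno above */
                         ccval:4;
        };

        int main(void)
        {
                struct seq_demo d = { .seqno = (1ULL << 47) + 5, .ccval = 9 };

                printf("sizeof=%zu seqno=%llu ccval=%u\n",
                       sizeof(d), (unsigned long long)d.seqno,
                       (unsigned)d.ccval);
                return 0;
        }
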
diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile
new file mode 100644
index 000000000000..5f940a6cbaca
--- /dev/null
+++ b/net/dccp/ccids/lib/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o
2
3dccp_tfrc_lib-y := loss_interval.o packet_history.o tfrc_equation.o
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
new file mode 100644
index 000000000000..4c01a54143ad
--- /dev/null
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -0,0 +1,144 @@
1/*
2 * net/dccp/ccids/lib/loss_interval.c
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
6 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14#include <linux/config.h>
15#include <linux/module.h>
16
17#include "loss_interval.h"
18
19struct dccp_li_hist *dccp_li_hist_new(const char *name)
20{
21 struct dccp_li_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
22 static const char dccp_li_hist_mask[] = "li_hist_%s";
23 char *slab_name;
24
25 if (hist == NULL)
26 goto out;
27
28 slab_name = kmalloc(strlen(name) + sizeof(dccp_li_hist_mask) - 1,
29 GFP_ATOMIC);
30 if (slab_name == NULL)
31 goto out_free_hist;
32
33 sprintf(slab_name, dccp_li_hist_mask, name);
34 hist->dccplih_slab = kmem_cache_create(slab_name,
35 sizeof(struct dccp_li_hist_entry),
36 0, SLAB_HWCACHE_ALIGN,
37 NULL, NULL);
38 if (hist->dccplih_slab == NULL)
39 goto out_free_slab_name;
40out:
41 return hist;
42out_free_slab_name:
43 kfree(slab_name);
44out_free_hist:
45 kfree(hist);
46 hist = NULL;
47 goto out;
48}
49
50EXPORT_SYMBOL_GPL(dccp_li_hist_new);
51
52void dccp_li_hist_delete(struct dccp_li_hist *hist)
53{
54 const char *name = kmem_cache_name(hist->dccplih_slab);
55
56 kmem_cache_destroy(hist->dccplih_slab);
57 kfree(name);
58 kfree(hist);
59}
60
61EXPORT_SYMBOL_GPL(dccp_li_hist_delete);
62
63void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list)
64{
65 struct dccp_li_hist_entry *entry, *next;
66
67 list_for_each_entry_safe(entry, next, list, dccplih_node) {
68 list_del_init(&entry->dccplih_node);
69 kmem_cache_free(hist->dccplih_slab, entry);
70 }
71}
72
73EXPORT_SYMBOL_GPL(dccp_li_hist_purge);
74
75/* Weights used to calculate loss event rate */
76/*
77 * These are integers as per section 8 of RFC 3448. We can then divide
78 * by 4 when we use them.
79 */
80static const int dccp_li_hist_w[DCCP_LI_HIST_IVAL_F_LENGTH] = {
81 4, 4, 4, 4, 3, 2, 1, 1,
82};
83
84u32 dccp_li_hist_calc_i_mean(struct list_head *list)
85{
86 struct dccp_li_hist_entry *li_entry, *li_next;
87 int i = 0;
88 u32 i_tot;
89 u32 i_tot0 = 0;
90 u32 i_tot1 = 0;
91 u32 w_tot = 0;
92
93 list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
94 if (i < DCCP_LI_HIST_IVAL_F_LENGTH) {
95 i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
96 w_tot += dccp_li_hist_w[i];
97 }
98
99 if (i != 0)
100 i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
101
102 if (++i > DCCP_LI_HIST_IVAL_F_LENGTH)
103 break;
104 }
105
106 if (i != DCCP_LI_HIST_IVAL_F_LENGTH)
107 return 0;
108
109 i_tot = max(i_tot0, i_tot1);
110
111 /* FIXME: Why do we do this? -Ian McDonald */
112 if (i_tot * 4 < w_tot)
113 i_tot = w_tot * 4;
114
115 return i_tot * 4 / w_tot;
116}
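
A stand-alone sketch of the arithmetic above, assuming a full history of nine intervals (newest first; the values are hypothetical). It mirrors the two overlapping weighted sums and the final i_tot * 4 / w_tot step, but omits the FIXME clamp:

        #include <stdio.h>

        static const int w[8] = { 4, 4, 4, 4, 3, 2, 1, 1 };

        int main(void)
        {
                /* hypothetical loss intervals, newest first, in packets */
                unsigned int ival[9] = { 10, 20, 20, 30, 40, 40, 50, 50, 60 };
                unsigned int i, i_tot0 = 0, i_tot1 = 0, w_tot = 0, i_tot;

                for (i = 0; i < 8; i++) {
                        i_tot0 += ival[i] * w[i];      /* intervals 0..7 */
                        i_tot1 += ival[i + 1] * w[i];  /* intervals 1..8 */
                        w_tot += w[i];
                }
                i_tot = i_tot0 > i_tot1 ? i_tot0 : i_tot1;  /* max(), as above */
                printf("i_mean = %u\n", i_tot * 4 / w_tot); /* 770*4/19 = 162 */
                return 0;
        }
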
117
118EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
119
120struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist,
121 struct list_head *list,
122 const u64 seq_loss,
123 const u8 win_loss)
124{
125 struct dccp_li_hist_entry *tail = NULL, *entry;
126 int i;
127
128 for (i = 0; i <= DCCP_LI_HIST_IVAL_F_LENGTH; ++i) {
129 entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC);
130 if (entry == NULL) {
131 dccp_li_hist_purge(hist, list);
132 return NULL;
133 }
134 if (tail == NULL)
135 tail = entry;
136 list_add(&entry->dccplih_node, list);
137 }
138
139 entry->dccplih_seqno = seq_loss;
140 entry->dccplih_win_count = win_loss;
141 return tail;
142}
143
144EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new);
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
new file mode 100644
index 000000000000..13ad47ba1420
--- /dev/null
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -0,0 +1,61 @@
1#ifndef _DCCP_LI_HIST_
2#define _DCCP_LI_HIST_
3/*
4 * net/dccp/ccids/lib/loss_interval.h
5 *
6 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
7 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
8 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 2 of the License, or (at your option)
13 * any later version.
14 */
15
16#include <linux/config.h>
17#include <linux/list.h>
18#include <linux/slab.h>
19#include <linux/time.h>
20
21#define DCCP_LI_HIST_IVAL_F_LENGTH 8
22
23struct dccp_li_hist {
24 kmem_cache_t *dccplih_slab;
25};
26
27extern struct dccp_li_hist *dccp_li_hist_new(const char *name);
28extern void dccp_li_hist_delete(struct dccp_li_hist *hist);
29
30struct dccp_li_hist_entry {
31 struct list_head dccplih_node;
32 u64 dccplih_seqno:48,
33 dccplih_win_count:4;
34 u32 dccplih_interval;
35};
36
37static inline struct dccp_li_hist_entry *
38 dccp_li_hist_entry_new(struct dccp_li_hist *hist,
39 const unsigned int __nocast prio)
40{
41 return kmem_cache_alloc(hist->dccplih_slab, prio);
42}
43
44static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist,
45 struct dccp_li_hist_entry *entry)
46{
47 if (entry != NULL)
48 kmem_cache_free(hist->dccplih_slab, entry);
49}
50
51extern void dccp_li_hist_purge(struct dccp_li_hist *hist,
52 struct list_head *list);
53
54extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
55
56extern struct dccp_li_hist_entry *
57 dccp_li_hist_interval_new(struct dccp_li_hist *hist,
58 struct list_head *list,
59 const u64 seq_loss,
60 const u8 win_loss);
61#endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
new file mode 100644
index 000000000000..d3f9d2053830
--- /dev/null
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -0,0 +1,398 @@
1/*
2 * net/dccp/ccids/lib/packet_history.c
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 *
6 * An implementation of the DCCP protocol
7 *
8 * This code has been developed by the University of Waikato WAND
9 * research group. For further information please see http://www.wand.net.nz/
10 * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
11 *
12 * This code also uses code from Lulea University, re-released as GPL by its
13 * authors:
14 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
15 *
16 * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
17 * and to make it work as a loadable module in the DCCP stack written by
18 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
19 *
20 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
21 *
22 * This program is free software; you can redistribute it and/or modify
23 * it under the terms of the GNU General Public License as published by
24 * the Free Software Foundation; either version 2 of the License, or
25 * (at your option) any later version.
26 *
27 * This program is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU General Public License for more details.
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 */
36
37#include <linux/config.h>
38#include <linux/module.h>
39#include <linux/string.h>
40
41#include "packet_history.h"
42
43struct dccp_rx_hist *dccp_rx_hist_new(const char *name)
44{
45 struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
46 static const char dccp_rx_hist_mask[] = "rx_hist_%s";
47 char *slab_name;
48
49 if (hist == NULL)
50 goto out;
51
52 slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1,
53 GFP_ATOMIC);
54 if (slab_name == NULL)
55 goto out_free_hist;
56
57 sprintf(slab_name, dccp_rx_hist_mask, name);
58 hist->dccprxh_slab = kmem_cache_create(slab_name,
59 sizeof(struct dccp_rx_hist_entry),
60 0, SLAB_HWCACHE_ALIGN,
61 NULL, NULL);
62 if (hist->dccprxh_slab == NULL)
63 goto out_free_slab_name;
64out:
65 return hist;
66out_free_slab_name:
67 kfree(slab_name);
68out_free_hist:
69 kfree(hist);
70 hist = NULL;
71 goto out;
72}
73
74EXPORT_SYMBOL_GPL(dccp_rx_hist_new);
75
76void dccp_rx_hist_delete(struct dccp_rx_hist *hist)
77{
78 const char *name = kmem_cache_name(hist->dccprxh_slab);
79
80 kmem_cache_destroy(hist->dccprxh_slab);
81 kfree(name);
82 kfree(hist);
83}
84
85EXPORT_SYMBOL_GPL(dccp_rx_hist_delete);
86
87void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list)
88{
89 struct dccp_rx_hist_entry *entry, *next;
90
91 list_for_each_entry_safe(entry, next, list, dccphrx_node) {
92 list_del_init(&entry->dccphrx_node);
93 kmem_cache_free(hist->dccprxh_slab, entry);
94 }
95}
96
97EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
98
99struct dccp_rx_hist_entry *
100 dccp_rx_hist_find_data_packet(const struct list_head *list)
101{
102 struct dccp_rx_hist_entry *entry, *packet = NULL;
103
104 list_for_each_entry(entry, list, dccphrx_node)
105 if (entry->dccphrx_type == DCCP_PKT_DATA ||
106 entry->dccphrx_type == DCCP_PKT_DATAACK) {
107 packet = entry;
108 break;
109 }
110
111 return packet;
112}
113
114EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet);
115
116int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
117 struct list_head *rx_list,
118 struct list_head *li_list,
119 struct dccp_rx_hist_entry *packet)
120{
121 struct dccp_rx_hist_entry *entry, *next, *iter;
122 u8 num_later = 0;
123
124 iter = dccp_rx_hist_head(rx_list);
125 if (iter == NULL)
126 dccp_rx_hist_add_entry(rx_list, packet);
127 else {
128 const u64 seqno = packet->dccphrx_seqno;
129
130 if (after48(seqno, iter->dccphrx_seqno))
131 dccp_rx_hist_add_entry(rx_list, packet);
132 else {
133 if (dccp_rx_hist_entry_data_packet(iter))
134 num_later = 1;
135
136 list_for_each_entry_continue(iter, rx_list,
137 dccphrx_node) {
138 if (after48(seqno, iter->dccphrx_seqno)) {
139 dccp_rx_hist_add_entry(&iter->dccphrx_node,
140 packet);
141 goto trim_history;
142 }
143
144 if (dccp_rx_hist_entry_data_packet(iter))
145 num_later++;
146
147 if (num_later == TFRC_RECV_NUM_LATE_LOSS) {
148 dccp_rx_hist_entry_delete(hist, packet);
149 return 1;
150 }
151 }
152
153 if (num_later < TFRC_RECV_NUM_LATE_LOSS)
154 dccp_rx_hist_add_entry(rx_list, packet);
155 /*
156 * FIXME: else what? should we destroy the packet
157 * like above?
158 */
159 }
160 }
161
162trim_history:
163 /*
164 * Trim history (remove all packets after the NUM_LATE_LOSS + 1
165 * data packets)
166 */
167 num_later = TFRC_RECV_NUM_LATE_LOSS + 1;
168
169 if (!list_empty(li_list)) {
170 list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
171 if (num_later == 0) {
172 list_del_init(&entry->dccphrx_node);
173 dccp_rx_hist_entry_delete(hist, entry);
174 } else if (dccp_rx_hist_entry_data_packet(entry))
175 --num_later;
176 }
177 } else {
178 int step = 0;
179 u8 win_count = 0; /* Not needed, but silences a gcc warning */
180 int tmp;
181 /*
182 * We have no loss interval history, so we need at least one
183 * RTT's worth of data packets to approximate the RTT.
184 */
185 list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
186 if (num_later == 0) {
187 switch (step) {
188 case 0:
189 step = 1;
190 /* OK, find next data packet */
191 num_later = 1;
192 break;
193 case 1:
194 step = 2;
195 /* OK, find next data packet */
196 num_later = 1;
197 win_count = entry->dccphrx_ccval;
198 break;
199 case 2:
200 tmp = win_count - entry->dccphrx_ccval;
201 if (tmp < 0)
202 tmp += TFRC_WIN_COUNT_LIMIT;
203 if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) {
204 /*
205 * We have found a packet older
206 * than one RTT; remove the rest
207 */
208 step = 3;
209 } else /* OK, find next data packet */
210 num_later = 1;
211 break;
212 case 3:
213 list_del_init(&entry->dccphrx_node);
214 dccp_rx_hist_entry_delete(hist, entry);
215 break;
216 }
217 } else if (dccp_rx_hist_entry_data_packet(entry))
218 --num_later;
219 }
220 }
221
222 return 0;
223}
224
225EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet);
226
227u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
228 struct list_head *li_list, u8 *win_loss)
229{
230 struct dccp_rx_hist_entry *entry, *next, *packet;
231 struct dccp_rx_hist_entry *a_loss = NULL;
232 struct dccp_rx_hist_entry *b_loss = NULL;
233 u64 seq_loss = DCCP_MAX_SEQNO + 1;
234 u8 num_later = TFRC_RECV_NUM_LATE_LOSS;
235
236 list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
237 if (num_later == 0) {
238 b_loss = entry;
239 break;
240 } else if (dccp_rx_hist_entry_data_packet(entry))
241 --num_later;
242 }
243
244 if (b_loss == NULL)
245 goto out;
246
247 num_later = 1;
248 list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
249 if (num_later == 0) {
250 a_loss = entry;
251 break;
252 } else if (dccp_rx_hist_entry_data_packet(entry))
253 --num_later;
254 }
255
256 if (a_loss == NULL) {
257 if (list_empty(li_list)) {
258 /* no loss event has occurred yet */
259 LIMIT_NETDEBUG("%s: TODO: find a lost data packet by "
260 "comparing to initial seqno\n",
261 __FUNCTION__);
262 goto out;
263 } else {
264 LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!\n",
265 __FUNCTION__);
266 goto out;
267 }
268 }
269
270 /* Locate a lost data packet */
271 entry = packet = b_loss;
272 list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
273 u64 delta = dccp_delta_seqno(entry->dccphrx_seqno,
274 packet->dccphrx_seqno);
275
276 if (delta != 0) {
277 if (dccp_rx_hist_entry_data_packet(packet))
278 --delta;
279 /*
280 * FIXME: check this, probably this % usage is because
281 * in earlier drafts the ndp count was just 8 bits
282 * long, but now it can be up to 24 bits long.
283 */
284#if 0
285 if (delta % DCCP_NDP_LIMIT !=
286 (packet->dccphrx_ndp -
287 entry->dccphrx_ndp) % DCCP_NDP_LIMIT)
288#endif
289 if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) {
290 seq_loss = entry->dccphrx_seqno;
291 dccp_inc_seqno(&seq_loss);
292 }
293 }
294 packet = entry;
295 if (packet == a_loss)
296 break;
297 }
298out:
299 if (seq_loss != DCCP_MAX_SEQNO + 1)
300 *win_loss = a_loss->dccphrx_ccval;
301 else
302 *win_loss = 0; /* Paranoia */
303
304 return seq_loss;
305}
306
307EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss);
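
The core test in the detector above: a sequence-number gap between two received packets is a loss event only when it is not fully explained by the sender's NDP count. A simplified user-space analogue, ignoring 48-bit wraparound and the data-packet adjustment:

        #include <stdio.h>

        int main(void)
        {
                unsigned long long seq_prev = 100, seq_next = 104; /* received */
                unsigned int ndp_prev = 2, ndp_next = 4; /* sender NDP counts */
                unsigned long long gap = seq_next - seq_prev - 1; /* 3 missing */

                if (gap != ndp_next - ndp_prev) /* 3 != 2: one was data: loss */
                        printf("loss event starts at seqno %llu\n",
                               seq_prev + 1);
                return 0;
        }
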
308
309struct dccp_tx_hist *dccp_tx_hist_new(const char *name)
310{
311 struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
312 static const char dccp_tx_hist_mask[] = "tx_hist_%s";
313 char *slab_name;
314
315 if (hist == NULL)
316 goto out;
317
318 slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1,
319 GFP_ATOMIC);
320 if (slab_name == NULL)
321 goto out_free_hist;
322
323 sprintf(slab_name, dccp_tx_hist_mask, name);
324 hist->dccptxh_slab = kmem_cache_create(slab_name,
325 sizeof(struct dccp_tx_hist_entry),
326 0, SLAB_HWCACHE_ALIGN,
327 NULL, NULL);
328 if (hist->dccptxh_slab == NULL)
329 goto out_free_slab_name;
330out:
331 return hist;
332out_free_slab_name:
333 kfree(slab_name);
334out_free_hist:
335 kfree(hist);
336 hist = NULL;
337 goto out;
338}
339
340EXPORT_SYMBOL_GPL(dccp_tx_hist_new);
341
342void dccp_tx_hist_delete(struct dccp_tx_hist *hist)
343{
344 const char *name = kmem_cache_name(hist->dccptxh_slab);
345
346 kmem_cache_destroy(hist->dccptxh_slab);
347 kfree(name);
348 kfree(hist);
349}
350
351EXPORT_SYMBOL_GPL(dccp_tx_hist_delete);
352
353struct dccp_tx_hist_entry *
354 dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq)
355{
356 struct dccp_tx_hist_entry *packet = NULL, *entry;
357
358 list_for_each_entry(entry, list, dccphtx_node)
359 if (entry->dccphtx_seqno == seq) {
360 packet = entry;
361 break;
362 }
363
364 return packet;
365}
366
367EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry);
368
369void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
370 struct list_head *list,
371 struct dccp_tx_hist_entry *packet)
372{
373 struct dccp_tx_hist_entry *next;
374
375 list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) {
376 list_del_init(&packet->dccphtx_node);
377 dccp_tx_hist_entry_delete(hist, packet);
378 }
379}
380
381EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older);
382
383void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list)
384{
385 struct dccp_tx_hist_entry *entry, *next;
386
387 list_for_each_entry_safe(entry, next, list, dccphtx_node) {
388 list_del_init(&entry->dccphtx_node);
389 dccp_tx_hist_entry_delete(hist, entry);
390 }
391}
392
393EXPORT_SYMBOL_GPL(dccp_tx_hist_purge);
394
395MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, "
396 "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
397MODULE_DESCRIPTION("DCCP TFRC library");
398MODULE_LICENSE("GPL");
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
new file mode 100644
index 000000000000..fb90a91aa93d
--- /dev/null
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -0,0 +1,199 @@
1/*
2 * net/dccp/ccids/lib/packet_history.h
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 *
6 * An implementation of the DCCP protocol
7 *
8 * This code has been developed by the University of Waikato WAND
9 * research group. For further information please see http://www.wand.net.nz/
10 * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
11 *
12 * This code also uses code from Lulea University, re-released as GPL by its
13 * authors:
14 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
15 *
16 * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
17 * and to make it work as a loadable module in the DCCP stack written by
18 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
19 *
20 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
21 *
22 * This program is free software; you can redistribute it and/or modify
23 * it under the terms of the GNU General Public License as published by
24 * the Free Software Foundation; either version 2 of the License, or
25 * (at your option) any later version.
26 *
27 * This program is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU General Public License for more details.
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 */
36
37#ifndef _DCCP_PKT_HIST_
38#define _DCCP_PKT_HIST_
39
40#include <linux/config.h>
41#include <linux/list.h>
42#include <linux/slab.h>
43#include <linux/time.h>
44
45#include "../../dccp.h"
46
47/* Number of later packets received before one is considered lost */
48#define TFRC_RECV_NUM_LATE_LOSS 3
49
50#define TFRC_WIN_COUNT_PER_RTT 4
51#define TFRC_WIN_COUNT_LIMIT 16
52
53struct dccp_tx_hist_entry {
54 struct list_head dccphtx_node;
55 u64 dccphtx_seqno:48,
56 dccphtx_ccval:4,
57 dccphtx_sent:1;
58 u32 dccphtx_rtt;
59 struct timeval dccphtx_tstamp;
60};
61
62struct dccp_rx_hist_entry {
63 struct list_head dccphrx_node;
64 u64 dccphrx_seqno:48,
65 dccphrx_ccval:4,
66 dccphrx_type:4;
67 u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */
68 struct timeval dccphrx_tstamp;
69};
70
71struct dccp_tx_hist {
72 kmem_cache_t *dccptxh_slab;
73};
74
75extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name);
76extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist);
77
78struct dccp_rx_hist {
79 kmem_cache_t *dccprxh_slab;
80};
81
82extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name);
83extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist);
84extern struct dccp_rx_hist_entry *
85 dccp_rx_hist_find_data_packet(const struct list_head *list);
86
87static inline struct dccp_tx_hist_entry *
88 dccp_tx_hist_entry_new(struct dccp_tx_hist *hist,
89 const unsigned int __nocast prio)
90{
91 struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab,
92 prio);
93
94 if (entry != NULL)
95 entry->dccphtx_sent = 0;
96
97 return entry;
98}
99
100static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist,
101 struct dccp_tx_hist_entry *entry)
102{
103 if (entry != NULL)
104 kmem_cache_free(hist->dccptxh_slab, entry);
105}
106
107extern struct dccp_tx_hist_entry *
108 dccp_tx_hist_find_entry(const struct list_head *list,
109 const u64 seq);
110
111static inline void dccp_tx_hist_add_entry(struct list_head *list,
112 struct dccp_tx_hist_entry *entry)
113{
114 list_add(&entry->dccphtx_node, list);
115}
116
117extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
118 struct list_head *list,
119 struct dccp_tx_hist_entry *next);
120
121extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist,
122 struct list_head *list);
123
124static inline struct dccp_tx_hist_entry *
125 dccp_tx_hist_head(struct list_head *list)
126{
127 struct dccp_tx_hist_entry *head = NULL;
128
129 if (!list_empty(list))
130 head = list_entry(list->next, struct dccp_tx_hist_entry,
131 dccphtx_node);
132 return head;
133}
134
135static inline struct dccp_rx_hist_entry *
136 dccp_rx_hist_entry_new(struct dccp_rx_hist *hist,
137 const u32 ndp,
138 const struct sk_buff *skb,
139 const unsigned int __nocast prio)
140{
141 struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab,
142 prio);
143
144 if (entry != NULL) {
145 const struct dccp_hdr *dh = dccp_hdr(skb);
146
147 entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
148 entry->dccphrx_ccval = dh->dccph_ccval;
149 entry->dccphrx_type = dh->dccph_type;
150 entry->dccphrx_ndp = ndp;
151 do_gettimeofday(&(entry->dccphrx_tstamp));
152 }
153
154 return entry;
155}
156
157static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist,
158 struct dccp_rx_hist_entry *entry)
159{
160 if (entry != NULL)
161 kmem_cache_free(hist->dccprxh_slab, entry);
162}
163
164extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist,
165 struct list_head *list);
166
167static inline void dccp_rx_hist_add_entry(struct list_head *list,
168 struct dccp_rx_hist_entry *entry)
169{
170 list_add(&entry->dccphrx_node, list);
171}
172
173static inline struct dccp_rx_hist_entry *
174 dccp_rx_hist_head(struct list_head *list)
175{
176 struct dccp_rx_hist_entry *head = NULL;
177
178 if (!list_empty(list))
179 head = list_entry(list->next, struct dccp_rx_hist_entry,
180 dccphrx_node);
181 return head;
182}
183
184static inline int
185 dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry)
186{
187 return entry->dccphrx_type == DCCP_PKT_DATA ||
188 entry->dccphrx_type == DCCP_PKT_DATAACK;
189}
190
191extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
192 struct list_head *rx_list,
193 struct list_head *li_list,
194 struct dccp_rx_hist_entry *packet);
195
196extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
197 struct list_head *li_list, u8 *win_loss);
198
199#endif /* _DCCP_PKT_HIST_ */
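
A hedged usage sketch of the TX-history API, mirroring how ccid3_hc_tx_packet_recv() drives it. Kernel context is assumed and example_tx_history_use() is a made-up name.

        static int example_tx_history_use(struct dccp_tx_hist *hist,
                                          struct list_head *list, u64 ack_seq)
        {
                struct dccp_tx_hist_entry *entry;

                entry = dccp_tx_hist_find_entry(list, ack_seq);
                if (entry == NULL)
                        return -1;  /* ack for a packet we never recorded */

                /* everything older than the acked packet can be dropped */
                dccp_tx_hist_purge_older(hist, list, entry);
                return 0;
        }
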
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
new file mode 100644
index 000000000000..130c4c40cfe3
--- /dev/null
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -0,0 +1,22 @@
1#ifndef _TFRC_H_
2#define _TFRC_H_
3/*
4 * net/dccp/ccids/lib/tfrc.h
5 *
6 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
7 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
8 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
9 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 */
16
17#include <linux/types.h>
18
19extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
20extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
21
22#endif /* _TFRC_H_ */
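
A hedged sketch of the scaling conventions at the ccid3.c call sites of these helpers: s in bytes, R in microseconds, p and fval scaled by 10^6. example_tfrc() is illustrative only.

        static u32 example_tfrc(void)
        {
                /* s = 256 bytes, R = 100000 us (100 ms), p = 10000 (1%) */
                u32 x = tfrc_calc_x(256, 100000, 10000);

                /* inverse direction: fval (f(p) * 10^6) back to p * 10^6 */
                u32 p = tfrc_calc_x_reverse_lookup(100000);

                (void)p;    /* unused in this illustrative stub */
                return x;   /* allowed transmit rate, bytes per second */
        }
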
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
new file mode 100644
index 000000000000..d2b5933b4510
--- /dev/null
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -0,0 +1,644 @@
1/*
2 * net/dccp/ccids/lib/tfrc_equation.c
3 *
4 * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
5 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
6 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7 * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 */
14
15#include <linux/config.h>
16#include <linux/module.h>
17
18#include <asm/bug.h>
19#include <asm/div64.h>
20
21#include "tfrc.h"
22
23#define TFRC_CALC_X_ARRSIZE 500
24
25#define TFRC_CALC_X_SPLIT 50000
26/* equivalent to 0.05 */
27
28static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = {
29 { 37172, 8172 },
30 { 53499, 11567 },
31 { 66664, 14180 },
32 { 78298, 16388 },
33 { 89021, 18339 },
34 { 99147, 20108 },
35 { 108858, 21738 },
36 { 118273, 23260 },
37 { 127474, 24693 },
38 { 136520, 26052 },
39 { 145456, 27348 },
40 { 154316, 28589 },
41 { 163130, 29783 },
42 { 171919, 30935 },
43 { 180704, 32049 },
44 { 189502, 33130 },
45 { 198328, 34180 },
46 { 207194, 35202 },
47 { 216114, 36198 },
48 { 225097, 37172 },
49 { 234153, 38123 },
50 { 243294, 39055 },
51 { 252527, 39968 },
52 { 261861, 40864 },
53 { 271305, 41743 },
54 { 280866, 42607 },
55 { 290553, 43457 },
56 { 300372, 44293 },
57 { 310333, 45117 },
58 { 320441, 45929 },
59 { 330705, 46729 },
60 { 341131, 47518 },
61 { 351728, 48297 },
62 { 362501, 49066 },
63 { 373460, 49826 },
64 { 384609, 50577 },
65 { 395958, 51320 },
66 { 407513, 52054 },
67 { 419281, 52780 },
68 { 431270, 53499 },
69 { 443487, 54211 },
70 { 455940, 54916 },
71 { 468635, 55614 },
72 { 481581, 56306 },
73 { 494785, 56991 },
74 { 508254, 57671 },
75 { 521996, 58345 },
76 { 536019, 59014 },
77 { 550331, 59677 },
78 { 564939, 60335 },
79 { 579851, 60988 },
80 { 595075, 61636 },
81 { 610619, 62279 },
82 { 626491, 62918 },
83 { 642700, 63553 },
84 { 659253, 64183 },
85 { 676158, 64809 },
86 { 693424, 65431 },
87 { 711060, 66050 },
88 { 729073, 66664 },
89 { 747472, 67275 },
90 { 766266, 67882 },
91 { 785464, 68486 },
92 { 805073, 69087 },
93 { 825103, 69684 },
94 { 845562, 70278 },
95 { 866460, 70868 },
96 { 887805, 71456 },
97 { 909606, 72041 },
98 { 931873, 72623 },
99 { 954614, 73202 },
100 { 977839, 73778 },
101 { 1001557, 74352 },
102 { 1025777, 74923 },
103 { 1050508, 75492 },
104 { 1075761, 76058 },
105 { 1101544, 76621 },
106 { 1127867, 77183 },
107 { 1154739, 77741 },
108 { 1182172, 78298 },
109 { 1210173, 78852 },
110 { 1238753, 79405 },
111 { 1267922, 79955 },
112 { 1297689, 80503 },
113 { 1328066, 81049 },
114 { 1359060, 81593 },
115 { 1390684, 82135 },
116 { 1422947, 82675 },
117 { 1455859, 83213 },
118 { 1489430, 83750 },
119 { 1523671, 84284 },
120 { 1558593, 84817 },
121 { 1594205, 85348 },
122 { 1630518, 85878 },
123 { 1667543, 86406 },
124 { 1705290, 86932 },
125 { 1743770, 87457 },
126 { 1782994, 87980 },
127 { 1822973, 88501 },
128 { 1863717, 89021 },
129 { 1905237, 89540 },
130 { 1947545, 90057 },
131 { 1990650, 90573 },
132 { 2034566, 91087 },
133 { 2079301, 91600 },
134 { 2124869, 92111 },
135 { 2171279, 92622 },
136 { 2218543, 93131 },
137 { 2266673, 93639 },
138 { 2315680, 94145 },
139 { 2365575, 94650 },
140 { 2416371, 95154 },
141 { 2468077, 95657 },
142 { 2520707, 96159 },
143 { 2574271, 96660 },
144 { 2628782, 97159 },
145 { 2684250, 97658 },
146 { 2740689, 98155 },
147 { 2798110, 98651 },
148 { 2856524, 99147 },
149 { 2915944, 99641 },
150 { 2976382, 100134 },
151 { 3037850, 100626 },
152 { 3100360, 101117 },
153 { 3163924, 101608 },
154 { 3228554, 102097 },
155 { 3294263, 102586 },
156 { 3361063, 103073 },
157 { 3428966, 103560 },
158 { 3497984, 104045 },
159 { 3568131, 104530 },
160 { 3639419, 105014 },
161 { 3711860, 105498 },
162 { 3785467, 105980 },
163 { 3860253, 106462 },
164 { 3936229, 106942 },
165 { 4013410, 107422 },
166 { 4091808, 107902 },
167 { 4171435, 108380 },
168 { 4252306, 108858 },
169 { 4334431, 109335 },
170 { 4417825, 109811 },
171 { 4502501, 110287 },
172 { 4588472, 110762 },
173 { 4675750, 111236 },
174 { 4764349, 111709 },
175 { 4854283, 112182 },
176 { 4945564, 112654 },
177 { 5038206, 113126 },
178 { 5132223, 113597 },
179 { 5227627, 114067 },
180 { 5324432, 114537 },
181 { 5422652, 115006 },
182 { 5522299, 115474 },
183 { 5623389, 115942 },
184 { 5725934, 116409 },
185 { 5829948, 116876 },
186 { 5935446, 117342 },
187 { 6042439, 117808 },
188 { 6150943, 118273 },
189 { 6260972, 118738 },
190 { 6372538, 119202 },
191 { 6485657, 119665 },
192 { 6600342, 120128 },
193 { 6716607, 120591 },
194 { 6834467, 121053 },
195 { 6953935, 121514 },
196 { 7075025, 121976 },
197 { 7197752, 122436 },
198 { 7322131, 122896 },
199 { 7448175, 123356 },
200 { 7575898, 123815 },
201 { 7705316, 124274 },
202 { 7836442, 124733 },
203 { 7969291, 125191 },
204 { 8103877, 125648 },
205 { 8240216, 126105 },
206 { 8378321, 126562 },
207 { 8518208, 127018 },
208 { 8659890, 127474 },
209 { 8803384, 127930 },
210 { 8948702, 128385 },
211 { 9095861, 128840 },
212 { 9244875, 129294 },
213 { 9395760, 129748 },
214 { 9548529, 130202 },
215 { 9703198, 130655 },
216 { 9859782, 131108 },
217 { 10018296, 131561 },
218 { 10178755, 132014 },
219 { 10341174, 132466 },
220 { 10505569, 132917 },
221 { 10671954, 133369 },
222 { 10840345, 133820 },
223 { 11010757, 134271 },
224 { 11183206, 134721 },
225 { 11357706, 135171 },
226 { 11534274, 135621 },
227 { 11712924, 136071 },
228 { 11893673, 136520 },
229 { 12076536, 136969 },
230 { 12261527, 137418 },
231 { 12448664, 137867 },
232 { 12637961, 138315 },
233 { 12829435, 138763 },
234 { 13023101, 139211 },
235 { 13218974, 139658 },
236 { 13417071, 140106 },
237 { 13617407, 140553 },
238 { 13819999, 140999 },
239 { 14024862, 141446 },
240 { 14232012, 141892 },
241 { 14441465, 142339 },
242 { 14653238, 142785 },
243 { 14867346, 143230 },
244 { 15083805, 143676 },
245 { 15302632, 144121 },
246 { 15523842, 144566 },
247 { 15747453, 145011 },
248 { 15973479, 145456 },
249 { 16201939, 145900 },
250 { 16432847, 146345 },
251 { 16666221, 146789 },
252 { 16902076, 147233 },
253 { 17140429, 147677 },
254 { 17381297, 148121 },
255 { 17624696, 148564 },
256 { 17870643, 149007 },
257 { 18119154, 149451 },
258 { 18370247, 149894 },
259 { 18623936, 150336 },
260 { 18880241, 150779 },
261 { 19139176, 151222 },
262 { 19400759, 151664 },
263 { 19665007, 152107 },
264 { 19931936, 152549 },
265 { 20201564, 152991 },
266 { 20473907, 153433 },
267 { 20748982, 153875 },
268 { 21026807, 154316 },
269 { 21307399, 154758 },
270 { 21590773, 155199 },
271 { 21876949, 155641 },
272 { 22165941, 156082 },
273 { 22457769, 156523 },
274 { 22752449, 156964 },
275 { 23049999, 157405 },
276 { 23350435, 157846 },
277 { 23653774, 158287 },
278 { 23960036, 158727 },
279 { 24269236, 159168 },
280 { 24581392, 159608 },
281 { 24896521, 160049 },
282 { 25214642, 160489 },
283 { 25535772, 160929 },
284 { 25859927, 161370 },
285 { 26187127, 161810 },
286 { 26517388, 162250 },
287 { 26850728, 162690 },
288 { 27187165, 163130 },
289 { 27526716, 163569 },
290 { 27869400, 164009 },
291 { 28215234, 164449 },
292 { 28564236, 164889 },
293 { 28916423, 165328 },
294 { 29271815, 165768 },
295 { 29630428, 166208 },
296 { 29992281, 166647 },
297 { 30357392, 167087 },
298 { 30725779, 167526 },
299 { 31097459, 167965 },
300 { 31472452, 168405 },
301 { 31850774, 168844 },
302 { 32232445, 169283 },
303 { 32617482, 169723 },
304 { 33005904, 170162 },
305 { 33397730, 170601 },
306 { 33792976, 171041 },
307 { 34191663, 171480 },
308 { 34593807, 171919 },
309 { 34999428, 172358 },
310 { 35408544, 172797 },
311 { 35821174, 173237 },
312 { 36237335, 173676 },
313 { 36657047, 174115 },
314 { 37080329, 174554 },
315 { 37507197, 174993 },
316 { 37937673, 175433 },
317 { 38371773, 175872 },
318 { 38809517, 176311 },
319 { 39250924, 176750 },
320 { 39696012, 177190 },
321 { 40144800, 177629 },
322 { 40597308, 178068 },
323 { 41053553, 178507 },
324 { 41513554, 178947 },
325 { 41977332, 179386 },
326 { 42444904, 179825 },
327 { 42916290, 180265 },
328 { 43391509, 180704 },
329 { 43870579, 181144 },
330 { 44353520, 181583 },
331 { 44840352, 182023 },
332 { 45331092, 182462 },
333 { 45825761, 182902 },
334 { 46324378, 183342 },
335 { 46826961, 183781 },
336 { 47333531, 184221 },
337 { 47844106, 184661 },
338 { 48358706, 185101 },
339 { 48877350, 185541 },
340 { 49400058, 185981 },
341 { 49926849, 186421 },
342 { 50457743, 186861 },
343 { 50992759, 187301 },
344 { 51531916, 187741 },
345 { 52075235, 188181 },
346 { 52622735, 188622 },
347 { 53174435, 189062 },
348 { 53730355, 189502 },
349 { 54290515, 189943 },
350 { 54854935, 190383 },
351 { 55423634, 190824 },
352 { 55996633, 191265 },
353 { 56573950, 191706 },
354 { 57155606, 192146 },
355 { 57741621, 192587 },
356 { 58332014, 193028 },
357 { 58926806, 193470 },
358 { 59526017, 193911 },
359 { 60129666, 194352 },
360 { 60737774, 194793 },
361 { 61350361, 195235 },
362 { 61967446, 195677 },
363 { 62589050, 196118 },
364 { 63215194, 196560 },
365 { 63845897, 197002 },
366 { 64481179, 197444 },
367 { 65121061, 197886 },
368 { 65765563, 198328 },
369 { 66414705, 198770 },
370 { 67068508, 199213 },
371 { 67726992, 199655 },
372 { 68390177, 200098 },
373 { 69058085, 200540 },
374 { 69730735, 200983 },
375 { 70408147, 201426 },
376 { 71090343, 201869 },
377 { 71777343, 202312 },
378 { 72469168, 202755 },
379 { 73165837, 203199 },
380 { 73867373, 203642 },
381 { 74573795, 204086 },
382 { 75285124, 204529 },
383 { 76001380, 204973 },
384 { 76722586, 205417 },
385 { 77448761, 205861 },
386 { 78179926, 206306 },
387 { 78916102, 206750 },
388 { 79657310, 207194 },
389 { 80403571, 207639 },
390 { 81154906, 208084 },
391 { 81911335, 208529 },
392 { 82672880, 208974 },
393 { 83439562, 209419 },
394 { 84211402, 209864 },
395 { 84988421, 210309 },
396 { 85770640, 210755 },
397 { 86558080, 211201 },
398 { 87350762, 211647 },
399 { 88148708, 212093 },
400 { 88951938, 212539 },
401 { 89760475, 212985 },
402 { 90574339, 213432 },
403 { 91393551, 213878 },
404 { 92218133, 214325 },
405 { 93048107, 214772 },
406 { 93883493, 215219 },
407 { 94724314, 215666 },
408 { 95570590, 216114 },
409 { 96422343, 216561 },
410 { 97279594, 217009 },
411 { 98142366, 217457 },
412 { 99010679, 217905 },
413 { 99884556, 218353 },
414 { 100764018, 218801 },
415 { 101649086, 219250 },
416 { 102539782, 219698 },
417 { 103436128, 220147 },
418 { 104338146, 220596 },
419 { 105245857, 221046 },
420 { 106159284, 221495 },
421 { 107078448, 221945 },
422 { 108003370, 222394 },
423 { 108934074, 222844 },
424 { 109870580, 223294 },
425 { 110812910, 223745 },
426 { 111761087, 224195 },
427 { 112715133, 224646 },
428 { 113675069, 225097 },
429 { 114640918, 225548 },
430 { 115612702, 225999 },
431 { 116590442, 226450 },
432 { 117574162, 226902 },
433 { 118563882, 227353 },
434 { 119559626, 227805 },
435 { 120561415, 228258 },
436 { 121569272, 228710 },
437 { 122583219, 229162 },
438 { 123603278, 229615 },
439 { 124629471, 230068 },
440 { 125661822, 230521 },
441 { 126700352, 230974 },
442 { 127745083, 231428 },
443 { 128796039, 231882 },
444 { 129853241, 232336 },
445 { 130916713, 232790 },
446 { 131986475, 233244 },
447 { 133062553, 233699 },
448 { 134144966, 234153 },
449 { 135233739, 234608 },
450 { 136328894, 235064 },
451 { 137430453, 235519 },
452 { 138538440, 235975 },
453 { 139652876, 236430 },
454 { 140773786, 236886 },
455 { 141901190, 237343 },
456 { 143035113, 237799 },
457 { 144175576, 238256 },
458 { 145322604, 238713 },
459 { 146476218, 239170 },
460 { 147636442, 239627 },
461 { 148803298, 240085 },
462 { 149976809, 240542 },
463 { 151156999, 241000 },
464 { 152343890, 241459 },
465 { 153537506, 241917 },
466 { 154737869, 242376 },
467 { 155945002, 242835 },
468 { 157158929, 243294 },
469 { 158379673, 243753 },
470 { 159607257, 244213 },
471 { 160841704, 244673 },
472 { 162083037, 245133 },
473 { 163331279, 245593 },
474 { 164586455, 246054 },
475 { 165848586, 246514 },
476 { 167117696, 246975 },
477 { 168393810, 247437 },
478 { 169676949, 247898 },
479 { 170967138, 248360 },
480 { 172264399, 248822 },
481 { 173568757, 249284 },
482 { 174880235, 249747 },
483 { 176198856, 250209 },
484 { 177524643, 250672 },
485 { 178857621, 251136 },
486 { 180197813, 251599 },
487 { 181545242, 252063 },
488 { 182899933, 252527 },
489 { 184261908, 252991 },
490 { 185631191, 253456 },
491 { 187007807, 253920 },
492 { 188391778, 254385 },
493 { 189783129, 254851 },
494 { 191181884, 255316 },
495 { 192588065, 255782 },
496 { 194001698, 256248 },
497 { 195422805, 256714 },
498 { 196851411, 257181 },
499 { 198287540, 257648 },
500 { 199731215, 258115 },
501 { 201182461, 258582 },
502 { 202641302, 259050 },
503 { 204107760, 259518 },
504 { 205581862, 259986 },
505 { 207063630, 260454 },
506 { 208553088, 260923 },
507 { 210050262, 261392 },
508 { 211555174, 261861 },
509 { 213067849, 262331 },
510 { 214588312, 262800 },
511 { 216116586, 263270 },
512 { 217652696, 263741 },
513 { 219196666, 264211 },
514 { 220748520, 264682 },
515 { 222308282, 265153 },
516 { 223875978, 265625 },
517 { 225451630, 266097 },
518 { 227035265, 266569 },
519 { 228626905, 267041 },
520 { 230226576, 267514 },
521 { 231834302, 267986 },
522 { 233450107, 268460 },
523 { 235074016, 268933 },
524 { 236706054, 269407 },
525 { 238346244, 269881 },
526 { 239994613, 270355 },
527 { 241651183, 270830 },
528 { 243315981, 271305 }
529};
530
531/* Calculate the send rate as per section 3.1 of RFC3448
532
533Returns send rate in bytes per second
534
535Integer maths and table lookups are used because floating point is not allowed in the kernel
536
537The function for Xcalc as per section 3.1 of RFC3448 is:
538
539X = s
540 -------------------------------------------------------------
541 R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2)))
542
543where
544X is the transmit rate in bytes/second
545s is the packet size in bytes
546R is the round trip time in seconds
547p is the loss event rate, between 0 and 1.0, expressed as the number of loss
548 events as a fraction of the number of packets transmitted
549t_RTO is the TCP retransmission timeout value in seconds
550b is the number of packets acknowledged by a single TCP acknowledgement
551
552We can assume that b = 1 and t_RTO = 4 * R. With this the equation becomes:
553
554X = s
555 -----------------------------------------------------------------------
556 R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2)))
557
558
559which we can break down into:
560
561X = s
562 --------
563 R * f(p)
564
565where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p))
566
567Function parameters:
568s - packet size in bytes
569R - RTT in usecs
570p - loss rate (decimal fraction multiplied by 1,000,000)
571
572Returns Xcalc in bytes per second
573
574DON'T alter this code unless you run test cases against it, as the maths
575has been arranged to prevent underflow/overflow.
576
577*/
578u32 tfrc_calc_x(u16 s, u32 R, u32 p)
579{
580 int index;
581 u32 f;
582 u64 tmp1, tmp2;
583
584 if (p < TFRC_CALC_X_SPLIT)
585 index = (p / (TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE)) - 1;
586 else
587 index = (p / (1000000 / TFRC_CALC_X_ARRSIZE)) - 1;
588
589 if (index < 0)
590 /* p should be 0 unless there is a bug in my code */
591 index = 0;
592
593 if (R == 0)
594 R = 1; /* RTT can't be zero or else divide by zero */
595
596 BUG_ON(index >= TFRC_CALC_X_ARRSIZE);
597
598 if (p >= TFRC_CALC_X_SPLIT)
599 f = tfrc_calc_x_lookup[index][0];
600 else
601 f = tfrc_calc_x_lookup[index][1];
602
603 tmp1 = ((u64)s * 100000000);
604 tmp2 = ((u64)R * (u64)f);
605 do_div(tmp2, 10000);
606 do_div(tmp1, tmp2);
607 /* Don't alter above math unless you test due to overflow on 32 bit */
608
609 return (u32)tmp1;
610}
611
612EXPORT_SYMBOL_GPL(tfrc_calc_x);
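A quick user-space cross-check of the fixed-point maths may help when touching this code: the table stores f(p) scaled by 1,000,000, so the scaled integer arithmetic in tfrc_calc_x() should track a floating-point evaluation of the RFC 3448 formula. A minimal sketch (the values for s, R and p are arbitrary examples; this is not part of the patch):

#include <math.h>
#include <stdint.h>
#include <stdio.h>

/* f(p) per RFC 3448 section 3.1, with b = 1 and t_RTO = 4 * R */
static double tfrc_f(double p)
{
	return sqrt(2.0 * p / 3.0) +
	       12.0 * sqrt(3.0 * p / 8.0) * p * (1.0 + 32.0 * p * p);
}

int main(void)
{
	const uint16_t s = 1460;	/* packet size in bytes */
	const uint32_t R = 100000;	/* RTT in usecs (100ms) */
	const double p = 0.01;		/* 1% loss event rate */

	/* the kernel looks f up in the table; compute it directly here */
	const uint32_t f = (uint32_t)(tfrc_f(p) * 1000000.0);

	/* the same scaled 64-bit arithmetic as tfrc_calc_x() */
	const uint64_t tmp1 = (uint64_t)s * 100000000ULL;
	const uint64_t tmp2 = (uint64_t)R * f / 10000;

	/* both should print roughly 164000 bytes/s for these inputs */
	printf("fixed point: %llu, float: %.0f\n",
	       (unsigned long long)(tmp1 / tmp2),
	       s / ((R / 1e6) * tfrc_f(p)));
	return 0;
}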
613
614/*
615 * args: fvalue - function value to match
616 * returns: p closest to that value
617 *
618 * both fvalue and p are scaled by 1,000,000 to allow integer arithmetic
619 */
620u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
621{
622 int ctr = 0;
623 int small;
624
625 if (fvalue < tfrc_calc_x_lookup[0][1])
626 return 0;
627
628 if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1])
629 small = 1;
630 else if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0])
631 return 1000000;
632 else
633 small = 0;
634
635 while (fvalue > tfrc_calc_x_lookup[ctr][small])
636 ctr++;
637
638 if (small)
639 return TFRC_CALC_X_SPLIT * ctr / TFRC_CALC_X_ARRSIZE;
640 else
641 return 1000000 * ctr / TFRC_CALC_X_ARRSIZE;
642}
643
644EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
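Note the asymmetry between the two lookups: tfrc_calc_x() maps p to entry p / step - 1, while the scan above stops at the first entry not less than fvalue and returns ctr * step. A sketch of the resulting round-trip property, assuming the TFRC_CALC_X_SPLIT/TFRC_CALC_X_ARRSIZE constants defined with the table earlier in this file:

/* for the "small p" column (0-based index i):
 *
 *	p_in  = TFRC_CALC_X_SPLIT * (i + 1) / TFRC_CALC_X_ARRSIZE;
 *	p_out = tfrc_calc_x_reverse_lookup(tfrc_calc_x_lookup[i][1]);
 *
 * yields p_out == TFRC_CALC_X_SPLIT * i / TFRC_CALC_X_ARRSIZE, i.e.
 * the round trip recovers p_in to within exactly one table step.
 */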
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
new file mode 100644
index 000000000000..33456c0d5937
--- /dev/null
+++ b/net/dccp/dccp.h
@@ -0,0 +1,493 @@
1#ifndef _DCCP_H
2#define _DCCP_H
3/*
4 * net/dccp/dccp.h
5 *
6 * An implementation of the DCCP protocol
7 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
8 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 */
14
15#include <linux/config.h>
16#include <linux/dccp.h>
17#include <net/snmp.h>
18#include <net/sock.h>
19#include <net/tcp.h>
20
21#ifdef CONFIG_IP_DCCP_DEBUG
22extern int dccp_debug;
23
24#define dccp_pr_debug(format, a...) \
25 do { if (dccp_debug) \
26 printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \
27 } while (0)
28#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) \
29 printk(format, ##a); } while (0)
30#else
31#define dccp_pr_debug(format, a...)
32#define dccp_pr_debug_cat(format, a...)
33#endif
34
35extern struct inet_hashinfo dccp_hashinfo;
36
37extern atomic_t dccp_orphan_count;
38extern int dccp_tw_count;
39extern void dccp_tw_deschedule(struct inet_timewait_sock *tw);
40
41extern void dccp_time_wait(struct sock *sk, int state, int timeo);
42
43/* FIXME: Right size this */
44#define DCCP_MAX_OPT_LEN 128
45
46#define DCCP_MAX_PACKET_HDR 32
47
48#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER)
49
50#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
51 * state, about 60 seconds */
52
53/* draft-ietf-dccp-spec-11.txt initial RTO value */
54#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
55
56/* Maximal interval between probes for local resources. */
57#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
58
59#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
60
61extern struct proto dccp_v4_prot;
62
63/* is seq1 < seq2 ? */
64static inline int before48(const u64 seq1, const u64 seq2)
65{
66 return (s64)((seq1 << 16) - (seq2 << 16)) < 0;
67}
68
69/* is seq1 > seq2 ? */
70static inline int after48(const u64 seq1, const u64 seq2)
71{
72 return (s64)((seq2 << 16) - (seq1 << 16)) < 0;
73}
74
75/* is seq2 <= seq1 <= seq3 ? */
76static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
77{
78 return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16);
79}
80
81static inline u64 max48(const u64 seq1, const u64 seq2)
82{
83 return after48(seq1, seq2) ? seq1 : seq2;
84}
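The shift trick used by these helpers merits a worked example: shifting a 48-bit sequence number left by 16 bits puts it in the top of a u64, so the subtraction wraps modulo 2^48 and the sign of the (signed) difference gives the circular ordering. A small user-space sketch:

#include <assert.h>
#include <stdint.h>

static int before48_demo(uint64_t seq1, uint64_t seq2)
{
	return (int64_t)((seq1 << 16) - (seq2 << 16)) < 0;
}

int main(void)
{
	assert(before48_demo(1, 2));
	/* wraparound: the highest 48-bit seqno is "before" 0... */
	assert(before48_demo((1ULL << 48) - 1, 0));
	/* ...but 0 is not before it */
	assert(!before48_demo(0, (1ULL << 48) - 1));
	return 0;
}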
85
86enum {
87 DCCP_MIB_NUM = 0,
88 DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */
89 DCCP_MIB_ESTABRESETS, /* EstabResets */
90 DCCP_MIB_CURRESTAB, /* CurrEstab */
91 DCCP_MIB_OUTSEGS, /* OutSegs */
92 DCCP_MIB_OUTRSTS,
93 DCCP_MIB_ABORTONTIMEOUT,
94 DCCP_MIB_TIMEOUTS,
95 DCCP_MIB_ABORTFAILED,
96 DCCP_MIB_PASSIVEOPENS,
97 DCCP_MIB_ATTEMPTFAILS,
98 DCCP_MIB_OUTDATAGRAMS,
99 DCCP_MIB_INERRS,
100 DCCP_MIB_OPTMANDATORYERROR,
101 DCCP_MIB_INVALIDOPT,
102 __DCCP_MIB_MAX
103};
104
105#define DCCP_MIB_MAX __DCCP_MIB_MAX
106struct dccp_mib {
107 unsigned long mibs[DCCP_MIB_MAX];
108} __SNMP_MIB_ALIGN__;
109
110DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
111#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field)
112#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field)
113#define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field)
114#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field)
115#define DCCP_ADD_STATS_BH(field, val) \
116 SNMP_ADD_STATS_BH(dccp_statistics, field, val)
117#define DCCP_ADD_STATS_USER(field, val) \
118 SNMP_ADD_STATS_USER(dccp_statistics, field, val)
119
120extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb);
121extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb);
122
123extern int dccp_send_response(struct sock *sk);
124extern void dccp_send_ack(struct sock *sk);
125extern void dccp_send_delayed_ack(struct sock *sk);
126extern void dccp_send_sync(struct sock *sk, const u64 seq,
127 const enum dccp_pkt_type pkt_type);
128
129extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo);
130extern void dccp_write_space(struct sock *sk);
131
132extern void dccp_init_xmit_timers(struct sock *sk);
133static inline void dccp_clear_xmit_timers(struct sock *sk)
134{
135 inet_csk_clear_xmit_timers(sk);
136}
137
138extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
139
140extern const char *dccp_packet_name(const int type);
141extern const char *dccp_state_name(const int state);
142
143static inline void dccp_set_state(struct sock *sk, const int state)
144{
145 const int oldstate = sk->sk_state;
146
147 dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
148 dccp_role(sk), sk,
149 dccp_state_name(oldstate), dccp_state_name(state));
150 WARN_ON(state == oldstate);
151
152 switch (state) {
153 case DCCP_OPEN:
154 if (oldstate != DCCP_OPEN)
155 DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
156 break;
157
158 case DCCP_CLOSED:
159 if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
160 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
161
162 sk->sk_prot->unhash(sk);
163 if (inet_csk(sk)->icsk_bind_hash != NULL &&
164 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
165 inet_put_port(&dccp_hashinfo, sk);
166 /* fall through */
167 default:
168 if (oldstate == DCCP_OPEN)
169 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
170 }
171
172 /* Change state AFTER socket is unhashed to avoid closed
173 * socket sitting in hash tables.
174 */
175 sk->sk_state = state;
176}
177
178static inline void dccp_done(struct sock *sk)
179{
180 dccp_set_state(sk, DCCP_CLOSED);
181 dccp_clear_xmit_timers(sk);
182
183 sk->sk_shutdown = SHUTDOWN_MASK;
184
185 if (!sock_flag(sk, SOCK_DEAD))
186 sk->sk_state_change(sk);
187 else
188 inet_csk_destroy_sock(sk);
189}
190
191static inline void dccp_openreq_init(struct request_sock *req,
192 struct dccp_sock *dp,
193 struct sk_buff *skb)
194{
195 /*
196 * FIXME: fill in the other req fields from the DCCP options
197 * received
198 */
199 inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
200 inet_rsk(req)->acked = 0;
201 req->rcv_wnd = 0;
202}
203
204extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
205
206extern struct sock *dccp_create_openreq_child(struct sock *sk,
207 const struct request_sock *req,
208 const struct sk_buff *skb);
209
210extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
211
212extern void dccp_v4_err(struct sk_buff *skb, u32);
213
214extern int dccp_v4_rcv(struct sk_buff *skb);
215
216extern struct sock *dccp_v4_request_recv_sock(struct sock *sk,
217 struct sk_buff *skb,
218 struct request_sock *req,
219 struct dst_entry *dst);
220extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
221 struct request_sock *req,
222 struct request_sock **prev);
223
224extern int dccp_child_process(struct sock *parent, struct sock *child,
225 struct sk_buff *skb);
226extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
227 struct dccp_hdr *dh, unsigned len);
228extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
229 const struct dccp_hdr *dh, const unsigned len);
230
231extern void dccp_close(struct sock *sk, long timeout);
232extern struct sk_buff *dccp_make_response(struct sock *sk,
233 struct dst_entry *dst,
234 struct request_sock *req);
235extern struct sk_buff *dccp_make_reset(struct sock *sk,
236 struct dst_entry *dst,
237 enum dccp_reset_codes code);
238
239extern int dccp_connect(struct sock *sk);
240extern int dccp_disconnect(struct sock *sk, int flags);
241extern int dccp_getsockopt(struct sock *sk, int level, int optname,
242 char __user *optval, int __user *optlen);
243extern int dccp_setsockopt(struct sock *sk, int level, int optname,
244 char __user *optval, int optlen);
245extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
246extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk,
247 struct msghdr *msg, size_t size);
248extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
249 struct msghdr *msg, size_t len, int nonblock,
250 int flags, int *addr_len);
251extern void dccp_shutdown(struct sock *sk, int how);
252
253extern int dccp_v4_checksum(const struct sk_buff *skb,
254 const u32 saddr, const u32 daddr);
255
256extern int dccp_v4_send_reset(struct sock *sk,
257 enum dccp_reset_codes code);
258extern void dccp_send_close(struct sock *sk, const int active);
259
260struct dccp_skb_cb {
261 __u8 dccpd_type;
262 __u8 dccpd_reset_code;
263 __u8 dccpd_service;
264 __u8 dccpd_ccval;
265 __u64 dccpd_seq;
266 __u64 dccpd_ack_seq;
267 int dccpd_opt_len;
268};
269
270#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
271
272static inline int dccp_non_data_packet(const struct sk_buff *skb)
273{
274 const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
275
276 return type == DCCP_PKT_ACK ||
277 type == DCCP_PKT_CLOSE ||
278 type == DCCP_PKT_CLOSEREQ ||
279 type == DCCP_PKT_RESET ||
280 type == DCCP_PKT_SYNC ||
281 type == DCCP_PKT_SYNCACK;
282}
283
284static inline int dccp_packet_without_ack(const struct sk_buff *skb)
285{
286 const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
287
288 return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST;
289}
290
291#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
292#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)
293
294static inline void dccp_set_seqno(u64 *seqno, u64 value)
295{
296 if (value > DCCP_MAX_SEQNO)
297 value -= DCCP_MAX_SEQNO + 1;
298 *seqno = value;
299}
300
301static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
302{
303 return ((seqno2 << 16) - (seqno1 << 16)) >> 16;
304}
305
306static inline void dccp_inc_seqno(u64 *seqno)
307{
308 if (++*seqno > DCCP_MAX_SEQNO)
309 *seqno = 0;
310}
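A worked example of the wrap behaviour: with *seqno == DCCP_MAX_SEQNO, dccp_inc_seqno() wraps it to 0, and dccp_delta_seqno(DCCP_MAX_SEQNO, 1) == 2, since the subtraction is performed in the top 48 bits of a u64 (so it wraps modulo 2^48) before being shifted back down.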
311
312static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
313{
314 struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh +
315 sizeof(*dh));
316
317#if defined(__LITTLE_ENDIAN_BITFIELD)
318 dh->dccph_seq = htonl((gss >> 32)) >> 8;
319#elif defined(__BIG_ENDIAN_BITFIELD)
320 dh->dccph_seq = htonl((gss >> 32));
321#else
322#error "Adjust your <asm/byteorder.h> defines"
323#endif
324 dhx->dccph_seq_low = htonl(gss & 0xffffffff);
325}
326
327static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack,
328 const u64 gsr)
329{
330#if defined(__LITTLE_ENDIAN_BITFIELD)
331 dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8;
332#elif defined(__BIG_ENDIAN_BITFIELD)
333 dhack->dccph_ack_nr_high = htonl((gsr >> 32));
334#else
335#error "Adjust your <asm/byteorder.h> defines"
336#endif
337 dhack->dccph_ack_nr_low = htonl(gsr & 0xffffffff);
338}
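A worked example of the 48-bit split, taking the big-endian branch for simplicity (the little-endian branch additionally shifts right by 8 to place the value correctly within the header's bitfield layout): for gsr == 0x0000123456789ABC, gsr >> 32 == 0x1234 ends up in dccph_ack_nr_high and gsr & 0xffffffff == 0x56789ABC in dccph_ack_nr_low.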
339
340static inline void dccp_update_gsr(struct sock *sk, u64 seq)
341{
342 struct dccp_sock *dp = dccp_sk(sk);
343
344 dp->dccps_gsr = seq;
345 dccp_set_seqno(&dp->dccps_swl,
346 (dp->dccps_gsr + 1 -
347 (dp->dccps_options.dccpo_sequence_window / 4)));
348 dccp_set_seqno(&dp->dccps_swh,
349 (dp->dccps_gsr +
350 (3 * dp->dccps_options.dccpo_sequence_window) / 4));
351}
352
353static inline void dccp_update_gss(struct sock *sk, u64 seq)
354{
355 struct dccp_sock *dp = dccp_sk(sk);
356
357 dp->dccps_awh = dp->dccps_gss = seq;
358 dccp_set_seqno(&dp->dccps_awl,
359 (dp->dccps_gss -
360 dp->dccps_options.dccpo_sequence_window + 1));
361}
362
363extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb);
364extern void dccp_insert_option_elapsed_time(struct sock *sk,
365 struct sk_buff *skb,
366 u32 elapsed_time);
367extern void dccp_insert_option_timestamp(struct sock *sk,
368 struct sk_buff *skb);
369extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
370 unsigned char option,
371 const void *value, unsigned char len);
372
373extern struct socket *dccp_ctl_socket;
374
375#define DCCP_ACKPKTS_STATE_RECEIVED 0
376#define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6)
377#define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6)
378
379#define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */
380#define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */
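Each byte of the ack vector thus packs a 2-bit state in its top bits and a 6-bit run length below it, following the encoding in the draft's Appendix A. Decoding one byte with the masks above, for illustration:

	u8 b = 0xc5;
	u8 state = b & DCCP_ACKPKTS_STATE_MASK;	/* 0xc0: not received */
	u8 len = b & DCCP_ACKPKTS_LEN_MASK;	/* 5: run length */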
381
382/** struct dccp_ackpkts - acknowledgeable packets
383 *
384 * This data structure is the one defined in the DCCP draft
385 * Appendix A.
386 *
387 * @dccpap_buf_head - circular buffer head
388 * @dccpap_buf_tail - circular buffer tail
389 * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the
390 * buffer (i.e. %dccpap_buf_head)
391 * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
392 * by the buffer with State 0
393 *
394 * Additionally, the HC-Receiver must keep some information about the
395 * Ack Vectors it has recently sent. For each packet sent carrying an
396 * Ack Vector, it remembers four variables:
397 *
398 * @dccpap_ack_seqno - the Sequence Number used for the packet
399 * (HC-Receiver seqno)
400 * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement.
401 * @dccpap_ack_ackno - the Acknowledgement Number used for the packet
402 * (HC-Sender seqno)
403 * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
404 *
405 * @dccpap_buf_len - circular buffer length
406 * @dccpap_time - the time in usecs
407 * @dccpap_buf - circular buffer of acknowledgeable packets
408 */
409struct dccp_ackpkts {
410 unsigned int dccpap_buf_head;
411 unsigned int dccpap_buf_tail;
412 u64 dccpap_buf_ackno;
413 u64 dccpap_ack_seqno;
414 u64 dccpap_ack_ackno;
415 unsigned int dccpap_ack_ptr;
416 unsigned int dccpap_buf_vector_len;
417 unsigned int dccpap_ack_vector_len;
418 unsigned int dccpap_buf_len;
419 struct timeval dccpap_time;
420 u8 dccpap_buf_nonce;
421 u8 dccpap_ack_nonce;
422 u8 dccpap_buf[0];
423};
424
425extern struct dccp_ackpkts *
426 dccp_ackpkts_alloc(unsigned int len,
427 const unsigned int __nocast priority);
428extern void dccp_ackpkts_free(struct dccp_ackpkts *ap);
429extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state);
430extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap,
431 struct sock *sk, u64 ackno);
432
433static inline suseconds_t timeval_usecs(const struct timeval *tv)
434{
435 return tv->tv_sec * USEC_PER_SEC + tv->tv_usec;
436}
437
438static inline suseconds_t timeval_delta(const struct timeval *large,
439 const struct timeval *small)
440{
441 time_t secs = large->tv_sec - small->tv_sec;
442 suseconds_t usecs = large->tv_usec - small->tv_usec;
443
444 if (usecs < 0) {
445 secs--;
446 usecs += USEC_PER_SEC;
447 }
448 return secs * USEC_PER_SEC + usecs;
449}
450
451static inline void timeval_add_usecs(struct timeval *tv,
452 const suseconds_t usecs)
453{
454 tv->tv_usec += usecs;
455 while (tv->tv_usec >= USEC_PER_SEC) {
456 tv->tv_sec++;
457 tv->tv_usec -= USEC_PER_SEC;
458 }
459}
460
461static inline void timeval_sub_usecs(struct timeval *tv,
462 const suseconds_t usecs)
463{
464 tv->tv_usec -= usecs;
465 while (tv->tv_usec < 0) {
466 tv->tv_sec--;
467 tv->tv_usec += USEC_PER_SEC;
468 }
469}
470
471/*
472 * Returns the difference in usecs between timeval
473 * passed in and current time
474 */
475static inline suseconds_t timeval_now_delta(const struct timeval *tv)
476{
477 struct timeval now;
478 do_gettimeofday(&now);
479 return timeval_delta(&now, tv);
480}
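A worked example for these helpers: with large = { .tv_sec = 5, .tv_usec = 200 } and small = { .tv_sec = 3, .tv_usec = 900000 }, timeval_delta() first computes secs = 2 and usecs = -899800, then borrows one second from secs, returning 1 * USEC_PER_SEC + 100200 = 1100200 usecs.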
481
482#ifdef CONFIG_IP_DCCP_DEBUG
483extern void dccp_ackvector_print(const u64 ackno,
484 const unsigned char *vector, int len);
485extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap);
486#else
487static inline void dccp_ackvector_print(const u64 ackno,
488 const unsigned char *vector,
489 int len) { }
490static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { }
491#endif
492
493#endif /* _DCCP_H */
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
new file mode 100644
index 000000000000..f675d8e642d3
--- /dev/null
+++ b/net/dccp/diag.c
@@ -0,0 +1,71 @@
1/*
2 * net/dccp/diag.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@mandriva.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/config.h>
13
14#include <linux/module.h>
15#include <linux/inet_diag.h>
16
17#include "ccid.h"
18#include "dccp.h"
19
20static void dccp_get_info(struct sock *sk, struct tcp_info *info)
21{
22 struct dccp_sock *dp = dccp_sk(sk);
23 const struct inet_connection_sock *icsk = inet_csk(sk);
24
25 memset(info, 0, sizeof(*info));
26
27 info->tcpi_state = sk->sk_state;
28 info->tcpi_retransmits = icsk->icsk_retransmits;
29 info->tcpi_probes = icsk->icsk_probes_out;
30 info->tcpi_backoff = icsk->icsk_backoff;
31 info->tcpi_pmtu = dp->dccps_pmtu_cookie;
32
33 if (dp->dccps_options.dccpo_send_ack_vector)
34 info->tcpi_options |= TCPI_OPT_SACK;
35
36 ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info);
37 ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info);
38}
39
40static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
41 void *_info)
42{
43 r->idiag_rqueue = r->idiag_wqueue = 0;
44
45 if (_info != NULL)
46 dccp_get_info(sk, _info);
47}
48
49static struct inet_diag_handler dccp_diag_handler = {
50 .idiag_hashinfo = &dccp_hashinfo,
51 .idiag_get_info = dccp_diag_get_info,
52 .idiag_type = DCCPDIAG_GETSOCK,
53 .idiag_info_size = sizeof(struct tcp_info),
54};
55
56static int __init dccp_diag_init(void)
57{
58 return inet_diag_register(&dccp_diag_handler);
59}
60
61static void __exit dccp_diag_fini(void)
62{
63 inet_diag_unregister(&dccp_diag_handler);
64}
65
66module_init(dccp_diag_init);
67module_exit(dccp_diag_fini);
68
69MODULE_LICENSE("GPL");
70MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
71MODULE_DESCRIPTION("DCCP inet_diag handler");
diff --git a/net/dccp/input.c b/net/dccp/input.c
new file mode 100644
index 000000000000..ef29cef1dafe
--- /dev/null
+++ b/net/dccp/input.c
@@ -0,0 +1,600 @@
1/*
2 * net/dccp/input.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/skbuff.h>
16
17#include <net/sock.h>
18
19#include "ccid.h"
20#include "dccp.h"
21
22static void dccp_fin(struct sock *sk, struct sk_buff *skb)
23{
24 sk->sk_shutdown |= RCV_SHUTDOWN;
25 sock_set_flag(sk, SOCK_DONE);
26 __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4);
27 __skb_queue_tail(&sk->sk_receive_queue, skb);
28 skb_set_owner_r(skb, sk);
29 sk->sk_data_ready(sk, 0);
30}
31
32static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
33{
34 dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED);
35 dccp_fin(sk, skb);
36 dccp_set_state(sk, DCCP_CLOSED);
37 sk_wake_async(sk, 1, POLL_HUP);
38}
39
40static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
41{
42 /*
43 * Step 7: Check for unexpected packet types
44 * If (S.is_server and P.type == CloseReq)
45 * Send Sync packet acknowledging P.seqno
46 * Drop packet and return
47 */
48 if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) {
49 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
50 return;
51 }
52
53 dccp_set_state(sk, DCCP_CLOSING);
54 dccp_send_close(sk, 0);
55}
56
57static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
58{
59 struct dccp_sock *dp = dccp_sk(sk);
60
61 if (dp->dccps_options.dccpo_send_ack_vector)
62 dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk,
63 DCCP_SKB_CB(skb)->dccpd_ack_seq);
64}
65
66static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
67{
68 const struct dccp_hdr *dh = dccp_hdr(skb);
69 struct dccp_sock *dp = dccp_sk(sk);
70 u64 lswl, lawl;
71
72 /*
73 * Step 5: Prepare sequence numbers for Sync
74 * If P.type == Sync or P.type == SyncAck,
75 * If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL,
76 * / * P is valid, so update sequence number variables
77 * accordingly. After this update, P will pass the tests
78 * in Step 6. A SyncAck is generated if necessary in
79 * Step 15 * /
80 * Update S.GSR, S.SWL, S.SWH
81 * Otherwise,
82 * Drop packet and return
83 */
84 if (dh->dccph_type == DCCP_PKT_SYNC ||
85 dh->dccph_type == DCCP_PKT_SYNCACK) {
86 if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
87 dp->dccps_awl, dp->dccps_awh) &&
88 !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl))
89 dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
90 else
91 return -1;
92 }
93
94 /*
95 * Step 6: Check sequence numbers
96 * Let LSWL = S.SWL and LAWL = S.AWL
97 * If P.type == CloseReq or P.type == Close or P.type == Reset,
98 * LSWL := S.GSR + 1, LAWL := S.GAR
99 * If LSWL <= P.seqno <= S.SWH
100 * and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH),
101 * Update S.GSR, S.SWL, S.SWH
102 * If P.type != Sync,
103 * Update S.GAR
104 * Otherwise,
105 * Send Sync packet acknowledging P.seqno
106 * Drop packet and return
107 */
108 lswl = dp->dccps_swl;
109 lawl = dp->dccps_awl;
110
111 if (dh->dccph_type == DCCP_PKT_CLOSEREQ ||
112 dh->dccph_type == DCCP_PKT_CLOSE ||
113 dh->dccph_type == DCCP_PKT_RESET) {
114 lswl = dp->dccps_gsr;
115 dccp_inc_seqno(&lswl);
116 lawl = dp->dccps_gar;
117 }
118
119 if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) &&
120 (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ ||
121 between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
122 lawl, dp->dccps_awh))) {
123 dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
124
125 if (dh->dccph_type != DCCP_PKT_SYNC &&
126 (DCCP_SKB_CB(skb)->dccpd_ack_seq !=
127 DCCP_PKT_WITHOUT_ACK_SEQ))
128 dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq;
129 } else {
130 LIMIT_NETDEBUG(KERN_WARNING "DCCP: Step 6 failed for %s packet, "
131 "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
132 "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), "
133 "sending SYNC...\n",
134 dccp_packet_name(dh->dccph_type),
135 (unsigned long long) lswl,
136 (unsigned long long)
137 DCCP_SKB_CB(skb)->dccpd_seq,
138 (unsigned long long) dp->dccps_swh,
139 (DCCP_SKB_CB(skb)->dccpd_ack_seq ==
140 DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists",
141 (unsigned long long) lawl,
142 (unsigned long long)
143 DCCP_SKB_CB(skb)->dccpd_ack_seq,
144 (unsigned long long) dp->dccps_awh);
145 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
146 return -1;
147 }
148
149 return 0;
150}
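A concrete instance of the Step 6 check, using the window placement from dccp_update_gsr() in dccp.h: with S.GSR == 1000 and a sequence window W == 100, S.SWL == 1000 + 1 - W/4 == 976 and S.SWH == 1000 + 3*W/4 == 1075; a Data packet with P.seqno == 980 then passes, while one with P.seqno == 1100 falls outside the window and takes the Sync-and-drop branch above.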
151
152int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
153 const struct dccp_hdr *dh, const unsigned len)
154{
155 struct dccp_sock *dp = dccp_sk(sk);
156
157 if (dccp_check_seqno(sk, skb))
158 goto discard;
159
160 if (dccp_parse_options(sk, skb))
161 goto discard;
162
163 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
164 dccp_event_ack_recv(sk, skb);
165
166 /*
167 * FIXME: check ECN to see if we should use
168 * DCCP_ACKPKTS_STATE_ECN_MARKED
169 */
170 if (dp->dccps_options.dccpo_send_ack_vector) {
171 struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
172
173 if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
174 DCCP_SKB_CB(skb)->dccpd_seq,
175 DCCP_ACKPKTS_STATE_RECEIVED)) {
176 LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable "
177 "packets buffer full!\n");
178 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
179 inet_csk_schedule_ack(sk);
180 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
181 TCP_DELACK_MIN,
182 DCCP_RTO_MAX);
183 goto discard;
184 }
185
186 /*
187 * FIXME: this activation is probably wrong; we have to study the
188 * TCP delack machinery more and how it fits into the DCCP draft,
189 * but for now it kinda "works" 8)
190 */
191 if (!inet_csk_ack_scheduled(sk)) {
192 inet_csk_schedule_ack(sk);
193 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ,
194 DCCP_RTO_MAX);
195 }
196 }
197
198 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
199 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
200
201 switch (dccp_hdr(skb)->dccph_type) {
202 case DCCP_PKT_DATAACK:
203 case DCCP_PKT_DATA:
204 /*
205 * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED
206 * option if it is.
207 */
208 __skb_pull(skb, dh->dccph_doff * 4);
209 __skb_queue_tail(&sk->sk_receive_queue, skb);
210 skb_set_owner_r(skb, sk);
211 sk->sk_data_ready(sk, 0);
212 return 0;
213 case DCCP_PKT_ACK:
214 goto discard;
215 case DCCP_PKT_RESET:
216 /*
217 * Step 9: Process Reset
218 * If P.type == Reset,
219 * Tear down connection
220 * S.state := TIMEWAIT
221 * Set TIMEWAIT timer
222 * Drop packet and return
223 */
224 dccp_fin(sk, skb);
225 dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
226 return 0;
227 case DCCP_PKT_CLOSEREQ:
228 dccp_rcv_closereq(sk, skb);
229 goto discard;
230 case DCCP_PKT_CLOSE:
231 dccp_rcv_close(sk, skb);
232 return 0;
233 case DCCP_PKT_REQUEST:
234 /* Step 7
235 * or (S.is_server and P.type == Response)
236 * or (S.is_client and P.type == Request)
237 * or (S.state >= OPEN and P.type == Request
238 * and P.seqno >= S.OSR)
239 * or (S.state >= OPEN and P.type == Response
240 * and P.seqno >= S.OSR)
241 * or (S.state == RESPOND and P.type == Data),
242 * Send Sync packet acknowledging P.seqno
243 * Drop packet and return
244 */
245 if (dp->dccps_role != DCCP_ROLE_LISTEN)
246 goto send_sync;
247 goto check_seq;
248 case DCCP_PKT_RESPONSE:
249 if (dp->dccps_role != DCCP_ROLE_CLIENT)
250 goto send_sync;
251check_seq:
252 if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) {
253send_sync:
254 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
255 DCCP_PKT_SYNC);
256 }
257 break;
258 case DCCP_PKT_SYNC:
259 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
260 DCCP_PKT_SYNCACK);
261 /*
262 * From the draft:
263 *
264 * As with DCCP-Ack packets, DCCP-Sync and DCCP-SyncAck packets
265 * MAY have non-zero-length application data areas, whose
266 * contents * receivers MUST ignore.
267 */
268 goto discard;
269 }
270
271 DCCP_INC_STATS_BH(DCCP_MIB_INERRS);
272discard:
273 __kfree_skb(skb);
274 return 0;
275}
276
277static int dccp_rcv_request_sent_state_process(struct sock *sk,
278 struct sk_buff *skb,
279 const struct dccp_hdr *dh,
280 const unsigned len)
281{
282 /*
283 * Step 4: Prepare sequence numbers in REQUEST
284 * If S.state == REQUEST,
285 * If (P.type == Response or P.type == Reset)
286 * and S.AWL <= P.ackno <= S.AWH,
287 * / * Set sequence number variables corresponding to the
288 * other endpoint, so P will pass the tests in Step 6 * /
289 * Set S.GSR, S.ISR, S.SWL, S.SWH
290 * / * Response processing continues in Step 10; Reset
291 * processing continues in Step 9 * /
292 */
293 if (dh->dccph_type == DCCP_PKT_RESPONSE) {
294 const struct inet_connection_sock *icsk = inet_csk(sk);
295 struct dccp_sock *dp = dccp_sk(sk);
296
297 /* Stop the REQUEST timer */
298 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
299 BUG_TRAP(sk->sk_send_head != NULL);
300 __kfree_skb(sk->sk_send_head);
301 sk->sk_send_head = NULL;
302
303 if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
304 dp->dccps_awl, dp->dccps_awh)) {
305 dccp_pr_debug("invalid ackno: S.AWL=%llu, "
306 "P.ackno=%llu, S.AWH=%llu \n",
307 (unsigned long long)dp->dccps_awl,
308 (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
309 (unsigned long long)dp->dccps_awh);
310 goto out_invalid_packet;
311 }
312
313 dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
314 dccp_update_gsr(sk, dp->dccps_isr);
315 /*
316 * SWL and AWL are initially adjusted so that they are not less than
317 * the initial Sequence Numbers received and sent, respectively:
318 * SWL := max(GSR + 1 - floor(W/4), ISR),
319 * AWL := max(GSS - W' + 1, ISS).
320 * These adjustments MUST be applied only at the beginning of the
321 * connection.
322 *
323 * AWL was adjusted in dccp_v4_connect -acme
324 */
325 dccp_set_seqno(&dp->dccps_swl,
326 max48(dp->dccps_swl, dp->dccps_isr));
327
328 if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 ||
329 ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) {
330 ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
331 ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
332 /* FIXME: send appropriate RESET code */
333 goto out_invalid_packet;
334 }
335
336 dccp_sync_mss(sk, dp->dccps_pmtu_cookie);
337
338 /*
339 * Step 10: Process REQUEST state (second part)
340 * If S.state == REQUEST,
341 * / * If we get here, P is a valid Response from the
342 * server (see Step 4), and we should move to
343 * PARTOPEN state. PARTOPEN means send an Ack,
344 * don't send Data packets, retransmit Acks
345 * periodically, and always include any Init Cookie
346 * from the Response * /
347 * S.state := PARTOPEN
348 * Set PARTOPEN timer
349 * Continue with S.state == PARTOPEN
350 * / * Step 12 will send the Ack completing the
351 * three-way handshake * /
352 */
353 dccp_set_state(sk, DCCP_PARTOPEN);
354
355 /* Make sure socket is routed, for correct metrics. */
356 inet_sk_rebuild_header(sk);
357
358 if (!sock_flag(sk, SOCK_DEAD)) {
359 sk->sk_state_change(sk);
360 sk_wake_async(sk, 0, POLL_OUT);
361 }
362
363 if (sk->sk_write_pending || icsk->icsk_ack.pingpong ||
364 icsk->icsk_accept_queue.rskq_defer_accept) {
365 /* Save one ACK. Data will be ready after
366 * several ticks, if write_pending is set.
367 *
368 * It may be deleted, but with this feature tcpdumps
369 * look so _wonderfully_ clever, that I was not able
370 * to stand against the temptation 8) --ANK
371 */
372 /*
373 * OK, in DCCP we can as well do a similar trick, its
374 * even in the draft, but there is no need for us to
375 * schedule an ack here, as dccp_sendmsg does this for
376 * us, also stated in the draft. -acme
377 */
378 __kfree_skb(skb);
379 return 0;
380 }
381 dccp_send_ack(sk);
382 return -1;
383 }
384
385out_invalid_packet:
386 return 1; /* dccp_v4_do_rcv will send a reset, but...
387 FIXME: the reset code should be
388 DCCP_RESET_CODE_PACKET_ERROR */
389}
390
391static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
392 struct sk_buff *skb,
393 const struct dccp_hdr *dh,
394 const unsigned len)
395{
396 int queued = 0;
397
398 switch (dh->dccph_type) {
399 case DCCP_PKT_RESET:
400 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
401 break;
402 case DCCP_PKT_DATAACK:
403 case DCCP_PKT_ACK:
404 /*
405 * FIXME: we should be resetting the PARTOPEN (DELACK) timer
406 * here, but only if we haven't used the DELACK timer for
407 * something else, like sending a delayed ack for a TIMESTAMP
408 * echo, etc. For now we're not clearing it; sending an extra
409 * ACK when there is nothing else to do in DELACK is not a big
410 * deal after all.
411 */
412
413 /* Stop the PARTOPEN timer */
414 if (sk->sk_state == DCCP_PARTOPEN)
415 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
416
417 dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq;
418 dccp_set_state(sk, DCCP_OPEN);
419
420 if (dh->dccph_type == DCCP_PKT_DATAACK) {
421 dccp_rcv_established(sk, skb, dh, len);
422 queued = 1; /* packet was queued
423 (by dccp_rcv_established) */
424 }
425 break;
426 }
427
428 return queued;
429}
430
431int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
432 struct dccp_hdr *dh, unsigned len)
433{
434 struct dccp_sock *dp = dccp_sk(sk);
435 const int old_state = sk->sk_state;
436 int queued = 0;
437
438 /*
439 * Step 3: Process LISTEN state
440 * (Continuing from dccp_v4_do_rcv and dccp_v6_do_rcv)
441 *
442 * If S.state == LISTEN,
443 * If P.type == Request or P contains a valid Init Cookie
444 * option,
445 * * Must scan the packet's options to check for an Init
446 * Cookie. Only the Init Cookie is processed here,
447 * however; other options are processed in Step 8. This
448 * scan need only be performed if the endpoint uses Init
449 * Cookies *
450 * * Generate a new socket and switch to that socket *
451 * Set S := new socket for this port pair
452 * S.state = RESPOND
453 * Choose S.ISS (initial seqno) or set from Init Cookie
454 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
455 * Continue with S.state == RESPOND
456 * * A Response packet will be generated in Step 11 *
457 * Otherwise,
458 * Generate Reset(No Connection) unless P.type == Reset
459 * Drop packet and return
460 *
461 * NOTE: the check for the packet types is done in
462 * dccp_rcv_state_process
463 */
464 if (sk->sk_state == DCCP_LISTEN) {
465 if (dh->dccph_type == DCCP_PKT_REQUEST) {
466 if (dccp_v4_conn_request(sk, skb) < 0)
467 return 1;
468
469 /* FIXME: do congestion control initialization */
470 goto discard;
471 }
472 if (dh->dccph_type == DCCP_PKT_RESET)
473 goto discard;
474
475 /* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/
476 return 1;
477 }
478
479 if (sk->sk_state != DCCP_REQUESTING) {
480 if (dccp_check_seqno(sk, skb))
481 goto discard;
482
483 /*
484 * Step 8: Process options and mark acknowledgeable
485 */
486 if (dccp_parse_options(sk, skb))
487 goto discard;
488
489 if (DCCP_SKB_CB(skb)->dccpd_ack_seq !=
490 DCCP_PKT_WITHOUT_ACK_SEQ)
491 dccp_event_ack_recv(sk, skb);
492
493 ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
494 ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
495
496 /*
497 * FIXME: check ECN to see if we should use
498 * DCCP_ACKPKTS_STATE_ECN_MARKED
499 */
500 if (dp->dccps_options.dccpo_send_ack_vector) {
501 if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
502 DCCP_SKB_CB(skb)->dccpd_seq,
503 DCCP_ACKPKTS_STATE_RECEIVED))
504 goto discard;
505 /*
506 * FIXME: this activation is probably wrong; we have to
507 * study the TCP delack machinery more and how it fits into
508 * the DCCP draft, but for now it kinda "works" 8)
509 */
510 if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno ==
511 DCCP_MAX_SEQNO + 1) &&
512 !inet_csk_ack_scheduled(sk)) {
513 inet_csk_schedule_ack(sk);
514 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
515 TCP_DELACK_MIN,
516 DCCP_RTO_MAX);
517 }
518 }
519 }
520
521 /*
522 * Step 9: Process Reset
523 * If P.type == Reset,
524 * Tear down connection
525 * S.state := TIMEWAIT
526 * Set TIMEWAIT timer
527 * Drop packet and return
528 */
529 if (dh->dccph_type == DCCP_PKT_RESET) {
530 /*
531 * Queue the equivalent of TCP fin so that dccp_recvmsg
532 * exits the loop
533 */
534 dccp_fin(sk, skb);
535 dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
536 return 0;
537 /*
538 * Step 7: Check for unexpected packet types
539 * If (S.is_server and P.type == CloseReq)
540 * or (S.is_server and P.type == Response)
541 * or (S.is_client and P.type == Request)
542 * or (S.state == RESPOND and P.type == Data),
543 * Send Sync packet acknowledging P.seqno
544 * Drop packet and return
545 */
546 } else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
547 (dh->dccph_type == DCCP_PKT_RESPONSE ||
548 dh->dccph_type == DCCP_PKT_CLOSEREQ)) ||
549 (dp->dccps_role == DCCP_ROLE_CLIENT &&
550 dh->dccph_type == DCCP_PKT_REQUEST) ||
551 (sk->sk_state == DCCP_RESPOND &&
552 dh->dccph_type == DCCP_PKT_DATA)) {
553 dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
554 DCCP_PKT_SYNC);
555 goto discard;
556 } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
557 dccp_rcv_closereq(sk, skb);
558 goto discard;
559 } else if (dh->dccph_type == DCCP_PKT_CLOSE) {
560 dccp_rcv_close(sk, skb);
561 return 0;
562 }
563
564 switch (sk->sk_state) {
565 case DCCP_CLOSED:
566 return 1;
567
568 case DCCP_REQUESTING:
569 /* FIXME: do congestion control initialization */
570
571 queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
572 if (queued >= 0)
573 return queued;
574
575 __kfree_skb(skb);
576 return 0;
577
578 case DCCP_RESPOND:
579 case DCCP_PARTOPEN:
580 queued = dccp_rcv_respond_partopen_state_process(sk, skb,
581 dh, len);
582 break;
583 }
584
585 if (dh->dccph_type == DCCP_PKT_ACK ||
586 dh->dccph_type == DCCP_PKT_DATAACK) {
587 switch (old_state) {
588 case DCCP_PARTOPEN:
589 sk->sk_state_change(sk);
590 sk_wake_async(sk, 0, POLL_OUT);
591 break;
592 }
593 }
594
595 if (!queued) {
596discard:
597 __kfree_skb(skb);
598 }
599 return 0;
600}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
new file mode 100644
index 000000000000..3fc75dbee4b8
--- /dev/null
+++ b/net/dccp/ipv4.c
@@ -0,0 +1,1356 @@
1/*
2 * net/dccp/ipv4.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/icmp.h>
16#include <linux/module.h>
17#include <linux/skbuff.h>
18#include <linux/random.h>
19
20#include <net/icmp.h>
21#include <net/inet_hashtables.h>
22#include <net/sock.h>
23#include <net/tcp_states.h>
24#include <net/xfrm.h>
25
26#include "ccid.h"
27#include "dccp.h"
28
29struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
30 .lhash_lock = RW_LOCK_UNLOCKED,
31 .lhash_users = ATOMIC_INIT(0),
32 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
33 .portalloc_lock = SPIN_LOCK_UNLOCKED,
34 .port_rover = 1024 - 1,
35};
36
37EXPORT_SYMBOL_GPL(dccp_hashinfo);
38
39static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
40{
41 return inet_csk_get_port(&dccp_hashinfo, sk, snum);
42}
43
44static void dccp_v4_hash(struct sock *sk)
45{
46 inet_hash(&dccp_hashinfo, sk);
47}
48
49static void dccp_v4_unhash(struct sock *sk)
50{
51 inet_unhash(&dccp_hashinfo, sk);
52}
53
54/* called with local bh disabled */
55static int __dccp_v4_check_established(struct sock *sk, const __u16 lport,
56 struct inet_timewait_sock **twp)
57{
58 struct inet_sock *inet = inet_sk(sk);
59 const u32 daddr = inet->rcv_saddr;
60 const u32 saddr = inet->daddr;
61 const int dif = sk->sk_bound_dev_if;
62 INET_ADDR_COOKIE(acookie, saddr, daddr)
63 const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
64 const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport,
65 dccp_hashinfo.ehash_size);
66 struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash];
67 const struct sock *sk2;
68 const struct hlist_node *node;
69 struct inet_timewait_sock *tw;
70
71 write_lock(&head->lock);
72
73 /* Check TIME-WAIT sockets first. */
74 sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) {
75 tw = inet_twsk(sk2);
76
77 if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif))
78 goto not_unique;
79 }
80 tw = NULL;
81
82 /* And established part... */
83 sk_for_each(sk2, node, &head->chain) {
84 if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
85 goto not_unique;
86 }
87
88	/* Must record num and sport now. Otherwise we will see
89	 * a socket with a funny identity in the hash table. */
90 inet->num = lport;
91 inet->sport = htons(lport);
92 sk->sk_hashent = hash;
93 BUG_TRAP(sk_unhashed(sk));
94 __sk_add_node(sk, &head->chain);
95 sock_prot_inc_use(sk->sk_prot);
96 write_unlock(&head->lock);
97
98 if (twp != NULL) {
99 *twp = tw;
100 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
101 } else if (tw != NULL) {
102 /* Silly. Should hash-dance instead... */
103 inet_twsk_deschedule(tw, &dccp_death_row);
104 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
105
106 inet_twsk_put(tw);
107 }
108
109 return 0;
110
111not_unique:
112 write_unlock(&head->lock);
113 return -EADDRNOTAVAIL;
114}
115
116/*
117 * Bind a port for a connect operation and hash it.
118 */
119static int dccp_v4_hash_connect(struct sock *sk)
120{
121 const unsigned short snum = inet_sk(sk)->num;
122 struct inet_bind_hashbucket *head;
123 struct inet_bind_bucket *tb;
124 int ret;
125
126 if (snum == 0) {
127 int rover;
128 int low = sysctl_local_port_range[0];
129 int high = sysctl_local_port_range[1];
130 int remaining = (high - low) + 1;
131 struct hlist_node *node;
132 struct inet_timewait_sock *tw = NULL;
133
134 local_bh_disable();
135
136		/* TODO. Actually it is not such a bad idea to remove
137		 * dccp_hashinfo.portalloc_lock before the next submission to
138		 * Linus.
139		 * As soon as we touch this place at all, it is time to think.
140		 *
141		 * Right now it protects a single _advisory_ variable,
142		 * dccp_hashinfo.port_rover, hence it is mostly useless.
143		 * The code will work nicely if we just delete it, but
144		 * I am afraid that in the contended case it will work no better,
145		 * or even worse: another cpu will just hit the same bucket
146		 * and spin there.
147		 * So some per-cpu salt could remove both the contention and the
148		 * memory ping-pong. Any ideas how to do this in a nice way?
149		 */
150 spin_lock(&dccp_hashinfo.portalloc_lock);
151 rover = dccp_hashinfo.port_rover;
152
153 do {
154 rover++;
155 if ((rover < low) || (rover > high))
156 rover = low;
157 head = &dccp_hashinfo.bhash[inet_bhashfn(rover,
158 dccp_hashinfo.bhash_size)];
159 spin_lock(&head->lock);
160
161 /* Does not bother with rcv_saddr checks,
162 * because the established check is already
163 * unique enough.
164 */
165 inet_bind_bucket_for_each(tb, node, &head->chain) {
166 if (tb->port == rover) {
167 BUG_TRAP(!hlist_empty(&tb->owners));
168 if (tb->fastreuse >= 0)
169 goto next_port;
170 if (!__dccp_v4_check_established(sk,
171 rover,
172 &tw))
173 goto ok;
174 goto next_port;
175 }
176 }
177
178 tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep,
179 head, rover);
180 if (tb == NULL) {
181 spin_unlock(&head->lock);
182 break;
183 }
184 tb->fastreuse = -1;
185 goto ok;
186
187 next_port:
188 spin_unlock(&head->lock);
189 } while (--remaining > 0);
190 dccp_hashinfo.port_rover = rover;
191 spin_unlock(&dccp_hashinfo.portalloc_lock);
192
193 local_bh_enable();
194
195 return -EADDRNOTAVAIL;
196
197ok:
198 /* All locks still held and bhs disabled */
199 dccp_hashinfo.port_rover = rover;
200 spin_unlock(&dccp_hashinfo.portalloc_lock);
201
202 inet_bind_hash(sk, tb, rover);
203 if (sk_unhashed(sk)) {
204 inet_sk(sk)->sport = htons(rover);
205 __inet_hash(&dccp_hashinfo, sk, 0);
206 }
207 spin_unlock(&head->lock);
208
209 if (tw != NULL) {
210 inet_twsk_deschedule(tw, &dccp_death_row);
211 inet_twsk_put(tw);
212 }
213
214 ret = 0;
215 goto out;
216 }
217
218 head = &dccp_hashinfo.bhash[inet_bhashfn(snum,
219 dccp_hashinfo.bhash_size)];
220 tb = inet_csk(sk)->icsk_bind_hash;
221 spin_lock_bh(&head->lock);
222 if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
223 __inet_hash(&dccp_hashinfo, sk, 0);
224 spin_unlock_bh(&head->lock);
225 return 0;
226 } else {
227 spin_unlock(&head->lock);
228 /* No definite answer... Walk to established hash table */
229 ret = __dccp_v4_check_established(sk, snum, NULL);
230out:
231 local_bh_enable();
232 return ret;
233 }
234}
235
236static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
237 int addr_len)
238{
239 struct inet_sock *inet = inet_sk(sk);
240 struct dccp_sock *dp = dccp_sk(sk);
241 const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
242 struct rtable *rt;
243 u32 daddr, nexthop;
244 int tmp;
245 int err;
246
247 dp->dccps_role = DCCP_ROLE_CLIENT;
248
249 if (addr_len < sizeof(struct sockaddr_in))
250 return -EINVAL;
251
252 if (usin->sin_family != AF_INET)
253 return -EAFNOSUPPORT;
254
255 nexthop = daddr = usin->sin_addr.s_addr;
256 if (inet->opt != NULL && inet->opt->srr) {
257 if (daddr == 0)
258 return -EINVAL;
259 nexthop = inet->opt->faddr;
260 }
261
262 tmp = ip_route_connect(&rt, nexthop, inet->saddr,
263 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
264 IPPROTO_DCCP,
265 inet->sport, usin->sin_port, sk);
266 if (tmp < 0)
267 return tmp;
268
269 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
270 ip_rt_put(rt);
271 return -ENETUNREACH;
272 }
273
274 if (inet->opt == NULL || !inet->opt->srr)
275 daddr = rt->rt_dst;
276
277 if (inet->saddr == 0)
278 inet->saddr = rt->rt_src;
279 inet->rcv_saddr = inet->saddr;
280
281 inet->dport = usin->sin_port;
282 inet->daddr = daddr;
283
284 dp->dccps_ext_header_len = 0;
285 if (inet->opt != NULL)
286 dp->dccps_ext_header_len = inet->opt->optlen;
287 /*
288 * Socket identity is still unknown (sport may be zero).
289	 * However, while holding the socket lock, we set the state to
290	 * DCCP_REQUESTING, select a source port, enter ourselves into the
291	 * hash tables and complete initialization after this.
292 */
293 dccp_set_state(sk, DCCP_REQUESTING);
294 err = dccp_v4_hash_connect(sk);
295 if (err != 0)
296 goto failure;
297
298 err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
299 if (err != 0)
300 goto failure;
301
302 /* OK, now commit destination to socket. */
303 sk_setup_caps(sk, &rt->u.dst);
304
305 dp->dccps_gar =
306 dp->dccps_iss = secure_dccp_sequence_number(inet->saddr,
307 inet->daddr,
308 inet->sport,
309 usin->sin_port);
310 dccp_update_gss(sk, dp->dccps_iss);
311
312 /*
313 * SWL and AWL are initially adjusted so that they are not less than
314 * the initial Sequence Numbers received and sent, respectively:
315 * SWL := max(GSR + 1 - floor(W/4), ISR),
316 * AWL := max(GSS - W' + 1, ISS).
317 * These adjustments MUST be applied only at the beginning of the
318 * connection.
319 */
320 dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));
321
322 inet->id = dp->dccps_iss ^ jiffies;
323
324 err = dccp_connect(sk);
325 rt = NULL;
326 if (err != 0)
327 goto failure;
328out:
329 return err;
330failure:
331 /*
332 * This unhashes the socket and releases the local port, if necessary.
333 */
334 dccp_set_state(sk, DCCP_CLOSED);
335 ip_rt_put(rt);
336 sk->sk_route_caps = 0;
337 inet->dport = 0;
338 goto out;
339}
340
341/*
342 * This routine does path mtu discovery as defined in RFC1191.
343 */
344static inline void dccp_do_pmtu_discovery(struct sock *sk,
345 const struct iphdr *iph,
346 u32 mtu)
347{
348 struct dst_entry *dst;
349 const struct inet_sock *inet = inet_sk(sk);
350 const struct dccp_sock *dp = dccp_sk(sk);
351
352	/* We are not interested in DCCP_LISTEN and request socks (RESPONSEs
353	 * sent out by Linux are always < 576 bytes, so they should go through
354	 * unfragmented).
355 */
356 if (sk->sk_state == DCCP_LISTEN)
357 return;
358
359	/* We don't check in the dst entry if pmtu discovery is forbidden
360	 * on this route. We just assume that no packet-too-big packets
361	 * are sent back when pmtu discovery is not active.
362	 * There is a small race when the user changes this flag in the
363	 * route, but I think that's acceptable.
364 */
365 if ((dst = __sk_dst_check(sk, 0)) == NULL)
366 return;
367
368 dst->ops->update_pmtu(dst, mtu);
369
370	/* Something is about to go wrong... Remember the soft error
371	 * in case this connection is not able to recover.
372 */
373 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
374 sk->sk_err_soft = EMSGSIZE;
375
376 mtu = dst_mtu(dst);
377
378 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
379 dp->dccps_pmtu_cookie > mtu) {
380 dccp_sync_mss(sk, mtu);
381
382 /*
383 * From: draft-ietf-dccp-spec-11.txt
384 *
385 * DCCP-Sync packets are the best choice for upward
386 * probing, since DCCP-Sync probes do not risk application
387 * data loss.
388 */
389 dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
390 } /* else let the usual retransmit timer handle it */
391}
392
393static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb)
394{
395 int err;
396 struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
397 const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
398 sizeof(struct dccp_hdr_ext) +
399 sizeof(struct dccp_hdr_ack_bits);
400 struct sk_buff *skb;
401
402 if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
403 return;
404
405 skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
406 if (skb == NULL)
407 return;
408
409 /* Reserve space for headers. */
410 skb_reserve(skb, MAX_DCCP_HEADER);
411
412 skb->dst = dst_clone(rxskb->dst);
413
414 skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
415 dh = dccp_hdr(skb);
416 memset(dh, 0, dccp_hdr_ack_len);
417
418 /* Build DCCP header and checksum it. */
419 dh->dccph_type = DCCP_PKT_ACK;
420 dh->dccph_sport = rxdh->dccph_dport;
421 dh->dccph_dport = rxdh->dccph_sport;
422 dh->dccph_doff = dccp_hdr_ack_len / 4;
423 dh->dccph_x = 1;
424
425 dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
426 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
427 DCCP_SKB_CB(rxskb)->dccpd_seq);
428
429 bh_lock_sock(dccp_ctl_socket->sk);
430 err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
431 rxskb->nh.iph->daddr,
432 rxskb->nh.iph->saddr, NULL);
433 bh_unlock_sock(dccp_ctl_socket->sk);
434
435 if (err == NET_XMIT_CN || err == 0) {
436 DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
437 DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
438 }
439}
440
441static void dccp_v4_reqsk_send_ack(struct sk_buff *skb,
442 struct request_sock *req)
443{
444 dccp_v4_ctl_send_ack(skb);
445}
446
447static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
448 struct dst_entry *dst)
449{
450 int err = -1;
451 struct sk_buff *skb;
452
453 /* First, grab a route. */
454
455 if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
456 goto out;
457
458 skb = dccp_make_response(sk, dst, req);
459 if (skb != NULL) {
460 const struct inet_request_sock *ireq = inet_rsk(req);
461
462 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
463 ireq->rmt_addr,
464 ireq->opt);
465 if (err == NET_XMIT_CN)
466 err = 0;
467 }
468
469out:
470 dst_release(dst);
471 return err;
472}
473
474/*
475 * This routine is called by the ICMP module when it gets some sort of error
476 * condition. If err < 0 then the socket should be closed and the error
477 * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code.
478 * After adjustment, header points to the first 8 bytes of the dccp header. We
479 * need to find the appropriate port.
480 *
481 * The locking strategy used here is very "optimistic". When someone else
482 * accesses the socket the ICMP is just dropped and for some paths there is no
483 * check at all. A more general error queue to queue errors for later handling
484 * is probably better.
485 */
486void dccp_v4_err(struct sk_buff *skb, u32 info)
487{
488 const struct iphdr *iph = (struct iphdr *)skb->data;
489 const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data +
490 (iph->ihl << 2));
491 struct dccp_sock *dp;
492 struct inet_sock *inet;
493 const int type = skb->h.icmph->type;
494 const int code = skb->h.icmph->code;
495 struct sock *sk;
496 __u64 seq;
497 int err;
498
499 if (skb->len < (iph->ihl << 2) + 8) {
500 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
501 return;
502 }
503
504 sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport,
505 iph->saddr, dh->dccph_sport, inet_iif(skb));
506 if (sk == NULL) {
507 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
508 return;
509 }
510
511 if (sk->sk_state == DCCP_TIME_WAIT) {
512 inet_twsk_put((struct inet_timewait_sock *)sk);
513 return;
514 }
515
516 bh_lock_sock(sk);
517 /* If too many ICMPs get dropped on busy
518 * servers this needs to be solved differently.
519 */
520 if (sock_owned_by_user(sk))
521 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
522
523 if (sk->sk_state == DCCP_CLOSED)
524 goto out;
525
526 dp = dccp_sk(sk);
527 seq = dccp_hdr_seq(skb);
528 if (sk->sk_state != DCCP_LISTEN &&
529 !between48(seq, dp->dccps_swl, dp->dccps_swh)) {
530 NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
531 goto out;
532 }
533
534 switch (type) {
535 case ICMP_SOURCE_QUENCH:
536 /* Just silently ignore these. */
537 goto out;
538 case ICMP_PARAMETERPROB:
539 err = EPROTO;
540 break;
541 case ICMP_DEST_UNREACH:
542 if (code > NR_ICMP_UNREACH)
543 goto out;
544
545 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
546 if (!sock_owned_by_user(sk))
547 dccp_do_pmtu_discovery(sk, iph, info);
548 goto out;
549 }
550
551 err = icmp_err_convert[code].errno;
552 break;
553 case ICMP_TIME_EXCEEDED:
554 err = EHOSTUNREACH;
555 break;
556 default:
557 goto out;
558 }
559
560 switch (sk->sk_state) {
561 struct request_sock *req, **prev;
562 case DCCP_LISTEN:
563 if (sock_owned_by_user(sk))
564 goto out;
565 req = inet_csk_search_req(sk, &prev, dh->dccph_dport,
566 iph->daddr, iph->saddr);
567 if (!req)
568 goto out;
569
570 /*
571 * ICMPs are not backlogged, hence we cannot get an established
572 * socket here.
573 */
574 BUG_TRAP(!req->sk);
575
576 if (seq != dccp_rsk(req)->dreq_iss) {
577 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
578 goto out;
579 }
580 /*
581 * Still in RESPOND, just remove it silently.
582 * There is no good way to pass the error to the newly
583 * created socket, and POSIX does not want network
584 * errors returned from accept().
585 */
586 inet_csk_reqsk_queue_drop(sk, req, prev);
587 goto out;
588
589 case DCCP_REQUESTING:
590 case DCCP_RESPOND:
591 if (!sock_owned_by_user(sk)) {
592 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
593 sk->sk_err = err;
594
595 sk->sk_error_report(sk);
596
597 dccp_done(sk);
598 } else
599 sk->sk_err_soft = err;
600 goto out;
601 }
602
603 /* If we've already connected we will keep trying
604 * until we time out, or the user gives up.
605 *
606 * rfc1122 4.2.3.9 allows us to consider as hard errors
607 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
608 * but it is obsoleted by pmtu discovery).
609 *
610 * Note that in the modern internet, where routing is unreliable
611 * and broken firewalls sit in every dark corner sending random
612 * errors ordered by their masters, even these two messages finally lose
613 * their original sense (even Linux sends invalid PORT_UNREACHs).
614 *
615 * Now we are in compliance with RFCs.
616 * --ANK (980905)
617 */
618
619 inet = inet_sk(sk);
620 if (!sock_owned_by_user(sk) && inet->recverr) {
621 sk->sk_err = err;
622 sk->sk_error_report(sk);
623 } else /* Only an error on timeout */
624 sk->sk_err_soft = err;
625out:
626 bh_unlock_sock(sk);
627 sock_put(sk);
628}
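/*
 * A minimal sketch of the 48-bit circular comparisons used above
 * (before48()/between48() live in dccp.h; this rendition is
 * illustrative, not the patch's exact code): shifting the 48-bit
 * values into the top of a 64-bit word lets ordinary signed
 * subtraction handle wraparound.
 */
static inline int example_before48(const u64 seq1, const u64 seq2)
{
	return (s64)((seq1 << 16) - (seq2 << 16)) < 0;
}

static inline int example_between48(const u64 seq1, const u64 seq2,
				    const u64 seq3)
{
	/* seq2 <= seq1 <= seq3, modulo 2^48 */
	return !example_before48(seq1, seq2) &&
	       !example_before48(seq3, seq1);
}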
629
630int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code)
631{
632 struct sk_buff *skb;
633 /*
634 * FIXME: what if rebuild_header fails?
635 * Should we be doing a rebuild_header here?
636 */
637 int err = inet_sk_rebuild_header(sk);
638
639 if (err != 0)
640 return err;
641
642 skb = dccp_make_reset(sk, sk->sk_dst_cache, code);
643 if (skb != NULL) {
644 const struct dccp_sock *dp = dccp_sk(sk);
645 const struct inet_sock *inet = inet_sk(sk);
646
647 err = ip_build_and_send_pkt(skb, sk,
648 inet->saddr, inet->daddr, NULL);
649 if (err == NET_XMIT_CN)
650 err = 0;
651
652 ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
653 ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
654 }
655
656 return err;
657}
658
659static inline u64 dccp_v4_init_sequence(const struct sock *sk,
660 const struct sk_buff *skb)
661{
662 return secure_dccp_sequence_number(skb->nh.iph->daddr,
663 skb->nh.iph->saddr,
664 dccp_hdr(skb)->dccph_dport,
665 dccp_hdr(skb)->dccph_sport);
666}
667
668int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
669{
670 struct inet_request_sock *ireq;
671 struct dccp_sock dp;
672 struct request_sock *req;
673 struct dccp_request_sock *dreq;
674 const __u32 saddr = skb->nh.iph->saddr;
675 const __u32 daddr = skb->nh.iph->daddr;
676 struct dst_entry *dst = NULL;
677
678 /* Never answer DCCP_PKT_REQUESTs sent to broadcast or multicast */
679 if (((struct rtable *)skb->dst)->rt_flags &
680 (RTCF_BROADCAST | RTCF_MULTICAST))
681 goto drop;
682
683 /*
684 * TW buckets are converted to open requests without
685 * limitation; they conserve resources and the peer is
686 * evidently a real one.
687 */
688 if (inet_csk_reqsk_queue_is_full(sk))
689 goto drop;
690
691 /*
692 * Accept backlog is full. If we have already queued enough
693 * warm entries in the syn queue, drop the request. It is better than
694 * clogging the syn queue with openreqs with exponentially increasing
695 * timeouts.
696 */
697 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
698 goto drop;
699
700 req = reqsk_alloc(sk->sk_prot->rsk_prot);
701 if (req == NULL)
702 goto drop;
703
704 /* FIXME: process options */
705
706 dccp_openreq_init(req, &dp, skb);
707
708 ireq = inet_rsk(req);
709 ireq->loc_addr = daddr;
710 ireq->rmt_addr = saddr;
711 /* FIXME: Merge Aristeu's option parsing code when ready */
712 req->rcv_wnd = 100; /* Fake, option parsing will get the
713 right value */
714 ireq->opt = NULL;
715
716 /*
717 * Step 3: Process LISTEN state
718 *
719 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
720 *
721 * In fact we defer setting S.GSR, S.SWL, S.SWH to
722 * dccp_create_openreq_child.
723 */
724 dreq = dccp_rsk(req);
725 dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq;
726 dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
727 dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service;
728
729 if (dccp_v4_send_response(sk, req, dst))
730 goto drop_and_free;
731
732 inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
733 return 0;
734
735drop_and_free:
736 /*
737 * FIXME: should be reqsk_free after implementing req->rsk_ops
738 */
739 __reqsk_free(req);
740drop:
741 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
742 return -1;
743}
744
745/*
746 * The three way handshake has completed - we got a valid ACK or DATAACK -
747 * now create the new socket.
748 *
749 * This is the equivalent of TCP's tcp_v4_syn_recv_sock
750 */
751struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
752 struct request_sock *req,
753 struct dst_entry *dst)
754{
755 struct inet_request_sock *ireq;
756 struct inet_sock *newinet;
757 struct dccp_sock *newdp;
758 struct sock *newsk;
759
760 if (sk_acceptq_is_full(sk))
761 goto exit_overflow;
762
763 if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
764 goto exit;
765
766 newsk = dccp_create_openreq_child(sk, req, skb);
767 if (newsk == NULL)
768 goto exit;
769
770 sk_setup_caps(newsk, dst);
771
772 newdp = dccp_sk(newsk);
773 newinet = inet_sk(newsk);
774 ireq = inet_rsk(req);
775 newinet->daddr = ireq->rmt_addr;
776 newinet->rcv_saddr = ireq->loc_addr;
777 newinet->saddr = ireq->loc_addr;
778 newinet->opt = ireq->opt;
779 ireq->opt = NULL;
780 newinet->mc_index = inet_iif(skb);
781 newinet->mc_ttl = skb->nh.iph->ttl;
782 newinet->id = jiffies;
783
784 dccp_sync_mss(newsk, dst_mtu(dst));
785
786 __inet_hash(&dccp_hashinfo, newsk, 0);
787 __inet_inherit_port(&dccp_hashinfo, sk, newsk);
788
789 return newsk;
790
791exit_overflow:
792 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
793exit:
794 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
795 dst_release(dst);
796 return NULL;
797}
798
799static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
800{
801 const struct dccp_hdr *dh = dccp_hdr(skb);
802 const struct iphdr *iph = skb->nh.iph;
803 struct sock *nsk;
804 struct request_sock **prev;
805 /* Find possible connection requests. */
806 struct request_sock *req = inet_csk_search_req(sk, &prev,
807 dh->dccph_sport,
808 iph->saddr, iph->daddr);
809 if (req != NULL)
810 return dccp_check_req(sk, skb, req, prev);
811
812 nsk = __inet_lookup_established(&dccp_hashinfo,
813 iph->saddr, dh->dccph_sport,
814 iph->daddr, ntohs(dh->dccph_dport),
815 inet_iif(skb));
816 if (nsk != NULL) {
817 if (nsk->sk_state != DCCP_TIME_WAIT) {
818 bh_lock_sock(nsk);
819 return nsk;
820 }
821 inet_twsk_put((struct inet_timewait_sock *)nsk);
822 return NULL;
823 }
824
825 return sk;
826}
827
828int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr,
829 const u32 daddr)
830{
831 const struct dccp_hdr* dh = dccp_hdr(skb);
832 int checksum_len;
833 u32 tmp;
834
835 if (dh->dccph_cscov == 0)
836 checksum_len = skb->len;
837 else {
838 checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
839 checksum_len = checksum_len < skb->len ? checksum_len :
840 skb->len;
841 }
842
843 tmp = csum_partial((unsigned char *)dh, checksum_len, 0);
844 return csum_tcpudp_magic(saddr, daddr, checksum_len,
845 IPPROTO_DCCP, tmp);
846}
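/*
 * Worked example for the coverage computation above: dccph_cscov == 0
 * means the checksum covers the whole packet; with, say, dccph_cscov
 * == 5 and dccph_x == 1 it covers (5 + 1) * 4 = 24 bytes, capped at
 * skb->len. (This follows the draft's CsCov encoding as this code
 * interprets it.)
 */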
847
848static int dccp_v4_verify_checksum(struct sk_buff *skb,
849 const u32 saddr, const u32 daddr)
850{
851 struct dccp_hdr *dh = dccp_hdr(skb);
852 int checksum_len;
853 u32 tmp;
854
855 if (dh->dccph_cscov == 0)
856 checksum_len = skb->len;
857 else {
858 checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
859 checksum_len = checksum_len < skb->len ? checksum_len :
860 skb->len;
861 }
862 tmp = csum_partial((unsigned char *)dh, checksum_len, 0);
863 return csum_tcpudp_magic(saddr, daddr, checksum_len,
864 IPPROTO_DCCP, tmp) == 0 ? 0 : -1;
865}
866
867static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
868 struct sk_buff *skb)
869{
870 struct rtable *rt;
871 struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif,
872 .nl_u = { .ip4_u =
873 { .daddr = skb->nh.iph->saddr,
874 .saddr = skb->nh.iph->daddr,
875 .tos = RT_CONN_FLAGS(sk) } },
876 .proto = sk->sk_protocol,
877 .uli_u = { .ports =
878 { .sport = dccp_hdr(skb)->dccph_dport,
879 .dport = dccp_hdr(skb)->dccph_sport }
880 }
881 };
882
883 if (ip_route_output_flow(&rt, &fl, sk, 0)) {
884 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
885 return NULL;
886 }
887
888 return &rt->u.dst;
889}
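/*
 * Note on the flow constructed above: addresses and ports are
 * deliberately swapped relative to rxskb (.daddr takes iph->saddr,
 * .sport takes the received dport, and so on) because we are routing a
 * reply back towards the sender. For example, a packet received from
 * 10.0.0.1:1234 addressed to 10.0.0.2:5001 yields a reply flow from
 * 10.0.0.2:5001 to 10.0.0.1:1234.
 */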
890
891static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
892{
893 int err;
894 struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
895 const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
896 sizeof(struct dccp_hdr_ext) +
897 sizeof(struct dccp_hdr_reset);
898 struct sk_buff *skb;
899 struct dst_entry *dst;
900 u64 seqno;
901
902 /* Never send a reset in response to a reset. */
903 if (rxdh->dccph_type == DCCP_PKT_RESET)
904 return;
905
906 if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
907 return;
908
909 dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb);
910 if (dst == NULL)
911 return;
912
913 skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
914 if (skb == NULL)
915 goto out;
916
917 /* Reserve space for headers. */
918 skb_reserve(skb, MAX_DCCP_HEADER);
919 skb->dst = dst_clone(dst);
920
921 skb->h.raw = skb_push(skb, dccp_hdr_reset_len);
922 dh = dccp_hdr(skb);
923 memset(dh, 0, dccp_hdr_reset_len);
924
925 /* Build DCCP header and checksum it. */
926 dh->dccph_type = DCCP_PKT_RESET;
927 dh->dccph_sport = rxdh->dccph_dport;
928 dh->dccph_dport = rxdh->dccph_sport;
929 dh->dccph_doff = dccp_hdr_reset_len / 4;
930 dh->dccph_x = 1;
931 dccp_hdr_reset(skb)->dccph_reset_code =
932 DCCP_SKB_CB(rxskb)->dccpd_reset_code;
933
934 /* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */
935 seqno = 0;
936 if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
937 dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1);
938
939 dccp_hdr_set_seq(dh, seqno);
940 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
941 DCCP_SKB_CB(rxskb)->dccpd_seq);
942
943 dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr,
944 rxskb->nh.iph->daddr);
945
946 bh_lock_sock(dccp_ctl_socket->sk);
947 err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
948 rxskb->nh.iph->daddr,
949 rxskb->nh.iph->saddr, NULL);
950 bh_unlock_sock(dccp_ctl_socket->sk);
951
952 if (err == NET_XMIT_CN || err == 0) {
953 DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
954 DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
955 }
956out:
957 dst_release(dst);
958}
959
960int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
961{
962 struct dccp_hdr *dh = dccp_hdr(skb);
963
964 if (sk->sk_state == DCCP_OPEN) { /* Fast path */
965 if (dccp_rcv_established(sk, skb, dh, skb->len))
966 goto reset;
967 return 0;
968 }
969
970 /*
971 * Step 3: Process LISTEN state
972 * If S.state == LISTEN,
973 * If P.type == Request or P contains a valid Init Cookie
974 * option,
975 * * Must scan the packet's options to check for an Init
976 * Cookie. Only the Init Cookie is processed here,
977 * however; other options are processed in Step 8. This
978 * scan need only be performed if the endpoint uses Init
979 * Cookies *
980 * * Generate a new socket and switch to that socket *
981 * Set S := new socket for this port pair
982 * S.state = RESPOND
983 * Choose S.ISS (initial seqno) or set from Init Cookie
984 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
985 * Continue with S.state == RESPOND
986 * * A Response packet will be generated in Step 11 *
987 * Otherwise,
988 * Generate Reset(No Connection) unless P.type == Reset
989 * Drop packet and return
990 *
991 * NOTE: the check for the packet types is done in
992 * dccp_rcv_state_process
993 */
994 if (sk->sk_state == DCCP_LISTEN) {
995 struct sock *nsk = dccp_v4_hnd_req(sk, skb);
996
997 if (nsk == NULL)
998 goto discard;
999
1000 if (nsk != sk) {
1001 if (dccp_child_process(sk, nsk, skb))
1002 goto reset;
1003 return 0;
1004 }
1005 }
1006
1007 if (dccp_rcv_state_process(sk, skb, dh, skb->len))
1008 goto reset;
1009 return 0;
1010
1011reset:
1012 DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
1013 dccp_v4_ctl_send_reset(skb);
1014discard:
1015 kfree_skb(skb);
1016 return 0;
1017}
1018
1019static inline int dccp_invalid_packet(struct sk_buff *skb)
1020{
1021 const struct dccp_hdr *dh;
1022
1023 if (skb->pkt_type != PACKET_HOST)
1024 return 1;
1025
1026 if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) {
1027 LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n");
1028 return 1;
1029 }
1030
1031 dh = dccp_hdr(skb);
1032
1033 /* If the packet type is not understood, drop packet and return */
1034 if (dh->dccph_type >= DCCP_PKT_INVALID) {
1035 LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n");
1036 return 1;
1037 }
1038
1039 /*
1040 * If P.Data Offset is too small for packet type, or too large for
1041 * packet, drop packet and return
1042 */
1043 if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
1044 LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
1045 "too small 1\n",
1046 dh->dccph_doff);
1047 return 1;
1048 }
1049
1050 if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) {
1051 LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
1052 "too small 2\n",
1053 dh->dccph_doff);
1054 return 1;
1055 }
1056
1057 dh = dccp_hdr(skb);
1058
1059 /*
1060 * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
1061 * has short sequence numbers), drop packet and return
1062 */
1063 if (dh->dccph_x == 0 &&
1064 dh->dccph_type != DCCP_PKT_DATA &&
1065 dh->dccph_type != DCCP_PKT_ACK &&
1066 dh->dccph_type != DCCP_PKT_DATAACK) {
1067 LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack "
1068 "nor DataAck and P.X == 0\n",
1069 dccp_packet_name(dh->dccph_type));
1070 return 1;
1071 }
1072
1073 /* If the header checksum is incorrect, drop packet and return */
1074 if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr,
1075 skb->nh.iph->daddr) < 0) {
1076 LIMIT_NETDEBUG(KERN_WARNING "DCCP: header checksum is "
1077 "incorrect\n");
1078 return 1;
1079 }
1080
1081 return 0;
1082}
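/*
 * Example for the Data Offset checks above, assuming the usual header
 * sizes: a DCCP-Request with 48-bit sequence numbers carries a 16-byte
 * generic header plus a 4-byte service code, so dccp_hdr_len() returns
 * 20 and any dccph_doff below 20 / 4 == 5 words is rejected as too
 * small; the second pskb_may_pull() then makes sure the doff words of
 * header plus options are really present in the skb.
 */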
1083
1084/* this is called when real data arrives */
1085int dccp_v4_rcv(struct sk_buff *skb)
1086{
1087 const struct dccp_hdr *dh;
1088 struct sock *sk;
1089 int rc;
1090
1091 /* Step 1: Check header basics: */
1092
1093 if (dccp_invalid_packet(skb))
1094 goto discard_it;
1095
1096 dh = dccp_hdr(skb);
1097#if 0
1098 /*
1099 * Use something like this to simulate some DATA/DATAACK loss to test
1100 * dccp_ackpkts_add, you'll get something like this on a session that
1101 * sends 10 DATA/DATAACK packets:
1102 *
1103 * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1|
1104 *
1105 * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet
1106 * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets
1107 * with the same state
1108 * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet
1109 *
1110 * So...
1111 *
1112 * 281473596467422 was received
1113 * 281473596467421 was not received
1114 * 281473596467420 was received
1115 * 281473596467419 was not received
1116 * 281473596467418 was received
1117 * 281473596467417 was not received
1118 * 281473596467416 was received
1119 * 281473596467415 was not received
1120 * 281473596467414 was received
1121 * 281473596467413 was received (this one was the 3way handshake
1122 * RESPONSE)
1123 *
1124 */
1125 if (dh->dccph_type == DCCP_PKT_DATA ||
1126 dh->dccph_type == DCCP_PKT_DATAACK) {
1127 static int discard = 0;
1128
1129 if (discard) {
1130 discard = 0;
1131 goto discard_it;
1132 }
1133 discard = 1;
1134 }
1135#endif
1136 DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb);
1137 DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
1138
1139 dccp_pr_debug("%8.8s "
1140 "src=%u.%u.%u.%u@%-5d "
1141 "dst=%u.%u.%u.%u@%-5d seq=%llu",
1142 dccp_packet_name(dh->dccph_type),
1143 NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport),
1144 NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport),
1145 (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
1146
1147 if (dccp_packet_without_ack(skb)) {
1148 DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ;
1149 dccp_pr_debug_cat("\n");
1150 } else {
1151 DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
1152 dccp_pr_debug_cat(", ack=%llu\n",
1153 (unsigned long long)
1154 DCCP_SKB_CB(skb)->dccpd_ack_seq);
1155 }
1156
1157 /* Step 2:
1158 * Look up flow ID in table and get corresponding socket */
1159 sk = __inet_lookup(&dccp_hashinfo,
1160 skb->nh.iph->saddr, dh->dccph_sport,
1161 skb->nh.iph->daddr, ntohs(dh->dccph_dport),
1162 inet_iif(skb));
1163
1164 /*
1165 * Step 2:
1166 * If no socket ...
1167 * Generate Reset(No Connection) unless P.type == Reset
1168 * Drop packet and return
1169 */
1170 if (sk == NULL) {
1171 dccp_pr_debug("failed to look up flow ID in table and "
1172 "get corresponding socket\n");
1173 goto no_dccp_socket;
1174 }
1175
1176 /*
1177 * Step 2:
1178 * ... or S.state == TIMEWAIT,
1179 * Generate Reset(No Connection) unless P.type == Reset
1180 * Drop packet and return
1181 */
1182
1183 if (sk->sk_state == DCCP_TIME_WAIT) {
1184 dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: "
1185 "do_time_wait\n");
1186 goto do_time_wait;
1187 }
1188
1189 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
1190 dccp_pr_debug("xfrm4_policy_check failed\n");
1191 goto discard_and_relse;
1192 }
1193
1194 if (sk_filter(sk, skb, 0)) {
1195 dccp_pr_debug("sk_filter failed\n");
1196 goto discard_and_relse;
1197 }
1198
1199 skb->dev = NULL;
1200
1201 bh_lock_sock(sk);
1202 rc = 0;
1203 if (!sock_owned_by_user(sk))
1204 rc = dccp_v4_do_rcv(sk, skb);
1205 else
1206 sk_add_backlog(sk, skb);
1207 bh_unlock_sock(sk);
1208
1209 sock_put(sk);
1210 return rc;
1211
1212no_dccp_socket:
1213 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1214 goto discard_it;
1215 /*
1216 * Step 2:
1217 * Generate Reset(No Connection) unless P.type == Reset
1218 * Drop packet and return
1219 */
1220 if (dh->dccph_type != DCCP_PKT_RESET) {
1221 DCCP_SKB_CB(skb)->dccpd_reset_code =
1222 DCCP_RESET_CODE_NO_CONNECTION;
1223 dccp_v4_ctl_send_reset(skb);
1224 }
1225
1226discard_it:
1227 /* Discard frame. */
1228 kfree_skb(skb);
1229 return 0;
1230
1231discard_and_relse:
1232 sock_put(sk);
1233 goto discard_it;
1234
1235do_time_wait:
1236 inet_twsk_put((struct inet_timewait_sock *)sk);
1237 goto no_dccp_socket;
1238}
1239
1240static int dccp_v4_init_sock(struct sock *sk)
1241{
1242 struct dccp_sock *dp = dccp_sk(sk);
1243 static int dccp_ctl_socket_init = 1;
1244
1245 dccp_options_init(&dp->dccps_options);
1246
1247 if (dp->dccps_options.dccpo_send_ack_vector) {
1248 dp->dccps_hc_rx_ackpkts =
1249 dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
1250 GFP_KERNEL);
1251
1252 if (dp->dccps_hc_rx_ackpkts == NULL)
1253 return -ENOMEM;
1254 }
1255
1256 /*
1257 * FIXME: We're hardcoding the CCID, and doing this at this point makes
1258 * the listening (master) sock get CCID control blocks, which is not
1259 * necessary, but for now, to not mess with the test userspace apps,
1260 * let's leave it here; later, the real solution is to do this in a
1261 * setsockopt(CCIDs-I-want/accept). -acme
1262 */
1263 if (likely(!dccp_ctl_socket_init)) {
1264 dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
1265 sk);
1266 dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
1267 sk);
1268 if (dp->dccps_hc_rx_ccid == NULL ||
1269 dp->dccps_hc_tx_ccid == NULL) {
1270 ccid_exit(dp->dccps_hc_rx_ccid, sk);
1271 ccid_exit(dp->dccps_hc_tx_ccid, sk);
1272 dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
1273 dp->dccps_hc_rx_ackpkts = NULL;
1274 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
1275 return -ENOMEM;
1276 }
1277 } else
1278 dccp_ctl_socket_init = 0;
1279
1280 dccp_init_xmit_timers(sk);
1281 inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT;
1282 sk->sk_state = DCCP_CLOSED;
1283 sk->sk_write_space = dccp_write_space;
1284 dp->dccps_mss_cache = 536;
1285 dp->dccps_role = DCCP_ROLE_UNDEFINED;
1286
1287 return 0;
1288}
1289
1290static int dccp_v4_destroy_sock(struct sock *sk)
1291{
1292 struct dccp_sock *dp = dccp_sk(sk);
1293
1294 /*
1295 * DCCP doesn't use sk_write_queue, just sk_send_head
1296 * for retransmissions
1297 */
1298 if (sk->sk_send_head != NULL) {
1299 kfree_skb(sk->sk_send_head);
1300 sk->sk_send_head = NULL;
1301 }
1302
1303 /* Clean up a referenced DCCP bind bucket. */
1304 if (inet_csk(sk)->icsk_bind_hash != NULL)
1305 inet_put_port(&dccp_hashinfo, sk);
1306
1307 ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
1308 ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
1309 dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
1310 dp->dccps_hc_rx_ackpkts = NULL;
1311 ccid_exit(dp->dccps_hc_rx_ccid, sk);
1312 ccid_exit(dp->dccps_hc_tx_ccid, sk);
1313 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
1314
1315 return 0;
1316}
1317
1318static void dccp_v4_reqsk_destructor(struct request_sock *req)
1319{
1320 kfree(inet_rsk(req)->opt);
1321}
1322
1323static struct request_sock_ops dccp_request_sock_ops = {
1324 .family = PF_INET,
1325 .obj_size = sizeof(struct dccp_request_sock),
1326 .rtx_syn_ack = dccp_v4_send_response,
1327 .send_ack = dccp_v4_reqsk_send_ack,
1328 .destructor = dccp_v4_reqsk_destructor,
1329 .send_reset = dccp_v4_ctl_send_reset,
1330};
1331
1332struct proto dccp_v4_prot = {
1333 .name = "DCCP",
1334 .owner = THIS_MODULE,
1335 .close = dccp_close,
1336 .connect = dccp_v4_connect,
1337 .disconnect = dccp_disconnect,
1338 .ioctl = dccp_ioctl,
1339 .init = dccp_v4_init_sock,
1340 .setsockopt = dccp_setsockopt,
1341 .getsockopt = dccp_getsockopt,
1342 .sendmsg = dccp_sendmsg,
1343 .recvmsg = dccp_recvmsg,
1344 .backlog_rcv = dccp_v4_do_rcv,
1345 .hash = dccp_v4_hash,
1346 .unhash = dccp_v4_unhash,
1347 .accept = inet_csk_accept,
1348 .get_port = dccp_v4_get_port,
1349 .shutdown = dccp_shutdown,
1350 .destroy = dccp_v4_destroy_sock,
1351 .orphan_count = &dccp_orphan_count,
1352 .max_header = MAX_DCCP_HEADER,
1353 .obj_size = sizeof(struct dccp_sock),
1354 .rsk_prot = &dccp_request_sock_ops,
1355 .twsk_obj_size = sizeof(struct inet_timewait_sock),
1356};
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
new file mode 100644
index 000000000000..ce5dff4ac22e
--- /dev/null
+++ b/net/dccp/minisocks.c
@@ -0,0 +1,264 @@
1/*
2 * net/dccp/minisocks.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/skbuff.h>
16#include <linux/timer.h>
17
18#include <net/sock.h>
19#include <net/xfrm.h>
20#include <net/inet_timewait_sock.h>
21
22#include "ccid.h"
23#include "dccp.h"
24
25struct inet_timewait_death_row dccp_death_row = {
26 .sysctl_max_tw_buckets = NR_FILE * 2,
27 .period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
28 .death_lock = SPIN_LOCK_UNLOCKED,
29 .hashinfo = &dccp_hashinfo,
30 .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
31 (unsigned long)&dccp_death_row),
32 .twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work,
33 inet_twdr_twkill_work,
34 &dccp_death_row),
35/* Short-time timewait calendar */
36
37 .twcal_hand = -1,
38 .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
39 (unsigned long)&dccp_death_row),
40};
41
42void dccp_time_wait(struct sock *sk, int state, int timeo)
43{
44 struct inet_timewait_sock *tw = NULL;
45
46 if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets)
47 tw = inet_twsk_alloc(sk, state);
48
49 if (tw != NULL) {
50 const struct inet_connection_sock *icsk = inet_csk(sk);
51 const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
52
53 /* Linkage updates. */
54 __inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
55
56 /* Get the TIME_WAIT timeout firing. */
57 if (timeo < rto)
58 timeo = rto;
59
60 tw->tw_timeout = DCCP_TIMEWAIT_LEN;
61 if (state == DCCP_TIME_WAIT)
62 timeo = DCCP_TIMEWAIT_LEN;
63
64 inet_twsk_schedule(tw, &dccp_death_row, timeo,
65 DCCP_TIMEWAIT_LEN);
66 inet_twsk_put(tw);
67 } else {
68 /* Sorry, if we're out of memory, just CLOSE this
69 * socket up. We've got bigger problems than
70 * non-graceful socket closings.
71 */
72 LIMIT_NETDEBUG(KERN_INFO "DCCP: time wait bucket "
73 "table overflow\n");
74 }
75
76 dccp_done(sk);
77}
78
79struct sock *dccp_create_openreq_child(struct sock *sk,
80 const struct request_sock *req,
81 const struct sk_buff *skb)
82{
83 /*
84 * Step 3: Process LISTEN state
85 *
86 * // Generate a new socket and switch to that socket
87 * Set S := new socket for this port pair
88 */
89 struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
90
91 if (newsk != NULL) {
92 const struct dccp_request_sock *dreq = dccp_rsk(req);
93 struct inet_connection_sock *newicsk = inet_csk(newsk);
94 struct dccp_sock *newdp = dccp_sk(newsk);
95
96 newdp->dccps_hc_rx_ackpkts = NULL;
97 newdp->dccps_role = DCCP_ROLE_SERVER;
98 newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
99
100 if (newdp->dccps_options.dccpo_send_ack_vector) {
101 newdp->dccps_hc_rx_ackpkts =
102 dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
103 GFP_ATOMIC);
104 /*
105 * XXX: We're using the same CCIDs set on the parent,
106 * i.e. sk_clone copied the master sock and left the
107 * CCID pointers for this child, that is why we do the
108 * __ccid_get calls.
109 */
110 if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL))
111 goto out_free;
112 }
113
114 if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid,
115 newsk) != 0 ||
116 ccid_hc_tx_init(newdp->dccps_hc_tx_ccid,
117 newsk) != 0)) {
118 dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts);
119 ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk);
120 ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk);
121out_free:
122 /* It is still a raw copy of the parent, so invalidate
123 * the destructor and do a plain sk_free() */
124 newsk->sk_destruct = NULL;
125 sk_free(newsk);
126 return NULL;
127 }
128
129 __ccid_get(newdp->dccps_hc_rx_ccid);
130 __ccid_get(newdp->dccps_hc_tx_ccid);
131
132 /*
133 * Step 3: Process LISTEN state
134 *
135 * Choose S.ISS (initial seqno) or set from Init Cookie
136 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init
137 * Cookie
138 */
139
140 /* See dccp_v4_conn_request */
141 newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd;
142
143 newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr;
144 dccp_update_gsr(newsk, dreq->dreq_isr);
145
146 newdp->dccps_iss = dreq->dreq_iss;
147 dccp_update_gss(newsk, dreq->dreq_iss);
148
149 /*
150 * SWL and AWL are initially adjusted so that they are not less than
151 * the initial Sequence Numbers received and sent, respectively:
152 * SWL := max(GSR + 1 - floor(W/4), ISR),
153 * AWL := max(GSS - W' + 1, ISS).
154 * These adjustments MUST be applied only at the beginning of the
155 * connection.
156 */
157 dccp_set_seqno(&newdp->dccps_swl,
158 max48(newdp->dccps_swl, newdp->dccps_isr));
159 dccp_set_seqno(&newdp->dccps_awl,
160 max48(newdp->dccps_awl, newdp->dccps_iss));
161
162 dccp_init_xmit_timers(newsk);
163
164 DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
165 }
166 return newsk;
167}
168
169/*
170 * Process an incoming packet for RESPOND sockets represented
171 * as an request_sock.
172 */
173struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
174 struct request_sock *req,
175 struct request_sock **prev)
176{
177 struct sock *child = NULL;
178
179 /* Check for retransmitted REQUEST */
180 if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
181 if (after48(DCCP_SKB_CB(skb)->dccpd_seq,
182 dccp_rsk(req)->dreq_isr)) {
183 struct dccp_request_sock *dreq = dccp_rsk(req);
184
185 dccp_pr_debug("Retransmitted REQUEST\n");
186 /* Send another RESPONSE packet */
187 dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1);
188 dccp_set_seqno(&dreq->dreq_isr,
189 DCCP_SKB_CB(skb)->dccpd_seq);
190 req->rsk_ops->rtx_syn_ack(sk, req, NULL);
191 }
192 /* Network Duplicate, discard packet */
193 return NULL;
194 }
195
196 DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
197
198 if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK &&
199 dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK)
200 goto drop;
201
202 /* Invalid ACK */
203 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) {
204 dccp_pr_debug("Invalid ACK number: ack_seq=%llu, "
205 "dreq_iss=%llu\n",
206 (unsigned long long)
207 DCCP_SKB_CB(skb)->dccpd_ack_seq,
208 (unsigned long long)
209 dccp_rsk(req)->dreq_iss);
210 goto drop;
211 }
212
213 child = dccp_v4_request_recv_sock(sk, skb, req, NULL);
214 if (child == NULL)
215 goto listen_overflow;
216
217 /* FIXME: deal with options */
218
219 inet_csk_reqsk_queue_unlink(sk, req, prev);
220 inet_csk_reqsk_queue_removed(sk, req);
221 inet_csk_reqsk_queue_add(sk, req, child);
222out:
223 return child;
224listen_overflow:
225 dccp_pr_debug("listen_overflow!\n");
226 DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
227drop:
228 if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
229 req->rsk_ops->send_reset(skb);
230
231 inet_csk_reqsk_queue_drop(sk, req, prev);
232 goto out;
233}
234
235/*
236 * Queue segment on the new socket if the new socket is active,
237 * otherwise we just short-circuit this and continue with
238 * the new socket.
239 */
240int dccp_child_process(struct sock *parent, struct sock *child,
241 struct sk_buff *skb)
242{
243 int ret = 0;
244 const int state = child->sk_state;
245
246 if (!sock_owned_by_user(child)) {
247 ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb),
248 skb->len);
249
250 /* Wakeup parent, send SIGIO */
251 if (state == DCCP_RESPOND && child->sk_state != state)
252 parent->sk_data_ready(parent, 0);
253 } else {
254 /* Alas, it is possible again, because we do the lookup
255 * in the main socket hash table and the lock on the listening
256 * socket does not protect us anymore.
257 */
258 sk_add_backlog(child, skb);
259 }
260
261 bh_unlock_sock(child);
262 sock_put(child);
263 return ret;
264}
diff --git a/net/dccp/options.c b/net/dccp/options.c
new file mode 100644
index 000000000000..382c5894acb2
--- /dev/null
+++ b/net/dccp/options.c
@@ -0,0 +1,855 @@
1/*
2 * net/dccp/options.c
3 *
4 * An implementation of the DCCP protocol
5 * Copyright (c) 2005 Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
6 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
7 * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14#include <linux/config.h>
15#include <linux/dccp.h>
16#include <linux/module.h>
17#include <linux/types.h>
18#include <linux/kernel.h>
19#include <linux/skbuff.h>
20
21#include "ccid.h"
22#include "dccp.h"
23
24static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
25 struct sock *sk,
26 const u64 ackno,
27 const unsigned char len,
28 const unsigned char *vector);
29
30/* Stores the default values for a new connection; may be changed with sysctl */
31static const struct dccp_options dccpo_default_values = {
32 .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW,
33 .dccpo_ccid = DCCPF_INITIAL_CCID,
34 .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR,
35 .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT,
36};
37
38void dccp_options_init(struct dccp_options *dccpo)
39{
40 memcpy(dccpo, &dccpo_default_values, sizeof(*dccpo));
41}
42
43static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
44{
45 u32 value = 0;
46
47 if (len > 3)
48 value += *bf++ << 24;
49 if (len > 2)
50 value += *bf++ << 16;
51 if (len > 1)
52 value += *bf++ << 8;
53 if (len > 0)
54 value += *bf;
55
56 return value;
57}
58
59int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
60{
61 struct dccp_sock *dp = dccp_sk(sk);
62#ifdef CONFIG_IP_DCCP_DEBUG
63 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
64 "CLIENT rx opt: " : "server rx opt: ";
65#endif
66 const struct dccp_hdr *dh = dccp_hdr(skb);
67 const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
68 unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
69 unsigned char *opt_ptr = options;
70 const unsigned char *opt_end = (unsigned char *)dh +
71 (dh->dccph_doff * 4);
72 struct dccp_options_received *opt_recv = &dp->dccps_options_received;
73 unsigned char opt, len;
74 unsigned char *value;
75
76 memset(opt_recv, 0, sizeof(*opt_recv));
77
78 while (opt_ptr != opt_end) {
79 opt = *opt_ptr++;
80 len = 0;
81 value = NULL;
82
83 /* Check if this isn't a single byte option */
84 if (opt > DCCPO_MAX_RESERVED) {
85 if (opt_ptr == opt_end)
86 goto out_invalid_option;
87
88 len = *opt_ptr++;
89 if (len < 3)
90 goto out_invalid_option;
91 /*
92 * Remove the type and len fields, leaving
93 * just the value size
94 */
95 len -= 2;
96 value = opt_ptr;
97 opt_ptr += len;
98
99 if (opt_ptr > opt_end)
100 goto out_invalid_option;
101 }
102
103 switch (opt) {
104 case DCCPO_PADDING:
105 break;
106 case DCCPO_NDP_COUNT:
107 if (len > 3)
108 goto out_invalid_option;
109
110 opt_recv->dccpor_ndp = dccp_decode_value_var(value, len);
111 dccp_pr_debug("%sNDP count=%d\n", debug_prefix,
112 opt_recv->dccpor_ndp);
113 break;
114 case DCCPO_ACK_VECTOR_0:
115 if (len > DCCP_MAX_ACK_VECTOR_LEN)
116 goto out_invalid_option;
117
118 if (pkt_type == DCCP_PKT_DATA)
119 continue;
120
121 opt_recv->dccpor_ack_vector_len = len;
122 opt_recv->dccpor_ack_vector_idx = value - options;
123
124 dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n",
125 debug_prefix, len,
126 (unsigned long long)
127 DCCP_SKB_CB(skb)->dccpd_ack_seq);
128 dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq,
129 value, len);
130 dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts,
131 sk,
132 DCCP_SKB_CB(skb)->dccpd_ack_seq,
133 len, value);
134 break;
135 case DCCPO_TIMESTAMP:
136 if (len != 4)
137 goto out_invalid_option;
138
139 opt_recv->dccpor_timestamp = ntohl(*(u32 *)value);
140
141 dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
142 do_gettimeofday(&dp->dccps_timestamp_time);
143
144 dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n",
145 debug_prefix, opt_recv->dccpor_timestamp,
146 (unsigned long long)
147 DCCP_SKB_CB(skb)->dccpd_ack_seq);
148 break;
149 case DCCPO_TIMESTAMP_ECHO:
150 if (len != 4 && len != 6 && len != 8)
151 goto out_invalid_option;
152
153 opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value);
154
155 dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, ",
156 debug_prefix,
157 opt_recv->dccpor_timestamp_echo,
158 len + 2,
159 (unsigned long long)
160 DCCP_SKB_CB(skb)->dccpd_ack_seq);
161
162 if (len > 4) {
163 if (len == 6)
164 opt_recv->dccpor_elapsed_time =
165 ntohs(*(u16 *)(value + 4));
166 else
167 opt_recv->dccpor_elapsed_time =
168 ntohl(*(u32 *)(value + 4));
169
170 dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n",
171 debug_prefix,
172 opt_recv->dccpor_elapsed_time);
173 }
174 break;
175 case DCCPO_ELAPSED_TIME:
176 if (len != 2 && len != 4)
177 goto out_invalid_option;
178
179 if (pkt_type == DCCP_PKT_DATA)
180 continue;
181
182 if (len == 2)
183 opt_recv->dccpor_elapsed_time =
184 ntohs(*(u16 *)value);
185 else
186 opt_recv->dccpor_elapsed_time =
187 ntohl(*(u32 *)value);
188
189 dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix,
190 opt_recv->dccpor_elapsed_time);
191 break;
192 /*
193 * From draft-ietf-dccp-spec-11.txt:
194 *
195 * Option numbers 128 through 191 are for
196 * options sent from the HC-Sender to the
197 * HC-Receiver; option numbers 192 through 255
198 * are for options sent from the HC-Receiver to
199 * the HC-Sender.
200 */
201 case 128 ... 191: {
202 const u16 idx = value - options;
203
204 if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
205 opt, len, idx,
206 value) != 0)
207 goto out_invalid_option;
208 }
209 break;
210 case 192 ... 255: {
211 const u16 idx = value - options;
212
213 if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
214 opt, len, idx,
215 value) != 0)
216 goto out_invalid_option;
217 }
218 break;
219 default:
220 pr_info("DCCP(%p): option %d(len=%d) not "
221 "implemented, ignoring\n",
222 sk, opt, len);
223 break;
224 }
225 }
226
227 return 0;
228
229out_invalid_option:
230 DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
231 DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
232 pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len);
233 return -1;
234}
235
236static void dccp_encode_value_var(const u32 value, unsigned char *to,
237 const unsigned int len)
238{
239 if (len > 3)
240 *to++ = (value & 0xFF000000) >> 24;
241 if (len > 2)
242 *to++ = (value & 0xFF0000) >> 16;
243 if (len > 1)
244 *to++ = (value & 0xFF00) >> 8;
245 if (len > 0)
246 *to++ = (value & 0xFF);
247}
248
249static inline int dccp_ndp_len(const int ndp)
250{
251 return likely(ndp <= 0xFF) ? 1 : ndp <= 0xFFFF ? 2 : 3;
252}
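/*
 * Round-trip sketch for the variable-length encoding above, taking an
 * NDP count of 300 as an example: dccp_ndp_len(300) == 2,
 * dccp_encode_value_var(300, buf, 2) stores {0x01, 0x2C}, and
 * dccp_decode_value_var(buf, 2) rebuilds (0x01 << 8) | 0x2C == 300.
 */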
253
254void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
255 const unsigned char option,
256 const void *value, const unsigned char len)
257{
258 unsigned char *to;
259
260 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) {
261 LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert "
262 "%d option!\n", option);
263 return;
264 }
265
266 DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2;
267
268 to = skb_push(skb, len + 2);
269 *to++ = option;
270 *to++ = len + 2;
271
272 memcpy(to, value, len);
273}
274
275EXPORT_SYMBOL_GPL(dccp_insert_option);
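/*
 * Example of the resulting on-wire TLV: inserting a 4-byte value with
 * dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, 4) pushes 6 bytes,
 * { DCCPO_TIMESTAMP, 6, now[0], now[1], now[2], now[3] }, i.e. the
 * length octet counts the type and length octets as well as the value.
 */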
276
277static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
278{
279 struct dccp_sock *dp = dccp_sk(sk);
280 int ndp = dp->dccps_ndp_count;
281
282 if (dccp_non_data_packet(skb))
283 ++dp->dccps_ndp_count;
284 else
285 dp->dccps_ndp_count = 0;
286
287 if (ndp > 0) {
288 unsigned char *ptr;
289 const int ndp_len = dccp_ndp_len(ndp);
290 const int len = ndp_len + 2;
291
292 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
293 return;
294
295 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
296
297 ptr = skb_push(skb, len);
298 *ptr++ = DCCPO_NDP_COUNT;
299 *ptr++ = len;
300 dccp_encode_value_var(ndp, ptr, ndp_len);
301 }
302}
303
304static inline int dccp_elapsed_time_len(const u32 elapsed_time)
305{
306 return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4;
307}
308
309void dccp_insert_option_elapsed_time(struct sock *sk,
310 struct sk_buff *skb,
311 u32 elapsed_time)
312{
313#ifdef CONFIG_IP_DCCP_DEBUG
314 struct dccp_sock *dp = dccp_sk(sk);
315 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
316 "CLIENT TX opt: " : "server TX opt: ";
317#endif
318 const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
319 const int len = 2 + elapsed_time_len;
320 unsigned char *to;
321
322 if (elapsed_time_len == 0)
323 return;
324
325 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
326 LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to "
327 "insert elapsed time!\n");
328 return;
329 }
330
331 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
332
333 to = skb_push(skb, len);
334 *to++ = DCCPO_ELAPSED_TIME;
335 *to++ = len;
336
337 if (elapsed_time_len == 2) {
338 const u16 var16 = htons((u16)elapsed_time);
339 memcpy(to, &var16, 2);
340 } else {
341 const u32 var32 = htonl(elapsed_time);
342 memcpy(to, &var32, 4);
343 }
344
345 dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n",
346 debug_prefix, elapsed_time,
347 len,
348 (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
349}
350
351EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
352
353static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb)
354{
355 struct dccp_sock *dp = dccp_sk(sk);
356#ifdef CONFIG_IP_DCCP_DEBUG
357 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
358 "CLIENT TX opt: " : "server TX opt: ";
359#endif
360 struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
361 int len = ap->dccpap_buf_vector_len + 2;
362 const u32 elapsed_time = timeval_now_delta(&ap->dccpap_time) / 10;
363 unsigned char *to, *from;
364
365 if (elapsed_time != 0)
366 dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
367
368 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
369 LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to "
370 "insert ACK Vector!\n");
371 return;
372 }
373
374 /*
375 * XXX: now we have just one ack vector sent record, so
376 * we have to wait for it to be cleared.
377 *
378 * Of course this is not acceptable, but this is just for
379 * basic testing now.
380 */
381 if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1)
382 return;
383
384 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
385
386 to = skb_push(skb, len);
387 *to++ = DCCPO_ACK_VECTOR_0;
388 *to++ = len;
389
390 len = ap->dccpap_buf_vector_len;
391 from = ap->dccpap_buf + ap->dccpap_buf_head;
392
393 /* Check if buf_head wraps */
394 if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) {
395 const unsigned int tailsize = (ap->dccpap_buf_len -
396 ap->dccpap_buf_head);
397
398 memcpy(to, from, tailsize);
399 to += tailsize;
400 len -= tailsize;
401 from = ap->dccpap_buf;
402 }
403
404 memcpy(to, from, len);
405 /*
406 * From draft-ietf-dccp-spec-11.txt:
407 *
408 * For each acknowledgement it sends, the HC-Receiver will add an
409 * acknowledgement record. ack_seqno will equal the HC-Receiver
410 * sequence number it used for the ack packet; ack_ptr will equal
411 * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
412 * equal buf_nonce.
413 *
414 * This implementation uses just one ack record for now.
415 */
416 ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
417 ap->dccpap_ack_ptr = ap->dccpap_buf_head;
418 ap->dccpap_ack_ackno = ap->dccpap_buf_ackno;
419 ap->dccpap_ack_nonce = ap->dccpap_buf_nonce;
420 ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len;
421
422 dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, "
423 "ack_ackno=%llu\n",
424 debug_prefix, ap->dccpap_ack_vector_len,
425 (unsigned long long) ap->dccpap_ack_seqno,
426 (unsigned long long) ap->dccpap_ack_ackno);
427}
428
429void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
430{
431 struct timeval tv;
432 u32 now;
433
434 do_gettimeofday(&tv);
435 now = (tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10;
436 /* yes this will overflow but that is the point as we want a
437 * 10 usec 32 bit timer, which means it wraps every 11.9 hours */
438
439 now = htonl(now);
440 dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now));
441}
442
443EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp);
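/*
 * Sanity check for the wrap claim above: 2^32 ticks * 10 usec/tick =
 * 42949.67 seconds, i.e. roughly 11.9 hours between wraps of this
 * 32-bit, 10-microsecond-resolution timestamp.
 */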
444
445static void dccp_insert_option_timestamp_echo(struct sock *sk,
446 struct sk_buff *skb)
447{
448 struct dccp_sock *dp = dccp_sk(sk);
449#ifdef CONFIG_IP_DCCP_DEBUG
450 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
451 "CLIENT TX opt: " : "server TX opt: ";
452#endif
453 u32 tstamp_echo;
454 const u32 elapsed_time =
455 timeval_now_delta(&dp->dccps_timestamp_time) / 10;
456 const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
457 const int len = 6 + elapsed_time_len;
458 unsigned char *to;
459
460 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
461 LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert "
462 "timestamp echo!\n");
463 return;
464 }
465
466 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
467
468 to = skb_push(skb, len);
469 *to++ = DCCPO_TIMESTAMP_ECHO;
470 *to++ = len;
471
472 tstamp_echo = htonl(dp->dccps_timestamp_echo);
473 memcpy(to, &tstamp_echo, 4);
474 to += 4;
475
476 if (elapsed_time_len == 2) {
477 const u16 var16 = htons((u16)elapsed_time);
478 memcpy(to, &var16, 2);
479 } else if (elapsed_time_len == 4) {
480 const u32 var32 = htonl(elapsed_time);
481 memcpy(to, &var32, 4);
482 }
483
484 dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n",
485 debug_prefix, dp->dccps_timestamp_echo,
486 len,
487 (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
488
489 dp->dccps_timestamp_echo = 0;
490 dp->dccps_timestamp_time.tv_sec = 0;
491 dp->dccps_timestamp_time.tv_usec = 0;
492}
493
494void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
495{
496 struct dccp_sock *dp = dccp_sk(sk);
497
498 DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
499
500 if (dp->dccps_options.dccpo_send_ndp_count)
501 dccp_insert_option_ndp(sk, skb);
502
503 if (!dccp_packet_without_ack(skb)) {
504 if (dp->dccps_options.dccpo_send_ack_vector &&
505 (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno !=
506 DCCP_MAX_SEQNO + 1))
507 dccp_insert_option_ack_vector(sk, skb);
508
509 if (dp->dccps_timestamp_echo != 0)
510 dccp_insert_option_timestamp_echo(sk, skb);
511 }
512
513 ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb);
514 ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb);
515
516 /* XXX: insert other options when appropriate */
517
518 if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
519 /* The length of all options has to be a multiple of 4 */
520 int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4;
521
522 if (padding != 0) {
523 padding = 4 - padding;
524 memset(skb_push(skb, padding), 0, padding);
525 DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
526 }
527 }
528}
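/*
 * Padding example for the block above: with dccpd_opt_len == 9 the
 * remainder is 1, so 4 - 1 == 3 zeroed (DCCPO_PADDING) bytes are
 * pushed and the option area grows to 12 bytes, a multiple of 4 as
 * the spec requires.
 */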
529
530struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len,
531 const unsigned int __nocast priority)
532{
533 struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority);
534
535 if (ap != NULL) {
536#ifdef CONFIG_IP_DCCP_DEBUG
537 memset(ap->dccpap_buf, 0xFF, len);
538#endif
539 ap->dccpap_buf_len = len;
540 ap->dccpap_buf_head =
541 ap->dccpap_buf_tail =
542 ap->dccpap_buf_len - 1;
543 ap->dccpap_buf_ackno =
544 ap->dccpap_ack_ackno =
545 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
546 ap->dccpap_buf_nonce = ap->dccpap_ack_nonce = 0;
547 ap->dccpap_ack_ptr = 0;
548 ap->dccpap_time.tv_sec = 0;
549 ap->dccpap_time.tv_usec = 0;
550 ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0;
551 }
552
553 return ap;
554}
555
556void dccp_ackpkts_free(struct dccp_ackpkts *ap)
557{
558 if (ap != NULL) {
559#ifdef CONFIG_IP_DCCP_DEBUG
560 memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len);
561#endif
562 kfree(ap);
563 }
564}
565
566static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap,
567 const unsigned int index)
568{
569 return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK;
570}
571
572static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap,
573 const unsigned int index)
574{
575 return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK;
576}
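/*
 * Illustration of the ack vector byte format handled by the two
 * helpers above: the top two bits carry the state and the low six bits
 * the run length, so 0x00 means "received, just this packet", 0x01
 * means "received, this packet and the next one", and 0xC0 (state 3)
 * means "not received, just this packet".
 */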
577
578/*
579 * If several packets are missing, the HC-Receiver may prefer to enter multiple
580 * bytes with run length 0, rather than a single byte with a larger run length;
581 * this simplifies table updates if one of the missing packets arrives.
582 */
583static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap,
584 const unsigned int packets,
585 const unsigned char state)
586{
587 unsigned int gap;
588 signed long new_head;
589
590 if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len)
591 return -ENOBUFS;
592
593 gap = packets - 1;
594 new_head = ap->dccpap_buf_head - packets;
595
596 if (new_head < 0) {
597 if (gap > 0) {
598 memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED,
599 gap + new_head + 1);
600 gap = -new_head;
601 }
602 new_head += ap->dccpap_buf_len;
603 }
604
605 ap->dccpap_buf_head = new_head;
606
607 if (gap > 0)
608 memset(ap->dccpap_buf + ap->dccpap_buf_head + 1,
609 DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap);
610
611 ap->dccpap_buf[ap->dccpap_buf_head] = state;
612 ap->dccpap_buf_vector_len += packets;
613 return 0;
614}
615
616/*
617 * Implements the draft-ietf-dccp-spec-11.txt Appendix A
618 */
619int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state)
620{
621 /*
622 * Check at the right places if the buffer is full; if it is, tell the
623 * caller to start dropping packets until the HC-Sender acks our ACK
624 * vectors, at which point we will free up space in dccpap_buf.
625 *
626 * We may well decide to do buffer compression, etc, but for now let's
627 * just drop.
628 *
629 * From Appendix A:
630 *
631 * Of course, the circular buffer may overflow, either when the
632 * HC-Sender is sending data at a very high rate, when the
633 * HC-Receiver's acknowledgements are not reaching the HC-Sender,
634 * or when the HC-Sender is forgetting to acknowledge those acks
635 * (so the HC-Receiver is unable to clean up old state). In this
636 * case, the HC-Receiver should either compress the buffer (by
637 * increasing run lengths when possible), transfer its state to
638 * a larger buffer, or, as a last resort, drop all received
639 * packets, without processing them whatsoever, until its buffer
640 * shrinks again.
641 */
642
643 /* See if this is the first ackno being inserted */
644 if (ap->dccpap_buf_vector_len == 0) {
645 ap->dccpap_buf[ap->dccpap_buf_head] = state;
646 ap->dccpap_buf_vector_len = 1;
647 } else if (after48(ackno, ap->dccpap_buf_ackno)) {
648 const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno,
649 ackno);
650
651 /*
652 * Check if the state of this packet is the same as that of the
653 * previous ackno and, if so, whether we can bump the head len.
654 */
655 if (delta == 1 &&
656 dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state &&
657 (dccp_ackpkts_len(ap, ap->dccpap_buf_head) <
658 DCCP_ACKPKTS_LEN_MASK))
659 ap->dccpap_buf[ap->dccpap_buf_head]++;
660 else if (dccp_ackpkts_set_buf_head_state(ap, delta, state))
661 return -ENOBUFS;
662 } else {
663 /*
664 * A.1.2. Old Packets
665 *
666 * When a packet with Sequence Number S arrives, and
667 * S <= buf_ackno, the HC-Receiver will scan the table
668 * for the byte corresponding to S. (Indexing structures
669 * could reduce the complexity of this scan.)
670 */
671 u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno);
672 unsigned int index = ap->dccpap_buf_head;
673
674 while (1) {
675 const u8 len = dccp_ackpkts_len(ap, index);
676 const u8 state = dccp_ackpkts_state(ap, index);
677 /*
678 * valid packets not yet in dccpap_buf have a reserved
679 * entry, with a len equal to 0.
680 */
681 if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED &&
682 len == 0 && delta == 0) { /* Found our
683 reserved seat! */
684 dccp_pr_debug("Found %llu reserved seat!\n",
685 (unsigned long long) ackno);
686 ap->dccpap_buf[index] = state;
687 goto out;
688 }
689 /* len == 0 means one packet */
690 if (delta < len + 1)
691 goto out_duplicate;
692
693 delta -= len + 1;
694 if (++index == ap->dccpap_buf_len)
695 index = 0;
696 }
697 }
698
699 ap->dccpap_buf_ackno = ackno;
700 do_gettimeofday(&ap->dccpap_time);
701out:
702 dccp_pr_debug("");
703 dccp_ackpkts_print(ap);
704 return 0;
705
706out_duplicate:
707 /* Duplicate packet */
708 dccp_pr_debug("Received a dup or already considered lost "
709 "packet: %llu\n", (unsigned long long) ackno);
710 return -EILSEQ;
711}
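/*
 * Worked example for the three branches above, starting from an empty
 * vector and state == DCCP_ACKPKTS_STATE_RECEIVED: adding ackno 10
 * takes the first branch (vector {0x00}); adding 11 gives delta == 1
 * with the same state, so the head byte's run length is bumped to
 * {0x01}; adding 14 (delta == 3) makes dccp_ackpkts_set_buf_head_state()
 * write two NOT_RECEIVED bytes for 13 and 12 before the new head byte,
 * leaving {0x00, 0xC0, 0xC0, 0x01} when read from buf_head onwards.
 */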
712
713#ifdef CONFIG_IP_DCCP_DEBUG
714void dccp_ackvector_print(const u64 ackno, const unsigned char *vector,
715 int len)
716{
717 if (!dccp_debug)
718 return;
719
720 printk("ACK vector len=%d, ackno=%llu |", len,
721 (unsigned long long) ackno);
722
723 while (len--) {
724 const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6;
725 const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
726
727 printk("%d,%d|", state, rl);
728 ++vector;
729 }
730
731 printk("\n");
732}
733
734void dccp_ackpkts_print(const struct dccp_ackpkts *ap)
735{
736 dccp_ackvector_print(ap->dccpap_buf_ackno,
737 ap->dccpap_buf + ap->dccpap_buf_head,
738 ap->dccpap_buf_vector_len);
739}
740#endif
741
742static void dccp_ackpkts_throw_away_ack_record(struct dccp_ackpkts *ap)
743{
744 /*
745 * As we're keeping track of the ack vector size
746 * (dccpap_buf_vector_len) and the sent ack vector size
747 * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but
748 * keep this code here as in the future we'll implement a vector of
749 * ack records, as suggested in draft-ietf-dccp-spec-11.txt
750 * Appendix A. -acme
751 */
752#if 0
753 ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1;
754 if (ap->dccpap_buf_tail >= ap->dccpap_buf_len)
755 ap->dccpap_buf_tail -= ap->dccpap_buf_len;
756#endif
757 ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len;
758}
759
760void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk,
761 u64 ackno)
762{
763 /* Check if we actually sent an ACK vector */
764 if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
765 return;
766
767 if (ackno == ap->dccpap_ack_seqno) {
768#ifdef CONFIG_IP_DCCP_DEBUG
769 struct dccp_sock *dp = dccp_sk(sk);
770 const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
771 "CLIENT rx ack: " : "server rx ack: ";
772#endif
773 dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, "
774 "ack_ackno=%llu, ACKED!\n",
775 debug_prefix, 1,
776 (unsigned long long) ap->dccpap_ack_seqno,
777 (unsigned long long) ap->dccpap_ack_ackno);
778 dccp_ackpkts_throw_away_ack_record(ap);
779 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
780 }
781}
782
783static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
784 struct sock *sk, u64 ackno,
785 const unsigned char len,
786 const unsigned char *vector)
787{
788 unsigned char i;
789
790 /* Check if we actually sent an ACK vector */
791 if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
792 return;
793 /*
794 * We're in the receiver half connection, so if the received an ACK
795 * vector ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're
796 * not interested.
797 *
798 * Extra explanation with example:
799 *
800 * if we received an ACK vector with ackno 50, it can only be acking
801 * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
802 */
803 /* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */
804 if (before48(ackno, ap->dccpap_ack_seqno)) {
805 /* dccp_pr_debug_cat("yes\n"); */
806 return;
807 }
808 /* dccp_pr_debug_cat("no\n"); */
809
810 i = len;
811 while (i--) {
812 const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
813 u64 ackno_end_rl;
814
815 dccp_set_seqno(&ackno_end_rl, ackno - rl);
816
817 /*
818 * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl,
819 * ap->dccpap_ack_seqno, ackno);
820 */
821 if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) {
822 const u8 state = (*vector &
823 DCCP_ACKPKTS_STATE_MASK) >> 6;
824 /* dccp_pr_debug_cat("yes\n"); */
825
826 if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) {
827#ifdef CONFIG_IP_DCCP_DEBUG
828 struct dccp_sock *dp = dccp_sk(sk);
829 const char *debug_prefix =
830 dp->dccps_role == DCCP_ROLE_CLIENT ?
831 "CLIENT rx ack: " : "server rx ack: ";
832#endif
833 dccp_pr_debug("%sACK vector 0, len=%d, "
834 "ack_seqno=%llu, ack_ackno=%llu, "
835 "ACKED!\n",
836 debug_prefix, len,
837 (unsigned long long)
838 ap->dccpap_ack_seqno,
839 (unsigned long long)
840 ap->dccpap_ack_ackno);
841				dccp_ackpkts_throw_away_ack_record(ap);
842 }
843 /*
844 * If dccpap_ack_seqno was not received, no problem
845 * we'll send another ACK vector.
846 */
847 ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
848 break;
849 }
850 /* dccp_pr_debug_cat("no\n"); */
851
852 dccp_set_seqno(&ackno, ackno_end_rl - 1);
853 ++vector;
854 }
855}
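
Each ack vector byte walked above packs a 2-bit packet state into its top bits and a 6-bit run length into the rest, so one byte covers rl + 1 consecutive sequence numbers counting down from ackno. A minimal standalone sketch of that decoding, assuming the same mask values as DCCP_ACKPKTS_STATE_MASK and DCCP_ACKPKTS_LEN_MASK in the kernel sources:

#include <stdio.h>

#define ACKVEC_STATE_MASK 0xC0 /* top two bits: packet state */
#define ACKVEC_LEN_MASK   0x3F /* low six bits: run length */

int main(void)
{
	const unsigned char vector[] = { 0x00, 0xc1, 0x02 };
	unsigned long long ackno = 100; /* highest seqno the vector covers */
	size_t i;

	for (i = 0; i < sizeof(vector); i++) {
		unsigned int state = (vector[i] & ACKVEC_STATE_MASK) >> 6;
		unsigned int rl = vector[i] & ACKVEC_LEN_MASK;

		/* each byte covers rl + 1 consecutive sequence numbers */
		printf("seqnos %llu..%llu: state %u\n",
		       ackno - rl, ackno, state);
		ackno -= rl + 1;
	}
	return 0;
}
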
diff --git a/net/dccp/output.c b/net/dccp/output.c
new file mode 100644
index 000000000000..28de157a4326
--- /dev/null
+++ b/net/dccp/output.c
@@ -0,0 +1,528 @@
1/*
2 * net/dccp/output.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/skbuff.h>
16
17#include <net/sock.h>
18
19#include "ccid.h"
20#include "dccp.h"
21
22static inline void dccp_event_ack_sent(struct sock *sk)
23{
24 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
25}
26
27/*
28 * All SKBs seen here are completely headerless. It is our
29 * job to build the DCCP header, and pass the packet down to
30 * IP so it can do the same plus pass the packet off to the
31 * device.
32 */
33int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
34{
35 if (likely(skb != NULL)) {
36 const struct inet_sock *inet = inet_sk(sk);
37 struct dccp_sock *dp = dccp_sk(sk);
38 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
39 struct dccp_hdr *dh;
40 /* XXX For now we're using only 48 bits sequence numbers */
41 const int dccp_header_size = sizeof(*dh) +
42 sizeof(struct dccp_hdr_ext) +
43 dccp_packet_hdr_len(dcb->dccpd_type);
44 int err, set_ack = 1;
45 u64 ackno = dp->dccps_gsr;
46
47 dccp_inc_seqno(&dp->dccps_gss);
48
49 switch (dcb->dccpd_type) {
50 case DCCP_PKT_DATA:
51 set_ack = 0;
52 break;
53 case DCCP_PKT_SYNC:
54 case DCCP_PKT_SYNCACK:
55 ackno = dcb->dccpd_seq;
56 break;
57 }
58
59 dcb->dccpd_seq = dp->dccps_gss;
60 dccp_insert_options(sk, skb);
61
62 skb->h.raw = skb_push(skb, dccp_header_size);
63 dh = dccp_hdr(skb);
64 /*
65 * Data packets are not cloned as they are never retransmitted
66 */
67 if (skb_cloned(skb))
68 skb_set_owner_w(skb, sk);
69
70 /* Build DCCP header and checksum it. */
71 memset(dh, 0, dccp_header_size);
72 dh->dccph_type = dcb->dccpd_type;
73 dh->dccph_sport = inet->sport;
74 dh->dccph_dport = inet->dport;
75 dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4;
76 dh->dccph_ccval = dcb->dccpd_ccval;
77 /* XXX For now we're using only 48 bits sequence numbers */
78 dh->dccph_x = 1;
79
80 dp->dccps_awh = dp->dccps_gss;
81 dccp_hdr_set_seq(dh, dp->dccps_gss);
82 if (set_ack)
83 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);
84
85 switch (dcb->dccpd_type) {
86 case DCCP_PKT_REQUEST:
87 dccp_hdr_request(skb)->dccph_req_service =
88 dcb->dccpd_service;
89 break;
90 case DCCP_PKT_RESET:
91 dccp_hdr_reset(skb)->dccph_reset_code =
92 dcb->dccpd_reset_code;
93 break;
94 }
95
96 dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr,
97 inet->daddr);
98
99 if (set_ack)
100 dccp_event_ack_sent(sk);
101
102 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
103
104 err = ip_queue_xmit(skb, 0);
105 if (err <= 0)
106 return err;
107
108		/* NET_XMIT_CN is special. It does not guarantee
109		 * that this packet is lost. It tells us that the device
110		 * is about to start dropping packets, or already
111		 * drops some packets of the same priority, and
112		 * invites us to send less aggressively.
113 */
114 return err == NET_XMIT_CN ? 0 : err;
115 }
116 return -ENOBUFS;
117}
118
119unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
120{
121 struct dccp_sock *dp = dccp_sk(sk);
122 int mss_now;
123
124 /*
125 * FIXME: we really should be using the af_specific thing to support
126 * IPv6.
127 * mss_now = pmtu - tp->af_specific->net_header_len -
128 * sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext);
129 */
130 mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) -
131 sizeof(struct dccp_hdr_ext);
132
133 /* Now subtract optional transport overhead */
134 mss_now -= dp->dccps_ext_header_len;
135
136 /*
137 * FIXME: this should come from the CCID infrastructure, where, say,
138 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets
139 * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
140 * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to
141 * make it a multiple of 4
142 */
143
144 mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
145
146 /* And store cached results */
147 dp->dccps_pmtu_cookie = pmtu;
148 dp->dccps_mss_cache = mss_now;
149
150 return mss_now;
151}
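
The rough option estimate above evaluates to 40: the byte counts sum to 42, and the integer divide-then-multiply rounds that down to a multiple of 4. A hedged sketch of the whole dccp_sync_mss() arithmetic for a 1500-byte PMTU; the 20-byte IPv4 and 12+4-byte DCCP header sizes and the zero extension-header length are illustrative assumptions, not values taken from this tree:

#include <stdio.h>

int main(void)
{
	int pmtu = 1500;                     /* typical Ethernet path */
	int ip_hdr = 20;                     /* IPv4, no options (assumption) */
	int dccp_hdr = 12, dccp_hdr_ext = 4; /* generic + 48-bit seq ext (assumption) */
	int ext_hdr_len = 0;                 /* dccps_ext_header_len stand-in */
	int opt_estimate = ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; /* = 40 */
	int mss;

	mss = pmtu - ip_hdr - dccp_hdr - dccp_hdr_ext - ext_hdr_len
	      - opt_estimate;
	printf("mss_cache = %d\n", mss); /* 1424 with these numbers */
	return 0;
}
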
152
153void dccp_write_space(struct sock *sk)
154{
155 read_lock(&sk->sk_callback_lock);
156
157 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
158 wake_up_interruptible(sk->sk_sleep);
159 /* Should agree with poll, otherwise some programs break */
160 if (sock_writeable(sk))
161 sk_wake_async(sk, 2, POLL_OUT);
162
163 read_unlock(&sk->sk_callback_lock);
164}
165
166/**
167 * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
168 * @sk: socket to wait for
169 * @timeo: for how long
170 */
171static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb,
172 long *timeo)
173{
174 struct dccp_sock *dp = dccp_sk(sk);
175 DEFINE_WAIT(wait);
176 long delay;
177 int rc;
178
179 while (1) {
180 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
181
182 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
183 goto do_error;
184 if (!*timeo)
185 goto do_nonblock;
186 if (signal_pending(current))
187 goto do_interrupted;
188
189 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
190 skb->len);
191 if (rc <= 0)
192 break;
193 delay = msecs_to_jiffies(rc);
194 if (delay > *timeo || delay < 0)
195 goto do_nonblock;
196
197 sk->sk_write_pending++;
198 release_sock(sk);
199 *timeo -= schedule_timeout(delay);
200 lock_sock(sk);
201 sk->sk_write_pending--;
202 }
203out:
204 finish_wait(sk->sk_sleep, &wait);
205 return rc;
206
207do_error:
208 rc = -EPIPE;
209 goto out;
210do_nonblock:
211 rc = -EAGAIN;
212 goto out;
213do_interrupted:
214 rc = sock_intr_errno(*timeo);
215 goto out;
216}
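
The loop above leans on the ccid_hc_tx_send_packet() return convention visible in the code: 0 means the CCID clears the packet for immediate transmission, a positive value is a delay in milliseconds before asking again, and a negative value is an error. A hedged userspace-style sketch of a caller honoring that contract (ccid_tx_send_packet() is a stand-in here, not a kernel symbol):

#include <stdio.h>

/* 0: send now; >0: retry after that many msecs; <0: hard error */
static int ccid_tx_send_packet(void)
{
	static int calls;

	return calls++ < 2 ? 10 : 0; /* rate-limit the first two attempts */
}

int main(void)
{
	int rc;

	while ((rc = ccid_tx_send_packet()) != 0) {
		if (rc < 0) {
			printf("give up, error %d\n", rc);
			return 1;
		}
		printf("CCID asks for a %d msec delay\n", rc);
		/* the kernel sleeps via schedule_timeout() at this point */
	}
	printf("CCID cleared the packet for transmission\n");
	return 0;
}
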
217
218int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
219{
220 const struct dccp_sock *dp = dccp_sk(sk);
221 int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
222 skb->len);
223
224 if (err > 0)
225 err = dccp_wait_for_ccid(sk, skb, timeo);
226
227 if (err == 0) {
228 const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
229 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
230 const int len = skb->len;
231
232 if (sk->sk_state == DCCP_PARTOPEN) {
233 /* See 8.1.5. Handshake Completion */
234 inet_csk_schedule_ack(sk);
235 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
236 inet_csk(sk)->icsk_rto,
237 DCCP_RTO_MAX);
238 dcb->dccpd_type = DCCP_PKT_DATAACK;
239 /*
240 * FIXME: we really should have a
241 * dccps_ack_pending or use icsk.
242 */
243 } else if (inet_csk_ack_scheduled(sk) ||
244 dp->dccps_timestamp_echo != 0 ||
245 (dp->dccps_options.dccpo_send_ack_vector &&
246 ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 &&
247 ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1))
248 dcb->dccpd_type = DCCP_PKT_DATAACK;
249 else
250 dcb->dccpd_type = DCCP_PKT_DATA;
251
252 err = dccp_transmit_skb(sk, skb);
253 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
254 }
255
256 return err;
257}
258
259int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
260{
261 if (inet_sk_rebuild_header(sk) != 0)
262 return -EHOSTUNREACH; /* Routing failure or similar. */
263
264 return dccp_transmit_skb(sk, (skb_cloned(skb) ?
265 pskb_copy(skb, GFP_ATOMIC):
266 skb_clone(skb, GFP_ATOMIC)));
267}
268
269struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
270 struct request_sock *req)
271{
272 struct dccp_hdr *dh;
273 const int dccp_header_size = sizeof(struct dccp_hdr) +
274 sizeof(struct dccp_hdr_ext) +
275 sizeof(struct dccp_hdr_response);
276 struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
277 dccp_header_size, 1,
278 GFP_ATOMIC);
279 if (skb == NULL)
280 return NULL;
281
282 /* Reserve space for headers. */
283 skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
284
285 skb->dst = dst_clone(dst);
286 skb->csum = 0;
287
288 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
289 DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss;
290 dccp_insert_options(sk, skb);
291
292 skb->h.raw = skb_push(skb, dccp_header_size);
293
294 dh = dccp_hdr(skb);
295 memset(dh, 0, dccp_header_size);
296
297 dh->dccph_sport = inet_sk(sk)->sport;
298 dh->dccph_dport = inet_rsk(req)->rmt_port;
299 dh->dccph_doff = (dccp_header_size +
300 DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
301 dh->dccph_type = DCCP_PKT_RESPONSE;
302 dh->dccph_x = 1;
303 dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss);
304 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr);
305
306 dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr,
307 inet_rsk(req)->rmt_addr);
308
309 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
310 return skb;
311}
312
313struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
314 const enum dccp_reset_codes code)
315
316{
317 struct dccp_hdr *dh;
318 struct dccp_sock *dp = dccp_sk(sk);
319 const int dccp_header_size = sizeof(struct dccp_hdr) +
320 sizeof(struct dccp_hdr_ext) +
321 sizeof(struct dccp_hdr_reset);
322 struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
323 dccp_header_size, 1,
324 GFP_ATOMIC);
325 if (skb == NULL)
326 return NULL;
327
328 /* Reserve space for headers. */
329 skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
330
331 skb->dst = dst_clone(dst);
332 skb->csum = 0;
333
334 dccp_inc_seqno(&dp->dccps_gss);
335
336 DCCP_SKB_CB(skb)->dccpd_reset_code = code;
337 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET;
338 DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss;
339 dccp_insert_options(sk, skb);
340
341 skb->h.raw = skb_push(skb, dccp_header_size);
342
343 dh = dccp_hdr(skb);
344 memset(dh, 0, dccp_header_size);
345
346 dh->dccph_sport = inet_sk(sk)->sport;
347 dh->dccph_dport = inet_sk(sk)->dport;
348 dh->dccph_doff = (dccp_header_size +
349 DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
350 dh->dccph_type = DCCP_PKT_RESET;
351 dh->dccph_x = 1;
352 dccp_hdr_set_seq(dh, dp->dccps_gss);
353 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);
354
355 dccp_hdr_reset(skb)->dccph_reset_code = code;
356
357 dh->dccph_checksum = dccp_v4_checksum(skb, inet_sk(sk)->saddr,
358 inet_sk(sk)->daddr);
359
360 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
361 return skb;
362}
363
364/*
365 * Do all connect socket setups that can be done AF independent.
366 */
367static inline void dccp_connect_init(struct sock *sk)
368{
369 struct dst_entry *dst = __sk_dst_get(sk);
370 struct inet_connection_sock *icsk = inet_csk(sk);
371
372 sk->sk_err = 0;
373 sock_reset_flag(sk, SOCK_DONE);
374
375 dccp_sync_mss(sk, dst_mtu(dst));
376
377 /*
378 * FIXME: set dp->{dccps_swh,dccps_swl}, with
379 * something like dccp_inc_seq
380 */
381
382 icsk->icsk_retransmits = 0;
383}
384
385int dccp_connect(struct sock *sk)
386{
387 struct sk_buff *skb;
388 struct inet_connection_sock *icsk = inet_csk(sk);
389
390 dccp_connect_init(sk);
391
392 skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation);
393 if (unlikely(skb == NULL))
394 return -ENOBUFS;
395
396 /* Reserve space for headers. */
397 skb_reserve(skb, MAX_DCCP_HEADER);
398
399 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
400 /* FIXME: set service to something meaningful, coming
401	 * from userspace */
402 DCCP_SKB_CB(skb)->dccpd_service = 0;
403 skb->csum = 0;
404 skb_set_owner_w(skb, sk);
405
406 BUG_TRAP(sk->sk_send_head == NULL);
407 sk->sk_send_head = skb;
408 dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
409 DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
410
411 /* Timer for repeating the REQUEST until an answer. */
412 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
413 icsk->icsk_rto, DCCP_RTO_MAX);
414 return 0;
415}
416
417void dccp_send_ack(struct sock *sk)
418{
419 /* If we have been reset, we may not send again. */
420 if (sk->sk_state != DCCP_CLOSED) {
421 struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
422
423 if (skb == NULL) {
424 inet_csk_schedule_ack(sk);
425 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
426 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
427 TCP_DELACK_MAX,
428 DCCP_RTO_MAX);
429 return;
430 }
431
432 /* Reserve space for headers */
433 skb_reserve(skb, MAX_DCCP_HEADER);
434 skb->csum = 0;
435 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
436 skb_set_owner_w(skb, sk);
437 dccp_transmit_skb(sk, skb);
438 }
439}
440
441EXPORT_SYMBOL_GPL(dccp_send_ack);
442
443void dccp_send_delayed_ack(struct sock *sk)
444{
445 struct inet_connection_sock *icsk = inet_csk(sk);
446 /*
447 * FIXME: tune this timer. elapsed time fixes the skew, so no problem
448 * with using 2s, and active senders also piggyback the ACK into a
449 * DATAACK packet, so this is really for quiescent senders.
450 */
451 unsigned long timeout = jiffies + 2 * HZ;
452
453	/* Use the new timeout only if there wasn't an older one already. */
454 if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
455 /* If delack timer was blocked or is about to expire,
456 * send ACK now.
457 *
458 * FIXME: check the "about to expire" part
459 */
460 if (icsk->icsk_ack.blocked) {
461 dccp_send_ack(sk);
462 return;
463 }
464
465 if (!time_before(timeout, icsk->icsk_ack.timeout))
466 timeout = icsk->icsk_ack.timeout;
467 }
468 icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
469 icsk->icsk_ack.timeout = timeout;
470 sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
471}
472
473void dccp_send_sync(struct sock *sk, const u64 seq,
474 const enum dccp_pkt_type pkt_type)
475{
476 /*
477 * We are not putting this on the write queue, so
478 * dccp_transmit_skb() will set the ownership to this
479 * sock.
480 */
481 struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
482
483 if (skb == NULL)
484 /* FIXME: how to make sure the sync is sent? */
485 return;
486
487 /* Reserve space for headers and prepare control bits. */
488 skb_reserve(skb, MAX_DCCP_HEADER);
489 skb->csum = 0;
490 DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
491 DCCP_SKB_CB(skb)->dccpd_seq = seq;
492
493 skb_set_owner_w(skb, sk);
494 dccp_transmit_skb(sk, skb);
495}
496
497/*
498 * Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This
499 * cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under
500 * any circumstances.
501 */
502void dccp_send_close(struct sock *sk, const int active)
503{
504 struct dccp_sock *dp = dccp_sk(sk);
505 struct sk_buff *skb;
506 const unsigned int prio = active ? GFP_KERNEL : GFP_ATOMIC;
507
508 skb = alloc_skb(sk->sk_prot->max_header, prio);
509 if (skb == NULL)
510 return;
511
512 /* Reserve space for headers and prepare control bits. */
513 skb_reserve(skb, sk->sk_prot->max_header);
514 skb->csum = 0;
515 DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ?
516 DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;
517
518 skb_set_owner_w(skb, sk);
519 if (active) {
520 BUG_TRAP(sk->sk_send_head == NULL);
521 sk->sk_send_head = skb;
522 dccp_transmit_skb(sk, skb_clone(skb, prio));
523 } else
524 dccp_transmit_skb(sk, skb);
525
526 ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
527 ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
528}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
new file mode 100644
index 000000000000..18a0e69c9dc7
--- /dev/null
+++ b/net/dccp/proto.c
@@ -0,0 +1,826 @@
1/*
2 * net/dccp/proto.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/config.h>
13#include <linux/dccp.h>
14#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/sched.h>
17#include <linux/kernel.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/if_arp.h>
22#include <linux/init.h>
23#include <linux/random.h>
24#include <net/checksum.h>
25
26#include <net/inet_common.h>
27#include <net/ip.h>
28#include <net/protocol.h>
29#include <net/sock.h>
30#include <net/xfrm.h>
31
32#include <asm/semaphore.h>
33#include <linux/spinlock.h>
34#include <linux/timer.h>
35#include <linux/delay.h>
36#include <linux/poll.h>
37#include <linux/dccp.h>
38
39#include "ccid.h"
40#include "dccp.h"
41
42DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
43
44atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45
46static struct net_protocol dccp_protocol = {
47 .handler = dccp_v4_rcv,
48 .err_handler = dccp_v4_err,
49};
50
51const char *dccp_packet_name(const int type)
52{
53 static const char *dccp_packet_names[] = {
54 [DCCP_PKT_REQUEST] = "REQUEST",
55 [DCCP_PKT_RESPONSE] = "RESPONSE",
56 [DCCP_PKT_DATA] = "DATA",
57 [DCCP_PKT_ACK] = "ACK",
58 [DCCP_PKT_DATAACK] = "DATAACK",
59 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
60 [DCCP_PKT_CLOSE] = "CLOSE",
61 [DCCP_PKT_RESET] = "RESET",
62 [DCCP_PKT_SYNC] = "SYNC",
63 [DCCP_PKT_SYNCACK] = "SYNCACK",
64 };
65
66 if (type >= DCCP_NR_PKT_TYPES)
67 return "INVALID";
68 else
69 return dccp_packet_names[type];
70}
71
72EXPORT_SYMBOL_GPL(dccp_packet_name);
73
74const char *dccp_state_name(const int state)
75{
76 static char *dccp_state_names[] = {
77 [DCCP_OPEN] = "OPEN",
78 [DCCP_REQUESTING] = "REQUESTING",
79 [DCCP_PARTOPEN] = "PARTOPEN",
80 [DCCP_LISTEN] = "LISTEN",
81 [DCCP_RESPOND] = "RESPOND",
82 [DCCP_CLOSING] = "CLOSING",
83 [DCCP_TIME_WAIT] = "TIME_WAIT",
84 [DCCP_CLOSED] = "CLOSED",
85 };
86
87 if (state >= DCCP_MAX_STATES)
88 return "INVALID STATE!";
89 else
90 return dccp_state_names[state];
91}
92
93EXPORT_SYMBOL_GPL(dccp_state_name);
94
95static inline int dccp_listen_start(struct sock *sk)
96{
97 dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN;
98 return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
99}
100
101int dccp_disconnect(struct sock *sk, int flags)
102{
103 struct inet_connection_sock *icsk = inet_csk(sk);
104 struct inet_sock *inet = inet_sk(sk);
105 int err = 0;
106 const int old_state = sk->sk_state;
107
108 if (old_state != DCCP_CLOSED)
109 dccp_set_state(sk, DCCP_CLOSED);
110
111 /* ABORT function of RFC793 */
112 if (old_state == DCCP_LISTEN) {
113 inet_csk_listen_stop(sk);
114 /* FIXME: do the active reset thing */
115 } else if (old_state == DCCP_REQUESTING)
116 sk->sk_err = ECONNRESET;
117
118 dccp_clear_xmit_timers(sk);
119 __skb_queue_purge(&sk->sk_receive_queue);
120 if (sk->sk_send_head != NULL) {
121 __kfree_skb(sk->sk_send_head);
122 sk->sk_send_head = NULL;
123 }
124
125 inet->dport = 0;
126
127 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
128 inet_reset_saddr(sk);
129
130 sk->sk_shutdown = 0;
131 sock_reset_flag(sk, SOCK_DONE);
132
133 icsk->icsk_backoff = 0;
134 inet_csk_delack_init(sk);
135 __sk_dst_reset(sk);
136
137 BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
138
139 sk->sk_error_report(sk);
140 return err;
141}
142
143/*
144 * Wait for a DCCP event.
145 *
146 * Note that we don't need to lock the socket, as the upper poll layers
147 * take care of normal races (between the test and the event) and we don't
148 * go look at any of the socket buffers directly.
149 */
150static unsigned int dccp_poll(struct file *file, struct socket *sock,
151 poll_table *wait)
152{
153 unsigned int mask;
154 struct sock *sk = sock->sk;
155
156 poll_wait(file, sk->sk_sleep, wait);
157 if (sk->sk_state == DCCP_LISTEN)
158 return inet_csk_listen_poll(sk);
159
160 /* Socket is not locked. We are protected from async events
161 by poll logic and correct handling of state changes
162	   made by other threads is impossible in any case.
163 */
164
165 mask = 0;
166 if (sk->sk_err)
167 mask = POLLERR;
168
169 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
170 mask |= POLLHUP;
171 if (sk->sk_shutdown & RCV_SHUTDOWN)
172 mask |= POLLIN | POLLRDNORM;
173
174 /* Connected? */
175 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
176 if (atomic_read(&sk->sk_rmem_alloc) > 0)
177 mask |= POLLIN | POLLRDNORM;
178
179 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
180 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
181 mask |= POLLOUT | POLLWRNORM;
182 } else { /* send SIGIO later */
183 set_bit(SOCK_ASYNC_NOSPACE,
184 &sk->sk_socket->flags);
185 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
186
187 /* Race breaker. If space is freed after
188 * wspace test but before the flags are set,
189 * IO signal will be lost.
190 */
191 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
192 mask |= POLLOUT | POLLWRNORM;
193 }
194 }
195 }
196 return mask;
197}
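
Because dccp_poll() reports the familiar POLLIN/POLLOUT/POLLHUP masks, a userspace program can wait on a DCCP socket like any other descriptor. A hedged sketch; SOCK_DCCP and IPPROTO_DCCP are spelled as the literal values the module aliases at the bottom of this file use, since headers of this era may lack the named macros:

#include <poll.h>
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, 6 /* SOCK_DCCP */, 33 /* IPPROTO_DCCP */);
	struct pollfd pfd;

	if (fd < 0) {
		perror("socket"); /* kernel built without DCCP, perhaps */
		return 1;
	}
	pfd.fd = fd;
	pfd.events = POLLIN;
	/* ... connect the socket here, then wait for data or hangup ... */
	if (poll(&pfd, 1, 1000) > 0 && (pfd.revents & (POLLIN | POLLHUP)))
		printf("readable or hung up: revents=0x%x\n",
		       (unsigned int)pfd.revents);
	return 0;
}
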
198
199int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
200{
201 dccp_pr_debug("entry\n");
202 return -ENOIOCTLCMD;
203}
204
205int dccp_setsockopt(struct sock *sk, int level, int optname,
206 char __user *optval, int optlen)
207{
208 struct dccp_sock *dp;
209 int err;
210 int val;
211
212 if (level != SOL_DCCP)
213 return ip_setsockopt(sk, level, optname, optval, optlen);
214
215 if (optlen < sizeof(int))
216 return -EINVAL;
217
218 if (get_user(val, (int __user *)optval))
219 return -EFAULT;
220
221 lock_sock(sk);
222
223 dp = dccp_sk(sk);
224 err = 0;
225
226 switch (optname) {
227 case DCCP_SOCKOPT_PACKET_SIZE:
228 dp->dccps_packet_size = val;
229 break;
230 default:
231 err = -ENOPROTOOPT;
232 break;
233 }
234
235 release_sock(sk);
236 return err;
237}
238
239int dccp_getsockopt(struct sock *sk, int level, int optname,
240 char __user *optval, int __user *optlen)
241{
242 struct dccp_sock *dp;
243 int val, len;
244
245 if (level != SOL_DCCP)
246 return ip_getsockopt(sk, level, optname, optval, optlen);
247
248 if (get_user(len, optlen))
249 return -EFAULT;
250
251 len = min_t(unsigned int, len, sizeof(int));
252 if (len < 0)
253 return -EINVAL;
254
255 dp = dccp_sk(sk);
256
257 switch (optname) {
258 case DCCP_SOCKOPT_PACKET_SIZE:
259 val = dp->dccps_packet_size;
260 break;
261 default:
262 return -ENOPROTOOPT;
263 }
264
265 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
266 return -EFAULT;
267
268 return 0;
269}
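
A hedged userspace sketch of the single option pair handled above, DCCP_SOCKOPT_PACKET_SIZE. The SOL_DCCP (269) and option (1) literals are assumptions taken from headers of this era; verify them against linux/socket.h and linux/dccp.h before relying on them:

#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, 6 /* SOCK_DCCP */, 33 /* IPPROTO_DCCP */);
	int val = 1400, out = 0;
	socklen_t len = sizeof(out);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	if (setsockopt(fd, 269 /* SOL_DCCP, assumed */,
		       1 /* DCCP_SOCKOPT_PACKET_SIZE, assumed */,
		       &val, sizeof(val)) < 0)
		perror("setsockopt");
	if (getsockopt(fd, 269, 1, &out, &len) == 0)
		printf("packet size now %d\n", out);
	return 0;
}
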
270
271int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
272 size_t len)
273{
274 const struct dccp_sock *dp = dccp_sk(sk);
275 const int flags = msg->msg_flags;
276 const int noblock = flags & MSG_DONTWAIT;
277 struct sk_buff *skb;
278 int rc, size;
279 long timeo;
280
281 if (len > dp->dccps_mss_cache)
282 return -EMSGSIZE;
283
284 lock_sock(sk);
285 timeo = sock_sndtimeo(sk, noblock);
286
287 /*
288 * We have to use sk_stream_wait_connect here to set sk_write_pending,
289	 * so that the trick in dccp_rcv_request_sent_state_process works.
290 */
291 /* Wait for a connection to finish. */
292 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
293 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
294 goto out_release;
295
296 size = sk->sk_prot->max_header + len;
297 release_sock(sk);
298 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
299 lock_sock(sk);
300 if (skb == NULL)
301 goto out_release;
302
303 skb_reserve(skb, sk->sk_prot->max_header);
304 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
305 if (rc != 0)
306 goto out_discard;
307
308 rc = dccp_write_xmit(sk, skb, &timeo);
309 /*
310 * XXX we don't use sk_write_queue, so just discard the packet.
311 * Current plan however is to _use_ sk_write_queue with
312	 * an algorithm similar to tcp_sendmsg, where the main difference
313 * is that in DCCP we have to respect packet boundaries, so
314 * no coalescing of skbs.
315 *
316 * This bug was _quickly_ found & fixed by just looking at an OSTRA
317 * generated callgraph 8) -acme
318 */
319 if (rc != 0)
320 goto out_discard;
321out_release:
322 release_sock(sk);
323 return rc ? : len;
324out_discard:
325 kfree_skb(skb);
326 goto out_release;
327}
328
329int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
330 size_t len, int nonblock, int flags, int *addr_len)
331{
332 const struct dccp_hdr *dh;
333 long timeo;
334
335 lock_sock(sk);
336
337 if (sk->sk_state == DCCP_LISTEN) {
338 len = -ENOTCONN;
339 goto out;
340 }
341
342 timeo = sock_rcvtimeo(sk, nonblock);
343
344 do {
345 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
346
347 if (skb == NULL)
348 goto verify_sock_status;
349
350 dh = dccp_hdr(skb);
351
352 if (dh->dccph_type == DCCP_PKT_DATA ||
353 dh->dccph_type == DCCP_PKT_DATAACK)
354 goto found_ok_skb;
355
356 if (dh->dccph_type == DCCP_PKT_RESET ||
357 dh->dccph_type == DCCP_PKT_CLOSE) {
358 dccp_pr_debug("found fin ok!\n");
359 len = 0;
360 goto found_fin_ok;
361 }
362 dccp_pr_debug("packet_type=%s\n",
363 dccp_packet_name(dh->dccph_type));
364 sk_eat_skb(sk, skb);
365verify_sock_status:
366 if (sock_flag(sk, SOCK_DONE)) {
367 len = 0;
368 break;
369 }
370
371 if (sk->sk_err) {
372 len = sock_error(sk);
373 break;
374 }
375
376 if (sk->sk_shutdown & RCV_SHUTDOWN) {
377 len = 0;
378 break;
379 }
380
381 if (sk->sk_state == DCCP_CLOSED) {
382 if (!sock_flag(sk, SOCK_DONE)) {
383 /* This occurs when user tries to read
384 * from never connected socket.
385 */
386 len = -ENOTCONN;
387 break;
388 }
389 len = 0;
390 break;
391 }
392
393 if (!timeo) {
394 len = -EAGAIN;
395 break;
396 }
397
398 if (signal_pending(current)) {
399 len = sock_intr_errno(timeo);
400 break;
401 }
402
403 sk_wait_data(sk, &timeo);
404 continue;
405 found_ok_skb:
406 if (len > skb->len)
407 len = skb->len;
408 else if (len < skb->len)
409 msg->msg_flags |= MSG_TRUNC;
410
411 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
412 /* Exception. Bailout! */
413 len = -EFAULT;
414 break;
415 }
416 found_fin_ok:
417 if (!(flags & MSG_PEEK))
418 sk_eat_skb(sk, skb);
419 break;
420 } while (1);
421out:
422 release_sock(sk);
423 return len;
424}
425
426static int inet_dccp_listen(struct socket *sock, int backlog)
427{
428 struct sock *sk = sock->sk;
429 unsigned char old_state;
430 int err;
431
432 lock_sock(sk);
433
434 err = -EINVAL;
435 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
436 goto out;
437
438 old_state = sk->sk_state;
439 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
440 goto out;
441
442 /* Really, if the socket is already in listen state
443 * we can only allow the backlog to be adjusted.
444 */
445 if (old_state != DCCP_LISTEN) {
446 /*
447 * FIXME: here it probably should be sk->sk_prot->listen_start
448 * see tcp_listen_start
449 */
450 err = dccp_listen_start(sk);
451 if (err)
452 goto out;
453 }
454 sk->sk_max_ack_backlog = backlog;
455 err = 0;
456
457out:
458 release_sock(sk);
459 return err;
460}
461
462static const unsigned char dccp_new_state[] = {
463 /* current state: new state: action: */
464 [0] = DCCP_CLOSED,
465 [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
466 [DCCP_REQUESTING] = DCCP_CLOSED,
467 [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
468 [DCCP_LISTEN] = DCCP_CLOSED,
469 [DCCP_RESPOND] = DCCP_CLOSED,
470 [DCCP_CLOSING] = DCCP_CLOSED,
471 [DCCP_TIME_WAIT] = DCCP_CLOSED,
472 [DCCP_CLOSED] = DCCP_CLOSED,
473};
474
475static int dccp_close_state(struct sock *sk)
476{
477 const int next = dccp_new_state[sk->sk_state];
478 const int ns = next & DCCP_STATE_MASK;
479
480 if (ns != sk->sk_state)
481 dccp_set_state(sk, ns);
482
483 return next & DCCP_ACTION_FIN;
484}
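
dccp_new_state[] packs the successor state and a send-CLOSE/CLOSEREQ flag into one byte, and dccp_close_state() splits them back apart with DCCP_STATE_MASK and DCCP_ACTION_FIN. A hedged sketch of that decode; the 0xf mask and 1<<7 flag mirror dccp.h of this era but should be treated as assumptions:

#include <stdio.h>

#define STATE_MASK 0x0f      /* low bits: successor state (assumption) */
#define ACTION_FIN (1 << 7)  /* high bit: emit CLOSE/CLOSEREQ (assumption) */

int main(void)
{
	/* shape of the DCCP_OPEN entry: a CLOSING state plus the flag */
	unsigned char entry = 6 /* hypothetical CLOSING value */ | ACTION_FIN;

	printf("next state %d, send a close packet: %s\n",
	       entry & STATE_MASK, (entry & ACTION_FIN) ? "yes" : "no");
	return 0;
}
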
485
486void dccp_close(struct sock *sk, long timeout)
487{
488 struct sk_buff *skb;
489
490 lock_sock(sk);
491
492 sk->sk_shutdown = SHUTDOWN_MASK;
493
494 if (sk->sk_state == DCCP_LISTEN) {
495 dccp_set_state(sk, DCCP_CLOSED);
496
497 /* Special case. */
498 inet_csk_listen_stop(sk);
499
500 goto adjudge_to_death;
501 }
502
503 /*
504 * We need to flush the recv. buffs. We do this only on the
505 * descriptor close, not protocol-sourced closes, because the
506	 * reader process may not have drained the data yet!
507 */
508 /* FIXME: check for unread data */
509 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
510 __kfree_skb(skb);
511 }
512
513 if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
514 /* Check zero linger _after_ checking for unread data. */
515 sk->sk_prot->disconnect(sk, 0);
516 } else if (dccp_close_state(sk)) {
517 dccp_send_close(sk, 1);
518 }
519
520 sk_stream_wait_close(sk, timeout);
521
522adjudge_to_death:
523 /*
524 * It is the last release_sock in its life. It will remove backlog.
525 */
526 release_sock(sk);
527 /*
528 * Now socket is owned by kernel and we acquire BH lock
529 * to finish close. No need to check for user refs.
530 */
531 local_bh_disable();
532 bh_lock_sock(sk);
533 BUG_TRAP(!sock_owned_by_user(sk));
534
535 sock_hold(sk);
536 sock_orphan(sk);
537
538 /*
539 * The last release_sock may have processed the CLOSE or RESET
540 * packet moving sock to CLOSED state, if not we have to fire
541 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
542 * in draft-ietf-dccp-spec-11. -acme
543 */
544 if (sk->sk_state == DCCP_CLOSING) {
545 /* FIXME: should start at 2 * RTT */
546 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
547 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
548 inet_csk(sk)->icsk_rto,
549 DCCP_RTO_MAX);
550#if 0
551 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
552 dccp_set_state(sk, DCCP_CLOSED);
553#endif
554 }
555
556 atomic_inc(sk->sk_prot->orphan_count);
557 if (sk->sk_state == DCCP_CLOSED)
558 inet_csk_destroy_sock(sk);
559
560 /* Otherwise, socket is reprieved until protocol close. */
561
562 bh_unlock_sock(sk);
563 local_bh_enable();
564 sock_put(sk);
565}
566
567void dccp_shutdown(struct sock *sk, int how)
568{
569 dccp_pr_debug("entry\n");
570}
571
572static struct proto_ops inet_dccp_ops = {
573 .family = PF_INET,
574 .owner = THIS_MODULE,
575 .release = inet_release,
576 .bind = inet_bind,
577 .connect = inet_stream_connect,
578 .socketpair = sock_no_socketpair,
579 .accept = inet_accept,
580 .getname = inet_getname,
581 /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
582 .poll = dccp_poll,
583 .ioctl = inet_ioctl,
584 /* FIXME: work on inet_listen to rename it to sock_common_listen */
585 .listen = inet_dccp_listen,
586 .shutdown = inet_shutdown,
587 .setsockopt = sock_common_setsockopt,
588 .getsockopt = sock_common_getsockopt,
589 .sendmsg = inet_sendmsg,
590 .recvmsg = sock_common_recvmsg,
591 .mmap = sock_no_mmap,
592 .sendpage = sock_no_sendpage,
593};
594
595extern struct net_proto_family inet_family_ops;
596
597static struct inet_protosw dccp_v4_protosw = {
598 .type = SOCK_DCCP,
599 .protocol = IPPROTO_DCCP,
600 .prot = &dccp_v4_prot,
601 .ops = &inet_dccp_ops,
602 .capability = -1,
603 .no_check = 0,
604 .flags = 0,
605};
606
607/*
608 * This is the global socket data structure used for responding to
609 * Out-of-the-blue (OOTB) packets. A control sock will be created
610 * for this socket at initialization time.
611 */
612struct socket *dccp_ctl_socket;
613
614static char dccp_ctl_socket_err_msg[] __initdata =
615 KERN_ERR "DCCP: Failed to create the control socket.\n";
616
617static int __init dccp_ctl_sock_init(void)
618{
619 int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
620 &dccp_ctl_socket);
621 if (rc < 0)
622 printk(dccp_ctl_socket_err_msg);
623 else {
624 dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
625 inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
626
627 /* Unhash it so that IP input processing does not even
628 * see it, we do not wish this socket to see incoming
629 * packets.
630 */
631 dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
632 }
633
634 return rc;
635}
636
637#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
638void dccp_ctl_sock_exit(void)
639{
640 if (dccp_ctl_socket != NULL) {
641 sock_release(dccp_ctl_socket);
642 dccp_ctl_socket = NULL;
643 }
644}
645
646EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
647#endif
648
649static int __init init_dccp_v4_mibs(void)
650{
651 int rc = -ENOMEM;
652
653 dccp_statistics[0] = alloc_percpu(struct dccp_mib);
654 if (dccp_statistics[0] == NULL)
655 goto out;
656
657 dccp_statistics[1] = alloc_percpu(struct dccp_mib);
658 if (dccp_statistics[1] == NULL)
659 goto out_free_one;
660
661 rc = 0;
662out:
663 return rc;
664out_free_one:
665 free_percpu(dccp_statistics[0]);
666 dccp_statistics[0] = NULL;
667 goto out;
668
669}
670
671static int thash_entries;
672module_param(thash_entries, int, 0444);
673MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
674
675#ifdef CONFIG_IP_DCCP_DEBUG
676int dccp_debug;
677module_param(dccp_debug, int, 0444);
678MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
679#endif
680
681static int __init dccp_init(void)
682{
683 unsigned long goal;
684 int ehash_order, bhash_order, i;
685 int rc = proto_register(&dccp_v4_prot, 1);
686
687 if (rc)
688 goto out;
689
690 dccp_hashinfo.bind_bucket_cachep =
691 kmem_cache_create("dccp_bind_bucket",
692 sizeof(struct inet_bind_bucket), 0,
693 SLAB_HWCACHE_ALIGN, NULL, NULL);
694 if (!dccp_hashinfo.bind_bucket_cachep)
695 goto out_proto_unregister;
696
697 /*
698 * Size and allocate the main established and bind bucket
699 * hash tables.
700 *
701 * The methodology is similar to that of the buffer cache.
702 */
703 if (num_physpages >= (128 * 1024))
704 goal = num_physpages >> (21 - PAGE_SHIFT);
705 else
706 goal = num_physpages >> (23 - PAGE_SHIFT);
707
708 if (thash_entries)
709 goal = (thash_entries *
710 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
711 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
712 ;
713 do {
714 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
715 sizeof(struct inet_ehash_bucket);
716 dccp_hashinfo.ehash_size >>= 1;
717 while (dccp_hashinfo.ehash_size &
718 (dccp_hashinfo.ehash_size - 1))
719 dccp_hashinfo.ehash_size--;
720 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
721 __get_free_pages(GFP_ATOMIC, ehash_order);
722 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
723
724 if (!dccp_hashinfo.ehash) {
725 printk(KERN_CRIT "Failed to allocate DCCP "
726 "established hash table\n");
727 goto out_free_bind_bucket_cachep;
728 }
729
730 for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
731 rwlock_init(&dccp_hashinfo.ehash[i].lock);
732 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
733 }
734
735 bhash_order = ehash_order;
736
737 do {
738 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
739 sizeof(struct inet_bind_hashbucket);
740 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
741 bhash_order > 0)
742 continue;
743 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
744 __get_free_pages(GFP_ATOMIC, bhash_order);
745 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
746
747 if (!dccp_hashinfo.bhash) {
748 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
749 goto out_free_dccp_ehash;
750 }
751
752 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
753 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
754 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
755 }
756
757 if (init_dccp_v4_mibs())
758 goto out_free_dccp_bhash;
759
760 rc = -EAGAIN;
761 if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
762 goto out_free_dccp_v4_mibs;
763
764 inet_register_protosw(&dccp_v4_protosw);
765
766 rc = dccp_ctl_sock_init();
767 if (rc)
768 goto out_unregister_protosw;
769out:
770 return rc;
771out_unregister_protosw:
772 inet_unregister_protosw(&dccp_v4_protosw);
773 inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
774out_free_dccp_v4_mibs:
775 free_percpu(dccp_statistics[0]);
776 free_percpu(dccp_statistics[1]);
777 dccp_statistics[0] = dccp_statistics[1] = NULL;
778out_free_dccp_bhash:
779 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
780 dccp_hashinfo.bhash = NULL;
781out_free_dccp_ehash:
782 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
783 dccp_hashinfo.ehash = NULL;
784out_free_bind_bucket_cachep:
785 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
786 dccp_hashinfo.bind_bucket_cachep = NULL;
787out_proto_unregister:
788 proto_unregister(&dccp_v4_prot);
789 goto out;
790}
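
The sizing loop above picks the smallest page order whose allocation covers the goal, then shrinks the bucket count to a power of two so index masking works. A hedged standalone sketch of that logic; the page and bucket sizes are illustrative, and it mirrors the ehash branch only loosely (the halving and allocation-retry steps are omitted):

#include <stdio.h>

int main(void)
{
	unsigned long goal = 300;        /* pages wanted (illustrative) */
	unsigned long page_size = 4096;  /* assumption */
	unsigned long bucket_size = 32;  /* assumption */
	unsigned long size;
	int order;

	/* smallest order with 2^order pages >= goal */
	for (order = 0; (1UL << order) < goal; order++)
		;
	size = (1UL << order) * page_size / bucket_size;
	/* round the bucket count down to a power of two, as the kernel loop does */
	while (size & (size - 1))
		size--;
	printf("order %d -> %lu buckets\n", order, size);
	return 0;
}
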
791
792static const char dccp_del_proto_err_msg[] __exitdata =
793 KERN_ERR "can't remove dccp net_protocol\n";
794
795static void __exit dccp_fini(void)
796{
797 inet_unregister_protosw(&dccp_v4_protosw);
798
799 if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
800 printk(dccp_del_proto_err_msg);
801
802 free_percpu(dccp_statistics[0]);
803 free_percpu(dccp_statistics[1]);
804 free_pages((unsigned long)dccp_hashinfo.bhash,
805 get_order(dccp_hashinfo.bhash_size *
806 sizeof(struct inet_bind_hashbucket)));
807 free_pages((unsigned long)dccp_hashinfo.ehash,
808 get_order(dccp_hashinfo.ehash_size *
809 sizeof(struct inet_ehash_bucket)));
810 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
811 proto_unregister(&dccp_v4_prot);
812}
813
814module_init(dccp_init);
815module_exit(dccp_fini);
816
817/*
818 * __stringify doesn't like enums, so use the SOCK_DCCP (6) and IPPROTO_DCCP (33)
819 * values directly. Also cover the case where the protocol is not specified,
820 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
821 */
822MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
823MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
824MODULE_LICENSE("GPL");
825MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
826MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
new file mode 100644
index 000000000000..aa34b576e228
--- /dev/null
+++ b/net/dccp/timer.c
@@ -0,0 +1,255 @@
1/*
2 * net/dccp/timer.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/config.h>
14#include <linux/dccp.h>
15#include <linux/skbuff.h>
16
17#include "dccp.h"
18
19static void dccp_write_timer(unsigned long data);
20static void dccp_keepalive_timer(unsigned long data);
21static void dccp_delack_timer(unsigned long data);
22
23void dccp_init_xmit_timers(struct sock *sk)
24{
25 inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
26 &dccp_keepalive_timer);
27}
28
29static void dccp_write_err(struct sock *sk)
30{
31 sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
32 sk->sk_error_report(sk);
33
34 dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED);
35 dccp_done(sk);
36 DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT);
37}
38
39/* A write timeout has occurred. Process the after effects. */
40static int dccp_write_timeout(struct sock *sk)
41{
42 const struct inet_connection_sock *icsk = inet_csk(sk);
43 int retry_until;
44
45 if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) {
46 if (icsk->icsk_retransmits != 0)
47 dst_negative_advice(&sk->sk_dst_cache);
48 retry_until = icsk->icsk_syn_retries ? :
49 /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */;
50 } else {
51 if (icsk->icsk_retransmits >=
52 /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) {
53 /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu
54 black hole detection. :-(
55
56	   This is the place to make it. It is not made. I do not want
57	   to make it. It is disgusting. It does not work in any
58	   case. Let me cite the same draft, which requires
59 us to implement this:
60
61 "The one security concern raised by this memo is that ICMP black holes
62 are often caused by over-zealous security administrators who block
63 all ICMP messages. It is vitally important that those who design and
64 deploy security systems understand the impact of strict filtering on
65 upper-layer protocols. The safest web site in the world is worthless
66 if most TCP implementations cannot transfer data from it. It would
67 be far nicer to have all of the black holes fixed rather than fixing
68 all of the TCP implementations."
69
70 Golden words :-).
71 */
72
73 dst_negative_advice(&sk->sk_dst_cache);
74 }
75
76 retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */;
77 /*
78		 * FIXME: see tcp_write_timeout and tcp_out_of_resources
79 */
80 }
81
82 if (icsk->icsk_retransmits >= retry_until) {
83 /* Has it gone just too far? */
84 dccp_write_err(sk);
85 return 1;
86 }
87 return 0;
88}
89
90/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */
91static void dccp_delack_timer(unsigned long data)
92{
93 struct sock *sk = (struct sock *)data;
94 struct inet_connection_sock *icsk = inet_csk(sk);
95
96 bh_lock_sock(sk);
97 if (sock_owned_by_user(sk)) {
98 /* Try again later. */
99 icsk->icsk_ack.blocked = 1;
100 NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
101 sk_reset_timer(sk, &icsk->icsk_delack_timer,
102 jiffies + TCP_DELACK_MIN);
103 goto out;
104 }
105
106 if (sk->sk_state == DCCP_CLOSED ||
107 !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
108 goto out;
109 if (time_after(icsk->icsk_ack.timeout, jiffies)) {
110 sk_reset_timer(sk, &icsk->icsk_delack_timer,
111 icsk->icsk_ack.timeout);
112 goto out;
113 }
114
115 icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
116
117 if (inet_csk_ack_scheduled(sk)) {
118 if (!icsk->icsk_ack.pingpong) {
119 /* Delayed ACK missed: inflate ATO. */
120 icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
121 icsk->icsk_rto);
122 } else {
123 /* Delayed ACK missed: leave pingpong mode and
124 * deflate ATO.
125 */
126 icsk->icsk_ack.pingpong = 0;
127 icsk->icsk_ack.ato = TCP_ATO_MIN;
128 }
129 dccp_send_ack(sk);
130 NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
131 }
132out:
133 bh_unlock_sock(sk);
134 sock_put(sk);
135}
136
137/*
138 * The DCCP retransmit timer.
139 */
140static void dccp_retransmit_timer(struct sock *sk)
141{
142 struct inet_connection_sock *icsk = inet_csk(sk);
143
144 /*
145 * sk->sk_send_head has to have one skb with
146 * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP
147 * packet types (REQUEST, RESPONSE, the ACK in the 3way handshake
148 * (PARTOPEN timer), etc).
149 */
150 BUG_TRAP(sk->sk_send_head != NULL);
151
152 /*
153	 * More than 4MSL (8 minutes) has passed; a RESET(aborted) was
154 * sent, no need to retransmit, this sock is dead.
155 */
156 if (dccp_write_timeout(sk))
157 goto out;
158
159 /*
160 * We want to know the number of packets retransmitted, not the
161 * total number of retransmissions of clones of original packets.
162 */
163 if (icsk->icsk_retransmits == 0)
164 DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS);
165
166 if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) {
167 /*
168 * Retransmission failed because of local congestion,
169 * do not backoff.
170 */
171 if (icsk->icsk_retransmits == 0)
172 icsk->icsk_retransmits = 1;
173 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
174 min(icsk->icsk_rto,
175 TCP_RESOURCE_PROBE_INTERVAL),
176 DCCP_RTO_MAX);
177 goto out;
178 }
179
180 icsk->icsk_backoff++;
181 icsk->icsk_retransmits++;
182
183 icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
184 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto,
185 DCCP_RTO_MAX);
186 if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */)
187 __sk_dst_reset(sk);
188out:;
189}
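
Each pass through dccp_retransmit_timer() doubles icsk_rto and caps it at DCCP_RTO_MAX, the standard exponential backoff. A hedged sketch of the resulting retry schedule, with an illustrative 1-second initial RTO and a 64-second cap standing in for the kernel's actual constants:

#include <stdio.h>

int main(void)
{
	unsigned int rto = 1;       /* illustrative initial RTO, seconds */
	unsigned int rto_max = 64;  /* stand-in for DCCP_RTO_MAX */
	unsigned int retries;

	for (retries = 1; retries <= 8; retries++) {
		/* rto = min(rto << 1, rto_max), as in the timer above */
		rto = rto * 2 > rto_max ? rto_max : rto * 2;
		printf("retry %u: next timeout in %us\n", retries, rto);
	}
	return 0;
}
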
190
191static void dccp_write_timer(unsigned long data)
192{
193 struct sock *sk = (struct sock *)data;
194 struct inet_connection_sock *icsk = inet_csk(sk);
195 int event = 0;
196
197 bh_lock_sock(sk);
198 if (sock_owned_by_user(sk)) {
199 /* Try again later */
200 sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
201 jiffies + (HZ / 20));
202 goto out;
203 }
204
205 if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending)
206 goto out;
207
208 if (time_after(icsk->icsk_timeout, jiffies)) {
209 sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
210 icsk->icsk_timeout);
211 goto out;
212 }
213
214 event = icsk->icsk_pending;
215 icsk->icsk_pending = 0;
216
217 switch (event) {
218 case ICSK_TIME_RETRANS:
219 dccp_retransmit_timer(sk);
220 break;
221 }
222out:
223 bh_unlock_sock(sk);
224 sock_put(sk);
225}
226
227/*
228 * Timer for listening sockets
229 */
230static void dccp_response_timer(struct sock *sk)
231{
232 inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT,
233 DCCP_RTO_MAX);
234}
235
236static void dccp_keepalive_timer(unsigned long data)
237{
238 struct sock *sk = (struct sock *)data;
239
240 /* Only process if socket is not in use. */
241 bh_lock_sock(sk);
242 if (sock_owned_by_user(sk)) {
243 /* Try again later. */
244 inet_csk_reset_keepalive_timer(sk, HZ / 20);
245 goto out;
246 }
247
248 if (sk->sk_state == DCCP_LISTEN) {
249 dccp_response_timer(sk);
250 goto out;
251 }
252out:
253 bh_unlock_sock(sk);
254 sock_put(sk);
255}
diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig
index 2101da542ba8..92f2ec46fd22 100644
--- a/net/decnet/Kconfig
+++ b/net/decnet/Kconfig
@@ -1,6 +1,29 @@
1# 1#
2# DECnet configuration 2# DECnet configuration
3# 3#
4config DECNET
5 tristate "DECnet Support"
6 ---help---
7 The DECnet networking protocol was used in many products made by
8 Digital (now Compaq). It provides reliable stream and sequenced
9 packet communications over which run a variety of services similar
10 to those which run over TCP/IP.
11
12 To find some tools to use with the kernel layer support, please
13 look at Patrick Caulfield's web site:
14 <http://linux-decnet.sourceforge.net/>.
15
16 More detailed documentation is available in
17 <file:Documentation/networking/decnet.txt>.
18
19 Be sure to say Y to "/proc file system support" and "Sysctl support"
20 below when using DECnet, since you will need sysctl support to aid
21 in configuration at run time.
22
23 The DECnet code is also available as a module ( = code which can be
24 inserted in and removed from the running kernel whenever you want).
25 The module is called decnet.
26
4config DECNET_ROUTER 27config DECNET_ROUTER
5 bool "DECnet: router support (EXPERIMENTAL)" 28 bool "DECnet: router support (EXPERIMENTAL)"
6 depends on DECNET && EXPERIMENTAL 29 depends on DECNET && EXPERIMENTAL
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 96a02800cd28..348f36b529f7 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -118,7 +118,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat
118#include <linux/netfilter.h> 118#include <linux/netfilter.h>
119#include <linux/seq_file.h> 119#include <linux/seq_file.h>
120#include <net/sock.h> 120#include <net/sock.h>
121#include <net/tcp.h> 121#include <net/tcp_states.h>
122#include <net/flow.h> 122#include <net/flow.h>
123#include <asm/system.h> 123#include <asm/system.h>
124#include <asm/ioctls.h> 124#include <asm/ioctls.h>
@@ -1763,7 +1763,7 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock,
1763 nskb = skb->next; 1763 nskb = skb->next;
1764 1764
1765 if (skb->len == 0) { 1765 if (skb->len == 0) {
1766 skb_unlink(skb); 1766 skb_unlink(skb, queue);
1767 kfree_skb(skb); 1767 kfree_skb(skb);
1768 /* 1768 /*
1769 * N.B. Don't refer to skb or cb after this point 1769 * N.B. Don't refer to skb or cb after this point
@@ -1876,17 +1876,27 @@ static inline unsigned int dn_current_mss(struct sock *sk, int flags)
1876 return mss_now; 1876 return mss_now;
1877} 1877}
1878 1878
1879static int dn_error(struct sock *sk, int flags, int err) 1879/*
1880 * N.B. We get the timeout wrong here, but then we always did get it
1881 * wrong before and this is another step along the road to correcting
1882 * it. It ought to get updated each time we pass through the routine,
1883 * but in practise it probably doesn't matter too much for now.
1884 */
1885static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk,
1886 unsigned long datalen, int noblock,
1887 int *errcode)
1880{ 1888{
1881 if (err == -EPIPE) 1889 struct sk_buff *skb = sock_alloc_send_skb(sk, datalen,
1882 err = sock_error(sk) ? : -EPIPE; 1890 noblock, errcode);
1883 if (err == -EPIPE && !(flags & MSG_NOSIGNAL)) 1891 if (skb) {
1884 send_sig(SIGPIPE, current, 0); 1892 skb->protocol = __constant_htons(ETH_P_DNA_RT);
1885 return err; 1893 skb->pkt_type = PACKET_OUTGOING;
1894 }
1895 return skb;
1886} 1896}
1887 1897
1888static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, 1898static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
1889 struct msghdr *msg, size_t size) 1899 struct msghdr *msg, size_t size)
1890{ 1900{
1891 struct sock *sk = sock->sk; 1901 struct sock *sk = sock->sk;
1892 struct dn_scp *scp = DN_SK(sk); 1902 struct dn_scp *scp = DN_SK(sk);
@@ -1901,7 +1911,7 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
1901 struct dn_skb_cb *cb; 1911 struct dn_skb_cb *cb;
1902 size_t len; 1912 size_t len;
1903 unsigned char fctype; 1913 unsigned char fctype;
1904 long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 1914 long timeo;
1905 1915
1906 if (flags & ~(MSG_TRYHARD|MSG_OOB|MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL|MSG_MORE|MSG_CMSG_COMPAT)) 1916 if (flags & ~(MSG_TRYHARD|MSG_OOB|MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL|MSG_MORE|MSG_CMSG_COMPAT))
1907 return -EOPNOTSUPP; 1917 return -EOPNOTSUPP;
@@ -1909,18 +1919,21 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
1909 if (addr_len && (addr_len != sizeof(struct sockaddr_dn))) 1919 if (addr_len && (addr_len != sizeof(struct sockaddr_dn)))
1910 return -EINVAL; 1920 return -EINVAL;
1911 1921
1922 lock_sock(sk);
1923 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
1912 /* 1924 /*
1913 * The only difference between stream sockets and sequenced packet 1925 * The only difference between stream sockets and sequenced packet
1914 * sockets is that the stream sockets always behave as if MSG_EOR 1926 * sockets is that the stream sockets always behave as if MSG_EOR
1915 * has been set. 1927 * has been set.
1916 */ 1928 */
1917 if (sock->type == SOCK_STREAM) { 1929 if (sock->type == SOCK_STREAM) {
1918 if (flags & MSG_EOR) 1930 if (flags & MSG_EOR) {
1919 return -EINVAL; 1931 err = -EINVAL;
1932 goto out;
1933 }
1920 flags |= MSG_EOR; 1934 flags |= MSG_EOR;
1921 } 1935 }
1922 1936
1923 lock_sock(sk);
1924 1937
1925 err = dn_check_state(sk, addr, addr_len, &timeo, flags); 1938 err = dn_check_state(sk, addr, addr_len, &timeo, flags);
1926 if (err) 1939 if (err)
@@ -1989,8 +2002,12 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
1989 2002
1990 /* 2003 /*
1991 * Get a suitably sized skb. 2004 * Get a suitably sized skb.
2005 * 64 is a bit of a hack really, but its larger than any
2006 * link-layer headers and has served us well as a good
2007 * guess as to their real length.
1992 */ 2008 */
1993 skb = dn_alloc_send_skb(sk, &len, flags & MSG_DONTWAIT, timeo, &err); 2009 skb = dn_alloc_send_pskb(sk, len + 64 + DN_MAX_NSP_DATA_HEADER,
2010 flags & MSG_DONTWAIT, &err);
1994 2011
1995 if (err) 2012 if (err)
1996 break; 2013 break;
@@ -2000,7 +2017,7 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
2000 2017
2001 cb = DN_SKB_CB(skb); 2018 cb = DN_SKB_CB(skb);
2002 2019
2003 skb_reserve(skb, DN_MAX_NSP_DATA_HEADER); 2020 skb_reserve(skb, 64 + DN_MAX_NSP_DATA_HEADER);
2004 2021
2005 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { 2022 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
2006 err = -EFAULT; 2023 err = -EFAULT;
@@ -2045,7 +2062,7 @@ out:
2045 return sent ? sent : err; 2062 return sent ? sent : err;
2046 2063
2047out_err: 2064out_err:
2048 err = dn_error(sk, flags, err); 2065 err = sk_stream_error(sk, flags, err);
2049 release_sock(sk); 2066 release_sock(sk);
2050 return err; 2067 return err;
2051} 2068}
@@ -2073,7 +2090,7 @@ static struct notifier_block dn_dev_notifier = {
2073 .notifier_call = dn_device_event, 2090 .notifier_call = dn_device_event,
2074}; 2091};
2075 2092
2076extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *); 2093extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
2077 2094
2078static struct packet_type dn_dix_packet_type = { 2095static struct packet_type dn_dix_packet_type = {
2079 .type = __constant_htons(ETH_P_DNA_RT), 2096 .type = __constant_htons(ETH_P_DNA_RT),
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 00233ecbc9cb..5610bb16dbf9 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -752,16 +752,16 @@ static void rtmsg_ifa(int event, struct dn_ifaddr *ifa)
752 752
753 skb = alloc_skb(size, GFP_KERNEL); 753 skb = alloc_skb(size, GFP_KERNEL);
754 if (!skb) { 754 if (!skb) {
755 netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, ENOBUFS); 755 netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, ENOBUFS);
756 return; 756 return;
757 } 757 }
758 if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { 758 if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
759 kfree_skb(skb); 759 kfree_skb(skb);
760 netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, EINVAL); 760 netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, EINVAL);
761 return; 761 return;
762 } 762 }
763 NETLINK_CB(skb).dst_groups = RTMGRP_DECnet_IFADDR; 763 NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_IFADDR;
764 netlink_broadcast(rtnl, skb, 0, RTMGRP_DECnet_IFADDR, GFP_KERNEL); 764 netlink_broadcast(rtnl, skb, 0, RTNLGRP_DECnet_IFADDR, GFP_KERNEL);
765} 765}
766 766
767static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) 767static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index f32dba9e26fe..8d0cc3cf3e49 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -148,12 +148,12 @@ static int dn_neigh_construct(struct neighbour *neigh)
148 148
149 __neigh_parms_put(neigh->parms); 149 __neigh_parms_put(neigh->parms);
150 neigh->parms = neigh_parms_clone(parms); 150 neigh->parms = neigh_parms_clone(parms);
151 rcu_read_unlock();
152 151
153 if (dn_db->use_long) 152 if (dn_db->use_long)
154 neigh->ops = &dn_long_ops; 153 neigh->ops = &dn_long_ops;
155 else 154 else
156 neigh->ops = &dn_short_ops; 155 neigh->ops = &dn_short_ops;
156 rcu_read_unlock();
157 157
158 if (dn->flags & DN_NDFLAG_P3) 158 if (dn->flags & DN_NDFLAG_P3)
159 neigh->ops = &dn_phase3_ops; 159 neigh->ops = &dn_phase3_ops;
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 202dbde9850d..369f25b60f3f 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -60,7 +60,7 @@
60#include <linux/inet.h> 60#include <linux/inet.h>
61#include <linux/route.h> 61#include <linux/route.h>
62#include <net/sock.h> 62#include <net/sock.h>
63#include <net/tcp.h> 63#include <net/tcp_states.h>
64#include <asm/system.h> 64#include <asm/system.h>
65#include <linux/fcntl.h> 65#include <linux/fcntl.h>
66#include <linux/mm.h> 66#include <linux/mm.h>
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 8cce1fdbda90..53633d352868 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -137,69 +137,6 @@ struct sk_buff *dn_alloc_skb(struct sock *sk, int size, int pri)
 }
 
 /*
- * Wrapper for the above, for allocs of data skbs. We try and get the
- * whole size thats been asked for (plus 11 bytes of header). If this
- * fails, then we try for any size over 16 bytes for SOCK_STREAMS.
- */
-struct sk_buff *dn_alloc_send_skb(struct sock *sk, size_t *size, int noblock, long timeo, int *err)
-{
-	int space;
-	int len;
-	struct sk_buff *skb = NULL;
-
-	*err = 0;
-
-	while(skb == NULL) {
-		if (signal_pending(current)) {
-			*err = sock_intr_errno(timeo);
-			break;
-		}
-
-		if (sk->sk_shutdown & SEND_SHUTDOWN) {
-			*err = EINVAL;
-			break;
-		}
-
-		if (sk->sk_err)
-			break;
-
-		len = *size + 11;
-		space = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
-
-		if (space < len) {
-			if ((sk->sk_socket->type == SOCK_STREAM) &&
-			    (space >= (16 + 11)))
-				len = space;
-		}
-
-		if (space < len) {
-			set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
-			if (noblock) {
-				*err = EWOULDBLOCK;
-				break;
-			}
-
-			clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-			SOCK_SLEEP_PRE(sk)
-
-			if ((sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc)) <
-			    len)
-				schedule();
-
-			SOCK_SLEEP_POST(sk)
-			continue;
-		}
-
-		if ((skb = dn_alloc_skb(sk, len, sk->sk_allocation)) == NULL)
-			continue;
-
-		*size = len - 11;
-	}
-
-	return skb;
-}
-
-/*
  * Calculate persist timer based upon the smoothed round
  * trip time and the variance. Backoff according to the
  * nsp_backoff[] array.
@@ -479,7 +416,7 @@ int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, struct sk_buff
 		xmit_count = cb2->xmit_count;
 		segnum = cb2->segnum;
 		/* Remove and drop ack'ed packet */
-		skb_unlink(ack);
+		skb_unlink(ack, q);
 		kfree_skb(ack);
 		ack = NULL;
 
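
Note: skb_unlink() now takes the queue to unlink from as an explicit second argument (here q, the queue being walked), rather than inferring it from the skb itself, e.g.:

	/* Remove and drop the acked packet from the queue it sits on. */
	skb_unlink(ack, q);
	kfree_skb(ack);
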
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 2399fa8a3f86..2c915f305be3 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -572,7 +572,7 @@ static int dn_route_ptp_hello(struct sk_buff *skb)
 		return NET_RX_SUCCESS;
 }
 
-int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
 {
 	struct dn_skb_cb *cb;
 	unsigned char flags = 0;
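
Note: receive handlers registered through struct packet_type gain a fourth parameter, the original ingress device. A hypothetical handler under the new signature (my_proto_rcv and my_packet_type are illustrative names):

	static int my_proto_rcv(struct sk_buff *skb, struct net_device *dev,
	                        struct packet_type *pt,
	                        struct net_device *orig_dev)
	{
		/* dev can differ from orig_dev when the frame came in
		 * through a software device such as a bond or VLAN;
		 * orig_dev is the device the frame actually arrived on. */
		kfree_skb(skb);
		return NET_RX_SUCCESS;
	}

	static struct packet_type my_packet_type = {
		.type = __constant_htons(ETH_P_DNA_RT),
		.func = my_proto_rcv,
	};

It is registered as before with dev_add_pack(&my_packet_type).
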
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 28ba5777a25a..eeba56f99323 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -79,7 +79,7 @@ for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_n
 static DEFINE_RWLOCK(dn_fib_tables_lock);
 struct dn_fib_table *dn_fib_tables[RT_TABLE_MAX + 1];
 
-static kmem_cache_t *dn_hash_kmem;
+static kmem_cache_t *dn_hash_kmem __read_mostly;
 static int dn_fib_hash_zombies;
 
 static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz)
@@ -349,10 +349,10 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id,
 		kfree_skb(skb);
 		return;
 	}
-	NETLINK_CB(skb).dst_groups = RTMGRP_DECnet_ROUTE;
+	NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_ROUTE;
 	if (nlh->nlmsg_flags & NLM_F_ECHO)
 		atomic_inc(&skb->users);
-	netlink_broadcast(rtnl, skb, pid, RTMGRP_DECnet_ROUTE, GFP_KERNEL);
+	netlink_broadcast(rtnl, skb, pid, RTNLGRP_DECnet_ROUTE, GFP_KERNEL);
 	if (nlh->nlmsg_flags & NLM_F_ECHO)
 		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
 }
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 284a9998e53d..1ab94c6e22ed 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -19,6 +19,7 @@
 #include <linux/netfilter.h>
 #include <linux/spinlock.h>
 #include <linux/netlink.h>
+#include <linux/netfilter_decnet.h>
 
 #include <net/sock.h>
 #include <net/flow.h>
@@ -71,10 +72,10 @@ static void dnrmg_send_peer(struct sk_buff *skb)
 
 	switch(flags & DN_RT_CNTL_MSK) {
 	case DN_RT_PKT_L1RT:
-		group = DNRMG_L1_GROUP;
+		group = DNRNG_NLGRP_L1;
 		break;
 	case DN_RT_PKT_L2RT:
-		group = DNRMG_L2_GROUP;
+		group = DNRNG_NLGRP_L2;
 		break;
 	default:
 		return;
@@ -83,7 +84,7 @@ static void dnrmg_send_peer(struct sk_buff *skb)
 	skb2 = dnrmg_build_message(skb, &status);
 	if (skb2 == NULL)
 		return;
-	NETLINK_CB(skb2).dst_groups = group;
+	NETLINK_CB(skb2).dst_group = group;
 	netlink_broadcast(dnrmg, skb2, 0, group, GFP_ATOMIC);
 }
 
@@ -138,7 +139,8 @@ static int __init init(void)
 {
 	int rv = 0;
 
-	dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk);
+	dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX,
+				      dnrmg_receive_user_sk, THIS_MODULE);
 	if (dnrmg == NULL) {
 		printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
 		return -ENOMEM;
@@ -162,6 +164,7 @@ static void __exit fini(void)
 MODULE_DESCRIPTION("DECnet Routing Message Grabulator");
 MODULE_AUTHOR("Steven Whitehouse <steve@chygwyn.com>");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_DNRTMSG);
 
 module_init(init);
 module_exit(fini);
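
Note: as the hunk above shows, netlink_kernel_create() now takes the number of multicast groups the socket uses and the owning module, and the new MODULE_ALIAS_NET_PF_PROTO() line lets this module be auto-loaded when userspace opens a PF_NETLINK/NETLINK_DNRTMSG socket. A sketch of the updated call, names as in the hunk:

	dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX,
				      dnrmg_receive_user_sk, THIS_MODULE);
	if (dnrmg == NULL)
		return -ENOMEM;
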
diff --git a/net/econet/Kconfig b/net/econet/Kconfig
new file mode 100644
index 000000000000..39a2d2975e0e
--- /dev/null
+++ b/net/econet/Kconfig
@@ -0,0 +1,36 @@
1#
2# Acorn Econet/AUN protocols
3#
4
5config ECONET
6 tristate "Acorn Econet/AUN protocols (EXPERIMENTAL)"
7 depends on EXPERIMENTAL && INET
8 ---help---
9 Econet is a fairly old and slow networking protocol mainly used by
10 Acorn computers to access file and print servers. It uses native
11 Econet network cards. AUN is an implementation of the higher level
12 parts of Econet that runs over ordinary Ethernet connections, on
13 top of the UDP packet protocol, which in turn runs on top of the
14 Internet protocol IP.
15
16 If you say Y here, you can choose with the next two options whether
17 to send Econet/AUN traffic over a UDP Ethernet connection or over
18 a native Econet network card.
19
20 To compile this driver as a module, choose M here: the module
21 will be called econet.
22
23config ECONET_AUNUDP
24 bool "AUN over UDP"
25 depends on ECONET
26 help
27 Say Y here if you want to send Econet/AUN traffic over a UDP
28 connection (UDP is a packet based protocol that runs on top of the
29 Internet protocol IP) using an ordinary Ethernet network card.
30
31config ECONET_NATIVE
32 bool "Native Econet"
33 depends on ECONET
34 help
35 Say Y here if you have a native Econet network card installed in
36 your computer.
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index de691e119e17..4a62093eb343 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -159,7 +159,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
 	if (err)
 		goto out_free;
-	sk->sk_stamp = skb->stamp;
+	skb_get_timestamp(skb, &sk->sk_stamp);
 
 	if (msg->msg_name)
 		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
@@ -869,7 +869,7 @@ static void aun_tx_ack(unsigned long seq, int result)
 
 foundit:
 	tx_result(skb->sk, eb->cookie, result);
-	skb_unlink(skb);
+	skb_unlink(skb, &aun_queue);
 	spin_unlock_irqrestore(&aun_queue_lock, flags);
 	kfree_skb(skb);
 }
@@ -947,7 +947,7 @@ static void ab_cleanup(unsigned long h)
 		{
 			tx_result(skb->sk, eb->cookie,
 				  ECTYPE_TRANSMIT_NOT_PRESENT);
-			skb_unlink(skb);
+			skb_unlink(skb, &aun_queue);
 			kfree_skb(skb);
 		}
 		skb = newskb;
@@ -1009,7 +1009,7 @@ release:
  * Receive an Econet frame from a device.
  */
 
-static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
 {
 	struct ec_framehdr *hdr;
 	struct sock *sk;
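
Note: two API migrations show up in this file: skb->stamp is no longer read directly, with skb_get_timestamp() copying the receive time out of the skb instead, and skb_unlink() names its queue explicitly (&aun_queue above). A sketch (the local stamp variable is illustrative):

	struct timeval stamp;

	skb_get_timestamp(skb, &stamp);	/* RX timestamp of this skb */
	skb_unlink(skb, &aun_queue);	/* detach from that one queue */
	kfree_skb(skb);
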
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index ab60ea63688e..87a052a9a84f 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -62,8 +62,6 @@
 #include <asm/system.h>
 #include <asm/checksum.h>
 
-extern int __init netdev_boot_setup(char *str);
-
 __setup("ether=", netdev_boot_setup);
 
 /*
@@ -155,7 +153,7 @@ int eth_rebuild_header(struct sk_buff *skb)
  * This is normal practice and works for any 'now in use' protocol.
  */
 
-unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev)
+__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ethhdr *eth;
 	unsigned char *rawp;
@@ -163,7 +161,6 @@ unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	skb->mac.raw=skb->data;
 	skb_pull(skb,ETH_HLEN);
 	eth = eth_hdr(skb);
-	skb->input_dev = dev;
 
 	if(*eth->h_dest&1)
 	{
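
Note: eth_type_trans() now returns __be16, so the usual assignment of its network-byte-order result type-checks under sparse's endianness annotations, and the removed skb->input_dev store matches the new explicit orig_dev handler parameter seen earlier. A typical driver receive path reads:

	skb->protocol = eth_type_trans(skb, dev);
	netif_rx(skb);
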
diff --git a/net/ethernet/sysctl_net_ether.c b/net/ethernet/sysctl_net_ether.c
index b81a6d532342..66b39fc342d2 100644
--- a/net/ethernet/sysctl_net_ether.c
+++ b/net/ethernet/sysctl_net_ether.c
@@ -7,6 +7,7 @@
 
 #include <linux/mm.h>
 #include <linux/sysctl.h>
+#include <linux/if_ether.h>
 
 ctl_table ether_table[] = {
 	{0}
diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
new file mode 100644
index 000000000000..58ed4319e693
--- /dev/null
+++ b/net/ieee80211/Kconfig
@@ -0,0 +1,69 @@
1config IEEE80211
2 tristate "Generic IEEE 802.11 Networking Stack"
3 select NET_RADIO
4 ---help---
5 This option enables the hardware independent IEEE 802.11
6 networking stack.
7
8config IEEE80211_DEBUG
9 bool "Enable full debugging output"
10 depends on IEEE80211
11 ---help---
12 This option will enable debug tracing output for the
13 ieee80211 network stack.
14
15 This will result in the kernel module being ~70k larger. You
16 can control which debug output is sent to the kernel log by
17 setting the value in
18
19 /proc/net/ieee80211/debug_level
20
21 For example:
22
 23 % echo 0x00000FF0 > /proc/net/ieee80211/debug_level
24
25 For a list of values you can assign to debug_level, you
26 can look at the bit mask values in <net/ieee80211.h>
27
28 If you are not trying to debug or develop the ieee80211
29 subsystem, you most likely want to say N here.
30
31config IEEE80211_CRYPT_WEP
32 tristate "IEEE 802.11 WEP encryption (802.1x)"
33 depends on IEEE80211
34 select CRYPTO
35 select CRYPTO_ARC4
36 select CRC32
37 ---help---
38 Include software based cipher suites in support of IEEE
39 802.11's WEP. This is needed for WEP as well as 802.1x.
40
 41 This can be compiled as a module and it will be called
42 "ieee80211_crypt_wep".
43
44config IEEE80211_CRYPT_CCMP
45 tristate "IEEE 802.11i CCMP support"
46 depends on IEEE80211
47 select CRYPTO
48 select CRYPTO_AES
49 ---help---
50 Include software based cipher suites in support of IEEE 802.11i
51 (aka TGi, WPA, WPA2, WPA-PSK, etc.) for use with CCMP enabled
52 networks.
53
 54 This can be compiled as a module and it will be called
55 "ieee80211_crypt_ccmp".
56
57config IEEE80211_CRYPT_TKIP
58 tristate "IEEE 802.11i TKIP encryption"
59 depends on IEEE80211
60 select CRYPTO
61 select CRYPTO_MICHAEL_MIC
62 ---help---
63 Include software based cipher suites in support of IEEE 802.11i
64 (aka TGi, WPA, WPA2, WPA-PSK, etc.) for use with TKIP enabled
65 networks.
66
 67 This can be compiled as a module and it will be called
68 "ieee80211_crypt_tkip".
69
diff --git a/net/ieee80211/Makefile b/net/ieee80211/Makefile
new file mode 100644
index 000000000000..a6ccac5baea8
--- /dev/null
+++ b/net/ieee80211/Makefile
@@ -0,0 +1,11 @@
1obj-$(CONFIG_IEEE80211) += ieee80211.o
2obj-$(CONFIG_IEEE80211) += ieee80211_crypt.o
3obj-$(CONFIG_IEEE80211_CRYPT_WEP) += ieee80211_crypt_wep.o
4obj-$(CONFIG_IEEE80211_CRYPT_CCMP) += ieee80211_crypt_ccmp.o
5obj-$(CONFIG_IEEE80211_CRYPT_TKIP) += ieee80211_crypt_tkip.o
6ieee80211-objs := \
7 ieee80211_module.o \
8 ieee80211_tx.o \
9 ieee80211_rx.o \
10 ieee80211_wx.o
11
diff --git a/net/ieee80211/ieee80211_crypt.c b/net/ieee80211/ieee80211_crypt.c
new file mode 100644
index 000000000000..05a6f2f298db
--- /dev/null
+++ b/net/ieee80211/ieee80211_crypt.c
@@ -0,0 +1,259 @@
1/*
2 * Host AP crypto routines
3 *
4 * Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
5 * Portions Copyright (C) 2004, Intel Corporation <jketreno@linux.intel.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation. See README and COPYING for
10 * more details.
11 *
12 */
13
14#include <linux/config.h>
15#include <linux/version.h>
16#include <linux/module.h>
17#include <linux/init.h>
18#include <linux/slab.h>
19#include <asm/string.h>
20#include <asm/errno.h>
21
22#include <net/ieee80211.h>
23
24MODULE_AUTHOR("Jouni Malinen");
25MODULE_DESCRIPTION("HostAP crypto");
26MODULE_LICENSE("GPL");
27
28struct ieee80211_crypto_alg {
29 struct list_head list;
30 struct ieee80211_crypto_ops *ops;
31};
32
33
34struct ieee80211_crypto {
35 struct list_head algs;
36 spinlock_t lock;
37};
38
39static struct ieee80211_crypto *hcrypt;
40
41void ieee80211_crypt_deinit_entries(struct ieee80211_device *ieee,
42 int force)
43{
44 struct list_head *ptr, *n;
45 struct ieee80211_crypt_data *entry;
46
47 for (ptr = ieee->crypt_deinit_list.next, n = ptr->next;
48 ptr != &ieee->crypt_deinit_list; ptr = n, n = ptr->next) {
49 entry = list_entry(ptr, struct ieee80211_crypt_data, list);
50
51 if (atomic_read(&entry->refcnt) != 0 && !force)
52 continue;
53
54 list_del(ptr);
55
56 if (entry->ops) {
57 entry->ops->deinit(entry->priv);
58 module_put(entry->ops->owner);
59 }
60 kfree(entry);
61 }
62}
63
64void ieee80211_crypt_deinit_handler(unsigned long data)
65{
66 struct ieee80211_device *ieee = (struct ieee80211_device *)data;
67 unsigned long flags;
68
69 spin_lock_irqsave(&ieee->lock, flags);
70 ieee80211_crypt_deinit_entries(ieee, 0);
71 if (!list_empty(&ieee->crypt_deinit_list)) {
72 printk(KERN_DEBUG "%s: entries remaining in delayed crypt "
73 "deletion list\n", ieee->dev->name);
74 ieee->crypt_deinit_timer.expires = jiffies + HZ;
75 add_timer(&ieee->crypt_deinit_timer);
76 }
77 spin_unlock_irqrestore(&ieee->lock, flags);
78
79}
80
81void ieee80211_crypt_delayed_deinit(struct ieee80211_device *ieee,
82 struct ieee80211_crypt_data **crypt)
83{
84 struct ieee80211_crypt_data *tmp;
85 unsigned long flags;
86
87 if (*crypt == NULL)
88 return;
89
90 tmp = *crypt;
91 *crypt = NULL;
92
93 /* must not run ops->deinit() while there may be pending encrypt or
94 * decrypt operations. Use a list of delayed deinits to avoid needing
95 * locking. */
96
97 spin_lock_irqsave(&ieee->lock, flags);
98 list_add(&tmp->list, &ieee->crypt_deinit_list);
99 if (!timer_pending(&ieee->crypt_deinit_timer)) {
100 ieee->crypt_deinit_timer.expires = jiffies + HZ;
101 add_timer(&ieee->crypt_deinit_timer);
102 }
103 spin_unlock_irqrestore(&ieee->lock, flags);
104}
105
106int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops)
107{
108 unsigned long flags;
109 struct ieee80211_crypto_alg *alg;
110
111 if (hcrypt == NULL)
112 return -1;
113
114 alg = kmalloc(sizeof(*alg), GFP_KERNEL);
115 if (alg == NULL)
116 return -ENOMEM;
117
118 memset(alg, 0, sizeof(*alg));
119 alg->ops = ops;
120
121 spin_lock_irqsave(&hcrypt->lock, flags);
122 list_add(&alg->list, &hcrypt->algs);
123 spin_unlock_irqrestore(&hcrypt->lock, flags);
124
125 printk(KERN_DEBUG "ieee80211_crypt: registered algorithm '%s'\n",
126 ops->name);
127
128 return 0;
129}
130
131int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops)
132{
133 unsigned long flags;
134 struct list_head *ptr;
135 struct ieee80211_crypto_alg *del_alg = NULL;
136
137 if (hcrypt == NULL)
138 return -1;
139
140 spin_lock_irqsave(&hcrypt->lock, flags);
141 for (ptr = hcrypt->algs.next; ptr != &hcrypt->algs; ptr = ptr->next) {
142 struct ieee80211_crypto_alg *alg =
143 (struct ieee80211_crypto_alg *) ptr;
144 if (alg->ops == ops) {
145 list_del(&alg->list);
146 del_alg = alg;
147 break;
148 }
149 }
150 spin_unlock_irqrestore(&hcrypt->lock, flags);
151
152 if (del_alg) {
153 printk(KERN_DEBUG "ieee80211_crypt: unregistered algorithm "
154 "'%s'\n", ops->name);
155 kfree(del_alg);
156 }
157
158 return del_alg ? 0 : -1;
159}
160
161
162struct ieee80211_crypto_ops * ieee80211_get_crypto_ops(const char *name)
163{
164 unsigned long flags;
165 struct list_head *ptr;
166 struct ieee80211_crypto_alg *found_alg = NULL;
167
168 if (hcrypt == NULL)
169 return NULL;
170
171 spin_lock_irqsave(&hcrypt->lock, flags);
172 for (ptr = hcrypt->algs.next; ptr != &hcrypt->algs; ptr = ptr->next) {
173 struct ieee80211_crypto_alg *alg =
174 (struct ieee80211_crypto_alg *) ptr;
175 if (strcmp(alg->ops->name, name) == 0) {
176 found_alg = alg;
177 break;
178 }
179 }
180 spin_unlock_irqrestore(&hcrypt->lock, flags);
181
182 if (found_alg)
183 return found_alg->ops;
184 else
185 return NULL;
186}
187
188
189static void * ieee80211_crypt_null_init(int keyidx) { return (void *) 1; }
190static void ieee80211_crypt_null_deinit(void *priv) {}
191
192static struct ieee80211_crypto_ops ieee80211_crypt_null = {
193 .name = "NULL",
194 .init = ieee80211_crypt_null_init,
195 .deinit = ieee80211_crypt_null_deinit,
196 .encrypt_mpdu = NULL,
197 .decrypt_mpdu = NULL,
198 .encrypt_msdu = NULL,
199 .decrypt_msdu = NULL,
200 .set_key = NULL,
201 .get_key = NULL,
202 .extra_prefix_len = 0,
203 .extra_postfix_len = 0,
204 .owner = THIS_MODULE,
205};
206
207
208static int __init ieee80211_crypto_init(void)
209{
210 int ret = -ENOMEM;
211
212 hcrypt = kmalloc(sizeof(*hcrypt), GFP_KERNEL);
213 if (!hcrypt)
214 goto out;
215
216 memset(hcrypt, 0, sizeof(*hcrypt));
217 INIT_LIST_HEAD(&hcrypt->algs);
218 spin_lock_init(&hcrypt->lock);
219
220 ret = ieee80211_register_crypto_ops(&ieee80211_crypt_null);
221 if (ret < 0) {
222 kfree(hcrypt);
223 hcrypt = NULL;
224 }
225out:
226 return ret;
227}
228
229
230static void __exit ieee80211_crypto_deinit(void)
231{
232 struct list_head *ptr, *n;
233
234 if (hcrypt == NULL)
235 return;
236
237 for (ptr = hcrypt->algs.next, n = ptr->next; ptr != &hcrypt->algs;
238 ptr = n, n = ptr->next) {
239 struct ieee80211_crypto_alg *alg =
240 (struct ieee80211_crypto_alg *) ptr;
241 list_del(ptr);
242 printk(KERN_DEBUG "ieee80211_crypt: unregistered algorithm "
243 "'%s' (deinit)\n", alg->ops->name);
244 kfree(alg);
245 }
246
247 kfree(hcrypt);
248}
249
250EXPORT_SYMBOL(ieee80211_crypt_deinit_entries);
251EXPORT_SYMBOL(ieee80211_crypt_deinit_handler);
252EXPORT_SYMBOL(ieee80211_crypt_delayed_deinit);
253
254EXPORT_SYMBOL(ieee80211_register_crypto_ops);
255EXPORT_SYMBOL(ieee80211_unregister_crypto_ops);
256EXPORT_SYMBOL(ieee80211_get_crypto_ops);
257
258module_init(ieee80211_crypto_init);
259module_exit(ieee80211_crypto_deinit);
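
Note: ieee80211_crypt.c is purely a registry: cipher modules register an ieee80211_crypto_ops table and consumers look it up by name. A hedged sketch of the consumer side (the request_module() fallback and the keyidx variable are illustrative, not taken from this file):

	struct ieee80211_crypto_ops *ops;
	void *priv;

	ops = ieee80211_get_crypto_ops("WEP");
	if (ops == NULL) {
		request_module("ieee80211_crypt_wep");	/* try autoload */
		ops = ieee80211_get_crypto_ops("WEP");
	}
	if (ops == NULL || !try_module_get(ops->owner))
		return -ENOENT;

	priv = ops->init(keyidx);	/* per-key private state */
	if (priv == NULL) {
		module_put(ops->owner);
		return -ENOMEM;
	}
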
diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c
new file mode 100644
index 000000000000..11d15573b26a
--- /dev/null
+++ b/net/ieee80211/ieee80211_crypt_ccmp.c
@@ -0,0 +1,470 @@
1/*
2 * Host AP crypt: host-based CCMP encryption implementation for Host AP driver
3 *
4 * Copyright (c) 2003-2004, Jouni Malinen <jkmaline@cc.hut.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. See README and COPYING for
9 * more details.
10 */
11
12#include <linux/config.h>
13#include <linux/version.h>
14#include <linux/module.h>
15#include <linux/init.h>
16#include <linux/slab.h>
17#include <linux/random.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/if_ether.h>
21#include <linux/if_arp.h>
22#include <asm/string.h>
23#include <linux/wireless.h>
24
25#include <net/ieee80211.h>
26
27
28#include <linux/crypto.h>
29#include <asm/scatterlist.h>
30
31MODULE_AUTHOR("Jouni Malinen");
32MODULE_DESCRIPTION("Host AP crypt: CCMP");
33MODULE_LICENSE("GPL");
34
35#define AES_BLOCK_LEN 16
36#define CCMP_HDR_LEN 8
37#define CCMP_MIC_LEN 8
38#define CCMP_TK_LEN 16
39#define CCMP_PN_LEN 6
40
41struct ieee80211_ccmp_data {
42 u8 key[CCMP_TK_LEN];
43 int key_set;
44
45 u8 tx_pn[CCMP_PN_LEN];
46 u8 rx_pn[CCMP_PN_LEN];
47
48 u32 dot11RSNAStatsCCMPFormatErrors;
49 u32 dot11RSNAStatsCCMPReplays;
50 u32 dot11RSNAStatsCCMPDecryptErrors;
51
52 int key_idx;
53
54 struct crypto_tfm *tfm;
55
56 /* scratch buffers for virt_to_page() (crypto API) */
57 u8 tx_b0[AES_BLOCK_LEN], tx_b[AES_BLOCK_LEN],
58 tx_e[AES_BLOCK_LEN], tx_s0[AES_BLOCK_LEN];
59 u8 rx_b0[AES_BLOCK_LEN], rx_b[AES_BLOCK_LEN], rx_a[AES_BLOCK_LEN];
60};
61
62static void ieee80211_ccmp_aes_encrypt(struct crypto_tfm *tfm,
63 const u8 pt[16], u8 ct[16])
64{
65 struct scatterlist src, dst;
66
67 src.page = virt_to_page(pt);
68 src.offset = offset_in_page(pt);
69 src.length = AES_BLOCK_LEN;
70
71 dst.page = virt_to_page(ct);
72 dst.offset = offset_in_page(ct);
73 dst.length = AES_BLOCK_LEN;
74
75 crypto_cipher_encrypt(tfm, &dst, &src, AES_BLOCK_LEN);
76}
77
78static void * ieee80211_ccmp_init(int key_idx)
79{
80 struct ieee80211_ccmp_data *priv;
81
82 priv = kmalloc(sizeof(*priv), GFP_ATOMIC);
83 if (priv == NULL)
84 goto fail;
85 memset(priv, 0, sizeof(*priv));
86 priv->key_idx = key_idx;
87
88 priv->tfm = crypto_alloc_tfm("aes", 0);
89 if (priv->tfm == NULL) {
90 printk(KERN_DEBUG "ieee80211_crypt_ccmp: could not allocate "
91 "crypto API aes\n");
92 goto fail;
93 }
94
95 return priv;
96
97fail:
98 if (priv) {
99 if (priv->tfm)
100 crypto_free_tfm(priv->tfm);
101 kfree(priv);
102 }
103
104 return NULL;
105}
106
107
108static void ieee80211_ccmp_deinit(void *priv)
109{
110 struct ieee80211_ccmp_data *_priv = priv;
111 if (_priv && _priv->tfm)
112 crypto_free_tfm(_priv->tfm);
113 kfree(priv);
114}
115
116
117static inline void xor_block(u8 *b, u8 *a, size_t len)
118{
119 int i;
120 for (i = 0; i < len; i++)
121 b[i] ^= a[i];
122}
123
124
125static void ccmp_init_blocks(struct crypto_tfm *tfm,
126 struct ieee80211_hdr *hdr,
127 u8 *pn, size_t dlen, u8 *b0, u8 *auth,
128 u8 *s0)
129{
130 u8 *pos, qc = 0;
131 size_t aad_len;
132 u16 fc;
133 int a4_included, qc_included;
134 u8 aad[2 * AES_BLOCK_LEN];
135
136 fc = le16_to_cpu(hdr->frame_ctl);
137 a4_included = ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) ==
138 (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS));
139 qc_included = ((WLAN_FC_GET_TYPE(fc) == IEEE80211_FTYPE_DATA) &&
140 (WLAN_FC_GET_STYPE(fc) & 0x08));
141 aad_len = 22;
142 if (a4_included)
143 aad_len += 6;
144 if (qc_included) {
145 pos = (u8 *) &hdr->addr4;
146 if (a4_included)
147 pos += 6;
148 qc = *pos & 0x0f;
149 aad_len += 2;
150 }
151
152 /* CCM Initial Block:
153 * Flag (Include authentication header, M=3 (8-octet MIC),
154 * L=1 (2-octet Dlen))
155 * Nonce: 0x00 | A2 | PN
156 * Dlen */
157 b0[0] = 0x59;
158 b0[1] = qc;
159 memcpy(b0 + 2, hdr->addr2, ETH_ALEN);
160 memcpy(b0 + 8, pn, CCMP_PN_LEN);
161 b0[14] = (dlen >> 8) & 0xff;
162 b0[15] = dlen & 0xff;
163
164 /* AAD:
165 * FC with bits 4..6 and 11..13 masked to zero; 14 is always one
166 * A1 | A2 | A3
167 * SC with bits 4..15 (seq#) masked to zero
168 * A4 (if present)
169 * QC (if present)
170 */
171 pos = (u8 *) hdr;
172 aad[0] = 0; /* aad_len >> 8 */
173 aad[1] = aad_len & 0xff;
174 aad[2] = pos[0] & 0x8f;
175 aad[3] = pos[1] & 0xc7;
176 memcpy(aad + 4, hdr->addr1, 3 * ETH_ALEN);
177 pos = (u8 *) &hdr->seq_ctl;
178 aad[22] = pos[0] & 0x0f;
179 aad[23] = 0; /* all bits masked */
180 memset(aad + 24, 0, 8);
181 if (a4_included)
182 memcpy(aad + 24, hdr->addr4, ETH_ALEN);
183 if (qc_included) {
184 aad[a4_included ? 30 : 24] = qc;
185 /* rest of QC masked */
186 }
187
188 /* Start with the first block and AAD */
189 ieee80211_ccmp_aes_encrypt(tfm, b0, auth);
190 xor_block(auth, aad, AES_BLOCK_LEN);
191 ieee80211_ccmp_aes_encrypt(tfm, auth, auth);
192 xor_block(auth, &aad[AES_BLOCK_LEN], AES_BLOCK_LEN);
193 ieee80211_ccmp_aes_encrypt(tfm, auth, auth);
194 b0[0] &= 0x07;
195 b0[14] = b0[15] = 0;
196 ieee80211_ccmp_aes_encrypt(tfm, b0, s0);
197}
198
199
200static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
201{
202 struct ieee80211_ccmp_data *key = priv;
203 int data_len, i, blocks, last, len;
204 u8 *pos, *mic;
205 struct ieee80211_hdr *hdr;
206 u8 *b0 = key->tx_b0;
207 u8 *b = key->tx_b;
208 u8 *e = key->tx_e;
209 u8 *s0 = key->tx_s0;
210
211 if (skb_headroom(skb) < CCMP_HDR_LEN ||
212 skb_tailroom(skb) < CCMP_MIC_LEN ||
213 skb->len < hdr_len)
214 return -1;
215
216 data_len = skb->len - hdr_len;
217 pos = skb_push(skb, CCMP_HDR_LEN);
218 memmove(pos, pos + CCMP_HDR_LEN, hdr_len);
219 pos += hdr_len;
220 mic = skb_put(skb, CCMP_MIC_LEN);
221
222 i = CCMP_PN_LEN - 1;
223 while (i >= 0) {
224 key->tx_pn[i]++;
225 if (key->tx_pn[i] != 0)
226 break;
227 i--;
228 }
229
230 *pos++ = key->tx_pn[5];
231 *pos++ = key->tx_pn[4];
232 *pos++ = 0;
233 *pos++ = (key->key_idx << 6) | (1 << 5) /* Ext IV included */;
234 *pos++ = key->tx_pn[3];
235 *pos++ = key->tx_pn[2];
236 *pos++ = key->tx_pn[1];
237 *pos++ = key->tx_pn[0];
238
239 hdr = (struct ieee80211_hdr *) skb->data;
240 ccmp_init_blocks(key->tfm, hdr, key->tx_pn, data_len, b0, b, s0);
241
242 blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN;
243 last = data_len % AES_BLOCK_LEN;
244
245 for (i = 1; i <= blocks; i++) {
246 len = (i == blocks && last) ? last : AES_BLOCK_LEN;
247 /* Authentication */
248 xor_block(b, pos, len);
249 ieee80211_ccmp_aes_encrypt(key->tfm, b, b);
250 /* Encryption, with counter */
251 b0[14] = (i >> 8) & 0xff;
252 b0[15] = i & 0xff;
253 ieee80211_ccmp_aes_encrypt(key->tfm, b0, e);
254 xor_block(pos, e, len);
255 pos += len;
256 }
257
258 for (i = 0; i < CCMP_MIC_LEN; i++)
259 mic[i] = b[i] ^ s0[i];
260
261 return 0;
262}
263
264
265static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
266{
267 struct ieee80211_ccmp_data *key = priv;
268 u8 keyidx, *pos;
269 struct ieee80211_hdr *hdr;
270 u8 *b0 = key->rx_b0;
271 u8 *b = key->rx_b;
272 u8 *a = key->rx_a;
273 u8 pn[6];
274 int i, blocks, last, len;
275 size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN - CCMP_MIC_LEN;
276 u8 *mic = skb->data + skb->len - CCMP_MIC_LEN;
277
278 if (skb->len < hdr_len + CCMP_HDR_LEN + CCMP_MIC_LEN) {
279 key->dot11RSNAStatsCCMPFormatErrors++;
280 return -1;
281 }
282
283 hdr = (struct ieee80211_hdr *) skb->data;
284 pos = skb->data + hdr_len;
285 keyidx = pos[3];
286 if (!(keyidx & (1 << 5))) {
287 if (net_ratelimit()) {
288 printk(KERN_DEBUG "CCMP: received packet without ExtIV"
289 " flag from " MAC_FMT "\n", MAC_ARG(hdr->addr2));
290 }
291 key->dot11RSNAStatsCCMPFormatErrors++;
292 return -2;
293 }
294 keyidx >>= 6;
295 if (key->key_idx != keyidx) {
296 printk(KERN_DEBUG "CCMP: RX tkey->key_idx=%d frame "
297 "keyidx=%d priv=%p\n", key->key_idx, keyidx, priv);
298 return -6;
299 }
300 if (!key->key_set) {
301 if (net_ratelimit()) {
302 printk(KERN_DEBUG "CCMP: received packet from " MAC_FMT
303 " with keyid=%d that does not have a configured"
304 " key\n", MAC_ARG(hdr->addr2), keyidx);
305 }
306 return -3;
307 }
308
309 pn[0] = pos[7];
310 pn[1] = pos[6];
311 pn[2] = pos[5];
312 pn[3] = pos[4];
313 pn[4] = pos[1];
314 pn[5] = pos[0];
315 pos += 8;
316
317 if (memcmp(pn, key->rx_pn, CCMP_PN_LEN) <= 0) {
318 if (net_ratelimit()) {
319 printk(KERN_DEBUG "CCMP: replay detected: STA=" MAC_FMT
320 " previous PN %02x%02x%02x%02x%02x%02x "
321 "received PN %02x%02x%02x%02x%02x%02x\n",
322 MAC_ARG(hdr->addr2), MAC_ARG(key->rx_pn),
323 MAC_ARG(pn));
324 }
325 key->dot11RSNAStatsCCMPReplays++;
326 return -4;
327 }
328
329 ccmp_init_blocks(key->tfm, hdr, pn, data_len, b0, a, b);
330 xor_block(mic, b, CCMP_MIC_LEN);
331
332 blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN;
333 last = data_len % AES_BLOCK_LEN;
334
335 for (i = 1; i <= blocks; i++) {
336 len = (i == blocks && last) ? last : AES_BLOCK_LEN;
337 /* Decrypt, with counter */
338 b0[14] = (i >> 8) & 0xff;
339 b0[15] = i & 0xff;
340 ieee80211_ccmp_aes_encrypt(key->tfm, b0, b);
341 xor_block(pos, b, len);
342 /* Authentication */
343 xor_block(a, pos, len);
344 ieee80211_ccmp_aes_encrypt(key->tfm, a, a);
345 pos += len;
346 }
347
348 if (memcmp(mic, a, CCMP_MIC_LEN) != 0) {
349 if (net_ratelimit()) {
350 printk(KERN_DEBUG "CCMP: decrypt failed: STA="
351 MAC_FMT "\n", MAC_ARG(hdr->addr2));
352 }
353 key->dot11RSNAStatsCCMPDecryptErrors++;
354 return -5;
355 }
356
357 memcpy(key->rx_pn, pn, CCMP_PN_LEN);
358
359 /* Remove hdr and MIC */
360 memmove(skb->data + CCMP_HDR_LEN, skb->data, hdr_len);
361 skb_pull(skb, CCMP_HDR_LEN);
362 skb_trim(skb, skb->len - CCMP_MIC_LEN);
363
364 return keyidx;
365}
366
367
368static int ieee80211_ccmp_set_key(void *key, int len, u8 *seq, void *priv)
369{
370 struct ieee80211_ccmp_data *data = priv;
371 int keyidx;
372 struct crypto_tfm *tfm = data->tfm;
373
374 keyidx = data->key_idx;
375 memset(data, 0, sizeof(*data));
376 data->key_idx = keyidx;
377 data->tfm = tfm;
378 if (len == CCMP_TK_LEN) {
379 memcpy(data->key, key, CCMP_TK_LEN);
380 data->key_set = 1;
381 if (seq) {
382 data->rx_pn[0] = seq[5];
383 data->rx_pn[1] = seq[4];
384 data->rx_pn[2] = seq[3];
385 data->rx_pn[3] = seq[2];
386 data->rx_pn[4] = seq[1];
387 data->rx_pn[5] = seq[0];
388 }
389 crypto_cipher_setkey(data->tfm, data->key, CCMP_TK_LEN);
390 } else if (len == 0)
391 data->key_set = 0;
392 else
393 return -1;
394
395 return 0;
396}
397
398
399static int ieee80211_ccmp_get_key(void *key, int len, u8 *seq, void *priv)
400{
401 struct ieee80211_ccmp_data *data = priv;
402
403 if (len < CCMP_TK_LEN)
404 return -1;
405
406 if (!data->key_set)
407 return 0;
408 memcpy(key, data->key, CCMP_TK_LEN);
409
410 if (seq) {
411 seq[0] = data->tx_pn[5];
412 seq[1] = data->tx_pn[4];
413 seq[2] = data->tx_pn[3];
414 seq[3] = data->tx_pn[2];
415 seq[4] = data->tx_pn[1];
416 seq[5] = data->tx_pn[0];
417 }
418
419 return CCMP_TK_LEN;
420}
421
422
423static char * ieee80211_ccmp_print_stats(char *p, void *priv)
424{
425 struct ieee80211_ccmp_data *ccmp = priv;
426 p += sprintf(p, "key[%d] alg=CCMP key_set=%d "
427 "tx_pn=%02x%02x%02x%02x%02x%02x "
428 "rx_pn=%02x%02x%02x%02x%02x%02x "
429 "format_errors=%d replays=%d decrypt_errors=%d\n",
430 ccmp->key_idx, ccmp->key_set,
431 MAC_ARG(ccmp->tx_pn), MAC_ARG(ccmp->rx_pn),
432 ccmp->dot11RSNAStatsCCMPFormatErrors,
433 ccmp->dot11RSNAStatsCCMPReplays,
434 ccmp->dot11RSNAStatsCCMPDecryptErrors);
435
436 return p;
437}
438
439
440static struct ieee80211_crypto_ops ieee80211_crypt_ccmp = {
441 .name = "CCMP",
442 .init = ieee80211_ccmp_init,
443 .deinit = ieee80211_ccmp_deinit,
444 .encrypt_mpdu = ieee80211_ccmp_encrypt,
445 .decrypt_mpdu = ieee80211_ccmp_decrypt,
446 .encrypt_msdu = NULL,
447 .decrypt_msdu = NULL,
448 .set_key = ieee80211_ccmp_set_key,
449 .get_key = ieee80211_ccmp_get_key,
450 .print_stats = ieee80211_ccmp_print_stats,
451 .extra_prefix_len = CCMP_HDR_LEN,
452 .extra_postfix_len = CCMP_MIC_LEN,
453 .owner = THIS_MODULE,
454};
455
456
457static int __init ieee80211_crypto_ccmp_init(void)
458{
459 return ieee80211_register_crypto_ops(&ieee80211_crypt_ccmp);
460}
461
462
463static void __exit ieee80211_crypto_ccmp_exit(void)
464{
465 ieee80211_unregister_crypto_ops(&ieee80211_crypt_ccmp);
466}
467
468
469module_init(ieee80211_crypto_ccmp_init);
470module_exit(ieee80211_crypto_ccmp_exit);
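
Note: the 8-byte CCMP header built in ieee80211_ccmp_encrypt() above carries the 48-bit packet number (PN) split around the key-id octet, least-significant byte first, while tx_pn[]/rx_pn[] keep the PN most-significant byte first, which is why a plain memcmp() suffices for the replay check. The layout, as the decrypt path in this file reassembles it:

	/* hdr[0] = PN0 (lsb)   hdr[1] = PN1   hdr[2] = reserved (0)
	 * hdr[3] = (key_idx << 6) | 0x20    (0x20 = ExtIV bit)
	 * hdr[4] = PN2   hdr[5] = PN3   hdr[6] = PN4   hdr[7] = PN5 (msb)
	 * pn[] ends up msb-first, matching the stored rx_pn[]: */
	pn[0] = pos[7]; pn[1] = pos[6]; pn[2] = pos[5];
	pn[3] = pos[4]; pn[4] = pos[1]; pn[5] = pos[0];
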
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
new file mode 100644
index 000000000000..f91d92c6df25
--- /dev/null
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -0,0 +1,708 @@
1/*
2 * Host AP crypt: host-based TKIP encryption implementation for Host AP driver
3 *
4 * Copyright (c) 2003-2004, Jouni Malinen <jkmaline@cc.hut.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. See README and COPYING for
9 * more details.
10 */
11
12#include <linux/config.h>
13#include <linux/version.h>
14#include <linux/module.h>
15#include <linux/init.h>
16#include <linux/slab.h>
17#include <linux/random.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/if_ether.h>
21#include <linux/if_arp.h>
22#include <asm/string.h>
23
24#include <net/ieee80211.h>
25
26
27#include <linux/crypto.h>
28#include <asm/scatterlist.h>
29#include <linux/crc32.h>
30
31MODULE_AUTHOR("Jouni Malinen");
32MODULE_DESCRIPTION("Host AP crypt: TKIP");
33MODULE_LICENSE("GPL");
34
35struct ieee80211_tkip_data {
36#define TKIP_KEY_LEN 32
37 u8 key[TKIP_KEY_LEN];
38 int key_set;
39
40 u32 tx_iv32;
41 u16 tx_iv16;
42 u16 tx_ttak[5];
43 int tx_phase1_done;
44
45 u32 rx_iv32;
46 u16 rx_iv16;
47 u16 rx_ttak[5];
48 int rx_phase1_done;
49 u32 rx_iv32_new;
50 u16 rx_iv16_new;
51
52 u32 dot11RSNAStatsTKIPReplays;
53 u32 dot11RSNAStatsTKIPICVErrors;
54 u32 dot11RSNAStatsTKIPLocalMICFailures;
55
56 int key_idx;
57
58 struct crypto_tfm *tfm_arc4;
59 struct crypto_tfm *tfm_michael;
60
61 /* scratch buffers for virt_to_page() (crypto API) */
62 u8 rx_hdr[16], tx_hdr[16];
63};
64
65static void * ieee80211_tkip_init(int key_idx)
66{
67 struct ieee80211_tkip_data *priv;
68
69 priv = kmalloc(sizeof(*priv), GFP_ATOMIC);
70 if (priv == NULL)
71 goto fail;
72 memset(priv, 0, sizeof(*priv));
73 priv->key_idx = key_idx;
74
75 priv->tfm_arc4 = crypto_alloc_tfm("arc4", 0);
76 if (priv->tfm_arc4 == NULL) {
77 printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate "
78 "crypto API arc4\n");
79 goto fail;
80 }
81
82 priv->tfm_michael = crypto_alloc_tfm("michael_mic", 0);
83 if (priv->tfm_michael == NULL) {
84 printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate "
85 "crypto API michael_mic\n");
86 goto fail;
87 }
88
89 return priv;
90
91fail:
92 if (priv) {
93 if (priv->tfm_michael)
94 crypto_free_tfm(priv->tfm_michael);
95 if (priv->tfm_arc4)
96 crypto_free_tfm(priv->tfm_arc4);
97 kfree(priv);
98 }
99
100 return NULL;
101}
102
103
104static void ieee80211_tkip_deinit(void *priv)
105{
106 struct ieee80211_tkip_data *_priv = priv;
107 if (_priv && _priv->tfm_michael)
108 crypto_free_tfm(_priv->tfm_michael);
109 if (_priv && _priv->tfm_arc4)
110 crypto_free_tfm(_priv->tfm_arc4);
111 kfree(priv);
112}
113
114
115static inline u16 RotR1(u16 val)
116{
117 return (val >> 1) | (val << 15);
118}
119
120
121static inline u8 Lo8(u16 val)
122{
123 return val & 0xff;
124}
125
126
127static inline u8 Hi8(u16 val)
128{
129 return val >> 8;
130}
131
132
133static inline u16 Lo16(u32 val)
134{
135 return val & 0xffff;
136}
137
138
139static inline u16 Hi16(u32 val)
140{
141 return val >> 16;
142}
143
144
145static inline u16 Mk16(u8 hi, u8 lo)
146{
147 return lo | (((u16) hi) << 8);
148}
149
150
151static inline u16 Mk16_le(u16 *v)
152{
153 return le16_to_cpu(*v);
154}
155
156
157static const u16 Sbox[256] =
158{
159 0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154,
160 0x6050, 0x0203, 0xCEA9, 0x567D, 0xE719, 0xB562, 0x4DE6, 0xEC9A,
161 0x8F45, 0x1F9D, 0x8940, 0xFA87, 0xEF15, 0xB2EB, 0x8EC9, 0xFB0B,
162 0x41EC, 0xB367, 0x5FFD, 0x45EA, 0x23BF, 0x53F7, 0xE496, 0x9B5B,
163 0x75C2, 0xE11C, 0x3DAE, 0x4C6A, 0x6C5A, 0x7E41, 0xF502, 0x834F,
164 0x685C, 0x51F4, 0xD134, 0xF908, 0xE293, 0xAB73, 0x6253, 0x2A3F,
165 0x080C, 0x9552, 0x4665, 0x9D5E, 0x3028, 0x37A1, 0x0A0F, 0x2FB5,
166 0x0E09, 0x2436, 0x1B9B, 0xDF3D, 0xCD26, 0x4E69, 0x7FCD, 0xEA9F,
167 0x121B, 0x1D9E, 0x5874, 0x342E, 0x362D, 0xDCB2, 0xB4EE, 0x5BFB,
168 0xA4F6, 0x764D, 0xB761, 0x7DCE, 0x527B, 0xDD3E, 0x5E71, 0x1397,
169 0xA6F5, 0xB968, 0x0000, 0xC12C, 0x4060, 0xE31F, 0x79C8, 0xB6ED,
170 0xD4BE, 0x8D46, 0x67D9, 0x724B, 0x94DE, 0x98D4, 0xB0E8, 0x854A,
171 0xBB6B, 0xC52A, 0x4FE5, 0xED16, 0x86C5, 0x9AD7, 0x6655, 0x1194,
172 0x8ACF, 0xE910, 0x0406, 0xFE81, 0xA0F0, 0x7844, 0x25BA, 0x4BE3,
173 0xA2F3, 0x5DFE, 0x80C0, 0x058A, 0x3FAD, 0x21BC, 0x7048, 0xF104,
174 0x63DF, 0x77C1, 0xAF75, 0x4263, 0x2030, 0xE51A, 0xFD0E, 0xBF6D,
175 0x814C, 0x1814, 0x2635, 0xC32F, 0xBEE1, 0x35A2, 0x88CC, 0x2E39,
176 0x9357, 0x55F2, 0xFC82, 0x7A47, 0xC8AC, 0xBAE7, 0x322B, 0xE695,
177 0xC0A0, 0x1998, 0x9ED1, 0xA37F, 0x4466, 0x547E, 0x3BAB, 0x0B83,
178 0x8CCA, 0xC729, 0x6BD3, 0x283C, 0xA779, 0xBCE2, 0x161D, 0xAD76,
179 0xDB3B, 0x6456, 0x744E, 0x141E, 0x92DB, 0x0C0A, 0x486C, 0xB8E4,
180 0x9F5D, 0xBD6E, 0x43EF, 0xC4A6, 0x39A8, 0x31A4, 0xD337, 0xF28B,
181 0xD532, 0x8B43, 0x6E59, 0xDAB7, 0x018C, 0xB164, 0x9CD2, 0x49E0,
182 0xD8B4, 0xACFA, 0xF307, 0xCF25, 0xCAAF, 0xF48E, 0x47E9, 0x1018,
183 0x6FD5, 0xF088, 0x4A6F, 0x5C72, 0x3824, 0x57F1, 0x73C7, 0x9751,
184 0xCB23, 0xA17C, 0xE89C, 0x3E21, 0x96DD, 0x61DC, 0x0D86, 0x0F85,
185 0xE090, 0x7C42, 0x71C4, 0xCCAA, 0x90D8, 0x0605, 0xF701, 0x1C12,
186 0xC2A3, 0x6A5F, 0xAEF9, 0x69D0, 0x1791, 0x9958, 0x3A27, 0x27B9,
187 0xD938, 0xEB13, 0x2BB3, 0x2233, 0xD2BB, 0xA970, 0x0789, 0x33A7,
188 0x2DB6, 0x3C22, 0x1592, 0xC920, 0x8749, 0xAAFF, 0x5078, 0xA57A,
189 0x038F, 0x59F8, 0x0980, 0x1A17, 0x65DA, 0xD731, 0x84C6, 0xD0B8,
190 0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A,
191};
192
193
194static inline u16 _S_(u16 v)
195{
196 u16 t = Sbox[Hi8(v)];
197 return Sbox[Lo8(v)] ^ ((t << 8) | (t >> 8));
198}
199
200
201#define PHASE1_LOOP_COUNT 8
202
203static void tkip_mixing_phase1(u16 *TTAK, const u8 *TK, const u8 *TA, u32 IV32)
204{
205 int i, j;
206
207 /* Initialize the 80-bit TTAK from TSC (IV32) and TA[0..5] */
208 TTAK[0] = Lo16(IV32);
209 TTAK[1] = Hi16(IV32);
210 TTAK[2] = Mk16(TA[1], TA[0]);
211 TTAK[3] = Mk16(TA[3], TA[2]);
212 TTAK[4] = Mk16(TA[5], TA[4]);
213
214 for (i = 0; i < PHASE1_LOOP_COUNT; i++) {
215 j = 2 * (i & 1);
216 TTAK[0] += _S_(TTAK[4] ^ Mk16(TK[1 + j], TK[0 + j]));
217 TTAK[1] += _S_(TTAK[0] ^ Mk16(TK[5 + j], TK[4 + j]));
218 TTAK[2] += _S_(TTAK[1] ^ Mk16(TK[9 + j], TK[8 + j]));
219 TTAK[3] += _S_(TTAK[2] ^ Mk16(TK[13 + j], TK[12 + j]));
220 TTAK[4] += _S_(TTAK[3] ^ Mk16(TK[1 + j], TK[0 + j])) + i;
221 }
222}
223
224
225static void tkip_mixing_phase2(u8 *WEPSeed, const u8 *TK, const u16 *TTAK,
226 u16 IV16)
227{
228 /* Make temporary area overlap WEP seed so that the final copy can be
229 * avoided on little endian hosts. */
230 u16 *PPK = (u16 *) &WEPSeed[4];
231
232 /* Step 1 - make copy of TTAK and bring in TSC */
233 PPK[0] = TTAK[0];
234 PPK[1] = TTAK[1];
235 PPK[2] = TTAK[2];
236 PPK[3] = TTAK[3];
237 PPK[4] = TTAK[4];
238 PPK[5] = TTAK[4] + IV16;
239
240 /* Step 2 - 96-bit bijective mixing using S-box */
241 PPK[0] += _S_(PPK[5] ^ Mk16_le((u16 *) &TK[0]));
242 PPK[1] += _S_(PPK[0] ^ Mk16_le((u16 *) &TK[2]));
243 PPK[2] += _S_(PPK[1] ^ Mk16_le((u16 *) &TK[4]));
244 PPK[3] += _S_(PPK[2] ^ Mk16_le((u16 *) &TK[6]));
245 PPK[4] += _S_(PPK[3] ^ Mk16_le((u16 *) &TK[8]));
246 PPK[5] += _S_(PPK[4] ^ Mk16_le((u16 *) &TK[10]));
247
248 PPK[0] += RotR1(PPK[5] ^ Mk16_le((u16 *) &TK[12]));
249 PPK[1] += RotR1(PPK[0] ^ Mk16_le((u16 *) &TK[14]));
250 PPK[2] += RotR1(PPK[1]);
251 PPK[3] += RotR1(PPK[2]);
252 PPK[4] += RotR1(PPK[3]);
253 PPK[5] += RotR1(PPK[4]);
254
255 /* Step 3 - bring in last of TK bits, assign 24-bit WEP IV value
256 * WEPSeed[0..2] is transmitted as WEP IV */
257 WEPSeed[0] = Hi8(IV16);
258 WEPSeed[1] = (Hi8(IV16) | 0x20) & 0x7F;
259 WEPSeed[2] = Lo8(IV16);
260 WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((u16 *) &TK[0])) >> 1);
261
262#ifdef __BIG_ENDIAN
263 {
264 int i;
265 for (i = 0; i < 6; i++)
266 PPK[i] = (PPK[i] << 8) | (PPK[i] >> 8);
267 }
268#endif
269}
270
271static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
272{
273 struct ieee80211_tkip_data *tkey = priv;
274 int len;
275 u8 rc4key[16], *pos, *icv;
276 struct ieee80211_hdr *hdr;
277 u32 crc;
278 struct scatterlist sg;
279
280 if (skb_headroom(skb) < 8 || skb_tailroom(skb) < 4 ||
281 skb->len < hdr_len)
282 return -1;
283
284 hdr = (struct ieee80211_hdr *) skb->data;
285 if (!tkey->tx_phase1_done) {
286 tkip_mixing_phase1(tkey->tx_ttak, tkey->key, hdr->addr2,
287 tkey->tx_iv32);
288 tkey->tx_phase1_done = 1;
289 }
290 tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak, tkey->tx_iv16);
291
292 len = skb->len - hdr_len;
293 pos = skb_push(skb, 8);
294 memmove(pos, pos + 8, hdr_len);
295 pos += hdr_len;
296 icv = skb_put(skb, 4);
297
298 *pos++ = rc4key[0];
299 *pos++ = rc4key[1];
300 *pos++ = rc4key[2];
301 *pos++ = (tkey->key_idx << 6) | (1 << 5) /* Ext IV included */;
302 *pos++ = tkey->tx_iv32 & 0xff;
303 *pos++ = (tkey->tx_iv32 >> 8) & 0xff;
304 *pos++ = (tkey->tx_iv32 >> 16) & 0xff;
305 *pos++ = (tkey->tx_iv32 >> 24) & 0xff;
306
307 crc = ~crc32_le(~0, pos, len);
308 icv[0] = crc;
309 icv[1] = crc >> 8;
310 icv[2] = crc >> 16;
311 icv[3] = crc >> 24;
312
313 crypto_cipher_setkey(tkey->tfm_arc4, rc4key, 16);
314 sg.page = virt_to_page(pos);
315 sg.offset = offset_in_page(pos);
316 sg.length = len + 4;
317 crypto_cipher_encrypt(tkey->tfm_arc4, &sg, &sg, len + 4);
318
319 tkey->tx_iv16++;
320 if (tkey->tx_iv16 == 0) {
321 tkey->tx_phase1_done = 0;
322 tkey->tx_iv32++;
323 }
324
325 return 0;
326}
327
328static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
329{
330 struct ieee80211_tkip_data *tkey = priv;
331 u8 rc4key[16];
332 u8 keyidx, *pos;
333 u32 iv32;
334 u16 iv16;
335 struct ieee80211_hdr *hdr;
336 u8 icv[4];
337 u32 crc;
338 struct scatterlist sg;
339 int plen;
340
341 if (skb->len < hdr_len + 8 + 4)
342 return -1;
343
344 hdr = (struct ieee80211_hdr *) skb->data;
345 pos = skb->data + hdr_len;
346 keyidx = pos[3];
347 if (!(keyidx & (1 << 5))) {
348 if (net_ratelimit()) {
349 printk(KERN_DEBUG "TKIP: received packet without ExtIV"
350 " flag from " MAC_FMT "\n", MAC_ARG(hdr->addr2));
351 }
352 return -2;
353 }
354 keyidx >>= 6;
355 if (tkey->key_idx != keyidx) {
356 printk(KERN_DEBUG "TKIP: RX tkey->key_idx=%d frame "
357 "keyidx=%d priv=%p\n", tkey->key_idx, keyidx, priv);
358 return -6;
359 }
360 if (!tkey->key_set) {
361 if (net_ratelimit()) {
362 printk(KERN_DEBUG "TKIP: received packet from " MAC_FMT
363 " with keyid=%d that does not have a configured"
364 " key\n", MAC_ARG(hdr->addr2), keyidx);
365 }
366 return -3;
367 }
368 iv16 = (pos[0] << 8) | pos[2];
369 iv32 = pos[4] | (pos[5] << 8) | (pos[6] << 16) | (pos[7] << 24);
370 pos += 8;
371
372 if (iv32 < tkey->rx_iv32 ||
373 (iv32 == tkey->rx_iv32 && iv16 <= tkey->rx_iv16)) {
374 if (net_ratelimit()) {
375 printk(KERN_DEBUG "TKIP: replay detected: STA=" MAC_FMT
376 " previous TSC %08x%04x received TSC "
377 "%08x%04x\n", MAC_ARG(hdr->addr2),
378 tkey->rx_iv32, tkey->rx_iv16, iv32, iv16);
379 }
380 tkey->dot11RSNAStatsTKIPReplays++;
381 return -4;
382 }
383
384 if (iv32 != tkey->rx_iv32 || !tkey->rx_phase1_done) {
385 tkip_mixing_phase1(tkey->rx_ttak, tkey->key, hdr->addr2, iv32);
386 tkey->rx_phase1_done = 1;
387 }
388 tkip_mixing_phase2(rc4key, tkey->key, tkey->rx_ttak, iv16);
389
390 plen = skb->len - hdr_len - 12;
391
392 crypto_cipher_setkey(tkey->tfm_arc4, rc4key, 16);
393 sg.page = virt_to_page(pos);
394 sg.offset = offset_in_page(pos);
395 sg.length = plen + 4;
396 crypto_cipher_decrypt(tkey->tfm_arc4, &sg, &sg, plen + 4);
397
398 crc = ~crc32_le(~0, pos, plen);
399 icv[0] = crc;
400 icv[1] = crc >> 8;
401 icv[2] = crc >> 16;
402 icv[3] = crc >> 24;
403 if (memcmp(icv, pos + plen, 4) != 0) {
404 if (iv32 != tkey->rx_iv32) {
405 /* Previously cached Phase1 result was already lost, so
406 * it needs to be recalculated for the next packet. */
407 tkey->rx_phase1_done = 0;
408 }
409 if (net_ratelimit()) {
410 printk(KERN_DEBUG "TKIP: ICV error detected: STA="
411 MAC_FMT "\n", MAC_ARG(hdr->addr2));
412 }
413 tkey->dot11RSNAStatsTKIPICVErrors++;
414 return -5;
415 }
416
417 /* Update real counters only after Michael MIC verification has
418 * completed */
419 tkey->rx_iv32_new = iv32;
420 tkey->rx_iv16_new = iv16;
421
422 /* Remove IV and ICV */
423 memmove(skb->data + 8, skb->data, hdr_len);
424 skb_pull(skb, 8);
425 skb_trim(skb, skb->len - 4);
426
427 return keyidx;
428}
429
430
431static int michael_mic(struct ieee80211_tkip_data *tkey, u8 *key, u8 *hdr,
432 u8 *data, size_t data_len, u8 *mic)
433{
434 struct scatterlist sg[2];
435
436 if (tkey->tfm_michael == NULL) {
437 printk(KERN_WARNING "michael_mic: tfm_michael == NULL\n");
438 return -1;
439 }
440 sg[0].page = virt_to_page(hdr);
441 sg[0].offset = offset_in_page(hdr);
442 sg[0].length = 16;
443
444 sg[1].page = virt_to_page(data);
445 sg[1].offset = offset_in_page(data);
446 sg[1].length = data_len;
447
448 crypto_digest_init(tkey->tfm_michael);
449 crypto_digest_setkey(tkey->tfm_michael, key, 8);
450 crypto_digest_update(tkey->tfm_michael, sg, 2);
451 crypto_digest_final(tkey->tfm_michael, mic);
452
453 return 0;
454}
455
456static void michael_mic_hdr(struct sk_buff *skb, u8 *hdr)
457{
458 struct ieee80211_hdr *hdr11;
459
460 hdr11 = (struct ieee80211_hdr *) skb->data;
461 switch (le16_to_cpu(hdr11->frame_ctl) &
462 (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) {
463 case IEEE80211_FCTL_TODS:
464 memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */
465 memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */
466 break;
467 case IEEE80211_FCTL_FROMDS:
468 memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */
469 memcpy(hdr + ETH_ALEN, hdr11->addr3, ETH_ALEN); /* SA */
470 break;
471 case IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS:
472 memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */
473 memcpy(hdr + ETH_ALEN, hdr11->addr4, ETH_ALEN); /* SA */
474 break;
475 case 0:
476 memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */
477 memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */
478 break;
479 }
480
481 hdr[12] = 0; /* priority */
482 hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */
483}
484
485
486static int ieee80211_michael_mic_add(struct sk_buff *skb, int hdr_len, void *priv)
487{
488 struct ieee80211_tkip_data *tkey = priv;
489 u8 *pos;
490
491 if (skb_tailroom(skb) < 8 || skb->len < hdr_len) {
492 printk(KERN_DEBUG "Invalid packet for Michael MIC add "
493 "(tailroom=%d hdr_len=%d skb->len=%d)\n",
494 skb_tailroom(skb), hdr_len, skb->len);
495 return -1;
496 }
497
498 michael_mic_hdr(skb, tkey->tx_hdr);
499 pos = skb_put(skb, 8);
500 if (michael_mic(tkey, &tkey->key[16], tkey->tx_hdr,
501 skb->data + hdr_len, skb->len - 8 - hdr_len, pos))
502 return -1;
503
504 return 0;
505}
506
507
508#if WIRELESS_EXT >= 18
509static void ieee80211_michael_mic_failure(struct net_device *dev,
510 struct ieee80211_hdr *hdr,
511 int keyidx)
512{
513 union iwreq_data wrqu;
514 struct iw_michaelmicfailure ev;
515
516 /* TODO: needed parameters: count, keyid, key type, TSC */
517 memset(&ev, 0, sizeof(ev));
518 ev.flags = keyidx & IW_MICFAILURE_KEY_ID;
519 if (hdr->addr1[0] & 0x01)
520 ev.flags |= IW_MICFAILURE_GROUP;
521 else
522 ev.flags |= IW_MICFAILURE_PAIRWISE;
523 ev.src_addr.sa_family = ARPHRD_ETHER;
524 memcpy(ev.src_addr.sa_data, hdr->addr2, ETH_ALEN);
525 memset(&wrqu, 0, sizeof(wrqu));
526 wrqu.data.length = sizeof(ev);
527 wireless_send_event(dev, IWEVMICHAELMICFAILURE, &wrqu, (char *) &ev);
528}
529#elif WIRELESS_EXT >= 15
530static void ieee80211_michael_mic_failure(struct net_device *dev,
531 struct ieee80211_hdr *hdr,
532 int keyidx)
533{
534 union iwreq_data wrqu;
535 char buf[128];
536
537 /* TODO: needed parameters: count, keyid, key type, TSC */
538 sprintf(buf, "MLME-MICHAELMICFAILURE.indication(keyid=%d %scast addr="
539 MAC_FMT ")", keyidx, hdr->addr1[0] & 0x01 ? "broad" : "uni",
540 MAC_ARG(hdr->addr2));
541 memset(&wrqu, 0, sizeof(wrqu));
542 wrqu.data.length = strlen(buf);
543 wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf);
544}
545#else /* WIRELESS_EXT >= 15 */
546static inline void ieee80211_michael_mic_failure(struct net_device *dev,
547 struct ieee80211_hdr *hdr,
548 int keyidx)
549{
550}
551#endif /* WIRELESS_EXT >= 15 */
552
553
554static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx,
555 int hdr_len, void *priv)
556{
557 struct ieee80211_tkip_data *tkey = priv;
558 u8 mic[8];
559
560 if (!tkey->key_set)
561 return -1;
562
563 michael_mic_hdr(skb, tkey->rx_hdr);
564 if (michael_mic(tkey, &tkey->key[24], tkey->rx_hdr,
565 skb->data + hdr_len, skb->len - 8 - hdr_len, mic))
566 return -1;
567 if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) {
568 struct ieee80211_hdr *hdr;
569 hdr = (struct ieee80211_hdr *) skb->data;
570 printk(KERN_DEBUG "%s: Michael MIC verification failed for "
571 "MSDU from " MAC_FMT " keyidx=%d\n",
572 skb->dev ? skb->dev->name : "N/A", MAC_ARG(hdr->addr2),
573 keyidx);
574 if (skb->dev)
575 ieee80211_michael_mic_failure(skb->dev, hdr, keyidx);
576 tkey->dot11RSNAStatsTKIPLocalMICFailures++;
577 return -1;
578 }
579
580 /* Update TSC counters for RX now that the packet verification has
581 * completed. */
582 tkey->rx_iv32 = tkey->rx_iv32_new;
583 tkey->rx_iv16 = tkey->rx_iv16_new;
584
585 skb_trim(skb, skb->len - 8);
586
587 return 0;
588}
589
590
591static int ieee80211_tkip_set_key(void *key, int len, u8 *seq, void *priv)
592{
593 struct ieee80211_tkip_data *tkey = priv;
594 int keyidx;
595 struct crypto_tfm *tfm = tkey->tfm_michael;
596 struct crypto_tfm *tfm2 = tkey->tfm_arc4;
597
598 keyidx = tkey->key_idx;
599 memset(tkey, 0, sizeof(*tkey));
600 tkey->key_idx = keyidx;
601 tkey->tfm_michael = tfm;
602 tkey->tfm_arc4 = tfm2;
603 if (len == TKIP_KEY_LEN) {
604 memcpy(tkey->key, key, TKIP_KEY_LEN);
605 tkey->key_set = 1;
606 tkey->tx_iv16 = 1; /* TSC is initialized to 1 */
607 if (seq) {
608 tkey->rx_iv32 = (seq[5] << 24) | (seq[4] << 16) |
609 (seq[3] << 8) | seq[2];
610 tkey->rx_iv16 = (seq[1] << 8) | seq[0];
611 }
612 } else if (len == 0)
613 tkey->key_set = 0;
614 else
615 return -1;
616
617 return 0;
618}
619
620
621static int ieee80211_tkip_get_key(void *key, int len, u8 *seq, void *priv)
622{
623 struct ieee80211_tkip_data *tkey = priv;
624
625 if (len < TKIP_KEY_LEN)
626 return -1;
627
628 if (!tkey->key_set)
629 return 0;
630 memcpy(key, tkey->key, TKIP_KEY_LEN);
631
632 if (seq) {
633 /* Return the sequence number of the last transmitted frame. */
634 u16 iv16 = tkey->tx_iv16;
635 u32 iv32 = tkey->tx_iv32;
636 if (iv16 == 0)
637 iv32--;
638 iv16--;
639 seq[0] = tkey->tx_iv16;
640 seq[1] = tkey->tx_iv16 >> 8;
641 seq[2] = tkey->tx_iv32;
642 seq[3] = tkey->tx_iv32 >> 8;
643 seq[4] = tkey->tx_iv32 >> 16;
644 seq[5] = tkey->tx_iv32 >> 24;
645 }
646
647 return TKIP_KEY_LEN;
648}
649
650
651static char * ieee80211_tkip_print_stats(char *p, void *priv)
652{
653 struct ieee80211_tkip_data *tkip = priv;
654 p += sprintf(p, "key[%d] alg=TKIP key_set=%d "
655 "tx_pn=%02x%02x%02x%02x%02x%02x "
656 "rx_pn=%02x%02x%02x%02x%02x%02x "
657 "replays=%d icv_errors=%d local_mic_failures=%d\n",
658 tkip->key_idx, tkip->key_set,
659 (tkip->tx_iv32 >> 24) & 0xff,
660 (tkip->tx_iv32 >> 16) & 0xff,
661 (tkip->tx_iv32 >> 8) & 0xff,
662 tkip->tx_iv32 & 0xff,
663 (tkip->tx_iv16 >> 8) & 0xff,
664 tkip->tx_iv16 & 0xff,
665 (tkip->rx_iv32 >> 24) & 0xff,
666 (tkip->rx_iv32 >> 16) & 0xff,
667 (tkip->rx_iv32 >> 8) & 0xff,
668 tkip->rx_iv32 & 0xff,
669 (tkip->rx_iv16 >> 8) & 0xff,
670 tkip->rx_iv16 & 0xff,
671 tkip->dot11RSNAStatsTKIPReplays,
672 tkip->dot11RSNAStatsTKIPICVErrors,
673 tkip->dot11RSNAStatsTKIPLocalMICFailures);
674 return p;
675}
676
677
678static struct ieee80211_crypto_ops ieee80211_crypt_tkip = {
679 .name = "TKIP",
680 .init = ieee80211_tkip_init,
681 .deinit = ieee80211_tkip_deinit,
682 .encrypt_mpdu = ieee80211_tkip_encrypt,
683 .decrypt_mpdu = ieee80211_tkip_decrypt,
684 .encrypt_msdu = ieee80211_michael_mic_add,
685 .decrypt_msdu = ieee80211_michael_mic_verify,
686 .set_key = ieee80211_tkip_set_key,
687 .get_key = ieee80211_tkip_get_key,
688 .print_stats = ieee80211_tkip_print_stats,
689 .extra_prefix_len = 4 + 4, /* IV + ExtIV */
690 .extra_postfix_len = 8 + 4, /* MIC + ICV */
691 .owner = THIS_MODULE,
692};
693
694
695static int __init ieee80211_crypto_tkip_init(void)
696{
697 return ieee80211_register_crypto_ops(&ieee80211_crypt_tkip);
698}
699
700
701static void __exit ieee80211_crypto_tkip_exit(void)
702{
703 ieee80211_unregister_crypto_ops(&ieee80211_crypt_tkip);
704}
705
706
707module_init(ieee80211_crypto_tkip_init);
708module_exit(ieee80211_crypto_tkip_exit);
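
Note: the encrypt/decrypt paths above split TKIP key mixing exactly as the specification does: phase 1 depends only on the temporal key, the transmitter address and the high 32 bits of the TSC (IV32), so its result is cached in tx_ttak/rx_ttak and reused until IV32 changes, while phase 2 folds in the low 16 bits (IV16) for every frame. Condensed from the transmit path in this file:

	if (!tkey->tx_phase1_done) {
		tkip_mixing_phase1(tkey->tx_ttak, tkey->key, hdr->addr2,
				   tkey->tx_iv32);
		tkey->tx_phase1_done = 1;	/* valid for 2^16 frames */
	}
	tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak, tkey->tx_iv16);

	if (++tkey->tx_iv16 == 0) {		/* IV16 wrapped */
		tkey->tx_phase1_done = 0;	/* redo phase 1 ...    */
		tkey->tx_iv32++;		/* ... for the next IV32 */
	}
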
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
new file mode 100644
index 000000000000..bec1d3470d39
--- /dev/null
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -0,0 +1,272 @@
1/*
2 * Host AP crypt: host-based WEP encryption implementation for Host AP driver
3 *
4 * Copyright (c) 2002-2004, Jouni Malinen <jkmaline@cc.hut.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. See README and COPYING for
9 * more details.
10 */
11
12#include <linux/config.h>
13#include <linux/version.h>
14#include <linux/module.h>
15#include <linux/init.h>
16#include <linux/slab.h>
17#include <linux/random.h>
18#include <linux/skbuff.h>
19#include <asm/string.h>
20
21#include <net/ieee80211.h>
22
23
24#include <linux/crypto.h>
25#include <asm/scatterlist.h>
26#include <linux/crc32.h>
27
28MODULE_AUTHOR("Jouni Malinen");
29MODULE_DESCRIPTION("Host AP crypt: WEP");
30MODULE_LICENSE("GPL");
31
32
33struct prism2_wep_data {
34 u32 iv;
35#define WEP_KEY_LEN 13
36 u8 key[WEP_KEY_LEN + 1];
37 u8 key_len;
38 u8 key_idx;
39 struct crypto_tfm *tfm;
40};
41
42
43static void * prism2_wep_init(int keyidx)
44{
45 struct prism2_wep_data *priv;
46
47 priv = kmalloc(sizeof(*priv), GFP_ATOMIC);
48 if (priv == NULL)
49 goto fail;
50 memset(priv, 0, sizeof(*priv));
51 priv->key_idx = keyidx;
52
53 priv->tfm = crypto_alloc_tfm("arc4", 0);
54 if (priv->tfm == NULL) {
55 printk(KERN_DEBUG "ieee80211_crypt_wep: could not allocate "
56 "crypto API arc4\n");
57 goto fail;
58 }
59
60 /* start WEP IV from a random value */
61 get_random_bytes(&priv->iv, 4);
62
63 return priv;
64
65fail:
66 if (priv) {
67 if (priv->tfm)
68 crypto_free_tfm(priv->tfm);
69 kfree(priv);
70 }
71 return NULL;
72}
73
74
75static void prism2_wep_deinit(void *priv)
76{
77 struct prism2_wep_data *_priv = priv;
78 if (_priv && _priv->tfm)
79 crypto_free_tfm(_priv->tfm);
80 kfree(priv);
81}
82
83
84/* Perform WEP encryption on given skb that has at least 4 bytes of headroom
85 * for IV and 4 bytes of tailroom for ICV. Both IV and ICV will be transmitted,
86 * so the payload length increases with 8 bytes.
87 *
88 * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data))
89 */
90static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
91{
92 struct prism2_wep_data *wep = priv;
93 u32 crc, klen, len;
94 u8 key[WEP_KEY_LEN + 3];
95 u8 *pos, *icv;
96 struct scatterlist sg;
97
98 if (skb_headroom(skb) < 4 || skb_tailroom(skb) < 4 ||
99 skb->len < hdr_len)
100 return -1;
101
102 len = skb->len - hdr_len;
103 pos = skb_push(skb, 4);
104 memmove(pos, pos + 4, hdr_len);
105 pos += hdr_len;
106
107 klen = 3 + wep->key_len;
108
109 wep->iv++;
110
111 /* Fluhrer, Mantin, and Shamir have reported weaknesses in the key
112 * scheduling algorithm of RC4. At least IVs (KeyByte + 3, 0xff, N)
113 * can be used to speedup attacks, so avoid using them. */
114 if ((wep->iv & 0xff00) == 0xff00) {
115 u8 B = (wep->iv >> 16) & 0xff;
116 if (B >= 3 && B < klen)
117 wep->iv += 0x0100;
118 }
119
120 /* Prepend 24-bit IV to RC4 key and TX frame */
121 *pos++ = key[0] = (wep->iv >> 16) & 0xff;
122 *pos++ = key[1] = (wep->iv >> 8) & 0xff;
123 *pos++ = key[2] = wep->iv & 0xff;
124 *pos++ = wep->key_idx << 6;
125
126 /* Copy rest of the WEP key (the secret part) */
127 memcpy(key + 3, wep->key, wep->key_len);
128
129 /* Append little-endian CRC32 and encrypt it to produce ICV */
130 crc = ~crc32_le(~0, pos, len);
131 icv = skb_put(skb, 4);
132 icv[0] = crc;
133 icv[1] = crc >> 8;
134 icv[2] = crc >> 16;
135 icv[3] = crc >> 24;
136
137 crypto_cipher_setkey(wep->tfm, key, klen);
138 sg.page = virt_to_page(pos);
139 sg.offset = offset_in_page(pos);
140 sg.length = len + 4;
141 crypto_cipher_encrypt(wep->tfm, &sg, &sg, len + 4);
142
143 return 0;
144}
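For reference, the ICV built above is the standard CRC-32 serialized least-significant byte first; a minimal userspace sketch (not part of the patch) can reproduce the four pre-encryption ICV bytes, assuming zlib's crc32(), which computes the same value as the kernel's ~crc32_le(~0, buf, len) convention:

/* Illustrative userspace sketch: reproduce the WEP ICV bytes for a
 * plaintext payload, before RC4 encryption is applied. */
#include <stdint.h>
#include <stdio.h>
#include <zlib.h>

static void wep_icv(const uint8_t *data, size_t len, uint8_t icv[4])
{
	uint32_t crc = crc32(0L, data, len);	/* standard CRC-32 */

	icv[0] = crc & 0xff;		/* little-endian serialization, */
	icv[1] = (crc >> 8) & 0xff;	/* matching icv[0..3] above */
	icv[2] = (crc >> 16) & 0xff;
	icv[3] = (crc >> 24) & 0xff;
}

int main(void)
{
	const uint8_t payload[] = "example MPDU payload";
	uint8_t icv[4];

	wep_icv(payload, sizeof(payload) - 1, icv);
	printf("ICV: %02x %02x %02x %02x\n", icv[0], icv[1], icv[2], icv[3]);
	return 0;
}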
145
146
147/* Perform WEP decryption on given buffer. Buffer includes whole WEP part of
148 * the frame: IV (4 bytes), encrypted payload (including SNAP header),
149 * ICV (4 bytes). len includes both IV and ICV.
150 *
151 * Returns 0 if frame was decrypted successfully and ICV was correct and -1 on
152 * failure. If frame is OK, IV and ICV will be removed.
153 */
154static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
155{
156 struct prism2_wep_data *wep = priv;
157 u32 crc, klen, plen;
158 u8 key[WEP_KEY_LEN + 3];
159 u8 keyidx, *pos, icv[4];
160 struct scatterlist sg;
161
162 if (skb->len < hdr_len + 8)
163 return -1;
164
165 pos = skb->data + hdr_len;
166 key[0] = *pos++;
167 key[1] = *pos++;
168 key[2] = *pos++;
169 keyidx = *pos++ >> 6;
170 if (keyidx != wep->key_idx)
171 return -1;
172
173 klen = 3 + wep->key_len;
174
175 /* Copy rest of the WEP key (the secret part) */
176 memcpy(key + 3, wep->key, wep->key_len);
177
178 /* Apply RC4 to data and compute CRC32 over decrypted data */
179 plen = skb->len - hdr_len - 8;
180
181 crypto_cipher_setkey(wep->tfm, key, klen);
182 sg.page = virt_to_page(pos);
183 sg.offset = offset_in_page(pos);
184 sg.length = plen + 4;
185 crypto_cipher_decrypt(wep->tfm, &sg, &sg, plen + 4);
186
187 crc = ~crc32_le(~0, pos, plen);
188 icv[0] = crc;
189 icv[1] = crc >> 8;
190 icv[2] = crc >> 16;
191 icv[3] = crc >> 24;
192 if (memcmp(icv, pos + plen, 4) != 0) {
193 /* ICV mismatch - drop frame */
194 return -2;
195 }
196
197 /* Remove IV and ICV */
198 memmove(skb->data + 4, skb->data, hdr_len);
199 skb_pull(skb, 4);
200 skb_trim(skb, skb->len - 4);
201
202 return 0;
203}
204
205
206static int prism2_wep_set_key(void *key, int len, u8 *seq, void *priv)
207{
208 struct prism2_wep_data *wep = priv;
209
210 if (len < 0 || len > WEP_KEY_LEN)
211 return -1;
212
213 memcpy(wep->key, key, len);
214 wep->key_len = len;
215
216 return 0;
217}
218
219
220static int prism2_wep_get_key(void *key, int len, u8 *seq, void *priv)
221{
222 struct prism2_wep_data *wep = priv;
223
224 if (len < wep->key_len)
225 return -1;
226
227 memcpy(key, wep->key, wep->key_len);
228
229 return wep->key_len;
230}
231
232
233static char * prism2_wep_print_stats(char *p, void *priv)
234{
235 struct prism2_wep_data *wep = priv;
236 p += sprintf(p, "key[%d] alg=WEP len=%d\n",
237 wep->key_idx, wep->key_len);
238 return p;
239}
240
241
242static struct ieee80211_crypto_ops ieee80211_crypt_wep = {
243 .name = "WEP",
244 .init = prism2_wep_init,
245 .deinit = prism2_wep_deinit,
246 .encrypt_mpdu = prism2_wep_encrypt,
247 .decrypt_mpdu = prism2_wep_decrypt,
248 .encrypt_msdu = NULL,
249 .decrypt_msdu = NULL,
250 .set_key = prism2_wep_set_key,
251 .get_key = prism2_wep_get_key,
252 .print_stats = prism2_wep_print_stats,
253 .extra_prefix_len = 4, /* IV */
254 .extra_postfix_len = 4, /* ICV */
255 .owner = THIS_MODULE,
256};
257
258
259static int __init ieee80211_crypto_wep_init(void)
260{
261 return ieee80211_register_crypto_ops(&ieee80211_crypt_wep);
262}
263
264
265static void __exit ieee80211_crypto_wep_exit(void)
266{
267 ieee80211_unregister_crypto_ops(&ieee80211_crypt_wep);
268}
269
270
271module_init(ieee80211_crypto_wep_init);
272module_exit(ieee80211_crypto_wep_exit);
diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c
new file mode 100644
index 000000000000..553acb2e93d5
--- /dev/null
+++ b/net/ieee80211/ieee80211_module.c
@@ -0,0 +1,299 @@
1/*******************************************************************************
2
3 Copyright(c) 2004 Intel Corporation. All rights reserved.
4
5 Portions of this file are based on the WEP enablement code provided by the
6 Host AP project hostap-drivers v0.1.3
7 Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen
8 <jkmaline@cc.hut.fi>
9 Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
10
11 This program is free software; you can redistribute it and/or modify it
12 under the terms of version 2 of the GNU General Public License as
13 published by the Free Software Foundation.
14
15 This program is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 more details.
19
20 You should have received a copy of the GNU General Public License along with
21 this program; if not, write to the Free Software Foundation, Inc., 59
22 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23
24 The full GNU General Public License is included in this distribution in the
25 file called LICENSE.
26
27 Contact Information:
28 James P. Ketrenos <ipw2100-admin@linux.intel.com>
29 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
30
31*******************************************************************************/
32
33#include <linux/compiler.h>
34#include <linux/config.h>
35#include <linux/errno.h>
36#include <linux/if_arp.h>
37#include <linux/in6.h>
38#include <linux/in.h>
39#include <linux/ip.h>
40#include <linux/kernel.h>
41#include <linux/module.h>
42#include <linux/netdevice.h>
43#include <linux/proc_fs.h>
44#include <linux/skbuff.h>
45#include <linux/slab.h>
46#include <linux/tcp.h>
47#include <linux/types.h>
48#include <linux/version.h>
49#include <linux/wireless.h>
50#include <linux/etherdevice.h>
51#include <asm/uaccess.h>
52#include <net/arp.h>
53
54#include <net/ieee80211.h>
55
56MODULE_DESCRIPTION("802.11 data/management/control stack");
57MODULE_AUTHOR("Copyright (C) 2004 Intel Corporation <jketreno@linux.intel.com>");
58MODULE_LICENSE("GPL");
59
60#define DRV_NAME "ieee80211"
61
62static inline int ieee80211_networks_allocate(struct ieee80211_device *ieee)
63{
64 if (ieee->networks)
65 return 0;
66
67 ieee->networks = kmalloc(
68 MAX_NETWORK_COUNT * sizeof(struct ieee80211_network),
69 GFP_KERNEL);
70 if (!ieee->networks) {
71 printk(KERN_WARNING "%s: Out of memory allocating beacons\n",
72 ieee->dev->name);
73 return -ENOMEM;
74 }
75
76 memset(ieee->networks, 0,
77 MAX_NETWORK_COUNT * sizeof(struct ieee80211_network));
78
79 return 0;
80}
81
82static inline void ieee80211_networks_free(struct ieee80211_device *ieee)
83{
84 if (!ieee->networks)
85 return;
86 kfree(ieee->networks);
87 ieee->networks = NULL;
88}
89
90static inline void ieee80211_networks_initialize(struct ieee80211_device *ieee)
91{
92 int i;
93
94 INIT_LIST_HEAD(&ieee->network_free_list);
95 INIT_LIST_HEAD(&ieee->network_list);
96 for (i = 0; i < MAX_NETWORK_COUNT; i++)
97 list_add_tail(&ieee->networks[i].list, &ieee->network_free_list);
98}
99
100
101struct net_device *alloc_ieee80211(int sizeof_priv)
102{
103 struct ieee80211_device *ieee;
104 struct net_device *dev;
105 int err;
106
107 IEEE80211_DEBUG_INFO("Initializing...\n");
108
109 dev = alloc_etherdev(sizeof(struct ieee80211_device) + sizeof_priv);
110 if (!dev) {
111		IEEE80211_ERROR("Unable to allocate network device.\n");
112 goto failed;
113 }
114 ieee = netdev_priv(dev);
115 dev->hard_start_xmit = ieee80211_xmit;
116
117 ieee->dev = dev;
118
119 err = ieee80211_networks_allocate(ieee);
120 if (err) {
121 IEEE80211_ERROR("Unable to allocate beacon storage: %d\n",
122 err);
123 goto failed;
124 }
125 ieee80211_networks_initialize(ieee);
126
127 /* Default fragmentation threshold is maximum payload size */
128 ieee->fts = DEFAULT_FTS;
129 ieee->scan_age = DEFAULT_MAX_SCAN_AGE;
130 ieee->open_wep = 1;
131
132 /* Default to enabling full open WEP with host based encrypt/decrypt */
133 ieee->host_encrypt = 1;
134 ieee->host_decrypt = 1;
135 ieee->ieee802_1x = 1; /* Default to supporting 802.1x */
136
137 INIT_LIST_HEAD(&ieee->crypt_deinit_list);
138 init_timer(&ieee->crypt_deinit_timer);
139 ieee->crypt_deinit_timer.data = (unsigned long)ieee;
140 ieee->crypt_deinit_timer.function = ieee80211_crypt_deinit_handler;
141
142 spin_lock_init(&ieee->lock);
143
144 ieee->wpa_enabled = 0;
145 ieee->tkip_countermeasures = 0;
146 ieee->drop_unencrypted = 0;
147 ieee->privacy_invoked = 0;
148 ieee->ieee802_1x = 1;
149
150 return dev;
151
152 failed:
153 if (dev)
154 free_netdev(dev);
155 return NULL;
156}
157
158
159void free_ieee80211(struct net_device *dev)
160{
161 struct ieee80211_device *ieee = netdev_priv(dev);
162
163 int i;
164
165 del_timer_sync(&ieee->crypt_deinit_timer);
166 ieee80211_crypt_deinit_entries(ieee, 1);
167
168 for (i = 0; i < WEP_KEYS; i++) {
169 struct ieee80211_crypt_data *crypt = ieee->crypt[i];
170 if (crypt) {
171 if (crypt->ops) {
172 crypt->ops->deinit(crypt->priv);
173 module_put(crypt->ops->owner);
174 }
175 kfree(crypt);
176 ieee->crypt[i] = NULL;
177 }
178 }
179
180 ieee80211_networks_free(ieee);
181 free_netdev(dev);
182}
183
184#ifdef CONFIG_IEEE80211_DEBUG
185
186static int debug = 0;
187u32 ieee80211_debug_level = 0;
188struct proc_dir_entry *ieee80211_proc = NULL;
189
190static int show_debug_level(char *page, char **start, off_t offset,
191 int count, int *eof, void *data)
192{
193 return snprintf(page, count, "0x%08X\n", ieee80211_debug_level);
194}
195
196static int store_debug_level(struct file *file, const char __user *buffer,
197 unsigned long count, void *data)
198{
199 char buf[] = "0x00000000";
200 char *p = (char *)buf;
201 unsigned long val;
202
203 if (count > sizeof(buf) - 1)
204 count = sizeof(buf) - 1;
205
206	if (copy_from_user(buf, buffer, count))
207		return -EFAULT;
208 buf[count] = 0;
209 /*
210 * what a FPOS... What, sscanf(buf, "%i", &val) would be too
211 * scary?
212 */
213 if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
214 p++;
215 if (p[0] == 'x' || p[0] == 'X')
216 p++;
217 val = simple_strtoul(p, &p, 16);
218 } else
219 val = simple_strtoul(p, &p, 10);
220 if (p == buf)
221 printk(KERN_INFO DRV_NAME
222 ": %s is not in hex or decimal form.\n", buf);
223 else
224 ieee80211_debug_level = val;
225
226 return strlen(buf);
227}
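As the comment above hints, the manual "0x" sniffing could be collapsed into a single base-0 conversion, since simple_strtoul (like userspace strtoul) auto-detects a hex prefix when given base 0. A hedged userspace sketch of the equivalent parse, not part of the patch:

/* Illustrative sketch: base 0 lets strtoul auto-detect "0x..." hex
 * versus plain decimal, with the same end-pointer failure check. */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *inputs[] = { "0x00000043", "67", "bogus" };
	char *end;
	unsigned long val;
	int i;

	for (i = 0; i < 3; i++) {
		val = strtoul(inputs[i], &end, 0);
		if (end == inputs[i])
			printf("'%s' is not in hex or decimal form.\n",
			       inputs[i]);
		else
			printf("'%s' -> 0x%08lX\n", inputs[i], val);
	}
	return 0;
}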
228
229static int __init ieee80211_init(void)
230{
231 struct proc_dir_entry *e;
232
233 ieee80211_debug_level = debug;
234 ieee80211_proc = create_proc_entry(DRV_NAME, S_IFDIR, proc_net);
235 if (ieee80211_proc == NULL) {
236 IEEE80211_ERROR("Unable to create " DRV_NAME
237 " proc directory\n");
238 return -EIO;
239 }
240 e = create_proc_entry("debug_level", S_IFREG | S_IRUGO | S_IWUSR,
241 ieee80211_proc);
242 if (!e) {
243 remove_proc_entry(DRV_NAME, proc_net);
244 ieee80211_proc = NULL;
245 return -EIO;
246 }
247 e->read_proc = show_debug_level;
248 e->write_proc = store_debug_level;
249 e->data = NULL;
250
251 return 0;
252}
253
254static void __exit ieee80211_exit(void)
255{
256 if (ieee80211_proc) {
257 remove_proc_entry("debug_level", ieee80211_proc);
258 remove_proc_entry(DRV_NAME, proc_net);
259 ieee80211_proc = NULL;
260 }
261}
262
263#include <linux/moduleparam.h>
264module_param(debug, int, 0444);
265MODULE_PARM_DESC(debug, "debug output mask");
266
267
268module_exit(ieee80211_exit);
269module_init(ieee80211_init);
270#endif
271
272
273const char *escape_essid(const char *essid, u8 essid_len) {
274 static char escaped[IW_ESSID_MAX_SIZE * 2 + 1];
275 const char *s = essid;
276 char *d = escaped;
277
278 if (ieee80211_is_empty_essid(essid, essid_len)) {
279 memcpy(escaped, "<hidden>", sizeof("<hidden>"));
280 return escaped;
281 }
282
283 essid_len = min(essid_len, (u8)IW_ESSID_MAX_SIZE);
284 while (essid_len--) {
285 if (*s == '\0') {
286 *d++ = '\\';
287 *d++ = '0';
288 s++;
289 } else {
290 *d++ = *s++;
291 }
292 }
293 *d = '\0';
294 return escaped;
295}
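To see why the escaping matters: an ESSID is a raw octet string, so an embedded NUL would silently truncate a plain "%s" printk. A self-contained userspace sketch of the same loop (not part of the patch), using a hypothetical ESSID value:

/* Illustrative sketch: the escaping loop above, showing that an
 * embedded NUL becomes the two visible characters "\0". */
#include <stdio.h>

int main(void)
{
	const char essid[] = { 'l', 'a', 'b', '\0', 'n', 'e', 't' };
	char escaped[sizeof(essid) * 2 + 1], *d = escaped;
	size_t i;

	for (i = 0; i < sizeof(essid); i++) {
		if (essid[i] == '\0') {
			*d++ = '\\';
			*d++ = '0';
		} else {
			*d++ = essid[i];
		}
	}
	*d = '\0';
	printf("'%s'\n", escaped);	/* prints 'lab\0net' */
	return 0;
}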
296
297EXPORT_SYMBOL(alloc_ieee80211);
298EXPORT_SYMBOL(free_ieee80211);
299EXPORT_SYMBOL(escape_essid);
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
new file mode 100644
index 000000000000..a5905f53aed7
--- /dev/null
+++ b/net/ieee80211/ieee80211_rx.c
@@ -0,0 +1,1189 @@
1/*
2 * Original code based on Host AP (software wireless LAN access point) driver
3 * for Intersil Prism2/2.5/3 - hostap.o module, common routines
4 *
5 * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen
6 * <jkmaline@cc.hut.fi>
7 * Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
8 * Copyright (c) 2004, Intel Corporation
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation. See README and COPYING for
13 * more details.
14 */
15
16#include <linux/compiler.h>
17#include <linux/config.h>
18#include <linux/errno.h>
19#include <linux/if_arp.h>
20#include <linux/in6.h>
21#include <linux/in.h>
22#include <linux/ip.h>
23#include <linux/kernel.h>
24#include <linux/module.h>
25#include <linux/netdevice.h>
26#include <linux/proc_fs.h>
27#include <linux/skbuff.h>
28#include <linux/slab.h>
29#include <linux/tcp.h>
30#include <linux/types.h>
31#include <linux/version.h>
32#include <linux/wireless.h>
33#include <linux/etherdevice.h>
34#include <asm/uaccess.h>
35#include <linux/ctype.h>
36
37#include <net/ieee80211.h>
38
39static inline void ieee80211_monitor_rx(struct ieee80211_device *ieee,
40 struct sk_buff *skb,
41 struct ieee80211_rx_stats *rx_stats)
42{
43 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
44 u16 fc = le16_to_cpu(hdr->frame_ctl);
45
46 skb->dev = ieee->dev;
47 skb->mac.raw = skb->data;
48 skb_pull(skb, ieee80211_get_hdrlen(fc));
49 skb->pkt_type = PACKET_OTHERHOST;
50 skb->protocol = __constant_htons(ETH_P_80211_RAW);
51 memset(skb->cb, 0, sizeof(skb->cb));
52 netif_rx(skb);
53}
54
55
56/* Called only as a tasklet (software IRQ) */
57static struct ieee80211_frag_entry *
58ieee80211_frag_cache_find(struct ieee80211_device *ieee, unsigned int seq,
59 unsigned int frag, u8 *src, u8 *dst)
60{
61 struct ieee80211_frag_entry *entry;
62 int i;
63
64 for (i = 0; i < IEEE80211_FRAG_CACHE_LEN; i++) {
65 entry = &ieee->frag_cache[i];
66 if (entry->skb != NULL &&
67 time_after(jiffies, entry->first_frag_time + 2 * HZ)) {
68 IEEE80211_DEBUG_FRAG(
69 "expiring fragment cache entry "
70 "seq=%u last_frag=%u\n",
71 entry->seq, entry->last_frag);
72 dev_kfree_skb_any(entry->skb);
73 entry->skb = NULL;
74 }
75
76 if (entry->skb != NULL && entry->seq == seq &&
77 (entry->last_frag + 1 == frag || frag == -1) &&
78 memcmp(entry->src_addr, src, ETH_ALEN) == 0 &&
79 memcmp(entry->dst_addr, dst, ETH_ALEN) == 0)
80 return entry;
81 }
82
83 return NULL;
84}
85
86/* Called only as a tasklet (software IRQ) */
87static struct sk_buff *
88ieee80211_frag_cache_get(struct ieee80211_device *ieee,
89 struct ieee80211_hdr *hdr)
90{
91 struct sk_buff *skb = NULL;
92 u16 sc;
93 unsigned int frag, seq;
94 struct ieee80211_frag_entry *entry;
95
96 sc = le16_to_cpu(hdr->seq_ctl);
97 frag = WLAN_GET_SEQ_FRAG(sc);
98 seq = WLAN_GET_SEQ_SEQ(sc);
99
100 if (frag == 0) {
101 /* Reserve enough space to fit maximum frame length */
102 skb = dev_alloc_skb(ieee->dev->mtu +
103 sizeof(struct ieee80211_hdr) +
104 8 /* LLC */ +
105 2 /* alignment */ +
106 8 /* WEP */ + ETH_ALEN /* WDS */);
107 if (skb == NULL)
108 return NULL;
109
110 entry = &ieee->frag_cache[ieee->frag_next_idx];
111 ieee->frag_next_idx++;
112 if (ieee->frag_next_idx >= IEEE80211_FRAG_CACHE_LEN)
113 ieee->frag_next_idx = 0;
114
115 if (entry->skb != NULL)
116 dev_kfree_skb_any(entry->skb);
117
118 entry->first_frag_time = jiffies;
119 entry->seq = seq;
120 entry->last_frag = frag;
121 entry->skb = skb;
122 memcpy(entry->src_addr, hdr->addr2, ETH_ALEN);
123 memcpy(entry->dst_addr, hdr->addr1, ETH_ALEN);
124 } else {
125 /* received a fragment of a frame for which the head fragment
126 * should have already been received */
127 entry = ieee80211_frag_cache_find(ieee, seq, frag, hdr->addr2,
128 hdr->addr1);
129 if (entry != NULL) {
130 entry->last_frag = frag;
131 skb = entry->skb;
132 }
133 }
134
135 return skb;
136}
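The seq/frag split used above follows the 802.11 sequence-control layout: the low 4 bits carry the fragment number and the high 12 bits the sequence number. A minimal sketch (not part of the patch) of what the WLAN_GET_SEQ_FRAG/WLAN_GET_SEQ_SEQ macros are assumed to compute:

/* Illustrative sketch: assumed bit layout of the 802.11 sequence-control
 * field, after le16_to_cpu() conversion of hdr->seq_ctl. */
#include <stdint.h>
#include <stdio.h>

#define SEQ_FRAG(sc)	((sc) & 0x000f)		/* bits 0..3: fragment no. */
#define SEQ_SEQ(sc)	(((sc) & 0xfff0) >> 4)	/* bits 4..15: sequence no. */

int main(void)
{
	uint16_t sc = 0x1a42;	/* hypothetical seq_ctl value */

	printf("seq=%u frag=%u\n", SEQ_SEQ(sc), SEQ_FRAG(sc));	/* 420, 2 */
	return 0;
}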
137
138
139/* Called only as a tasklet (software IRQ) */
140static int ieee80211_frag_cache_invalidate(struct ieee80211_device *ieee,
141 struct ieee80211_hdr *hdr)
142{
143 u16 sc;
144 unsigned int seq;
145 struct ieee80211_frag_entry *entry;
146
147 sc = le16_to_cpu(hdr->seq_ctl);
148 seq = WLAN_GET_SEQ_SEQ(sc);
149
150 entry = ieee80211_frag_cache_find(ieee, seq, -1, hdr->addr2,
151 hdr->addr1);
152
153 if (entry == NULL) {
154 IEEE80211_DEBUG_FRAG(
155 "could not invalidate fragment cache "
156 "entry (seq=%u)\n", seq);
157 return -1;
158 }
159
160 entry->skb = NULL;
161 return 0;
162}
163
164
165#ifdef NOT_YET
166/* ieee80211_rx_frame_mgmt
167 *
168 * Responsible for handling management control frames
169 *
170 * Called by ieee80211_rx */
171static inline int
172ieee80211_rx_frame_mgmt(struct ieee80211_device *ieee, struct sk_buff *skb,
173 struct ieee80211_rx_stats *rx_stats, u16 type,
174 u16 stype)
175{
176 if (ieee->iw_mode == IW_MODE_MASTER) {
177		printk(KERN_DEBUG "%s: Master mode not yet supported.\n",
178 ieee->dev->name);
179 return 0;
180/*
181 hostap_update_sta_ps(ieee, (struct hostap_ieee80211_hdr *)
182 skb->data);*/
183 }
184
185 if (ieee->hostapd && type == WLAN_FC_TYPE_MGMT) {
186 if (stype == WLAN_FC_STYPE_BEACON &&
187 ieee->iw_mode == IW_MODE_MASTER) {
188 struct sk_buff *skb2;
189 /* Process beacon frames also in kernel driver to
190 * update STA(AP) table statistics */
191 skb2 = skb_clone(skb, GFP_ATOMIC);
192 if (skb2)
193 hostap_rx(skb2->dev, skb2, rx_stats);
194 }
195
196 /* send management frames to the user space daemon for
197 * processing */
198 ieee->apdevstats.rx_packets++;
199 ieee->apdevstats.rx_bytes += skb->len;
200 prism2_rx_80211(ieee->apdev, skb, rx_stats, PRISM2_RX_MGMT);
201 return 0;
202 }
203
204 if (ieee->iw_mode == IW_MODE_MASTER) {
205 if (type != WLAN_FC_TYPE_MGMT && type != WLAN_FC_TYPE_CTRL) {
206 printk(KERN_DEBUG "%s: unknown management frame "
207 "(type=0x%02x, stype=0x%02x) dropped\n",
208 skb->dev->name, type, stype);
209 return -1;
210 }
211
212 hostap_rx(skb->dev, skb, rx_stats);
213 return 0;
214 }
215
216 printk(KERN_DEBUG "%s: hostap_rx_frame_mgmt: management frame "
217 "received in non-Host AP mode\n", skb->dev->name);
218 return -1;
219}
220#endif
221
222
223/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */
224/* Ethernet-II snap header (RFC1042 for most EtherTypes) */
225static unsigned char rfc1042_header[] =
226{ 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 };
227/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */
228static unsigned char bridge_tunnel_header[] =
229{ 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
230/* No encapsulation header if EtherType < 0x600 (=length) */
231
232/* Called by ieee80211_rx_frame_decrypt */
233static int ieee80211_is_eapol_frame(struct ieee80211_device *ieee,
234 struct sk_buff *skb)
235{
236 struct net_device *dev = ieee->dev;
237 u16 fc, ethertype;
238 struct ieee80211_hdr *hdr;
239 u8 *pos;
240
241 if (skb->len < 24)
242 return 0;
243
244 hdr = (struct ieee80211_hdr *) skb->data;
245 fc = le16_to_cpu(hdr->frame_ctl);
246
247 /* check that the frame is unicast frame to us */
248 if ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) ==
249 IEEE80211_FCTL_TODS &&
250 memcmp(hdr->addr1, dev->dev_addr, ETH_ALEN) == 0 &&
251 memcmp(hdr->addr3, dev->dev_addr, ETH_ALEN) == 0) {
252 /* ToDS frame with own addr BSSID and DA */
253 } else if ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) ==
254 IEEE80211_FCTL_FROMDS &&
255 memcmp(hdr->addr1, dev->dev_addr, ETH_ALEN) == 0) {
256 /* FromDS frame with own addr as DA */
257 } else
258 return 0;
259
260 if (skb->len < 24 + 8)
261 return 0;
262
263 /* check for port access entity Ethernet type */
264 pos = skb->data + 24;
265 ethertype = (pos[6] << 8) | pos[7];
266 if (ethertype == ETH_P_PAE)
267 return 1;
268
269 return 0;
270}
271
272/* Called only as a tasklet (software IRQ), by ieee80211_rx */
273static inline int
274ieee80211_rx_frame_decrypt(struct ieee80211_device* ieee, struct sk_buff *skb,
275 struct ieee80211_crypt_data *crypt)
276{
277 struct ieee80211_hdr *hdr;
278 int res, hdrlen;
279
280 if (crypt == NULL || crypt->ops->decrypt_mpdu == NULL)
281 return 0;
282
283 hdr = (struct ieee80211_hdr *) skb->data;
284 hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl));
285
286#ifdef CONFIG_IEEE80211_CRYPT_TKIP
287 if (ieee->tkip_countermeasures &&
288 strcmp(crypt->ops->name, "TKIP") == 0) {
289 if (net_ratelimit()) {
290 printk(KERN_DEBUG "%s: TKIP countermeasures: dropped "
291 "received packet from " MAC_FMT "\n",
292 ieee->dev->name, MAC_ARG(hdr->addr2));
293 }
294 return -1;
295 }
296#endif
297
298 atomic_inc(&crypt->refcnt);
299 res = crypt->ops->decrypt_mpdu(skb, hdrlen, crypt->priv);
300 atomic_dec(&crypt->refcnt);
301 if (res < 0) {
302 IEEE80211_DEBUG_DROP(
303 "decryption failed (SA=" MAC_FMT
304 ") res=%d\n", MAC_ARG(hdr->addr2), res);
305 if (res == -2)
306 IEEE80211_DEBUG_DROP("Decryption failed ICV "
307 "mismatch (key %d)\n",
308 skb->data[hdrlen + 3] >> 6);
309 ieee->ieee_stats.rx_discards_undecryptable++;
310 return -1;
311 }
312
313 return res;
314}
315
316
317/* Called only as a tasklet (software IRQ), by ieee80211_rx */
318static inline int
319ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device* ieee, struct sk_buff *skb,
320 int keyidx, struct ieee80211_crypt_data *crypt)
321{
322 struct ieee80211_hdr *hdr;
323 int res, hdrlen;
324
325 if (crypt == NULL || crypt->ops->decrypt_msdu == NULL)
326 return 0;
327
328 hdr = (struct ieee80211_hdr *) skb->data;
329 hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl));
330
331 atomic_inc(&crypt->refcnt);
332 res = crypt->ops->decrypt_msdu(skb, keyidx, hdrlen, crypt->priv);
333 atomic_dec(&crypt->refcnt);
334 if (res < 0) {
335 printk(KERN_DEBUG "%s: MSDU decryption/MIC verification failed"
336 " (SA=" MAC_FMT " keyidx=%d)\n",
337 ieee->dev->name, MAC_ARG(hdr->addr2), keyidx);
338 return -1;
339 }
340
341 return 0;
342}
343
344
345/* All received frames are sent to this function. @skb contains the frame in
346 * IEEE 802.11 format, i.e., in the format it was sent over air.
347 * This function is called only as a tasklet (software IRQ). */
348int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
349 struct ieee80211_rx_stats *rx_stats)
350{
351 struct net_device *dev = ieee->dev;
352 struct ieee80211_hdr *hdr;
353 size_t hdrlen;
354 u16 fc, type, stype, sc;
355 struct net_device_stats *stats;
356 unsigned int frag;
357 u8 *payload;
358 u16 ethertype;
359#ifdef NOT_YET
360	struct net_device *wds = NULL;
361	struct sk_buff *skb2 = NULL;
363 int frame_authorized = 0;
364 int from_assoc_ap = 0;
365 void *sta = NULL;
366#endif
367 u8 dst[ETH_ALEN];
368 u8 src[ETH_ALEN];
369 struct ieee80211_crypt_data *crypt = NULL;
370 int keyidx = 0;
371
372 hdr = (struct ieee80211_hdr *)skb->data;
373 stats = &ieee->stats;
374
375 if (skb->len < 10) {
376 printk(KERN_INFO "%s: SKB length < 10\n",
377 dev->name);
378 goto rx_dropped;
379 }
380
381 fc = le16_to_cpu(hdr->frame_ctl);
382 type = WLAN_FC_GET_TYPE(fc);
383 stype = WLAN_FC_GET_STYPE(fc);
384 sc = le16_to_cpu(hdr->seq_ctl);
385 frag = WLAN_GET_SEQ_FRAG(sc);
386 hdrlen = ieee80211_get_hdrlen(fc);
387
388#ifdef NOT_YET
389#if WIRELESS_EXT > 15
390 /* Put this code here so that we avoid duplicating it in all
391 * Rx paths. - Jean II */
392#ifdef IW_WIRELESS_SPY /* defined in iw_handler.h */
393 /* If spy monitoring on */
394 if (iface->spy_data.spy_number > 0) {
395 struct iw_quality wstats;
396 wstats.level = rx_stats->signal;
397 wstats.noise = rx_stats->noise;
398 wstats.updated = 6; /* No qual value */
399 /* Update spy records */
400 wireless_spy_update(dev, hdr->addr2, &wstats);
401 }
402#endif /* IW_WIRELESS_SPY */
403#endif /* WIRELESS_EXT > 15 */
404 hostap_update_rx_stats(local->ap, hdr, rx_stats);
405#endif
406
407#if WIRELESS_EXT > 15
408 if (ieee->iw_mode == IW_MODE_MONITOR) {
409 ieee80211_monitor_rx(ieee, skb, rx_stats);
410 stats->rx_packets++;
411 stats->rx_bytes += skb->len;
412 return 1;
413 }
414#endif
415
416 if (ieee->host_decrypt) {
417 int idx = 0;
418 if (skb->len >= hdrlen + 3)
419 idx = skb->data[hdrlen + 3] >> 6;
420 crypt = ieee->crypt[idx];
421#ifdef NOT_YET
422 sta = NULL;
423
424 /* Use station specific key to override default keys if the
425 * receiver address is a unicast address ("individual RA"). If
426 * bcrx_sta_key parameter is set, station specific key is used
427 * even with broad/multicast targets (this is against IEEE
428 * 802.11, but makes it easier to use different keys with
429 * stations that do not support WEP key mapping). */
430
431 if (!(hdr->addr1[0] & 0x01) || local->bcrx_sta_key)
432 (void) hostap_handle_sta_crypto(local, hdr, &crypt,
433 &sta);
434#endif
435
436		/* allow NULL decrypt to indicate a station-specific override
437 * for default encryption */
438 if (crypt && (crypt->ops == NULL ||
439 crypt->ops->decrypt_mpdu == NULL))
440 crypt = NULL;
441
442 if (!crypt && (fc & IEEE80211_FCTL_PROTECTED)) {
443 /* This seems to be triggered by some (multicast?)
444 * frames from other than current BSS, so just drop the
445 * frames silently instead of filling system log with
446 * these reports. */
447 IEEE80211_DEBUG_DROP("Decryption failed (not set)"
448 " (SA=" MAC_FMT ")\n",
449 MAC_ARG(hdr->addr2));
450 ieee->ieee_stats.rx_discards_undecryptable++;
451 goto rx_dropped;
452 }
453 }
454
455#ifdef NOT_YET
456 if (type != WLAN_FC_TYPE_DATA) {
457 if (type == WLAN_FC_TYPE_MGMT && stype == WLAN_FC_STYPE_AUTH &&
458 fc & IEEE80211_FCTL_PROTECTED && ieee->host_decrypt &&
459 (keyidx = hostap_rx_frame_decrypt(ieee, skb, crypt)) < 0)
460 {
461 printk(KERN_DEBUG "%s: failed to decrypt mgmt::auth "
462 "from " MAC_FMT "\n", dev->name,
463 MAC_ARG(hdr->addr2));
464 /* TODO: could inform hostapd about this so that it
465 * could send auth failure report */
466 goto rx_dropped;
467 }
468
469 if (ieee80211_rx_frame_mgmt(ieee, skb, rx_stats, type, stype))
470 goto rx_dropped;
471 else
472 goto rx_exit;
473 }
474#endif
475
476 /* Data frame - extract src/dst addresses */
477 if (skb->len < IEEE80211_3ADDR_LEN)
478 goto rx_dropped;
479
480 switch (fc & (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) {
481 case IEEE80211_FCTL_FROMDS:
482 memcpy(dst, hdr->addr1, ETH_ALEN);
483 memcpy(src, hdr->addr3, ETH_ALEN);
484 break;
485 case IEEE80211_FCTL_TODS:
486 memcpy(dst, hdr->addr3, ETH_ALEN);
487 memcpy(src, hdr->addr2, ETH_ALEN);
488 break;
489 case IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS:
490 if (skb->len < IEEE80211_4ADDR_LEN)
491 goto rx_dropped;
492 memcpy(dst, hdr->addr3, ETH_ALEN);
493 memcpy(src, hdr->addr4, ETH_ALEN);
494 break;
495 case 0:
496 memcpy(dst, hdr->addr1, ETH_ALEN);
497 memcpy(src, hdr->addr2, ETH_ALEN);
498 break;
499 }
500
501#ifdef NOT_YET
502 if (hostap_rx_frame_wds(ieee, hdr, fc, &wds))
503 goto rx_dropped;
504 if (wds) {
505 skb->dev = dev = wds;
506 stats = hostap_get_stats(dev);
507 }
508
509 if (ieee->iw_mode == IW_MODE_MASTER && !wds &&
510 (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == IEEE80211_FCTL_FROMDS &&
511 ieee->stadev &&
512 memcmp(hdr->addr2, ieee->assoc_ap_addr, ETH_ALEN) == 0) {
513 /* Frame from BSSID of the AP for which we are a client */
514 skb->dev = dev = ieee->stadev;
515 stats = hostap_get_stats(dev);
516 from_assoc_ap = 1;
517 }
518#endif
519
520 dev->last_rx = jiffies;
521
522#ifdef NOT_YET
523 if ((ieee->iw_mode == IW_MODE_MASTER ||
524 ieee->iw_mode == IW_MODE_REPEAT) &&
525 !from_assoc_ap) {
526 switch (hostap_handle_sta_rx(ieee, dev, skb, rx_stats,
527 wds != NULL)) {
528 case AP_RX_CONTINUE_NOT_AUTHORIZED:
529 frame_authorized = 0;
530 break;
531 case AP_RX_CONTINUE:
532 frame_authorized = 1;
533 break;
534 case AP_RX_DROP:
535 goto rx_dropped;
536 case AP_RX_EXIT:
537 goto rx_exit;
538 }
539 }
540#endif
541
542 /* Nullfunc frames may have PS-bit set, so they must be passed to
543 * hostap_handle_sta_rx() before being dropped here. */
544 if (stype != IEEE80211_STYPE_DATA &&
545 stype != IEEE80211_STYPE_DATA_CFACK &&
546 stype != IEEE80211_STYPE_DATA_CFPOLL &&
547 stype != IEEE80211_STYPE_DATA_CFACKPOLL) {
548 if (stype != IEEE80211_STYPE_NULLFUNC)
549 IEEE80211_DEBUG_DROP(
550 "RX: dropped data frame "
551 "with no data (type=0x%02x, "
552 "subtype=0x%02x, len=%d)\n",
553 type, stype, skb->len);
554 goto rx_dropped;
555 }
556
557 /* skb: hdr + (possibly fragmented, possibly encrypted) payload */
558
559 if (ieee->host_decrypt && (fc & IEEE80211_FCTL_PROTECTED) &&
560 (keyidx = ieee80211_rx_frame_decrypt(ieee, skb, crypt)) < 0)
561 goto rx_dropped;
562
563 hdr = (struct ieee80211_hdr *) skb->data;
564
565 /* skb: hdr + (possibly fragmented) plaintext payload */
566 // PR: FIXME: hostap has additional conditions in the "if" below:
567 // ieee->host_decrypt && (fc & IEEE80211_FCTL_PROTECTED) &&
568 if ((frag != 0 || (fc & IEEE80211_FCTL_MOREFRAGS))) {
569 int flen;
570 struct sk_buff *frag_skb = ieee80211_frag_cache_get(ieee, hdr);
571 IEEE80211_DEBUG_FRAG("Rx Fragment received (%u)\n", frag);
572
573 if (!frag_skb) {
574 IEEE80211_DEBUG(IEEE80211_DL_RX | IEEE80211_DL_FRAG,
575 "Rx cannot get skb from fragment "
576 "cache (morefrag=%d seq=%u frag=%u)\n",
577 (fc & IEEE80211_FCTL_MOREFRAGS) != 0,
578 WLAN_GET_SEQ_SEQ(sc), frag);
579 goto rx_dropped;
580 }
581
582 flen = skb->len;
583 if (frag != 0)
584 flen -= hdrlen;
585
586 if (frag_skb->tail + flen > frag_skb->end) {
587 printk(KERN_WARNING "%s: host decrypted and "
588 "reassembled frame did not fit skb\n",
589 dev->name);
590 ieee80211_frag_cache_invalidate(ieee, hdr);
591 goto rx_dropped;
592 }
593
594 if (frag == 0) {
595 /* copy first fragment (including full headers) into
596 * beginning of the fragment cache skb */
597 memcpy(skb_put(frag_skb, flen), skb->data, flen);
598 } else {
599 /* append frame payload to the end of the fragment
600 * cache skb */
601 memcpy(skb_put(frag_skb, flen), skb->data + hdrlen,
602 flen);
603 }
604 dev_kfree_skb_any(skb);
605 skb = NULL;
606
607 if (fc & IEEE80211_FCTL_MOREFRAGS) {
608 /* more fragments expected - leave the skb in fragment
609 * cache for now; it will be delivered to upper layers
610 * after all fragments have been received */
611 goto rx_exit;
612 }
613
614 /* this was the last fragment and the frame will be
615 * delivered, so remove skb from fragment cache */
616 skb = frag_skb;
617 hdr = (struct ieee80211_hdr *) skb->data;
618 ieee80211_frag_cache_invalidate(ieee, hdr);
619 }
620
621	/* skb: hdr + (possibly reassembled) full MSDU payload; possibly still
622 * encrypted/authenticated */
623 if (ieee->host_decrypt && (fc & IEEE80211_FCTL_PROTECTED) &&
624 ieee80211_rx_frame_decrypt_msdu(ieee, skb, keyidx, crypt))
625 goto rx_dropped;
626
627 hdr = (struct ieee80211_hdr *) skb->data;
628 if (crypt && !(fc & IEEE80211_FCTL_PROTECTED) && !ieee->open_wep) {
629 if (/*ieee->ieee802_1x &&*/
630 ieee80211_is_eapol_frame(ieee, skb)) {
631 /* pass unencrypted EAPOL frames even if encryption is
632 * configured */
633 } else {
634 IEEE80211_DEBUG_DROP(
635 "encryption configured, but RX "
636 "frame not encrypted (SA=" MAC_FMT ")\n",
637 MAC_ARG(hdr->addr2));
638 goto rx_dropped;
639 }
640 }
641
642	if (ieee->drop_unencrypted && !(fc & IEEE80211_FCTL_PROTECTED) &&
643 !ieee80211_is_eapol_frame(ieee, skb)) {
644 IEEE80211_DEBUG_DROP(
645 "dropped unencrypted RX data "
646 "frame from " MAC_FMT
647 " (drop_unencrypted=1)\n",
648 MAC_ARG(hdr->addr2));
649 goto rx_dropped;
650 }
651
652	/* skb: hdr + (possibly reassembled) full plaintext payload */
653
654 payload = skb->data + hdrlen;
655 ethertype = (payload[6] << 8) | payload[7];
656
657#ifdef NOT_YET
658 /* If IEEE 802.1X is used, check whether the port is authorized to send
659 * the received frame. */
660 if (ieee->ieee802_1x && ieee->iw_mode == IW_MODE_MASTER) {
661 if (ethertype == ETH_P_PAE) {
662 printk(KERN_DEBUG "%s: RX: IEEE 802.1X frame\n",
663 dev->name);
664 if (ieee->hostapd && ieee->apdev) {
665 /* Send IEEE 802.1X frames to the user
666 * space daemon for processing */
667 prism2_rx_80211(ieee->apdev, skb, rx_stats,
668 PRISM2_RX_MGMT);
669 ieee->apdevstats.rx_packets++;
670 ieee->apdevstats.rx_bytes += skb->len;
671 goto rx_exit;
672 }
673 } else if (!frame_authorized) {
674 printk(KERN_DEBUG "%s: dropped frame from "
675 "unauthorized port (IEEE 802.1X): "
676 "ethertype=0x%04x\n",
677 dev->name, ethertype);
678 goto rx_dropped;
679 }
680 }
681#endif
682
683 /* convert hdr + possible LLC headers into Ethernet header */
684 if (skb->len - hdrlen >= 8 &&
685 ((memcmp(payload, rfc1042_header, SNAP_SIZE) == 0 &&
686 ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) ||
687 memcmp(payload, bridge_tunnel_header, SNAP_SIZE) == 0)) {
688 /* remove RFC1042 or Bridge-Tunnel encapsulation and
689 * replace EtherType */
690 skb_pull(skb, hdrlen + SNAP_SIZE);
691 memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN);
692 memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN);
693 } else {
694 u16 len;
695 /* Leave Ethernet header part of hdr and full payload */
696 skb_pull(skb, hdrlen);
697 len = htons(skb->len);
698 memcpy(skb_push(skb, 2), &len, 2);
699 memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN);
700 memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN);
701 }
702
703#ifdef NOT_YET
704 if (wds && ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) ==
705 IEEE80211_FCTL_TODS) &&
706 skb->len >= ETH_HLEN + ETH_ALEN) {
707 /* Non-standard frame: get addr4 from its bogus location after
708 * the payload */
709 memcpy(skb->data + ETH_ALEN,
710 skb->data + skb->len - ETH_ALEN, ETH_ALEN);
711 skb_trim(skb, skb->len - ETH_ALEN);
712 }
713#endif
714
715 stats->rx_packets++;
716 stats->rx_bytes += skb->len;
717
718#ifdef NOT_YET
719 if (ieee->iw_mode == IW_MODE_MASTER && !wds &&
720 ieee->ap->bridge_packets) {
721 if (dst[0] & 0x01) {
722 /* copy multicast frame both to the higher layers and
723 * to the wireless media */
724 ieee->ap->bridged_multicast++;
725 skb2 = skb_clone(skb, GFP_ATOMIC);
726 if (skb2 == NULL)
727 printk(KERN_DEBUG "%s: skb_clone failed for "
728 "multicast frame\n", dev->name);
729 } else if (hostap_is_sta_assoc(ieee->ap, dst)) {
730 /* send frame directly to the associated STA using
731 * wireless media and not passing to higher layers */
732 ieee->ap->bridged_unicast++;
733 skb2 = skb;
734 skb = NULL;
735 }
736 }
737
738 if (skb2 != NULL) {
739 /* send to wireless media */
740 skb2->protocol = __constant_htons(ETH_P_802_3);
741 skb2->mac.raw = skb2->nh.raw = skb2->data;
742 /* skb2->nh.raw = skb2->data + ETH_HLEN; */
743 skb2->dev = dev;
744 dev_queue_xmit(skb2);
745 }
746
747#endif
748
749 if (skb) {
750 skb->protocol = eth_type_trans(skb, dev);
751 memset(skb->cb, 0, sizeof(skb->cb));
752 skb->dev = dev;
753 skb->ip_summed = CHECKSUM_NONE; /* 802.11 crc not sufficient */
754 netif_rx(skb);
755 }
756
757 rx_exit:
758#ifdef NOT_YET
759 if (sta)
760 hostap_handle_sta_release(sta);
761#endif
762 return 1;
763
764 rx_dropped:
765 stats->rx_dropped++;
766
767 /* Returning 0 indicates to caller that we have not handled the SKB--
768 * so it is still allocated and can be used again by underlying
769 * hardware as a DMA target */
770 return 0;
771}
772
773#define MGMT_FRAME_FIXED_PART_LENGTH 0x24
774
775static inline int ieee80211_is_ofdm_rate(u8 rate)
776{
777 switch (rate & ~IEEE80211_BASIC_RATE_MASK) {
778 case IEEE80211_OFDM_RATE_6MB:
779 case IEEE80211_OFDM_RATE_9MB:
780 case IEEE80211_OFDM_RATE_12MB:
781 case IEEE80211_OFDM_RATE_18MB:
782 case IEEE80211_OFDM_RATE_24MB:
783 case IEEE80211_OFDM_RATE_36MB:
784 case IEEE80211_OFDM_RATE_48MB:
785 case IEEE80211_OFDM_RATE_54MB:
786 return 1;
787 }
788 return 0;
789}
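For context, 802.11 supported-rates octets encode the rate in units of 500 kb/s, with the high bit flagging a basic (mandatory) rate -- which is what IEEE80211_BASIC_RATE_MASK is assumed to mask out above. A small decoding sketch, not part of the patch:

/* Illustrative sketch: decoding a supported-rates IE octet, assuming
 * the 802.11 convention of 500 kb/s units with bit 0x80 = basic rate. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t rate = 0x96;	/* hypothetical IE octet */
	int basic = rate & 0x80;
	unsigned int kbps = (rate & 0x7f) * 500;

	/* 0x96 -> "basic rate: 11.0 Mb/s" (11 Mb/s CCK) */
	printf("%s rate: %u.%u Mb/s\n", basic ? "basic" : "optional",
	       kbps / 1000, (kbps % 1000) / 100);
	return 0;
}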
790
791
792static inline int ieee80211_network_init(
793 struct ieee80211_device *ieee,
794 struct ieee80211_probe_response *beacon,
795 struct ieee80211_network *network,
796 struct ieee80211_rx_stats *stats)
797{
798#ifdef CONFIG_IEEE80211_DEBUG
799 char rates_str[64];
800 char *p;
801#endif
802 struct ieee80211_info_element *info_element;
803 u16 left;
804 u8 i;
805
806 /* Pull out fixed field data */
807 memcpy(network->bssid, beacon->header.addr3, ETH_ALEN);
808 network->capability = beacon->capability;
809 network->last_scanned = jiffies;
810 network->time_stamp[0] = beacon->time_stamp[0];
811 network->time_stamp[1] = beacon->time_stamp[1];
812 network->beacon_interval = beacon->beacon_interval;
813 /* Where to pull this? beacon->listen_interval;*/
814 network->listen_interval = 0x0A;
815 network->rates_len = network->rates_ex_len = 0;
816 network->last_associate = 0;
817 network->ssid_len = 0;
818 network->flags = 0;
819 network->atim_window = 0;
820
821 if (stats->freq == IEEE80211_52GHZ_BAND) {
822 /* for A band (No DS info) */
823 network->channel = stats->received_channel;
824 } else
825 network->flags |= NETWORK_HAS_CCK;
826
827 network->wpa_ie_len = 0;
828 network->rsn_ie_len = 0;
829
830 info_element = &beacon->info_element;
831 left = stats->len - ((void *)info_element - (void *)beacon);
832 while (left >= sizeof(struct ieee80211_info_element_hdr)) {
833 if (sizeof(struct ieee80211_info_element_hdr) + info_element->len > left) {
834 IEEE80211_DEBUG_SCAN("SCAN: parse failed: info_element->len + 2 > left : info_element->len+2=%Zd left=%d.\n",
835				     info_element->len + sizeof(struct ieee80211_info_element_hdr),
836 left);
837 return 1;
838 }
839
840 switch (info_element->id) {
841 case MFIE_TYPE_SSID:
842 if (ieee80211_is_empty_essid(info_element->data,
843 info_element->len)) {
844 network->flags |= NETWORK_EMPTY_ESSID;
845 break;
846 }
847
848 network->ssid_len = min(info_element->len,
849 (u8)IW_ESSID_MAX_SIZE);
850 memcpy(network->ssid, info_element->data, network->ssid_len);
851 if (network->ssid_len < IW_ESSID_MAX_SIZE)
852 memset(network->ssid + network->ssid_len, 0,
853 IW_ESSID_MAX_SIZE - network->ssid_len);
854
855 IEEE80211_DEBUG_SCAN("MFIE_TYPE_SSID: '%s' len=%d.\n",
856 network->ssid, network->ssid_len);
857 break;
858
859 case MFIE_TYPE_RATES:
860#ifdef CONFIG_IEEE80211_DEBUG
861 p = rates_str;
862#endif
863 network->rates_len = min(info_element->len, MAX_RATES_LENGTH);
864 for (i = 0; i < network->rates_len; i++) {
865 network->rates[i] = info_element->data[i];
866#ifdef CONFIG_IEEE80211_DEBUG
867 p += snprintf(p, sizeof(rates_str) - (p - rates_str), "%02X ", network->rates[i]);
868#endif
869 if (ieee80211_is_ofdm_rate(info_element->data[i])) {
870 network->flags |= NETWORK_HAS_OFDM;
871 if (info_element->data[i] &
872 IEEE80211_BASIC_RATE_MASK)
873 network->flags &=
874 ~NETWORK_HAS_CCK;
875 }
876 }
877
878 IEEE80211_DEBUG_SCAN("MFIE_TYPE_RATES: '%s' (%d)\n",
879 rates_str, network->rates_len);
880 break;
881
882 case MFIE_TYPE_RATES_EX:
883#ifdef CONFIG_IEEE80211_DEBUG
884 p = rates_str;
885#endif
886 network->rates_ex_len = min(info_element->len, MAX_RATES_EX_LENGTH);
887 for (i = 0; i < network->rates_ex_len; i++) {
888 network->rates_ex[i] = info_element->data[i];
889#ifdef CONFIG_IEEE80211_DEBUG
890				p += snprintf(p, sizeof(rates_str) - (p - rates_str), "%02X ", network->rates_ex[i]);
891#endif
892 if (ieee80211_is_ofdm_rate(info_element->data[i])) {
893 network->flags |= NETWORK_HAS_OFDM;
894 if (info_element->data[i] &
895 IEEE80211_BASIC_RATE_MASK)
896 network->flags &=
897 ~NETWORK_HAS_CCK;
898 }
899 }
900
901 IEEE80211_DEBUG_SCAN("MFIE_TYPE_RATES_EX: '%s' (%d)\n",
902 rates_str, network->rates_ex_len);
903 break;
904
905 case MFIE_TYPE_DS_SET:
906 IEEE80211_DEBUG_SCAN("MFIE_TYPE_DS_SET: %d\n",
907 info_element->data[0]);
908 if (stats->freq == IEEE80211_24GHZ_BAND)
909 network->channel = info_element->data[0];
910 break;
911
912 case MFIE_TYPE_FH_SET:
913 IEEE80211_DEBUG_SCAN("MFIE_TYPE_FH_SET: ignored\n");
914 break;
915
916 case MFIE_TYPE_CF_SET:
917 IEEE80211_DEBUG_SCAN("MFIE_TYPE_CF_SET: ignored\n");
918 break;
919
920 case MFIE_TYPE_TIM:
921 IEEE80211_DEBUG_SCAN("MFIE_TYPE_TIM: ignored\n");
922 break;
923
924 case MFIE_TYPE_IBSS_SET:
925 IEEE80211_DEBUG_SCAN("MFIE_TYPE_IBSS_SET: ignored\n");
926 break;
927
928 case MFIE_TYPE_CHALLENGE:
929 IEEE80211_DEBUG_SCAN("MFIE_TYPE_CHALLENGE: ignored\n");
930 break;
931
932 case MFIE_TYPE_GENERIC:
933 IEEE80211_DEBUG_SCAN("MFIE_TYPE_GENERIC: %d bytes\n",
934 info_element->len);
935 if (info_element->len >= 4 &&
936 info_element->data[0] == 0x00 &&
937 info_element->data[1] == 0x50 &&
938 info_element->data[2] == 0xf2 &&
939 info_element->data[3] == 0x01) {
940 network->wpa_ie_len = min(info_element->len + 2,
941 MAX_WPA_IE_LEN);
942 memcpy(network->wpa_ie, info_element,
943 network->wpa_ie_len);
944 }
945 break;
946
947 case MFIE_TYPE_RSN:
948 IEEE80211_DEBUG_SCAN("MFIE_TYPE_RSN: %d bytes\n",
949 info_element->len);
950 network->rsn_ie_len = min(info_element->len + 2,
951 MAX_WPA_IE_LEN);
952 memcpy(network->rsn_ie, info_element,
953 network->rsn_ie_len);
954 break;
955
956 default:
957 IEEE80211_DEBUG_SCAN("unsupported IE %d\n",
958 info_element->id);
959 break;
960 }
961
962 left -= sizeof(struct ieee80211_info_element_hdr) +
963 info_element->len;
964 info_element = (struct ieee80211_info_element *)
965 &info_element->data[info_element->len];
966 }
967
968 network->mode = 0;
969 if (stats->freq == IEEE80211_52GHZ_BAND)
970 network->mode = IEEE_A;
971 else {
972 if (network->flags & NETWORK_HAS_OFDM)
973 network->mode |= IEEE_G;
974 if (network->flags & NETWORK_HAS_CCK)
975 network->mode |= IEEE_B;
976 }
977
978 if (network->mode == 0) {
979 IEEE80211_DEBUG_SCAN("Filtered out '%s (" MAC_FMT ")' "
980 "network.\n",
981 escape_essid(network->ssid,
982 network->ssid_len),
983 MAC_ARG(network->bssid));
984 return 1;
985 }
986
987 if (ieee80211_is_empty_essid(network->ssid, network->ssid_len))
988 network->flags |= NETWORK_EMPTY_ESSID;
989
990 memcpy(&network->stats, stats, sizeof(network->stats));
991
992 return 0;
993}
994
995static inline int is_same_network(struct ieee80211_network *src,
996 struct ieee80211_network *dst)
997{
998 /* A network is only a duplicate if the channel, BSSID, and ESSID
999 * all match. We treat all <hidden> with the same BSSID and channel
1000 * as one network */
1001 return ((src->ssid_len == dst->ssid_len) &&
1002 (src->channel == dst->channel) &&
1003 !memcmp(src->bssid, dst->bssid, ETH_ALEN) &&
1004 !memcmp(src->ssid, dst->ssid, src->ssid_len));
1005}
1006
1007static inline void update_network(struct ieee80211_network *dst,
1008 struct ieee80211_network *src)
1009{
1010 memcpy(&dst->stats, &src->stats, sizeof(struct ieee80211_rx_stats));
1011 dst->capability = src->capability;
1012 memcpy(dst->rates, src->rates, src->rates_len);
1013 dst->rates_len = src->rates_len;
1014 memcpy(dst->rates_ex, src->rates_ex, src->rates_ex_len);
1015 dst->rates_ex_len = src->rates_ex_len;
1016
1017 dst->mode = src->mode;
1018 dst->flags = src->flags;
1019 dst->time_stamp[0] = src->time_stamp[0];
1020 dst->time_stamp[1] = src->time_stamp[1];
1021
1022 dst->beacon_interval = src->beacon_interval;
1023 dst->listen_interval = src->listen_interval;
1024 dst->atim_window = src->atim_window;
1025
1026 memcpy(dst->wpa_ie, src->wpa_ie, src->wpa_ie_len);
1027 dst->wpa_ie_len = src->wpa_ie_len;
1028 memcpy(dst->rsn_ie, src->rsn_ie, src->rsn_ie_len);
1029 dst->rsn_ie_len = src->rsn_ie_len;
1030
1031 dst->last_scanned = jiffies;
1032 /* dst->last_associate is not overwritten */
1033}
1034
1035static inline void ieee80211_process_probe_response(
1036 struct ieee80211_device *ieee,
1037 struct ieee80211_probe_response *beacon,
1038 struct ieee80211_rx_stats *stats)
1039{
1040 struct ieee80211_network network;
1041 struct ieee80211_network *target;
1042 struct ieee80211_network *oldest = NULL;
1043#ifdef CONFIG_IEEE80211_DEBUG
1044 struct ieee80211_info_element *info_element = &beacon->info_element;
1045#endif
1046 unsigned long flags;
1047
1048 IEEE80211_DEBUG_SCAN(
1049 "'%s' (" MAC_FMT "): %c%c%c%c %c%c%c%c-%c%c%c%c %c%c%c%c\n",
1050 escape_essid(info_element->data, info_element->len),
1051 MAC_ARG(beacon->header.addr3),
1052 (beacon->capability & (1<<0xf)) ? '1' : '0',
1053 (beacon->capability & (1<<0xe)) ? '1' : '0',
1054 (beacon->capability & (1<<0xd)) ? '1' : '0',
1055 (beacon->capability & (1<<0xc)) ? '1' : '0',
1056 (beacon->capability & (1<<0xb)) ? '1' : '0',
1057 (beacon->capability & (1<<0xa)) ? '1' : '0',
1058 (beacon->capability & (1<<0x9)) ? '1' : '0',
1059 (beacon->capability & (1<<0x8)) ? '1' : '0',
1060 (beacon->capability & (1<<0x7)) ? '1' : '0',
1061 (beacon->capability & (1<<0x6)) ? '1' : '0',
1062 (beacon->capability & (1<<0x5)) ? '1' : '0',
1063 (beacon->capability & (1<<0x4)) ? '1' : '0',
1064 (beacon->capability & (1<<0x3)) ? '1' : '0',
1065 (beacon->capability & (1<<0x2)) ? '1' : '0',
1066 (beacon->capability & (1<<0x1)) ? '1' : '0',
1067 (beacon->capability & (1<<0x0)) ? '1' : '0');
1068
1069 if (ieee80211_network_init(ieee, beacon, &network, stats)) {
1070 IEEE80211_DEBUG_SCAN("Dropped '%s' (" MAC_FMT ") via %s.\n",
1071 escape_essid(info_element->data,
1072 info_element->len),
1073 MAC_ARG(beacon->header.addr3),
1074 WLAN_FC_GET_STYPE(beacon->header.frame_ctl) ==
1075 IEEE80211_STYPE_PROBE_RESP ?
1076 "PROBE RESPONSE" : "BEACON");
1077 return;
1078 }
1079
1080 /* The network parsed correctly -- so now we scan our known networks
1081 * to see if we can find it in our list.
1082 *
1083	 * NOTE: This search is definitely not optimized.  Once it's doing
1084 * the "right thing" we'll optimize it for efficiency if
1085 * necessary */
1086
1087 /* Search for this entry in the list and update it if it is
1088 * already there. */
1089
1090 spin_lock_irqsave(&ieee->lock, flags);
1091
1092 list_for_each_entry(target, &ieee->network_list, list) {
1093 if (is_same_network(target, &network))
1094 break;
1095
1096 if ((oldest == NULL) ||
1097 (target->last_scanned < oldest->last_scanned))
1098 oldest = target;
1099 }
1100
1101 /* If we didn't find a match, then get a new network slot to initialize
1102 * with this beacon's information */
1103 if (&target->list == &ieee->network_list) {
1104 if (list_empty(&ieee->network_free_list)) {
1105 /* If there are no more slots, expire the oldest */
1106 list_del(&oldest->list);
1107 target = oldest;
1108 IEEE80211_DEBUG_SCAN("Expired '%s' (" MAC_FMT ") from "
1109 "network list.\n",
1110 escape_essid(target->ssid,
1111 target->ssid_len),
1112 MAC_ARG(target->bssid));
1113 } else {
1114 /* Otherwise just pull from the free list */
1115 target = list_entry(ieee->network_free_list.next,
1116 struct ieee80211_network, list);
1117 list_del(ieee->network_free_list.next);
1118 }
1119
1120
1121#ifdef CONFIG_IEEE80211_DEBUG
1122 IEEE80211_DEBUG_SCAN("Adding '%s' (" MAC_FMT ") via %s.\n",
1123 escape_essid(network.ssid,
1124 network.ssid_len),
1125 MAC_ARG(network.bssid),
1126 WLAN_FC_GET_STYPE(beacon->header.frame_ctl) ==
1127 IEEE80211_STYPE_PROBE_RESP ?
1128 "PROBE RESPONSE" : "BEACON");
1129#endif
1130 memcpy(target, &network, sizeof(*target));
1131 list_add_tail(&target->list, &ieee->network_list);
1132 } else {
1133 IEEE80211_DEBUG_SCAN("Updating '%s' (" MAC_FMT ") via %s.\n",
1134 escape_essid(target->ssid,
1135 target->ssid_len),
1136 MAC_ARG(target->bssid),
1137 WLAN_FC_GET_STYPE(beacon->header.frame_ctl) ==
1138 IEEE80211_STYPE_PROBE_RESP ?
1139 "PROBE RESPONSE" : "BEACON");
1140 update_network(target, &network);
1141 }
1142
1143 spin_unlock_irqrestore(&ieee->lock, flags);
1144}
1145
1146void ieee80211_rx_mgt(struct ieee80211_device *ieee,
1147 struct ieee80211_hdr *header,
1148 struct ieee80211_rx_stats *stats)
1149{
1150	switch (WLAN_FC_GET_STYPE(le16_to_cpu(header->frame_ctl))) {
1151 case IEEE80211_STYPE_ASSOC_RESP:
1152 IEEE80211_DEBUG_MGMT("received ASSOCIATION RESPONSE (%d)\n",
1153 WLAN_FC_GET_STYPE(header->frame_ctl));
1154 break;
1155
1156 case IEEE80211_STYPE_REASSOC_RESP:
1157 IEEE80211_DEBUG_MGMT("received REASSOCIATION RESPONSE (%d)\n",
1158 WLAN_FC_GET_STYPE(header->frame_ctl));
1159 break;
1160
1161 case IEEE80211_STYPE_PROBE_RESP:
1162 IEEE80211_DEBUG_MGMT("received PROBE RESPONSE (%d)\n",
1163 WLAN_FC_GET_STYPE(header->frame_ctl));
1164 IEEE80211_DEBUG_SCAN("Probe response\n");
1165 ieee80211_process_probe_response(
1166 ieee, (struct ieee80211_probe_response *)header, stats);
1167 break;
1168
1169 case IEEE80211_STYPE_BEACON:
1170 IEEE80211_DEBUG_MGMT("received BEACON (%d)\n",
1171 WLAN_FC_GET_STYPE(header->frame_ctl));
1172 IEEE80211_DEBUG_SCAN("Beacon\n");
1173 ieee80211_process_probe_response(
1174 ieee, (struct ieee80211_probe_response *)header, stats);
1175 break;
1176
1177 default:
1178 IEEE80211_DEBUG_MGMT("received UNKNOWN (%d)\n",
1179 WLAN_FC_GET_STYPE(header->frame_ctl));
1180 IEEE80211_WARNING("%s: Unknown management packet: %d\n",
1181 ieee->dev->name,
1182 WLAN_FC_GET_STYPE(header->frame_ctl));
1183 break;
1184 }
1185}
1186
1187
1188EXPORT_SYMBOL(ieee80211_rx_mgt);
1189EXPORT_SYMBOL(ieee80211_rx);
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
new file mode 100644
index 000000000000..b7ea3e25e25d
--- /dev/null
+++ b/net/ieee80211/ieee80211_tx.c
@@ -0,0 +1,438 @@
1/******************************************************************************
2
3 Copyright(c) 2003 - 2004 Intel Corporation. All rights reserved.
4
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of version 2 of the GNU General Public License as
7 published by the Free Software Foundation.
8
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 more details.
13
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc., 59
16 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17
18 The full GNU General Public License is included in this distribution in the
19 file called LICENSE.
20
21 Contact Information:
22 James P. Ketrenos <ipw2100-admin@linux.intel.com>
23 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
24
25******************************************************************************/
26#include <linux/compiler.h>
27#include <linux/config.h>
28#include <linux/errno.h>
29#include <linux/if_arp.h>
30#include <linux/in6.h>
31#include <linux/in.h>
32#include <linux/ip.h>
33#include <linux/kernel.h>
34#include <linux/module.h>
35#include <linux/netdevice.h>
36#include <linux/proc_fs.h>
37#include <linux/skbuff.h>
38#include <linux/slab.h>
39#include <linux/tcp.h>
40#include <linux/types.h>
41#include <linux/version.h>
42#include <linux/wireless.h>
43#include <linux/etherdevice.h>
44#include <asm/uaccess.h>
45
46#include <net/ieee80211.h>
47
48
49/*
50
51
52802.11 Data Frame
53
54 ,-------------------------------------------------------------------.
55Bytes | 2 | 2 | 6 | 6 | 6 | 2 | 0..2312 | 4 |
56 |------|------|---------|---------|---------|------|---------|------|
57Desc. | ctrl | dura | DA/RA | TA | SA | Sequ | Frame | fcs |
58 | | tion | (BSSID) | | | ence | data | |
59 `--------------------------------------------------| |------'
60Total: 28 non-data bytes `----.----'
61 |
62 .- 'Frame data' expands to <---------------------------'
63 |
64 V
65 ,---------------------------------------------------.
66Bytes | 1 | 1 | 1 | 3 | 2 | 0-2304 |
67 |------|------|---------|----------|------|---------|
68Desc. | SNAP | SNAP | Control |Eth Tunnel| Type | IP |
69 | DSAP | SSAP | | | | Packet |
70 | 0xAA | 0xAA |0x03 (UI)|0x00-00-F8| | |
71 `-----------------------------------------| |
72Total: 8 non-data bytes `----.----'
73 |
74 .- 'IP Packet' expands, if WEP enabled, to <--'
75 |
76 V
77 ,-----------------------.
78Bytes | 4 | 0-2296 | 4 |
79 |-----|-----------|-----|
80Desc. | IV | Encrypted | ICV |
81 | | IP Packet | |
82 `-----------------------'
83Total: 8 non-data bytes
84
85
86802.3 Ethernet Data Frame
87
88 ,-----------------------------------------.
89Bytes | 6 | 6 | 2 | Variable | 4 |
90 |-------|-------|------|-----------|------|
91Desc. | Dest. | Source| Type | IP Packet | fcs |
92 | MAC | MAC | | | |
93 `-----------------------------------------'
94Total: 18 non-data bytes
95
96In the event that fragmentation is required, the incoming payload is split into
97N parts of size ieee->fts. The first fragment contains the SNAP header and the
98remaining packets are just data.
99
100If encryption is enabled, each fragment payload size is reduced by enough space
101to add the prefix and postfix (IV and ICV totalling 8 bytes in the case of WEP).
102So if you have 1500 bytes of payload with ieee->fts set to 500, without
103encryption it will take 3 frames. With WEP it will take 4 frames as the
104payload of each frame is reduced to 492 bytes.
105
106* SKB visualization
107*
108* ,- skb->data
109* |
110* | ETHERNET HEADER ,-<-- PAYLOAD
111* | | 14 bytes from skb->data
112* | 2 bytes for Type --> ,T. | (sizeof ethhdr)
113* | | | |
114* |,-Dest.--. ,--Src.---. | | |
115* | 6 bytes| | 6 bytes | | | |
116* v | | | | | |
117* 0 | v 1 | v | v 2
118* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
119* ^ | ^ | ^ |
120* | | | | | |
121* | | | | `T' <---- 2 bytes for Type
122* | | | |
123* | | '---SNAP--' <-------- 6 bytes for SNAP
124* | |
125* `-IV--' <-------------------- 4 bytes for IV (WEP)
126*
127* SNAP HEADER
128*
129*/
130
131static u8 P802_1H_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0xf8 };
132static u8 RFC1042_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0x00 };
133
134static inline int ieee80211_put_snap(u8 *data, u16 h_proto)
135{
136 struct ieee80211_snap_hdr *snap;
137 u8 *oui;
138
139 snap = (struct ieee80211_snap_hdr *)data;
140 snap->dsap = 0xaa;
141 snap->ssap = 0xaa;
142 snap->ctrl = 0x03;
143
144 if (h_proto == 0x8137 || h_proto == 0x80f3)
145 oui = P802_1H_OUI;
146 else
147 oui = RFC1042_OUI;
148 snap->oui[0] = oui[0];
149 snap->oui[1] = oui[1];
150 snap->oui[2] = oui[2];
151
152 *(u16 *)(data + SNAP_SIZE) = htons(h_proto);
153
154 return SNAP_SIZE + sizeof(u16);
155}
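As a concrete example, for an IPv4 payload (h_proto 0x0800) the eight bytes written above form the RFC 1042 encapsulation. A sketch of the resulting octets, not part of the patch:

/* Illustrative sketch: the 8 bytes ieee80211_put_snap() emits for
 * EtherType 0x0800 (IPv4) -- the RFC 1042 LLC/SNAP encapsulation. */
static const u8 snap_ip_example[8] = {
	0xaa, 0xaa,		/* DSAP, SSAP */
	0x03,			/* control: UI */
	0x00, 0x00, 0x00,	/* RFC 1042 OUI */
	0x08, 0x00,		/* EtherType ETH_P_IP, big-endian */
};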
156
157static inline int ieee80211_encrypt_fragment(
158 struct ieee80211_device *ieee,
159 struct sk_buff *frag,
160 int hdr_len)
161{
162 struct ieee80211_crypt_data* crypt = ieee->crypt[ieee->tx_keyidx];
163 int res;
164
165#ifdef CONFIG_IEEE80211_CRYPT_TKIP
166 struct ieee80211_hdr *header;
167
168 if (ieee->tkip_countermeasures &&
169 crypt && crypt->ops && strcmp(crypt->ops->name, "TKIP") == 0) {
170 header = (struct ieee80211_hdr *) frag->data;
171 if (net_ratelimit()) {
172 printk(KERN_DEBUG "%s: TKIP countermeasures: dropped "
173 "TX packet to " MAC_FMT "\n",
174 ieee->dev->name, MAC_ARG(header->addr1));
175 }
176 return -1;
177 }
178#endif
179 /* To encrypt, frame format is:
180 * IV (4 bytes), clear payload (including SNAP), ICV (4 bytes) */
181
182 // PR: FIXME: Copied from hostap. Check fragmentation/MSDU/MPDU encryption.
183 /* Host-based IEEE 802.11 fragmentation for TX is not yet supported, so
184 * call both MSDU and MPDU encryption functions from here. */
185 atomic_inc(&crypt->refcnt);
186 res = 0;
187 if (crypt->ops->encrypt_msdu)
188 res = crypt->ops->encrypt_msdu(frag, hdr_len, crypt->priv);
189 if (res == 0 && crypt->ops->encrypt_mpdu)
190 res = crypt->ops->encrypt_mpdu(frag, hdr_len, crypt->priv);
191
192 atomic_dec(&crypt->refcnt);
193 if (res < 0) {
194 printk(KERN_INFO "%s: Encryption failed: len=%d.\n",
195 ieee->dev->name, frag->len);
196 ieee->ieee_stats.tx_discards++;
197 return -1;
198 }
199
200 return 0;
201}
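Callers must leave room for the cipher's prefix and postfix before this runs. A hedged sketch (not part of the patch) of the per-fragment skb sizing implied by the crypto-ops fields declared earlier in this patch (extra_prefix_len/extra_postfix_len of 4/4 for WEP, 8/12 for TKIP):

/* Illustrative sketch: size a fragment skb so encrypt_msdu/encrypt_mpdu
 * have the head- and tailroom their ops structure advertises. */
static inline int frag_skb_size(struct ieee80211_crypt_data *crypt,
				int hdr_len, int payload_len)
{
	int size = hdr_len + payload_len;

	if (crypt && crypt->ops)
		size += crypt->ops->extra_prefix_len +
			crypt->ops->extra_postfix_len;
	return size;	/* e.g. pass to dev_alloc_skb() */
}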
202
203
204void ieee80211_txb_free(struct ieee80211_txb *txb) {
205 int i;
206 if (unlikely(!txb))
207 return;
208 for (i = 0; i < txb->nr_frags; i++)
209 if (txb->fragments[i])
210 dev_kfree_skb_any(txb->fragments[i]);
211 kfree(txb);
212}
213
214static struct ieee80211_txb *ieee80211_alloc_txb(int nr_frags, int txb_size,
215 int gfp_mask)
216{
217 struct ieee80211_txb *txb;
218 int i;
219 txb = kmalloc(
220		sizeof(struct ieee80211_txb) + (sizeof(struct sk_buff *) * nr_frags),
221 gfp_mask);
222 if (!txb)
223 return NULL;
224
225 memset(txb, 0, sizeof(struct ieee80211_txb));
226 txb->nr_frags = nr_frags;
227 txb->frag_size = txb_size;
228
229 for (i = 0; i < nr_frags; i++) {
230 txb->fragments[i] = dev_alloc_skb(txb_size);
231 if (unlikely(!txb->fragments[i])) {
232 i--;
233 break;
234 }
235 }
236 if (unlikely(i != nr_frags)) {
237 while (i >= 0)
238 dev_kfree_skb_any(txb->fragments[i--]);
239 kfree(txb);
240 return NULL;
241 }
242 return txb;
243}
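/*
 * Usage sketch (hypothetical caller): the allocator either returns a
 * TXB with all nr_frags skbs in place or NULL with nothing leaked, so
 * a caller only ever pairs one alloc with one free.
 */
#if 0	/* illustration only */
	struct ieee80211_txb *txb = ieee80211_alloc_txb(3, 1500, GFP_ATOMIC);
	if (!txb)
		return -ENOMEM;
	/* ... fill txb->fragments[0..2] ... */
	ieee80211_txb_free(txb);
#endif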
244
245/* SKBs are added to the ieee->tx_queue. */
246int ieee80211_xmit(struct sk_buff *skb,
247 struct net_device *dev)
248{
249 struct ieee80211_device *ieee = netdev_priv(dev);
250 struct ieee80211_txb *txb = NULL;
251 struct ieee80211_hdr *frag_hdr;
252 int i, bytes_per_frag, nr_frags, bytes_last_frag, frag_size;
253 unsigned long flags;
254 struct net_device_stats *stats = &ieee->stats;
255 int ether_type, encrypt;
256 int bytes, fc, hdr_len;
257 struct sk_buff *skb_frag;
258 struct ieee80211_hdr header = { /* Ensure zero initialized */
259 .duration_id = 0,
260 .seq_ctl = 0
261 };
262 u8 dest[ETH_ALEN], src[ETH_ALEN];
263
264 struct ieee80211_crypt_data* crypt;
265
266 spin_lock_irqsave(&ieee->lock, flags);
267
268	/* If there is no driver handler to take the TXB, don't bother
269 * creating it... */
270 if (!ieee->hard_start_xmit) {
271 printk(KERN_WARNING "%s: No xmit handler.\n",
272 ieee->dev->name);
273 goto success;
274 }
275
276 if (unlikely(skb->len < SNAP_SIZE + sizeof(u16))) {
277 printk(KERN_WARNING "%s: skb too small (%d).\n",
278 ieee->dev->name, skb->len);
279 goto success;
280 }
281
282 ether_type = ntohs(((struct ethhdr *)skb->data)->h_proto);
283
284 crypt = ieee->crypt[ieee->tx_keyidx];
285
286 encrypt = !(ether_type == ETH_P_PAE && ieee->ieee802_1x) &&
287 ieee->host_encrypt && crypt && crypt->ops;
288
289 if (!encrypt && ieee->ieee802_1x &&
290 ieee->drop_unencrypted && ether_type != ETH_P_PAE) {
291 stats->tx_dropped++;
292 goto success;
293 }
294
295 /* Save source and destination addresses */
296 memcpy(&dest, skb->data, ETH_ALEN);
297 memcpy(&src, skb->data+ETH_ALEN, ETH_ALEN);
298
299 /* Advance the SKB to the start of the payload */
300 skb_pull(skb, sizeof(struct ethhdr));
301
302 /* Determine total amount of storage required for TXB packets */
303 bytes = skb->len + SNAP_SIZE + sizeof(u16);
304
305 if (encrypt)
306 fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA |
307 IEEE80211_FCTL_PROTECTED;
308 else
309 fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA;
310
311 if (ieee->iw_mode == IW_MODE_INFRA) {
312 fc |= IEEE80211_FCTL_TODS;
313 /* To DS: Addr1 = BSSID, Addr2 = SA,
314 Addr3 = DA */
315 memcpy(&header.addr1, ieee->bssid, ETH_ALEN);
316 memcpy(&header.addr2, &src, ETH_ALEN);
317 memcpy(&header.addr3, &dest, ETH_ALEN);
318 } else if (ieee->iw_mode == IW_MODE_ADHOC) {
319 /* not From/To DS: Addr1 = DA, Addr2 = SA,
320 Addr3 = BSSID */
321 memcpy(&header.addr1, dest, ETH_ALEN);
322 memcpy(&header.addr2, src, ETH_ALEN);
323 memcpy(&header.addr3, ieee->bssid, ETH_ALEN);
324 }
325 header.frame_ctl = cpu_to_le16(fc);
326 hdr_len = IEEE80211_3ADDR_LEN;
327
328 /* Determine fragmentation size based on destination (multicast
329 * and broadcast are not fragmented) */
330 if (is_multicast_ether_addr(dest) ||
331 is_broadcast_ether_addr(dest))
332 frag_size = MAX_FRAG_THRESHOLD;
333 else
334 frag_size = ieee->fts;
335
336	/* Determine amount of payload per fragment.  Regardless of whether
337	 * this stack provides the full 802.11 header, one will
338 * eventually be affixed to this fragment -- so we must account for
339 * it when determining the amount of payload space. */
340 bytes_per_frag = frag_size - IEEE80211_3ADDR_LEN;
341 if (ieee->config &
342 (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS))
343 bytes_per_frag -= IEEE80211_FCS_LEN;
344
345	/* Each fragment may need room for the encryption prefix/postfix */
346 if (encrypt)
347 bytes_per_frag -= crypt->ops->extra_prefix_len +
348 crypt->ops->extra_postfix_len;
349
350	/* Number of fragments is the total payload bytes divided by
351	 * the payload bytes per fragment */
352 nr_frags = bytes / bytes_per_frag;
353 bytes_last_frag = bytes % bytes_per_frag;
354 if (bytes_last_frag)
355 nr_frags++;
356 else
357 bytes_last_frag = bytes_per_frag;
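	/* Worked example (illustrative numbers): with bytes == 1508
	 * (1500-byte payload + 8-byte SNAP/type) and bytes_per_frag ==
	 * 500, 1508 / 500 = 3 with an 8-byte remainder, so nr_frags
	 * becomes 4 and the last fragment carries 8 bytes. */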
358
359 /* When we allocate the TXB we allocate enough space for the reserve
360 * and full fragment bytes (bytes_per_frag doesn't include prefix,
361 * postfix, header, FCS, etc.) */
362 txb = ieee80211_alloc_txb(nr_frags, frag_size, GFP_ATOMIC);
363 if (unlikely(!txb)) {
364 printk(KERN_WARNING "%s: Could not allocate TXB\n",
365 ieee->dev->name);
366 goto failed;
367 }
368 txb->encrypted = encrypt;
369 txb->payload_size = bytes;
370
371 for (i = 0; i < nr_frags; i++) {
372 skb_frag = txb->fragments[i];
373
374 if (encrypt)
375 skb_reserve(skb_frag, crypt->ops->extra_prefix_len);
376
377 frag_hdr = (struct ieee80211_hdr *)skb_put(skb_frag, hdr_len);
378 memcpy(frag_hdr, &header, hdr_len);
379
380 /* If this is not the last fragment, then add the MOREFRAGS
381 * bit to the frame control */
382 if (i != nr_frags - 1) {
383 frag_hdr->frame_ctl = cpu_to_le16(
384 fc | IEEE80211_FCTL_MOREFRAGS);
385 bytes = bytes_per_frag;
386 } else {
387 /* The last fragment takes the remaining length */
388 bytes = bytes_last_frag;
389 }
390
391 /* Put a SNAP header on the first fragment */
392 if (i == 0) {
393 ieee80211_put_snap(
394 skb_put(skb_frag, SNAP_SIZE + sizeof(u16)),
395 ether_type);
396 bytes -= SNAP_SIZE + sizeof(u16);
397 }
398
399 memcpy(skb_put(skb_frag, bytes), skb->data, bytes);
400
401 /* Advance the SKB... */
402 skb_pull(skb, bytes);
403
404 /* Encryption routine will move the header forward in order
405 * to insert the IV between the header and the payload */
406 if (encrypt)
407 ieee80211_encrypt_fragment(ieee, skb_frag, hdr_len);
408 if (ieee->config &
409 (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS))
410 skb_put(skb_frag, 4);
411 }
412
413
414 success:
415 spin_unlock_irqrestore(&ieee->lock, flags);
416
417 dev_kfree_skb_any(skb);
418
419 if (txb) {
420 if ((*ieee->hard_start_xmit)(txb, dev) == 0) {
421 stats->tx_packets++;
422 stats->tx_bytes += txb->payload_size;
423 return 0;
424 }
425 ieee80211_txb_free(txb);
426 }
427
428 return 0;
429
430 failed:
431 spin_unlock_irqrestore(&ieee->lock, flags);
432 netif_stop_queue(dev);
433 stats->tx_errors++;
434 return 1;
435
436}
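/*
 * Note on the hard_start_xmit contract as used above: the callback is
 * handed the assembled TXB and returns 0 once it has taken ownership;
 * any nonzero return leaves the TXB with this layer, which frees it.
 * A hypothetical driver hook:
 */
#if 0	/* illustration only */
static int example_hard_start_xmit(struct ieee80211_txb *txb,
				   struct net_device *dev)
{
	/* queue txb->fragments[0 .. txb->nr_frags - 1] to hardware */
	return 0;	/* accepted; the driver now owns the TXB */
}
#endif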
437
438EXPORT_SYMBOL(ieee80211_txb_free);
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
new file mode 100644
index 000000000000..2cd571c525a9
--- /dev/null
+++ b/net/ieee80211/ieee80211_wx.c
@@ -0,0 +1,471 @@
1/******************************************************************************
2
3 Copyright(c) 2004 Intel Corporation. All rights reserved.
4
5 Portions of this file are based on the WEP enablement code provided by the
6 Host AP project hostap-drivers v0.1.3
7 Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen
8 <jkmaline@cc.hut.fi>
9 Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
10
11 This program is free software; you can redistribute it and/or modify it
12 under the terms of version 2 of the GNU General Public License as
13 published by the Free Software Foundation.
14
15 This program is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 more details.
19
20 You should have received a copy of the GNU General Public License along with
21 this program; if not, write to the Free Software Foundation, Inc., 59
22 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23
24 The full GNU General Public License is included in this distribution in the
25 file called LICENSE.
26
27 Contact Information:
28 James P. Ketrenos <ipw2100-admin@linux.intel.com>
29 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
30
31******************************************************************************/
32#include <linux/wireless.h>
33#include <linux/version.h>
34#include <linux/kmod.h>
35#include <linux/module.h>
36
37#include <net/ieee80211.h>
38static const char *ieee80211_modes[] = {
39 "?", "a", "b", "ab", "g", "ag", "bg", "abg"
40};
41
42#define MAX_CUSTOM_LEN 64
43static inline char *ipw2100_translate_scan(struct ieee80211_device *ieee,
44 char *start, char *stop,
45 struct ieee80211_network *network)
46{
47 char custom[MAX_CUSTOM_LEN];
48 char *p;
49 struct iw_event iwe;
50 int i, j;
51 u8 max_rate, rate;
52
53 /* First entry *MUST* be the AP MAC address */
54 iwe.cmd = SIOCGIWAP;
55 iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
56 memcpy(iwe.u.ap_addr.sa_data, network->bssid, ETH_ALEN);
57 start = iwe_stream_add_event(start, stop, &iwe, IW_EV_ADDR_LEN);
58
59 /* Remaining entries will be displayed in the order we provide them */
60
61 /* Add the ESSID */
62 iwe.cmd = SIOCGIWESSID;
63 iwe.u.data.flags = 1;
64 if (network->flags & NETWORK_EMPTY_ESSID) {
65 iwe.u.data.length = sizeof("<hidden>");
66 start = iwe_stream_add_point(start, stop, &iwe, "<hidden>");
67 } else {
68 iwe.u.data.length = min(network->ssid_len, (u8)32);
69 start = iwe_stream_add_point(start, stop, &iwe, network->ssid);
70 }
71
72 /* Add the protocol name */
73 iwe.cmd = SIOCGIWNAME;
74 snprintf(iwe.u.name, IFNAMSIZ, "IEEE 802.11%s", ieee80211_modes[network->mode]);
75 start = iwe_stream_add_event(start, stop, &iwe, IW_EV_CHAR_LEN);
76
77 /* Add mode */
78 iwe.cmd = SIOCGIWMODE;
79 if (network->capability &
80 (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS)) {
81 if (network->capability & WLAN_CAPABILITY_ESS)
82 iwe.u.mode = IW_MODE_MASTER;
83 else
84 iwe.u.mode = IW_MODE_ADHOC;
85
86 start = iwe_stream_add_event(start, stop, &iwe,
87 IW_EV_UINT_LEN);
88 }
89
90 /* Add frequency/channel */
91 iwe.cmd = SIOCGIWFREQ;
92/* iwe.u.freq.m = ieee80211_frequency(network->channel, network->mode);
93 iwe.u.freq.e = 3; */
94 iwe.u.freq.m = network->channel;
95 iwe.u.freq.e = 0;
96 iwe.u.freq.i = 0;
97 start = iwe_stream_add_event(start, stop, &iwe, IW_EV_FREQ_LEN);
98
99 /* Add encryption capability */
100 iwe.cmd = SIOCGIWENCODE;
101 if (network->capability & WLAN_CAPABILITY_PRIVACY)
102 iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY;
103 else
104 iwe.u.data.flags = IW_ENCODE_DISABLED;
105 iwe.u.data.length = 0;
106 start = iwe_stream_add_point(start, stop, &iwe, network->ssid);
107
108 /* Add basic and extended rates */
109 max_rate = 0;
110 p = custom;
111 p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), " Rates (Mb/s): ");
112 for (i = 0, j = 0; i < network->rates_len; ) {
113 if (j < network->rates_ex_len &&
114 ((network->rates_ex[j] & 0x7F) <
115 (network->rates[i] & 0x7F)))
116 rate = network->rates_ex[j++] & 0x7F;
117 else
118 rate = network->rates[i++] & 0x7F;
119 if (rate > max_rate)
120 max_rate = rate;
121 p += snprintf(p, MAX_CUSTOM_LEN - (p - custom),
122 "%d%s ", rate >> 1, (rate & 1) ? ".5" : "");
123 }
124 for (; j < network->rates_ex_len; j++) {
125 rate = network->rates_ex[j] & 0x7F;
126 p += snprintf(p, MAX_CUSTOM_LEN - (p - custom),
127 "%d%s ", rate >> 1, (rate & 1) ? ".5" : "");
128 if (rate > max_rate)
129 max_rate = rate;
130 }
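	/* The rate bytes come from the (extended) supported-rates
	 * elements, which encode speeds in 500 kb/s units with the top
	 * bit marking "basic" rates (masked off via & 0x7F): hence
	 * rate >> 1 for whole Mb/s, bit 0 for the ".5" fraction, and
	 * max_rate * 500000 for the bit-rate value below. */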
131
132 iwe.cmd = SIOCGIWRATE;
133 iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0;
134 iwe.u.bitrate.value = max_rate * 500000;
135 start = iwe_stream_add_event(start, stop, &iwe,
136 IW_EV_PARAM_LEN);
137
138 iwe.cmd = IWEVCUSTOM;
139 iwe.u.data.length = p - custom;
140 if (iwe.u.data.length)
141 start = iwe_stream_add_point(start, stop, &iwe, custom);
142
143 /* Add quality statistics */
144 /* TODO: Fix these values... */
145 iwe.cmd = IWEVQUAL;
146 iwe.u.qual.qual = network->stats.signal;
147 iwe.u.qual.level = network->stats.rssi;
148 iwe.u.qual.noise = network->stats.noise;
149 iwe.u.qual.updated = network->stats.mask & IEEE80211_STATMASK_WEMASK;
150 if (!(network->stats.mask & IEEE80211_STATMASK_RSSI))
151 iwe.u.qual.updated |= IW_QUAL_LEVEL_INVALID;
152 if (!(network->stats.mask & IEEE80211_STATMASK_NOISE))
153 iwe.u.qual.updated |= IW_QUAL_NOISE_INVALID;
154 if (!(network->stats.mask & IEEE80211_STATMASK_SIGNAL))
155 iwe.u.qual.updated |= IW_QUAL_QUAL_INVALID;
156
157 start = iwe_stream_add_event(start, stop, &iwe, IW_EV_QUAL_LEN);
158
159 iwe.cmd = IWEVCUSTOM;
160 p = custom;
161
162 iwe.u.data.length = p - custom;
163 if (iwe.u.data.length)
164 start = iwe_stream_add_point(start, stop, &iwe, custom);
165
166 if (ieee->wpa_enabled && network->wpa_ie_len){
167 char buf[MAX_WPA_IE_LEN * 2 + 30];
168
169 u8 *p = buf;
170 p += sprintf(p, "wpa_ie=");
171 for (i = 0; i < network->wpa_ie_len; i++) {
172 p += sprintf(p, "%02x", network->wpa_ie[i]);
173 }
174
175 memset(&iwe, 0, sizeof(iwe));
176 iwe.cmd = IWEVCUSTOM;
177 iwe.u.data.length = strlen(buf);
178 start = iwe_stream_add_point(start, stop, &iwe, buf);
179 }
180
181 if (ieee->wpa_enabled && network->rsn_ie_len){
182 char buf[MAX_WPA_IE_LEN * 2 + 30];
183
184 u8 *p = buf;
185 p += sprintf(p, "rsn_ie=");
186 for (i = 0; i < network->rsn_ie_len; i++) {
187 p += sprintf(p, "%02x", network->rsn_ie[i]);
188 }
189
190 memset(&iwe, 0, sizeof(iwe));
191 iwe.cmd = IWEVCUSTOM;
192 iwe.u.data.length = strlen(buf);
193 start = iwe_stream_add_point(start, stop, &iwe, buf);
194 }
195
196 /* Add EXTRA: Age to display seconds since last beacon/probe response
197 * for given network. */
198 iwe.cmd = IWEVCUSTOM;
199 p = custom;
200 p += snprintf(p, MAX_CUSTOM_LEN - (p - custom),
201		      " Last beacon: %lums ago", (jiffies - network->last_scanned) * 1000UL / HZ);
202 iwe.u.data.length = p - custom;
203 if (iwe.u.data.length)
204 start = iwe_stream_add_point(start, stop, &iwe, custom);
205
206
207 return start;
208}
209
210int ieee80211_wx_get_scan(struct ieee80211_device *ieee,
211 struct iw_request_info *info,
212 union iwreq_data *wrqu, char *extra)
213{
214 struct ieee80211_network *network;
215 unsigned long flags;
216
217 char *ev = extra;
218 char *stop = ev + IW_SCAN_MAX_DATA;
219 int i = 0;
220
221 IEEE80211_DEBUG_WX("Getting scan\n");
222
223 spin_lock_irqsave(&ieee->lock, flags);
224
225 list_for_each_entry(network, &ieee->network_list, list) {
226 i++;
227 if (ieee->scan_age == 0 ||
228 time_after(network->last_scanned + ieee->scan_age, jiffies))
229 ev = ipw2100_translate_scan(ieee, ev, stop, network);
230 else
231 IEEE80211_DEBUG_SCAN(
232 "Not showing network '%s ("
233 MAC_FMT ")' due to age (%lums).\n",
234 escape_essid(network->ssid,
235 network->ssid_len),
236 MAC_ARG(network->bssid),
237				(jiffies - network->last_scanned) * 1000UL / HZ);
238 }
239
240 spin_unlock_irqrestore(&ieee->lock, flags);
241
242 wrqu->data.length = ev - extra;
243 wrqu->data.flags = 0;
244
245 IEEE80211_DEBUG_WX("exit: %d networks returned.\n", i);
246
247 return 0;
248}
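/*
 * Aside: time_after(network->last_scanned + ieee->scan_age, jiffies)
 * above holds while the entry is younger than scan_age jiffies, and
 * time_after() compares with wraparound-safe signed arithmetic, so the
 * test stays correct across a jiffies rollover; scan_age == 0 disables
 * aging and reports every cached network.
 */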
249
250int ieee80211_wx_set_encode(struct ieee80211_device *ieee,
251 struct iw_request_info *info,
252 union iwreq_data *wrqu, char *keybuf)
253{
254 struct iw_point *erq = &(wrqu->encoding);
255 struct net_device *dev = ieee->dev;
256 struct ieee80211_security sec = {
257 .flags = 0
258 };
259 int i, key, key_provided, len;
260 struct ieee80211_crypt_data **crypt;
261
262 IEEE80211_DEBUG_WX("SET_ENCODE\n");
263
264 key = erq->flags & IW_ENCODE_INDEX;
265 if (key) {
266 if (key > WEP_KEYS)
267 return -EINVAL;
268 key--;
269 key_provided = 1;
270 } else {
271 key_provided = 0;
272 key = ieee->tx_keyidx;
273 }
274
275 IEEE80211_DEBUG_WX("Key: %d [%s]\n", key, key_provided ?
276 "provided" : "default");
277
278 crypt = &ieee->crypt[key];
279
280 if (erq->flags & IW_ENCODE_DISABLED) {
281 if (key_provided && *crypt) {
282 IEEE80211_DEBUG_WX("Disabling encryption on key %d.\n",
283 key);
284 ieee80211_crypt_delayed_deinit(ieee, crypt);
285 } else
286 IEEE80211_DEBUG_WX("Disabling encryption.\n");
287
288 /* Check all the keys to see if any are still configured,
289 * and if no key index was provided, de-init them all */
290 for (i = 0; i < WEP_KEYS; i++) {
291 if (ieee->crypt[i] != NULL) {
292 if (key_provided)
293 break;
294 ieee80211_crypt_delayed_deinit(
295 ieee, &ieee->crypt[i]);
296 }
297 }
298
299 if (i == WEP_KEYS) {
300 sec.enabled = 0;
301 sec.level = SEC_LEVEL_0;
302 sec.flags |= SEC_ENABLED | SEC_LEVEL;
303 }
304
305 goto done;
306 }
307
310 sec.enabled = 1;
311 sec.flags |= SEC_ENABLED;
312
313 if (*crypt != NULL && (*crypt)->ops != NULL &&
314 strcmp((*crypt)->ops->name, "WEP") != 0) {
315 /* changing to use WEP; deinit previously used algorithm
316 * on this key */
317 ieee80211_crypt_delayed_deinit(ieee, crypt);
318 }
319
320 if (*crypt == NULL) {
321 struct ieee80211_crypt_data *new_crypt;
322
323 /* take WEP into use */
324 new_crypt = kmalloc(sizeof(struct ieee80211_crypt_data),
325 GFP_KERNEL);
326 if (new_crypt == NULL)
327 return -ENOMEM;
328 memset(new_crypt, 0, sizeof(struct ieee80211_crypt_data));
329 new_crypt->ops = ieee80211_get_crypto_ops("WEP");
330 if (!new_crypt->ops) {
331 request_module("ieee80211_crypt_wep");
332 new_crypt->ops = ieee80211_get_crypto_ops("WEP");
333 }
334
335 if (new_crypt->ops && try_module_get(new_crypt->ops->owner))
336 new_crypt->priv = new_crypt->ops->init(key);
337
338 if (!new_crypt->ops || !new_crypt->priv) {
339 kfree(new_crypt);
340 new_crypt = NULL;
341
342 printk(KERN_WARNING "%s: could not initialize WEP: "
343 "load module ieee80211_crypt_wep\n",
344 dev->name);
345 return -EOPNOTSUPP;
346 }
347 *crypt = new_crypt;
348 }
349
350 /* If a new key was provided, set it up */
351 if (erq->length > 0) {
352 len = erq->length <= 5 ? 5 : 13;
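		/* Keys are padded up to one of the two standard WEP
		 * sizes: 5 bytes (40-bit) or 13 bytes (104-bit); both
		 * fall under SEC_LEVEL_1, set at the end of this
		 * function. */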
353 memcpy(sec.keys[key], keybuf, erq->length);
354 if (len > erq->length)
355 memset(sec.keys[key] + erq->length, 0,
356 len - erq->length);
357 IEEE80211_DEBUG_WX("Setting key %d to '%s' (%d:%d bytes)\n",
358 key, escape_essid(sec.keys[key], len),
359 erq->length, len);
360 sec.key_sizes[key] = len;
361 (*crypt)->ops->set_key(sec.keys[key], len, NULL,
362 (*crypt)->priv);
363 sec.flags |= (1 << key);
364 /* This ensures a key will be activated if no key is
365		 * explicitly set */
366 if (key == sec.active_key)
367 sec.flags |= SEC_ACTIVE_KEY;
368 } else {
369 len = (*crypt)->ops->get_key(sec.keys[key], WEP_KEY_LEN,
370 NULL, (*crypt)->priv);
371 if (len == 0) {
372 /* Set a default key of all 0 */
373 IEEE80211_DEBUG_WX("Setting key %d to all zero.\n",
374 key);
375 memset(sec.keys[key], 0, 13);
376 (*crypt)->ops->set_key(sec.keys[key], 13, NULL,
377 (*crypt)->priv);
378 sec.key_sizes[key] = 13;
379 sec.flags |= (1 << key);
380 }
381
382 /* No key data - just set the default TX key index */
383 if (key_provided) {
384 IEEE80211_DEBUG_WX(
385 "Setting key %d to default Tx key.\n", key);
386 ieee->tx_keyidx = key;
387 sec.active_key = key;
388 sec.flags |= SEC_ACTIVE_KEY;
389 }
390 }
391
392 done:
393 ieee->open_wep = !(erq->flags & IW_ENCODE_RESTRICTED);
394 sec.auth_mode = ieee->open_wep ? WLAN_AUTH_OPEN : WLAN_AUTH_SHARED_KEY;
395 sec.flags |= SEC_AUTH_MODE;
396 IEEE80211_DEBUG_WX("Auth: %s\n", sec.auth_mode == WLAN_AUTH_OPEN ?
397 "OPEN" : "SHARED KEY");
398
399 /* For now we just support WEP, so only set that security level...
400 * TODO: When WPA is added this is one place that needs to change */
401 sec.flags |= SEC_LEVEL;
402 sec.level = SEC_LEVEL_1; /* 40 and 104 bit WEP */
403
404 if (ieee->set_security)
405 ieee->set_security(dev, &sec);
406
407 /* Do not reset port if card is in Managed mode since resetting will
408	 * generate a new IEEE 802.11 authentication, which may end up looping
409	 * with IEEE 802.1X.  If your hardware requires a reset after WEP
410	 * configuration (for example, Prism2), implement reset_port in
411	 * the callback structure used to initialize the 802.11 stack. */
412 if (ieee->reset_on_keychange &&
413 ieee->iw_mode != IW_MODE_INFRA &&
414 ieee->reset_port && ieee->reset_port(dev)) {
415 printk(KERN_DEBUG "%s: reset_port failed\n", dev->name);
416 return -EINVAL;
417 }
418 return 0;
419}
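/*
 * Sketch of how a driver wires the callbacks consumed above when it
 * sets up its ieee80211_device (all example names are hypothetical):
 */
#if 0	/* illustration only */
	ieee->set_security = example_set_security; /* push keys/level to HW */
	ieee->reset_port = example_reset_port;     /* HW needing reset after rekey */
	ieee->reset_on_keychange = 1;              /* e.g. Prism2-class hardware */
#endif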
420
421int ieee80211_wx_get_encode(struct ieee80211_device *ieee,
422 struct iw_request_info *info,
423 union iwreq_data *wrqu, char *keybuf)
424{
425 struct iw_point *erq = &(wrqu->encoding);
426 int len, key;
427 struct ieee80211_crypt_data *crypt;
428
429 IEEE80211_DEBUG_WX("GET_ENCODE\n");
430
431 key = erq->flags & IW_ENCODE_INDEX;
432 if (key) {
433 if (key > WEP_KEYS)
434 return -EINVAL;
435 key--;
436 } else
437 key = ieee->tx_keyidx;
438
439 crypt = ieee->crypt[key];
440 erq->flags = key + 1;
441
442 if (crypt == NULL || crypt->ops == NULL) {
443 erq->length = 0;
444 erq->flags |= IW_ENCODE_DISABLED;
445 return 0;
446 }
447
448 if (strcmp(crypt->ops->name, "WEP") != 0) {
449 /* only WEP is supported with wireless extensions, so just
450 * report that encryption is used */
451 erq->length = 0;
452 erq->flags |= IW_ENCODE_ENABLED;
453 return 0;
454 }
455
456 len = crypt->ops->get_key(keybuf, WEP_KEY_LEN, NULL, crypt->priv);
457 erq->length = (len >= 0 ? len : 0);
458
459 erq->flags |= IW_ENCODE_ENABLED;
460
461 if (ieee->open_wep)
462 erq->flags |= IW_ENCODE_OPEN;
463 else
464 erq->flags |= IW_ENCODE_RESTRICTED;
465
466 return 0;
467}
468
469EXPORT_SYMBOL(ieee80211_wx_get_scan);
470EXPORT_SYMBOL(ieee80211_wx_set_encode);
471EXPORT_SYMBOL(ieee80211_wx_get_encode);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 3e63123f7bbd..e55136ae09f4 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -3,7 +3,6 @@
3# 3#
4config IP_MULTICAST 4config IP_MULTICAST
5 bool "IP: multicasting" 5 bool "IP: multicasting"
6 depends on INET
7 help 6 help
8 This is code for addressing several networked computers at once, 7 This is code for addressing several networked computers at once,
9 enlarging your kernel by about 2 KB. You need multicasting if you 8 enlarging your kernel by about 2 KB. You need multicasting if you
@@ -17,7 +16,6 @@ config IP_MULTICAST
17 16
18config IP_ADVANCED_ROUTER 17config IP_ADVANCED_ROUTER
19 bool "IP: advanced router" 18 bool "IP: advanced router"
20 depends on INET
21 ---help--- 19 ---help---
22 If you intend to run your Linux box mostly as a router, i.e. as a 20 If you intend to run your Linux box mostly as a router, i.e. as a
23 computer that forwards and redistributes network packets, say Y; you 21 computer that forwards and redistributes network packets, say Y; you
@@ -56,9 +54,9 @@ config IP_ADVANCED_ROUTER
56choice 54choice
57 prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)" 55 prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)"
58 depends on IP_ADVANCED_ROUTER 56 depends on IP_ADVANCED_ROUTER
59 default IP_FIB_HASH 57 default ASK_IP_FIB_HASH
60 58
61config IP_FIB_HASH 59config ASK_IP_FIB_HASH
62 bool "FIB_HASH" 60 bool "FIB_HASH"
63 ---help--- 61 ---help---
64 Current FIB is very proven and good enough for most users. 62 Current FIB is very proven and good enough for most users.
@@ -84,12 +82,8 @@ config IP_FIB_TRIE
84 82
85endchoice 83endchoice
86 84
87# If the user does not enable advanced routing, he gets the safe
88# default of the fib-hash algorithm.
89config IP_FIB_HASH 85config IP_FIB_HASH
90 bool 86 def_bool ASK_IP_FIB_HASH || !IP_ADVANCED_ROUTER
91 depends on !IP_ADVANCED_ROUTER
92 default y
93 87
94config IP_MULTIPLE_TABLES 88config IP_MULTIPLE_TABLES
95 bool "IP: policy routing" 89 bool "IP: policy routing"
@@ -130,7 +124,7 @@ config IP_ROUTE_MULTIPATH
130 124
131config IP_ROUTE_MULTIPATH_CACHED 125config IP_ROUTE_MULTIPATH_CACHED
132 bool "IP: equal cost multipath with caching support (EXPERIMENTAL)" 126 bool "IP: equal cost multipath with caching support (EXPERIMENTAL)"
133 depends on: IP_ROUTE_MULTIPATH 127 depends on IP_ROUTE_MULTIPATH
134 help 128 help
135 Normally, equal cost multipath routing is not supported by the 129 Normally, equal cost multipath routing is not supported by the
136 routing cache. If you say Y here, alternative routes are cached 130 routing cache. If you say Y here, alternative routes are cached
@@ -183,7 +177,6 @@ config IP_ROUTE_VERBOSE
183 177
184config IP_PNP 178config IP_PNP
185 bool "IP: kernel level autoconfiguration" 179 bool "IP: kernel level autoconfiguration"
186 depends on INET
187 help 180 help
188 This enables automatic configuration of IP addresses of devices and 181 This enables automatic configuration of IP addresses of devices and
189 of the routing table during kernel boot, based on either information 182 of the routing table during kernel boot, based on either information
@@ -242,8 +235,6 @@ config IP_PNP_RARP
242# bool ' IP: ARP support' CONFIG_IP_PNP_ARP 235# bool ' IP: ARP support' CONFIG_IP_PNP_ARP
243config NET_IPIP 236config NET_IPIP
244 tristate "IP: tunneling" 237 tristate "IP: tunneling"
245 depends on INET
246 select INET_TUNNEL
247 ---help--- 238 ---help---
248 Tunneling means encapsulating data of one protocol type within 239 Tunneling means encapsulating data of one protocol type within
249 another protocol and sending it over a channel that understands the 240 another protocol and sending it over a channel that understands the
@@ -260,8 +251,6 @@ config NET_IPIP
260 251
261config NET_IPGRE 252config NET_IPGRE
262 tristate "IP: GRE tunnels over IP" 253 tristate "IP: GRE tunnels over IP"
263 depends on INET
264 select XFRM
265 help 254 help
266 Tunneling means encapsulating data of one protocol type within 255 Tunneling means encapsulating data of one protocol type within
267 another protocol and sending it over a channel that understands the 256 another protocol and sending it over a channel that understands the
@@ -319,7 +308,7 @@ config IP_PIMSM_V2
319 308
320config ARPD 309config ARPD
321 bool "IP: ARP daemon support (EXPERIMENTAL)" 310 bool "IP: ARP daemon support (EXPERIMENTAL)"
322 depends on INET && EXPERIMENTAL 311 depends on EXPERIMENTAL
323 ---help--- 312 ---help---
324 Normally, the kernel maintains an internal cache which maps IP 313 Normally, the kernel maintains an internal cache which maps IP
325 addresses to hardware addresses on the local network, so that 314 addresses to hardware addresses on the local network, so that
@@ -344,7 +333,6 @@ config ARPD
344 333
345config SYN_COOKIES 334config SYN_COOKIES
346 bool "IP: TCP syncookie support (disabled per default)" 335 bool "IP: TCP syncookie support (disabled per default)"
347 depends on INET
348 ---help--- 336 ---help---
349 Normal TCP/IP networking is open to an attack known as "SYN 337 Normal TCP/IP networking is open to an attack known as "SYN
350 flooding". This denial-of-service attack prevents legitimate remote 338 flooding". This denial-of-service attack prevents legitimate remote
@@ -381,7 +369,6 @@ config SYN_COOKIES
381 369
382config INET_AH 370config INET_AH
383 tristate "IP: AH transformation" 371 tristate "IP: AH transformation"
384 depends on INET
385 select XFRM 372 select XFRM
386 select CRYPTO 373 select CRYPTO
387 select CRYPTO_HMAC 374 select CRYPTO_HMAC
@@ -394,7 +381,6 @@ config INET_AH
394 381
395config INET_ESP 382config INET_ESP
396 tristate "IP: ESP transformation" 383 tristate "IP: ESP transformation"
397 depends on INET
398 select XFRM 384 select XFRM
399 select CRYPTO 385 select CRYPTO
400 select CRYPTO_HMAC 386 select CRYPTO_HMAC
@@ -408,7 +394,6 @@ config INET_ESP
408 394
409config INET_IPCOMP 395config INET_IPCOMP
410 tristate "IP: IPComp transformation" 396 tristate "IP: IPComp transformation"
411 depends on INET
412 select XFRM 397 select XFRM
413 select INET_TUNNEL 398 select INET_TUNNEL
414 select CRYPTO 399 select CRYPTO
@@ -421,7 +406,6 @@ config INET_IPCOMP
421 406
422config INET_TUNNEL 407config INET_TUNNEL
423 tristate "IP: tunnel transformation" 408 tristate "IP: tunnel transformation"
424 depends on INET
425 select XFRM 409 select XFRM
426 ---help--- 410 ---help---
427 Support for generic IP tunnel transformation, which is required by 411 Support for generic IP tunnel transformation, which is required by
@@ -429,25 +413,22 @@ config INET_TUNNEL
429 413
430 If unsure, say Y. 414 If unsure, say Y.
431 415
432config IP_TCPDIAG 416config INET_DIAG
433 tristate "IP: TCP socket monitoring interface" 417 tristate "INET: socket monitoring interface"
434 depends on INET
435 default y 418 default y
436 ---help--- 419 ---help---
437 Support for TCP socket monitoring interface used by native Linux 420 Support for INET (TCP, DCCP, etc) socket monitoring interface used by
438 tools such as ss. ss is included in iproute2, currently downloadable 421 native Linux tools such as ss. ss is included in iproute2, currently
439 at <http://developer.osdl.org/dev/iproute2>. If you want IPv6 support 422 downloadable at <http://developer.osdl.org/dev/iproute2>.
440 and have selected IPv6 as a module, you need to build this as a
441 module too.
442 423
443 If unsure, say Y. 424 If unsure, say Y.
444 425
445config IP_TCPDIAG_IPV6 426config INET_TCP_DIAG
446 def_bool (IP_TCPDIAG=y && IPV6=y) || (IP_TCPDIAG=m && IPV6) 427 depends on INET_DIAG
428 def_tristate INET_DIAG
447 429
448config TCP_CONG_ADVANCED 430config TCP_CONG_ADVANCED
449 bool "TCP: advanced congestion control" 431 bool "TCP: advanced congestion control"
450 depends on INET
451 ---help--- 432 ---help---
452 Support for selection of various TCP congestion control 433 Support for selection of various TCP congestion control
453 modules. 434 modules.
@@ -463,7 +444,6 @@ menu "TCP congestion control"
463 444
464config TCP_CONG_BIC 445config TCP_CONG_BIC
465 tristate "Binary Increase Congestion (BIC) control" 446 tristate "Binary Increase Congestion (BIC) control"
466 depends on INET
467 default y 447 default y
468 ---help--- 448 ---help---
469 BIC-TCP is a sender-side only change that ensures a linear RTT 449 BIC-TCP is a sender-side only change that ensures a linear RTT
@@ -478,7 +458,6 @@ config TCP_CONG_BIC
478 458
479config TCP_CONG_WESTWOOD 459config TCP_CONG_WESTWOOD
480 tristate "TCP Westwood+" 460 tristate "TCP Westwood+"
481 depends on INET
482 default m 461 default m
483 ---help--- 462 ---help---
484 TCP Westwood+ is a sender-side only modification of the TCP Reno 463 TCP Westwood+ is a sender-side only modification of the TCP Reno
@@ -493,7 +472,6 @@ config TCP_CONG_WESTWOOD
493 472
494config TCP_CONG_HTCP 473config TCP_CONG_HTCP
495 tristate "H-TCP" 474 tristate "H-TCP"
496 depends on INET
497 default m 475 default m
498 ---help--- 476 ---help---
499 H-TCP is a send-side only modifications of the TCP Reno 477 H-TCP is a send-side only modifications of the TCP Reno
@@ -505,7 +483,7 @@ config TCP_CONG_HTCP
505 483
506config TCP_CONG_HSTCP 484config TCP_CONG_HSTCP
507 tristate "High Speed TCP" 485 tristate "High Speed TCP"
508 depends on INET && EXPERIMENTAL 486 depends on EXPERIMENTAL
509 default n 487 default n
510 ---help--- 488 ---help---
511 Sally Floyd's High Speed TCP (RFC 3649) congestion control. 489 Sally Floyd's High Speed TCP (RFC 3649) congestion control.
@@ -516,7 +494,7 @@ config TCP_CONG_HSTCP
516 494
517config TCP_CONG_HYBLA 495config TCP_CONG_HYBLA
518 tristate "TCP-Hybla congestion control algorithm" 496 tristate "TCP-Hybla congestion control algorithm"
519 depends on INET && EXPERIMENTAL 497 depends on EXPERIMENTAL
520 default n 498 default n
521 ---help--- 499 ---help---
522 TCP-Hybla is a sender-side only change that eliminates penalization of 500 TCP-Hybla is a sender-side only change that eliminates penalization of
@@ -526,7 +504,7 @@ config TCP_CONG_HYBLA
526 504
527config TCP_CONG_VEGAS 505config TCP_CONG_VEGAS
528 tristate "TCP Vegas" 506 tristate "TCP Vegas"
529 depends on INET && EXPERIMENTAL 507 depends on EXPERIMENTAL
530 default n 508 default n
531 ---help--- 509 ---help---
532 TCP Vegas is a sender-side only change to TCP that anticipates 510 TCP Vegas is a sender-side only change to TCP that anticipates
@@ -537,7 +515,7 @@ config TCP_CONG_VEGAS
537 515
538config TCP_CONG_SCALABLE 516config TCP_CONG_SCALABLE
539 tristate "Scalable TCP" 517 tristate "Scalable TCP"
540 depends on INET && EXPERIMENTAL 518 depends on EXPERIMENTAL
541 default n 519 default n
542 ---help--- 520 ---help---
543 Scalable TCP is a sender-side only change to TCP which uses a 521 Scalable TCP is a sender-side only change to TCP which uses a
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 5718cdb3a61e..f0435d00db6b 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -2,13 +2,14 @@
2# Makefile for the Linux TCP/IP (INET) layer. 2# Makefile for the Linux TCP/IP (INET) layer.
3# 3#
4 4
5obj-y := utils.o route.o inetpeer.o protocol.o \ 5obj-y := route.o inetpeer.o protocol.o \
6 ip_input.o ip_fragment.o ip_forward.o ip_options.o \ 6 ip_input.o ip_fragment.o ip_forward.o ip_options.o \
7 ip_output.o ip_sockglue.o \ 7 ip_output.o ip_sockglue.o inet_hashtables.o \
8 inet_timewait_sock.o inet_connection_sock.o \
8 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ 9 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
9 tcp_minisocks.o tcp_cong.o \ 10 tcp_minisocks.o tcp_cong.o \
10 datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ 11 datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \
11 sysctl_net_ipv4.o fib_frontend.o fib_semantics.o 12 sysctl_net_ipv4.o fib_frontend.o fib_semantics.o netfilter.o
12 13
13obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o 14obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
14obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o 15obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
@@ -29,8 +30,9 @@ obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o
29obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o 30obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o
30obj-$(CONFIG_NETFILTER) += netfilter/ 31obj-$(CONFIG_NETFILTER) += netfilter/
31obj-$(CONFIG_IP_VS) += ipvs/ 32obj-$(CONFIG_IP_VS) += ipvs/
32obj-$(CONFIG_IP_TCPDIAG) += tcp_diag.o 33obj-$(CONFIG_INET_DIAG) += inet_diag.o
33obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o 34obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o
35obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
34obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o 36obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
35obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o 37obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o
36obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o 38obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ef7468376ae6..bf147f8db399 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -99,6 +99,7 @@
99#include <net/arp.h> 99#include <net/arp.h>
100#include <net/route.h> 100#include <net/route.h>
101#include <net/ip_fib.h> 101#include <net/ip_fib.h>
102#include <net/inet_connection_sock.h>
102#include <net/tcp.h> 103#include <net/tcp.h>
103#include <net/udp.h> 104#include <net/udp.h>
104#include <linux/skbuff.h> 105#include <linux/skbuff.h>
@@ -112,11 +113,7 @@
112#include <linux/mroute.h> 113#include <linux/mroute.h>
113#endif 114#endif
114 115
115DEFINE_SNMP_STAT(struct linux_mib, net_statistics); 116DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly;
116
117#ifdef INET_REFCNT_DEBUG
118atomic_t inet_sock_nr;
119#endif
120 117
121extern void ip_mc_drop_socket(struct sock *sk); 118extern void ip_mc_drop_socket(struct sock *sk);
122 119
@@ -153,11 +150,7 @@ void inet_sock_destruct(struct sock *sk)
153 if (inet->opt) 150 if (inet->opt)
154 kfree(inet->opt); 151 kfree(inet->opt);
155 dst_release(sk->sk_dst_cache); 152 dst_release(sk->sk_dst_cache);
156#ifdef INET_REFCNT_DEBUG 153 sk_refcnt_debug_dec(sk);
157 atomic_dec(&inet_sock_nr);
158 printk(KERN_DEBUG "INET socket %p released, %d are still alive\n",
159 sk, atomic_read(&inet_sock_nr));
160#endif
161} 154}
162 155
163/* 156/*
@@ -210,7 +203,7 @@ int inet_listen(struct socket *sock, int backlog)
210 * we can only allow the backlog to be adjusted. 203 * we can only allow the backlog to be adjusted.
211 */ 204 */
212 if (old_state != TCP_LISTEN) { 205 if (old_state != TCP_LISTEN) {
213 err = tcp_listen_start(sk); 206 err = inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
214 if (err) 207 if (err)
215 goto out; 208 goto out;
216 } 209 }
@@ -235,12 +228,14 @@ static int inet_create(struct socket *sock, int protocol)
235 struct proto *answer_prot; 228 struct proto *answer_prot;
236 unsigned char answer_flags; 229 unsigned char answer_flags;
237 char answer_no_check; 230 char answer_no_check;
238 int err; 231 int try_loading_module = 0;
232 int err = -ESOCKTNOSUPPORT;
239 233
240 sock->state = SS_UNCONNECTED; 234 sock->state = SS_UNCONNECTED;
241 235
242 /* Look for the requested type/protocol pair. */ 236 /* Look for the requested type/protocol pair. */
243 answer = NULL; 237 answer = NULL;
238lookup_protocol:
244 rcu_read_lock(); 239 rcu_read_lock();
245 list_for_each_rcu(p, &inetsw[sock->type]) { 240 list_for_each_rcu(p, &inetsw[sock->type]) {
246 answer = list_entry(p, struct inet_protosw, list); 241 answer = list_entry(p, struct inet_protosw, list);
@@ -261,9 +256,28 @@ static int inet_create(struct socket *sock, int protocol)
261 answer = NULL; 256 answer = NULL;
262 } 257 }
263 258
264 err = -ESOCKTNOSUPPORT; 259 if (unlikely(answer == NULL)) {
265 if (!answer) 260 if (try_loading_module < 2) {
266 goto out_rcu_unlock; 261 rcu_read_unlock();
262 /*
263 * Be more specific, e.g. net-pf-2-proto-132-type-1
264 * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM)
265 */
266 if (++try_loading_module == 1)
267 request_module("net-pf-%d-proto-%d-type-%d",
268 PF_INET, protocol, sock->type);
269 /*
270 * Fall back to generic, e.g. net-pf-2-proto-132
271 * (net-pf-PF_INET-proto-IPPROTO_SCTP)
272 */
273 else
274 request_module("net-pf-%d-proto-%d",
275 PF_INET, protocol);
276 goto lookup_protocol;
277 } else
278 goto out_rcu_unlock;
279 }
280
267 err = -EPERM; 281 err = -EPERM;
268 if (answer->capability > 0 && !capable(answer->capability)) 282 if (answer->capability > 0 && !capable(answer->capability))
269 goto out_rcu_unlock; 283 goto out_rcu_unlock;
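/*
 * Aside: a protocol module makes itself reachable by the
 * request_module() strings above via MODULE_ALIAS.  Using the SCTP
 * example from the comment (PF_INET == 2, IPPROTO_SCTP == 132,
 * SOCK_STREAM == 1), such a module would carry lines like:
 *
 *	MODULE_ALIAS("net-pf-2-proto-132-type-1");
 *	MODULE_ALIAS("net-pf-2-proto-132");
 *
 * (illustrative; consult the module itself for its actual aliases)
 */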
@@ -317,9 +331,7 @@ static int inet_create(struct socket *sock, int protocol)
317 inet->mc_index = 0; 331 inet->mc_index = 0;
318 inet->mc_list = NULL; 332 inet->mc_list = NULL;
319 333
320#ifdef INET_REFCNT_DEBUG 334 sk_refcnt_debug_inc(sk);
321 atomic_inc(&inet_sock_nr);
322#endif
323 335
324 if (inet->num) { 336 if (inet->num) {
325 /* It assumes that any protocol which allows 337 /* It assumes that any protocol which allows
@@ -847,10 +859,6 @@ static struct net_proto_family inet_family_ops = {
847 .owner = THIS_MODULE, 859 .owner = THIS_MODULE,
848}; 860};
849 861
850
851extern void tcp_init(void);
852extern void tcp_v4_init(struct net_proto_family *);
853
854/* Upon startup we insert all the elements in inetsw_array[] into 862/* Upon startup we insert all the elements in inetsw_array[] into
855 * the linked list inetsw. 863 * the linked list inetsw.
856 */ 864 */
@@ -961,6 +969,119 @@ void inet_unregister_protosw(struct inet_protosw *p)
961 } 969 }
962} 970}
963 971
972/*
973 * Shall we try to damage output packets if routing dev changes?
974 */
975
976int sysctl_ip_dynaddr;
977
978static int inet_sk_reselect_saddr(struct sock *sk)
979{
980 struct inet_sock *inet = inet_sk(sk);
981 int err;
982 struct rtable *rt;
983 __u32 old_saddr = inet->saddr;
984 __u32 new_saddr;
985 __u32 daddr = inet->daddr;
986
987 if (inet->opt && inet->opt->srr)
988 daddr = inet->opt->faddr;
989
990 /* Query new route. */
991 err = ip_route_connect(&rt, daddr, 0,
992 RT_CONN_FLAGS(sk),
993 sk->sk_bound_dev_if,
994 sk->sk_protocol,
995 inet->sport, inet->dport, sk);
996 if (err)
997 return err;
998
999 sk_setup_caps(sk, &rt->u.dst);
1000
1001 new_saddr = rt->rt_src;
1002
1003 if (new_saddr == old_saddr)
1004 return 0;
1005
1006 if (sysctl_ip_dynaddr > 1) {
1007 printk(KERN_INFO "%s(): shifting inet->"
1008 "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n",
1009 __FUNCTION__,
1010 NIPQUAD(old_saddr),
1011 NIPQUAD(new_saddr));
1012 }
1013
1014 inet->saddr = inet->rcv_saddr = new_saddr;
1015
1016 /*
1017 * XXX The only one ugly spot where we need to
1018 * XXX really change the sockets identity after
1019 * XXX it has entered the hashes. -DaveM
1020 *
1021 * Besides that, it does not check for connection
1022 * uniqueness. Wait for troubles.
1023 */
1024 __sk_prot_rehash(sk);
1025 return 0;
1026}
1027
1028int inet_sk_rebuild_header(struct sock *sk)
1029{
1030 struct inet_sock *inet = inet_sk(sk);
1031 struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
1032 u32 daddr;
1033 int err;
1034
1035 /* Route is OK, nothing to do. */
1036 if (rt)
1037 return 0;
1038
1039 /* Reroute. */
1040 daddr = inet->daddr;
1041 if (inet->opt && inet->opt->srr)
1042 daddr = inet->opt->faddr;
1043{
1044 struct flowi fl = {
1045 .oif = sk->sk_bound_dev_if,
1046 .nl_u = {
1047 .ip4_u = {
1048 .daddr = daddr,
1049 .saddr = inet->saddr,
1050 .tos = RT_CONN_FLAGS(sk),
1051 },
1052 },
1053 .proto = sk->sk_protocol,
1054 .uli_u = {
1055 .ports = {
1056 .sport = inet->sport,
1057 .dport = inet->dport,
1058 },
1059 },
1060 };
1061
1062 err = ip_route_output_flow(&rt, &fl, sk, 0);
1063}
1064 if (!err)
1065 sk_setup_caps(sk, &rt->u.dst);
1066 else {
1067 /* Routing failed... */
1068 sk->sk_route_caps = 0;
1069 /*
1070 * Other protocols have to map its equivalent state to TCP_SYN_SENT.
1071 * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
1072 */
1073 if (!sysctl_ip_dynaddr ||
1074 sk->sk_state != TCP_SYN_SENT ||
1075 (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
1076 (err = inet_sk_reselect_saddr(sk)) != 0)
1077 sk->sk_err_soft = -err;
1078 }
1079
1080 return err;
1081}
1082
1083EXPORT_SYMBOL(inet_sk_rebuild_header);
1084
964#ifdef CONFIG_IP_MULTICAST 1085#ifdef CONFIG_IP_MULTICAST
965static struct net_protocol igmp_protocol = { 1086static struct net_protocol igmp_protocol = {
966 .handler = igmp_rcv, 1087 .handler = igmp_rcv,
@@ -1007,7 +1128,6 @@ static int __init init_ipv4_mibs(void)
1007} 1128}
1008 1129
1009static int ipv4_proc_init(void); 1130static int ipv4_proc_init(void);
1010extern void ipfrag_init(void);
1011 1131
1012/* 1132/*
1013 * IP protocol layer initialiser 1133 * IP protocol layer initialiser
@@ -1128,19 +1248,10 @@ module_init(inet_init);
1128/* ------------------------------------------------------------------------ */ 1248/* ------------------------------------------------------------------------ */
1129 1249
1130#ifdef CONFIG_PROC_FS 1250#ifdef CONFIG_PROC_FS
1131extern int fib_proc_init(void);
1132extern void fib_proc_exit(void);
1133#ifdef CONFIG_IP_FIB_TRIE 1251#ifdef CONFIG_IP_FIB_TRIE
1134extern int fib_stat_proc_init(void); 1252extern int fib_stat_proc_init(void);
1135extern void fib_stat_proc_exit(void); 1253extern void fib_stat_proc_exit(void);
1136#endif 1254#endif
1137extern int ip_misc_proc_init(void);
1138extern int raw_proc_init(void);
1139extern void raw_proc_exit(void);
1140extern int tcp4_proc_init(void);
1141extern void tcp4_proc_exit(void);
1142extern int udp4_proc_init(void);
1143extern void udp4_proc_exit(void);
1144 1255
1145static int __init ipv4_proc_init(void) 1256static int __init ipv4_proc_init(void)
1146{ 1257{
@@ -1157,7 +1268,7 @@ static int __init ipv4_proc_init(void)
1157#ifdef CONFIG_IP_FIB_TRIE 1268#ifdef CONFIG_IP_FIB_TRIE
1158 if (fib_stat_proc_init()) 1269 if (fib_stat_proc_init())
1159 goto out_fib_stat; 1270 goto out_fib_stat;
1160 #endif 1271#endif
1161 if (ip_misc_proc_init()) 1272 if (ip_misc_proc_init())
1162 goto out_misc; 1273 goto out_misc;
1163out: 1274out:
@@ -1205,7 +1316,3 @@ EXPORT_SYMBOL(inet_stream_ops);
1205EXPORT_SYMBOL(inet_unregister_protosw); 1316EXPORT_SYMBOL(inet_unregister_protosw);
1206EXPORT_SYMBOL(net_statistics); 1317EXPORT_SYMBOL(net_statistics);
1207EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); 1318EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
1208
1209#ifdef INET_REFCNT_DEBUG
1210EXPORT_SYMBOL(inet_sock_nr);
1211#endif
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 514c85b2631a..035ad2c9e1ba 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -263,10 +263,8 @@ static int ah_init_state(struct xfrm_state *x)
263 263
264error: 264error:
265 if (ahp) { 265 if (ahp) {
266 if (ahp->work_icv) 266 kfree(ahp->work_icv);
267 kfree(ahp->work_icv); 267 crypto_free_tfm(ahp->tfm);
268 if (ahp->tfm)
269 crypto_free_tfm(ahp->tfm);
270 kfree(ahp); 268 kfree(ahp);
271 } 269 }
272 return -EINVAL; 270 return -EINVAL;
@@ -279,14 +277,10 @@ static void ah_destroy(struct xfrm_state *x)
279 if (!ahp) 277 if (!ahp)
280 return; 278 return;
281 279
282 if (ahp->work_icv) { 280 kfree(ahp->work_icv);
283 kfree(ahp->work_icv); 281 ahp->work_icv = NULL;
284 ahp->work_icv = NULL; 282 crypto_free_tfm(ahp->tfm);
285 } 283 ahp->tfm = NULL;
286 if (ahp->tfm) {
287 crypto_free_tfm(ahp->tfm);
288 ahp->tfm = NULL;
289 }
290 kfree(ahp); 284 kfree(ahp);
291} 285}
292 286
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index a642fd612853..8bf312bdea13 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -700,7 +700,7 @@ void arp_send(int type, int ptype, u32 dest_ip,
700static void parp_redo(struct sk_buff *skb) 700static void parp_redo(struct sk_buff *skb)
701{ 701{
702 nf_reset(skb); 702 nf_reset(skb);
703 arp_rcv(skb, skb->dev, NULL); 703 arp_rcv(skb, skb->dev, NULL, skb->dev);
704} 704}
705 705
706/* 706/*
@@ -865,7 +865,7 @@ static int arp_process(struct sk_buff *skb)
865 if (n) 865 if (n)
866 neigh_release(n); 866 neigh_release(n);
867 867
868 if (skb->stamp.tv_sec == LOCALLY_ENQUEUED || 868 if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
869 skb->pkt_type == PACKET_HOST || 869 skb->pkt_type == PACKET_HOST ||
870 in_dev->arp_parms->proxy_delay == 0) { 870 in_dev->arp_parms->proxy_delay == 0) {
871 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); 871 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
@@ -927,7 +927,7 @@ out:
927 * Receive an arp request from the device layer. 927 * Receive an arp request from the device layer.
928 */ 928 */
929 929
930int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) 930int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
931{ 931{
932 struct arphdr *arp; 932 struct arphdr *arp;
933 933
@@ -948,6 +948,8 @@ int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
948 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) 948 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
949 goto out_of_mem; 949 goto out_of_mem;
950 950
951 memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
952
951 return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process); 953 return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
952 954
953freeskb: 955freeskb:
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index b1db561f2542..c1b42b5257f8 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -16,9 +16,10 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/ip.h> 17#include <linux/ip.h>
18#include <linux/in.h> 18#include <linux/in.h>
19#include <net/ip.h>
19#include <net/sock.h> 20#include <net/sock.h>
20#include <net/tcp.h>
21#include <net/route.h> 21#include <net/route.h>
22#include <net/tcp_states.h>
22 23
23int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 24int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
24{ 25{
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index d8a10e3dd77d..ba2895ae8151 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1111,13 +1111,12 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa)
1111 struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); 1111 struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
1112 1112
1113 if (!skb) 1113 if (!skb)
1114 netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS); 1114 netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS);
1115 else if (inet_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { 1115 else if (inet_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
1116 kfree_skb(skb); 1116 kfree_skb(skb);
1117 netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL); 1117 netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL);
1118 } else { 1118 } else {
1119 NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_IFADDR; 1119 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL);
1120 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV4_IFADDR, GFP_KERNEL);
1121 } 1120 }
1122} 1121}
1123 1122
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index ba57446d5d1f..1b5a09d1b90b 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -331,8 +331,8 @@ static void esp4_err(struct sk_buff *skb, u32 info)
331 x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); 331 x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
332 if (!x) 332 if (!x)
333 return; 333 return;
334 NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", 334 NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
335 ntohl(esph->spi), ntohl(iph->daddr))); 335 ntohl(esph->spi), ntohl(iph->daddr));
336 xfrm_state_put(x); 336 xfrm_state_put(x);
337} 337}
338 338
@@ -343,22 +343,14 @@ static void esp_destroy(struct xfrm_state *x)
343 if (!esp) 343 if (!esp)
344 return; 344 return;
345 345
346 if (esp->conf.tfm) { 346 crypto_free_tfm(esp->conf.tfm);
347 crypto_free_tfm(esp->conf.tfm); 347 esp->conf.tfm = NULL;
348 esp->conf.tfm = NULL; 348 kfree(esp->conf.ivec);
349 } 349 esp->conf.ivec = NULL;
350 if (esp->conf.ivec) { 350 crypto_free_tfm(esp->auth.tfm);
351 kfree(esp->conf.ivec); 351 esp->auth.tfm = NULL;
352 esp->conf.ivec = NULL; 352 kfree(esp->auth.work_icv);
353 } 353 esp->auth.work_icv = NULL;
354 if (esp->auth.tfm) {
355 crypto_free_tfm(esp->auth.tfm);
356 esp->auth.tfm = NULL;
357 }
358 if (esp->auth.work_icv) {
359 kfree(esp->auth.work_icv);
360 esp->auth.work_icv = NULL;
361 }
362 kfree(esp); 354 kfree(esp);
363} 355}
364 356
@@ -395,10 +387,10 @@ static int esp_init_state(struct xfrm_state *x)
395 387
396 if (aalg_desc->uinfo.auth.icv_fullbits/8 != 388 if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
397 crypto_tfm_alg_digestsize(esp->auth.tfm)) { 389 crypto_tfm_alg_digestsize(esp->auth.tfm)) {
398 NETDEBUG(printk(KERN_INFO "ESP: %s digestsize %u != %hu\n", 390 NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
399 x->aalg->alg_name, 391 x->aalg->alg_name,
400 crypto_tfm_alg_digestsize(esp->auth.tfm), 392 crypto_tfm_alg_digestsize(esp->auth.tfm),
401 aalg_desc->uinfo.auth.icv_fullbits/8)); 393 aalg_desc->uinfo.auth.icv_fullbits/8);
402 goto error; 394 goto error;
403 } 395 }
404 396
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index cd8e45ab9580..4e1379f71269 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -558,16 +558,15 @@ static void nl_fib_input(struct sock *sk, int len)
558 nl_fib_lookup(frn, tb); 558 nl_fib_lookup(frn, tb);
559 559
560 pid = nlh->nlmsg_pid; /*pid of sending process */ 560 pid = nlh->nlmsg_pid; /*pid of sending process */
561 NETLINK_CB(skb).groups = 0; /* not in mcast group */
562 NETLINK_CB(skb).pid = 0; /* from kernel */ 561 NETLINK_CB(skb).pid = 0; /* from kernel */
563 NETLINK_CB(skb).dst_pid = pid; 562 NETLINK_CB(skb).dst_pid = pid;
564 NETLINK_CB(skb).dst_groups = 0; /* unicast */ 563 NETLINK_CB(skb).dst_group = 0; /* unicast */
565 netlink_unicast(sk, skb, pid, MSG_DONTWAIT); 564 netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
566} 565}
567 566
568static void nl_fib_lookup_init(void) 567static void nl_fib_lookup_init(void)
569{ 568{
570 netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input); 569 netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
571} 570}
572 571
573static void fib_disable_ip(struct net_device *dev, int force) 572static void fib_disable_ip(struct net_device *dev, int force)
@@ -662,5 +661,4 @@ void __init ip_fib_init(void)
662} 661}
663 662
664EXPORT_SYMBOL(inet_addr_type); 663EXPORT_SYMBOL(inet_addr_type);
665EXPORT_SYMBOL(ip_dev_find);
666EXPORT_SYMBOL(ip_rt_ioctl); 664EXPORT_SYMBOL(ip_rt_ioctl);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index b10d6bb5ef3d..2a8c9afc3695 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -45,8 +45,8 @@
45 45
46#include "fib_lookup.h" 46#include "fib_lookup.h"
47 47
48static kmem_cache_t *fn_hash_kmem; 48static kmem_cache_t *fn_hash_kmem __read_mostly;
49static kmem_cache_t *fn_alias_kmem; 49static kmem_cache_t *fn_alias_kmem __read_mostly;
50 50
51struct fib_node { 51struct fib_node {
52 struct hlist_node fn_hash; 52 struct hlist_node fn_hash;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index b729d97cfa93..ef6609ea0eb7 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -7,6 +7,7 @@
7 7
8struct fib_alias { 8struct fib_alias {
9 struct list_head fa_list; 9 struct list_head fa_list;
10 struct rcu_head rcu;
10 struct fib_info *fa_info; 11 struct fib_info *fa_info;
11 u8 fa_tos; 12 u8 fa_tos;
12 u8 fa_type; 13 u8 fa_type;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c886b28ba9f5..d41219e8037c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -290,10 +290,10 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
290 kfree_skb(skb); 290 kfree_skb(skb);
291 return; 291 return;
292 } 292 }
293 NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE; 293 NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
294 if (n->nlmsg_flags&NLM_F_ECHO) 294 if (n->nlmsg_flags&NLM_F_ECHO)
295 atomic_inc(&skb->users); 295 atomic_inc(&skb->users);
296 netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL); 296 netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
297 if (n->nlmsg_flags&NLM_F_ECHO) 297 if (n->nlmsg_flags&NLM_F_ECHO)
298 netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); 298 netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
299} 299}
@@ -593,10 +593,13 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
593 struct hlist_head *new_laddrhash, 593 struct hlist_head *new_laddrhash,
594 unsigned int new_size) 594 unsigned int new_size)
595{ 595{
596 struct hlist_head *old_info_hash, *old_laddrhash;
596 unsigned int old_size = fib_hash_size; 597 unsigned int old_size = fib_hash_size;
597 unsigned int i; 598 unsigned int i, bytes;
598 599
599 write_lock(&fib_info_lock); 600 write_lock(&fib_info_lock);
601 old_info_hash = fib_info_hash;
602 old_laddrhash = fib_info_laddrhash;
600 fib_hash_size = new_size; 603 fib_hash_size = new_size;
601 604
602 for (i = 0; i < old_size; i++) { 605 for (i = 0; i < old_size; i++) {
@@ -636,6 +639,10 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
636 fib_info_laddrhash = new_laddrhash; 639 fib_info_laddrhash = new_laddrhash;
637 640
638 write_unlock(&fib_info_lock); 641 write_unlock(&fib_info_lock);
642
643 bytes = old_size * sizeof(struct hlist_head *);
644 fib_hash_free(old_info_hash, bytes);
645 fib_hash_free(old_laddrhash, bytes);
639} 646}
640 647
641struct fib_info * 648struct fib_info *
@@ -847,6 +854,7 @@ failure:
847 return NULL; 854 return NULL;
848} 855}
849 856
857/* Note! fib_semantic_match intentionally uses RCU list functions. */
850int fib_semantic_match(struct list_head *head, const struct flowi *flp, 858int fib_semantic_match(struct list_head *head, const struct flowi *flp,
851 struct fib_result *res, __u32 zone, __u32 mask, 859 struct fib_result *res, __u32 zone, __u32 mask,
852 int prefixlen) 860 int prefixlen)
@@ -854,7 +862,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
854 struct fib_alias *fa; 862 struct fib_alias *fa;
855 int nh_sel = 0; 863 int nh_sel = 0;
856 864
857 list_for_each_entry(fa, head, fa_list) { 865 list_for_each_entry_rcu(fa, head, fa_list) {
858 int err; 866 int err;
859 867
860 if (fa->fa_tos && 868 if (fa->fa_tos &&
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 4be234c7d8c3..b2dea4e5da77 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -43,7 +43,7 @@
43 * 2 of the License, or (at your option) any later version. 43 * 2 of the License, or (at your option) any later version.
44 */ 44 */
45 45
46#define VERSION "0.325" 46#define VERSION "0.402"
47 47
48#include <linux/config.h> 48#include <linux/config.h>
49#include <asm/uaccess.h> 49#include <asm/uaccess.h>
@@ -62,6 +62,7 @@
62#include <linux/netdevice.h> 62#include <linux/netdevice.h>
63#include <linux/if_arp.h> 63#include <linux/if_arp.h>
64#include <linux/proc_fs.h> 64#include <linux/proc_fs.h>
65#include <linux/rcupdate.h>
65#include <linux/skbuff.h> 66#include <linux/skbuff.h>
66#include <linux/netlink.h> 67#include <linux/netlink.h>
67#include <linux/init.h> 68#include <linux/init.h>
@@ -77,56 +78,55 @@
77#undef CONFIG_IP_FIB_TRIE_STATS 78#undef CONFIG_IP_FIB_TRIE_STATS
78#define MAX_CHILDS 16384 79#define MAX_CHILDS 16384
79 80
80#define EXTRACT(p, n, str) ((str)<<(p)>>(32-(n)))
81#define KEYLENGTH (8*sizeof(t_key)) 81#define KEYLENGTH (8*sizeof(t_key))
82#define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l)) 82#define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
83#define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset)) 83#define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
84 84
85static DEFINE_RWLOCK(fib_lock);
86
87typedef unsigned int t_key; 85typedef unsigned int t_key;
88 86
89#define T_TNODE 0 87#define T_TNODE 0
90#define T_LEAF 1 88#define T_LEAF 1
91#define NODE_TYPE_MASK 0x1UL 89#define NODE_TYPE_MASK 0x1UL
92#define NODE_PARENT(_node) \ 90#define NODE_PARENT(node) \
93((struct tnode *)((_node)->_parent & ~NODE_TYPE_MASK)) 91 ((struct tnode *)rcu_dereference(((node)->parent & ~NODE_TYPE_MASK)))
94#define NODE_SET_PARENT(_node, _ptr) \ 92
95((_node)->_parent = (((unsigned long)(_ptr)) | \ 93#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
96 ((_node)->_parent & NODE_TYPE_MASK))) 94
97#define NODE_INIT_PARENT(_node, _type) \ 95#define NODE_SET_PARENT(node, ptr) \
98((_node)->_parent = (_type)) 96 rcu_assign_pointer((node)->parent, \
99#define NODE_TYPE(_node) \ 97 ((unsigned long)(ptr)) | NODE_TYPE(node))
100((_node)->_parent & NODE_TYPE_MASK) 98
101 99#define IS_TNODE(n) (!(n->parent & T_LEAF))
102#define IS_TNODE(n) (!(n->_parent & T_LEAF)) 100#define IS_LEAF(n) (n->parent & T_LEAF)
103#define IS_LEAF(n) (n->_parent & T_LEAF)
104 101
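The reworked macros keep the old trick of storing the node type (T_LEAF/T_TNODE) in the low bit of the parent word, which is free because tnodes and leaves are at least word-aligned, and they now route updates of that word through rcu_assign_pointer()/rcu_dereference(). A plain C sketch of just the tagging part (pack/unpack are made-up names for illustration):

    #include <assert.h>
    #include <stdint.h>

    #define TYPE_MASK 0x1UL	/* low bit: leaf or internal node */

    /* Pack a type bit into the low bit of an aligned pointer; the
     * bit is free because the structs are at least word-aligned. */
    static uintptr_t pack(void *ptr, unsigned long type)
    {
    	assert(((uintptr_t)ptr & TYPE_MASK) == 0);
    	return (uintptr_t)ptr | type;
    }

    static void *unpack_ptr(uintptr_t word)
    {
    	return (void *)(word & ~TYPE_MASK);
    }

    static unsigned long unpack_type(uintptr_t word)
    {
    	return word & TYPE_MASK;
    }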
105struct node { 102struct node {
106 t_key key; 103 t_key key;
107 unsigned long _parent; 104 unsigned long parent;
108}; 105};
109 106
110struct leaf { 107struct leaf {
111 t_key key; 108 t_key key;
112 unsigned long _parent; 109 unsigned long parent;
113 struct hlist_head list; 110 struct hlist_head list;
111 struct rcu_head rcu;
114}; 112};
115 113
116struct leaf_info { 114struct leaf_info {
117 struct hlist_node hlist; 115 struct hlist_node hlist;
116 struct rcu_head rcu;
118 int plen; 117 int plen;
119 struct list_head falh; 118 struct list_head falh;
120}; 119};
121 120
122struct tnode { 121struct tnode {
123 t_key key; 122 t_key key;
124 unsigned long _parent; 123 unsigned long parent;
125 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */ 124 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */
126 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */ 125 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */
127 unsigned short full_children; /* KEYLENGTH bits needed */ 126 unsigned short full_children; /* KEYLENGTH bits needed */
128 unsigned short empty_children; /* KEYLENGTH bits needed */ 127 unsigned short empty_children; /* KEYLENGTH bits needed */
129 struct node *child[0]; 128 struct rcu_head rcu;
129 struct node *child[0];
130}; 130};
131 131
132#ifdef CONFIG_IP_FIB_TRIE_STATS 132#ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -147,116 +147,76 @@ struct trie_stat {
147 unsigned int leaves; 147 unsigned int leaves;
148 unsigned int nullpointers; 148 unsigned int nullpointers;
149 unsigned int nodesizes[MAX_CHILDS]; 149 unsigned int nodesizes[MAX_CHILDS];
150}; 150};
151 151
152struct trie { 152struct trie {
153 struct node *trie; 153 struct node *trie;
154#ifdef CONFIG_IP_FIB_TRIE_STATS 154#ifdef CONFIG_IP_FIB_TRIE_STATS
155 struct trie_use_stats stats; 155 struct trie_use_stats stats;
156#endif 156#endif
157 int size; 157 int size;
158 unsigned int revision; 158 unsigned int revision;
159}; 159};
160 160
161static int trie_debug = 0;
162
163static int tnode_full(struct tnode *tn, struct node *n);
164static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); 161static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
165static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); 162static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
166static int tnode_child_length(struct tnode *tn);
167static struct node *resize(struct trie *t, struct tnode *tn); 163static struct node *resize(struct trie *t, struct tnode *tn);
168static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err); 164static struct tnode *inflate(struct trie *t, struct tnode *tn);
169static struct tnode *halve(struct trie *t, struct tnode *tn, int *err); 165static struct tnode *halve(struct trie *t, struct tnode *tn);
170static void tnode_free(struct tnode *tn); 166static void tnode_free(struct tnode *tn);
171static void trie_dump_seq(struct seq_file *seq, struct trie *t); 167static void trie_dump_seq(struct seq_file *seq, struct trie *t);
172extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
173extern int fib_detect_death(struct fib_info *fi, int order,
174 struct fib_info **last_resort, int *last_idx, int *dflt);
175 168
176extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id, 169static kmem_cache_t *fn_alias_kmem __read_mostly;
177 struct nlmsghdr *n, struct netlink_skb_parms *req);
178
179static kmem_cache_t *fn_alias_kmem;
180static struct trie *trie_local = NULL, *trie_main = NULL; 170static struct trie *trie_local = NULL, *trie_main = NULL;
181 171
182static void trie_bug(char *err)
183{
184 printk("Trie Bug: %s\n", err);
185 BUG();
186}
187 172
188static inline struct node *tnode_get_child(struct tnode *tn, int i) 173/* rcu_read_lock must be held by the caller on the read side */
174
175static inline struct node *tnode_get_child(struct tnode *tn, int i)
189{ 176{
190 if (i >= 1<<tn->bits) 177 BUG_ON(i >= 1 << tn->bits);
191 trie_bug("tnode_get_child");
192 178
193 return tn->child[i]; 179 return rcu_dereference(tn->child[i]);
194} 180}
195 181
196static inline int tnode_child_length(struct tnode *tn) 182static inline int tnode_child_length(const struct tnode *tn)
197{ 183{
198 return 1<<tn->bits; 184 return 1 << tn->bits;
199} 185}
200 186
201/*
202 _________________________________________________________________
203 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
204 ----------------------------------------------------------------
205 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
206
207 _________________________________________________________________
208 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
209 -----------------------------------------------------------------
210 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
211
212 tp->pos = 7
213 tp->bits = 3
214 n->pos = 15
215 n->bits=4
216 KEYLENGTH=32
217*/
218
219static inline t_key tkey_extract_bits(t_key a, int offset, int bits) 187static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
220{ 188{
221 if (offset < KEYLENGTH) 189 if (offset < KEYLENGTH)
222 return ((t_key)(a << offset)) >> (KEYLENGTH - bits); 190 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
223 else 191 else
224 return 0; 192 return 0;
225} 193}
226 194
227static inline int tkey_equals(t_key a, t_key b) 195static inline int tkey_equals(t_key a, t_key b)
228{ 196{
229 return a == b; 197 return a == b;
230} 198}
231 199
232static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b) 200static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
233{ 201{
234 if (bits == 0 || offset >= KEYLENGTH) 202 if (bits == 0 || offset >= KEYLENGTH)
235 return 1; 203 return 1;
236 bits = bits > KEYLENGTH ? KEYLENGTH : bits; 204 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
237 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; 205 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
238} 206}
239 207
240static inline int tkey_mismatch(t_key a, int offset, t_key b) 208static inline int tkey_mismatch(t_key a, int offset, t_key b)
241{ 209{
242 t_key diff = a ^ b; 210 t_key diff = a ^ b;
243 int i = offset; 211 int i = offset;
244 212
245 if(!diff) 213 if (!diff)
246 return 0; 214 return 0;
247 while((diff << i) >> (KEYLENGTH-1) == 0) 215 while ((diff << i) >> (KEYLENGTH-1) == 0)
248 i++; 216 i++;
249 return i; 217 return i;
250} 218}
251 219
252/* Candidate for fib_semantics */
253
254static void fn_free_alias(struct fib_alias *fa)
255{
256 fib_release_info(fa->fa_info);
257 kmem_cache_free(fn_alias_kmem, fa);
258}
259
260/* 220/*
261 To understand this stuff, an understanding of keys and all their bits is 221 To understand this stuff, an understanding of keys and all their bits is
262 necessary. Every node in the trie has a key associated with it, but not 222 necessary. Every node in the trie has a key associated with it, but not
@@ -295,7 +255,7 @@ static void fn_free_alias(struct fib_alias *fa)
295 tp->pos = 7 255 tp->pos = 7
296 tp->bits = 3 256 tp->bits = 3
297 n->pos = 15 257 n->pos = 15
298 n->bits=4 258 n->bits = 4
299 259
300 First, let's just ignore the bits that come before the parent tp, that is 260 First, let's just ignore the bits that come before the parent tp, that is
301 the bits from 0 to (tp->pos-1). They are *known* but at this point we do 261 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
@@ -314,65 +274,71 @@ static void fn_free_alias(struct fib_alias *fa)
314 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into 274 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
315 n's child array, and will of course be different for each child. 275 n's child array, and will of course be different for each child.
316 276
277
317 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown 278 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
318 at this point. 279 at this point.
319 280
320*/ 281*/
321 282
322static void check_tnode(struct tnode *tn) 283static inline void check_tnode(const struct tnode *tn)
323{ 284{
324 if(tn && tn->pos+tn->bits > 32) { 285 WARN_ON(tn && tn->pos+tn->bits > 32);
325 printk("TNODE ERROR tn=%p, pos=%d, bits=%d\n", tn, tn->pos, tn->bits);
326 }
327} 286}
328 287
329static int halve_threshold = 25; 288static int halve_threshold = 25;
330static int inflate_threshold = 50; 289static int inflate_threshold = 50;
331 290
332static struct leaf *leaf_new(void) 291
292static void __alias_free_mem(struct rcu_head *head)
333{ 293{
334 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL); 294 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
335 if(l) { 295 kmem_cache_free(fn_alias_kmem, fa);
336 NODE_INIT_PARENT(l, T_LEAF);
337 INIT_HLIST_HEAD(&l->list);
338 }
339 return l;
340} 296}
341 297
342static struct leaf_info *leaf_info_new(int plen) 298static inline void alias_free_mem_rcu(struct fib_alias *fa)
343{ 299{
344 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); 300 call_rcu(&fa->rcu, __alias_free_mem);
345 if(li) { 301}
346 li->plen = plen; 302
347 INIT_LIST_HEAD(&li->falh); 303static void __leaf_free_rcu(struct rcu_head *head)
348 } 304{
349 return li; 305 kfree(container_of(head, struct leaf, rcu));
306}
307
308static inline void free_leaf(struct leaf *leaf)
309{
310 call_rcu(&leaf->rcu, __leaf_free_rcu);
350} 311}
351 312
352static inline void free_leaf(struct leaf *l) 313static void __leaf_info_free_rcu(struct rcu_head *head)
353{ 314{
354 kfree(l); 315 kfree(container_of(head, struct leaf_info, rcu));
355} 316}
356 317
357static inline void free_leaf_info(struct leaf_info *li) 318static inline void free_leaf_info(struct leaf_info *leaf)
358{ 319{
359 kfree(li); 320 call_rcu(&leaf->rcu, __leaf_info_free_rcu);
360} 321}
361 322
362static struct tnode *tnode_alloc(unsigned int size) 323static struct tnode *tnode_alloc(unsigned int size)
363{ 324{
364 if (size <= PAGE_SIZE) { 325 struct page *pages;
365 return kmalloc(size, GFP_KERNEL); 326
366 } else { 327 if (size <= PAGE_SIZE)
367 return (struct tnode *) 328 return kcalloc(size, 1, GFP_KERNEL);
368 __get_free_pages(GFP_KERNEL, get_order(size)); 329
369 } 330 pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size));
331 if (!pages)
332 return NULL;
333
334 return page_address(pages);
370} 335}
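tnode_alloc() now returns zeroed memory and picks the allocator by size: slab for anything that fits in a page, whole pages otherwise, since child arrays of wide tnodes can far exceed what kmalloc handles gracefully. Roughly this, as a standalone hedged sketch (big_zalloc is an invented name):

    #include <linux/gfp.h>
    #include <linux/mm.h>
    #include <linux/slab.h>

    /* Zeroed allocation that falls back to whole pages when the
     * request outgrows the slab allocator. */
    static void *big_zalloc(size_t size)
    {
    	struct page *pages;

    	if (size <= PAGE_SIZE)
    		return kcalloc(1, size, GFP_KERNEL);

    	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
    	if (!pages)
    		return NULL;

    	return page_address(pages);
    }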
371 336
372static void __tnode_free(struct tnode *tn) 337static void __tnode_free_rcu(struct rcu_head *head)
373{ 338{
339 struct tnode *tn = container_of(head, struct tnode, rcu);
374 unsigned int size = sizeof(struct tnode) + 340 unsigned int size = sizeof(struct tnode) +
375 (1<<tn->bits) * sizeof(struct node *); 341 (1 << tn->bits) * sizeof(struct node *);
376 342
377 if (size <= PAGE_SIZE) 343 if (size <= PAGE_SIZE)
378 kfree(tn); 344 kfree(tn);
@@ -380,45 +346,50 @@ static void __tnode_free(struct tnode *tn)
380 free_pages((unsigned long)tn, get_order(size)); 346 free_pages((unsigned long)tn, get_order(size));
381} 347}
382 348
349static inline void tnode_free(struct tnode *tn)
350{
351 call_rcu(&tn->rcu, __tnode_free_rcu);
352}
353
354static struct leaf *leaf_new(void)
355{
356 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
357 if (l) {
358 l->parent = T_LEAF;
359 INIT_HLIST_HEAD(&l->list);
360 }
361 return l;
362}
363
364static struct leaf_info *leaf_info_new(int plen)
365{
366 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
367 if (li) {
368 li->plen = plen;
369 INIT_LIST_HEAD(&li->falh);
370 }
371 return li;
372}
373
383static struct tnode* tnode_new(t_key key, int pos, int bits) 374static struct tnode* tnode_new(t_key key, int pos, int bits)
384{ 375{
385 int nchildren = 1<<bits; 376 int nchildren = 1<<bits;
386 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *); 377 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
387 struct tnode *tn = tnode_alloc(sz); 378 struct tnode *tn = tnode_alloc(sz);
388 379
389 if(tn) { 380 if (tn) {
390 memset(tn, 0, sz); 381 memset(tn, 0, sz);
391 NODE_INIT_PARENT(tn, T_TNODE); 382 tn->parent = T_TNODE;
392 tn->pos = pos; 383 tn->pos = pos;
393 tn->bits = bits; 384 tn->bits = bits;
394 tn->key = key; 385 tn->key = key;
395 tn->full_children = 0; 386 tn->full_children = 0;
396 tn->empty_children = 1<<bits; 387 tn->empty_children = 1<<bits;
397 } 388 }
398 if(trie_debug > 0)
399 printk("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
400 (unsigned int) (sizeof(struct node) * 1<<bits));
401 return tn;
402}
403 389
404static void tnode_free(struct tnode *tn) 390 pr_debug("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
405{ 391 (unsigned int) (sizeof(struct node) * 1<<bits));
406 if(!tn) { 392 return tn;
407 trie_bug("tnode_free\n");
408 }
409 if(IS_LEAF(tn)) {
410 free_leaf((struct leaf *)tn);
411 if(trie_debug > 0 )
412 printk("FL %p \n", tn);
413 }
414 else if(IS_TNODE(tn)) {
415 __tnode_free(tn);
416 if(trie_debug > 0 )
417 printk("FT %p \n", tn);
418 }
419 else {
420 trie_bug("tnode_free\n");
421 }
422} 393}
423 394
424/* 395/*
@@ -426,70 +397,65 @@ static void tnode_free(struct tnode *tn)
426 * and no bits are skipped. See discussion in dyntree paper p. 6 397 * and no bits are skipped. See discussion in dyntree paper p. 6
427 */ 398 */
428 399
429static inline int tnode_full(struct tnode *tn, struct node *n) 400static inline int tnode_full(const struct tnode *tn, const struct node *n)
430{ 401{
431 if(n == NULL || IS_LEAF(n)) 402 if (n == NULL || IS_LEAF(n))
432 return 0; 403 return 0;
433 404
434 return ((struct tnode *) n)->pos == tn->pos + tn->bits; 405 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
435} 406}
436 407
437static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n) 408static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
438{ 409{
439 tnode_put_child_reorg(tn, i, n, -1); 410 tnode_put_child_reorg(tn, i, n, -1);
440} 411}
441 412
442 /* 413 /*
443 * Add a child at position i overwriting the old value. 414 * Add a child at position i overwriting the old value.
444 * Update the value of full_children and empty_children. 415 * Update the value of full_children and empty_children.
445 */ 416 */
446 417
447static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull) 418static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
448{ 419{
449 struct node *chi; 420 struct node *chi = tn->child[i];
450 int isfull; 421 int isfull;
451 422
452 if(i >= 1<<tn->bits) { 423 BUG_ON(i >= 1<<tn->bits);
453 printk("bits=%d, i=%d\n", tn->bits, i); 424
454 trie_bug("tnode_put_child_reorg bits");
455 }
456 write_lock_bh(&fib_lock);
457 chi = tn->child[i];
458 425
459 /* update emptyChildren */ 426 /* update emptyChildren */
460 if (n == NULL && chi != NULL) 427 if (n == NULL && chi != NULL)
461 tn->empty_children++; 428 tn->empty_children++;
462 else if (n != NULL && chi == NULL) 429 else if (n != NULL && chi == NULL)
463 tn->empty_children--; 430 tn->empty_children--;
464 431
465 /* update fullChildren */ 432 /* update fullChildren */
466 if (wasfull == -1) 433 if (wasfull == -1)
467 wasfull = tnode_full(tn, chi); 434 wasfull = tnode_full(tn, chi);
468 435
469 isfull = tnode_full(tn, n); 436 isfull = tnode_full(tn, n);
470 if (wasfull && !isfull) 437 if (wasfull && !isfull)
471 tn->full_children--; 438 tn->full_children--;
472 439 else if (!wasfull && isfull)
473 else if (!wasfull && isfull)
474 tn->full_children++; 440 tn->full_children++;
475 if(n)
476 NODE_SET_PARENT(n, tn);
477 441
478 tn->child[i] = n; 442 if (n)
479 write_unlock_bh(&fib_lock); 443 NODE_SET_PARENT(n, tn);
444
445 rcu_assign_pointer(tn->child[i], n);
480} 446}
481 447
482static struct node *resize(struct trie *t, struct tnode *tn) 448static struct node *resize(struct trie *t, struct tnode *tn)
483{ 449{
484 int i; 450 int i;
485 int err = 0; 451 int err = 0;
452 struct tnode *old_tn;
486 453
487 if (!tn) 454 if (!tn)
488 return NULL; 455 return NULL;
489 456
490 if(trie_debug) 457 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
491 printk("In tnode_resize %p inflate_threshold=%d threshold=%d\n", 458 tn, inflate_threshold, halve_threshold);
492 tn, inflate_threshold, halve_threshold);
493 459
494 /* No children */ 460 /* No children */
495 if (tn->empty_children == tnode_child_length(tn)) { 461 if (tn->empty_children == tnode_child_length(tn)) {
@@ -499,95 +465,92 @@ static struct node *resize(struct trie *t, struct tnode *tn)
499 /* One child */ 465 /* One child */
500 if (tn->empty_children == tnode_child_length(tn) - 1) 466 if (tn->empty_children == tnode_child_length(tn) - 1)
501 for (i = 0; i < tnode_child_length(tn); i++) { 467 for (i = 0; i < tnode_child_length(tn); i++) {
468 struct node *n;
502 469
503 write_lock_bh(&fib_lock); 470 n = tn->child[i];
504 if (tn->child[i] != NULL) { 471 if (!n)
505 472 continue;
506 /* compress one level */
507 struct node *n = tn->child[i];
508 if(n)
509 NODE_INIT_PARENT(n, NODE_TYPE(n));
510 473
511 write_unlock_bh(&fib_lock); 474 /* compress one level */
512 tnode_free(tn); 475 NODE_SET_PARENT(n, NULL);
513 return n; 476 tnode_free(tn);
514 } 477 return n;
515 write_unlock_bh(&fib_lock);
516 } 478 }
517 /* 479 /*
518 * Double as long as the resulting node has a number of 480 * Double as long as the resulting node has a number of
519 * nonempty nodes that are above the threshold. 481 * nonempty nodes that are above the threshold.
520 */ 482 */
521 483
522 /* 484 /*
523 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of 485 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
524 * the Helsinki University of Technology and Matti Tikkanen of Nokia 486 * the Helsinki University of Technology and Matti Tikkanen of Nokia
525 * Telecommunications, page 6: 487 * Telecommunications, page 6:
526 * "A node is doubled if the ratio of non-empty children to all 488 * "A node is doubled if the ratio of non-empty children to all
527 * children in the *doubled* node is at least 'high'." 489 * children in the *doubled* node is at least 'high'."
528 * 490 *
529 * 'high' in this instance is the variable 'inflate_threshold'. It 491 * 'high' in this instance is the variable 'inflate_threshold'. It
530 * is expressed as a percentage, so we multiply it with 492 * is expressed as a percentage, so we multiply it with
531 * tnode_child_length() and instead of multiplying by 2 (since the 493 * tnode_child_length() and instead of multiplying by 2 (since the
532 * child array will be doubled by inflate()) and multiplying 494 * child array will be doubled by inflate()) and multiplying
533 * the left-hand side by 100 (to handle the percentage thing) we 495 * the left-hand side by 100 (to handle the percentage thing) we
534 * multiply the left-hand side by 50. 496 * multiply the left-hand side by 50.
535 * 497 *
536 * The left-hand side may look a bit weird: tnode_child_length(tn) 498 * The left-hand side may look a bit weird: tnode_child_length(tn)
537 * - tn->empty_children is of course the number of non-null children 499 * - tn->empty_children is of course the number of non-null children
538 * in the current node. tn->full_children is the number of "full" 500 * in the current node. tn->full_children is the number of "full"
539 * children, that is non-null tnodes with a skip value of 0. 501 * children, that is non-null tnodes with a skip value of 0.
540 * All of those will be doubled in the resulting inflated tnode, so 502 * All of those will be doubled in the resulting inflated tnode, so
541 * we just count them one extra time here. 503 * we just count them one extra time here.
542 * 504 *
543 * A clearer way to write this would be: 505 * A clearer way to write this would be:
544 * 506 *
545 * to_be_doubled = tn->full_children; 507 * to_be_doubled = tn->full_children;
546 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - 508 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
547 * tn->full_children; 509 * tn->full_children;
548 * 510 *
549 * new_child_length = tnode_child_length(tn) * 2; 511 * new_child_length = tnode_child_length(tn) * 2;
550 * 512 *
551 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / 513 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
552 * new_child_length; 514 * new_child_length;
553 * if (new_fill_factor >= inflate_threshold) 515 * if (new_fill_factor >= inflate_threshold)
554 * 516 *
555 * ...and so on, tho it would mess up the while() loop. 517 * ...and so on, tho it would mess up the while () loop.
556 * 518 *
557 * anyway, 519 * anyway,
558 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= 520 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
559 * inflate_threshold 521 * inflate_threshold
560 * 522 *
561 * avoid a division: 523 * avoid a division:
562 * 100 * (not_to_be_doubled + 2*to_be_doubled) >= 524 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
563 * inflate_threshold * new_child_length 525 * inflate_threshold * new_child_length
564 * 526 *
565 * expand not_to_be_doubled and to_be_doubled, and shorten: 527 * expand not_to_be_doubled and to_be_doubled, and shorten:
566 * 100 * (tnode_child_length(tn) - tn->empty_children + 528 * 100 * (tnode_child_length(tn) - tn->empty_children +
567 * tn->full_children ) >= inflate_threshold * new_child_length 529 * tn->full_children) >= inflate_threshold * new_child_length
568 * 530 *
569 * expand new_child_length: 531 * expand new_child_length:
570 * 100 * (tnode_child_length(tn) - tn->empty_children + 532 * 100 * (tnode_child_length(tn) - tn->empty_children +
571 * tn->full_children ) >= 533 * tn->full_children) >=
572 * inflate_threshold * tnode_child_length(tn) * 2 534 * inflate_threshold * tnode_child_length(tn) * 2
573 * 535 *
574 * shorten again: 536 * shorten again:
575 * 50 * (tn->full_children + tnode_child_length(tn) - 537 * 50 * (tn->full_children + tnode_child_length(tn) -
576 * tn->empty_children ) >= inflate_threshold * 538 * tn->empty_children) >= inflate_threshold *
577 * tnode_child_length(tn) 539 * tnode_child_length(tn)
578 * 540 *
579 */ 541 */
580 542
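To make the final inequality concrete (illustrative numbers, not taken from the source): consider a tnode with bits = 4, so tnode_child_length(tn) = 16, with empty_children = 6, full_children = 3, and inflate_threshold = 50. Then 50 * (3 + 16 - 6) = 650, which is below 50 * 16 = 800, so the node is left alone. With only 2 empty slots and 6 full children, 50 * (6 + 16 - 2) = 1000 >= 800 and inflate() runs.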
581 check_tnode(tn); 543 check_tnode(tn);
582 544
583 err = 0; 545 err = 0;
584 while ((tn->full_children > 0 && 546 while ((tn->full_children > 0 &&
585 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= 547 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
586 inflate_threshold * tnode_child_length(tn))) { 548 inflate_threshold * tnode_child_length(tn))) {
587 549
588 tn = inflate(t, tn, &err); 550 old_tn = tn;
589 551 tn = inflate(t, tn);
590 if(err) { 552 if (IS_ERR(tn)) {
553 tn = old_tn;
591#ifdef CONFIG_IP_FIB_TRIE_STATS 554#ifdef CONFIG_IP_FIB_TRIE_STATS
592 t->stats.resize_node_skipped++; 555 t->stats.resize_node_skipped++;
593#endif 556#endif
@@ -607,9 +570,10 @@ static struct node *resize(struct trie *t, struct tnode *tn)
607 100 * (tnode_child_length(tn) - tn->empty_children) < 570 100 * (tnode_child_length(tn) - tn->empty_children) <
608 halve_threshold * tnode_child_length(tn)) { 571 halve_threshold * tnode_child_length(tn)) {
609 572
610 tn = halve(t, tn, &err); 573 old_tn = tn;
611 574 tn = halve(t, tn);
612 if(err) { 575 if (IS_ERR(tn)) {
576 tn = old_tn;
613#ifdef CONFIG_IP_FIB_TRIE_STATS 577#ifdef CONFIG_IP_FIB_TRIE_STATS
614 t->stats.resize_node_skipped++; 578 t->stats.resize_node_skipped++;
615#endif 579#endif
@@ -617,55 +581,48 @@ static struct node *resize(struct trie *t, struct tnode *tn)
617 } 581 }
618 } 582 }
619 583
620
621 /* Only one child remains */
622 584
585 /* Only one child remains */
623 if (tn->empty_children == tnode_child_length(tn) - 1) 586 if (tn->empty_children == tnode_child_length(tn) - 1)
624 for (i = 0; i < tnode_child_length(tn); i++) { 587 for (i = 0; i < tnode_child_length(tn); i++) {
625 588 struct node *n;
626 write_lock_bh(&fib_lock); 589
627 if (tn->child[i] != NULL) { 590 n = tn->child[i];
628 /* compress one level */ 591 if (!n)
629 struct node *n = tn->child[i]; 592 continue;
630 593
631 if(n) 594 /* compress one level */
632 NODE_INIT_PARENT(n, NODE_TYPE(n)); 595
633 596 NODE_SET_PARENT(n, NULL);
634 write_unlock_bh(&fib_lock); 597 tnode_free(tn);
635 tnode_free(tn); 598 return n;
636 return n;
637 }
638 write_unlock_bh(&fib_lock);
639 } 599 }
640 600
641 return (struct node *) tn; 601 return (struct node *) tn;
642} 602}
643 603
644static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) 604static struct tnode *inflate(struct trie *t, struct tnode *tn)
645{ 605{
646 struct tnode *inode; 606 struct tnode *inode;
647 struct tnode *oldtnode = tn; 607 struct tnode *oldtnode = tn;
648 int olen = tnode_child_length(tn); 608 int olen = tnode_child_length(tn);
649 int i; 609 int i;
650 610
651 if(trie_debug) 611 pr_debug("In inflate\n");
652 printk("In inflate\n");
653 612
654 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); 613 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
655 614
656 if (!tn) { 615 if (!tn)
657 *err = -ENOMEM; 616 return ERR_PTR(-ENOMEM);
658 return oldtnode;
659 }
660 617
661 /* 618 /*
662 * Preallocate and store tnodes before the actual work so we 619 * Preallocate and store tnodes before the actual work so we
663 * don't get into an inconsistent state if memory allocation 620 * don't get into an inconsistent state if memory allocation
664 * fails. In case of failure we return the oldnode and inflate 621 * fails. In case of failure we return the oldnode and inflate
665 * of tnode is ignored. 622 * of tnode is ignored.
666 */ 623 */
667 624
668 for(i = 0; i < olen; i++) { 625 for (i = 0; i < olen; i++) {
669 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i); 626 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
670 627
671 if (inode && 628 if (inode &&
@@ -673,56 +630,40 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err)
673 inode->pos == oldtnode->pos + oldtnode->bits && 630 inode->pos == oldtnode->pos + oldtnode->bits &&
674 inode->bits > 1) { 631 inode->bits > 1) {
675 struct tnode *left, *right; 632 struct tnode *left, *right;
676
677 t_key m = TKEY_GET_MASK(inode->pos, 1); 633 t_key m = TKEY_GET_MASK(inode->pos, 1);
678 634
679 left = tnode_new(inode->key&(~m), inode->pos + 1, 635 left = tnode_new(inode->key&(~m), inode->pos + 1,
680 inode->bits - 1); 636 inode->bits - 1);
637 if (!left)
638 goto nomem;
681 639
682 if(!left) {
683 *err = -ENOMEM;
684 break;
685 }
686
687 right = tnode_new(inode->key|m, inode->pos + 1, 640 right = tnode_new(inode->key|m, inode->pos + 1,
688 inode->bits - 1); 641 inode->bits - 1);
689 642
690 if(!right) { 643 if (!right) {
691 *err = -ENOMEM; 644 tnode_free(left);
692 break; 645 goto nomem;
693 } 646 }
694 647
695 put_child(t, tn, 2*i, (struct node *) left); 648 put_child(t, tn, 2*i, (struct node *) left);
696 put_child(t, tn, 2*i+1, (struct node *) right); 649 put_child(t, tn, 2*i+1, (struct node *) right);
697 } 650 }
698 } 651 }
699 652
700 if(*err) { 653 for (i = 0; i < olen; i++) {
701 int size = tnode_child_length(tn);
702 int j;
703
704 for(j = 0; j < size; j++)
705 if( tn->child[j])
706 tnode_free((struct tnode *)tn->child[j]);
707
708 tnode_free(tn);
709
710 *err = -ENOMEM;
711 return oldtnode;
712 }
713
714 for(i = 0; i < olen; i++) {
715 struct node *node = tnode_get_child(oldtnode, i); 654 struct node *node = tnode_get_child(oldtnode, i);
716 655 struct tnode *left, *right;
656 int size, j;
657
717 /* An empty child */ 658 /* An empty child */
718 if (node == NULL) 659 if (node == NULL)
719 continue; 660 continue;
720 661
721 /* A leaf or an internal node with skipped bits */ 662 /* A leaf or an internal node with skipped bits */
722 663
723 if(IS_LEAF(node) || ((struct tnode *) node)->pos > 664 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
724 tn->pos + tn->bits - 1) { 665 tn->pos + tn->bits - 1) {
725 if(tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits, 666 if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
726 1) == 0) 667 1) == 0)
727 put_child(t, tn, 2*i, node); 668 put_child(t, tn, 2*i, node);
728 else 669 else
@@ -738,207 +679,212 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err)
738 put_child(t, tn, 2*i+1, inode->child[1]); 679 put_child(t, tn, 2*i+1, inode->child[1]);
739 680
740 tnode_free(inode); 681 tnode_free(inode);
682 continue;
741 } 683 }
742 684
743 /* An internal node with more than two children */ 685 /* An internal node with more than two children */
744 else { 686
745 struct tnode *left, *right; 687 /* We will replace this node 'inode' with two new
746 int size, j; 688 * ones, 'left' and 'right', each with half of the
747 689 * original children. The two new nodes will have
748 /* We will replace this node 'inode' with two new 690 * a position one bit further down the key and this
749 * ones, 'left' and 'right', each with half of the 691 * means that the "significant" part of their keys
750 * original children. The two new nodes will have 692 * (see the discussion near the top of this file)
751 * a position one bit further down the key and this 693 * will differ by one bit, which will be "0" in
752 * means that the "significant" part of their keys 694 * left's key and "1" in right's key. Since we are
753 * (see the discussion near the top of this file) 695 * moving the key position by one step, the bit that
754 * will differ by one bit, which will be "0" in 696 * we are moving away from - the bit at position
755 * left's key and "1" in right's key. Since we are 697 * (inode->pos) - is the one that will differ between
756 * moving the key position by one step, the bit that 698 * left and right. So... we synthesize that bit in the
757 * we are moving away from - the bit at position 699 * two new keys.
758 * (inode->pos) - is the one that will differ between 700 * The mask 'm' below will be a single "one" bit at
759 * left and right. So... we synthesize that bit in the 701 * the position (inode->pos)
760 * two new keys. 702 */
761 * The mask 'm' below will be a single "one" bit at
762 * the position (inode->pos)
763 */
764
765 /* Use the old key, but set the new significant
766 * bit to zero.
767 */
768 703
769 left = (struct tnode *) tnode_get_child(tn, 2*i); 704 /* Use the old key, but set the new significant
770 put_child(t, tn, 2*i, NULL); 705 * bit to zero.
706 */
771 707
772 if(!left) 708 left = (struct tnode *) tnode_get_child(tn, 2*i);
773 BUG(); 709 put_child(t, tn, 2*i, NULL);
774 710
775 right = (struct tnode *) tnode_get_child(tn, 2*i+1); 711 BUG_ON(!left);
776 put_child(t, tn, 2*i+1, NULL);
777 712
778 if(!right) 713 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
779 BUG(); 714 put_child(t, tn, 2*i+1, NULL);
780 715
781 size = tnode_child_length(left); 716 BUG_ON(!right);
782 for(j = 0; j < size; j++) {
783 put_child(t, left, j, inode->child[j]);
784 put_child(t, right, j, inode->child[j + size]);
785 }
786 put_child(t, tn, 2*i, resize(t, left));
787 put_child(t, tn, 2*i+1, resize(t, right));
788 717
789 tnode_free(inode); 718 size = tnode_child_length(left);
719 for (j = 0; j < size; j++) {
720 put_child(t, left, j, inode->child[j]);
721 put_child(t, right, j, inode->child[j + size]);
790 } 722 }
723 put_child(t, tn, 2*i, resize(t, left));
724 put_child(t, tn, 2*i+1, resize(t, right));
725
726 tnode_free(inode);
791 } 727 }
792 tnode_free(oldtnode); 728 tnode_free(oldtnode);
793 return tn; 729 return tn;
730nomem:
731 {
732 int size = tnode_child_length(tn);
733 int j;
734
735 for (j = 0; j < size; j++)
736 if (tn->child[j])
737 tnode_free((struct tnode *)tn->child[j]);
738
739 tnode_free(tn);
740
741 return ERR_PTR(-ENOMEM);
742 }
794} 743}
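inflate() and halve() now report failure through the returned pointer itself instead of an int *err out-parameter: ERR_PTR() encodes a negative errno in the pointer and the caller tests it with IS_ERR(), as resize() does above when it restores old_tn. The convention in miniature, with an invented struct blob:

    #include <linux/err.h>
    #include <linux/slab.h>

    struct blob {
    	int len;
    };

    /* Encode -errno in the returned pointer; the caller checks
     * IS_ERR() and can keep using its old object on failure. */
    static struct blob *blob_new(int len)
    {
    	struct blob *b = kmalloc(sizeof(*b), GFP_KERNEL);

    	if (!b)
    		return ERR_PTR(-ENOMEM);

    	b->len = len;
    	return b;
    }

A caller mirrors resize(): new = blob_new(n); if IS_ERR(new), keep the old pointer (optionally extracting the errno with PTR_ERR(new)), otherwise adopt the new one.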
795 744
796static struct tnode *halve(struct trie *t, struct tnode *tn, int *err) 745static struct tnode *halve(struct trie *t, struct tnode *tn)
797{ 746{
798 struct tnode *oldtnode = tn; 747 struct tnode *oldtnode = tn;
799 struct node *left, *right; 748 struct node *left, *right;
800 int i; 749 int i;
801 int olen = tnode_child_length(tn); 750 int olen = tnode_child_length(tn);
802 751
803 if(trie_debug) printk("In halve\n"); 752 pr_debug("In halve\n");
804
805 tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
806 753
807 if (!tn) { 754 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
808 *err = -ENOMEM; 755
809 return oldtnode; 756 if (!tn)
810 } 757 return ERR_PTR(-ENOMEM);
811 758
812 /* 759 /*
813 * Preallocate and store tnodes before the actual work so we 760 * Preallocate and store tnodes before the actual work so we
814 * don't get into an inconsistent state if memory allocation 761 * don't get into an inconsistent state if memory allocation
815 * fails. In case of failure we return the oldnode and halve 762 * fails. In case of failure we return the oldnode and halve
816 * of tnode is ignored. 763 * of tnode is ignored.
817 */ 764 */
818 765
819 for(i = 0; i < olen; i += 2) { 766 for (i = 0; i < olen; i += 2) {
820 left = tnode_get_child(oldtnode, i); 767 left = tnode_get_child(oldtnode, i);
821 right = tnode_get_child(oldtnode, i+1); 768 right = tnode_get_child(oldtnode, i+1);
822 769
823 /* Two nonempty children */ 770 /* Two nonempty children */
824 if( left && right) { 771 if (left && right) {
825 struct tnode *newBinNode = 772 struct tnode *newn;
826 tnode_new(left->key, tn->pos + tn->bits, 1);
827 773
828 if(!newBinNode) { 774 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
829 *err = -ENOMEM;
830 break;
831 }
832 put_child(t, tn, i/2, (struct node *)newBinNode);
833 }
834 }
835 775
836 if(*err) { 776 if (!newn)
837 int size = tnode_child_length(tn); 777 goto nomem;
838 int j;
839 778
840 for(j = 0; j < size; j++) 779 put_child(t, tn, i/2, (struct node *)newn);
841 if( tn->child[j]) 780 }
842 tnode_free((struct tnode *)tn->child[j]);
843 781
844 tnode_free(tn);
845
846 *err = -ENOMEM;
847 return oldtnode;
848 } 782 }
849 783
850 for(i = 0; i < olen; i += 2) { 784 for (i = 0; i < olen; i += 2) {
785 struct tnode *newBinNode;
786
851 left = tnode_get_child(oldtnode, i); 787 left = tnode_get_child(oldtnode, i);
852 right = tnode_get_child(oldtnode, i+1); 788 right = tnode_get_child(oldtnode, i+1);
853 789
854 /* At least one of the children is empty */ 790 /* At least one of the children is empty */
855 if (left == NULL) { 791 if (left == NULL) {
856 if (right == NULL) /* Both are empty */ 792 if (right == NULL) /* Both are empty */
857 continue; 793 continue;
858 put_child(t, tn, i/2, right); 794 put_child(t, tn, i/2, right);
859 } else if (right == NULL) 795 continue;
860 put_child(t, tn, i/2, left); 796 }
861
862 /* Two nonempty children */
863 else {
864 struct tnode *newBinNode =
865 (struct tnode *) tnode_get_child(tn, i/2);
866 put_child(t, tn, i/2, NULL);
867
868 if(!newBinNode)
869 BUG();
870 797
871 put_child(t, newBinNode, 0, left); 798 if (right == NULL) {
872 put_child(t, newBinNode, 1, right); 799 put_child(t, tn, i/2, left);
873 put_child(t, tn, i/2, resize(t, newBinNode)); 800 continue;
874 } 801 }
802
803 /* Two nonempty children */
804 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
805 put_child(t, tn, i/2, NULL);
806 put_child(t, newBinNode, 0, left);
807 put_child(t, newBinNode, 1, right);
808 put_child(t, tn, i/2, resize(t, newBinNode));
875 } 809 }
876 tnode_free(oldtnode); 810 tnode_free(oldtnode);
877 return tn; 811 return tn;
812nomem:
813 {
814 int size = tnode_child_length(tn);
815 int j;
816
817 for (j = 0; j < size; j++)
818 if (tn->child[j])
819 tnode_free((struct tnode *)tn->child[j]);
820
821 tnode_free(tn);
822
823 return ERR_PTR(-ENOMEM);
824 }
878} 825}
879 826
880static void *trie_init(struct trie *t) 827static void trie_init(struct trie *t)
881{ 828{
882 if(t) { 829 if (!t)
883 t->size = 0; 830 return;
884 t->trie = NULL; 831
885 t->revision = 0; 832 t->size = 0;
833 rcu_assign_pointer(t->trie, NULL);
834 t->revision = 0;
886#ifdef CONFIG_IP_FIB_TRIE_STATS 835#ifdef CONFIG_IP_FIB_TRIE_STATS
887 memset(&t->stats, 0, sizeof(struct trie_use_stats)); 836 memset(&t->stats, 0, sizeof(struct trie_use_stats));
888#endif 837#endif
889 }
890 return t;
891} 838}
892 839
840/* The read side must use rcu_read_lock; currently the dump
 841 routines do, via get_fa_head and dump */
842
893static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) 843static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
894{ 844{
895 struct hlist_node *node; 845 struct hlist_node *node;
896 struct leaf_info *li; 846 struct leaf_info *li;
897 847
898 hlist_for_each_entry(li, node, head, hlist) { 848 hlist_for_each_entry_rcu(li, node, head, hlist)
899 849 if (li->plen == plen)
900 if ( li->plen == plen )
901 return li; 850 return li;
902 } 851
903 return NULL; 852 return NULL;
904} 853}
905 854
906static inline struct list_head * get_fa_head(struct leaf *l, int plen) 855static inline struct list_head * get_fa_head(struct leaf *l, int plen)
907{ 856{
908 struct list_head *fa_head=NULL;
909 struct leaf_info *li = find_leaf_info(&l->list, plen); 857 struct leaf_info *li = find_leaf_info(&l->list, plen);
910 858
911 if(li) 859 if (!li)
912 fa_head = &li->falh; 860 return NULL;
913 861
914 return fa_head; 862 return &li->falh;
915} 863}
916 864
917static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) 865static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
918{ 866{
919 struct leaf_info *li=NULL, *last=NULL; 867 struct leaf_info *li = NULL, *last = NULL;
920 struct hlist_node *node, *tmp; 868 struct hlist_node *node;
921 869
922 write_lock_bh(&fib_lock); 870 if (hlist_empty(head)) {
923 871 hlist_add_head_rcu(&new->hlist, head);
924 if(hlist_empty(head)) 872 } else {
925 hlist_add_head(&new->hlist, head); 873 hlist_for_each_entry(li, node, head, hlist) {
926 else { 874 if (new->plen > li->plen)
927 hlist_for_each_entry_safe(li, node, tmp, head, hlist) { 875 break;
928 876
929 if (new->plen > li->plen) 877 last = li;
930 break; 878 }
931 879 if (last)
932 last = li; 880 hlist_add_after_rcu(&last->hlist, &new->hlist);
933 } 881 else
934 if(last) 882 hlist_add_before_rcu(&new->hlist, &li->hlist);
935 hlist_add_after(&last->hlist, &new->hlist); 883 }
936 else
937 hlist_add_before(&new->hlist, &li->hlist);
938 }
939 write_unlock_bh(&fib_lock);
940} 884}
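The _rcu insertion helpers used here exist so that a concurrent lockless reader sees the list either before or after the insertion, never a half-linked node: the new element is fully initialized first and only then published with an ordered pointer store. The underlying publish/subscribe pair, reduced to a single pointer (struct box and the function names are illustrative):

    #include <linux/rcupdate.h>

    struct box {
    	int ready;
    };

    static struct box *global_box;

    /* Writer: initialize fully, then publish. rcu_assign_pointer()
     * orders the stores so a reader that sees the pointer also
     * sees the initialized field. */
    static void publish(struct box *b)
    {
    	b->ready = 1;
    	rcu_assign_pointer(global_box, b);
    }

    /* Reader: rcu_dereference() pairs with the ordered publish. */
    static int peek(void)
    {
    	struct box *b;
    	int r = 0;

    	rcu_read_lock();
    	b = rcu_dereference(global_box);
    	if (b)
    		r = b->ready;
    	rcu_read_unlock();
    	return r;
    }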
941 885
886/* rcu_read_lock must be held by the caller on the read side */
887
942static struct leaf * 888static struct leaf *
943fib_find_node(struct trie *t, u32 key) 889fib_find_node(struct trie *t, u32 key)
944{ 890{
@@ -947,73 +893,57 @@ fib_find_node(struct trie *t, u32 key)
947 struct node *n; 893 struct node *n;
948 894
949 pos = 0; 895 pos = 0;
950 n=t->trie; 896 n = rcu_dereference(t->trie);
951 897
952 while (n != NULL && NODE_TYPE(n) == T_TNODE) { 898 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
953 tn = (struct tnode *) n; 899 tn = (struct tnode *) n;
954 900
955 check_tnode(tn); 901 check_tnode(tn);
956 902
957 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 903 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
958 pos=tn->pos + tn->bits; 904 pos = tn->pos + tn->bits;
959 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); 905 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
960 } 906 } else
961 else
962 break; 907 break;
963 } 908 }
964 /* Case we have found a leaf. Compare prefixes */ 909 /* Case we have found a leaf. Compare prefixes */
965 910
966 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { 911 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
967 struct leaf *l = (struct leaf *) n; 912 return (struct leaf *)n;
968 return l; 913
969 }
970 return NULL; 914 return NULL;
971} 915}
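Since fib_find_node() dereferences the trie root with rcu_dereference() and takes no lock itself, a read-side caller must bracket it with rcu_read_lock()/rcu_read_unlock() and must not hold the returned leaf past the unlock. A sketch of the expected calling pattern (not a verbatim caller from this file):

    struct leaf *l;

    rcu_read_lock();
    l = fib_find_node(t, key);
    if (l) {
    	/* inspect the leaf here; it is only guaranteed to stay
    	 * alive until the matching rcu_read_unlock() */
    }
    rcu_read_unlock();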
972 916
973static struct node *trie_rebalance(struct trie *t, struct tnode *tn) 917static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
974{ 918{
975 int i = 0;
976 int wasfull; 919 int wasfull;
977 t_key cindex, key; 920 t_key cindex, key;
978 struct tnode *tp = NULL; 921 struct tnode *tp = NULL;
979 922
980 if(!tn)
981 BUG();
982
983 key = tn->key; 923 key = tn->key;
984 i = 0;
985 924
986 while (tn != NULL && NODE_PARENT(tn) != NULL) { 925 while (tn != NULL && NODE_PARENT(tn) != NULL) {
987 926
988 if( i > 10 ) {
989 printk("Rebalance tn=%p \n", tn);
990 if(tn) printk("tn->parent=%p \n", NODE_PARENT(tn));
991
992 printk("Rebalance tp=%p \n", tp);
993 if(tp) printk("tp->parent=%p \n", NODE_PARENT(tp));
994 }
995
996 if( i > 12 ) BUG();
997 i++;
998
999 tp = NODE_PARENT(tn); 927 tp = NODE_PARENT(tn);
1000 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 928 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1001 wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 929 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
1002 tn = (struct tnode *) resize (t, (struct tnode *)tn); 930 tn = (struct tnode *) resize (t, (struct tnode *)tn);
1003 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull); 931 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
1004 932
1005 if(!NODE_PARENT(tn)) 933 if (!NODE_PARENT(tn))
1006 break; 934 break;
1007 935
1008 tn = NODE_PARENT(tn); 936 tn = NODE_PARENT(tn);
1009 } 937 }
1010 /* Handle last (top) tnode */ 938 /* Handle last (top) tnode */
1011 if (IS_TNODE(tn)) 939 if (IS_TNODE(tn))
1012 tn = (struct tnode*) resize(t, (struct tnode *)tn); 940 tn = (struct tnode*) resize(t, (struct tnode *)tn);
1013 941
1014 return (struct node*) tn; 942 return (struct node*) tn;
1015} 943}
1016 944
945/* only used from the updater side */
946
1017static struct list_head * 947static struct list_head *
1018fib_insert_node(struct trie *t, int *err, u32 key, int plen) 948fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1019{ 949{
@@ -1022,68 +952,62 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1022 struct node *n; 952 struct node *n;
1023 struct leaf *l; 953 struct leaf *l;
1024 int missbit; 954 int missbit;
1025 struct list_head *fa_head=NULL; 955 struct list_head *fa_head = NULL;
1026 struct leaf_info *li; 956 struct leaf_info *li;
1027 t_key cindex; 957 t_key cindex;
1028 958
1029 pos = 0; 959 pos = 0;
1030 n=t->trie; 960 n = t->trie;
1031 961
1032 /* If we point to NULL, stop. Either the tree is empty and we should 962 /* If we point to NULL, stop. Either the tree is empty and we should
1033 * just put a new leaf in if, or we have reached an empty child slot, 963 * just put a new leaf in if, or we have reached an empty child slot,
1034 * and we should just put our new leaf in that. 964 * and we should just put our new leaf in that.
1035 * If we point to a T_TNODE, check if it matches our key. Note that 965 * If we point to a T_TNODE, check if it matches our key. Note that
1036 * a T_TNODE might be skipping any number of bits - its 'pos' need 966 * a T_TNODE might be skipping any number of bits - its 'pos' need
1037 * not be the parent's 'pos'+'bits'! 967 * not be the parent's 'pos'+'bits'!
1038 * 968 *
1039 * If it does match the current key, get pos/bits from it, extract 969 * If it does match the current key, get pos/bits from it, extract
1040 * the index from our key, push the T_TNODE and walk the tree. 970 * the index from our key, push the T_TNODE and walk the tree.
1041 * 971 *
1042 * If it doesn't, we have to replace it with a new T_TNODE. 972 * If it doesn't, we have to replace it with a new T_TNODE.
1043 * 973 *
1044 * If we point to a T_LEAF, it might or might not have the same key 974 * If we point to a T_LEAF, it might or might not have the same key
1045 * as we do. If it does, just change the value, update the T_LEAF's 975 * as we do. If it does, just change the value, update the T_LEAF's
1046 * value, and return it. 976 * value, and return it.
1047 * If it doesn't, we need to replace it with a T_TNODE. 977 * If it doesn't, we need to replace it with a T_TNODE.
1048 */ 978 */
1049 979
1050 while (n != NULL && NODE_TYPE(n) == T_TNODE) { 980 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1051 tn = (struct tnode *) n; 981 tn = (struct tnode *) n;
1052 982
1053 check_tnode(tn); 983 check_tnode(tn);
1054 984
1055 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 985 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
1056 tp = tn; 986 tp = tn;
1057 pos=tn->pos + tn->bits; 987 pos = tn->pos + tn->bits;
1058 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); 988 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
1059 989
1060 if(n && NODE_PARENT(n) != tn) { 990 BUG_ON(n && NODE_PARENT(n) != tn);
1061 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n)); 991 } else
1062 BUG();
1063 }
1064 }
1065 else
1066 break; 992 break;
1067 } 993 }
1068 994
1069 /* 995 /*
1070 * n ----> NULL, LEAF or TNODE 996 * n ----> NULL, LEAF or TNODE
1071 * 997 *
1072 * tp is n's (parent) ----> NULL or TNODE 998 * tp is n's (parent) ----> NULL or TNODE
1073 */ 999 */
1074 1000
1075 if(tp && IS_LEAF(tp)) 1001 BUG_ON(tp && IS_LEAF(tp));
1076 BUG();
1077
1078 1002
1079 /* Case 1: n is a leaf. Compare prefixes */ 1003 /* Case 1: n is a leaf. Compare prefixes */
1080 1004
1081 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { 1005 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
1082 struct leaf *l = ( struct leaf *) n; 1006 struct leaf *l = (struct leaf *) n;
1083 1007
1084 li = leaf_info_new(plen); 1008 li = leaf_info_new(plen);
1085 1009
1086 if(! li) { 1010 if (!li) {
1087 *err = -ENOMEM; 1011 *err = -ENOMEM;
1088 goto err; 1012 goto err;
1089 } 1013 }
@@ -1095,7 +1019,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1095 t->size++; 1019 t->size++;
1096 l = leaf_new(); 1020 l = leaf_new();
1097 1021
1098 if(! l) { 1022 if (!l) {
1099 *err = -ENOMEM; 1023 *err = -ENOMEM;
1100 goto err; 1024 goto err;
1101 } 1025 }
@@ -1103,7 +1027,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1103 l->key = key; 1027 l->key = key;
1104 li = leaf_info_new(plen); 1028 li = leaf_info_new(plen);
1105 1029
1106 if(! li) { 1030 if (!li) {
1107 tnode_free((struct tnode *) l); 1031 tnode_free((struct tnode *) l);
1108 *err = -ENOMEM; 1032 *err = -ENOMEM;
1109 goto err; 1033 goto err;
@@ -1112,70 +1036,65 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1112 fa_head = &li->falh; 1036 fa_head = &li->falh;
1113 insert_leaf_info(&l->list, li); 1037 insert_leaf_info(&l->list, li);
1114 1038
1115 /* Case 2: n is NULL, and will just insert a new leaf */
1116 if (t->trie && n == NULL) { 1039 if (t->trie && n == NULL) {
1040 /* Case 2: n is NULL, and will just insert a new leaf */
1117 1041
1118 NODE_SET_PARENT(l, tp); 1042 NODE_SET_PARENT(l, tp);
1119
1120 if (!tp)
1121 BUG();
1122 1043
1123 else { 1044 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1124 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1045 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1125 put_child(t, (struct tnode *)tp, cindex, (struct node *)l); 1046 } else {
1126 } 1047 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
1127 } 1048 /*
1128 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ 1049 * Add a new tnode here
1129 else {
1130 /*
1131 * Add a new tnode here
1132 * first tnode need some special handling 1050 * first tnode need some special handling
1133 */ 1051 */
1134 1052
1135 if (tp) 1053 if (tp)
1136 pos=tp->pos+tp->bits; 1054 pos = tp->pos+tp->bits;
1137 else 1055 else
1138 pos=0; 1056 pos = 0;
1139 if(n) { 1057
1058 if (n) {
1140 newpos = tkey_mismatch(key, pos, n->key); 1059 newpos = tkey_mismatch(key, pos, n->key);
1141 tn = tnode_new(n->key, newpos, 1); 1060 tn = tnode_new(n->key, newpos, 1);
1142 } 1061 } else {
1143 else {
1144 newpos = 0; 1062 newpos = 0;
1145 tn = tnode_new(key, newpos, 1); /* First tnode */ 1063 tn = tnode_new(key, newpos, 1); /* First tnode */
1146 } 1064 }
1147 1065
1148 if(!tn) { 1066 if (!tn) {
1149 free_leaf_info(li); 1067 free_leaf_info(li);
1150 tnode_free((struct tnode *) l); 1068 tnode_free((struct tnode *) l);
1151 *err = -ENOMEM; 1069 *err = -ENOMEM;
1152 goto err; 1070 goto err;
1153 } 1071 }
1154 1072
1155 NODE_SET_PARENT(tn, tp); 1073 NODE_SET_PARENT(tn, tp);
1156 1074
1157 missbit=tkey_extract_bits(key, newpos, 1); 1075 missbit = tkey_extract_bits(key, newpos, 1);
1158 put_child(t, tn, missbit, (struct node *)l); 1076 put_child(t, tn, missbit, (struct node *)l);
1159 put_child(t, tn, 1-missbit, n); 1077 put_child(t, tn, 1-missbit, n);
1160 1078
1161 if(tp) { 1079 if (tp) {
1162 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1080 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1163 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn); 1081 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
1164 } 1082 } else {
1165 else { 1083 rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */
1166 t->trie = (struct node*) tn; /* First tnode */
1167 tp = tn; 1084 tp = tn;
1168 } 1085 }
1169 } 1086 }
1170 if(tp && tp->pos+tp->bits > 32) { 1087
1171 printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", 1088 if (tp && tp->pos + tp->bits > 32)
1089 printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1172 tp, tp->pos, tp->bits, key, plen); 1090 tp, tp->pos, tp->bits, key, plen);
1173 } 1091
1174 /* Rebalance the trie */ 1092 /* Rebalance the trie */
1175 t->trie = trie_rebalance(t, tp); 1093
1094 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
1176done: 1095done:
1177 t->revision++; 1096 t->revision++;
1178err:; 1097err:
1179 return fa_head; 1098 return fa_head;
1180} 1099}
1181 1100
@@ -1185,7 +1104,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1185{ 1104{
1186 struct trie *t = (struct trie *) tb->tb_data; 1105 struct trie *t = (struct trie *) tb->tb_data;
1187 struct fib_alias *fa, *new_fa; 1106 struct fib_alias *fa, *new_fa;
1188 struct list_head *fa_head=NULL; 1107 struct list_head *fa_head = NULL;
1189 struct fib_info *fi; 1108 struct fib_info *fi;
1190 int plen = r->rtm_dst_len; 1109 int plen = r->rtm_dst_len;
1191 int type = r->rtm_type; 1110 int type = r->rtm_type;
@@ -1198,28 +1117,29 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1198 return -EINVAL; 1117 return -EINVAL;
1199 1118
1200 key = 0; 1119 key = 0;
1201 if (rta->rta_dst) 1120 if (rta->rta_dst)
1202 memcpy(&key, rta->rta_dst, 4); 1121 memcpy(&key, rta->rta_dst, 4);
1203 1122
1204 key = ntohl(key); 1123 key = ntohl(key);
1205 1124
1206 if(trie_debug) 1125 pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
1207 printk("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
1208 1126
1209 mask = ntohl( inet_make_mask(plen) ); 1127 mask = ntohl(inet_make_mask(plen));
1210 1128
1211 if(key & ~mask) 1129 if (key & ~mask)
1212 return -EINVAL; 1130 return -EINVAL;
1213 1131
1214 key = key & mask; 1132 key = key & mask;
1215 1133
1216 if ((fi = fib_create_info(r, rta, nlhdr, &err)) == NULL) 1134 fi = fib_create_info(r, rta, nlhdr, &err);
1135
1136 if (!fi)
1217 goto err; 1137 goto err;
1218 1138
1219 l = fib_find_node(t, key); 1139 l = fib_find_node(t, key);
1220 fa = NULL; 1140 fa = NULL;
1221 1141
1222 if(l) { 1142 if (l) {
1223 fa_head = get_fa_head(l, plen); 1143 fa_head = get_fa_head(l, plen);
1224 fa = fib_find_alias(fa_head, tos, fi->fib_priority); 1144 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1225 } 1145 }
@@ -1235,8 +1155,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1235 * and we need to allocate a new one of those as well. 1155 * and we need to allocate a new one of those as well.
1236 */ 1156 */
1237 1157
1238 if (fa && 1158 if (fa && fa->fa_info->fib_priority == fi->fib_priority) {
1239 fa->fa_info->fib_priority == fi->fib_priority) {
1240 struct fib_alias *fa_orig; 1159 struct fib_alias *fa_orig;
1241 1160
1242 err = -EEXIST; 1161 err = -EEXIST;
@@ -1247,22 +1166,27 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1247 struct fib_info *fi_drop; 1166 struct fib_info *fi_drop;
1248 u8 state; 1167 u8 state;
1249 1168
1250 write_lock_bh(&fib_lock); 1169 err = -ENOBUFS;
1170 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
1171 if (new_fa == NULL)
1172 goto out;
1251 1173
1252 fi_drop = fa->fa_info; 1174 fi_drop = fa->fa_info;
1253 fa->fa_info = fi; 1175 new_fa->fa_tos = fa->fa_tos;
1254 fa->fa_type = type; 1176 new_fa->fa_info = fi;
1255 fa->fa_scope = r->rtm_scope; 1177 new_fa->fa_type = type;
1178 new_fa->fa_scope = r->rtm_scope;
1256 state = fa->fa_state; 1179 state = fa->fa_state;
1257 fa->fa_state &= ~FA_S_ACCESSED; 1180 new_fa->fa_state &= ~FA_S_ACCESSED;
1258 1181
1259 write_unlock_bh(&fib_lock); 1182 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1183 alias_free_mem_rcu(fa);
1260 1184
1261 fib_release_info(fi_drop); 1185 fib_release_info(fi_drop);
1262 if (state & FA_S_ACCESSED) 1186 if (state & FA_S_ACCESSED)
1263 rt_cache_flush(-1); 1187 rt_cache_flush(-1);
1264 1188
1265 goto succeeded; 1189 goto succeeded;
1266 } 1190 }
1267 /* Error if we find a perfect match which 1191 /* Error if we find a perfect match which
1268 * uses the same scope, type, and nexthop 1192 * uses the same scope, type, and nexthop
@@ -1284,7 +1208,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1284 fa = fa_orig; 1208 fa = fa_orig;
1285 } 1209 }
1286 err = -ENOENT; 1210 err = -ENOENT;
1287 if (!(nlhdr->nlmsg_flags&NLM_F_CREATE)) 1211 if (!(nlhdr->nlmsg_flags & NLM_F_CREATE))
1288 goto out; 1212 goto out;
1289 1213
1290 err = -ENOBUFS; 1214 err = -ENOBUFS;
@@ -1297,26 +1221,19 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1297 new_fa->fa_type = type; 1221 new_fa->fa_type = type;
1298 new_fa->fa_scope = r->rtm_scope; 1222 new_fa->fa_scope = r->rtm_scope;
1299 new_fa->fa_state = 0; 1223 new_fa->fa_state = 0;
1300#if 0
1301 new_fa->dst = NULL;
1302#endif
1303 /* 1224 /*
1304 * Insert new entry to the list. 1225 * Insert new entry to the list.
1305 */ 1226 */
1306 1227
1307 if(!fa_head) { 1228 if (!fa_head) {
1308 fa_head = fib_insert_node(t, &err, key, plen); 1229 fa_head = fib_insert_node(t, &err, key, plen);
1309 err = 0; 1230 err = 0;
1310 if(err) 1231 if (err)
1311 goto out_free_new_fa; 1232 goto out_free_new_fa;
1312 } 1233 }
1313 1234
1314 write_lock_bh(&fib_lock); 1235 list_add_tail_rcu(&new_fa->fa_list,
1315 1236 (fa ? &fa->fa_list : fa_head));
1316 list_add_tail(&new_fa->fa_list,
1317 (fa ? &fa->fa_list : fa_head));
1318
1319 write_unlock_bh(&fib_lock);
1320 1237
1321 rt_cache_flush(-1); 1238 rt_cache_flush(-1);
1322 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req); 1239 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
@@ -1327,38 +1244,40 @@ out_free_new_fa:
1327 kmem_cache_free(fn_alias_kmem, new_fa); 1244 kmem_cache_free(fn_alias_kmem, new_fa);
1328out: 1245out:
1329 fib_release_info(fi); 1246 fib_release_info(fi);
1330err:; 1247err:
1331 return err; 1248 return err;
1332} 1249}
1333 1250
1334static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp, 1251
1335 struct fib_result *res, int *err) 1252/* should be called with rcu_read_lock held */
1253static inline int check_leaf(struct trie *t, struct leaf *l,
1254 t_key key, int *plen, const struct flowi *flp,
1255 struct fib_result *res)
1336{ 1256{
1337 int i; 1257 int err, i;
1338 t_key mask; 1258 t_key mask;
1339 struct leaf_info *li; 1259 struct leaf_info *li;
1340 struct hlist_head *hhead = &l->list; 1260 struct hlist_head *hhead = &l->list;
1341 struct hlist_node *node; 1261 struct hlist_node *node;
1342
1343 hlist_for_each_entry(li, node, hhead, hlist) {
1344 1262
1263 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
1345 i = li->plen; 1264 i = li->plen;
1346 mask = ntohl(inet_make_mask(i)); 1265 mask = ntohl(inet_make_mask(i));
1347 if (l->key != (key & mask)) 1266 if (l->key != (key & mask))
1348 continue; 1267 continue;
1349 1268
1350 if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) { 1269 if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) {
1351 *plen = i; 1270 *plen = i;
1352#ifdef CONFIG_IP_FIB_TRIE_STATS 1271#ifdef CONFIG_IP_FIB_TRIE_STATS
1353 t->stats.semantic_match_passed++; 1272 t->stats.semantic_match_passed++;
1354#endif 1273#endif
1355 return 1; 1274 return err;
1356 } 1275 }
1357#ifdef CONFIG_IP_FIB_TRIE_STATS 1276#ifdef CONFIG_IP_FIB_TRIE_STATS
1358 t->stats.semantic_match_miss++; 1277 t->stats.semantic_match_miss++;
1359#endif 1278#endif
1360 } 1279 }
1361 return 0; 1280 return 1;
1362} 1281}
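Note the changed contract above: check_leaf() now returns fib_semantic_match()'s result directly, where a value <= 0 is definitive (0 for a hit, a negative errno for a semantic failure) and 1 means "no match at this leaf, keep walking". Callers branch on the sign instead of filling an *err out-parameter, schematically:

    ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res);
    if (ret <= 0)
            goto found;     /* success (0) or hard error (< 0) */
    goto backtrace;         /* 1: continue the trie walk */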
1363 1282
1364static int 1283static int
@@ -1369,14 +1288,18 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
1369 struct node *n; 1288 struct node *n;
1370 struct tnode *pn; 1289 struct tnode *pn;
1371 int pos, bits; 1290 int pos, bits;
1372 t_key key=ntohl(flp->fl4_dst); 1291 t_key key = ntohl(flp->fl4_dst);
1373 int chopped_off; 1292 int chopped_off;
1374 t_key cindex = 0; 1293 t_key cindex = 0;
1375 int current_prefix_length = KEYLENGTH; 1294 int current_prefix_length = KEYLENGTH;
1376 n = t->trie; 1295 struct tnode *cn;
1296 t_key node_prefix, key_prefix, pref_mismatch;
1297 int mp;
1377 1298
1378 read_lock(&fib_lock); 1299 rcu_read_lock();
1379 if(!n) 1300
1301 n = rcu_dereference(t->trie);
1302 if (!n)
1380 goto failed; 1303 goto failed;
1381 1304
1382#ifdef CONFIG_IP_FIB_TRIE_STATS 1305#ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -1385,19 +1308,18 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
1385 1308
1386 /* Just a leaf? */ 1309 /* Just a leaf? */
1387 if (IS_LEAF(n)) { 1310 if (IS_LEAF(n)) {
1388 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret) ) 1311 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
1389 goto found; 1312 goto found;
1390 goto failed; 1313 goto failed;
1391 } 1314 }
1392 pn = (struct tnode *) n; 1315 pn = (struct tnode *) n;
1393 chopped_off = 0; 1316 chopped_off = 0;
1394
1395 while (pn) {
1396 1317
1318 while (pn) {
1397 pos = pn->pos; 1319 pos = pn->pos;
1398 bits = pn->bits; 1320 bits = pn->bits;
1399 1321
1400 if(!chopped_off) 1322 if (!chopped_off)
1401 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits); 1323 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
1402 1324
1403 n = tnode_get_child(pn, cindex); 1325 n = tnode_get_child(pn, cindex);
@@ -1409,130 +1331,129 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
1409 goto backtrace; 1331 goto backtrace;
1410 } 1332 }
1411 1333
1412 if (IS_TNODE(n)) { 1334 if (IS_LEAF(n)) {
1335 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
1336 goto found;
1337 else
1338 goto backtrace;
1339 }
1340
1413#define HL_OPTIMIZE 1341#define HL_OPTIMIZE
1414#ifdef HL_OPTIMIZE 1342#ifdef HL_OPTIMIZE
1415 struct tnode *cn = (struct tnode *)n; 1343 cn = (struct tnode *)n;
1416 t_key node_prefix, key_prefix, pref_mismatch;
1417 int mp;
1418 1344
1419 /* 1345 /*
1420 * It's a tnode, and we can do some extra checks here if we 1346 * It's a tnode, and we can do some extra checks here if we
1421 * like, to avoid descending into a dead-end branch. 1347 * like, to avoid descending into a dead-end branch.
1422 * This tnode is in the parent's child array at index 1348 * This tnode is in the parent's child array at index
1423 * key[p_pos..p_pos+p_bits] but potentially with some bits 1349 * key[p_pos..p_pos+p_bits] but potentially with some bits
1424 * chopped off, so in reality the index may be just a 1350 * chopped off, so in reality the index may be just a
1425 * subprefix, padded with zero at the end. 1351 * subprefix, padded with zero at the end.
1426 * We can also take a look at any skipped bits in this 1352 * We can also take a look at any skipped bits in this
1427	 * tnode - everything up to p_pos is supposed to be ok,	1353	 * and the non-chopped bits of the index (see previous
1428	 * and the non-chopped bits of the index (see previous	1354	 * paragraph) are also guaranteed ok, but the rest is
1429 * paragraph) are also guaranteed ok, but the rest is 1355 * paragraph) are also guaranteed ok, but the rest is
1430 * considered unknown. 1356 * considered unknown.
1431 * 1357 *
1432 * The skipped bits are key[pos+bits..cn->pos]. 1358 * The skipped bits are key[pos+bits..cn->pos].
1433 */ 1359 */
1434
1435 /* If current_prefix_length < pos+bits, we are already doing
1436 * actual prefix matching, which means everything from
1437 * pos+(bits-chopped_off) onward must be zero along some
1438 * branch of this subtree - otherwise there is *no* valid
1439 * prefix present. Here we can only check the skipped
1440 * bits. Remember, since we have already indexed into the
1441	 * parent's child array, we know that the bits we chopped off	1356	 * considered unknown.
1442 * *are* zero.
1443 */
1444 1360
1445 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */ 1361 /* If current_prefix_length < pos+bits, we are already doing
1446 1362 * actual prefix matching, which means everything from
1447 if (current_prefix_length < pos+bits) { 1363 * pos+(bits-chopped_off) onward must be zero along some
1448 if (tkey_extract_bits(cn->key, current_prefix_length, 1364 * branch of this subtree - otherwise there is *no* valid
1449 cn->pos - current_prefix_length) != 0 || 1365 * prefix present. Here we can only check the skipped
1450 !(cn->child[0])) 1366 * bits. Remember, since we have already indexed into the
1451	 goto backtrace;	1367	 * parent's child array, we know that the bits we chopped off
1452 } 1368 * *are* zero.
1369 */
1453 1370
1454 /* 1371 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
1455 * If chopped_off=0, the index is fully validated and we
1456 * only need to look at the skipped bits for this, the new,
1457 * tnode. What we actually want to do is to find out if
1458 * these skipped bits match our key perfectly, or if we will
1459 * have to count on finding a matching prefix further down,
1460 * because if we do, we would like to have some way of
1461 * verifying the existence of such a prefix at this point.
1462 */
1463 1372
1464 /* The only thing we can do at this point is to verify that 1373 if (current_prefix_length < pos+bits) {
1465 * any such matching prefix can indeed be a prefix to our 1374 if (tkey_extract_bits(cn->key, current_prefix_length,
1466 * key, and if the bits in the node we are inspecting that 1375 cn->pos - current_prefix_length) != 0 ||
1467 * do not match our key are not ZERO, this cannot be true. 1376 !(cn->child[0]))
1468 * Thus, find out where there is a mismatch (before cn->pos) 1377 goto backtrace;
1469 * and verify that all the mismatching bits are zero in the 1378 }
1470 * new tnode's key.
1471 */
1472 1379
1473 /* Note: We aren't very concerned about the piece of the key 1380 /*
1474	 * that precedes pn->pos+pn->bits, since these have already been	1381	 * If chopped_off=0, the index is fully validated and we
1475 * checked. The bits after cn->pos aren't checked since these are 1382 * only need to look at the skipped bits for this, the new,
1476 * by definition "unknown" at this point. Thus, what we want to 1383 * tnode. What we actually want to do is to find out if
1477 * see is if we are about to enter the "prefix matching" state, 1384 * these skipped bits match our key perfectly, or if we will
1478 * and in that case verify that the skipped bits that will prevail 1385 * have to count on finding a matching prefix further down,
1479 * throughout this subtree are zero, as they have to be if we are 1386 * because if we do, we would like to have some way of
1480 * to find a matching prefix. 1387 * verifying the existence of such a prefix at this point.
1481 */ 1388 */
1482 1389
1483 node_prefix = MASK_PFX(cn->key, cn->pos); 1390 /* The only thing we can do at this point is to verify that
1484 key_prefix = MASK_PFX(key, cn->pos); 1391 * any such matching prefix can indeed be a prefix to our
1485 pref_mismatch = key_prefix^node_prefix; 1392 * key, and if the bits in the node we are inspecting that
1486 mp = 0; 1393 * do not match our key are not ZERO, this cannot be true.
1394 * Thus, find out where there is a mismatch (before cn->pos)
1395 * and verify that all the mismatching bits are zero in the
1396 * new tnode's key.
1397 */
1487 1398
1488 /* In short: If skipped bits in this node do not match the search 1399 /* Note: We aren't very concerned about the piece of the key
1489	 * key, enter the "prefix matching" state directly.	1400	 * that precedes pn->pos+pn->bits, since these have already been
1490 */ 1401 * checked. The bits after cn->pos aren't checked since these are
1491 if (pref_mismatch) { 1402 * by definition "unknown" at this point. Thus, what we want to
1492 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { 1403 * see is if we are about to enter the "prefix matching" state,
1493 mp++; 1404 * and in that case verify that the skipped bits that will prevail
1494 pref_mismatch = pref_mismatch <<1; 1405 * throughout this subtree are zero, as they have to be if we are
1495 } 1406 * to find a matching prefix.
1496 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp); 1407 */
1497 1408
1498 if (key_prefix != 0) 1409 node_prefix = MASK_PFX(cn->key, cn->pos);
1499 goto backtrace; 1410 key_prefix = MASK_PFX(key, cn->pos);
1500 1411 pref_mismatch = key_prefix^node_prefix;
1501 if (current_prefix_length >= cn->pos) 1412 mp = 0;
1502 current_prefix_length=mp; 1413
1503 } 1414 /* In short: If skipped bits in this node do not match the search
1415	 * key, enter the "prefix matching" state directly.
1416 */
1417 if (pref_mismatch) {
1418 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1419 mp++;
1420 pref_mismatch = pref_mismatch <<1;
1421 }
1422 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1423
1424 if (key_prefix != 0)
1425 goto backtrace;
1426
1427 if (current_prefix_length >= cn->pos)
1428 current_prefix_length = mp;
1429 }
1504#endif 1430#endif
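An aside on the scan just above, as a self-contained userspace illustration: pref_mismatch is shifted left until its most significant bit is set, so mp ends up as the index, counted from the MSB, of the first bit where the key and the node's prefix disagree. The kernel only enters the loop when pref_mismatch is nonzero, so it terminates; the values below are invented for the example:

    #include <stdio.h>

    #define KEYLENGTH 32

    int main(void)
    {
            unsigned int node_prefix = 0x0a000000; /* 10.0.0.0   */
            unsigned int key_prefix  = 0x0a800000; /* 10.128.0.0 */
            unsigned int pref_mismatch = key_prefix ^ node_prefix;
            int mp = 0;

            while (!(pref_mismatch & (1u << (KEYLENGTH - 1)))) {
                    mp++;
                    pref_mismatch <<= 1;
            }
            printf("first mismatching bit: %d\n", mp); /* prints 8 */
            return 0;
    }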
1505 pn = (struct tnode *)n; /* Descend */ 1431 pn = (struct tnode *)n; /* Descend */
1506 chopped_off = 0; 1432 chopped_off = 0;
1507 continue; 1433 continue;
1508 } 1434
1509 if (IS_LEAF(n)) {
1510 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret))
1511 goto found;
1512 }
1513backtrace: 1435backtrace:
1514 chopped_off++; 1436 chopped_off++;
1515 1437
1516	 /* As zeros don't change the child key (cindex) */	1438	 /* As zeros don't change the child key (cindex) */
1517 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) { 1439 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1))))
1518 chopped_off++; 1440 chopped_off++;
1519 }
1520 1441
1521 /* Decrease current_... with bits chopped off */ 1442 /* Decrease current_... with bits chopped off */
1522 if (current_prefix_length > pn->pos + pn->bits - chopped_off) 1443 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1523 current_prefix_length = pn->pos + pn->bits - chopped_off; 1444 current_prefix_length = pn->pos + pn->bits - chopped_off;
1524 1445
1525 /* 1446 /*
1526	 * Either we do the actual chop-off accordingly or, if we have	1447	 * Either we do the actual chop-off accordingly or, if we have
1527	 * chopped off all bits in this tnode, walk up to our parent.	1448	 * chopped off all bits in this tnode, walk up to our parent.
1528 */ 1449 */
1529 1450
1530 if(chopped_off <= pn->bits) 1451 if (chopped_off <= pn->bits) {
1531 cindex &= ~(1 << (chopped_off-1)); 1452 cindex &= ~(1 << (chopped_off-1));
1532 else { 1453 } else {
1533 if( NODE_PARENT(pn) == NULL) 1454 if (NODE_PARENT(pn) == NULL)
1534 goto failed; 1455 goto failed;
1535 1456
1536 /* Get Child's index */ 1457 /* Get Child's index */
1537 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits); 1458 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
1538 pn = NODE_PARENT(pn); 1459 pn = NODE_PARENT(pn);
@@ -1542,15 +1463,16 @@ backtrace:
1542 t->stats.backtrack++; 1463 t->stats.backtrack++;
1543#endif 1464#endif
1544 goto backtrace; 1465 goto backtrace;
1545 } 1466 }
1546 } 1467 }
1547failed: 1468failed:
1548 ret = 1; 1469 ret = 1;
1549found: 1470found:
1550 read_unlock(&fib_lock); 1471 rcu_read_unlock();
1551 return ret; 1472 return ret;
1552} 1473}
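The converted lookup follows the canonical RCU read-side recipe: the whole walk runs between rcu_read_lock() and rcu_read_unlock(), and every shared pointer crossed on the way down is loaded through rcu_dereference() (tnode_get_child() is assumed to do so internally after this patch). Stripped to a skeleton over the surrounding fib_trie types:

    static int lookup_skeleton(struct trie *t, t_key key)
    {
            struct node *n;
            int ret = 1;            /* 1 == not found, as above */

            rcu_read_lock();
            n = rcu_dereference(t->trie);
            while (n && IS_TNODE(n)) {
                    struct tnode *tn = (struct tnode *)n;
                    int cindex = tkey_extract_bits(key, tn->pos, tn->bits);

                    n = rcu_dereference(tn->child[cindex]);
            }
            /* ... check_leaf() / backtrace as in fn_trie_lookup ... */
            rcu_read_unlock();
            return ret;
    }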
1553 1474
1475/* only called from updater side */
1554static int trie_leaf_remove(struct trie *t, t_key key) 1476static int trie_leaf_remove(struct trie *t, t_key key)
1555{ 1477{
1556 t_key cindex; 1478 t_key cindex;
@@ -1558,54 +1480,51 @@ static int trie_leaf_remove(struct trie *t, t_key key)
1558 struct node *n = t->trie; 1480 struct node *n = t->trie;
1559 struct leaf *l; 1481 struct leaf *l;
1560 1482
1561 if(trie_debug) 1483 pr_debug("entering trie_leaf_remove(%p)\n", n);
1562 printk("entering trie_leaf_remove(%p)\n", n);
1563 1484
1564	 /* Note that in the case of skipped bits, those bits are *not* checked!	1485	 /* Note that in the case of skipped bits, those bits are *not* checked!
1565 * When we finish this, we will have NULL or a T_LEAF, and the 1486 * When we finish this, we will have NULL or a T_LEAF, and the
1566 * T_LEAF may or may not match our key. 1487 * T_LEAF may or may not match our key.
1567 */ 1488 */
1568 1489
1569 while (n != NULL && IS_TNODE(n)) { 1490 while (n != NULL && IS_TNODE(n)) {
1570 struct tnode *tn = (struct tnode *) n; 1491 struct tnode *tn = (struct tnode *) n;
1571 check_tnode(tn); 1492 check_tnode(tn);
1572 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits)); 1493 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1573 1494
1574 if(n && NODE_PARENT(n) != tn) { 1495 BUG_ON(n && NODE_PARENT(n) != tn);
1575 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n)); 1496 }
1576 BUG();
1577 }
1578 }
1579 l = (struct leaf *) n; 1497 l = (struct leaf *) n;
1580 1498
1581 if(!n || !tkey_equals(l->key, key)) 1499 if (!n || !tkey_equals(l->key, key))
1582 return 0; 1500 return 0;
1583 1501
1584 /* 1502 /*
1585 * Key found. 1503 * Key found.
1586 * Remove the leaf and rebalance the tree 1504 * Remove the leaf and rebalance the tree
1587 */ 1505 */
1588 1506
1589 t->revision++; 1507 t->revision++;
1590 t->size--; 1508 t->size--;
1591 1509
1510 preempt_disable();
1592 tp = NODE_PARENT(n); 1511 tp = NODE_PARENT(n);
1593 tnode_free((struct tnode *) n); 1512 tnode_free((struct tnode *) n);
1594 1513
1595 if(tp) { 1514 if (tp) {
1596 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1515 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1597 put_child(t, (struct tnode *)tp, cindex, NULL); 1516 put_child(t, (struct tnode *)tp, cindex, NULL);
1598 t->trie = trie_rebalance(t, tp); 1517 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
1599 } 1518 } else
1600 else 1519 rcu_assign_pointer(t->trie, NULL);
1601 t->trie = NULL; 1520 preempt_enable();
1602 1521
1603 return 1; 1522 return 1;
1604} 1523}
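On the update side, rcu_assign_pointer() is what makes this safe: it orders all the stores that initialise the rebalanced subtree before the store to t->trie, so a concurrent rcu_dereference(t->trie) can never observe a half-built root. In miniature (hypothetical helper name):

    static void trie_publish_root(struct trie *t, struct node *new_root)
    {
            /* everything reachable from new_root is initialised first */
            rcu_assign_pointer(t->trie, new_root);
    }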
1605 1524
1606static int 1525static int
1607fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, 1526fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1608 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) 1527 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1609{ 1528{
1610 struct trie *t = (struct trie *) tb->tb_data; 1529 struct trie *t = (struct trie *) tb->tb_data;
1611 u32 key, mask; 1530 u32 key, mask;
@@ -1614,24 +1533,26 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1614 struct fib_alias *fa, *fa_to_delete; 1533 struct fib_alias *fa, *fa_to_delete;
1615 struct list_head *fa_head; 1534 struct list_head *fa_head;
1616 struct leaf *l; 1535 struct leaf *l;
1536 struct leaf_info *li;
1537
1617 1538
1618 if (plen > 32) 1539 if (plen > 32)
1619 return -EINVAL; 1540 return -EINVAL;
1620 1541
1621 key = 0; 1542 key = 0;
1622 if (rta->rta_dst) 1543 if (rta->rta_dst)
1623 memcpy(&key, rta->rta_dst, 4); 1544 memcpy(&key, rta->rta_dst, 4);
1624 1545
1625 key = ntohl(key); 1546 key = ntohl(key);
1626 mask = ntohl( inet_make_mask(plen) ); 1547 mask = ntohl(inet_make_mask(plen));
1627 1548
1628 if(key & ~mask) 1549 if (key & ~mask)
1629 return -EINVAL; 1550 return -EINVAL;
1630 1551
1631 key = key & mask; 1552 key = key & mask;
1632 l = fib_find_node(t, key); 1553 l = fib_find_node(t, key);
1633 1554
1634 if(!l) 1555 if (!l)
1635 return -ESRCH; 1556 return -ESRCH;
1636 1557
1637 fa_head = get_fa_head(l, plen); 1558 fa_head = get_fa_head(l, plen);
@@ -1640,11 +1561,11 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1640 if (!fa) 1561 if (!fa)
1641 return -ESRCH; 1562 return -ESRCH;
1642 1563
1643 if (trie_debug) 1564 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
1644 printk("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
1645 1565
1646 fa_to_delete = NULL; 1566 fa_to_delete = NULL;
1647 fa_head = fa->fa_list.prev; 1567 fa_head = fa->fa_list.prev;
1568
1648 list_for_each_entry(fa, fa_head, fa_list) { 1569 list_for_each_entry(fa, fa_head, fa_list) {
1649 struct fib_info *fi = fa->fa_info; 1570 struct fib_info *fi = fa->fa_info;
1650 1571
@@ -1663,39 +1584,31 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1663 } 1584 }
1664 } 1585 }
1665 1586
1666 if (fa_to_delete) { 1587 if (!fa_to_delete)
1667 int kill_li = 0; 1588 return -ESRCH;
1668 struct leaf_info *li;
1669
1670 fa = fa_to_delete;
1671 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
1672 1589
1673 l = fib_find_node(t, key); 1590 fa = fa_to_delete;
1674 li = find_leaf_info(&l->list, plen); 1591 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
1675 1592
1676 write_lock_bh(&fib_lock); 1593 l = fib_find_node(t, key);
1594 li = find_leaf_info(&l->list, plen);
1677 1595
1678 list_del(&fa->fa_list); 1596 list_del_rcu(&fa->fa_list);
1679 1597
1680 if(list_empty(fa_head)) { 1598 if (list_empty(fa_head)) {
1681 hlist_del(&li->hlist); 1599 hlist_del_rcu(&li->hlist);
1682 kill_li = 1; 1600 free_leaf_info(li);
1683 } 1601 }
1684 write_unlock_bh(&fib_lock);
1685
1686 if(kill_li)
1687 free_leaf_info(li);
1688 1602
1689 if(hlist_empty(&l->list)) 1603 if (hlist_empty(&l->list))
1690 trie_leaf_remove(t, key); 1604 trie_leaf_remove(t, key);
1691 1605
1692 if (fa->fa_state & FA_S_ACCESSED) 1606 if (fa->fa_state & FA_S_ACCESSED)
1693 rt_cache_flush(-1); 1607 rt_cache_flush(-1);
1694 1608
1695 fn_free_alias(fa); 1609 fib_release_info(fa->fa_info);
1696 return 0; 1610 alias_free_mem_rcu(fa);
1697 } 1611 return 0;
1698 return -ESRCH;
1699} 1612}
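Deletion uses the same two-phase discipline as the replace path: list_del_rcu()/hlist_del_rcu() unlink the entry while leaving its forward pointers intact for readers still traversing it, and alias_free_mem_rcu(), a call_rcu()-based free, reclaims the memory only after a grace period. With the hypothetical struct item from the earlier sketch:

    static void item_delete(struct item *it)
    {
            list_del_rcu(&it->list);           /* readers in flight stay safe */
            call_rcu(&it->rcu, item_free_rcu); /* free after a grace period */
    }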
1700 1613
1701static int trie_flush_list(struct trie *t, struct list_head *head) 1614static int trie_flush_list(struct trie *t, struct list_head *head)
@@ -1705,14 +1618,11 @@ static int trie_flush_list(struct trie *t, struct list_head *head)
1705 1618
1706 list_for_each_entry_safe(fa, fa_node, head, fa_list) { 1619 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1707 struct fib_info *fi = fa->fa_info; 1620 struct fib_info *fi = fa->fa_info;
1708
1709 if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
1710
1711 write_lock_bh(&fib_lock);
1712 list_del(&fa->fa_list);
1713 write_unlock_bh(&fib_lock);
1714 1621
1715 fn_free_alias(fa); 1622 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1623 list_del_rcu(&fa->fa_list);
1624 fib_release_info(fa->fa_info);
1625 alias_free_mem_rcu(fa);
1716 found++; 1626 found++;
1717 } 1627 }
1718 } 1628 }
@@ -1727,71 +1637,71 @@ static int trie_flush_leaf(struct trie *t, struct leaf *l)
1727 struct leaf_info *li = NULL; 1637 struct leaf_info *li = NULL;
1728 1638
1729 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { 1639 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
1730
1731 found += trie_flush_list(t, &li->falh); 1640 found += trie_flush_list(t, &li->falh);
1732 1641
1733 if (list_empty(&li->falh)) { 1642 if (list_empty(&li->falh)) {
1734 1643 hlist_del_rcu(&li->hlist);
1735 write_lock_bh(&fib_lock);
1736 hlist_del(&li->hlist);
1737 write_unlock_bh(&fib_lock);
1738
1739 free_leaf_info(li); 1644 free_leaf_info(li);
1740 } 1645 }
1741 } 1646 }
1742 return found; 1647 return found;
1743} 1648}
1744 1649
1650/* rcu_read_lock needs to be held by the caller from the read side */
1651
1745static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) 1652static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1746{ 1653{
1747 struct node *c = (struct node *) thisleaf; 1654 struct node *c = (struct node *) thisleaf;
1748 struct tnode *p; 1655 struct tnode *p;
1749 int idx; 1656 int idx;
1657 struct node *trie = rcu_dereference(t->trie);
1750 1658
1751 if(c == NULL) { 1659 if (c == NULL) {
1752 if(t->trie == NULL) 1660 if (trie == NULL)
1753 return NULL; 1661 return NULL;
1754 1662
1755 if (IS_LEAF(t->trie)) /* trie w. just a leaf */ 1663 if (IS_LEAF(trie)) /* trie w. just a leaf */
1756 return (struct leaf *) t->trie; 1664 return (struct leaf *) trie;
1757 1665
1758 p = (struct tnode*) t->trie; /* Start */ 1666 p = (struct tnode*) trie; /* Start */
1759 } 1667 } else
1760 else
1761 p = (struct tnode *) NODE_PARENT(c); 1668 p = (struct tnode *) NODE_PARENT(c);
1669
1762 while (p) { 1670 while (p) {
1763 int pos, last; 1671 int pos, last;
1764 1672
1765 /* Find the next child of the parent */ 1673 /* Find the next child of the parent */
1766 if(c) 1674 if (c)
1767 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits); 1675 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1768 else 1676 else
1769 pos = 0; 1677 pos = 0;
1770 1678
1771 last = 1 << p->bits; 1679 last = 1 << p->bits;
1772 for(idx = pos; idx < last ; idx++) { 1680 for (idx = pos; idx < last ; idx++) {
1773 if( p->child[idx]) { 1681 c = rcu_dereference(p->child[idx]);
1774 1682
1775	 /* Descend if tnode */	1683	 if (!c)
1776 1684 continue;
1777 while (IS_TNODE(p->child[idx])) { 1685
1778	 p = (struct tnode*) p->child[idx];	1686	 /* Descend if tnode */
1779 idx = 0; 1687 while (IS_TNODE(c)) {
1780 1688 p = (struct tnode *) c;
1781 /* Rightmost non-NULL branch */ 1689 idx = 0;
1782 if( p && IS_TNODE(p) ) 1690
1783 while ( p->child[idx] == NULL && idx < (1 << p->bits) ) idx++; 1691 /* Rightmost non-NULL branch */
1784 1692 if (p && IS_TNODE(p))
1785 /* Done with this tnode? */ 1693 while (!(c = rcu_dereference(p->child[idx]))
1786 if( idx >= (1 << p->bits) || p->child[idx] == NULL ) 1694 && idx < (1<<p->bits)) idx++;
1787 goto up; 1695
1788 } 1696 /* Done with this tnode? */
1789 return (struct leaf*) p->child[idx]; 1697 if (idx >= (1 << p->bits) || !c)
1698 goto up;
1790 } 1699 }
1700 return (struct leaf *) c;
1791 } 1701 }
1792up: 1702up:
1793 /* No more children go up one step */ 1703 /* No more children go up one step */
1794 c = (struct node*) p; 1704 c = (struct node *) p;
1795 p = (struct tnode *) NODE_PARENT(p); 1705 p = (struct tnode *) NODE_PARENT(p);
1796 } 1706 }
1797 return NULL; /* Ready. Root of trie */ 1707 return NULL; /* Ready. Root of trie */
@@ -1805,23 +1715,24 @@ static int fn_trie_flush(struct fib_table *tb)
1805 1715
1806 t->revision++; 1716 t->revision++;
1807 1717
1808 for (h=0; (l = nextleaf(t, l)) != NULL; h++) { 1718 rcu_read_lock();
1719 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
1809 found += trie_flush_leaf(t, l); 1720 found += trie_flush_leaf(t, l);
1810 1721
1811 if (ll && hlist_empty(&ll->list)) 1722 if (ll && hlist_empty(&ll->list))
1812 trie_leaf_remove(t, ll->key); 1723 trie_leaf_remove(t, ll->key);
1813 ll = l; 1724 ll = l;
1814 } 1725 }
1726 rcu_read_unlock();
1815 1727
1816 if (ll && hlist_empty(&ll->list)) 1728 if (ll && hlist_empty(&ll->list))
1817 trie_leaf_remove(t, ll->key); 1729 trie_leaf_remove(t, ll->key);
1818 1730
1819 if(trie_debug) 1731 pr_debug("trie_flush found=%d\n", found);
1820 printk("trie_flush found=%d\n", found);
1821 return found; 1732 return found;
1822} 1733}
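fn_trie_flush() prunes lazily, one leaf behind the iterator: removing the current leaf would break the parent walk that nextleaf() relies on, so each pass deletes the previous leaf once the cursor has moved past it, with one final check after the loop. The idiom in outline, flush() standing in for trie_flush_leaf():

    l = ll = NULL;
    while ((l = nextleaf(t, l)) != NULL) {
            found += flush(l);
            if (ll && hlist_empty(&ll->list))
                    trie_leaf_remove(t, ll->key);   /* cursor has moved on */
            ll = l;
    }
    if (ll && hlist_empty(&ll->list))
            trie_leaf_remove(t, ll->key);           /* the last leaf */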
1823 1734
1824static int trie_last_dflt=-1; 1735static int trie_last_dflt = -1;
1825 1736
1826static void 1737static void
1827fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) 1738fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
@@ -1838,33 +1749,33 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
1838 last_resort = NULL; 1749 last_resort = NULL;
1839 order = -1; 1750 order = -1;
1840 1751
1841 read_lock(&fib_lock); 1752 rcu_read_lock();
1842 1753
1843 l = fib_find_node(t, 0); 1754 l = fib_find_node(t, 0);
1844 if(!l) 1755 if (!l)
1845 goto out; 1756 goto out;
1846 1757
1847 fa_head = get_fa_head(l, 0); 1758 fa_head = get_fa_head(l, 0);
1848 if(!fa_head) 1759 if (!fa_head)
1849 goto out; 1760 goto out;
1850 1761
1851 if (list_empty(fa_head)) 1762 if (list_empty(fa_head))
1852 goto out; 1763 goto out;
1853 1764
1854 list_for_each_entry(fa, fa_head, fa_list) { 1765 list_for_each_entry_rcu(fa, fa_head, fa_list) {
1855 struct fib_info *next_fi = fa->fa_info; 1766 struct fib_info *next_fi = fa->fa_info;
1856 1767
1857 if (fa->fa_scope != res->scope || 1768 if (fa->fa_scope != res->scope ||
1858 fa->fa_type != RTN_UNICAST) 1769 fa->fa_type != RTN_UNICAST)
1859 continue; 1770 continue;
1860 1771
1861 if (next_fi->fib_priority > res->fi->fib_priority) 1772 if (next_fi->fib_priority > res->fi->fib_priority)
1862 break; 1773 break;
1863 if (!next_fi->fib_nh[0].nh_gw || 1774 if (!next_fi->fib_nh[0].nh_gw ||
1864 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) 1775 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1865 continue; 1776 continue;
1866 fa->fa_state |= FA_S_ACCESSED; 1777 fa->fa_state |= FA_S_ACCESSED;
1867 1778
1868 if (fi == NULL) { 1779 if (fi == NULL) {
1869 if (next_fi != res->fi) 1780 if (next_fi != res->fi)
1870 break; 1781 break;
@@ -1902,21 +1813,23 @@ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
1902 } 1813 }
1903 trie_last_dflt = last_idx; 1814 trie_last_dflt = last_idx;
1904 out:; 1815 out:;
1905 read_unlock(&fib_lock); 1816 rcu_read_unlock();
1906} 1817}
1907 1818
1908static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb, 1819static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
1909 struct sk_buff *skb, struct netlink_callback *cb) 1820 struct sk_buff *skb, struct netlink_callback *cb)
1910{ 1821{
1911 int i, s_i; 1822 int i, s_i;
1912 struct fib_alias *fa; 1823 struct fib_alias *fa;
1913 1824
1914 u32 xkey=htonl(key); 1825 u32 xkey = htonl(key);
1915 1826
1916 s_i=cb->args[3]; 1827 s_i = cb->args[3];
1917 i = 0; 1828 i = 0;
1918 1829
1919	 list_for_each_entry(fa, fah, fa_list) {	1830	 /* rcu_read_lock is held by the caller */
1831
1832 list_for_each_entry_rcu(fa, fah, fa_list) {
1920 if (i < s_i) { 1833 if (i < s_i) {
1921 i++; 1834 i++;
1922 continue; 1835 continue;
@@ -1944,23 +1857,23 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
1944 fa->fa_info, 0) < 0) { 1857 fa->fa_info, 0) < 0) {
1945 cb->args[3] = i; 1858 cb->args[3] = i;
1946 return -1; 1859 return -1;
1947 } 1860 }
1948 i++; 1861 i++;
1949 } 1862 }
1950 cb->args[3]=i; 1863 cb->args[3] = i;
1951 return skb->len; 1864 return skb->len;
1952} 1865}
1953 1866
1954static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb, 1867static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
1955 struct netlink_callback *cb) 1868 struct netlink_callback *cb)
1956{ 1869{
1957 int h, s_h; 1870 int h, s_h;
1958 struct list_head *fa_head; 1871 struct list_head *fa_head;
1959 struct leaf *l = NULL; 1872 struct leaf *l = NULL;
1960 s_h=cb->args[2];
1961 1873
1962 for (h=0; (l = nextleaf(t, l)) != NULL; h++) { 1874 s_h = cb->args[2];
1963 1875
1876 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
1964 if (h < s_h) 1877 if (h < s_h)
1965 continue; 1878 continue;
1966 if (h > s_h) 1879 if (h > s_h)
@@ -1968,19 +1881,19 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str
1968 sizeof(cb->args) - 3*sizeof(cb->args[0])); 1881 sizeof(cb->args) - 3*sizeof(cb->args[0]));
1969 1882
1970 fa_head = get_fa_head(l, plen); 1883 fa_head = get_fa_head(l, plen);
1971 1884
1972 if(!fa_head) 1885 if (!fa_head)
1973 continue; 1886 continue;
1974 1887
1975 if(list_empty(fa_head)) 1888 if (list_empty(fa_head))
1976 continue; 1889 continue;
1977 1890
1978 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { 1891 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
1979 cb->args[2]=h; 1892 cb->args[2] = h;
1980 return -1; 1893 return -1;
1981 } 1894 }
1982 } 1895 }
1983 cb->args[2]=h; 1896 cb->args[2] = h;
1984 return skb->len; 1897 return skb->len;
1985} 1898}
1986 1899
@@ -1991,25 +1904,24 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
1991 1904
1992 s_m = cb->args[1]; 1905 s_m = cb->args[1];
1993 1906
1994 read_lock(&fib_lock); 1907 rcu_read_lock();
1995 for (m=0; m<=32; m++) { 1908 for (m = 0; m <= 32; m++) {
1996
1997 if (m < s_m) 1909 if (m < s_m)
1998 continue; 1910 continue;
1999 if (m > s_m) 1911 if (m > s_m)
2000 memset(&cb->args[2], 0, 1912 memset(&cb->args[2], 0,
2001 sizeof(cb->args) - 2*sizeof(cb->args[0])); 1913 sizeof(cb->args) - 2*sizeof(cb->args[0]));
2002 1914
2003 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { 1915 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
2004 cb->args[1] = m; 1916 cb->args[1] = m;
2005 goto out; 1917 goto out;
2006 } 1918 }
2007 } 1919 }
2008 read_unlock(&fib_lock); 1920 rcu_read_unlock();
2009 cb->args[1] = m; 1921 cb->args[1] = m;
2010 return skb->len; 1922 return skb->len;
2011 out: 1923out:
2012 read_unlock(&fib_lock); 1924 rcu_read_unlock();
2013 return -1; 1925 return -1;
2014} 1926}
2015 1927
@@ -2048,10 +1960,10 @@ struct fib_table * __init fib_hash_init(int id)
2048 1960
2049 trie_init(t); 1961 trie_init(t);
2050 1962
2051 if (id == RT_TABLE_LOCAL) 1963 if (id == RT_TABLE_LOCAL)
2052 trie_local=t; 1964 trie_local = t;
2053 else if (id == RT_TABLE_MAIN) 1965 else if (id == RT_TABLE_MAIN)
2054 trie_main=t; 1966 trie_main = t;
2055 1967
2056 if (id == RT_TABLE_LOCAL) 1968 if (id == RT_TABLE_LOCAL)
2057 printk("IPv4 FIB: Using LC-trie version %s\n", VERSION); 1969 printk("IPv4 FIB: Using LC-trie version %s\n", VERSION);
@@ -2063,7 +1975,8 @@ struct fib_table * __init fib_hash_init(int id)
2063 1975
2064static void putspace_seq(struct seq_file *seq, int n) 1976static void putspace_seq(struct seq_file *seq, int n)
2065{ 1977{
2066 while (n--) seq_printf(seq, " "); 1978 while (n--)
1979 seq_printf(seq, " ");
2067} 1980}
2068 1981
2069static void printbin_seq(struct seq_file *seq, unsigned int v, int bits) 1982static void printbin_seq(struct seq_file *seq, unsigned int v, int bits)
@@ -2072,7 +1985,7 @@ static void printbin_seq(struct seq_file *seq, unsigned int v, int bits)
2072 seq_printf(seq, "%s", (v & (1<<bits))?"1":"0"); 1985 seq_printf(seq, "%s", (v & (1<<bits))?"1":"0");
2073} 1986}
2074 1987
2075static void printnode_seq(struct seq_file *seq, int indent, struct node *n, 1988static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
2076 int pend, int cindex, int bits) 1989 int pend, int cindex, int bits)
2077{ 1990{
2078 putspace_seq(seq, indent); 1991 putspace_seq(seq, indent);
@@ -2084,49 +1997,41 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
2084 seq_printf(seq, "%d/", cindex); 1997 seq_printf(seq, "%d/", cindex);
2085 printbin_seq(seq, cindex, bits); 1998 printbin_seq(seq, cindex, bits);
2086 seq_printf(seq, ": "); 1999 seq_printf(seq, ": ");
2087 } 2000 } else
2088 else
2089 seq_printf(seq, "<root>: "); 2001 seq_printf(seq, "<root>: ");
2090 seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n); 2002 seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n);
2091 2003
2092 if (IS_LEAF(n))
2093 seq_printf(seq, "key=%d.%d.%d.%d\n",
2094 n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256);
2095 else {
2096 int plen=((struct tnode *)n)->pos;
2097 t_key prf=MASK_PFX(n->key, plen);
2098 seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
2099 prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
2100 }
2101 if (IS_LEAF(n)) { 2004 if (IS_LEAF(n)) {
2102 struct leaf *l=(struct leaf *)n; 2005 struct leaf *l = (struct leaf *)n;
2103 struct fib_alias *fa; 2006 struct fib_alias *fa;
2104 int i; 2007 int i;
2105 for (i=32; i>=0; i--) 2008
2106 if(find_leaf_info(&l->list, i)) { 2009 seq_printf(seq, "key=%d.%d.%d.%d\n",
2107 2010 n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256);
2011
2012 for (i = 32; i >= 0; i--)
2013 if (find_leaf_info(&l->list, i)) {
2108 struct list_head *fa_head = get_fa_head(l, i); 2014 struct list_head *fa_head = get_fa_head(l, i);
2109 2015
2110 if(!fa_head) 2016 if (!fa_head)
2111 continue; 2017 continue;
2112 2018
2113 if(list_empty(fa_head)) 2019 if (list_empty(fa_head))
2114 continue; 2020 continue;
2115 2021
2116 putspace_seq(seq, indent+2); 2022 putspace_seq(seq, indent+2);
2117 seq_printf(seq, "{/%d...dumping}\n", i); 2023 seq_printf(seq, "{/%d...dumping}\n", i);
2118 2024
2119 2025 list_for_each_entry_rcu(fa, fa_head, fa_list) {
2120 list_for_each_entry(fa, fa_head, fa_list) {
2121 putspace_seq(seq, indent+2); 2026 putspace_seq(seq, indent+2);
2122 if (fa->fa_info->fib_nh == NULL) {
2123 seq_printf(seq, "Error _fib_nh=NULL\n");
2124 continue;
2125 }
2126 if (fa->fa_info == NULL) { 2027 if (fa->fa_info == NULL) {
2127 seq_printf(seq, "Error fa_info=NULL\n"); 2028 seq_printf(seq, "Error fa_info=NULL\n");
2128 continue; 2029 continue;
2129 } 2030 }
2031 if (fa->fa_info->fib_nh == NULL) {
2032 seq_printf(seq, "Error _fib_nh=NULL\n");
2033 continue;
2034 }
2130 2035
2131 seq_printf(seq, "{type=%d scope=%d TOS=%d}\n", 2036 seq_printf(seq, "{type=%d scope=%d TOS=%d}\n",
2132 fa->fa_type, 2037 fa->fa_type,
@@ -2134,11 +2039,16 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
2134 fa->fa_tos); 2039 fa->fa_tos);
2135 } 2040 }
2136 } 2041 }
2137 } 2042 } else {
2138 else if (IS_TNODE(n)) { 2043 struct tnode *tn = (struct tnode *)n;
2139 struct tnode *tn=(struct tnode *)n; 2044 int plen = ((struct tnode *)n)->pos;
2045 t_key prf = MASK_PFX(n->key, plen);
2046
2047 seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
2048 prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
2049
2140 putspace_seq(seq, indent); seq_printf(seq, "| "); 2050 putspace_seq(seq, indent); seq_printf(seq, "| ");
2141 seq_printf(seq, "{key prefix=%08x/", tn->key&TKEY_GET_MASK(0, tn->pos)); 2051 seq_printf(seq, "{key prefix=%08x/", tn->key & TKEY_GET_MASK(0, tn->pos));
2142 printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos); 2052 printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos);
2143 seq_printf(seq, "}\n"); 2053 seq_printf(seq, "}\n");
2144 putspace_seq(seq, indent); seq_printf(seq, "| "); 2054 putspace_seq(seq, indent); seq_printf(seq, "| ");
@@ -2152,194 +2062,196 @@ static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
2152 2062
2153static void trie_dump_seq(struct seq_file *seq, struct trie *t) 2063static void trie_dump_seq(struct seq_file *seq, struct trie *t)
2154{ 2064{
2155 struct node *n=t->trie; 2065 struct node *n;
2156 int cindex=0; 2066 int cindex = 0;
2157 int indent=1; 2067 int indent = 1;
2158 int pend=0; 2068 int pend = 0;
2159 int depth = 0; 2069 int depth = 0;
2070 struct tnode *tn;
2160 2071
2161 read_lock(&fib_lock); 2072 rcu_read_lock();
2162 2073 n = rcu_dereference(t->trie);
2163 seq_printf(seq, "------ trie_dump of t=%p ------\n", t); 2074 seq_printf(seq, "------ trie_dump of t=%p ------\n", t);
2164 if (n) {
2165 printnode_seq(seq, indent, n, pend, cindex, 0);
2166 if (IS_TNODE(n)) {
2167 struct tnode *tn=(struct tnode *)n;
2168 pend = tn->pos+tn->bits;
2169 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2170 indent += 3;
2171 depth++;
2172
2173 while (tn && cindex < (1 << tn->bits)) {
2174 if (tn->child[cindex]) {
2175
2176 /* Got a child */
2177
2178 printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits);
2179 if (IS_LEAF(tn->child[cindex])) {
2180 cindex++;
2181
2182 }
2183 else {
2184 /*
2185	 * New tnode. Descend one level
2186 */
2187
2188 depth++;
2189 n=tn->child[cindex];
2190 tn=(struct tnode *)n;
2191 pend=tn->pos+tn->bits;
2192 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2193 indent+=3;
2194 cindex=0;
2195 }
2196 }
2197 else
2198 cindex++;
2199 2075
2076 if (!n) {
2077 seq_printf(seq, "------ trie is empty\n");
2078
2079 rcu_read_unlock();
2080 return;
2081 }
2082
2083 printnode_seq(seq, indent, n, pend, cindex, 0);
2084
2085 if (!IS_TNODE(n)) {
2086 rcu_read_unlock();
2087 return;
2088 }
2089
2090 tn = (struct tnode *)n;
2091 pend = tn->pos+tn->bits;
2092 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2093 indent += 3;
2094 depth++;
2095
2096 while (tn && cindex < (1 << tn->bits)) {
2097 struct node *child = rcu_dereference(tn->child[cindex]);
2098 if (!child)
2099 cindex++;
2100 else {
2101 /* Got a child */
2102 printnode_seq(seq, indent, child, pend,
2103 cindex, tn->bits);
2104
2105 if (IS_LEAF(child))
2106 cindex++;
2107
2108 else {
2200 /* 2109 /*
2201	 * Test if we are done	2110	 * New tnode. Descend one level
2202 */ 2111 */
2203
2204 while (cindex >= (1 << tn->bits)) {
2205 2112
2206 /* 2113 depth++;
2207 * Move upwards and test for root 2114 n = child;
2208 * pop off all traversed nodes 2115 tn = (struct tnode *)n;
2209 */ 2116 pend = tn->pos+tn->bits;
2210 2117 putspace_seq(seq, indent);
2211 if (NODE_PARENT(tn) == NULL) { 2118 seq_printf(seq, "\\--\n");
2212 tn = NULL; 2119 indent += 3;
2213 n = NULL; 2120 cindex = 0;
2214 break;
2215 }
2216 else {
2217 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2218 tn = NODE_PARENT(tn);
2219 cindex++;
2220 n=(struct node *)tn;
2221 pend=tn->pos+tn->bits;
2222 indent-=3;
2223 depth--;
2224 }
2225 }
2226 } 2121 }
2227 } 2122 }
2228 else n = NULL;
2229 }
2230 else seq_printf(seq, "------ trie is empty\n");
2231 2123
2232 read_unlock(&fib_lock); 2124 /*
2125 * Test if we are done
2126 */
2127
2128 while (cindex >= (1 << tn->bits)) {
2129 /*
2130 * Move upwards and test for root
2131 * pop off all traversed nodes
2132 */
2133
2134 if (NODE_PARENT(tn) == NULL) {
2135 tn = NULL;
2136 break;
2137 }
2138
2139 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2140 cindex++;
2141 tn = NODE_PARENT(tn);
2142 pend = tn->pos + tn->bits;
2143 indent -= 3;
2144 depth--;
2145 }
2146 }
2147 rcu_read_unlock();
2233} 2148}
2234 2149
2235static struct trie_stat *trie_stat_new(void) 2150static struct trie_stat *trie_stat_new(void)
2236{ 2151{
2237 struct trie_stat *s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL); 2152 struct trie_stat *s;
2238 int i; 2153 int i;
2239 2154
2240 if(s) { 2155 s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL);
2241 s->totdepth = 0; 2156 if (!s)
2242 s->maxdepth = 0; 2157 return NULL;
2243 s->tnodes = 0; 2158
2244 s->leaves = 0; 2159 s->totdepth = 0;
2245 s->nullpointers = 0; 2160 s->maxdepth = 0;
2246 2161 s->tnodes = 0;
2247 for(i=0; i< MAX_CHILDS; i++) 2162 s->leaves = 0;
2248 s->nodesizes[i] = 0; 2163 s->nullpointers = 0;
2249 } 2164
2165 for (i = 0; i < MAX_CHILDS; i++)
2166 s->nodesizes[i] = 0;
2167
2250 return s; 2168 return s;
2251} 2169}
2252 2170
2253static struct trie_stat *trie_collect_stats(struct trie *t) 2171static struct trie_stat *trie_collect_stats(struct trie *t)
2254{ 2172{
2255 struct node *n=t->trie; 2173 struct node *n;
2256 struct trie_stat *s = trie_stat_new(); 2174 struct trie_stat *s = trie_stat_new();
2257 int cindex = 0; 2175 int cindex = 0;
2258 int indent = 1;
2259 int pend = 0; 2176 int pend = 0;
2260 int depth = 0; 2177 int depth = 0;
2261 2178
2262 read_lock(&fib_lock); 2179 if (!s)
2180 return NULL;
2263 2181
2264 if (s) { 2182 rcu_read_lock();
2265 if (n) { 2183 n = rcu_dereference(t->trie);
2266 if (IS_TNODE(n)) {
2267 struct tnode *tn = (struct tnode *)n;
2268 pend=tn->pos+tn->bits;
2269 indent += 3;
2270 s->nodesizes[tn->bits]++;
2271 depth++;
2272 2184
2273 while (tn && cindex < (1 << tn->bits)) { 2185 if (!n)
2274 if (tn->child[cindex]) { 2186 return s;
2275 /* Got a child */ 2187
2276 2188 if (IS_TNODE(n)) {
2277 if (IS_LEAF(tn->child[cindex])) { 2189 struct tnode *tn = (struct tnode *)n;
2278 cindex++; 2190 pend = tn->pos+tn->bits;
2279 2191 s->nodesizes[tn->bits]++;
2280 /* stats */ 2192 depth++;
2281 if (depth > s->maxdepth)
2282 s->maxdepth = depth;
2283 s->totdepth += depth;
2284 s->leaves++;
2285 }
2286
2287 else {
2288 /*
2289	 * New tnode. Descend one level
2290 */
2291
2292 s->tnodes++;
2293 s->nodesizes[tn->bits]++;
2294 depth++;
2295
2296 n = tn->child[cindex];
2297 tn = (struct tnode *)n;
2298 pend = tn->pos+tn->bits;
2299
2300 indent += 3;
2301 cindex = 0;
2302 }
2303 }
2304 else {
2305 cindex++;
2306 s->nullpointers++;
2307 }
2308 2193
2194 while (tn && cindex < (1 << tn->bits)) {
2195 struct node *ch = rcu_dereference(tn->child[cindex]);
2196 if (ch) {
2197
2198 /* Got a child */
2199
2200 if (IS_LEAF(tn->child[cindex])) {
2201 cindex++;
2202
2203 /* stats */
2204 if (depth > s->maxdepth)
2205 s->maxdepth = depth;
2206 s->totdepth += depth;
2207 s->leaves++;
2208 } else {
2309 /* 2209 /*
2310	 * Test if we are done	2210	 * New tnode. Descend one level
2311 */ 2211 */
2312 2212
2313 while (cindex >= (1 << tn->bits)) { 2213 s->tnodes++;
2314 2214 s->nodesizes[tn->bits]++;
2315 /* 2215 depth++;
2316 * Move upwards and test for root 2216
2317 * pop off all traversed nodes 2217 n = ch;
2318 */ 2218 tn = (struct tnode *)n;
2319 2219 pend = tn->pos+tn->bits;
2320 2220
2321 if (NODE_PARENT(tn) == NULL) { 2221 cindex = 0;
2322 tn = NULL;
2323 n = NULL;
2324 break;
2325 }
2326 else {
2327 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2328 tn = NODE_PARENT(tn);
2329 cindex++;
2330 n = (struct node *)tn;
2331 pend=tn->pos+tn->bits;
2332 indent -= 3;
2333 depth--;
2334 }
2335 }
2336 } 2222 }
2223 } else {
2224 cindex++;
2225 s->nullpointers++;
2337 } 2226 }
2338 else n = NULL; 2227
2228 /*
2229 * Test if we are done
2230 */
2231
2232 while (cindex >= (1 << tn->bits)) {
2233 /*
2234 * Move upwards and test for root
2235 * pop off all traversed nodes
2236 */
2237
2238 if (NODE_PARENT(tn) == NULL) {
2239 tn = NULL;
2240 n = NULL;
2241 break;
2242 }
2243
2244 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2245 tn = NODE_PARENT(tn);
2246 cindex++;
2247 n = (struct node *)tn;
2248 pend = tn->pos+tn->bits;
2249 depth--;
2250 }
2339 } 2251 }
2340 } 2252 }
2341 2253
2342 read_unlock(&fib_lock); 2254 rcu_read_unlock();
2343 return s; 2255 return s;
2344} 2256}
2345 2257
@@ -2357,17 +2269,22 @@ static struct fib_alias *fib_triestat_get_next(struct seq_file *seq)
2357 2269
2358static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos) 2270static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos)
2359{ 2271{
2360 void *v = NULL; 2272 if (!ip_fib_main_table)
2273 return NULL;
2361 2274
2362 if (ip_fib_main_table) 2275 if (*pos)
2363 v = *pos ? fib_triestat_get_next(seq) : SEQ_START_TOKEN; 2276 return fib_triestat_get_next(seq);
2364 return v; 2277 else
2278 return SEQ_START_TOKEN;
2365} 2279}
2366 2280
2367static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2281static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2368{ 2282{
2369 ++*pos; 2283 ++*pos;
2370 return v == SEQ_START_TOKEN ? fib_triestat_get_first(seq) : fib_triestat_get_next(seq); 2284 if (v == SEQ_START_TOKEN)
2285 return fib_triestat_get_first(seq);
2286 else
2287 return fib_triestat_get_next(seq);
2371} 2288}
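Both seq_file hooks spell out the usual SEQ_START_TOKEN protocol: ->start hands back the token while *pos == 0 so that ->show can emit a header line, and ->next maps the token to the first real record as it bumps *pos. The skeleton of the idiom, with get_first()/get_next() as placeholders:

    static void *ex_start(struct seq_file *seq, loff_t *pos)
    {
            return *pos ? get_next(seq) : SEQ_START_TOKEN;
    }

    static void *ex_next(struct seq_file *seq, void *v, loff_t *pos)
    {
            ++*pos;
            return v == SEQ_START_TOKEN ? get_first(seq) : get_next(seq);
    }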
2372 2289
2373static void fib_triestat_seq_stop(struct seq_file *seq, void *v) 2290static void fib_triestat_seq_stop(struct seq_file *seq, void *v)
@@ -2375,7 +2292,7 @@ static void fib_triestat_seq_stop(struct seq_file *seq, void *v)
2375 2292
2376} 2293}
2377 2294
2378/* 2295/*
2379 * This outputs /proc/net/fib_triestats 2296 * This outputs /proc/net/fib_triestats
2380 * 2297 *
2381 * It always works in backward compatibility mode. 2298 * It always works in backward compatibility mode.
@@ -2386,22 +2303,22 @@ static void collect_and_show(struct trie *t, struct seq_file *seq)
2386{ 2303{
2387 int bytes = 0; /* How many bytes are used, a ref is 4 bytes */ 2304 int bytes = 0; /* How many bytes are used, a ref is 4 bytes */
2388 int i, max, pointers; 2305 int i, max, pointers;
2389 struct trie_stat *stat; 2306 struct trie_stat *stat;
2390 int avdepth; 2307 int avdepth;
2391 2308
2392 stat = trie_collect_stats(t); 2309 stat = trie_collect_stats(t);
2393 2310
2394 bytes=0; 2311 bytes = 0;
2395 seq_printf(seq, "trie=%p\n", t); 2312 seq_printf(seq, "trie=%p\n", t);
2396 2313
2397 if (stat) { 2314 if (stat) {
2398 if (stat->leaves) 2315 if (stat->leaves)
2399 avdepth=stat->totdepth*100 / stat->leaves; 2316 avdepth = stat->totdepth*100 / stat->leaves;
2400 else 2317 else
2401 avdepth=0; 2318 avdepth = 0;
2402 seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100 ); 2319 seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100);
2403 seq_printf(seq, "Max depth: %4d\n", stat->maxdepth); 2320 seq_printf(seq, "Max depth: %4d\n", stat->maxdepth);
2404 2321
2405 seq_printf(seq, "Leaves: %d\n", stat->leaves); 2322 seq_printf(seq, "Leaves: %d\n", stat->leaves);
2406 bytes += sizeof(struct leaf) * stat->leaves; 2323 bytes += sizeof(struct leaf) * stat->leaves;
2407 seq_printf(seq, "Internal nodes: %d\n", stat->tnodes); 2324 seq_printf(seq, "Internal nodes: %d\n", stat->tnodes);
@@ -2413,7 +2330,7 @@ static void collect_and_show(struct trie *t, struct seq_file *seq)
2413 max--; 2330 max--;
2414 pointers = 0; 2331 pointers = 0;
2415 2332
2416 for (i = 1; i <= max; i++) 2333 for (i = 1; i <= max; i++)
2417 if (stat->nodesizes[i] != 0) { 2334 if (stat->nodesizes[i] != 0) {
2418 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]); 2335 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
2419 pointers += (1<<i) * stat->nodesizes[i]; 2336 pointers += (1<<i) * stat->nodesizes[i];
@@ -2444,30 +2361,28 @@ static void collect_and_show(struct trie *t, struct seq_file *seq)
2444static int fib_triestat_seq_show(struct seq_file *seq, void *v) 2361static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2445{ 2362{
2446 char bf[128]; 2363 char bf[128];
2447 2364
2448 if (v == SEQ_START_TOKEN) { 2365 if (v == SEQ_START_TOKEN) {
2449 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n", 2366 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
2450 sizeof(struct leaf), sizeof(struct tnode)); 2367 sizeof(struct leaf), sizeof(struct tnode));
2451 if (trie_local) 2368 if (trie_local)
2452 collect_and_show(trie_local, seq); 2369 collect_and_show(trie_local, seq);
2453 2370
2454 if (trie_main) 2371 if (trie_main)
2455 collect_and_show(trie_main, seq); 2372 collect_and_show(trie_main, seq);
2456 } 2373 } else {
2457 else { 2374 snprintf(bf, sizeof(bf), "*\t%08X\t%08X", 200, 400);
2458 snprintf(bf, sizeof(bf), 2375
2459 "*\t%08X\t%08X", 200, 400);
2460
2461 seq_printf(seq, "%-127s\n", bf); 2376 seq_printf(seq, "%-127s\n", bf);
2462 } 2377 }
2463 return 0; 2378 return 0;
2464} 2379}
2465 2380
2466static struct seq_operations fib_triestat_seq_ops = { 2381static struct seq_operations fib_triestat_seq_ops = {
2467 .start = fib_triestat_seq_start, 2382 .start = fib_triestat_seq_start,
2468 .next = fib_triestat_seq_next, 2383 .next = fib_triestat_seq_next,
2469 .stop = fib_triestat_seq_stop, 2384 .stop = fib_triestat_seq_stop,
2470 .show = fib_triestat_seq_show, 2385 .show = fib_triestat_seq_show,
2471}; 2386};
2472 2387
2473static int fib_triestat_seq_open(struct inode *inode, struct file *file) 2388static int fib_triestat_seq_open(struct inode *inode, struct file *file)
@@ -2479,7 +2394,7 @@ static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2479 if (rc) 2394 if (rc)
2480 goto out_kfree; 2395 goto out_kfree;
2481 2396
2482 seq = file->private_data; 2397 seq = file->private_data;
2483out: 2398out:
2484 return rc; 2399 return rc;
2485out_kfree: 2400out_kfree:
@@ -2487,11 +2402,11 @@ out_kfree:
2487} 2402}
2488 2403
2489static struct file_operations fib_triestat_seq_fops = { 2404static struct file_operations fib_triestat_seq_fops = {
2490 .owner = THIS_MODULE, 2405 .owner = THIS_MODULE,
2491 .open = fib_triestat_seq_open, 2406 .open = fib_triestat_seq_open,
2492 .read = seq_read, 2407 .read = seq_read,
2493 .llseek = seq_lseek, 2408 .llseek = seq_lseek,
2494 .release = seq_release_private, 2409 .release = seq_release_private,
2495}; 2410};
2496 2411
2497int __init fib_stat_proc_init(void) 2412int __init fib_stat_proc_init(void)
@@ -2518,25 +2433,30 @@ static struct fib_alias *fib_trie_get_next(struct seq_file *seq)
2518 2433
2519static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 2434static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2520{ 2435{
2521 void *v = NULL; 2436 if (!ip_fib_main_table)
2437 return NULL;
2522 2438
2523 if (ip_fib_main_table) 2439 if (*pos)
2524 v = *pos ? fib_trie_get_next(seq) : SEQ_START_TOKEN; 2440 return fib_trie_get_next(seq);
2525 return v; 2441 else
2442 return SEQ_START_TOKEN;
2526} 2443}
2527 2444
2528static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2445static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2529{ 2446{
2530 ++*pos; 2447 ++*pos;
2531 return v == SEQ_START_TOKEN ? fib_trie_get_first(seq) : fib_trie_get_next(seq); 2448 if (v == SEQ_START_TOKEN)
2449 return fib_trie_get_first(seq);
2450 else
2451 return fib_trie_get_next(seq);
2452
2532} 2453}
2533 2454
2534static void fib_trie_seq_stop(struct seq_file *seq, void *v) 2455static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2535{ 2456{
2536
2537} 2457}
2538 2458
2539/* 2459/*
2540 * This outputs /proc/net/fib_trie. 2460 * This outputs /proc/net/fib_trie.
2541 * 2461 *
2542 * It always works in backward compatibility mode. 2462 * It always works in backward compatibility mode.
@@ -2548,14 +2468,12 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2548 char bf[128]; 2468 char bf[128];
2549 2469
2550 if (v == SEQ_START_TOKEN) { 2470 if (v == SEQ_START_TOKEN) {
2551 if (trie_local) 2471 if (trie_local)
2552 trie_dump_seq(seq, trie_local); 2472 trie_dump_seq(seq, trie_local);
2553 2473
2554 if (trie_main) 2474 if (trie_main)
2555 trie_dump_seq(seq, trie_main); 2475 trie_dump_seq(seq, trie_main);
2556 } 2476 } else {
2557
2558 else {
2559 snprintf(bf, sizeof(bf), 2477 snprintf(bf, sizeof(bf),
2560 "*\t%08X\t%08X", 200, 400); 2478 "*\t%08X\t%08X", 200, 400);
2561 seq_printf(seq, "%-127s\n", bf); 2479 seq_printf(seq, "%-127s\n", bf);
@@ -2565,10 +2483,10 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2565} 2483}
2566 2484
2567static struct seq_operations fib_trie_seq_ops = { 2485static struct seq_operations fib_trie_seq_ops = {
2568 .start = fib_trie_seq_start, 2486 .start = fib_trie_seq_start,
2569 .next = fib_trie_seq_next, 2487 .next = fib_trie_seq_next,
2570 .stop = fib_trie_seq_stop, 2488 .stop = fib_trie_seq_stop,
2571 .show = fib_trie_seq_show, 2489 .show = fib_trie_seq_show,
2572}; 2490};
2573 2491
2574static int fib_trie_seq_open(struct inode *inode, struct file *file) 2492static int fib_trie_seq_open(struct inode *inode, struct file *file)
@@ -2580,7 +2498,7 @@ static int fib_trie_seq_open(struct inode *inode, struct file *file)
2580 if (rc) 2498 if (rc)
2581 goto out_kfree; 2499 goto out_kfree;
2582 2500
2583 seq = file->private_data; 2501 seq = file->private_data;
2584out: 2502out:
2585 return rc; 2503 return rc;
2586out_kfree: 2504out_kfree:
@@ -2588,11 +2506,11 @@ out_kfree:
2588} 2506}
2589 2507
2590static struct file_operations fib_trie_seq_fops = { 2508static struct file_operations fib_trie_seq_fops = {
2591 .owner = THIS_MODULE, 2509 .owner = THIS_MODULE,
2592 .open = fib_trie_seq_open, 2510 .open = fib_trie_seq_open,
2593 .read = seq_read, 2511 .read = seq_read,
2594 .llseek = seq_lseek, 2512 .llseek = seq_lseek,
2595 .release = seq_release_private, 2513 .release= seq_release_private,
2596}; 2514};
2597 2515
2598int __init fib_proc_init(void) 2516int __init fib_proc_init(void)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 279f57abfecb..24eb56ae1b5a 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -114,7 +114,7 @@ struct icmp_bxm {
114/* 114/*
115 * Statistics 115 * Statistics
116 */ 116 */
117DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics); 117DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly;
118 118
119/* An array of errno for error messages from dest unreach. */ 119/* An array of errno for error messages from dest unreach. */
120/* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ 120/* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */
@@ -349,12 +349,12 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
349{ 349{
350 struct sk_buff *skb; 350 struct sk_buff *skb;
351 351
352 ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, 352 if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
353 icmp_param->data_len+icmp_param->head_len, 353 icmp_param->data_len+icmp_param->head_len,
354 icmp_param->head_len, 354 icmp_param->head_len,
355 ipc, rt, MSG_DONTWAIT); 355 ipc, rt, MSG_DONTWAIT) < 0)
356 356 ip_flush_pending_frames(icmp_socket->sk);
357 if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { 357 else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
358 struct icmphdr *icmph = skb->h.icmph; 358 struct icmphdr *icmph = skb->h.icmph;
359 unsigned int csum = 0; 359 unsigned int csum = 0;
360 struct sk_buff *skb1; 360 struct sk_buff *skb1;
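The icmp_push_reply() hunk above fixes an unchecked ip_append_data(): on failure, whatever was already corked on the socket must be flushed rather than checksummed and sent. The corrected shape, as a hedged sketch where finish_and_push() is a placeholder for the checksum-and-send tail of the real function:

    if (ip_append_data(sk, icmp_glue_bits, icmp_param,
                       len, hlen, ipc, rt, MSG_DONTWAIT) < 0)
            ip_flush_pending_frames(sk);
    else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL)
            finish_and_push(skb);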
@@ -627,11 +627,10 @@ static void icmp_unreach(struct sk_buff *skb)
627 break; 627 break;
628 case ICMP_FRAG_NEEDED: 628 case ICMP_FRAG_NEEDED:
629 if (ipv4_config.no_pmtu_disc) { 629 if (ipv4_config.no_pmtu_disc) {
630 LIMIT_NETDEBUG( 630 LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: "
631 printk(KERN_INFO "ICMP: %u.%u.%u.%u: "
632 "fragmentation needed " 631 "fragmentation needed "
633 "and DF set.\n", 632 "and DF set.\n",
634 NIPQUAD(iph->daddr))); 633 NIPQUAD(iph->daddr));
635 } else { 634 } else {
636 info = ip_rt_frag_needed(iph, 635 info = ip_rt_frag_needed(iph,
637 ntohs(icmph->un.frag.mtu)); 636 ntohs(icmph->un.frag.mtu));
@@ -640,10 +639,9 @@ static void icmp_unreach(struct sk_buff *skb)
640 } 639 }
641 break; 640 break;
642 case ICMP_SR_FAILED: 641 case ICMP_SR_FAILED:
643 LIMIT_NETDEBUG( 642 LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: Source "
644 printk(KERN_INFO "ICMP: %u.%u.%u.%u: Source "
645 "Route Failed.\n", 643 "Route Failed.\n",
646 NIPQUAD(iph->daddr))); 644 NIPQUAD(iph->daddr));
647 break; 645 break;
648 default: 646 default:
649 break; 647 break;
@@ -936,8 +934,7 @@ int icmp_rcv(struct sk_buff *skb)
936 case CHECKSUM_HW: 934 case CHECKSUM_HW:
937 if (!(u16)csum_fold(skb->csum)) 935 if (!(u16)csum_fold(skb->csum))
938 break; 936 break;
939 NETDEBUG(if (net_ratelimit()) 937 LIMIT_NETDEBUG(KERN_DEBUG "icmp v4 hw csum failure\n");
940 printk(KERN_DEBUG "icmp v4 hw csum failure\n"));
941 case CHECKSUM_NONE: 938 case CHECKSUM_NONE:
942 if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) 939 if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))
943 goto error; 940 goto error;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 5088f90835ae..44607f4767b8 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -904,7 +904,7 @@ int igmp_rcv(struct sk_buff *skb)
904 case IGMP_MTRACE_RESP: 904 case IGMP_MTRACE_RESP:
905 break; 905 break;
906 default: 906 default:
907 NETDEBUG(printk(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type)); 907 NETDEBUG(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type);
908 } 908 }
909 in_dev_put(in_dev); 909 in_dev_put(in_dev);
910 kfree_skb(skb); 910 kfree_skb(skb);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
new file mode 100644
index 000000000000..fe3c6d3d0c91
--- /dev/null
+++ b/net/ipv4/inet_connection_sock.c
@@ -0,0 +1,641 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Support for INET connection oriented protocols.
7 *
8 * Authors: See the TCP sources
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or(at your option) any later version.
14 */
15
16#include <linux/config.h>
17#include <linux/module.h>
18#include <linux/jhash.h>
19
20#include <net/inet_connection_sock.h>
21#include <net/inet_hashtables.h>
22#include <net/inet_timewait_sock.h>
23#include <net/ip.h>
24#include <net/route.h>
25#include <net/tcp_states.h>
26#include <net/xfrm.h>
27
28#ifdef INET_CSK_DEBUG
29const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
30EXPORT_SYMBOL(inet_csk_timer_bug_msg);
31#endif
32
33/*
34 * This array holds the first and last local port number.
35 * For high-usage systems, use sysctl to change this to
36 * 32768-61000
37 */
38int sysctl_local_port_range[2] = { 1024, 4999 };
39
40static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb)
41{
42 const u32 sk_rcv_saddr = inet_rcv_saddr(sk);
43 struct sock *sk2;
44 struct hlist_node *node;
45 int reuse = sk->sk_reuse;
46
47 sk_for_each_bound(sk2, node, &tb->owners) {
48 if (sk != sk2 &&
49 !inet_v6_ipv6only(sk2) &&
50 (!sk->sk_bound_dev_if ||
51 !sk2->sk_bound_dev_if ||
52 sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
53 if (!reuse || !sk2->sk_reuse ||
54 sk2->sk_state == TCP_LISTEN) {
55 const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
56 if (!sk2_rcv_saddr || !sk_rcv_saddr ||
57 sk2_rcv_saddr == sk_rcv_saddr)
58 break;
59 }
60 }
61 }
62 return node != NULL;
63}
64
65/* Obtain a reference to a local port for the given sock,
66 * if snum is zero it means select any available local port.
67 */
68int inet_csk_get_port(struct inet_hashinfo *hashinfo,
69 struct sock *sk, unsigned short snum)
70{
71 struct inet_bind_hashbucket *head;
72 struct hlist_node *node;
73 struct inet_bind_bucket *tb;
74 int ret;
75
76 local_bh_disable();
77 if (!snum) {
78 int low = sysctl_local_port_range[0];
79 int high = sysctl_local_port_range[1];
80 int remaining = (high - low) + 1;
81 int rover;
82
83 spin_lock(&hashinfo->portalloc_lock);
84 if (hashinfo->port_rover < low)
85 rover = low;
86 else
87 rover = hashinfo->port_rover;
88 do {
89 rover++;
90 if (rover > high)
91 rover = low;
92 head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
93 spin_lock(&head->lock);
94 inet_bind_bucket_for_each(tb, node, &head->chain)
95 if (tb->port == rover)
96 goto next;
97 break;
98 next:
99 spin_unlock(&head->lock);
100 } while (--remaining > 0);
101 hashinfo->port_rover = rover;
102 spin_unlock(&hashinfo->portalloc_lock);
103
104 /* Exhausted local port range during search? It is not
105 * possible for us to be holding one of the bind hash
106 * locks if this test triggers, because if 'remaining'
107 * drops to zero, we broke out of the do/while loop at
108 * the top level, not from the 'break;' statement.
109 */
110 ret = 1;
111 if (remaining <= 0)
112 goto fail;
113
114 /* OK, here is the one we will use. HEAD is
115	 * non-NULL and we hold its mutex.
116 */
117 snum = rover;
118 } else {
119 head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
120 spin_lock(&head->lock);
121 inet_bind_bucket_for_each(tb, node, &head->chain)
122 if (tb->port == snum)
123 goto tb_found;
124 }
125 tb = NULL;
126 goto tb_not_found;
127tb_found:
128 if (!hlist_empty(&tb->owners)) {
129 if (sk->sk_reuse > 1)
130 goto success;
131 if (tb->fastreuse > 0 &&
132 sk->sk_reuse && sk->sk_state != TCP_LISTEN) {
133 goto success;
134 } else {
135 ret = 1;
136 if (inet_csk_bind_conflict(sk, tb))
137 goto fail_unlock;
138 }
139 }
140tb_not_found:
141 ret = 1;
142 if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL)
143 goto fail_unlock;
144 if (hlist_empty(&tb->owners)) {
145 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
146 tb->fastreuse = 1;
147 else
148 tb->fastreuse = 0;
149 } else if (tb->fastreuse &&
150 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
151 tb->fastreuse = 0;
152success:
153 if (!inet_csk(sk)->icsk_bind_hash)
154 inet_bind_hash(sk, tb, snum);
155 BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
156 ret = 0;
157
158fail_unlock:
159 spin_unlock(&head->lock);
160fail:
161 local_bh_enable();
162 return ret;
163}
164
165EXPORT_SYMBOL_GPL(inet_csk_get_port);
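The anonymous-bind path above rotates a "rover" through the local port range, skipping ports that already own a bind bucket, and remembers where it stopped for the next caller. The same search over a toy in-use table, in user space (hypothetical names, no locking or hash chains):

#include <stdbool.h>
#include <stdio.h>

static bool port_in_use[65536];    /* toy stand-in for the bind hash     */
static int port_rover = 1023;      /* persists like hashinfo->port_rover */

/* Return a free port in [low, high], or -1 when the range is exhausted. */
static int pick_local_port(int low, int high)
{
	int remaining = high - low + 1;
	int rover = port_rover < low ? low : port_rover;

	do {
		if (++rover > high)
			rover = low;          /* wrap around the range */
		if (!port_in_use[rover]) {
			port_rover = rover;   /* resume here next time */
			return rover;
		}
	} while (--remaining > 0);
	return -1;                            /* every port is taken   */
}

int main(void)
{
	port_in_use[1024] = port_in_use[1025] = true;
	printf("got port %d\n", pick_local_port(1024, 4999));  /* 1026 */
	return 0;
}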
166
167/*
168 * Wait for an incoming connection, avoid race conditions. This must be called
169 * with the socket locked.
170 */
171static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
172{
173 struct inet_connection_sock *icsk = inet_csk(sk);
174 DEFINE_WAIT(wait);
175 int err;
176
177 /*
178 * True wake-one mechanism for incoming connections: only
179 * one process gets woken up, not the 'whole herd'.
180 * Since we do not 'race & poll' for established sockets
181 * anymore, the common case will execute the loop only once.
182 *
183	 * Subtle issue: a waiter added with "add_wait_queue_exclusive()" goes
184 * after any current non-exclusive waiters, and we know that
185 * it will always _stay_ after any new non-exclusive waiters
186 * because all non-exclusive waiters are added at the
187 * beginning of the wait-queue. As such, it's ok to "drop"
188 * our exclusiveness temporarily when we get woken up without
189 * having to remove and re-insert us on the wait queue.
190 */
191 for (;;) {
192 prepare_to_wait_exclusive(sk->sk_sleep, &wait,
193 TASK_INTERRUPTIBLE);
194 release_sock(sk);
195 if (reqsk_queue_empty(&icsk->icsk_accept_queue))
196 timeo = schedule_timeout(timeo);
197 lock_sock(sk);
198 err = 0;
199 if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
200 break;
201 err = -EINVAL;
202 if (sk->sk_state != TCP_LISTEN)
203 break;
204 err = sock_intr_errno(timeo);
205 if (signal_pending(current))
206 break;
207 err = -EAGAIN;
208 if (!timeo)
209 break;
210 }
211 finish_wait(sk->sk_sleep, &wait);
212 return err;
213}
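The comment above describes a wake-one discipline: exclusive waiters queue behind non-exclusive ones, so one incoming connection wakes exactly one acceptor instead of the whole herd. A loose user-space analogue with POSIX condition variables (not the kernel wait-queue API; pthread_cond_signal wakes at least one waiter, which is the same discipline in spirit):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t q_nonempty = PTHREAD_COND_INITIALIZER;
static int pending;                        /* toy accept-queue depth */

static void post_connection(void)
{
	pthread_mutex_lock(&q_lock);
	pending++;
	pthread_cond_signal(&q_nonempty);  /* wake one acceptor, not all */
	pthread_mutex_unlock(&q_lock);
}

static void wait_for_connection(void)
{
	pthread_mutex_lock(&q_lock);
	while (pending == 0)               /* re-check after every wakeup */
		pthread_cond_wait(&q_nonempty, &q_lock);
	pending--;
	pthread_mutex_unlock(&q_lock);
}

int main(void)
{
	post_connection();
	wait_for_connection();             /* returns at once: one pending */
	printf("accepted\n");
	return 0;
}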
214
215/*
216 * This will accept the next outstanding connection.
217 */
218struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
219{
220 struct inet_connection_sock *icsk = inet_csk(sk);
221 struct sock *newsk;
222 int error;
223
224 lock_sock(sk);
225
226 /* We need to make sure that this socket is listening,
227 * and that it has something pending.
228 */
229 error = -EINVAL;
230 if (sk->sk_state != TCP_LISTEN)
231 goto out_err;
232
233 /* Find already established connection */
234 if (reqsk_queue_empty(&icsk->icsk_accept_queue)) {
235 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
236
237 /* If this is a non blocking socket don't sleep */
238 error = -EAGAIN;
239 if (!timeo)
240 goto out_err;
241
242 error = inet_csk_wait_for_connect(sk, timeo);
243 if (error)
244 goto out_err;
245 }
246
247 newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
248 BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
249out:
250 release_sock(sk);
251 return newsk;
252out_err:
253 newsk = NULL;
254 *err = error;
255 goto out;
256}
257
258EXPORT_SYMBOL(inet_csk_accept);
259
260/*
261 * Using different timers for retransmit, delayed acks and probes
262	 * We may wish to use just one timer maintaining a list of expiry jiffies
263 * to optimize.
264 */
265void inet_csk_init_xmit_timers(struct sock *sk,
266 void (*retransmit_handler)(unsigned long),
267 void (*delack_handler)(unsigned long),
268 void (*keepalive_handler)(unsigned long))
269{
270 struct inet_connection_sock *icsk = inet_csk(sk);
271
272 init_timer(&icsk->icsk_retransmit_timer);
273 init_timer(&icsk->icsk_delack_timer);
274 init_timer(&sk->sk_timer);
275
276 icsk->icsk_retransmit_timer.function = retransmit_handler;
277 icsk->icsk_delack_timer.function = delack_handler;
278 sk->sk_timer.function = keepalive_handler;
279
280 icsk->icsk_retransmit_timer.data =
281 icsk->icsk_delack_timer.data =
282 sk->sk_timer.data = (unsigned long)sk;
283
284 icsk->icsk_pending = icsk->icsk_ack.pending = 0;
285}
286
287EXPORT_SYMBOL(inet_csk_init_xmit_timers);
288
289void inet_csk_clear_xmit_timers(struct sock *sk)
290{
291 struct inet_connection_sock *icsk = inet_csk(sk);
292
293 icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0;
294
295 sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
296 sk_stop_timer(sk, &icsk->icsk_delack_timer);
297 sk_stop_timer(sk, &sk->sk_timer);
298}
299
300EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
301
302void inet_csk_delete_keepalive_timer(struct sock *sk)
303{
304 sk_stop_timer(sk, &sk->sk_timer);
305}
306
307EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);
308
309void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
310{
311 sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
312}
313
314EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
315
316struct dst_entry* inet_csk_route_req(struct sock *sk,
317 const struct request_sock *req)
318{
319 struct rtable *rt;
320 const struct inet_request_sock *ireq = inet_rsk(req);
321 struct ip_options *opt = inet_rsk(req)->opt;
322 struct flowi fl = { .oif = sk->sk_bound_dev_if,
323 .nl_u = { .ip4_u =
324 { .daddr = ((opt && opt->srr) ?
325 opt->faddr :
326 ireq->rmt_addr),
327 .saddr = ireq->loc_addr,
328 .tos = RT_CONN_FLAGS(sk) } },
329 .proto = sk->sk_protocol,
330 .uli_u = { .ports =
331 { .sport = inet_sk(sk)->sport,
332 .dport = ireq->rmt_port } } };
333
334 if (ip_route_output_flow(&rt, &fl, sk, 0)) {
335 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
336 return NULL;
337 }
338 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
339 ip_rt_put(rt);
340 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
341 return NULL;
342 }
343 return &rt->u.dst;
344}
345
346EXPORT_SYMBOL_GPL(inet_csk_route_req);
347
348static inline u32 inet_synq_hash(const u32 raddr, const u16 rport,
349 const u32 rnd, const u16 synq_hsize)
350{
351 return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1);
352}
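inet_synq_hash() mixes the peer address and port with a per-listener random seed, then masks with the table size; the mask only works as a modulus because nr_table_entries is a power of two. A user-space sketch with a toy mixer standing in for jhash_2words (the mixer is an assumption for illustration; any decent two-word hash fits):

#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for jhash_2words(). */
static uint32_t mix2(uint32_t a, uint32_t b, uint32_t seed)
{
	uint32_t h = seed ^ a;

	h = (h ^ (h >> 16)) * 0x45d9f3b;
	h ^= b;
	h = (h ^ (h >> 16)) * 0x45d9f3b;
	return h ^ (h >> 16);
}

static uint32_t synq_hash(uint32_t raddr, uint16_t rport,
			  uint32_t rnd, uint16_t synq_hsize)
{
	/* synq_hsize must be a power of two for the mask to be a modulus */
	return mix2(raddr, rport, rnd) & (synq_hsize - 1);
}

int main(void)
{
	printf("bucket %u\n", synq_hash(0x0a000001, 40000, 0xdeadbeef, 512));
	return 0;
}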
353
354#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
355#define AF_INET_FAMILY(fam) ((fam) == AF_INET)
356#else
357#define AF_INET_FAMILY(fam) 1
358#endif
359
360struct request_sock *inet_csk_search_req(const struct sock *sk,
361 struct request_sock ***prevp,
362 const __u16 rport, const __u32 raddr,
363 const __u32 laddr)
364{
365 const struct inet_connection_sock *icsk = inet_csk(sk);
366 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
367 struct request_sock *req, **prev;
368
369 for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd,
370 lopt->nr_table_entries)];
371 (req = *prev) != NULL;
372 prev = &req->dl_next) {
373 const struct inet_request_sock *ireq = inet_rsk(req);
374
375 if (ireq->rmt_port == rport &&
376 ireq->rmt_addr == raddr &&
377 ireq->loc_addr == laddr &&
378 AF_INET_FAMILY(req->rsk_ops->family)) {
379 BUG_TRAP(!req->sk);
380 *prevp = prev;
381 break;
382 }
383 }
384
385 return req;
386}
387
388EXPORT_SYMBOL_GPL(inet_csk_search_req);
389
390void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
391 const unsigned timeout)
392{
393 struct inet_connection_sock *icsk = inet_csk(sk);
394 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
395 const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port,
396 lopt->hash_rnd, lopt->nr_table_entries);
397
398 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
399 inet_csk_reqsk_queue_added(sk, timeout);
400}
401
402/* Only thing we need from tcp.h */
403extern int sysctl_tcp_synack_retries;
404
405EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
406
407void inet_csk_reqsk_queue_prune(struct sock *parent,
408 const unsigned long interval,
409 const unsigned long timeout,
410 const unsigned long max_rto)
411{
412 struct inet_connection_sock *icsk = inet_csk(parent);
413 struct request_sock_queue *queue = &icsk->icsk_accept_queue;
414 struct listen_sock *lopt = queue->listen_opt;
415 int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
416 int thresh = max_retries;
417 unsigned long now = jiffies;
418 struct request_sock **reqp, *req;
419 int i, budget;
420
421 if (lopt == NULL || lopt->qlen == 0)
422 return;
423
424	/* Normally all the openreqs are young and become mature
425	 * (i.e. converted to established sockets) within the first timeout.
426	 * If the synack was not acknowledged for 3 seconds, it means
427	 * one of the following things: the synack was lost, the ack was lost,
428	 * rtt is high, or nobody planned to ack (i.e. synflood).
429	 * When the server is a bit loaded, the queue is populated with old
430	 * open requests, reducing the effective size of the queue.
431	 * When the server is well loaded, the queue size reduces to zero
432	 * after several minutes of work. That is not a synflood,
433	 * it is normal operation. The solution is to prune entries
434	 * that are too old, overriding the normal timeout, when
435	 * the situation becomes dangerous.
436	 *
437	 * Essentially, we reserve half of the room for young
438	 * embryos, and abort old ones without pity if old
439	 * ones are about to clog our table.
440 */
441 if (lopt->qlen>>(lopt->max_qlen_log-1)) {
442 int young = (lopt->qlen_young<<1);
443
444 while (thresh > 2) {
445 if (lopt->qlen < young)
446 break;
447 thresh--;
448 young <<= 1;
449 }
450 }
451
452 if (queue->rskq_defer_accept)
453 max_retries = queue->rskq_defer_accept;
454
455 budget = 2 * (lopt->nr_table_entries / (timeout / interval));
456 i = lopt->clock_hand;
457
458 do {
459 reqp=&lopt->syn_table[i];
460 while ((req = *reqp) != NULL) {
461 if (time_after_eq(now, req->expires)) {
462 if ((req->retrans < thresh ||
463 (inet_rsk(req)->acked && req->retrans < max_retries))
464 && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
465 unsigned long timeo;
466
467 if (req->retrans++ == 0)
468 lopt->qlen_young--;
469 timeo = min((timeout << req->retrans), max_rto);
470 req->expires = now + timeo;
471 reqp = &req->dl_next;
472 continue;
473 }
474
475 /* Drop this request */
476 inet_csk_reqsk_queue_unlink(parent, req, reqp);
477 reqsk_queue_removed(queue, req);
478 reqsk_free(req);
479 continue;
480 }
481 reqp = &req->dl_next;
482 }
483
484 i = (i + 1) & (lopt->nr_table_entries - 1);
485
486 } while (--budget > 0);
487
488 lopt->clock_hand = i;
489
490 if (lopt->qlen)
491 inet_csk_reset_keepalive_timer(parent, interval);
492}
493
494EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
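When the queue is more than half full, the loop above lowers the retransmit threshold one step for every doubling by which old entries outnumber young ones, so a synflood sheds stale embryos fast. That adaptation in isolation, with toy numbers (a sketch, not the kernel structures):

#include <stdio.h>

/* Mirror of the threshold loop in inet_csk_reqsk_queue_prune(): while
 * old (non-young) requests dominate a more-than-half-full queue, lower
 * the retransmit count an entry may reach before it is dropped. */
static int prune_thresh(int qlen, int max_qlen_log, int qlen_young,
			int max_retries)
{
	int thresh = max_retries;

	if (qlen >> (max_qlen_log - 1)) {          /* over half full? */
		int young = qlen_young << 1;

		while (thresh > 2) {
			if (qlen < young)
				break;
			thresh--;
			young <<= 1;
		}
	}
	return thresh;
}

int main(void)
{
	/* 900 of 1024 slots used, only 50 young: thresh drops from 5 to 2 */
	printf("thresh = %d\n", prune_thresh(900, 10, 50, 5));
	return 0;
}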
495
496struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
497 const unsigned int __nocast priority)
498{
499 struct sock *newsk = sk_clone(sk, priority);
500
501 if (newsk != NULL) {
502 struct inet_connection_sock *newicsk = inet_csk(newsk);
503
504 newsk->sk_state = TCP_SYN_RECV;
505 newicsk->icsk_bind_hash = NULL;
506
507 inet_sk(newsk)->dport = inet_rsk(req)->rmt_port;
508 newsk->sk_write_space = sk_stream_write_space;
509
510 newicsk->icsk_retransmits = 0;
511 newicsk->icsk_backoff = 0;
512 newicsk->icsk_probes_out = 0;
513
514 /* Deinitialize accept_queue to trap illegal accesses. */
515 memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
516 }
517 return newsk;
518}
519
520EXPORT_SYMBOL_GPL(inet_csk_clone);
521
522/*
523 * At this point, there should be no process reference to this
524 * socket, and thus no user references at all. Therefore we
525 * can assume the socket waitqueue is inactive and nobody will
526 * try to jump onto it.
527 */
528void inet_csk_destroy_sock(struct sock *sk)
529{
530 BUG_TRAP(sk->sk_state == TCP_CLOSE);
531 BUG_TRAP(sock_flag(sk, SOCK_DEAD));
532
533 /* It cannot be in hash table! */
534 BUG_TRAP(sk_unhashed(sk));
535
536	 /* If inet_sk(sk)->num is not 0, it must be bound */
537 BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash);
538
539 sk->sk_prot->destroy(sk);
540
541 sk_stream_kill_queues(sk);
542
543 xfrm_sk_free_policy(sk);
544
545 sk_refcnt_debug_release(sk);
546
547 atomic_dec(sk->sk_prot->orphan_count);
548 sock_put(sk);
549}
550
551EXPORT_SYMBOL(inet_csk_destroy_sock);
552
553int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
554{
555 struct inet_sock *inet = inet_sk(sk);
556 struct inet_connection_sock *icsk = inet_csk(sk);
557 int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
558
559 if (rc != 0)
560 return rc;
561
562 sk->sk_max_ack_backlog = 0;
563 sk->sk_ack_backlog = 0;
564 inet_csk_delack_init(sk);
565
566 /* There is race window here: we announce ourselves listening,
567 * but this transition is still not validated by get_port().
568 * It is OK, because this socket enters to hash table only
569 * after validation is complete.
570 */
571 sk->sk_state = TCP_LISTEN;
572 if (!sk->sk_prot->get_port(sk, inet->num)) {
573 inet->sport = htons(inet->num);
574
575 sk_dst_reset(sk);
576 sk->sk_prot->hash(sk);
577
578 return 0;
579 }
580
581 sk->sk_state = TCP_CLOSE;
582 __reqsk_queue_destroy(&icsk->icsk_accept_queue);
583 return -EADDRINUSE;
584}
585
586EXPORT_SYMBOL_GPL(inet_csk_listen_start);
587
588/*
589 * This routine closes sockets which have been at least partially
590 * opened, but not yet accepted.
591 */
592void inet_csk_listen_stop(struct sock *sk)
593{
594 struct inet_connection_sock *icsk = inet_csk(sk);
595 struct request_sock *acc_req;
596 struct request_sock *req;
597
598 inet_csk_delete_keepalive_timer(sk);
599
600 /* make all the listen_opt local to us */
601 acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
602
603 /* Following specs, it would be better either to send FIN
604 * (and enter FIN-WAIT-1, it is normal close)
605 * or to send active reset (abort).
606	 * Certainly, it is pretty dangerous during a synflood, but that is
607	 * a bad justification for our negligence 8)
608	 * To be honest, we are not able to implement either
609 * of the variants now. --ANK
610 */
611 reqsk_queue_destroy(&icsk->icsk_accept_queue);
612
613 while ((req = acc_req) != NULL) {
614 struct sock *child = req->sk;
615
616 acc_req = req->dl_next;
617
618 local_bh_disable();
619 bh_lock_sock(child);
620 BUG_TRAP(!sock_owned_by_user(child));
621 sock_hold(child);
622
623 sk->sk_prot->disconnect(child, O_NONBLOCK);
624
625 sock_orphan(child);
626
627 atomic_inc(sk->sk_prot->orphan_count);
628
629 inet_csk_destroy_sock(child);
630
631 bh_unlock_sock(child);
632 local_bh_enable();
633 sock_put(child);
634
635 sk_acceptq_removed(sk);
636 __reqsk_free(req);
637 }
638 BUG_TRAP(!sk->sk_ack_backlog);
639}
640
641EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
new file mode 100644
index 000000000000..71f3c7350c6e
--- /dev/null
+++ b/net/ipv4/inet_diag.c
@@ -0,0 +1,868 @@
1/*
2 * inet_diag.c Module for monitoring INET transport protocols sockets.
3 *
4 * Version: $Id: inet_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $
5 *
6 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#include <linux/config.h>
15#include <linux/module.h>
16#include <linux/types.h>
17#include <linux/fcntl.h>
18#include <linux/random.h>
19#include <linux/cache.h>
20#include <linux/init.h>
21#include <linux/time.h>
22
23#include <net/icmp.h>
24#include <net/tcp.h>
25#include <net/ipv6.h>
26#include <net/inet_common.h>
27#include <net/inet_connection_sock.h>
28#include <net/inet_hashtables.h>
29#include <net/inet_timewait_sock.h>
30#include <net/inet6_hashtables.h>
31
32#include <linux/inet.h>
33#include <linux/stddef.h>
34
35#include <linux/inet_diag.h>
36
37static const struct inet_diag_handler **inet_diag_table;
38
39struct inet_diag_entry {
40 u32 *saddr;
41 u32 *daddr;
42 u16 sport;
43 u16 dport;
44 u16 family;
45 u16 userlocks;
46};
47
48static struct sock *idiagnl;
49
50#define INET_DIAG_PUT(skb, attrtype, attrlen) \
51 RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
52
53static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
54 int ext, u32 pid, u32 seq, u16 nlmsg_flags,
55 const struct nlmsghdr *unlh)
56{
57 const struct inet_sock *inet = inet_sk(sk);
58 const struct inet_connection_sock *icsk = inet_csk(sk);
59 struct inet_diag_msg *r;
60 struct nlmsghdr *nlh;
61 void *info = NULL;
62 struct inet_diag_meminfo *minfo = NULL;
63 unsigned char *b = skb->tail;
64 const struct inet_diag_handler *handler;
65
66 handler = inet_diag_table[unlh->nlmsg_type];
67 BUG_ON(handler == NULL);
68
69 nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r));
70 nlh->nlmsg_flags = nlmsg_flags;
71
72 r = NLMSG_DATA(nlh);
73 if (sk->sk_state != TCP_TIME_WAIT) {
74 if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
75 minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO,
76 sizeof(*minfo));
77 if (ext & (1 << (INET_DIAG_INFO - 1)))
78 info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
79 handler->idiag_info_size);
80
81 if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
82 size_t len = strlen(icsk->icsk_ca_ops->name);
83 strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
84 icsk->icsk_ca_ops->name);
85 }
86 }
87 r->idiag_family = sk->sk_family;
88 r->idiag_state = sk->sk_state;
89 r->idiag_timer = 0;
90 r->idiag_retrans = 0;
91
92 r->id.idiag_if = sk->sk_bound_dev_if;
93 r->id.idiag_cookie[0] = (u32)(unsigned long)sk;
94 r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
95
96 if (r->idiag_state == TCP_TIME_WAIT) {
97 const struct inet_timewait_sock *tw = inet_twsk(sk);
98 long tmo = tw->tw_ttd - jiffies;
99 if (tmo < 0)
100 tmo = 0;
101
102 r->id.idiag_sport = tw->tw_sport;
103 r->id.idiag_dport = tw->tw_dport;
104 r->id.idiag_src[0] = tw->tw_rcv_saddr;
105 r->id.idiag_dst[0] = tw->tw_daddr;
106 r->idiag_state = tw->tw_substate;
107 r->idiag_timer = 3;
108 r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ;
109 r->idiag_rqueue = 0;
110 r->idiag_wqueue = 0;
111 r->idiag_uid = 0;
112 r->idiag_inode = 0;
113#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
114 if (r->idiag_family == AF_INET6) {
115 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
116
117 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
118 &tcp6tw->tw_v6_rcv_saddr);
119 ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
120 &tcp6tw->tw_v6_daddr);
121 }
122#endif
123 nlh->nlmsg_len = skb->tail - b;
124 return skb->len;
125 }
126
127 r->id.idiag_sport = inet->sport;
128 r->id.idiag_dport = inet->dport;
129 r->id.idiag_src[0] = inet->rcv_saddr;
130 r->id.idiag_dst[0] = inet->daddr;
131
132#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
133 if (r->idiag_family == AF_INET6) {
134 struct ipv6_pinfo *np = inet6_sk(sk);
135
136 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
137 &np->rcv_saddr);
138 ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
139 &np->daddr);
140 }
141#endif
142
143#define EXPIRES_IN_MS(tmo) ((((tmo) - jiffies) * 1000 + HZ - 1) / HZ)
144
145 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
146 r->idiag_timer = 1;
147 r->idiag_retrans = icsk->icsk_retransmits;
148 r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
149 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
150 r->idiag_timer = 4;
151 r->idiag_retrans = icsk->icsk_probes_out;
152 r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
153 } else if (timer_pending(&sk->sk_timer)) {
154 r->idiag_timer = 2;
155 r->idiag_retrans = icsk->icsk_probes_out;
156 r->idiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires);
157 } else {
158 r->idiag_timer = 0;
159 r->idiag_expires = 0;
160 }
161#undef EXPIRES_IN_MS
162
163 r->idiag_uid = sock_i_uid(sk);
164 r->idiag_inode = sock_i_ino(sk);
165
166 if (minfo) {
167 minfo->idiag_rmem = atomic_read(&sk->sk_rmem_alloc);
168 minfo->idiag_wmem = sk->sk_wmem_queued;
169 minfo->idiag_fmem = sk->sk_forward_alloc;
170 minfo->idiag_tmem = atomic_read(&sk->sk_wmem_alloc);
171 }
172
173 handler->idiag_get_info(sk, r, info);
174
175 if (sk->sk_state < TCP_TIME_WAIT &&
176 icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
177 icsk->icsk_ca_ops->get_info(sk, ext, skb);
178
179 nlh->nlmsg_len = skb->tail - b;
180 return skb->len;
181
182rtattr_failure:
183nlmsg_failure:
184 skb_trim(skb, b - skb->data);
185 return -1;
186}
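The idiag_cookie pair above splits the socket pointer into two u32 halves, shifting by 31 and then by 1 because a single >> 32 would be undefined on builds where unsigned long is 32 bits wide. A user-space round trip of the same trick:

#include <stdint.h>
#include <stdio.h>

/* Split a pointer-sized value into two u32 cookies the way
 * inet_diag_fill() does; on 32-bit builds cookie[1] is simply 0. */
static void ptr_to_cookie(unsigned long p, uint32_t cookie[2])
{
	cookie[0] = (uint32_t)p;
	cookie[1] = (uint32_t)((p >> 31) >> 1);  /* defined even at 32 bits */
}

static unsigned long cookie_to_ptr(const uint32_t cookie[2])
{
	return ((unsigned long)cookie[1] << 31 << 1) | cookie[0];
}

int main(void)
{
	uint32_t c[2];
	unsigned long p = (unsigned long)&c;     /* any pointer-sized value */

	ptr_to_cookie(p, c);
	printf("round trip ok: %d\n", cookie_to_ptr(c) == p);
	return 0;
}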
187
188static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh)
189{
190 int err;
191 struct sock *sk;
192 struct inet_diag_req *req = NLMSG_DATA(nlh);
193 struct sk_buff *rep;
194 struct inet_hashinfo *hashinfo;
195 const struct inet_diag_handler *handler;
196
197 handler = inet_diag_table[nlh->nlmsg_type];
198 BUG_ON(handler == NULL);
199 hashinfo = handler->idiag_hashinfo;
200
201 if (req->idiag_family == AF_INET) {
202 sk = inet_lookup(hashinfo, req->id.idiag_dst[0],
203 req->id.idiag_dport, req->id.idiag_src[0],
204 req->id.idiag_sport, req->id.idiag_if);
205 }
206#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
207 else if (req->idiag_family == AF_INET6) {
208 sk = inet6_lookup(hashinfo,
209 (struct in6_addr *)req->id.idiag_dst,
210 req->id.idiag_dport,
211 (struct in6_addr *)req->id.idiag_src,
212 req->id.idiag_sport,
213 req->id.idiag_if);
214 }
215#endif
216 else {
217 return -EINVAL;
218 }
219
220 if (sk == NULL)
221 return -ENOENT;
222
223 err = -ESTALE;
224 if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE ||
225 req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) &&
226 ((u32)(unsigned long)sk != req->id.idiag_cookie[0] ||
227 (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1]))
228 goto out;
229
230 err = -ENOMEM;
231 rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) +
232 sizeof(struct inet_diag_meminfo) +
233 handler->idiag_info_size + 64)),
234 GFP_KERNEL);
235 if (!rep)
236 goto out;
237
238 if (inet_diag_fill(rep, sk, req->idiag_ext,
239 NETLINK_CB(in_skb).pid,
240 nlh->nlmsg_seq, 0, nlh) <= 0)
241 BUG();
242
243 err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid,
244 MSG_DONTWAIT);
245 if (err > 0)
246 err = 0;
247
248out:
249 if (sk) {
250 if (sk->sk_state == TCP_TIME_WAIT)
251 inet_twsk_put((struct inet_timewait_sock *)sk);
252 else
253 sock_put(sk);
254 }
255 return err;
256}
257
258static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
259{
260 int words = bits >> 5;
261
262 bits &= 0x1f;
263
264 if (words) {
265 if (memcmp(a1, a2, words << 2))
266 return 0;
267 }
268 if (bits) {
269 __u32 w1, w2;
270 __u32 mask;
271
272 w1 = a1[words];
273 w2 = a2[words];
274
275 mask = htonl((0xffffffff) << (32 - bits));
276
277 if ((w1 ^ w2) & mask)
278 return 0;
279 }
280
281 return 1;
282}
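bitstring_match() compares the first prefix_len bits: whole 32-bit words via memcmp, then a masked tail word, building the mask with htonl so it lines up with addresses stored in network byte order. A quick user-space check with toy IPv4 addresses:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Same logic as bitstring_match(): addresses are arrays of 32-bit
 * words in network byte order, compared on the leading `bits` bits. */
static int prefix_match(const uint32_t *a1, const uint32_t *a2, int bits)
{
	int words = bits >> 5;

	if (words && memcmp(a1, a2, words << 2))
		return 0;
	bits &= 0x1f;
	if (bits) {
		uint32_t mask = htonl(0xffffffffu << (32 - bits));

		if ((a1[words] ^ a2[words]) & mask)
			return 0;
	}
	return 1;
}

int main(void)
{
	uint32_t a = htonl(0x0a010203);    /* 10.1.2.3   */
	uint32_t b = htonl(0x0a0105ff);    /* 10.1.5.255 */

	printf("/16: %d  /24: %d\n", prefix_match(&a, &b, 16),
	       prefix_match(&a, &b, 24));  /* prints 1 then 0 */
	return 0;
}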
283
284
285static int inet_diag_bc_run(const void *bc, int len,
286 const struct inet_diag_entry *entry)
287{
288 while (len > 0) {
289 int yes = 1;
290 const struct inet_diag_bc_op *op = bc;
291
292 switch (op->code) {
293 case INET_DIAG_BC_NOP:
294 break;
295 case INET_DIAG_BC_JMP:
296 yes = 0;
297 break;
298 case INET_DIAG_BC_S_GE:
299 yes = entry->sport >= op[1].no;
300 break;
301 case INET_DIAG_BC_S_LE:
302 yes = entry->dport <= op[1].no;
303 break;
304 case INET_DIAG_BC_D_GE:
305 yes = entry->dport >= op[1].no;
306 break;
307 case INET_DIAG_BC_D_LE:
308 yes = entry->dport <= op[1].no;
309 break;
310 case INET_DIAG_BC_AUTO:
311 yes = !(entry->userlocks & SOCK_BINDPORT_LOCK);
312 break;
313 case INET_DIAG_BC_S_COND:
314 case INET_DIAG_BC_D_COND: {
315 struct inet_diag_hostcond *cond;
316 u32 *addr;
317
318 cond = (struct inet_diag_hostcond *)(op + 1);
319 if (cond->port != -1 &&
320 cond->port != (op->code == INET_DIAG_BC_S_COND ?
321 entry->sport : entry->dport)) {
322 yes = 0;
323 break;
324 }
325
326 if (cond->prefix_len == 0)
327 break;
328
329 if (op->code == INET_DIAG_BC_S_COND)
330 addr = entry->saddr;
331 else
332 addr = entry->daddr;
333
334 if (bitstring_match(addr, cond->addr, cond->prefix_len))
335 break;
336 if (entry->family == AF_INET6 &&
337 cond->family == AF_INET) {
338 if (addr[0] == 0 && addr[1] == 0 &&
339 addr[2] == htonl(0xffff) &&
340 bitstring_match(addr + 3, cond->addr,
341 cond->prefix_len))
342 break;
343 }
344 yes = 0;
345 break;
346 }
347 }
348
349 if (yes) {
350 len -= op->yes;
351 bc += op->yes;
352 } else {
353 len -= op->no;
354 bc += op->no;
355 }
356 }
357 return (len == 0);
358}
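Each op in the filter bytecode carries two byte offsets, yes and no; the interpreter only ever moves forward by one of them, and a program accepts when len lands exactly on 0 (overshooting to a negative len rejects). A toy "sport >= 1024" program, mirroring the inet_diag_bc_op layout of u8 code, u8 yes, u16 no:

#include <stdio.h>

struct bc_op { unsigned char code, yes; unsigned short no; };

enum { BC_NOP, BC_S_GE };                  /* toy subset of the op set */

/* Walk bytecode the way inet_diag_bc_run() does. */
static int bc_run(const struct bc_op *prog, int len, int sport)
{
	const char *bc = (const char *)prog;

	while (len > 0) {
		const struct bc_op *op = (const struct bc_op *)bc;
		int yes = 1;

		switch (op->code) {
		case BC_NOP:
			break;
		case BC_S_GE:
			yes = sport >= op[1].no;  /* operand rides in next slot */
			break;
		}
		bc  += yes ? op->yes : op->no;
		len -= yes ? op->yes : op->no;
	}
	return len == 0;                          /* exactly 0 = accept */
}

int main(void)
{
	struct bc_op prog[3] = {
		{ BC_S_GE, 8, 16 },  /* no = 16 > remaining 12: reject    */
		{ 0, 0, 1024 },      /* operand slot                      */
		{ BC_NOP, 4, 4 },    /* accept: consumes the last 4 bytes */
	};

	printf("%d %d\n", bc_run(prog, 12, 2000), bc_run(prog, 12, 80)); /* 1 0 */
	return 0;
}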
359
360static int valid_cc(const void *bc, int len, int cc)
361{
362 while (len >= 0) {
363 const struct inet_diag_bc_op *op = bc;
364
365 if (cc > len)
366 return 0;
367 if (cc == len)
368 return 1;
369 if (op->yes < 4)
370 return 0;
371 len -= op->yes;
372 bc += op->yes;
373 }
374 return 0;
375}
376
377static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
378{
379 const unsigned char *bc = bytecode;
380 int len = bytecode_len;
381
382 while (len > 0) {
383 struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)bc;
384
385//printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len);
386 switch (op->code) {
387 case INET_DIAG_BC_AUTO:
388 case INET_DIAG_BC_S_COND:
389 case INET_DIAG_BC_D_COND:
390 case INET_DIAG_BC_S_GE:
391 case INET_DIAG_BC_S_LE:
392 case INET_DIAG_BC_D_GE:
393 case INET_DIAG_BC_D_LE:
394 if (op->yes < 4 || op->yes > len + 4)
395 return -EINVAL;
396 case INET_DIAG_BC_JMP:
397 if (op->no < 4 || op->no > len + 4)
398 return -EINVAL;
399 if (op->no < len &&
400 !valid_cc(bytecode, bytecode_len, len - op->no))
401 return -EINVAL;
402 break;
403 case INET_DIAG_BC_NOP:
404 if (op->yes < 4 || op->yes > len + 4)
405 return -EINVAL;
406 break;
407 default:
408 return -EINVAL;
409 }
410 bc += op->yes;
411 len -= op->yes;
412 }
413 return len == 0 ? 0 : -EINVAL;
414}
415
416static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk,
417 struct netlink_callback *cb)
418{
419 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
420
421 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
422 struct inet_diag_entry entry;
423 struct rtattr *bc = (struct rtattr *)(r + 1);
424 struct inet_sock *inet = inet_sk(sk);
425
426 entry.family = sk->sk_family;
427#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
428 if (entry.family == AF_INET6) {
429 struct ipv6_pinfo *np = inet6_sk(sk);
430
431 entry.saddr = np->rcv_saddr.s6_addr32;
432 entry.daddr = np->daddr.s6_addr32;
433 } else
434#endif
435 {
436 entry.saddr = &inet->rcv_saddr;
437 entry.daddr = &inet->daddr;
438 }
439 entry.sport = inet->num;
440 entry.dport = ntohs(inet->dport);
441 entry.userlocks = sk->sk_userlocks;
442
443 if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
444 return 0;
445 }
446
447 return inet_diag_fill(skb, sk, r->idiag_ext, NETLINK_CB(cb->skb).pid,
448 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
449}
450
451static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
452 struct request_sock *req,
453 u32 pid, u32 seq,
454 const struct nlmsghdr *unlh)
455{
456 const struct inet_request_sock *ireq = inet_rsk(req);
457 struct inet_sock *inet = inet_sk(sk);
458 unsigned char *b = skb->tail;
459 struct inet_diag_msg *r;
460 struct nlmsghdr *nlh;
461 long tmo;
462
463 nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r));
464 nlh->nlmsg_flags = NLM_F_MULTI;
465 r = NLMSG_DATA(nlh);
466
467 r->idiag_family = sk->sk_family;
468 r->idiag_state = TCP_SYN_RECV;
469 r->idiag_timer = 1;
470 r->idiag_retrans = req->retrans;
471
472 r->id.idiag_if = sk->sk_bound_dev_if;
473 r->id.idiag_cookie[0] = (u32)(unsigned long)req;
474 r->id.idiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1);
475
476 tmo = req->expires - jiffies;
477 if (tmo < 0)
478 tmo = 0;
479
480 r->id.idiag_sport = inet->sport;
481 r->id.idiag_dport = ireq->rmt_port;
482 r->id.idiag_src[0] = ireq->loc_addr;
483 r->id.idiag_dst[0] = ireq->rmt_addr;
484 r->idiag_expires = jiffies_to_msecs(tmo);
485 r->idiag_rqueue = 0;
486 r->idiag_wqueue = 0;
487 r->idiag_uid = sock_i_uid(sk);
488 r->idiag_inode = 0;
489#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
490 if (r->idiag_family == AF_INET6) {
491 ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
492 &tcp6_rsk(req)->loc_addr);
493 ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
494 &tcp6_rsk(req)->rmt_addr);
495 }
496#endif
497 nlh->nlmsg_len = skb->tail - b;
498
499 return skb->len;
500
501nlmsg_failure:
502 skb_trim(skb, b - skb->data);
503 return -1;
504}
505
506static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
507 struct netlink_callback *cb)
508{
509 struct inet_diag_entry entry;
510 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
511 struct inet_connection_sock *icsk = inet_csk(sk);
512 struct listen_sock *lopt;
513 struct rtattr *bc = NULL;
514 struct inet_sock *inet = inet_sk(sk);
515 int j, s_j;
516 int reqnum, s_reqnum;
517 int err = 0;
518
519 s_j = cb->args[3];
520 s_reqnum = cb->args[4];
521
522 if (s_j > 0)
523 s_j--;
524
525 entry.family = sk->sk_family;
526
527 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
528
529 lopt = icsk->icsk_accept_queue.listen_opt;
530 if (!lopt || !lopt->qlen)
531 goto out;
532
533 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
534 bc = (struct rtattr *)(r + 1);
535 entry.sport = inet->num;
536 entry.userlocks = sk->sk_userlocks;
537 }
538
539 for (j = s_j; j < lopt->nr_table_entries; j++) {
540 struct request_sock *req, *head = lopt->syn_table[j];
541
542 reqnum = 0;
543 for (req = head; req; reqnum++, req = req->dl_next) {
544 struct inet_request_sock *ireq = inet_rsk(req);
545
546 if (reqnum < s_reqnum)
547 continue;
548 if (r->id.idiag_dport != ireq->rmt_port &&
549 r->id.idiag_dport)
550 continue;
551
552 if (bc) {
553 entry.saddr =
554#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
555 (entry.family == AF_INET6) ?
556 tcp6_rsk(req)->loc_addr.s6_addr32 :
557#endif
558 &ireq->loc_addr;
559 entry.daddr =
560#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
561 (entry.family == AF_INET6) ?
562 tcp6_rsk(req)->rmt_addr.s6_addr32 :
563#endif
564 &ireq->rmt_addr;
565 entry.dport = ntohs(ireq->rmt_port);
566
567 if (!inet_diag_bc_run(RTA_DATA(bc),
568 RTA_PAYLOAD(bc), &entry))
569 continue;
570 }
571
572 err = inet_diag_fill_req(skb, sk, req,
573 NETLINK_CB(cb->skb).pid,
574 cb->nlh->nlmsg_seq, cb->nlh);
575 if (err < 0) {
576 cb->args[3] = j + 1;
577 cb->args[4] = reqnum;
578 goto out;
579 }
580 }
581
582 s_reqnum = 0;
583 }
584
585out:
586 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
587
588 return err;
589}
590
591static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
592{
593 int i, num;
594 int s_i, s_num;
595 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
596 const struct inet_diag_handler *handler;
597 struct inet_hashinfo *hashinfo;
598
599 handler = inet_diag_table[cb->nlh->nlmsg_type];
600 BUG_ON(handler == NULL);
601 hashinfo = handler->idiag_hashinfo;
602
603 s_i = cb->args[1];
604 s_num = num = cb->args[2];
605
606 if (cb->args[0] == 0) {
607 if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV)))
608 goto skip_listen_ht;
609
610 inet_listen_lock(hashinfo);
611 for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
612 struct sock *sk;
613 struct hlist_node *node;
614
615 num = 0;
616 sk_for_each(sk, node, &hashinfo->listening_hash[i]) {
617 struct inet_sock *inet = inet_sk(sk);
618
619 if (num < s_num) {
620 num++;
621 continue;
622 }
623
624 if (r->id.idiag_sport != inet->sport &&
625 r->id.idiag_sport)
626 goto next_listen;
627
628 if (!(r->idiag_states & TCPF_LISTEN) ||
629 r->id.idiag_dport ||
630 cb->args[3] > 0)
631 goto syn_recv;
632
633 if (inet_diag_dump_sock(skb, sk, cb) < 0) {
634 inet_listen_unlock(hashinfo);
635 goto done;
636 }
637
638syn_recv:
639 if (!(r->idiag_states & TCPF_SYN_RECV))
640 goto next_listen;
641
642 if (inet_diag_dump_reqs(skb, sk, cb) < 0) {
643 inet_listen_unlock(hashinfo);
644 goto done;
645 }
646
647next_listen:
648 cb->args[3] = 0;
649 cb->args[4] = 0;
650 ++num;
651 }
652
653 s_num = 0;
654 cb->args[3] = 0;
655 cb->args[4] = 0;
656 }
657 inet_listen_unlock(hashinfo);
658skip_listen_ht:
659 cb->args[0] = 1;
660 s_i = num = s_num = 0;
661 }
662
663 if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV)))
664 return skb->len;
665
666 for (i = s_i; i < hashinfo->ehash_size; i++) {
667 struct inet_ehash_bucket *head = &hashinfo->ehash[i];
668 struct sock *sk;
669 struct hlist_node *node;
670
671 if (i > s_i)
672 s_num = 0;
673
674 read_lock_bh(&head->lock);
675
676 num = 0;
677 sk_for_each(sk, node, &head->chain) {
678 struct inet_sock *inet = inet_sk(sk);
679
680 if (num < s_num)
681 goto next_normal;
682 if (!(r->idiag_states & (1 << sk->sk_state)))
683 goto next_normal;
684 if (r->id.idiag_sport != inet->sport &&
685 r->id.idiag_sport)
686 goto next_normal;
687 if (r->id.idiag_dport != inet->dport && r->id.idiag_dport)
688 goto next_normal;
689 if (inet_diag_dump_sock(skb, sk, cb) < 0) {
690 read_unlock_bh(&head->lock);
691 goto done;
692 }
693next_normal:
694 ++num;
695 }
696
697 if (r->idiag_states & TCPF_TIME_WAIT) {
698 sk_for_each(sk, node,
699 &hashinfo->ehash[i + hashinfo->ehash_size].chain) {
700 struct inet_sock *inet = inet_sk(sk);
701
702 if (num < s_num)
703 goto next_dying;
704 if (r->id.idiag_sport != inet->sport &&
705 r->id.idiag_sport)
706 goto next_dying;
707 if (r->id.idiag_dport != inet->dport &&
708 r->id.idiag_dport)
709 goto next_dying;
710 if (inet_diag_dump_sock(skb, sk, cb) < 0) {
711 read_unlock_bh(&head->lock);
712 goto done;
713 }
714next_dying:
715 ++num;
716 }
717 }
718 read_unlock_bh(&head->lock);
719 }
720
721done:
722 cb->args[1] = i;
723 cb->args[2] = num;
724 return skb->len;
725}
726
727static int inet_diag_dump_done(struct netlink_callback *cb)
728{
729 return 0;
730}
731
732
733static __inline__ int
734inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
735{
736 if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
737 return 0;
738
739 if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX)
740 goto err_inval;
741
742 if (inet_diag_table[nlh->nlmsg_type] == NULL)
743 return -ENOENT;
744
745 if (NLMSG_LENGTH(sizeof(struct inet_diag_req)) > skb->len)
746 goto err_inval;
747
748 if (nlh->nlmsg_flags&NLM_F_DUMP) {
749 if (nlh->nlmsg_len >
750 (4 + NLMSG_SPACE(sizeof(struct inet_diag_req)))) {
751 struct rtattr *rta = (void *)(NLMSG_DATA(nlh) +
752 sizeof(struct inet_diag_req));
753 if (rta->rta_type != INET_DIAG_REQ_BYTECODE ||
754 rta->rta_len < 8 ||
755 rta->rta_len >
756 (nlh->nlmsg_len -
757 NLMSG_SPACE(sizeof(struct inet_diag_req))))
758 goto err_inval;
759 if (inet_diag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta)))
760 goto err_inval;
761 }
762 return netlink_dump_start(idiagnl, skb, nlh,
763 inet_diag_dump,
764 inet_diag_dump_done);
765 } else {
766 return inet_diag_get_exact(skb, nlh);
767 }
768
769err_inval:
770 return -EINVAL;
771}
772
773
774static inline void inet_diag_rcv_skb(struct sk_buff *skb)
775{
776 int err;
777 struct nlmsghdr * nlh;
778
779 if (skb->len >= NLMSG_SPACE(0)) {
780 nlh = (struct nlmsghdr *)skb->data;
781 if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
782 return;
783 err = inet_diag_rcv_msg(skb, nlh);
784 if (err || nlh->nlmsg_flags & NLM_F_ACK)
785 netlink_ack(skb, nlh, err);
786 }
787}
788
789static void inet_diag_rcv(struct sock *sk, int len)
790{
791 struct sk_buff *skb;
792 unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
793
794 while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) {
795 inet_diag_rcv_skb(skb);
796 kfree_skb(skb);
797 }
798}
799
800static DEFINE_SPINLOCK(inet_diag_register_lock);
801
802int inet_diag_register(const struct inet_diag_handler *h)
803{
804 const __u16 type = h->idiag_type;
805 int err = -EINVAL;
806
807 if (type >= INET_DIAG_GETSOCK_MAX)
808 goto out;
809
810 spin_lock(&inet_diag_register_lock);
811 err = -EEXIST;
812 if (inet_diag_table[type] == NULL) {
813 inet_diag_table[type] = h;
814 err = 0;
815 }
816 spin_unlock(&inet_diag_register_lock);
817out:
818 return err;
819}
820EXPORT_SYMBOL_GPL(inet_diag_register);
821
822void inet_diag_unregister(const struct inet_diag_handler *h)
823{
824 const __u16 type = h->idiag_type;
825
826 if (type >= INET_DIAG_GETSOCK_MAX)
827 return;
828
829 spin_lock(&inet_diag_register_lock);
830 inet_diag_table[type] = NULL;
831 spin_unlock(&inet_diag_register_lock);
832
833 synchronize_rcu();
834}
835EXPORT_SYMBOL_GPL(inet_diag_unregister);
836
837static int __init inet_diag_init(void)
838{
839 const int inet_diag_table_size = (INET_DIAG_GETSOCK_MAX *
840 sizeof(struct inet_diag_handler *));
841 int err = -ENOMEM;
842
843 inet_diag_table = kmalloc(inet_diag_table_size, GFP_KERNEL);
844 if (!inet_diag_table)
845 goto out;
846
847 memset(inet_diag_table, 0, inet_diag_table_size);
848 idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv,
849 THIS_MODULE);
850 if (idiagnl == NULL)
851 goto out_free_table;
852 err = 0;
853out:
854 return err;
855out_free_table:
856 kfree(inet_diag_table);
857 goto out;
858}
859
860static void __exit inet_diag_exit(void)
861{
862 sock_release(idiagnl->sk_socket);
863 kfree(inet_diag_table);
864}
865
866module_init(inet_diag_init);
867module_exit(inet_diag_exit);
868MODULE_LICENSE("GPL");
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
new file mode 100644
index 000000000000..e8d29fe736d2
--- /dev/null
+++ b/net/ipv4/inet_hashtables.c
@@ -0,0 +1,165 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Generic INET transport hashtables
7 *
8 * Authors: Lotsa people, from code originally in tcp
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/config.h>
17#include <linux/module.h>
18#include <linux/sched.h>
19#include <linux/slab.h>
20#include <linux/wait.h>
21
22#include <net/inet_connection_sock.h>
23#include <net/inet_hashtables.h>
24
25/*
26 * Allocate and initialize a new local port bind bucket.
27 * The bindhash mutex for snum's hash chain must be held here.
28 */
29struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep,
30 struct inet_bind_hashbucket *head,
31 const unsigned short snum)
32{
33 struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC);
34
35 if (tb != NULL) {
36 tb->port = snum;
37 tb->fastreuse = 0;
38 INIT_HLIST_HEAD(&tb->owners);
39 hlist_add_head(&tb->node, &head->chain);
40 }
41 return tb;
42}
43
44EXPORT_SYMBOL(inet_bind_bucket_create);
45
46/*
47 * Caller must hold hashbucket lock for this tb with local BH disabled
48 */
49void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb)
50{
51 if (hlist_empty(&tb->owners)) {
52 __hlist_del(&tb->node);
53 kmem_cache_free(cachep, tb);
54 }
55}
56
57void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
58 const unsigned short snum)
59{
60 inet_sk(sk)->num = snum;
61 sk_add_bind_node(sk, &tb->owners);
62 inet_csk(sk)->icsk_bind_hash = tb;
63}
64
65EXPORT_SYMBOL(inet_bind_hash);
66
67/*
68 * Get rid of any references to a local port held by the given sock.
69 */
70static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
71{
72 const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
73 struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
74 struct inet_bind_bucket *tb;
75
76 spin_lock(&head->lock);
77 tb = inet_csk(sk)->icsk_bind_hash;
78 __sk_del_bind_node(sk);
79 inet_csk(sk)->icsk_bind_hash = NULL;
80 inet_sk(sk)->num = 0;
81 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
82 spin_unlock(&head->lock);
83}
84
85void inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
86{
87 local_bh_disable();
88 __inet_put_port(hashinfo, sk);
89 local_bh_enable();
90}
91
92EXPORT_SYMBOL(inet_put_port);
93
94/*
95 * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
96 * Look: when several writers sleep and the reader wakes them up, all but one
97 * immediately hit the write lock and grab all the cpus. Exclusive sleep solves
98 * this, _but_ remember, it adds useless work on UP machines (a wakeup on each
99 * exclusive lock release). It should really be ifdefed.
100 */
101void inet_listen_wlock(struct inet_hashinfo *hashinfo)
102{
103 write_lock(&hashinfo->lhash_lock);
104
105 if (atomic_read(&hashinfo->lhash_users)) {
106 DEFINE_WAIT(wait);
107
108 for (;;) {
109 prepare_to_wait_exclusive(&hashinfo->lhash_wait,
110 &wait, TASK_UNINTERRUPTIBLE);
111 if (!atomic_read(&hashinfo->lhash_users))
112 break;
113 write_unlock_bh(&hashinfo->lhash_lock);
114 schedule();
115 write_lock_bh(&hashinfo->lhash_lock);
116 }
117
118 finish_wait(&hashinfo->lhash_wait, &wait);
119 }
120}
121
122EXPORT_SYMBOL(inet_listen_wlock);
123
124/*
125 * Don't inline this cruft. There are some nice properties to exploit here. The
126 * BSD API does not allow a listening sock to specify the remote port nor the
127 * remote address for the connection. So always assume those are both
128 * wildcarded during the search since they can never be otherwise.
129 */
130struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr,
131 const unsigned short hnum, const int dif)
132{
133 struct sock *result = NULL, *sk;
134 const struct hlist_node *node;
135 int hiscore = -1;
136
137 sk_for_each(sk, node, head) {
138 const struct inet_sock *inet = inet_sk(sk);
139
140 if (inet->num == hnum && !ipv6_only_sock(sk)) {
141 const __u32 rcv_saddr = inet->rcv_saddr;
142 int score = sk->sk_family == PF_INET ? 1 : 0;
143
144 if (rcv_saddr) {
145 if (rcv_saddr != daddr)
146 continue;
147 score += 2;
148 }
149 if (sk->sk_bound_dev_if) {
150 if (sk->sk_bound_dev_if != dif)
151 continue;
152 score += 2;
153 }
154 if (score == 5)
155 return sk;
156 if (score > hiscore) {
157 hiscore = score;
158 result = sk;
159 }
160 }
161 }
162 return result;
163}
164
165EXPORT_SYMBOL_GPL(__inet_lookup_listener);
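The lookup above scores candidates rather than taking the first hit: 1 point for a plain AF_INET socket, 2 more for a matching bound address, 2 more for a matching bound device; a perfect 5 short-circuits, otherwise the best score wins, so a specific bind always beats a wildcard. The weighting in isolation (toy types, user space):

#include <stdint.h>
#include <stdio.h>

struct listener {
	uint32_t rcv_saddr;    /* 0 = wildcard bind     */
	int      bound_dev;    /* 0 = any device        */
	int      is_inet;      /* plain PF_INET socket? */
};

/* Score one listener against an incoming (daddr, dif); -1 = no match.
 * Mirrors the weighting in __inet_lookup_listener(). */
static int listener_score(const struct listener *l, uint32_t daddr, int dif)
{
	int score = l->is_inet ? 1 : 0;

	if (l->rcv_saddr) {
		if (l->rcv_saddr != daddr)
			return -1;
		score += 2;
	}
	if (l->bound_dev) {
		if (l->bound_dev != dif)
			return -1;
		score += 2;
	}
	return score;                  /* 5 is a perfect, early-exit match */
}

int main(void)
{
	struct listener wild  = { 0,          0, 1 };
	struct listener bound = { 0x0a000001, 0, 1 };

	printf("wildcard=%d bound=%d\n",
	       listener_score(&wild,  0x0a000001, 2),
	       listener_score(&bound, 0x0a000001, 2));   /* 1 vs 3 */
	return 0;
}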
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
new file mode 100644
index 000000000000..4d1502a49852
--- /dev/null
+++ b/net/ipv4/inet_timewait_sock.c
@@ -0,0 +1,384 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Generic TIME_WAIT sockets functions
7 *
8 * From code originally in TCP
9 */
10
11#include <linux/config.h>
12
13#include <net/inet_hashtables.h>
14#include <net/inet_timewait_sock.h>
15#include <net/ip.h>
16
17/* Must be called with locally disabled BHs. */
18void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo)
19{
20 struct inet_bind_hashbucket *bhead;
21 struct inet_bind_bucket *tb;
22 /* Unlink from established hashes. */
23 struct inet_ehash_bucket *ehead = &hashinfo->ehash[tw->tw_hashent];
24
25 write_lock(&ehead->lock);
26 if (hlist_unhashed(&tw->tw_node)) {
27 write_unlock(&ehead->lock);
28 return;
29 }
30 __hlist_del(&tw->tw_node);
31 sk_node_init(&tw->tw_node);
32 write_unlock(&ehead->lock);
33
34 /* Disassociate with bind bucket. */
35 bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
36 spin_lock(&bhead->lock);
37 tb = tw->tw_tb;
38 __hlist_del(&tw->tw_bind_node);
39 tw->tw_tb = NULL;
40 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
41 spin_unlock(&bhead->lock);
42#ifdef SOCK_REFCNT_DEBUG
43 if (atomic_read(&tw->tw_refcnt) != 1) {
44 printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n",
45 tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt));
46 }
47#endif
48 inet_twsk_put(tw);
49}
50
51EXPORT_SYMBOL_GPL(__inet_twsk_kill);
52
53/*
54 * Enter the time wait state. This is called with locally disabled BH.
55 * Essentially we whip up a timewait bucket, copy the relevant info into it
56 * from the SK, and mess with hash chains and list linkage.
57 */
58void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
59 struct inet_hashinfo *hashinfo)
60{
61 const struct inet_sock *inet = inet_sk(sk);
62 const struct inet_connection_sock *icsk = inet_csk(sk);
63 struct inet_ehash_bucket *ehead = &hashinfo->ehash[sk->sk_hashent];
64 struct inet_bind_hashbucket *bhead;
65 /* Step 1: Put TW into bind hash. Original socket stays there too.
66	 Note that any socket with inet->num != 0 MUST be bound in
67 binding cache, even if it is closed.
68 */
69 bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)];
70 spin_lock(&bhead->lock);
71 tw->tw_tb = icsk->icsk_bind_hash;
72 BUG_TRAP(icsk->icsk_bind_hash);
73 inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
74 spin_unlock(&bhead->lock);
75
76 write_lock(&ehead->lock);
77
78 /* Step 2: Remove SK from established hash. */
79 if (__sk_del_node_init(sk))
80 sock_prot_dec_use(sk->sk_prot);
81
82 /* Step 3: Hash TW into TIMEWAIT half of established hash table. */
83 inet_twsk_add_node(tw, &(ehead + hashinfo->ehash_size)->chain);
84 atomic_inc(&tw->tw_refcnt);
85
86 write_unlock(&ehead->lock);
87}
88
89EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
90
91struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
92{
93 struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab,
94 SLAB_ATOMIC);
95 if (tw != NULL) {
96 const struct inet_sock *inet = inet_sk(sk);
97
98 /* Give us an identity. */
99 tw->tw_daddr = inet->daddr;
100 tw->tw_rcv_saddr = inet->rcv_saddr;
101 tw->tw_bound_dev_if = sk->sk_bound_dev_if;
102 tw->tw_num = inet->num;
103 tw->tw_state = TCP_TIME_WAIT;
104 tw->tw_substate = state;
105 tw->tw_sport = inet->sport;
106 tw->tw_dport = inet->dport;
107 tw->tw_family = sk->sk_family;
108 tw->tw_reuse = sk->sk_reuse;
109 tw->tw_hashent = sk->sk_hashent;
110 tw->tw_ipv6only = 0;
111 tw->tw_prot = sk->sk_prot_creator;
112 atomic_set(&tw->tw_refcnt, 1);
113 inet_twsk_dead_node_init(tw);
114 }
115
116 return tw;
117}
118
119EXPORT_SYMBOL_GPL(inet_twsk_alloc);
120
121/* Returns non-zero if quota exceeded. */
122static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
123 const int slot)
124{
125 struct inet_timewait_sock *tw;
126 struct hlist_node *node;
127 unsigned int killed;
128 int ret;
129
130	/* NOTE: compare this to the previous version where the lock
131	 * was released after detaching the chain. It was racy,
132	 * because tw buckets are scheduled in a non-serialized context
133	 * in 2.3 (with netfilter), and with softnet it is common, because
134	 * soft irqs are not sequenced.
135 */
136 killed = 0;
137 ret = 0;
138rescan:
139 inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) {
140 __inet_twsk_del_dead_node(tw);
141 spin_unlock(&twdr->death_lock);
142 __inet_twsk_kill(tw, twdr->hashinfo);
143 inet_twsk_put(tw);
144 killed++;
145 spin_lock(&twdr->death_lock);
146 if (killed > INET_TWDR_TWKILL_QUOTA) {
147 ret = 1;
148 break;
149 }
150
151 /* While we dropped twdr->death_lock, another cpu may have
152 * killed off the next TW bucket in the list, therefore
153 * do a fresh re-read of the hlist head node with the
154 * lock reacquired. We still use the hlist traversal
155 * macro in order to get the prefetches.
156 */
157 goto rescan;
158 }
159
160 twdr->tw_count -= killed;
161 NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);
162
163 return ret;
164}
165
166void inet_twdr_hangman(unsigned long data)
167{
168 struct inet_timewait_death_row *twdr;
169	 unsigned int need_timer;
170
171 twdr = (struct inet_timewait_death_row *)data;
172 spin_lock(&twdr->death_lock);
173
174 if (twdr->tw_count == 0)
175 goto out;
176
177 need_timer = 0;
178 if (inet_twdr_do_twkill_work(twdr, twdr->slot)) {
179 twdr->thread_slots |= (1 << twdr->slot);
180 mb();
181 schedule_work(&twdr->twkill_work);
182 need_timer = 1;
183 } else {
184 /* We purged the entire slot, anything left? */
185 if (twdr->tw_count)
186 need_timer = 1;
187 }
188 twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
189 if (need_timer)
190 mod_timer(&twdr->tw_timer, jiffies + twdr->period);
191out:
192 spin_unlock(&twdr->death_lock);
193}
194
195EXPORT_SYMBOL_GPL(inet_twdr_hangman);
196
197extern void twkill_slots_invalid(void);
198
199void inet_twdr_twkill_work(void *data)
200{
201 struct inet_timewait_death_row *twdr = data;
202 int i;
203
204 if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8))
205 twkill_slots_invalid();
206
207 while (twdr->thread_slots) {
208 spin_lock_bh(&twdr->death_lock);
209 for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) {
210 if (!(twdr->thread_slots & (1 << i)))
211 continue;
212
213 while (inet_twdr_do_twkill_work(twdr, i) != 0) {
214 if (need_resched()) {
215 spin_unlock_bh(&twdr->death_lock);
216 schedule();
217 spin_lock_bh(&twdr->death_lock);
218 }
219 }
220
221 twdr->thread_slots &= ~(1 << i);
222 }
223 spin_unlock_bh(&twdr->death_lock);
224 }
225}
226
227EXPORT_SYMBOL_GPL(inet_twdr_twkill_work);
228
229/* These are always called from BH context. See callers in
230 * tcp_input.c to verify this.
231 */
232
233/* This is for handling early-kills of TIME_WAIT sockets. */
234void inet_twsk_deschedule(struct inet_timewait_sock *tw,
235 struct inet_timewait_death_row *twdr)
236{
237 spin_lock(&twdr->death_lock);
238 if (inet_twsk_del_dead_node(tw)) {
239 inet_twsk_put(tw);
240 if (--twdr->tw_count == 0)
241 del_timer(&twdr->tw_timer);
242 }
243 spin_unlock(&twdr->death_lock);
244 __inet_twsk_kill(tw, twdr->hashinfo);
245}
246
247EXPORT_SYMBOL(inet_twsk_deschedule);
248
249void inet_twsk_schedule(struct inet_timewait_sock *tw,
250 struct inet_timewait_death_row *twdr,
251 const int timeo, const int timewait_len)
252{
253 struct hlist_head *list;
254 int slot;
255
256 /* timeout := RTO * 3.5
257 *
258 * 3.5 = 1+2+0.5 to wait for two retransmits.
259 *
260	 * RATIONALE: if a FIN arrived and we entered TIME-WAIT state,
261	 * our ACK acking that FIN can be lost. If N subsequent retransmitted
262	 * FINs (or previous segments) are lost, the probability of such an
263	 * event is p^(N+1), where p is the probability of losing a single
264	 * packet, and the time to detect the loss is about RTO*(2^N - 1)
265	 * with exponential backoff. The normal timewait length is calculated
266	 * so that we wait at least for one retransmitted FIN (the maximal
267	 * RTO is 120 sec). [ BTW Linux, following BSD, violates this
268	 * requirement, waiting only 60 sec; we should wait at least 240 secs.
269	 * Well, 240 consumes too much in the way of resources 8)
270	 * ]
271	 * This interval is not reduced, so as to catch old duplicates and
272	 * responses to our wandering segments living for two MSLs.
273 * However, if we use PAWS to detect
274 * old duplicates, we can reduce the interval to bounds required
275 * by RTO, rather than MSL. So, if peer understands PAWS, we
276 * kill tw bucket after 3.5*RTO (it is important that this number
277 * is greater than TS tick!) and detect old duplicates with help
278 * of PAWS.
279 */
280 slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK;
281
282 spin_lock(&twdr->death_lock);
283
284 /* Unlink it, if it was scheduled */
285 if (inet_twsk_del_dead_node(tw))
286 twdr->tw_count--;
287 else
288 atomic_inc(&tw->tw_refcnt);
289
290 if (slot >= INET_TWDR_RECYCLE_SLOTS) {
291 /* Schedule to slow timer */
292 if (timeo >= timewait_len) {
293 slot = INET_TWDR_TWKILL_SLOTS - 1;
294 } else {
295 slot = (timeo + twdr->period - 1) / twdr->period;
296 if (slot >= INET_TWDR_TWKILL_SLOTS)
297 slot = INET_TWDR_TWKILL_SLOTS - 1;
298 }
299 tw->tw_ttd = jiffies + timeo;
300 slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1);
301 list = &twdr->cells[slot];
302 } else {
303 tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK);
304
305 if (twdr->twcal_hand < 0) {
306 twdr->twcal_hand = 0;
307 twdr->twcal_jiffie = jiffies;
308 twdr->twcal_timer.expires = twdr->twcal_jiffie +
309 (slot << INET_TWDR_RECYCLE_TICK);
310 add_timer(&twdr->twcal_timer);
311 } else {
312 if (time_after(twdr->twcal_timer.expires,
313 jiffies + (slot << INET_TWDR_RECYCLE_TICK)))
314 mod_timer(&twdr->twcal_timer,
315 jiffies + (slot << INET_TWDR_RECYCLE_TICK));
316 slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1);
317 }
318 list = &twdr->twcal_row[slot];
319 }
320
321 hlist_add_head(&tw->tw_death_node, list);
322
323 if (twdr->tw_count++ == 0)
324 mod_timer(&twdr->tw_timer, jiffies + twdr->period);
325 spin_unlock(&twdr->death_lock);
326}
327
328EXPORT_SYMBOL_GPL(inet_twsk_schedule);
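inet_twsk_schedule() rounds the timeout up to whole recycle ticks; short timeouts go to the fine-grained twcal wheel, longer ones are rescaled into slow-timer slots of twdr->period jiffies each. The slot arithmetic in isolation (the constants below are illustrative stand-ins for the INET_TWDR_* values, an assumption for the sketch):

#include <stdio.h>

#define RECYCLE_TICK   7      /* toy: one fine slot = 128 jiffies */
#define RECYCLE_SLOTS  32     /* fine-grained twcal wheel         */
#define TWKILL_SLOTS   8      /* coarse slow-timer wheel          */

/* Reproduce the slot choice of inet_twsk_schedule(). */
static void pick_slot(int timeo, int period, int timewait_len)
{
	int slot = (timeo + (1 << RECYCLE_TICK) - 1) >> RECYCLE_TICK;

	if (slot >= RECYCLE_SLOTS) {
		if (timeo >= timewait_len)
			slot = TWKILL_SLOTS - 1;
		else {
			slot = (timeo + period - 1) / period;  /* round up */
			if (slot >= TWKILL_SLOTS)
				slot = TWKILL_SLOTS - 1;
		}
		printf("slow wheel, slot %d\n", slot);
	} else {
		printf("fine wheel, slot %d (%d jiffies)\n",
		       slot, slot << RECYCLE_TICK);
	}
}

int main(void)
{
	pick_slot(300, 7500, 60000);    /* short RTO-based timeout */
	pick_slot(60000, 7500, 60000);  /* full 60 s TIME-WAIT     */
	return 0;
}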
329
330void inet_twdr_twcal_tick(unsigned long data)
331{
332 struct inet_timewait_death_row *twdr;
333 int n, slot;
334 unsigned long j;
335 unsigned long now = jiffies;
336 int killed = 0;
337 int adv = 0;
338
339 twdr = (struct inet_timewait_death_row *)data;
340
341 spin_lock(&twdr->death_lock);
342 if (twdr->twcal_hand < 0)
343 goto out;
344
345 slot = twdr->twcal_hand;
346 j = twdr->twcal_jiffie;
347
348 for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) {
349 if (time_before_eq(j, now)) {
350 struct hlist_node *node, *safe;
351 struct inet_timewait_sock *tw;
352
353 inet_twsk_for_each_inmate_safe(tw, node, safe,
354 &twdr->twcal_row[slot]) {
355 __inet_twsk_del_dead_node(tw);
356 __inet_twsk_kill(tw, twdr->hashinfo);
357 inet_twsk_put(tw);
358 killed++;
359 }
360 } else {
361 if (!adv) {
362 adv = 1;
363 twdr->twcal_jiffie = j;
364 twdr->twcal_hand = slot;
365 }
366
367 if (!hlist_empty(&twdr->twcal_row[slot])) {
368 mod_timer(&twdr->twcal_timer, j);
369 goto out;
370 }
371 }
372 j += 1 << INET_TWDR_RECYCLE_TICK;
373 slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1);
374 }
375 twdr->twcal_hand = -1;
376
377out:
378 if ((twdr->tw_count -= killed) == 0)
379 del_timer(&twdr->tw_timer);
380 NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
381 spin_unlock(&twdr->death_lock);
382}
383
384EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 95473953c406..f84ba9c96551 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -20,6 +20,7 @@
20#include <linux/kernel.h> 20#include <linux/kernel.h>
21#include <linux/mm.h> 21#include <linux/mm.h>
22#include <linux/net.h> 22#include <linux/net.h>
23#include <net/ip.h>
23#include <net/inetpeer.h> 24#include <net/inetpeer.h>
24 25
25/* 26/*
@@ -72,7 +73,7 @@
72/* Exported for inet_getid inline function. */ 73/* Exported for inet_getid inline function. */
73DEFINE_SPINLOCK(inet_peer_idlock); 74DEFINE_SPINLOCK(inet_peer_idlock);
74 75
75static kmem_cache_t *peer_cachep; 76static kmem_cache_t *peer_cachep __read_mostly;
76 77
77#define node_height(x) x->avl_height 78#define node_height(x) x->avl_height
78static struct inet_peer peer_fake_node = { 79static struct inet_peer peer_fake_node = {
@@ -450,11 +451,12 @@ static void peer_check_expire(unsigned long dummy)
450 /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime 451 /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
451 * interval depending on the total number of entries (more entries, 452 * interval depending on the total number of entries (more entries,
452 * less interval). */ 453 * less interval). */
453 peer_periodic_timer.expires = jiffies 454 if (peer_total >= inet_peer_threshold)
454 + inet_peer_gc_maxtime 455 peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
455 - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * 456 else
456 peer_total / inet_peer_threshold * HZ; 457 peer_periodic_timer.expires = jiffies
458 + inet_peer_gc_maxtime
459 - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
460 peer_total / inet_peer_threshold * HZ;
457 add_timer(&peer_periodic_timer); 461 add_timer(&peer_periodic_timer);
458} 462}
459
460EXPORT_SYMBOL(inet_peer_idlock);
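The hunk above adds a clamp: once the pool crosses inet_peer_threshold, the GC timer always refires after inet_peer_gc_mintime, while below the threshold the original linear interpolation is kept. A minimal sketch of that schedule (the function name is illustrative; the kernel's /HZ ... *HZ ordering exists to keep the 32-bit multiplication from overflowing):

static unsigned long peer_gc_interval(unsigned long mintime,
                                      unsigned long maxtime,
                                      int total, int threshold)
{
        if (total >= threshold)
                return mintime;   /* pool full: collect as often as allowed */
        /* empty pool -> maxtime, full pool -> mintime, linear in between */
        return maxtime - (maxtime - mintime) * total / threshold;
}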
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 77094aac6c28..0923add122b4 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -76,16 +76,12 @@ int ip_forward(struct sk_buff *skb)
76 * that reaches zero, we must reply with an ICMP control message telling 76 * that reaches zero, we must reply with an ICMP control message telling
77 * that the packet's lifetime expired. 77 * that the packet's lifetime expired.
78 */ 78 */
79 79 if (skb->nh.iph->ttl <= 1)
80 iph = skb->nh.iph;
81
82 if (iph->ttl <= 1)
83 goto too_many_hops; 80 goto too_many_hops;
84 81
85 if (!xfrm4_route_forward(skb)) 82 if (!xfrm4_route_forward(skb))
86 goto drop; 83 goto drop;
87 84
88 iph = skb->nh.iph;
89 rt = (struct rtable*)skb->dst; 85 rt = (struct rtable*)skb->dst;
90 86
91 if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 87 if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 7f68e27eb4ea..9e6e683cc34d 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -377,7 +377,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user)
377 return ip_frag_intern(hash, qp); 377 return ip_frag_intern(hash, qp);
378 378
379out_nomem: 379out_nomem:
380 NETDEBUG(if (net_ratelimit()) printk(KERN_ERR "ip_frag_create: no memory left !\n")); 380 LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n");
381 return NULL; 381 return NULL;
382} 382}
383 383
@@ -533,7 +533,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
533 if (skb->dev) 533 if (skb->dev)
534 qp->iif = skb->dev->ifindex; 534 qp->iif = skb->dev->ifindex;
535 skb->dev = NULL; 535 skb->dev = NULL;
536 qp->stamp = skb->stamp; 536 skb_get_timestamp(skb, &qp->stamp);
537 qp->meat += skb->len; 537 qp->meat += skb->len;
538 atomic_add(skb->truesize, &ip_frag_mem); 538 atomic_add(skb->truesize, &ip_frag_mem);
539 if (offset == 0) 539 if (offset == 0)
@@ -615,7 +615,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
615 615
616 head->next = NULL; 616 head->next = NULL;
617 head->dev = dev; 617 head->dev = dev;
618 head->stamp = qp->stamp; 618 skb_set_timestamp(head, &qp->stamp);
619 619
620 iph = head->nh.iph; 620 iph = head->nh.iph;
621 iph->frag_off = 0; 621 iph->frag_off = 0;
@@ -625,10 +625,8 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
625 return head; 625 return head;
626 626
627out_nomem: 627out_nomem:
628 NETDEBUG(if (net_ratelimit()) 628 LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
629 printk(KERN_ERR 629 "queue %p\n", qp);
630 "IP: queue_glue: no memory for gluing queue %p\n",
631 qp));
632 goto out_fail; 630 goto out_fail;
633out_oversize: 631out_oversize:
634 if (net_ratelimit()) 632 if (net_ratelimit())
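The two timestamp hunks above stop poking skb->stamp directly and go through accessors instead. A usage sketch, assuming the 2.6.14-era signatures that copy through a struct timeval:

        struct timeval stamp;

        skb_get_timestamp(skb, &stamp);   /* copy the arrival time out of the fragment */
        /* ... fragment sits in the reassembly queue ... */
        skb_set_timestamp(head, &stamp);  /* stamp the reassembled packet with it */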
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 884835522224..f0d5740d7e22 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -290,7 +290,6 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
290 290
291 dev_hold(dev); 291 dev_hold(dev);
292 ipgre_tunnel_link(nt); 292 ipgre_tunnel_link(nt);
293 /* Do not decrement MOD_USE_COUNT here. */
294 return nt; 293 return nt;
295 294
296failed: 295failed:
@@ -1277,12 +1276,28 @@ err1:
1277 goto out; 1276 goto out;
1278} 1277}
1279 1278
1280static void ipgre_fini(void) 1279static void __exit ipgre_destroy_tunnels(void)
1280{
1281 int prio;
1282
1283 for (prio = 0; prio < 4; prio++) {
1284 int h;
1285 for (h = 0; h < HASH_SIZE; h++) {
1286 struct ip_tunnel *t;
1287 while ((t = tunnels[prio][h]) != NULL)
1288 unregister_netdevice(t->dev);
1289 }
1290 }
1291}
1292
1293static void __exit ipgre_fini(void)
1281{ 1294{
1282 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) 1295 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1283 printk(KERN_INFO "ipgre close: can't remove protocol\n"); 1296 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1284 1297
1285 unregister_netdev(ipgre_fb_tunnel_dev); 1298 rtnl_lock();
1299 ipgre_destroy_tunnels();
1300 rtnl_unlock();
1286} 1301}
1287 1302
1288module_init(ipgre_init); 1303module_init(ipgre_init);
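The new exit path walks every tunnel under a single RTNL acquisition; unregister_netdevice() expects the caller to already hold the lock, where the older unregister_netdev() took it once per device. The shape of the teardown, condensed from ipgre_destroy_tunnels()/ipgre_fini() above (the while loop terminates because the device's uninit hook unlinks it from tunnels[][]):

        rtnl_lock();
        for (prio = 0; prio < 4; prio++)
                for (h = 0; h < HASH_SIZE; h++)
                        while ((t = tunnels[prio][h]) != NULL)
                                unregister_netdevice(t->dev);
        rtnl_unlock();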
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index c703528e0bcd..473d0f2b2e0d 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -150,7 +150,7 @@
150 * SNMP management statistics 150 * SNMP management statistics
151 */ 151 */
152 152
153DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics); 153DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly;
154 154
155/* 155/*
156 * Process Router Attention IP option 156 * Process Router Attention IP option
@@ -225,8 +225,8 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
225 /* If there may be a raw socket we must check - if not we 225 /* If there may be a raw socket we must check - if not we
226 * couldn't care less 226 * couldn't care less
227 */ 227 */
228 if (raw_sk) 228 if (raw_sk && !raw_v4_input(skb, skb->nh.iph, hash))
229 raw_v4_input(skb, skb->nh.iph, hash); 229 raw_sk = NULL;
230 230
231 if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { 231 if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
232 int ret; 232 int ret;
@@ -279,18 +279,70 @@ int ip_local_deliver(struct sk_buff *skb)
279 ip_local_deliver_finish); 279 ip_local_deliver_finish);
280} 280}
281 281
282static inline int ip_rcv_finish(struct sk_buff *skb) 282static inline int ip_rcv_options(struct sk_buff *skb)
283{ 283{
284 struct ip_options *opt;
285 struct iphdr *iph;
284 struct net_device *dev = skb->dev; 286 struct net_device *dev = skb->dev;
287
288 /* It looks like overkill, because not all
289 IP options require packet mangling.
290 But it is the easiest way for now, especially taking
291 into account that the combination of IP options
292 and a running sniffer is an extremely rare condition.
293 --ANK (980813)
294 */
295 if (skb_cow(skb, skb_headroom(skb))) {
296 IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
297 goto drop;
298 }
299
300 iph = skb->nh.iph;
301
302 if (ip_options_compile(NULL, skb)) {
303 IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
304 goto drop;
305 }
306
307 opt = &(IPCB(skb)->opt);
308 if (unlikely(opt->srr)) {
309 struct in_device *in_dev = in_dev_get(dev);
310 if (in_dev) {
311 if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
312 if (IN_DEV_LOG_MARTIANS(in_dev) &&
313 net_ratelimit())
314 printk(KERN_INFO "source route option "
315 "%u.%u.%u.%u -> %u.%u.%u.%u\n",
316 NIPQUAD(iph->saddr),
317 NIPQUAD(iph->daddr));
318 in_dev_put(in_dev);
319 goto drop;
320 }
321
322 in_dev_put(in_dev);
323 }
324
325 if (ip_options_rcv_srr(skb))
326 goto drop;
327 }
328
329 return 0;
330drop:
331 return -1;
332}
333
334static inline int ip_rcv_finish(struct sk_buff *skb)
335{
285 struct iphdr *iph = skb->nh.iph; 336 struct iphdr *iph = skb->nh.iph;
286 int err;
287 337
288 /* 338 /*
289 * Initialise the virtual path cache for the packet. It describes 339 * Initialise the virtual path cache for the packet. It describes
290 * how the packet travels inside Linux networking. 340 * how the packet travels inside Linux networking.
291 */ 341 */
292 if (skb->dst == NULL) { 342 if (likely(skb->dst == NULL)) {
293 if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { 343 int err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
344 skb->dev);
345 if (unlikely(err)) {
294 if (err == -EHOSTUNREACH) 346 if (err == -EHOSTUNREACH)
295 IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); 347 IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
296 goto drop; 348 goto drop;
@@ -298,7 +350,7 @@ static inline int ip_rcv_finish(struct sk_buff *skb)
298 } 350 }
299 351
300#ifdef CONFIG_NET_CLS_ROUTE 352#ifdef CONFIG_NET_CLS_ROUTE
301 if (skb->dst->tclassid) { 353 if (unlikely(skb->dst->tclassid)) {
302 struct ip_rt_acct *st = ip_rt_acct + 256*smp_processor_id(); 354 struct ip_rt_acct *st = ip_rt_acct + 256*smp_processor_id();
303 u32 idx = skb->dst->tclassid; 355 u32 idx = skb->dst->tclassid;
304 st[idx&0xFF].o_packets++; 356 st[idx&0xFF].o_packets++;
@@ -308,48 +360,11 @@ static inline int ip_rcv_finish(struct sk_buff *skb)
308 } 360 }
309#endif 361#endif
310 362
311 if (iph->ihl > 5) { 363 if (iph->ihl > 5 && ip_rcv_options(skb))
312 struct ip_options *opt; 364 goto drop;
313
314 /* It looks like overkill, because not all
315 IP options require packet mangling.
316 But it is the easiest way for now, especially taking
317 into account that the combination of IP options
318 and a running sniffer is an extremely rare condition.
319 --ANK (980813)
320 */
321
322 if (skb_cow(skb, skb_headroom(skb))) {
323 IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
324 goto drop;
325 }
326 iph = skb->nh.iph;
327
328 if (ip_options_compile(NULL, skb))
329 goto inhdr_error;
330
331 opt = &(IPCB(skb)->opt);
332 if (opt->srr) {
333 struct in_device *in_dev = in_dev_get(dev);
334 if (in_dev) {
335 if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
336 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
337 printk(KERN_INFO "source route option %u.%u.%u.%u -> %u.%u.%u.%u\n",
338 NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
339 in_dev_put(in_dev);
340 goto drop;
341 }
342 in_dev_put(in_dev);
343 }
344 if (ip_options_rcv_srr(skb))
345 goto drop;
346 }
347 }
348 365
349 return dst_input(skb); 366 return dst_input(skb);
350 367
351inhdr_error:
352 IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
353drop: 368drop:
354 kfree_skb(skb); 369 kfree_skb(skb);
355 return NET_RX_DROP; 370 return NET_RX_DROP;
@@ -358,9 +373,10 @@ drop:
358/* 373/*
359 * Main IP Receive routine. 374 * Main IP Receive routine.
360 */ 375 */
361int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) 376int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
362{ 377{
363 struct iphdr *iph; 378 struct iphdr *iph;
379 u32 len;
364 380
365 /* When the interface is in promisc. mode, drop all the crap 381 /* When the interface is in promisc. mode, drop all the crap
366 * that it receives, do not try to analyse it. 382 * that it receives, do not try to analyse it.
@@ -392,29 +408,27 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
392 */ 408 */
393 409
394 if (iph->ihl < 5 || iph->version != 4) 410 if (iph->ihl < 5 || iph->version != 4)
395 goto inhdr_error; 411 goto inhdr_error;
396 412
397 if (!pskb_may_pull(skb, iph->ihl*4)) 413 if (!pskb_may_pull(skb, iph->ihl*4))
398 goto inhdr_error; 414 goto inhdr_error;
399 415
400 iph = skb->nh.iph; 416 iph = skb->nh.iph;
401 417
402 if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) 418 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
403 goto inhdr_error; 419 goto inhdr_error;
404 420
405 { 421 len = ntohs(iph->tot_len);
406 __u32 len = ntohs(iph->tot_len); 422 if (skb->len < len || len < (iph->ihl*4))
407 if (skb->len < len || len < (iph->ihl<<2)) 423 goto inhdr_error;
408 goto inhdr_error;
409 424
410 /* Our transport medium may have padded the buffer out. Now we know it 425 /* Our transport medium may have padded the buffer out. Now we know it
411 * is IP we can trim to the true length of the frame. 426 * is IP we can trim to the true length of the frame.
412 * Note this now means skb->len holds ntohs(iph->tot_len). 427 * Note this now means skb->len holds ntohs(iph->tot_len).
413 */ 428 */
414 if (pskb_trim_rcsum(skb, len)) { 429 if (pskb_trim_rcsum(skb, len)) {
415 IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); 430 IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
416 goto drop; 431 goto drop;
417 }
418 } 432 }
419 433
420 return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL, 434 return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
@@ -428,5 +442,4 @@ out:
428 return NET_RX_DROP; 442 return NET_RX_DROP;
429} 443}
430 444
431EXPORT_SYMBOL(ip_rcv);
432EXPORT_SYMBOL(ip_statistics); 445EXPORT_SYMBOL(ip_statistics);
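Condensed, the validation sequence ip_rcv() performs above is (a sketch for orientation, not the exact kernel code; all error paths collapsed into a single return):

static int ipv4_header_ok(struct sk_buff *skb)
{
        struct iphdr *iph;
        u32 len;

        if (!pskb_may_pull(skb, sizeof(struct iphdr)))
                return 0;
        iph = skb->nh.iph;
        if (iph->ihl < 5 || iph->version != 4)    /* minimal header, IPv4 only */
                return 0;
        if (!pskb_may_pull(skb, iph->ihl * 4))    /* full header incl. options */
                return 0;
        iph = skb->nh.iph;                        /* pull may have moved the data */
        if (ip_fast_csum((u8 *)iph, iph->ihl))    /* header checksum must be zero */
                return 0;
        len = ntohs(iph->tot_len);
        if (skb->len < len || len < iph->ihl * 4) /* truncated or bogus length */
                return 0;
        return pskb_trim_rcsum(skb, len) == 0;    /* strip link-layer padding */
}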
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 6d89f3f3e701..bce4e875193b 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -489,23 +489,18 @@ void ip_options_undo(struct ip_options * opt)
489 } 489 }
490} 490}
491 491
492int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, int user) 492static struct ip_options *ip_options_get_alloc(const int optlen)
493{ 493{
494 struct ip_options *opt; 494 struct ip_options *opt = kmalloc(sizeof(*opt) + ((optlen + 3) & ~3),
495 GFP_KERNEL);
496 if (opt)
497 memset(opt, 0, sizeof(*opt));
498 return opt;
499}
495 500
496 opt = kmalloc(sizeof(struct ip_options)+((optlen+3)&~3), GFP_KERNEL); 501static int ip_options_get_finish(struct ip_options **optp,
497 if (!opt) 502 struct ip_options *opt, int optlen)
498 return -ENOMEM; 503{
499 memset(opt, 0, sizeof(struct ip_options));
500 if (optlen) {
501 if (user) {
502 if (copy_from_user(opt->__data, data, optlen)) {
503 kfree(opt);
504 return -EFAULT;
505 }
506 } else
507 memcpy(opt->__data, data, optlen);
508 }
509 while (optlen & 3) 504 while (optlen & 3)
510 opt->__data[optlen++] = IPOPT_END; 505 opt->__data[optlen++] = IPOPT_END;
511 opt->optlen = optlen; 506 opt->optlen = optlen;
@@ -521,6 +516,30 @@ int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, in
521 return 0; 516 return 0;
522} 517}
523 518
519int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *data, int optlen)
520{
521 struct ip_options *opt = ip_options_get_alloc(optlen);
522
523 if (!opt)
524 return -ENOMEM;
525 if (optlen && copy_from_user(opt->__data, data, optlen)) {
526 kfree(opt);
527 return -EFAULT;
528 }
529 return ip_options_get_finish(optp, opt, optlen);
530}
531
532int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen)
533{
534 struct ip_options *opt = ip_options_get_alloc(optlen);
535
536 if (!opt)
537 return -ENOMEM;
538 if (optlen)
539 memcpy(opt->__data, data, optlen);
540 return ip_options_get_finish(optp, opt, optlen);
541}
542
524void ip_forward_options(struct sk_buff *skb) 543void ip_forward_options(struct sk_buff *skb)
525{ 544{
526 struct ip_options * opt = &(IPCB(skb)->opt); 545 struct ip_options * opt = &(IPCB(skb)->opt);
@@ -620,6 +639,3 @@ int ip_options_rcv_srr(struct sk_buff *skb)
620 } 639 }
621 return 0; 640 return 0;
622} 641}
623
624EXPORT_SYMBOL(ip_options_compile);
625EXPORT_SYMBOL(ip_options_undo);
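The split above trades the old boolean `user' flag for two entry points, so the setsockopt() variant can carry a __user-annotated pointer while both share ip_options_get_alloc() and ip_options_get_finish(). Call sites (taken from the ip_sockglue.c hunks below) then read:

        /* option bytes already in kernel memory, e.g. the cmsg path: */
        err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), optlen);

        /* option bytes handed in from user space via setsockopt(): */
        err = ip_options_get_from_user(&opt, optval, optlen);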
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 9de83e6e0f1d..3f1a263e1249 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -69,13 +69,10 @@
69#include <net/ip.h> 69#include <net/ip.h>
70#include <net/protocol.h> 70#include <net/protocol.h>
71#include <net/route.h> 71#include <net/route.h>
72#include <net/tcp.h>
73#include <net/udp.h>
74#include <linux/skbuff.h> 72#include <linux/skbuff.h>
75#include <net/sock.h> 73#include <net/sock.h>
76#include <net/arp.h> 74#include <net/arp.h>
77#include <net/icmp.h> 75#include <net/icmp.h>
78#include <net/raw.h>
79#include <net/checksum.h> 76#include <net/checksum.h>
80#include <net/inetpeer.h> 77#include <net/inetpeer.h>
81#include <net/checksum.h> 78#include <net/checksum.h>
@@ -84,12 +81,8 @@
84#include <linux/netfilter_bridge.h> 81#include <linux/netfilter_bridge.h>
85#include <linux/mroute.h> 82#include <linux/mroute.h>
86#include <linux/netlink.h> 83#include <linux/netlink.h>
84#include <linux/tcp.h>
87 85
88/*
89 * Shall we try to damage output packets if routing dev changes?
90 */
91
92int sysctl_ip_dynaddr;
93int sysctl_ip_default_ttl = IPDEFTTL; 86int sysctl_ip_default_ttl = IPDEFTTL;
94 87
95/* Generate a checksum for an outgoing IP datagram. */ 88/* Generate a checksum for an outgoing IP datagram. */
@@ -107,7 +100,6 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb)
107 newskb->pkt_type = PACKET_LOOPBACK; 100 newskb->pkt_type = PACKET_LOOPBACK;
108 newskb->ip_summed = CHECKSUM_UNNECESSARY; 101 newskb->ip_summed = CHECKSUM_UNNECESSARY;
109 BUG_TRAP(newskb->dst); 102 BUG_TRAP(newskb->dst);
110 nf_reset(newskb);
111 netif_rx(newskb); 103 netif_rx(newskb);
112 return 0; 104 return 0;
113} 105}
@@ -166,6 +158,8 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
166 dst_output); 158 dst_output);
167} 159}
168 160
161EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
162
169static inline int ip_finish_output2(struct sk_buff *skb) 163static inline int ip_finish_output2(struct sk_buff *skb)
170{ 164{
171 struct dst_entry *dst = skb->dst; 165 struct dst_entry *dst = skb->dst;
@@ -188,14 +182,6 @@ static inline int ip_finish_output2(struct sk_buff *skb)
188 skb = skb2; 182 skb = skb2;
189 } 183 }
190 184
191#ifdef CONFIG_BRIDGE_NETFILTER
192 /* bridge-netfilter defers calling some IP hooks to the bridge layer
193 * and still needs the conntrack reference.
194 */
195 if (skb->nf_bridge == NULL)
196#endif
197 nf_reset(skb);
198
199 if (hh) { 185 if (hh) {
200 int hh_alen; 186 int hh_alen;
201 187
@@ -214,7 +200,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
214 return -EINVAL; 200 return -EINVAL;
215} 201}
216 202
217int ip_finish_output(struct sk_buff *skb) 203static inline int ip_finish_output(struct sk_buff *skb)
218{ 204{
219 struct net_device *dev = skb->dst->dev; 205 struct net_device *dev = skb->dst->dev;
220 206
@@ -338,8 +324,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
338 if (ip_route_output_flow(&rt, &fl, sk, 0)) 324 if (ip_route_output_flow(&rt, &fl, sk, 0))
339 goto no_route; 325 goto no_route;
340 } 326 }
341 __sk_dst_set(sk, &rt->u.dst); 327 sk_setup_caps(sk, &rt->u.dst);
342 tcp_v4_setup_caps(sk, &rt->u.dst);
343 } 328 }
344 skb->dst = dst_clone(&rt->u.dst); 329 skb->dst = dst_clone(&rt->u.dst);
345 330
@@ -401,7 +386,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
401#endif 386#endif
402#ifdef CONFIG_NETFILTER 387#ifdef CONFIG_NETFILTER
403 to->nfmark = from->nfmark; 388 to->nfmark = from->nfmark;
404 to->nfcache = from->nfcache;
405 /* Connection association is same as pre-frag packet */ 389 /* Connection association is same as pre-frag packet */
406 nf_conntrack_put(to->nfct); 390 nf_conntrack_put(to->nfct);
407 to->nfct = from->nfct; 391 to->nfct = from->nfct;
@@ -589,7 +573,7 @@ slow_path:
589 */ 573 */
590 574
591 if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) { 575 if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
592 NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n")); 576 NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
593 err = -ENOMEM; 577 err = -ENOMEM;
594 goto fail; 578 goto fail;
595 } 579 }
@@ -1338,12 +1322,7 @@ void __init ip_init(void)
1338#endif 1322#endif
1339} 1323}
1340 1324
1341EXPORT_SYMBOL(ip_finish_output);
1342EXPORT_SYMBOL(ip_fragment); 1325EXPORT_SYMBOL(ip_fragment);
1343EXPORT_SYMBOL(ip_generic_getfrag); 1326EXPORT_SYMBOL(ip_generic_getfrag);
1344EXPORT_SYMBOL(ip_queue_xmit); 1327EXPORT_SYMBOL(ip_queue_xmit);
1345EXPORT_SYMBOL(ip_send_check); 1328EXPORT_SYMBOL(ip_send_check);
1346
1347#ifdef CONFIG_SYSCTL
1348EXPORT_SYMBOL(sysctl_ip_default_ttl);
1349#endif
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index fc7c481d0d79..2f0b47da5b37 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -153,7 +153,7 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc)
153 switch (cmsg->cmsg_type) { 153 switch (cmsg->cmsg_type) {
154 case IP_RETOPTS: 154 case IP_RETOPTS:
155 err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); 155 err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
156 err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40, 0); 156 err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40);
157 if (err) 157 if (err)
158 return err; 158 return err;
159 break; 159 break;
@@ -425,7 +425,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
425 struct ip_options * opt = NULL; 425 struct ip_options * opt = NULL;
426 if (optlen > 40 || optlen < 0) 426 if (optlen > 40 || optlen < 0)
427 goto e_inval; 427 goto e_inval;
428 err = ip_options_get(&opt, optval, optlen, 1); 428 err = ip_options_get_from_user(&opt, optval, optlen);
429 if (err) 429 if (err)
430 break; 430 break;
431 if (sk->sk_type == SOCK_STREAM) { 431 if (sk->sk_type == SOCK_STREAM) {
@@ -614,7 +614,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
614 } 614 }
615 case IP_MSFILTER: 615 case IP_MSFILTER:
616 { 616 {
617 extern int sysctl_optmem_max;
618 extern int sysctl_igmp_max_msf; 617 extern int sysctl_igmp_max_msf;
619 struct ip_msfilter *msf; 618 struct ip_msfilter *msf;
620 619
@@ -769,7 +768,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
769 } 768 }
770 case MCAST_MSFILTER: 769 case MCAST_MSFILTER:
771 { 770 {
772 extern int sysctl_optmem_max;
773 extern int sysctl_igmp_max_msf; 771 extern int sysctl_igmp_max_msf;
774 struct sockaddr_in *psin; 772 struct sockaddr_in *psin;
775 struct ip_msfilter *msf = NULL; 773 struct ip_msfilter *msf = NULL;
@@ -848,6 +846,9 @@ mc_msf_out:
848 846
849 case IP_IPSEC_POLICY: 847 case IP_IPSEC_POLICY:
850 case IP_XFRM_POLICY: 848 case IP_XFRM_POLICY:
849 err = -EPERM;
850 if (!capable(CAP_NET_ADMIN))
851 break;
851 err = xfrm_user_policy(sk, optname, optval, optlen); 852 err = xfrm_user_policy(sk, optname, optval, optlen);
852 break; 853 break;
853 854
@@ -1087,7 +1088,5 @@ int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
1087 1088
1088EXPORT_SYMBOL(ip_cmsg_recv); 1089EXPORT_SYMBOL(ip_cmsg_recv);
1089 1090
1090#ifdef CONFIG_IP_SCTP_MODULE
1091EXPORT_SYMBOL(ip_getsockopt); 1091EXPORT_SYMBOL(ip_getsockopt);
1092EXPORT_SYMBOL(ip_setsockopt); 1092EXPORT_SYMBOL(ip_setsockopt);
1093#endif
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 2065944fd9e5..fc718df17b40 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -214,8 +214,8 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
214 spi, IPPROTO_COMP, AF_INET); 214 spi, IPPROTO_COMP, AF_INET);
215 if (!x) 215 if (!x)
216 return; 216 return;
217 NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n", 217 NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n",
218 spi, NIPQUAD(iph->daddr))); 218 spi, NIPQUAD(iph->daddr));
219 xfrm_state_put(x); 219 xfrm_state_put(x);
220} 220}
221 221
@@ -345,8 +345,7 @@ static void ipcomp_free_tfms(struct crypto_tfm **tfms)
345 345
346 for_each_cpu(cpu) { 346 for_each_cpu(cpu) {
347 struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu); 347 struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu);
348 if (tfm) 348 crypto_free_tfm(tfm);
349 crypto_free_tfm(tfm);
350 } 349 }
351 free_percpu(tfms); 350 free_percpu(tfms);
352} 351}
@@ -358,7 +357,7 @@ static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name)
358 int cpu; 357 int cpu;
359 358
360 /* This can be any valid CPU ID so we don't need locking. */ 359 /* This can be any valid CPU ID so we don't need locking. */
361 cpu = smp_processor_id(); 360 cpu = raw_smp_processor_id();
362 361
363 list_for_each_entry(pos, &ipcomp_tfms_list, list) { 362 list_for_each_entry(pos, &ipcomp_tfms_list, list) {
364 struct crypto_tfm *tfm; 363 struct crypto_tfm *tfm;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index d2bf8e1930a3..953129d392d2 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -54,6 +54,7 @@
54#include <linux/major.h> 54#include <linux/major.h>
55#include <linux/root_dev.h> 55#include <linux/root_dev.h>
56#include <linux/delay.h> 56#include <linux/delay.h>
57#include <linux/nfs_fs.h>
57#include <net/arp.h> 58#include <net/arp.h>
58#include <net/ip.h> 59#include <net/ip.h>
59#include <net/ipconfig.h> 60#include <net/ipconfig.h>
@@ -393,7 +394,7 @@ static int __init ic_defaults(void)
393 394
394#ifdef IPCONFIG_RARP 395#ifdef IPCONFIG_RARP
395 396
396static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); 397static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev);
397 398
398static struct packet_type rarp_packet_type __initdata = { 399static struct packet_type rarp_packet_type __initdata = {
399 .type = __constant_htons(ETH_P_RARP), 400 .type = __constant_htons(ETH_P_RARP),
@@ -414,7 +415,7 @@ static inline void ic_rarp_cleanup(void)
414 * Process received RARP packet. 415 * Process received RARP packet.
415 */ 416 */
416static int __init 417static int __init
417ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) 418ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
418{ 419{
419 struct arphdr *rarp; 420 struct arphdr *rarp;
420 unsigned char *rarp_ptr; 421 unsigned char *rarp_ptr;
@@ -555,7 +556,7 @@ struct bootp_pkt { /* BOOTP packet format */
555#define DHCPRELEASE 7 556#define DHCPRELEASE 7
556#define DHCPINFORM 8 557#define DHCPINFORM 8
557 558
558static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); 559static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev);
559 560
560static struct packet_type bootp_packet_type __initdata = { 561static struct packet_type bootp_packet_type __initdata = {
561 .type = __constant_htons(ETH_P_IP), 562 .type = __constant_htons(ETH_P_IP),
@@ -823,7 +824,7 @@ static void __init ic_do_bootp_ext(u8 *ext)
823/* 824/*
824 * Receive BOOTP reply. 825 * Receive BOOTP reply.
825 */ 826 */
826static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) 827static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
827{ 828{
828 struct bootp_pkt *b; 829 struct bootp_pkt *b;
829 struct iphdr *h; 830 struct iphdr *h;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 68a78731f722..c05c1df0bb04 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -255,7 +255,6 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c
255 255
256 dev_hold(dev); 256 dev_hold(dev);
257 ipip_tunnel_link(nt); 257 ipip_tunnel_link(nt);
258 /* Do not decrement MOD_USE_COUNT here. */
259 return nt; 258 return nt;
260 259
261failed: 260failed:
@@ -273,7 +272,7 @@ static void ipip_tunnel_uninit(struct net_device *dev)
273 dev_put(dev); 272 dev_put(dev);
274} 273}
275 274
276static void ipip_err(struct sk_buff *skb, void *__unused) 275static void ipip_err(struct sk_buff *skb, u32 info)
277{ 276{
278#ifndef I_WISH_WORLD_WERE_PERFECT 277#ifndef I_WISH_WORLD_WERE_PERFECT
279 278
@@ -852,11 +851,39 @@ static int __init ipip_fb_tunnel_init(struct net_device *dev)
852 return 0; 851 return 0;
853} 852}
854 853
854#ifdef CONFIG_INET_TUNNEL
855static struct xfrm_tunnel ipip_handler = { 855static struct xfrm_tunnel ipip_handler = {
856 .handler = ipip_rcv, 856 .handler = ipip_rcv,
857 .err_handler = ipip_err, 857 .err_handler = ipip_err,
858}; 858};
859 859
860static inline int ipip_register(void)
861{
862 return xfrm4_tunnel_register(&ipip_handler);
863}
864
865static inline int ipip_unregister(void)
866{
867 return xfrm4_tunnel_deregister(&ipip_handler);
868}
869#else
870static struct net_protocol ipip_protocol = {
871 .handler = ipip_rcv,
872 .err_handler = ipip_err,
873 .no_policy = 1,
874};
875
876static inline int ipip_register(void)
877{
878 return inet_add_protocol(&ipip_protocol, IPPROTO_IPIP);
879}
880
881static inline int ipip_unregister(void)
882{
883 return inet_del_protocol(&ipip_protocol, IPPROTO_IPIP);
884}
885#endif
886
860static char banner[] __initdata = 887static char banner[] __initdata =
861 KERN_INFO "IPv4 over IPv4 tunneling driver\n"; 888 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
862 889
@@ -866,7 +893,7 @@ static int __init ipip_init(void)
866 893
867 printk(banner); 894 printk(banner);
868 895
869 if (xfrm4_tunnel_register(&ipip_handler) < 0) { 896 if (ipip_register() < 0) {
870 printk(KERN_INFO "ipip init: can't register tunnel\n"); 897 printk(KERN_INFO "ipip init: can't register tunnel\n");
871 return -EAGAIN; 898 return -EAGAIN;
872 } 899 }
@@ -888,16 +915,33 @@ static int __init ipip_init(void)
888 err2: 915 err2:
889 free_netdev(ipip_fb_tunnel_dev); 916 free_netdev(ipip_fb_tunnel_dev);
890 err1: 917 err1:
891 xfrm4_tunnel_deregister(&ipip_handler); 918 ipip_unregister();
892 goto out; 919 goto out;
893} 920}
894 921
922static void __exit ipip_destroy_tunnels(void)
923{
924 int prio;
925
926 for (prio = 1; prio < 4; prio++) {
927 int h;
928 for (h = 0; h < HASH_SIZE; h++) {
929 struct ip_tunnel *t;
930 while ((t = tunnels[prio][h]) != NULL)
931 unregister_netdevice(t->dev);
932 }
933 }
934}
935
895static void __exit ipip_fini(void) 936static void __exit ipip_fini(void)
896{ 937{
897 if (xfrm4_tunnel_deregister(&ipip_handler) < 0) 938 if (ipip_unregister() < 0)
898 printk(KERN_INFO "ipip close: can't deregister tunnel\n"); 939 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
899 940
900 unregister_netdev(ipip_fb_tunnel_dev); 941 rtnl_lock();
942 ipip_destroy_tunnels();
943 unregister_netdevice(ipip_fb_tunnel_dev);
944 rtnl_unlock();
901} 945}
902 946
903module_init(ipip_init); 947module_init(ipip_init);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 7833d920bdba..9dbf5909f3a6 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -103,7 +103,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
103 In this case data path is free of exclusive locks at all. 103 In this case data path is free of exclusive locks at all.
104 */ 104 */
105 105
106static kmem_cache_t *mrt_cachep; 106static kmem_cache_t *mrt_cachep __read_mostly;
107 107
108static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); 108static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
109static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); 109static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
@@ -362,7 +362,7 @@ out:
362 362
363/* Fill oifs list. It is called under write locked mrt_lock. */ 363/* Fill oifs list. It is called under write locked mrt_lock. */
364 364
365static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls) 365static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
366{ 366{
367 int vifi; 367 int vifi;
368 368
@@ -727,7 +727,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
727 if (c != NULL) { 727 if (c != NULL) {
728 write_lock_bh(&mrt_lock); 728 write_lock_bh(&mrt_lock);
729 c->mfc_parent = mfc->mfcc_parent; 729 c->mfc_parent = mfc->mfcc_parent;
730 ipmr_update_threshoulds(c, mfc->mfcc_ttls); 730 ipmr_update_thresholds(c, mfc->mfcc_ttls);
731 if (!mrtsock) 731 if (!mrtsock)
732 c->mfc_flags |= MFC_STATIC; 732 c->mfc_flags |= MFC_STATIC;
733 write_unlock_bh(&mrt_lock); 733 write_unlock_bh(&mrt_lock);
@@ -744,7 +744,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
744 c->mfc_origin=mfc->mfcc_origin.s_addr; 744 c->mfc_origin=mfc->mfcc_origin.s_addr;
745 c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr; 745 c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
746 c->mfc_parent=mfc->mfcc_parent; 746 c->mfc_parent=mfc->mfcc_parent;
747 ipmr_update_threshoulds(c, mfc->mfcc_ttls); 747 ipmr_update_thresholds(c, mfc->mfcc_ttls);
748 if (!mrtsock) 748 if (!mrtsock)
749 c->mfc_flags |= MFC_STATIC; 749 c->mfc_flags |= MFC_STATIC;
750 750
diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig
index 63a82b4b64bb..c9820bfc493a 100644
--- a/net/ipv4/ipvs/Kconfig
+++ b/net/ipv4/ipvs/Kconfig
@@ -2,11 +2,11 @@
2# IP Virtual Server configuration 2# IP Virtual Server configuration
3# 3#
4menu "IP: Virtual Server Configuration" 4menu "IP: Virtual Server Configuration"
5 depends on INET && NETFILTER 5 depends on NETFILTER
6 6
7config IP_VS 7config IP_VS
8 tristate "IP virtual server support (EXPERIMENTAL)" 8 tristate "IP virtual server support (EXPERIMENTAL)"
9 depends on INET && NETFILTER 9 depends on NETFILTER
10 ---help--- 10 ---help---
11 IP Virtual Server support will let you build a high-performance 11 IP Virtual Server support will let you build a high-performance
12 virtual server based on a cluster of two or more real servers. This 12 virtual server based on a cluster of two or more real servers. This
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index d9212addd193..6e092dadb388 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -26,6 +26,7 @@
26#include <linux/in.h> 26#include <linux/in.h>
27#include <linux/ip.h> 27#include <linux/ip.h>
28#include <net/protocol.h> 28#include <net/protocol.h>
29#include <net/tcp.h>
29#include <asm/system.h> 30#include <asm/system.h>
30#include <linux/stat.h> 31#include <linux/stat.h>
31#include <linux/proc_fs.h> 32#include <linux/proc_fs.h>
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 9f16ab309106..e11952ea17af 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -40,7 +40,7 @@
40static struct list_head *ip_vs_conn_tab; 40static struct list_head *ip_vs_conn_tab;
41 41
42/* SLAB cache for IPVS connections */ 42/* SLAB cache for IPVS connections */
43static kmem_cache_t *ip_vs_conn_cachep; 43static kmem_cache_t *ip_vs_conn_cachep __read_mostly;
44 44
45/* counter for current IPVS connections */ 45/* counter for current IPVS connections */
46static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); 46static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
@@ -758,7 +758,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
758 return 1; 758 return 1;
759} 759}
760 760
761 761/* Called from keventd and must protect itself from softirqs */
762void ip_vs_random_dropentry(void) 762void ip_vs_random_dropentry(void)
763{ 763{
764 int idx; 764 int idx;
@@ -773,7 +773,7 @@ void ip_vs_random_dropentry(void)
773 /* 773 /*
774 * Lock is actually needed in this loop. 774 * Lock is actually needed in this loop.
775 */ 775 */
776 ct_write_lock(hash); 776 ct_write_lock_bh(hash);
777 777
778 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 778 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
779 if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT)) 779 if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT))
@@ -806,7 +806,7 @@ void ip_vs_random_dropentry(void)
806 ip_vs_conn_expire_now(cp->control); 806 ip_vs_conn_expire_now(cp->control);
807 } 807 }
808 } 808 }
809 ct_write_unlock(hash); 809 ct_write_unlock_bh(hash);
810 } 810 }
811} 811}
812 812
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 5fb257dd07cb..3ac7eeca04ac 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -22,6 +22,7 @@
22 * 22 *
23 * Changes: 23 * Changes:
24 * Paul `Rusty' Russell properly handle non-linear skbs 24 * Paul `Rusty' Russell properly handle non-linear skbs
25 * Harald Welte don't use nfcache
25 * 26 *
26 */ 27 */
27 28
@@ -529,7 +530,7 @@ static unsigned int ip_vs_post_routing(unsigned int hooknum,
529 const struct net_device *out, 530 const struct net_device *out,
530 int (*okfn)(struct sk_buff *)) 531 int (*okfn)(struct sk_buff *))
531{ 532{
532 if (!((*pskb)->nfcache & NFC_IPVS_PROPERTY)) 533 if (!((*pskb)->ipvs_property))
533 return NF_ACCEPT; 534 return NF_ACCEPT;
534 535
535 /* The packet was sent from IPVS, exit this chain */ 536 /* The packet was sent from IPVS, exit this chain */
@@ -701,7 +702,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
701 /* do the statistics and put it back */ 702 /* do the statistics and put it back */
702 ip_vs_out_stats(cp, skb); 703 ip_vs_out_stats(cp, skb);
703 704
704 skb->nfcache |= NFC_IPVS_PROPERTY; 705 skb->ipvs_property = 1;
705 verdict = NF_ACCEPT; 706 verdict = NF_ACCEPT;
706 707
707 out: 708 out:
@@ -739,7 +740,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
739 740
740 EnterFunction(11); 741 EnterFunction(11);
741 742
742 if (skb->nfcache & NFC_IPVS_PROPERTY) 743 if (skb->ipvs_property)
743 return NF_ACCEPT; 744 return NF_ACCEPT;
744 745
745 iph = skb->nh.iph; 746 iph = skb->nh.iph;
@@ -821,7 +822,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
821 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); 822 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
822 ip_vs_conn_put(cp); 823 ip_vs_conn_put(cp);
823 824
824 skb->nfcache |= NFC_IPVS_PROPERTY; 825 skb->ipvs_property = 1;
825 826
826 LeaveFunction(11); 827 LeaveFunction(11);
827 return NF_ACCEPT; 828 return NF_ACCEPT;
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 12a82e91d22a..2d66848e7aa0 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -90,7 +90,8 @@ int ip_vs_get_debug_level(void)
90#endif 90#endif
91 91
92/* 92/*
93 * update_defense_level is called from keventd and from sysctl. 93 * update_defense_level is called from keventd and from sysctl,
94 * so it needs to protect itself from softirqs
94 */ 95 */
95static void update_defense_level(void) 96static void update_defense_level(void)
96{ 97{
@@ -110,6 +111,8 @@ static void update_defense_level(void)
110 111
111 nomem = (availmem < sysctl_ip_vs_amemthresh); 112 nomem = (availmem < sysctl_ip_vs_amemthresh);
112 113
114 local_bh_disable();
115
113 /* drop_entry */ 116 /* drop_entry */
114 spin_lock(&__ip_vs_dropentry_lock); 117 spin_lock(&__ip_vs_dropentry_lock);
115 switch (sysctl_ip_vs_drop_entry) { 118 switch (sysctl_ip_vs_drop_entry) {
@@ -206,6 +209,8 @@ static void update_defense_level(void)
206 if (to_change >= 0) 209 if (to_change >= 0)
207 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); 210 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
208 write_unlock(&__ip_vs_securetcp_lock); 211 write_unlock(&__ip_vs_securetcp_lock);
212
213 local_bh_enable();
209} 214}
210 215
211 216
@@ -1360,9 +1365,7 @@ proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1360 /* Restore the correct value */ 1365 /* Restore the correct value */
1361 *valp = val; 1366 *valp = val;
1362 } else { 1367 } else {
1363 local_bh_disable();
1364 update_defense_level(); 1368 update_defense_level();
1365 local_bh_enable();
1366 } 1369 }
1367 } 1370 }
1368 return rc; 1371 return rc;
@@ -1595,7 +1598,7 @@ static ctl_table vs_table[] = {
1595 { .ctl_name = 0 } 1598 { .ctl_name = 0 }
1596}; 1599};
1597 1600
1598static ctl_table ipv4_table[] = { 1601static ctl_table ipvs_ipv4_table[] = {
1599 { 1602 {
1600 .ctl_name = NET_IPV4, 1603 .ctl_name = NET_IPV4,
1601 .procname = "ipv4", 1604 .procname = "ipv4",
@@ -1610,7 +1613,7 @@ static ctl_table vs_root_table[] = {
1610 .ctl_name = CTL_NET, 1613 .ctl_name = CTL_NET,
1611 .procname = "net", 1614 .procname = "net",
1612 .mode = 0555, 1615 .mode = 0555,
1613 .child = ipv4_table, 1616 .child = ipvs_ipv4_table,
1614 }, 1617 },
1615 { .ctl_name = 0 } 1618 { .ctl_name = 0 }
1616}; 1619};
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index c035838b780a..561cda326fa8 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -131,7 +131,7 @@ static ctl_table vs_table[] = {
131 { .ctl_name = 0 } 131 { .ctl_name = 0 }
132}; 132};
133 133
134static ctl_table ipv4_table[] = { 134static ctl_table ipvs_ipv4_table[] = {
135 { 135 {
136 .ctl_name = NET_IPV4, 136 .ctl_name = NET_IPV4,
137 .procname = "ipv4", 137 .procname = "ipv4",
@@ -146,7 +146,7 @@ static ctl_table lblc_root_table[] = {
146 .ctl_name = CTL_NET, 146 .ctl_name = CTL_NET,
147 .procname = "net", 147 .procname = "net",
148 .mode = 0555, 148 .mode = 0555,
149 .child = ipv4_table 149 .child = ipvs_ipv4_table
150 }, 150 },
151 { .ctl_name = 0 } 151 { .ctl_name = 0 }
152}; 152};
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 22b5dd55d271..ce456dbf09a5 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -320,7 +320,7 @@ static ctl_table vs_table[] = {
320 { .ctl_name = 0 } 320 { .ctl_name = 0 }
321}; 321};
322 322
323static ctl_table ipv4_table[] = { 323static ctl_table ipvs_ipv4_table[] = {
324 { 324 {
325 .ctl_name = NET_IPV4, 325 .ctl_name = NET_IPV4,
326 .procname = "ipv4", 326 .procname = "ipv4",
@@ -335,7 +335,7 @@ static ctl_table lblcr_root_table[] = {
335 .ctl_name = CTL_NET, 335 .ctl_name = CTL_NET,
336 .procname = "net", 336 .procname = "net",
337 .mode = 0555, 337 .mode = 0555,
338 .child = ipv4_table 338 .child = ipvs_ipv4_table
339 }, 339 },
340 { .ctl_name = 0 } 340 { .ctl_name = 0 }
341}; 341};
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index e65de675da74..c19408973c09 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -604,14 +604,14 @@ void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
604} 604}
605 605
606 606
607static void tcp_init(struct ip_vs_protocol *pp) 607static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
608{ 608{
609 IP_VS_INIT_HASH_TABLE(tcp_apps); 609 IP_VS_INIT_HASH_TABLE(tcp_apps);
610 pp->timeout_table = tcp_timeouts; 610 pp->timeout_table = tcp_timeouts;
611} 611}
612 612
613 613
614static void tcp_exit(struct ip_vs_protocol *pp) 614static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
615{ 615{
616} 616}
617 617
@@ -621,8 +621,8 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
621 .protocol = IPPROTO_TCP, 621 .protocol = IPPROTO_TCP,
622 .dont_defrag = 0, 622 .dont_defrag = 0,
623 .appcnt = ATOMIC_INIT(0), 623 .appcnt = ATOMIC_INIT(0),
624 .init = tcp_init, 624 .init = ip_vs_tcp_init,
625 .exit = tcp_exit, 625 .exit = ip_vs_tcp_exit,
626 .register_app = tcp_register_app, 626 .register_app = tcp_register_app,
627 .unregister_app = tcp_unregister_app, 627 .unregister_app = tcp_unregister_app,
628 .conn_schedule = tcp_conn_schedule, 628 .conn_schedule = tcp_conn_schedule,
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index a8512a3fd08a..3b87482049cf 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -127,7 +127,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
127 127
128#define IP_VS_XMIT(skb, rt) \ 128#define IP_VS_XMIT(skb, rt) \
129do { \ 129do { \
130 (skb)->nfcache |= NFC_IPVS_PROPERTY; \ 130 (skb)->ipvs_property = 1; \
131 (skb)->ip_summed = CHECKSUM_NONE; \ 131 (skb)->ip_summed = CHECKSUM_NONE; \
132 NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \ 132 NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \
133 (rt)->u.dst.dev, dst_output); \ 133 (rt)->u.dst.dev, dst_output); \
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c
index c9cf8726051d..db67373f9b34 100644
--- a/net/ipv4/multipath_drr.c
+++ b/net/ipv4/multipath_drr.c
@@ -107,7 +107,7 @@ static int drr_dev_event(struct notifier_block *this,
107 return NOTIFY_DONE; 107 return NOTIFY_DONE;
108} 108}
109 109
110struct notifier_block drr_dev_notifier = { 110static struct notifier_block drr_dev_notifier = {
111 .notifier_call = drr_dev_event, 111 .notifier_call = drr_dev_event,
112}; 112};
113 113
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
new file mode 100644
index 000000000000..ae0779d82c5d
--- /dev/null
+++ b/net/ipv4/netfilter.c
@@ -0,0 +1,139 @@
1/* IPv4 specific functions of netfilter core */
2
3#include <linux/config.h>
4#ifdef CONFIG_NETFILTER
5
6#include <linux/kernel.h>
7#include <linux/netfilter.h>
8#include <linux/netfilter_ipv4.h>
9
10#include <linux/tcp.h>
11#include <linux/udp.h>
12#include <linux/icmp.h>
13#include <net/route.h>
14#include <linux/ip.h>
15
16/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
17int ip_route_me_harder(struct sk_buff **pskb)
18{
19 struct iphdr *iph = (*pskb)->nh.iph;
20 struct rtable *rt;
21 struct flowi fl = {};
22 struct dst_entry *odst;
23 unsigned int hh_len;
24
25 /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
26 * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
27 */
28 if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
29 fl.nl_u.ip4_u.daddr = iph->daddr;
30 fl.nl_u.ip4_u.saddr = iph->saddr;
31 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
32 fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
33#ifdef CONFIG_IP_ROUTE_FWMARK
34 fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
35#endif
36 fl.proto = iph->protocol;
37 if (ip_route_output_key(&rt, &fl) != 0)
38 return -1;
39
40 /* Drop old route. */
41 dst_release((*pskb)->dst);
42 (*pskb)->dst = &rt->u.dst;
43 } else {
44 /* non-local src, find valid iif to satisfy
45 * rp-filter when calling ip_route_input. */
46 fl.nl_u.ip4_u.daddr = iph->saddr;
47 if (ip_route_output_key(&rt, &fl) != 0)
48 return -1;
49
50 odst = (*pskb)->dst;
51 if (ip_route_input(*pskb, iph->daddr, iph->saddr,
52 RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
53 dst_release(&rt->u.dst);
54 return -1;
55 }
56 dst_release(&rt->u.dst);
57 dst_release(odst);
58 }
59
60 if ((*pskb)->dst->error)
61 return -1;
62
63 /* Change in oif may mean change in hh_len. */
64 hh_len = (*pskb)->dst->dev->hard_header_len;
65 if (skb_headroom(*pskb) < hh_len) {
66 struct sk_buff *nskb;
67
68 nskb = skb_realloc_headroom(*pskb, hh_len);
69 if (!nskb)
70 return -1;
71 if ((*pskb)->sk)
72 skb_set_owner_w(nskb, (*pskb)->sk);
73 kfree_skb(*pskb);
74 *pskb = nskb;
75 }
76
77 return 0;
78}
79EXPORT_SYMBOL(ip_route_me_harder);
80
81/*
82 * Extra routing may be needed on local out, as the QUEUE target never
83 * returns control to the table.
84 */
85
86struct ip_rt_info {
87 u_int32_t daddr;
88 u_int32_t saddr;
89 u_int8_t tos;
90};
91
92static void queue_save(const struct sk_buff *skb, struct nf_info *info)
93{
94 struct ip_rt_info *rt_info = nf_info_reroute(info);
95
96 if (info->hook == NF_IP_LOCAL_OUT) {
97 const struct iphdr *iph = skb->nh.iph;
98
99 rt_info->tos = iph->tos;
100 rt_info->daddr = iph->daddr;
101 rt_info->saddr = iph->saddr;
102 }
103}
104
105static int queue_reroute(struct sk_buff **pskb, const struct nf_info *info)
106{
107 const struct ip_rt_info *rt_info = nf_info_reroute(info);
108
109 if (info->hook == NF_IP_LOCAL_OUT) {
110 struct iphdr *iph = (*pskb)->nh.iph;
111
112 if (!(iph->tos == rt_info->tos
113 && iph->daddr == rt_info->daddr
114 && iph->saddr == rt_info->saddr))
115 return ip_route_me_harder(pskb);
116 }
117 return 0;
118}
119
120static struct nf_queue_rerouter ip_reroute = {
121 .rer_size = sizeof(struct ip_rt_info),
122 .save = queue_save,
123 .reroute = queue_reroute,
124};
125
126static int init(void)
127{
128 return nf_register_queue_rerouter(PF_INET, &ip_reroute);
129}
130
131static void fini(void)
132{
133 nf_unregister_queue_rerouter(PF_INET);
134}
135
136module_init(init);
137module_exit(fini);
138
139#endif /* CONFIG_NETFILTER */
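A caller-side sketch for orientation (a hypothetical LOCAL_OUT hook body, not code from this file): once a hook has rewritten the addresses, the route cached in skb->dst no longer matches the header, so the packet must be re-routed or dropped:

        iph->daddr = new_daddr;              /* hypothetical address rewrite */
        ip_send_check(iph);                  /* header changed: recompute the checksum */
        if (ip_route_me_harder(pskb) != 0)
                return NF_DROP;              /* no valid route for the mangled packet */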
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 46d4cb1c06f0..e046f5521814 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -40,6 +40,16 @@ config IP_NF_CONNTRACK_MARK
40 of packets, but this mark value is kept in the conntrack session 40 of packets, but this mark value is kept in the conntrack session
41 instead of the individual packets. 41 instead of the individual packets.
42 42
43config IP_NF_CONNTRACK_EVENTS
44 bool "Connection tracking events"
45 depends on IP_NF_CONNTRACK
46 help
47 If this option is enabled, the connection tracking code will
48 provide a notifier chain that can be used by other kernel code
49 to get notified about changes in the connection tracking state.
50
51 If unsure, say `N'.
52
43config IP_NF_CT_PROTO_SCTP 53config IP_NF_CT_PROTO_SCTP
44 tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' 54 tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
45 depends on IP_NF_CONNTRACK && EXPERIMENTAL 55 depends on IP_NF_CONNTRACK && EXPERIMENTAL
@@ -100,11 +110,15 @@ config IP_NF_AMANDA
100 To compile it as a module, choose M here. If unsure, say Y. 110 To compile it as a module, choose M here. If unsure, say Y.
101 111
102config IP_NF_QUEUE 112config IP_NF_QUEUE
103 tristate "Userspace queueing via NETLINK" 113 tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
104 help 114 help
105 Netfilter has the ability to queue packets to user space: the 115 Netfilter has the ability to queue packets to user space: the
106 netlink device can be used to access them using this driver. 116 netlink device can be used to access them using this driver.
107 117
118 This option enables the old IPv4-only "ip_queue" implementation
119 which has been obsoleted by the new "nfnetlink_queue" code (see
120 CONFIG_NETFILTER_NETLINK_QUEUE).
121
108 To compile it as a module, choose M here. If unsure, say N. 122 To compile it as a module, choose M here. If unsure, say N.
109 123
110config IP_NF_IPTABLES 124config IP_NF_IPTABLES
@@ -340,6 +354,17 @@ config IP_NF_MATCH_SCTP
340 If you want to compile it as a module, say M here and read 354 If you want to compile it as a module, say M here and read
341 <file:Documentation/modules.txt>. If unsure, say `N'. 355 <file:Documentation/modules.txt>. If unsure, say `N'.
342 356
357config IP_NF_MATCH_DCCP
358 tristate 'DCCP protocol match support'
359 depends on IP_NF_IPTABLES
360 help
361 With this option enabled, you will be able to use the iptables
362 `dccp' match in order to match on DCCP source/destination ports
363 and DCCP flags.
364
365 If you want to compile it as a module, say M here and read
366 <file:Documentation/modules.txt>. If unsure, say `N'.
367
343config IP_NF_MATCH_COMMENT 368config IP_NF_MATCH_COMMENT
344 tristate 'comment match support' 369 tristate 'comment match support'
345 depends on IP_NF_IPTABLES 370 depends on IP_NF_IPTABLES
@@ -361,6 +386,16 @@ config IP_NF_MATCH_CONNMARK
361 <file:Documentation/modules.txt>. The module will be called 386 <file:Documentation/modules.txt>. The module will be called
362 ipt_connmark.o. If unsure, say `N'. 387 ipt_connmark.o. If unsure, say `N'.
363 388
389config IP_NF_MATCH_CONNBYTES
390 tristate 'Connection byte/packet counter match support'
391 depends on IP_NF_CT_ACCT && IP_NF_IPTABLES
392 help
393 This option adds a `connbytes' match, which allows you to match the
394 number of bytes and/or packets for each direction within a connection.
395
396 If you want to compile it as a module, say M here and read
397 <file:Documentation/modules.txt>. If unsure, say `N'.
398
364config IP_NF_MATCH_HASHLIMIT 399config IP_NF_MATCH_HASHLIMIT
365 tristate 'hashlimit match support' 400 tristate 'hashlimit match support'
366 depends on IP_NF_IPTABLES 401 depends on IP_NF_IPTABLES
@@ -375,6 +410,19 @@ config IP_NF_MATCH_HASHLIMIT
375 destination IP' or `500pps from any given source IP' with a single 410 destination IP' or `500pps from any given source IP' with a single
376 IPtables rule. 411 IPtables rule.
377 412
413config IP_NF_MATCH_STRING
414 tristate 'string match support'
415 depends on IP_NF_IPTABLES
416 select TEXTSEARCH
417 select TEXTSEARCH_KMP
418 select TEXTSEARCH_BM
419 select TEXTSEARCH_FSM
420 help
421 This option adds a `string' match, which allows you to look for
422 pattern matches in packets.
423
424 To compile it as a module, choose M here. If unsure, say N.
425
378# `filter', generic and specific targets 426# `filter', generic and specific targets
379config IP_NF_FILTER 427config IP_NF_FILTER
380 tristate "Packet filtering" 428 tristate "Packet filtering"
@@ -616,6 +664,20 @@ config IP_NF_TARGET_CLASSIFY
616 664
617 To compile it as a module, choose M here. If unsure, say N. 665 To compile it as a module, choose M here. If unsure, say N.
618 666
667config IP_NF_TARGET_TTL
668 tristate 'TTL target support'
669 depends on IP_NF_MANGLE
670 help
671 This option adds a `TTL' target, which enables the user to modify
672 the TTL value of the IP header.
673
674 While it is safe to decrement/lower the TTL, this target also enables
675 functionality to increment and set the TTL value of the IP header to
676 arbitrary values. This is EXTREMELY DANGEROUS since you can easily
677 create immortal packets that loop forever on the network.
678
679 To compile it as a module, choose M here. If unsure, say N.
680
619config IP_NF_TARGET_CONNMARK 681config IP_NF_TARGET_CONNMARK
620 tristate 'CONNMARK target support' 682 tristate 'CONNMARK target support'
621 depends on IP_NF_CONNTRACK_MARK && IP_NF_MANGLE 683 depends on IP_NF_CONNTRACK_MARK && IP_NF_MANGLE
@@ -692,5 +754,11 @@ config IP_NF_ARP_MANGLE
692 Allows altering the ARP packet payload: source and destination 754 Allows altering the ARP packet payload: source and destination
693 hardware and network addresses. 755 hardware and network addresses.
694 756
757config IP_NF_CONNTRACK_NETLINK
758 tristate 'Connection tracking netlink interface'
759 depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
760 help
761 This option enables support for a netlink-based userspace interface.
762
695endmenu 763endmenu
696 764
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 45796d5924dd..a7bd38f50522 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -9,6 +9,10 @@ iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helpe
 # connection tracking
 obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
 
+# conntrack netlink interface
+obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
+
+
 # SCTP protocol connection tracking
 obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
 
@@ -38,6 +42,7 @@ obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o
 obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o
 obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o
 obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o
+obj-$(CONFIG_IP_NF_MATCH_DCCP) += ipt_dccp.o
 obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o
 obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o
 obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
@@ -54,11 +59,13 @@ obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
 obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o
 obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o
 obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o
+obj-$(CONFIG_IP_NF_MATCH_CONNBYTES) += ipt_connbytes.o
 obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o
 obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o
 obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
 obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o
 obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o
+obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o
 
 # targets
 obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
@@ -78,6 +85,7 @@ obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
 obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o
 obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o
 obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
+obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
 
 # generic ARP tables
 obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
@@ -87,3 +95,4 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
 obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
 
 obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
+obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ipt_NFQUEUE.o
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index a78a320eee08..be4c9eb3243f 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -40,7 +40,7 @@ MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
 static char *conns[] = { "DATA ", "MESG ", "INDEX " };
 
 /* This is slow, but it's simple. --RR */
-static char amanda_buffer[65536];
+static char *amanda_buffer;
 static DEFINE_SPINLOCK(amanda_buffer_lock);
 
 unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
@@ -101,14 +101,13 @@ static int help(struct sk_buff **pskb,
 		if (port == 0 || len > 5)
 			break;
 
-		exp = ip_conntrack_expect_alloc();
+		exp = ip_conntrack_expect_alloc(ct);
 		if (exp == NULL) {
 			ret = NF_DROP;
 			goto out;
 		}
 
 		exp->expectfn = NULL;
-		exp->master = ct;
 
 		exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
 		exp->tuple.src.u.tcp.port = 0;
@@ -126,10 +125,9 @@ static int help(struct sk_buff **pskb,
 			ret = ip_nat_amanda_hook(pskb, ctinfo,
 						 tmp - amanda_buffer,
 						 len, exp);
-		else if (ip_conntrack_expect_related(exp) != 0) {
-			ip_conntrack_expect_free(exp);
+		else if (ip_conntrack_expect_related(exp) != 0)
 			ret = NF_DROP;
-		}
+		ip_conntrack_expect_put(exp);
 	}
 
 out:
@@ -155,11 +153,25 @@ static struct ip_conntrack_helper amanda_helper = {
 static void __exit fini(void)
 {
 	ip_conntrack_helper_unregister(&amanda_helper);
+	kfree(amanda_buffer);
 }
 
 static int __init init(void)
 {
-	return ip_conntrack_helper_register(&amanda_helper);
+	int ret;
+
+	amanda_buffer = kmalloc(65536, GFP_KERNEL);
+	if (!amanda_buffer)
+		return -ENOMEM;
+
+	ret = ip_conntrack_helper_register(&amanda_helper);
+	if (ret < 0) {
+		kfree(amanda_buffer);
+		return ret;
+	}
+	return 0;
+
+
 }
 
 module_init(init);
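The amanda hunks above are the template for every helper converted in this patch: ip_conntrack_expect_alloc() now takes the master conntrack and returns an object with one reference held by the creator, and the helper drops that reference unconditionally once it is done, whether or not the expectation was accepted. Condensed into one sketch, using only functions from this patch:

static int example_expect(struct sk_buff **pskb, struct ip_conntrack *ct)
{
	struct ip_conntrack_expect *exp;
	int ret = NF_ACCEPT;

	exp = ip_conntrack_expect_alloc(ct);	/* takes a reference on ct */
	if (exp == NULL)
		return NF_DROP;

	exp->expectfn = NULL;
	/* ... fill in exp->tuple and exp->mask as the helpers above do ... */

	if (ip_conntrack_expect_related(exp) != 0)
		ret = NF_DROP;
	ip_conntrack_expect_put(exp);	/* drop the creator's reference */
	return ret;
}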
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 4b78ebeb6635..a0648600190e 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -37,6 +37,7 @@
 #include <linux/err.h>
 #include <linux/percpu.h>
 #include <linux/moduleparam.h>
+#include <linux/notifier.h>
 
 /* ip_conntrack_lock protects the main hash table, protocol/helper/expected
    registrations, conntrack timers*/
@@ -49,7 +50,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/listhelp.h>
 
-#define IP_CONNTRACK_VERSION	"2.1"
+#define IP_CONNTRACK_VERSION	"2.3"
 
 #if 0
 #define DEBUGP printk
@@ -69,21 +70,80 @@ static LIST_HEAD(helpers);
 unsigned int ip_conntrack_htable_size = 0;
 int ip_conntrack_max;
 struct list_head *ip_conntrack_hash;
-static kmem_cache_t *ip_conntrack_cachep;
-static kmem_cache_t *ip_conntrack_expect_cachep;
+static kmem_cache_t *ip_conntrack_cachep __read_mostly;
+static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
 struct ip_conntrack ip_conntrack_untracked;
 unsigned int ip_ct_log_invalid;
 static LIST_HEAD(unconfirmed);
 static int ip_conntrack_vmalloc;
 
-DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
+static unsigned int ip_conntrack_next_id = 1;
+static unsigned int ip_conntrack_expect_next_id = 1;
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+struct notifier_block *ip_conntrack_chain;
+struct notifier_block *ip_conntrack_expect_chain;
+
+DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
+
+/* deliver cached events and clear cache entry - must be called with locally
+ * disabled softirqs */
+static inline void
+__ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
+{
+	DEBUGP("ecache: delivering events for %p\n", ecache->ct);
+	if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
+		notifier_call_chain(&ip_conntrack_chain, ecache->events,
+				    ecache->ct);
+	ecache->events = 0;
+	ip_conntrack_put(ecache->ct);
+	ecache->ct = NULL;
+}
+
+/* Deliver all cached events for a particular conntrack. This is called
+ * by code prior to async packet handling or freeing the skb */
+void ip_ct_deliver_cached_events(const struct ip_conntrack *ct)
+{
+	struct ip_conntrack_ecache *ecache;
+
+	local_bh_disable();
+	ecache = &__get_cpu_var(ip_conntrack_ecache);
+	if (ecache->ct == ct)
+		__ip_ct_deliver_cached_events(ecache);
+	local_bh_enable();
+}
+
+void __ip_ct_event_cache_init(struct ip_conntrack *ct)
+{
+	struct ip_conntrack_ecache *ecache;
+
+	/* take care of delivering potentially old events */
+	ecache = &__get_cpu_var(ip_conntrack_ecache);
+	BUG_ON(ecache->ct == ct);
+	if (ecache->ct)
+		__ip_ct_deliver_cached_events(ecache);
+	/* initialize for this conntrack/packet */
+	ecache->ct = ct;
+	nf_conntrack_get(&ct->ct_general);
+}
 
-void
-ip_conntrack_put(struct ip_conntrack *ct)
+/* flush the event cache - touches other CPU's data and must not be called while
+ * packets are still passing through the code */
+static void ip_ct_event_cache_flush(void)
 {
-	IP_NF_ASSERT(ct);
-	nf_conntrack_put(&ct->ct_general);
+	struct ip_conntrack_ecache *ecache;
+	int cpu;
+
+	for_each_cpu(cpu) {
+		ecache = &per_cpu(ip_conntrack_ecache, cpu);
+		if (ecache->ct)
+			ip_conntrack_put(ecache->ct);
+	}
 }
+#else
+static inline void ip_ct_event_cache_flush(void) {}
+#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
+
+DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
 
 static int ip_conntrack_hash_rnd_initted;
 static unsigned int ip_conntrack_hash_rnd;
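ip_conntrack_chain above is a plain notifier_block chain, so a listener attaches with the standard notifier API; ctnetlink (added later in this patch) is the in-tree consumer. A sketch, assuming the stock notifier_chain_register() of this kernel generation (any dedicated registration wrapper is outside the hunks shown here):

/* Sketch of a conntrack-event listener.  Assumes the plain notifier API
 * of this era; a dedicated registration helper, if any, is not shown
 * in these hunks. */
static int my_ct_event(struct notifier_block *this, unsigned long events,
		       void *ptr)
{
	struct ip_conntrack *ct = ptr;

	if (events & IPCT_NEW)
		DEBUGP("new conntrack %p\n", ct);
	return NOTIFY_DONE;
}

static struct notifier_block my_ct_nb = {
	.notifier_call	= my_ct_event,
};

/* notifier_chain_register(&ip_conntrack_chain, &my_ct_nb); */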
@@ -137,20 +197,20 @@ ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
 
 
 /* ip_conntrack_expect helper functions */
-static void destroy_expect(struct ip_conntrack_expect *exp)
+static void unlink_expect(struct ip_conntrack_expect *exp)
 {
-	ip_conntrack_put(exp->master);
+	ASSERT_WRITE_LOCK(&ip_conntrack_lock);
 	IP_NF_ASSERT(!timer_pending(&exp->timeout));
-	kmem_cache_free(ip_conntrack_expect_cachep, exp);
+	list_del(&exp->list);
 	CONNTRACK_STAT_INC(expect_delete);
+	exp->master->expecting--;
+	ip_conntrack_expect_put(exp);
 }
 
-static void unlink_expect(struct ip_conntrack_expect *exp)
+void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp)
 {
-	ASSERT_WRITE_LOCK(&ip_conntrack_lock);
-	list_del(&exp->list);
-	/* Logically in destroy_expect, but we hold the lock here. */
-	exp->master->expecting--;
+	unlink_expect(exp);
+	ip_conntrack_expect_put(exp);
 }
 
 static void expectation_timed_out(unsigned long ul_expect)
@@ -160,7 +220,34 @@ static void expectation_timed_out(unsigned long ul_expect)
 	write_lock_bh(&ip_conntrack_lock);
 	unlink_expect(exp);
 	write_unlock_bh(&ip_conntrack_lock);
-	destroy_expect(exp);
+	ip_conntrack_expect_put(exp);
+}
+
+struct ip_conntrack_expect *
+__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
+{
+	struct ip_conntrack_expect *i;
+
+	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
+		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
+			atomic_inc(&i->use);
+			return i;
+		}
+	}
+	return NULL;
+}
+
+/* Just find a expectation corresponding to a tuple. */
+struct ip_conntrack_expect *
+ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
+{
+	struct ip_conntrack_expect *i;
+
+	read_lock_bh(&ip_conntrack_lock);
+	i = __ip_conntrack_expect_find(tuple);
+	read_unlock_bh(&ip_conntrack_lock);
+
+	return i;
 }
 
 /* If an expectation for this connection is found, it gets delete from
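Note that __ip_conntrack_expect_find() bumps exp->use before returning, so every successful ip_conntrack_expect_find_get() must be paired with ip_conntrack_expect_put(). A usage sketch:

static int tuple_is_expected(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_expect *exp;

	exp = ip_conntrack_expect_find_get(tuple);
	if (!exp)
		return 0;
	/* ... inspect exp->master or exp->tuple here ... */
	ip_conntrack_expect_put(exp);	/* balance find_get's reference */
	return 1;
}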
@@ -187,7 +274,7 @@ find_expectation(const struct ip_conntrack_tuple *tuple)
 }
 
 /* delete all expectations for this conntrack */
-static void remove_expectations(struct ip_conntrack *ct)
+void ip_ct_remove_expectations(struct ip_conntrack *ct)
 {
 	struct ip_conntrack_expect *i, *tmp;
 
@@ -198,7 +285,7 @@ static void remove_expectations(struct ip_conntrack *ct)
 	list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
 		if (i->master == ct && del_timer(&i->timeout)) {
 			unlink_expect(i);
-			destroy_expect(i);
+			ip_conntrack_expect_put(i);
 		}
 	}
 }
@@ -217,7 +304,7 @@ clean_from_lists(struct ip_conntrack *ct)
 	LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
 
 	/* Destroy all pending expectations */
-	remove_expectations(ct);
+	ip_ct_remove_expectations(ct);
 }
 
 static void
@@ -230,10 +317,13 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
 	IP_NF_ASSERT(!timer_pending(&ct->timeout));
 
+	ip_conntrack_event(IPCT_DESTROY, ct);
+	set_bit(IPS_DYING_BIT, &ct->status);
+
 	/* To make sure we don't get any weird locking issues here:
 	 * destroy_conntrack() MUST NOT be called with a write lock
 	 * to ip_conntrack_lock!!! -HW */
-	proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
+	proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
 	if (proto && proto->destroy)
 		proto->destroy(ct);
 
@@ -245,7 +335,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	 * except TFTP can create an expectation on the first packet,
 	 * before connection is in the list, so we need to clean here,
 	 * too. */
-	remove_expectations(ct);
+	ip_ct_remove_expectations(ct);
 
 	/* We overload first tuple to link into unconfirmed list. */
 	if (!is_confirmed(ct)) {
@@ -260,8 +350,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	ip_conntrack_put(ct->master);
 
 	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
-	kmem_cache_free(ip_conntrack_cachep, ct);
-	atomic_dec(&ip_conntrack_count);
+	ip_conntrack_free(ct);
 }
 
 static void death_by_timeout(unsigned long ul_conntrack)
@@ -287,7 +376,7 @@ conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
 		&& ip_ct_tuple_equal(tuple, &i->tuple);
 }
 
-static struct ip_conntrack_tuple_hash *
+struct ip_conntrack_tuple_hash *
 __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
 		    const struct ip_conntrack *ignored_conntrack)
 {
@@ -322,6 +411,29 @@ ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
 	return h;
 }
 
+static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
+					unsigned int hash,
+					unsigned int repl_hash)
+{
+	ct->id = ++ip_conntrack_next_id;
+	list_prepend(&ip_conntrack_hash[hash],
+		     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	list_prepend(&ip_conntrack_hash[repl_hash],
+		     &ct->tuplehash[IP_CT_DIR_REPLY].list);
+}
+
+void ip_conntrack_hash_insert(struct ip_conntrack *ct)
+{
+	unsigned int hash, repl_hash;
+
+	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+	write_lock_bh(&ip_conntrack_lock);
+	__ip_conntrack_hash_insert(ct, hash, repl_hash);
+	write_unlock_bh(&ip_conntrack_lock);
+}
+
 /* Confirm a connection given skb; places it in hash table */
 int
 __ip_conntrack_confirm(struct sk_buff **pskb)
@@ -368,10 +480,7 @@ __ip_conntrack_confirm(struct sk_buff **pskb)
 		/* Remove from unconfirmed list */
 		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
 
-		list_prepend(&ip_conntrack_hash[hash],
-			     &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
-		list_prepend(&ip_conntrack_hash[repl_hash],
-			     &ct->tuplehash[IP_CT_DIR_REPLY]);
+		__ip_conntrack_hash_insert(ct, hash, repl_hash);
 		/* Timer relative to confirmation time, not original
 		   setting time, otherwise we'd get timer wrap in
 		   weird delay cases. */
@@ -381,6 +490,16 @@ __ip_conntrack_confirm(struct sk_buff **pskb)
 		set_bit(IPS_CONFIRMED_BIT, &ct->status);
 		CONNTRACK_STAT_INC(insert);
 		write_unlock_bh(&ip_conntrack_lock);
+		if (ct->helper)
+			ip_conntrack_event_cache(IPCT_HELPER, *pskb);
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+		if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
+		    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
+			ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
+#endif
+		ip_conntrack_event_cache(master_ct(ct) ?
+					 IPCT_RELATED : IPCT_NEW, *pskb);
+
 		return NF_ACCEPT;
 	}
 
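The calls added above illustrate the intended flow of the new event cache: hook code records event bits cheaply in per-CPU state, and they are delivered to the notifier chain in one batch once the packet is done. Schematically, with both functions taken from this patch:

static void example_event_flow(struct sk_buff **pskb, struct ip_conntrack *ct)
{
	/* hooks record events cheaply against the current packet's ct */
	ip_conntrack_event_cache(IPCT_STATUS, *pskb);
	/* ... further hooks may cache more event bits ... */

	/* delivery happens once, batching all cached bits into a single
	 * notifier_call_chain() invocation */
	ip_ct_deliver_cached_events(ct);
}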
@@ -445,34 +564,84 @@ static inline int helper_cmp(const struct ip_conntrack_helper *i,
 	return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
 }
 
-static struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
+static struct ip_conntrack_helper *
+__ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
 {
 	return LIST_FIND(&helpers, helper_cmp,
 			 struct ip_conntrack_helper *,
 			 tuple);
 }
 
-/* Allocate a new conntrack: we return -ENOMEM if classification
-   failed due to stress.  Otherwise it really is unclassifiable. */
-static struct ip_conntrack_tuple_hash *
-init_conntrack(const struct ip_conntrack_tuple *tuple,
-	       struct ip_conntrack_protocol *protocol,
-	       struct sk_buff *skb)
+struct ip_conntrack_helper *
+ip_conntrack_helper_find_get( const struct ip_conntrack_tuple *tuple)
+{
+	struct ip_conntrack_helper *helper;
+
+	/* need ip_conntrack_lock to assure that helper exists until
+	 * try_module_get() is called */
+	read_lock_bh(&ip_conntrack_lock);
+
+	helper = __ip_conntrack_helper_find(tuple);
+	if (helper) {
+		/* need to increase module usage count to assure helper will
+		 * not go away while the caller is e.g. busy putting a
+		 * conntrack in the hash that uses the helper */
+		if (!try_module_get(helper->me))
+			helper = NULL;
+	}
+
+	read_unlock_bh(&ip_conntrack_lock);
+
+	return helper;
+}
+
+void ip_conntrack_helper_put(struct ip_conntrack_helper *helper)
+{
+	module_put(helper->me);
+}
+
+struct ip_conntrack_protocol *
+__ip_conntrack_proto_find(u_int8_t protocol)
+{
+	return ip_ct_protos[protocol];
+}
+
+/* this is guaranteed to always return a valid protocol helper, since
+ * it falls back to generic_protocol */
+struct ip_conntrack_protocol *
+ip_conntrack_proto_find_get(u_int8_t protocol)
+{
+	struct ip_conntrack_protocol *p;
+
+	preempt_disable();
+	p = __ip_conntrack_proto_find(protocol);
+	if (p) {
+		if (!try_module_get(p->me))
+			p = &ip_conntrack_generic_protocol;
+	}
+	preempt_enable();
+
+	return p;
+}
+
+void ip_conntrack_proto_put(struct ip_conntrack_protocol *p)
+{
+	module_put(p->me);
+}
+
+struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
+					struct ip_conntrack_tuple *repl)
 {
 	struct ip_conntrack *conntrack;
-	struct ip_conntrack_tuple repl_tuple;
-	size_t hash;
-	struct ip_conntrack_expect *exp;
 
 	if (!ip_conntrack_hash_rnd_initted) {
 		get_random_bytes(&ip_conntrack_hash_rnd, 4);
 		ip_conntrack_hash_rnd_initted = 1;
 	}
 
-	hash = hash_conntrack(tuple);
-
 	if (ip_conntrack_max
 	    && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
+		unsigned int hash = hash_conntrack(orig);
 		/* Try dropping from this hash chain. */
 		if (!early_drop(&ip_conntrack_hash[hash])) {
 			if (net_ratelimit())
@@ -483,11 +652,6 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
 		}
 	}
 
-	if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
-		DEBUGP("Can't invert tuple.\n");
-		return NULL;
-	}
-
 	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
 	if (!conntrack) {
 		DEBUGP("Can't allocate conntrack.\n");
@@ -497,17 +661,50 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
 	memset(conntrack, 0, sizeof(*conntrack));
 	atomic_set(&conntrack->ct_general.use, 1);
 	conntrack->ct_general.destroy = destroy_conntrack;
-	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
-	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
-	if (!protocol->new(conntrack, skb)) {
-		kmem_cache_free(ip_conntrack_cachep, conntrack);
-		return NULL;
-	}
+	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
+	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
 	/* Don't set timer yet: wait for confirmation */
 	init_timer(&conntrack->timeout);
 	conntrack->timeout.data = (unsigned long)conntrack;
 	conntrack->timeout.function = death_by_timeout;
 
+	atomic_inc(&ip_conntrack_count);
+
+	return conntrack;
+}
+
+void
+ip_conntrack_free(struct ip_conntrack *conntrack)
+{
+	atomic_dec(&ip_conntrack_count);
+	kmem_cache_free(ip_conntrack_cachep, conntrack);
+}
+
+/* Allocate a new conntrack: we return -ENOMEM if classification
+ * failed due to stress.  Otherwise it really is unclassifiable */
+static struct ip_conntrack_tuple_hash *
+init_conntrack(struct ip_conntrack_tuple *tuple,
+	       struct ip_conntrack_protocol *protocol,
+	       struct sk_buff *skb)
+{
+	struct ip_conntrack *conntrack;
+	struct ip_conntrack_tuple repl_tuple;
+	struct ip_conntrack_expect *exp;
+
+	if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
+		DEBUGP("Can't invert tuple.\n");
+		return NULL;
+	}
+
+	conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
+	if (conntrack == NULL || IS_ERR(conntrack))
+		return (struct ip_conntrack_tuple_hash *)conntrack;
+
+	if (!protocol->new(conntrack, skb)) {
+		ip_conntrack_free(conntrack);
+		return NULL;
+	}
+
 	write_lock_bh(&ip_conntrack_lock);
 	exp = find_expectation(tuple);
 
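Splitting allocation out of init_conntrack() creates a packet-less path for building entries, which the ctnetlink code added below presumably relies on. A sketch of that path using only functions introduced in this file (the tuples are assumed to be filled in by the caller):

/* Sketch: creating a conntrack entry without a triggering packet, as a
 * userspace-driven caller (e.g. ctnetlink) could. */
static int create_entry(struct ip_conntrack_tuple *orig,
			struct ip_conntrack_tuple *repl)
{
	struct ip_conntrack *ct;

	ct = ip_conntrack_alloc(orig, repl);
	if (ct == NULL || IS_ERR(ct))
		return -ENOMEM;

	/* ... set status bits, timeout and helper as needed ... */
	ip_conntrack_hash_insert(ct);	/* assigns ct->id, links both tuples */
	return 0;
}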
@@ -517,13 +714,18 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
 		/* Welcome, Mr. Bond.  We've been expecting you... */
 		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
 		conntrack->master = exp->master;
-#if CONFIG_IP_NF_CONNTRACK_MARK
+#ifdef CONFIG_IP_NF_CONNTRACK_MARK
 		conntrack->mark = exp->master->mark;
 #endif
+#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
+    defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
+		/* this is ugly, but there is no other place where to put it */
+		conntrack->nat.masq_index = exp->master->nat.masq_index;
+#endif
 		nf_conntrack_get(&conntrack->master->ct_general);
 		CONNTRACK_STAT_INC(expect_new);
 	} else {
-		conntrack->helper = ip_ct_find_helper(&repl_tuple);
+		conntrack->helper = __ip_conntrack_helper_find(&repl_tuple);
 
 		CONNTRACK_STAT_INC(new);
 	}
@@ -531,13 +733,12 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
 	/* Overload tuple linked list to put us in unconfirmed list. */
 	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
 
-	atomic_inc(&ip_conntrack_count);
 	write_unlock_bh(&ip_conntrack_lock);
 
 	if (exp) {
 		if (exp->expectfn)
 			exp->expectfn(conntrack, exp);
-		destroy_expect(exp);
+		ip_conntrack_expect_put(exp);
 	}
 
 	return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
@@ -609,7 +810,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
 	struct ip_conntrack_protocol *proto;
-	int set_reply;
+	int set_reply = 0;
 	int ret;
 
 	/* Previously seen (loopback or untracked)?  Ignore. */
@@ -627,9 +828,6 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
 		return NF_DROP;
 	}
 
-	/* FIXME: Do this right please. --RR */
-	(*pskb)->nfcache |= NFC_UNKNOWN;
-
 /* Doesn't cover locally-generated broadcast, so not worth it. */
 #if 0
 	/* Ignore broadcast: no `connection'. */
@@ -645,7 +843,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
 	}
 #endif
 
-	proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
+	proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);
 
 	/* It may be an special packet, error, unclean...
 	 * inverse of the return code tells to the netfilter
@@ -681,8 +879,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
 		return -ret;
 	}
 
-	if (set_reply)
-		set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
+	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
+		ip_conntrack_event_cache(IPCT_STATUS, *pskb);
 
 	return ret;
 }
@@ -691,7 +889,7 @@ int invert_tuplepr(struct ip_conntrack_tuple *inverse,
 		   const struct ip_conntrack_tuple *orig)
 {
 	return ip_ct_invert_tuple(inverse, orig,
-				  ip_ct_find_proto(orig->dst.protonum));
+				  __ip_conntrack_proto_find(orig->dst.protonum));
 }
 
 /* Would two expected things clash? */
@@ -729,14 +927,14 @@ void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
 		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
 			unlink_expect(i);
 			write_unlock_bh(&ip_conntrack_lock);
-			destroy_expect(i);
+			ip_conntrack_expect_put(i);
 			return;
 		}
 	}
 	write_unlock_bh(&ip_conntrack_lock);
 }
 
-struct ip_conntrack_expect *ip_conntrack_expect_alloc(void)
+struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me)
 {
 	struct ip_conntrack_expect *new;
 
@@ -745,18 +943,23 @@ struct ip_conntrack_expect *ip_conntrack_expect_alloc(void)
 		DEBUGP("expect_related: OOM allocating expect\n");
 		return NULL;
 	}
-	new->master = NULL;
+	new->master = me;
+	atomic_inc(&new->master->ct_general.use);
+	atomic_set(&new->use, 1);
 	return new;
 }
 
-void ip_conntrack_expect_free(struct ip_conntrack_expect *expect)
+void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
 {
-	kmem_cache_free(ip_conntrack_expect_cachep, expect);
+	if (atomic_dec_and_test(&exp->use)) {
+		ip_conntrack_put(exp->master);
+		kmem_cache_free(ip_conntrack_expect_cachep, exp);
+	}
 }
 
 static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
 {
-	atomic_inc(&exp->master->ct_general.use);
+	atomic_inc(&exp->use);
 	exp->master->expecting++;
 	list_add(&exp->list, &ip_conntrack_expect_list);
 
@@ -766,6 +969,8 @@ static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
 	exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
 	add_timer(&exp->timeout);
 
+	exp->id = ++ip_conntrack_expect_next_id;
+	atomic_inc(&exp->use);
 	CONNTRACK_STAT_INC(expect_create);
 }
 
@@ -778,7 +983,7 @@ static void evict_oldest_expect(struct ip_conntrack *master)
 		if (i->master == master) {
 			if (del_timer(&i->timeout)) {
 				unlink_expect(i);
-				destroy_expect(i);
+				ip_conntrack_expect_put(i);
 			}
 			break;
 		}
@@ -810,8 +1015,6 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
 		/* Refresh timer: if it's dying, ignore.. */
 		if (refresh_timer(i)) {
 			ret = 0;
-			/* We don't need the one they've given us. */
-			ip_conntrack_expect_free(expect);
 			goto out;
 		}
 	} else if (expect_clash(i, expect)) {
@@ -826,6 +1029,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
 	evict_oldest_expect(expect->master);
 
 	ip_conntrack_expect_insert(expect);
+	ip_conntrack_expect_event(IPEXP_NEW, expect);
 	ret = 0;
 out:
 	write_unlock_bh(&ip_conntrack_lock);
@@ -846,7 +1050,7 @@ void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
 
 	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
 	if (!conntrack->master && conntrack->expecting == 0)
-		conntrack->helper = ip_ct_find_helper(newreply);
+		conntrack->helper = __ip_conntrack_helper_find(newreply);
 	write_unlock_bh(&ip_conntrack_lock);
 }
 
@@ -860,11 +1064,26 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
 	return 0;
 }
 
+struct ip_conntrack_helper *
+__ip_conntrack_helper_find_byname(const char *name)
+{
+	struct ip_conntrack_helper *h;
+
+	list_for_each_entry(h, &helpers, list) {
+		if (!strcmp(h->name, name))
+			return h;
+	}
+
+	return NULL;
+}
+
 static inline int unhelp(struct ip_conntrack_tuple_hash *i,
 			 const struct ip_conntrack_helper *me)
 {
-	if (tuplehash_to_ctrack(i)->helper == me)
+	if (tuplehash_to_ctrack(i)->helper == me) {
+		ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
 		tuplehash_to_ctrack(i)->helper = NULL;
+	}
 	return 0;
 }
 
@@ -881,7 +1100,7 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
 	list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
 		if (exp->master->helper == me && del_timer(&exp->timeout)) {
 			unlink_expect(exp);
-			destroy_expect(exp);
+			ip_conntrack_expect_put(exp);
 		}
 	}
 	/* Get rid of expecteds, set helpers to NULL. */
@@ -926,12 +1145,46 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
 		if (del_timer(&ct->timeout)) {
 			ct->timeout.expires = jiffies + extra_jiffies;
 			add_timer(&ct->timeout);
+			ip_conntrack_event_cache(IPCT_REFRESH, skb);
 		}
 		ct_add_counters(ct, ctinfo, skb);
 		write_unlock_bh(&ip_conntrack_lock);
 	}
 }
 
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
+ * in ip_conntrack_core, since we don't want the protocols to autoload
+ * or depend on ctnetlink */
+int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb,
+			       const struct ip_conntrack_tuple *tuple)
+{
+	NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
+		&tuple->src.u.tcp.port);
+	NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
+		&tuple->dst.u.tcp.port);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
+			       struct ip_conntrack_tuple *t)
+{
+	if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
+		return -EINVAL;
+
+	t->src.u.tcp.port =
+		*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
+	t->dst.u.tcp.port =
+		*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
+
+	return 0;
+}
+#endif
+
 /* Returns new sk_buff, or NULL */
 struct sk_buff *
 ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
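These converters let any port-based tracker expose its tuples over ctnetlink without depending on the netlink module. A sketch of the wiring; tuple_to_nfattr is the field name this patch itself dereferences, while the parse-direction field name is an assumption:

/* Sketch: wiring the generic port converters into a protocol tracker.
 * The nfattr_to_tuple field name is an assumption; tuple_to_nfattr is
 * dereferenced by the ctnetlink code below. */
struct ip_conntrack_protocol my_port_proto = {
	/* ... the usual pkt_to_tuple/invert_tuple/packet callbacks ... */
	.tuple_to_nfattr	= ip_ct_port_tuple_to_nfattr,
	.nfattr_to_tuple	= ip_ct_port_nfattr_to_tuple,
};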
@@ -942,10 +1195,8 @@ ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
 	skb = ip_defrag(skb, user);
 	local_bh_enable();
 
-	if (skb) {
+	if (skb)
 		ip_send_check(skb->nh.iph);
-		skb->nfcache |= NFC_ALTERED;
-	}
 	return skb;
 }
 
@@ -1095,23 +1346,31 @@ static void free_conntrack_hash(void)
 			   * ip_conntrack_htable_size));
 }
 
-/* Mishearing the voices in his head, our hero wonders how he's
-   supposed to kill the mall. */
-void ip_conntrack_cleanup(void)
+void ip_conntrack_flush()
 {
-	ip_ct_attach = NULL;
 	/* This makes sure all current packets have passed through
 	   netfilter framework.  Roll on, two-stage module
 	   delete... */
 	synchronize_net();
 
+	ip_ct_event_cache_flush();
  i_see_dead_people:
 	ip_ct_iterate_cleanup(kill_all, NULL);
 	if (atomic_read(&ip_conntrack_count) != 0) {
 		schedule();
 		goto i_see_dead_people;
 	}
+	/* wait until all references to ip_conntrack_untracked are dropped */
+	while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
+		schedule();
+}
 
+/* Mishearing the voices in his head, our hero wonders how he's
+   supposed to kill the mall. */
+void ip_conntrack_cleanup(void)
+{
+	ip_ct_attach = NULL;
+	ip_conntrack_flush();
 	kmem_cache_destroy(ip_conntrack_cachep);
 	kmem_cache_destroy(ip_conntrack_expect_cachep);
 	free_conntrack_hash();
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index fea6dd2a00b6..3a2627db1729 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -25,8 +25,7 @@ MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
 MODULE_DESCRIPTION("ftp connection tracking helper");
 
 /* This is slow, but it's simple. --RR */
-static char ftp_buffer[65536];
-
+static char *ftp_buffer;
 static DEFINE_SPINLOCK(ip_ftp_lock);
 
 #define MAX_PORTS 8
@@ -262,7 +261,8 @@ static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir)
 }
 
 /* We don't update if it's older than what we have. */
-static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir)
+static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir,
+			  struct sk_buff *skb)
 {
 	unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
 
@@ -276,10 +276,13 @@ static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir)
 			oldest = i;
 	}
 
-	if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER)
+	if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
 		info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
-	else if (oldest != NUM_SEQ_TO_REMEMBER)
+		ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+	} else if (oldest != NUM_SEQ_TO_REMEMBER) {
 		info->seq_aft_nl[dir][oldest] = nl_seq;
+		ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+	}
 }
 
 static int help(struct sk_buff **pskb,
@@ -376,7 +379,7 @@ static int help(struct sk_buff **pskb,
 		fb_ptr + matchoff, matchlen, ntohl(th->seq) + matchoff);
 
 	/* Allocate expectation which will be inserted */
-	exp = ip_conntrack_expect_alloc();
+	exp = ip_conntrack_expect_alloc(ct);
 	if (exp == NULL) {
 		ret = NF_DROP;
 		goto out;
@@ -403,8 +406,7 @@ static int help(struct sk_buff **pskb,
 	   networks, or the packet filter itself). */
 	if (!loose) {
 		ret = NF_ACCEPT;
-		ip_conntrack_expect_free(exp);
-		goto out_update_nl;
+		goto out_put_expect;
 	}
 	exp->tuple.dst.ip = htonl((array[0] << 24) | (array[1] << 16)
 				 | (array[2] << 8) | array[3]);
@@ -419,7 +421,6 @@ static int help(struct sk_buff **pskb,
 		  { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }});
 
 	exp->expectfn = NULL;
-	exp->master = ct;
 
 	/* Now, NAT might want to mangle the packet, and register the
 	 * (possibly changed) expectation itself. */
@@ -428,18 +429,20 @@ static int help(struct sk_buff **pskb,
 				       matchoff, matchlen, exp, &seq);
 	else {
 		/* Can't expect this?  Best to drop packet now. */
-		if (ip_conntrack_expect_related(exp) != 0) {
-			ip_conntrack_expect_free(exp);
+		if (ip_conntrack_expect_related(exp) != 0)
 			ret = NF_DROP;
-		} else
+		else
 			ret = NF_ACCEPT;
 	}
 
+out_put_expect:
+	ip_conntrack_expect_put(exp);
+
 out_update_nl:
 	/* Now if this ends in \n, update ftp info. Seq may have been
 	 * adjusted by NAT code. */
 	if (ends_in_nl)
-		update_nl_seq(seq, ct_ftp_info,dir);
+		update_nl_seq(seq, ct_ftp_info,dir, *pskb);
  out:
 	spin_unlock_bh(&ip_ftp_lock);
 	return ret;
@@ -457,6 +460,8 @@ static void fini(void)
 					ports[i]);
 		ip_conntrack_helper_unregister(&ftp[i]);
 	}
+
+	kfree(ftp_buffer);
 }
 
 static int __init init(void)
@@ -464,6 +469,10 @@ static int __init init(void)
 	int i, ret;
 	char *tmpname;
 
+	ftp_buffer = kmalloc(65536, GFP_KERNEL);
+	if (!ftp_buffer)
+		return -ENOMEM;
+
 	if (ports_c == 0)
 		ports[ports_c++] = FTP_PORT;
 
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
index cd98772cc332..25438eec21a1 100644
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -39,7 +39,7 @@ static int ports_c;
 static int max_dcc_channels = 8;
 static unsigned int dcc_timeout = 300;
 /* This is slow, but it's simple. --RR */
-static char irc_buffer[65536];
+static char *irc_buffer;
 static DEFINE_SPINLOCK(irc_buffer_lock);
 
 unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
@@ -197,7 +197,7 @@ static int help(struct sk_buff **pskb,
 				continue;
 			}
 
-			exp = ip_conntrack_expect_alloc();
+			exp = ip_conntrack_expect_alloc(ct);
 			if (exp == NULL) {
 				ret = NF_DROP;
 				goto out;
@@ -221,16 +221,14 @@ static int help(struct sk_buff **pskb,
 				  { { 0, { 0 } },
 				    { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }});
 			exp->expectfn = NULL;
-			exp->master = ct;
 			if (ip_nat_irc_hook)
 				ret = ip_nat_irc_hook(pskb, ctinfo,
 						      addr_beg_p - ib_ptr,
 						      addr_end_p - addr_beg_p,
 						      exp);
-			else if (ip_conntrack_expect_related(exp) != 0) {
-				ip_conntrack_expect_free(exp);
+			else if (ip_conntrack_expect_related(exp) != 0)
 				ret = NF_DROP;
-			}
+			ip_conntrack_expect_put(exp);
 			goto out;
 		} /* for .. NUM_DCCPROTO */
 	} /* while data < ... */
@@ -259,6 +257,10 @@ static int __init init(void)
 		printk("ip_conntrack_irc: dcc_timeout must be a positive integer\n");
 		return -EBUSY;
 	}
+
+	irc_buffer = kmalloc(65536, GFP_KERNEL);
+	if (!irc_buffer)
+		return -ENOMEM;
 
 	/* If no port given, default to standard irc port */
 	if (ports_c == 0)
@@ -306,6 +308,7 @@ static void fini(void)
 					ports[i]);
 		ip_conntrack_helper_unregister(&irc_helpers[i]);
 	}
+	kfree(irc_buffer);
 }
 
 module_init(init);
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
new file mode 100644
index 000000000000..a4e9278db4ed
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -0,0 +1,1579 @@
+/* Connection tracking via netlink socket. Allows for user space
+ * protocol helpers and general trouble making from userspace.
+ *
+ * (C) 2001 by Jay Schulist <jschlst@samba.org>
+ * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2003 by Patrick Mchardy <kaber@trash.net>
+ * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net>
+ *
+ * I've reworked this stuff to use attributes instead of conntrack
+ * structures. 5.44 am. I need more tea. --pablo 05/07/11.
+ *
+ * Initial connection tracking via netlink development funded and
+ * generally made possible by Network Robots, Inc. (www.networkrobots.com)
+ *
+ * Further development of this code funded by Astaro AG (http://www.astaro.com)
+ *
+ * This software may be used and distributed according to the terms
+ * of the GNU General Public License, incorporated herein by reference.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/netlink.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <linux/rtnetlink.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+MODULE_LICENSE("GPL");
+
+static char __initdata version[] = "0.90";
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+
+static inline int
+ctnetlink_dump_tuples_proto(struct sk_buff *skb,
+			    const struct ip_conntrack_tuple *tuple)
+{
+	struct ip_conntrack_protocol *proto;
+
+	NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum);
+
+	proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
+	if (proto && proto->tuple_to_nfattr)
+		return proto->tuple_to_nfattr(skb, tuple);
+
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static inline int
+ctnetlink_dump_tuples(struct sk_buff *skb,
+		      const struct ip_conntrack_tuple *tuple)
+{
+	struct nfattr *nest_parms;
+
+	nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
+	NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip);
+	NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), &tuple->dst.ip);
+	NFA_NEST_END(skb, nest_parms);
+
+	nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);
+	ctnetlink_dump_tuples_proto(skb, tuple);
+	NFA_NEST_END(skb, nest_parms);
+
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+static inline int
+ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct)
+{
+	u_int32_t status = htonl((u_int32_t) ct->status);
+	NFA_PUT(skb, CTA_STATUS, sizeof(status), &status);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static inline int
+ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct)
+{
+	long timeout_l = ct->timeout.expires - jiffies;
+	u_int32_t timeout;
+
+	if (timeout_l < 0)
+		timeout = 0;
+	else
+		timeout = htonl(timeout_l / HZ);
+
+	NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static inline int
+ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
+{
+	struct ip_conntrack_protocol *proto = ip_conntrack_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
+
+	struct nfattr *nest_proto;
+	int ret;
+
+	if (!proto || !proto->to_nfattr)
+		return 0;
+
+	nest_proto = NFA_NEST(skb, CTA_PROTOINFO);
+
+	ret = proto->to_nfattr(skb, nest_proto, ct);
+
+	ip_conntrack_proto_put(proto);
+
+	NFA_NEST_END(skb, nest_proto);
+
+	return ret;
+
+nfattr_failure:
+	return -1;
+}
+
+static inline int
+ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
+{
+	struct nfattr *nest_helper;
+
+	if (!ct->helper)
+		return 0;
+
+	nest_helper = NFA_NEST(skb, CTA_HELP);
+	NFA_PUT(skb, CTA_HELP_NAME, CTA_HELP_MAXNAMESIZE, &ct->helper->name);
+
+	if (ct->helper->to_nfattr)
+		ct->helper->to_nfattr(skb, ct);
+
+	NFA_NEST_END(skb, nest_helper);
+
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+#ifdef CONFIG_IP_NF_CT_ACCT
+static inline int
+ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct,
+			enum ip_conntrack_dir dir)
+{
+	enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
+	struct nfattr *nest_count = NFA_NEST(skb, type);
+	u_int64_t tmp;
+
+	tmp = cpu_to_be64(ct->counters[dir].packets);
+	NFA_PUT(skb, CTA_COUNTERS_PACKETS, sizeof(u_int64_t), &tmp);
+
+	tmp = cpu_to_be64(ct->counters[dir].bytes);
+	NFA_PUT(skb, CTA_COUNTERS_BYTES, sizeof(u_int64_t), &tmp);
+
+	NFA_NEST_END(skb, nest_count);
+
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+#else
+#define ctnetlink_dump_counters(a, b, c) (0)
+#endif
+
+#ifdef CONFIG_IP_NF_CONNTRACK_MARK
+static inline int
+ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct)
+{
+	u_int32_t mark = htonl(ct->mark);
+
+	NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+#else
+#define ctnetlink_dump_mark(a, b) (0)
+#endif
+
+static inline int
+ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct)
+{
+	u_int32_t id = htonl(ct->id);
+	NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static inline int
+ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct)
+{
+	unsigned int use = htonl(atomic_read(&ct->ct_general.use));
+
+	NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
+
+static int
+ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+		    int event, int nowait,
+		    const struct ip_conntrack *ct)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+	struct nfattr *nest_parms;
+	unsigned char *b;
+
+	b = skb->tail;
+
+	event |= NFNL_SUBSYS_CTNETLINK << 8;
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
+	nfmsg = NLMSG_DATA(nlh);
+
+	nlh->nlmsg_flags    = (nowait && pid) ? NLM_F_MULTI : 0;
+	nfmsg->nfgen_family = AF_INET;
+	nfmsg->version      = NFNETLINK_V0;
+	nfmsg->res_id       = 0;
+
+	nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
+	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+		goto nfattr_failure;
+	NFA_NEST_END(skb, nest_parms);
+
+	nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
+	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
+		goto nfattr_failure;
+	NFA_NEST_END(skb, nest_parms);
+
+	if (ctnetlink_dump_status(skb, ct) < 0 ||
+	    ctnetlink_dump_timeout(skb, ct) < 0 ||
+	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
+	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+	    ctnetlink_dump_protoinfo(skb, ct) < 0 ||
+	    ctnetlink_dump_helpinfo(skb, ct) < 0 ||
+	    ctnetlink_dump_mark(skb, ct) < 0 ||
+	    ctnetlink_dump_id(skb, ct) < 0 ||
+	    ctnetlink_dump_use(skb, ct) < 0)
+		goto nfattr_failure;
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+nfattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
288
289#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
290static int ctnetlink_conntrack_event(struct notifier_block *this,
291 unsigned long events, void *ptr)
292{
293 struct nlmsghdr *nlh;
294 struct nfgenmsg *nfmsg;
295 struct nfattr *nest_parms;
296 struct ip_conntrack *ct = (struct ip_conntrack *)ptr;
297 struct sk_buff *skb;
298 unsigned int type;
299 unsigned char *b;
300 unsigned int flags = 0, group;
301
302 /* ignore our fake conntrack entry */
303 if (ct == &ip_conntrack_untracked)
304 return NOTIFY_DONE;
305
306 if (events & IPCT_DESTROY) {
307 type = IPCTNL_MSG_CT_DELETE;
308 group = NFNLGRP_CONNTRACK_DESTROY;
309 goto alloc_skb;
310 }
311 if (events & (IPCT_NEW | IPCT_RELATED)) {
312 type = IPCTNL_MSG_CT_NEW;
313 flags = NLM_F_CREATE|NLM_F_EXCL;
314 /* dump everything */
315 events = ~0UL;
316 group = NFNLGRP_CONNTRACK_NEW;
317 goto alloc_skb;
318 }
319 if (events & (IPCT_STATUS |
320 IPCT_PROTOINFO |
321 IPCT_HELPER |
322 IPCT_HELPINFO |
323 IPCT_NATINFO)) {
324 type = IPCTNL_MSG_CT_NEW;
325 group = NFNLGRP_CONNTRACK_UPDATE;
326 goto alloc_skb;
327 }
328
329 return NOTIFY_DONE;
330
331alloc_skb:
332 /* FIXME: Check if there are any listeners before, don't hurt performance */
333
334 skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
335 if (!skb)
336 return NOTIFY_DONE;
337
338 b = skb->tail;
339
340 type |= NFNL_SUBSYS_CTNETLINK << 8;
341 nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
342 nfmsg = NLMSG_DATA(nlh);
343
344 nlh->nlmsg_flags = flags;
345 nfmsg->nfgen_family = AF_INET;
346 nfmsg->version = NFNETLINK_V0;
347 nfmsg->res_id = 0;
348
349 nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
350 if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
351 goto nfattr_failure;
352 NFA_NEST_END(skb, nest_parms);
353
354 nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
355 if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
356 goto nfattr_failure;
357 NFA_NEST_END(skb, nest_parms);
358
359 /* NAT stuff is now a status flag */
360 if ((events & IPCT_STATUS || events & IPCT_NATINFO)
361 && ctnetlink_dump_status(skb, ct) < 0)
362 goto nfattr_failure;
363 if (events & IPCT_REFRESH
364 && ctnetlink_dump_timeout(skb, ct) < 0)
365 goto nfattr_failure;
366 if (events & IPCT_PROTOINFO
367 && ctnetlink_dump_protoinfo(skb, ct) < 0)
368 goto nfattr_failure;
369 if (events & IPCT_HELPINFO
370 && ctnetlink_dump_helpinfo(skb, ct) < 0)
371 goto nfattr_failure;
372
373 if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
374 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
375 goto nfattr_failure;
376
377 nlh->nlmsg_len = skb->tail - b;
378 nfnetlink_send(skb, 0, group, 0);
379 return NOTIFY_DONE;
380
381nlmsg_failure:
382nfattr_failure:
383 kfree_skb(skb);
384 return NOTIFY_DONE;
385}
386#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
387
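The handler above fans events out to three nfnetlink multicast groups (NEW, UPDATE, DESTROY). For context only, a userspace listener of this era would subscribe roughly as below; this sketch is an assumption about the userspace API, not part of the patch:

/* Userspace sketch (assumed API, not part of this patch): subscribe to
 * the conntrack event groups the handler above sends to. */
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/netfilter/nfnetlink.h>

static int open_ct_events(void)
{
	struct sockaddr_nl snl = { .nl_family = AF_NETLINK };
	int grp = NFNLGRP_CONNTRACK_NEW;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_NETFILTER);

	if (fd < 0 || bind(fd, (struct sockaddr *)&snl, sizeof(snl)) < 0)
		return -1;
	/* one setsockopt per group of interest */
	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &grp, sizeof(grp));
	return fd;	/* recv() now yields IPCTNL_MSG_CT_* messages */
}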
388static int ctnetlink_done(struct netlink_callback *cb)
389{
390 DEBUGP("entered %s\n", __FUNCTION__);
391 return 0;
392}
393
394static int
395ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
396{
397 struct ip_conntrack *ct = NULL;
398 struct ip_conntrack_tuple_hash *h;
399 struct list_head *i;
400 u_int32_t *id = (u_int32_t *) &cb->args[1];
401
402 DEBUGP("entered %s, last bucket=%lu id=%u\n", __FUNCTION__,
403 cb->args[0], *id);
404
405 read_lock_bh(&ip_conntrack_lock);
406 for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
407 list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
408 h = (struct ip_conntrack_tuple_hash *) i;
409 if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
410 continue;
411 ct = tuplehash_to_ctrack(h);
412 if (ct->id <= *id)
413 continue;
414 if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
415 cb->nlh->nlmsg_seq,
416 IPCTNL_MSG_CT_NEW,
417 1, ct) < 0)
418 goto out;
419 *id = ct->id;
420 }
421 }
422out:
423 read_unlock_bh(&ip_conntrack_lock);
424
425 DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
426
427 return skb->len;
428}
429
430#ifdef CONFIG_IP_NF_CT_ACCT
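/* Same as ctnetlink_dump_table(), but takes the write lock because it
 * zeroes the per-conntrack counters after dumping them (CTRZERO). */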
431static int
432ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb)
433{
434 struct ip_conntrack *ct = NULL;
435 struct ip_conntrack_tuple_hash *h;
436 struct list_head *i;
437 u_int32_t *id = (u_int32_t *) &cb->args[1];
438
439	DEBUGP("entered %s, last bucket=%lu id=%u\n", __FUNCTION__,
440 cb->args[0], *id);
441
442 write_lock_bh(&ip_conntrack_lock);
443 for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
444 list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
445 h = (struct ip_conntrack_tuple_hash *) i;
446 if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
447 continue;
448 ct = tuplehash_to_ctrack(h);
449 if (ct->id <= *id)
450 continue;
451 if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
452 cb->nlh->nlmsg_seq,
453 IPCTNL_MSG_CT_NEW,
454 1, ct) < 0)
455 goto out;
456 *id = ct->id;
457
458 memset(&ct->counters, 0, sizeof(ct->counters));
459 }
460 }
461out:
462 write_unlock_bh(&ip_conntrack_lock);
463
464 DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
465
466 return skb->len;
467}
468#endif
469
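/* Minimum payload sizes for the attributes parsed below, enforced via
 * nfattr_bad_size() before any attribute data is dereferenced. */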
470static const int cta_min_ip[CTA_IP_MAX] = {
471 [CTA_IP_V4_SRC-1] = sizeof(u_int32_t),
472 [CTA_IP_V4_DST-1] = sizeof(u_int32_t),
473};
474
475static inline int
476ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
477{
478 struct nfattr *tb[CTA_IP_MAX];
479
480 DEBUGP("entered %s\n", __FUNCTION__);
481
483 if (nfattr_parse_nested(tb, CTA_IP_MAX, attr) < 0)
484 goto nfattr_failure;
485
486 if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
487 return -EINVAL;
488
489 if (!tb[CTA_IP_V4_SRC-1])
490 return -EINVAL;
491 tuple->src.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
492
493 if (!tb[CTA_IP_V4_DST-1])
494 return -EINVAL;
495 tuple->dst.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
496
497 DEBUGP("leaving\n");
498
499 return 0;
500
501nfattr_failure:
502 return -1;
503}
504
505static const int cta_min_proto[CTA_PROTO_MAX] = {
506 [CTA_PROTO_NUM-1] = sizeof(u_int16_t),
507 [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t),
508 [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t),
509 [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t),
510 [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t),
511 [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t),
512};
513
514static inline int
515ctnetlink_parse_tuple_proto(struct nfattr *attr,
516 struct ip_conntrack_tuple *tuple)
517{
518 struct nfattr *tb[CTA_PROTO_MAX];
519 struct ip_conntrack_protocol *proto;
520 int ret = 0;
521
522 DEBUGP("entered %s\n", __FUNCTION__);
523
524 if (nfattr_parse_nested(tb, CTA_PROTO_MAX, attr) < 0)
525 goto nfattr_failure;
526
527 if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
528 return -EINVAL;
529
530 if (!tb[CTA_PROTO_NUM-1])
531 return -EINVAL;
532 tuple->dst.protonum = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
533
534 proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
535
536 if (likely(proto && proto->nfattr_to_tuple)) {
537 ret = proto->nfattr_to_tuple(tb, tuple);
538 ip_conntrack_proto_put(proto);
539 }
540
541 return ret;
542
543nfattr_failure:
544 return -1;
545}
546
547static inline int
548ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple,
549 enum ctattr_tuple type)
550{
551 struct nfattr *tb[CTA_TUPLE_MAX];
552 int err;
553
554 DEBUGP("entered %s\n", __FUNCTION__);
555
556 memset(tuple, 0, sizeof(*tuple));
557
558 if (nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]) < 0)
559 goto nfattr_failure;
560
561 if (!tb[CTA_TUPLE_IP-1])
562 return -EINVAL;
563
564 err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple);
565 if (err < 0)
566 return err;
567
568 if (!tb[CTA_TUPLE_PROTO-1])
569 return -EINVAL;
570
571 err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple);
572 if (err < 0)
573 return err;
574
575	/* reply tuples get DIR_REPLY, orig and expect tuples get DIR_ORIGINAL */
576 if (type == CTA_TUPLE_REPLY)
577 tuple->dst.dir = IP_CT_DIR_REPLY;
578 else
579 tuple->dst.dir = IP_CT_DIR_ORIGINAL;
580
581 DUMP_TUPLE(tuple);
582
583 DEBUGP("leaving\n");
584
585 return 0;
586
587nfattr_failure:
588 return -1;
589}
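/* Illustrative sketch (not taken verbatim from any header): the nested
 * attribute layout the parser above expects, e.g. for a TCP or UDP tuple:
 *
 *	CTA_TUPLE_ORIG
 *	  CTA_TUPLE_IP
 *	    CTA_IP_V4_SRC	(u_int32_t)
 *	    CTA_IP_V4_DST	(u_int32_t)
 *	  CTA_TUPLE_PROTO
 *	    CTA_PROTO_NUM	(u_int16_t)
 *	    CTA_PROTO_SRC_PORT	(u_int16_t)
 *	    CTA_PROTO_DST_PORT	(u_int16_t)
 */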
590
591#ifdef CONFIG_IP_NF_NAT_NEEDED
592static const int cta_min_protonat[CTA_PROTONAT_MAX] = {
593 [CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t),
594 [CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t),
595};
596
597static int ctnetlink_parse_nat_proto(struct nfattr *attr,
598 const struct ip_conntrack *ct,
599 struct ip_nat_range *range)
600{
601 struct nfattr *tb[CTA_PROTONAT_MAX];
602 struct ip_nat_protocol *npt;
603
604 DEBUGP("entered %s\n", __FUNCTION__);
605
606 if (nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr) < 0)
607 goto nfattr_failure;
608
609 if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat))
610 goto nfattr_failure;
611
612 npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
613 if (!npt)
614 return 0;
615
616 if (!npt->nfattr_to_range) {
617 ip_nat_proto_put(npt);
618 return 0;
619 }
620
621 /* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */
622 if (npt->nfattr_to_range(tb, range) > 0)
623 range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
624
625 ip_nat_proto_put(npt);
626
627 DEBUGP("leaving\n");
628 return 0;
629
630nfattr_failure:
631 return -1;
632}
633
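/* Build an ip_nat_range from the CTA_NAT attribute: an IP range plus an
 * optional protocol-specific part such as a port range. */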
634static inline int
635ctnetlink_parse_nat(struct nfattr *cda[],
636 const struct ip_conntrack *ct, struct ip_nat_range *range)
637{
638 struct nfattr *tb[CTA_NAT_MAX];
639 int err;
640
641 DEBUGP("entered %s\n", __FUNCTION__);
642
643 memset(range, 0, sizeof(*range));
644
645 if (nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]) < 0)
646 goto nfattr_failure;
647
648 if (tb[CTA_NAT_MINIP-1])
649 range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
650
651 if (!tb[CTA_NAT_MAXIP-1])
652 range->max_ip = range->min_ip;
653 else
654 range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);
655
656 if (range->min_ip)
657 range->flags |= IP_NAT_RANGE_MAP_IPS;
658
659 if (!tb[CTA_NAT_PROTO-1])
660 return 0;
661
662 err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range);
663 if (err < 0)
664 return err;
665
666 DEBUGP("leaving\n");
667 return 0;
668
669nfattr_failure:
670 return -1;
671}
672#endif
673
674static inline int
675ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
676{
677 struct nfattr *tb[CTA_HELP_MAX];
678
679 DEBUGP("entered %s\n", __FUNCTION__);
680
681 if (nfattr_parse_nested(tb, CTA_HELP_MAX, attr) < 0)
682 goto nfattr_failure;
683
684 if (!tb[CTA_HELP_NAME-1])
685 return -EINVAL;
686
687 *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]);
688
689 return 0;
690
691nfattr_failure:
692 return -1;
693}
694
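/* Delete a conntrack selected by its original or reply tuple, optionally
 * cross-checked against CTA_ID; with no tuple given at all, flush the
 * whole table. */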
695static int
696ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
697 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
698{
699 struct ip_conntrack_tuple_hash *h;
700 struct ip_conntrack_tuple tuple;
701 struct ip_conntrack *ct;
702 int err = 0;
703
704 DEBUGP("entered %s\n", __FUNCTION__);
705
706 if (cda[CTA_TUPLE_ORIG-1])
707 err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
708 else if (cda[CTA_TUPLE_REPLY-1])
709 err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
710 else {
711 /* Flush the whole table */
712 ip_conntrack_flush();
713 return 0;
714 }
715
716 if (err < 0)
717 return err;
718
719 h = ip_conntrack_find_get(&tuple, NULL);
720 if (!h) {
721 DEBUGP("tuple not found in conntrack hash\n");
722 return -ENOENT;
723 }
724
725 ct = tuplehash_to_ctrack(h);
726
727 if (cda[CTA_ID-1]) {
728 u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1]));
729 if (ct->id != id) {
730 ip_conntrack_put(ct);
731 return -ENOENT;
732 }
733 }
734 if (del_timer(&ct->timeout)) {
735 ip_conntrack_put(ct);
736 ct->timeout.function((unsigned long)ct);
737 return 0;
738 }
739 ip_conntrack_put(ct);
740 DEBUGP("leaving\n");
741
742 return 0;
743}
744
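/* Look up a single conntrack by tuple and unicast it back to the
 * requester; with NLM_F_DUMP, hand the whole table to
 * netlink_dump_start() instead. */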
745static int
746ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
747 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
748{
749 struct ip_conntrack_tuple_hash *h;
750 struct ip_conntrack_tuple tuple;
751 struct ip_conntrack *ct;
752 struct sk_buff *skb2 = NULL;
753 int err = 0;
754
755 DEBUGP("entered %s\n", __FUNCTION__);
756
757 if (nlh->nlmsg_flags & NLM_F_DUMP) {
758 struct nfgenmsg *msg = NLMSG_DATA(nlh);
759 u32 rlen;
760
761 if (msg->nfgen_family != AF_INET)
762 return -EAFNOSUPPORT;
763
764 if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
765 IPCTNL_MSG_CT_GET_CTRZERO) {
766#ifdef CONFIG_IP_NF_CT_ACCT
767 if ((*errp = netlink_dump_start(ctnl, skb, nlh,
768 ctnetlink_dump_table_w,
769 ctnetlink_done)) != 0)
770 return -EINVAL;
771#else
772			return -EOPNOTSUPP;
773#endif
774 } else {
775 if ((*errp = netlink_dump_start(ctnl, skb, nlh,
776 ctnetlink_dump_table,
777 ctnetlink_done)) != 0)
778 return -EINVAL;
779 }
780
781 rlen = NLMSG_ALIGN(nlh->nlmsg_len);
782 if (rlen > skb->len)
783 rlen = skb->len;
784 skb_pull(skb, rlen);
785 return 0;
786 }
787
788 if (cda[CTA_TUPLE_ORIG-1])
789 err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
790 else if (cda[CTA_TUPLE_REPLY-1])
791 err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
792 else
793 return -EINVAL;
794
795 if (err < 0)
796 return err;
797
798 h = ip_conntrack_find_get(&tuple, NULL);
799 if (!h) {
800 DEBUGP("tuple not found in conntrack hash");
801 return -ENOENT;
802 }
803 DEBUGP("tuple found\n");
804 ct = tuplehash_to_ctrack(h);
805
806 err = -ENOMEM;
807 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
808 if (!skb2) {
809 ip_conntrack_put(ct);
810 return -ENOMEM;
811 }
812 NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid;
813
814 err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
815 IPCTNL_MSG_CT_NEW, 1, ct);
816 ip_conntrack_put(ct);
817 if (err <= 0)
818 goto out;
819
820 err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
821 if (err < 0)
822		return err; /* netlink_unicast() has already freed skb2 */
823
824 DEBUGP("leaving\n");
825 return 0;
826
827out:
828 if (skb2)
829 kfree_skb(skb2);
830 return -1;
831}
832
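/* Update ct->status from userspace.  EXPECTED/CONFIRMED/DYING are
 * unchangeable, and SEEN_REPLY/ASSURED may only be set, never cleared;
 * NAT setup is handled here as well, since it is carried as status bits. */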
833static inline int
834ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
835{
836 unsigned long d, status = *(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]);
837 d = ct->status ^ status;
838
839 if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
840 /* unchangeable */
841 return -EINVAL;
842
843 if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
844 /* SEEN_REPLY bit can only be set */
845 return -EINVAL;
846
848 if (d & IPS_ASSURED && !(status & IPS_ASSURED))
849 /* ASSURED bit can only be set */
850 return -EINVAL;
851
852 if (cda[CTA_NAT-1]) {
853#ifndef CONFIG_IP_NF_NAT_NEEDED
854 return -EINVAL;
855#else
856 unsigned int hooknum;
857 struct ip_nat_range range;
858
859 if (ctnetlink_parse_nat(cda, ct, &range) < 0)
860 return -EINVAL;
861
862 DEBUGP("NAT: %u.%u.%u.%u-%u.%u.%u.%u:%u-%u\n",
863 NIPQUAD(range.min_ip), NIPQUAD(range.max_ip),
864		       ntohs(range.min.all), ntohs(range.max.all));
865
866 /* This is tricky but it works. ip_nat_setup_info needs the
867 * hook number as parameter, so let's do the correct
868 * conversion and run away */
869 if (status & IPS_SRC_NAT_DONE)
870 hooknum = NF_IP_POST_ROUTING; /* IP_NAT_MANIP_SRC */
871 else if (status & IPS_DST_NAT_DONE)
872 hooknum = NF_IP_PRE_ROUTING; /* IP_NAT_MANIP_DST */
873 else
874 return -EINVAL; /* Missing NAT flags */
875
876 DEBUGP("NAT status: %lu\n",
877 status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
878
879 if (ip_nat_initialized(ct, hooknum))
880 return -EEXIST;
881 ip_nat_setup_info(ct, &range, hooknum);
882
883 DEBUGP("NAT status after setup_info: %lu\n",
884 ct->status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
885#endif
886 }
887
888 /* Be careful here, modifying NAT bits can screw up things,
889 * so don't let users modify them directly if they don't pass
890 * ip_nat_range. */
891 ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
892 return 0;
893}
894
895
896static inline int
897ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[])
898{
899 struct ip_conntrack_helper *helper;
900 char *helpname;
901 int err;
902
903 DEBUGP("entered %s\n", __FUNCTION__);
904
905 /* don't change helper of sibling connections */
906 if (ct->master)
907 return -EINVAL;
908
909 err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname);
910 if (err < 0)
911 return err;
912
913 helper = __ip_conntrack_helper_find_byname(helpname);
914 if (!helper) {
915 if (!strcmp(helpname, ""))
916 helper = NULL;
917 else
918 return -EINVAL;
919 }
920
921 if (ct->helper) {
922 if (!helper) {
923 /* we had a helper before ... */
924 ip_ct_remove_expectations(ct);
925 ct->helper = NULL;
926 } else {
927 /* need to zero data of old helper */
928 memset(&ct->help, 0, sizeof(ct->help));
929 }
930 }
931
932 ct->helper = helper;
933
934 return 0;
935}
936
937static inline int
938ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[])
939{
940 u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
941
942 if (!del_timer(&ct->timeout))
943 return -ETIME;
944
945 ct->timeout.expires = jiffies + timeout * HZ;
946 add_timer(&ct->timeout);
947
948 return 0;
949}
950
951static int
952ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
953{
954 int err;
955
956 DEBUGP("entered %s\n", __FUNCTION__);
957
958 if (cda[CTA_HELP-1]) {
959 err = ctnetlink_change_helper(ct, cda);
960 if (err < 0)
961 return err;
962 }
963
964 if (cda[CTA_TIMEOUT-1]) {
965 err = ctnetlink_change_timeout(ct, cda);
966 if (err < 0)
967 return err;
968 }
969
970 if (cda[CTA_STATUS-1]) {
971 err = ctnetlink_change_status(ct, cda);
972 if (err < 0)
973 return err;
974 }
975
976 DEBUGP("all done\n");
977 return 0;
978}
979
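/* Create a fresh conntrack from the parsed tuples.  CTA_TIMEOUT is
 * mandatory; the entry goes live once its timer is armed and it is
 * inserted into the hash. */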
980static int
981ctnetlink_create_conntrack(struct nfattr *cda[],
982 struct ip_conntrack_tuple *otuple,
983 struct ip_conntrack_tuple *rtuple)
984{
985 struct ip_conntrack *ct;
986 int err = -EINVAL;
987
988 DEBUGP("entered %s\n", __FUNCTION__);
989
990 ct = ip_conntrack_alloc(otuple, rtuple);
991 if (ct == NULL || IS_ERR(ct))
992 return -ENOMEM;
993
994 if (!cda[CTA_TIMEOUT-1])
995 goto err;
996 ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
997
998 ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
999 ct->status |= IPS_CONFIRMED;
1000
1001 err = ctnetlink_change_status(ct, cda);
1002 if (err < 0)
1003 goto err;
1004
1005 ct->helper = ip_conntrack_helper_find_get(rtuple);
1006
1007 add_timer(&ct->timeout);
1008 ip_conntrack_hash_insert(ct);
1009
1010 if (ct->helper)
1011 ip_conntrack_helper_put(ct->helper);
1012
1013 DEBUGP("conntrack with id %u inserted\n", ct->id);
1014 return 0;
1015
1016err:
1017 ip_conntrack_free(ct);
1018 return err;
1019}
1020
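/* IPCTNL_MSG_CT_NEW handler: create the conntrack if the tuple is
 * unknown (NLM_F_CREATE), otherwise update the existing entry unless
 * NLM_F_EXCL asks us to fail with EEXIST. */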
1021static int
1022ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
1023 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
1024{
1025 struct ip_conntrack_tuple otuple, rtuple;
1026 struct ip_conntrack_tuple_hash *h = NULL;
1027 int err = 0;
1028
1029 DEBUGP("entered %s\n", __FUNCTION__);
1030
1031 if (cda[CTA_TUPLE_ORIG-1]) {
1032 err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG);
1033 if (err < 0)
1034 return err;
1035 }
1036
1037 if (cda[CTA_TUPLE_REPLY-1]) {
1038 err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY);
1039 if (err < 0)
1040 return err;
1041 }
1042
1043 write_lock_bh(&ip_conntrack_lock);
1044 if (cda[CTA_TUPLE_ORIG-1])
1045 h = __ip_conntrack_find(&otuple, NULL);
1046 else if (cda[CTA_TUPLE_REPLY-1])
1047 h = __ip_conntrack_find(&rtuple, NULL);
1048
1049 if (h == NULL) {
1050 write_unlock_bh(&ip_conntrack_lock);
1051 DEBUGP("no such conntrack, create new\n");
1052 err = -ENOENT;
1053 if (nlh->nlmsg_flags & NLM_F_CREATE)
1054 err = ctnetlink_create_conntrack(cda, &otuple, &rtuple);
1055 return err;
1056 }
1057 /* implicit 'else' */
1058
1059 /* we only allow nat config for new conntracks */
1060 if (cda[CTA_NAT-1]) {
1061 err = -EINVAL;
1062 goto out_unlock;
1063 }
1064
1065 /* We manipulate the conntrack inside the global conntrack table lock,
1066 * so there's no need to increase the refcount */
1067 DEBUGP("conntrack found\n");
1068 err = -EEXIST;
1069 if (!(nlh->nlmsg_flags & NLM_F_EXCL))
1070 err = ctnetlink_change_conntrack(tuplehash_to_ctrack(h), cda);
1071
1072out_unlock:
1073 write_unlock_bh(&ip_conntrack_lock);
1074 return err;
1075}
1076
1077/***********************************************************************
1078 * EXPECT
1079 ***********************************************************************/
1080
1081static inline int
1082ctnetlink_exp_dump_tuple(struct sk_buff *skb,
1083 const struct ip_conntrack_tuple *tuple,
1084 enum ctattr_expect type)
1085{
1086 struct nfattr *nest_parms = NFA_NEST(skb, type);
1087
1088 if (ctnetlink_dump_tuples(skb, tuple) < 0)
1089 goto nfattr_failure;
1090
1091 NFA_NEST_END(skb, nest_parms);
1092
1093 return 0;
1094
1095nfattr_failure:
1096 return -1;
1097}
1098
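/* Dump one expectation: its tuple, mask, master tuple, remaining
 * timeout and id. */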
1099static inline int
1100ctnetlink_exp_dump_expect(struct sk_buff *skb,
1101 const struct ip_conntrack_expect *exp)
1102{
1103 struct ip_conntrack *master = exp->master;
1104 u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ);
1105 u_int32_t id = htonl(exp->id);
1106
1107 if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
1108 goto nfattr_failure;
1109 if (ctnetlink_exp_dump_tuple(skb, &exp->mask, CTA_EXPECT_MASK) < 0)
1110 goto nfattr_failure;
1111 if (ctnetlink_exp_dump_tuple(skb,
1112 &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
1113 CTA_EXPECT_MASTER) < 0)
1114 goto nfattr_failure;
1115
1116 NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout);
1117 NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id);
1118
1119 return 0;
1120
1121nfattr_failure:
1122 return -1;
1123}
1124
1125static int
1126ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
1127 int event,
1128 int nowait,
1129 const struct ip_conntrack_expect *exp)
1130{
1131 struct nlmsghdr *nlh;
1132 struct nfgenmsg *nfmsg;
1133 unsigned char *b;
1134
1135 b = skb->tail;
1136
1137 event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
1138 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
1139 nfmsg = NLMSG_DATA(nlh);
1140
1141 nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
1142 nfmsg->nfgen_family = AF_INET;
1143 nfmsg->version = NFNETLINK_V0;
1144 nfmsg->res_id = 0;
1145
1146 if (ctnetlink_exp_dump_expect(skb, exp) < 0)
1147 goto nfattr_failure;
1148
1149 nlh->nlmsg_len = skb->tail - b;
1150 return skb->len;
1151
1152nlmsg_failure:
1153nfattr_failure:
1154 skb_trim(skb, b - skb->data);
1155 return -1;
1156}
1157
1158#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
1159static int ctnetlink_expect_event(struct notifier_block *this,
1160 unsigned long events, void *ptr)
1161{
1162 struct nlmsghdr *nlh;
1163 struct nfgenmsg *nfmsg;
1164 struct ip_conntrack_expect *exp = (struct ip_conntrack_expect *)ptr;
1165 struct sk_buff *skb;
1166 unsigned int type;
1167 unsigned char *b;
1168 int flags = 0;
1170
1171 if (events & IPEXP_NEW) {
1172 type = IPCTNL_MSG_EXP_NEW;
1173 flags = NLM_F_CREATE|NLM_F_EXCL;
1174 } else
1175 return NOTIFY_DONE;
1176
1177 skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
1178 if (!skb)
1179 return NOTIFY_DONE;
1180
1181 b = skb->tail;
1182
1183 type |= NFNL_SUBSYS_CTNETLINK << 8;
1184 nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
1185 nfmsg = NLMSG_DATA(nlh);
1186
1187 nlh->nlmsg_flags = flags;
1188 nfmsg->nfgen_family = AF_INET;
1189 nfmsg->version = NFNETLINK_V0;
1190 nfmsg->res_id = 0;
1191
1192 if (ctnetlink_exp_dump_expect(skb, exp) < 0)
1193 goto nfattr_failure;
1194
1195 nlh->nlmsg_len = skb->tail - b;
1197 nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
1198 return NOTIFY_DONE;
1199
1200nlmsg_failure:
1201nfattr_failure:
1202 kfree_skb(skb);
1203 return NOTIFY_DONE;
1204}
1205#endif
1206
1207static int
1208ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
1209{
1210 struct ip_conntrack_expect *exp = NULL;
1211 struct list_head *i;
1212 u_int32_t *id = (u_int32_t *) &cb->args[0];
1213
1214	DEBUGP("entered %s, last id=%u\n", __FUNCTION__, *id);
1215
1216 read_lock_bh(&ip_conntrack_lock);
1217 list_for_each_prev(i, &ip_conntrack_expect_list) {
1218 exp = (struct ip_conntrack_expect *) i;
1219 if (exp->id <= *id)
1220 continue;
1221 if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
1222 cb->nlh->nlmsg_seq,
1223 IPCTNL_MSG_EXP_NEW,
1224 1, exp) < 0)
1225 goto out;
1226 *id = exp->id;
1227 }
1228out:
1229 read_unlock_bh(&ip_conntrack_lock);
1230
1231	DEBUGP("leaving, last id=%u\n", *id);
1232
1233 return skb->len;
1234}
1235
1236static int
1237ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
1238 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
1239{
1240 struct ip_conntrack_tuple tuple;
1241 struct ip_conntrack_expect *exp;
1242 struct sk_buff *skb2;
1243 int err = 0;
1244
1245 DEBUGP("entered %s\n", __FUNCTION__);
1246
1247 if (nlh->nlmsg_flags & NLM_F_DUMP) {
1248 struct nfgenmsg *msg = NLMSG_DATA(nlh);
1249 u32 rlen;
1250
1251 if (msg->nfgen_family != AF_INET)
1252 return -EAFNOSUPPORT;
1253
1254 if ((*errp = netlink_dump_start(ctnl, skb, nlh,
1255 ctnetlink_exp_dump_table,
1256 ctnetlink_done)) != 0)
1257 return -EINVAL;
1258 rlen = NLMSG_ALIGN(nlh->nlmsg_len);
1259 if (rlen > skb->len)
1260 rlen = skb->len;
1261 skb_pull(skb, rlen);
1262 return 0;
1263 }
1264
1265 if (cda[CTA_EXPECT_MASTER-1])
1266 err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER);
1267 else
1268 return -EINVAL;
1269
1270 if (err < 0)
1271 return err;
1272
1273 exp = ip_conntrack_expect_find_get(&tuple);
1274 if (!exp)
1275 return -ENOENT;
1276
1277 err = -ENOMEM;
1278 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1279 if (!skb2)
1280 goto out;
1281 NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid;
1282
1283 err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
1284 nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
1285 1, exp);
1286 if (err <= 0)
1287 goto out;
1288
1289 ip_conntrack_expect_put(exp);
1290
1291 err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1292 if (err < 0)
1293		return err; /* netlink_unicast() has already freed skb2 */
1294
1295 return err;
1296
1297out:
1298 ip_conntrack_expect_put(exp);
1300 if (skb2)
1301 kfree_skb(skb2);
1302 return err;
1303}
1304
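/* Delete expectations: a single one by tuple (optionally cross-checked
 * against CTA_EXPECT_ID), all expectations of one helper by name, or,
 * with no attribute given, every expectation in the list. */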
1305static int
1306ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1307 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
1308{
1309 struct ip_conntrack_expect *exp, *tmp;
1310 struct ip_conntrack_tuple tuple;
1311 struct ip_conntrack_helper *h;
1312 int err;
1313
1314 if (cda[CTA_EXPECT_TUPLE-1]) {
1315 /* delete a single expect by tuple */
1316 err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
1317 if (err < 0)
1318 return err;
1319
1320 /* bump usage count to 2 */
1321 exp = ip_conntrack_expect_find_get(&tuple);
1322 if (!exp)
1323 return -ENOENT;
1324
1325 if (cda[CTA_EXPECT_ID-1]) {
1326 u_int32_t id =
1327 *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
1328 if (exp->id != ntohl(id)) {
1329 ip_conntrack_expect_put(exp);
1330 return -ENOENT;
1331 }
1332 }
1333
1334 /* after list removal, usage count == 1 */
1335 ip_conntrack_unexpect_related(exp);
1336 /* have to put what we 'get' above.
1337 * after this line usage count == 0 */
1338 ip_conntrack_expect_put(exp);
1339 } else if (cda[CTA_EXPECT_HELP_NAME-1]) {
1340 char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]);
1341
1342 /* delete all expectations for this helper */
1343 write_lock_bh(&ip_conntrack_lock);
1344 h = __ip_conntrack_helper_find_byname(name);
1345 if (!h) {
1346 write_unlock_bh(&ip_conntrack_lock);
1347 return -EINVAL;
1348 }
1349 list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
1350 list) {
1351 if (exp->master->helper == h
1352 && del_timer(&exp->timeout))
1353 __ip_ct_expect_unlink_destroy(exp);
1354 }
1355		write_unlock_bh(&ip_conntrack_lock);
1356 } else {
1357		/* This basically means we have to flush everything */
1358 write_lock_bh(&ip_conntrack_lock);
1359 list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
1360 list) {
1361 if (del_timer(&exp->timeout))
1362 __ip_ct_expect_unlink_destroy(exp);
1363 }
1364 write_unlock_bh(&ip_conntrack_lock);
1365 }
1366
1367 return 0;
1368}

1369static int
1370ctnetlink_change_expect(struct ip_conntrack_expect *x, struct nfattr *cda[])
1371{
1372 return -EOPNOTSUPP;
1373}
1374
1375static int
1376ctnetlink_create_expect(struct nfattr *cda[])
1377{
1378 struct ip_conntrack_tuple tuple, mask, master_tuple;
1379 struct ip_conntrack_tuple_hash *h = NULL;
1380 struct ip_conntrack_expect *exp;
1381 struct ip_conntrack *ct;
1382 int err = 0;
1383
1384 DEBUGP("entered %s\n", __FUNCTION__);
1385
1386 /* caller guarantees that those three CTA_EXPECT_* exist */
1387 err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
1388 if (err < 0)
1389 return err;
1390 err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK);
1391 if (err < 0)
1392 return err;
1393 err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER);
1394 if (err < 0)
1395 return err;
1396
1397 /* Look for master conntrack of this expectation */
1398 h = ip_conntrack_find_get(&master_tuple, NULL);
1399 if (!h)
1400 return -ENOENT;
1401 ct = tuplehash_to_ctrack(h);
1402
1403 if (!ct->helper) {
1404 /* such conntrack hasn't got any helper, abort */
1405 err = -EINVAL;
1406 goto out;
1407 }
1408
1409 exp = ip_conntrack_expect_alloc(ct);
1410 if (!exp) {
1411 err = -ENOMEM;
1412 goto out;
1413 }
1414
1415 exp->expectfn = NULL;
1416 exp->master = ct;
1417 memcpy(&exp->tuple, &tuple, sizeof(struct ip_conntrack_tuple));
1418 memcpy(&exp->mask, &mask, sizeof(struct ip_conntrack_tuple));
1419
1420 err = ip_conntrack_expect_related(exp);
1421 ip_conntrack_expect_put(exp);
1422
1423out:
1424 ip_conntrack_put(tuplehash_to_ctrack(h));
1425 return err;
1426}
1427
1428static int
1429ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
1430 struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
1431{
1432 struct ip_conntrack_tuple tuple;
1433 struct ip_conntrack_expect *exp;
1434 int err = 0;
1435
1436 DEBUGP("entered %s\n", __FUNCTION__);
1437
1438 if (!cda[CTA_EXPECT_TUPLE-1]
1439 || !cda[CTA_EXPECT_MASK-1]
1440 || !cda[CTA_EXPECT_MASTER-1])
1441 return -EINVAL;
1442
1443 err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
1444 if (err < 0)
1445 return err;
1446
1447 write_lock_bh(&ip_conntrack_lock);
1448 exp = __ip_conntrack_expect_find(&tuple);
1449
1450 if (!exp) {
1451 write_unlock_bh(&ip_conntrack_lock);
1452 err = -ENOENT;
1453 if (nlh->nlmsg_flags & NLM_F_CREATE)
1454 err = ctnetlink_create_expect(cda);
1455 return err;
1456 }
1457
1458 err = -EEXIST;
1459 if (!(nlh->nlmsg_flags & NLM_F_EXCL))
1460 err = ctnetlink_change_expect(exp, cda);
1461 write_unlock_bh(&ip_conntrack_lock);
1462
1463 DEBUGP("leaving\n");
1464
1465 return err;
1466}
1467
1468#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
1469static struct notifier_block ctnl_notifier = {
1470 .notifier_call = ctnetlink_conntrack_event,
1471};
1472
1473static struct notifier_block ctnl_notifier_exp = {
1474 .notifier_call = ctnetlink_expect_event,
1475};
1476#endif
1477
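/* Message type -> handler dispatch tables for the two nfnetlink
 * subsystems; every operation requires CAP_NET_ADMIN. */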
1478static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
1479 [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack,
1480 .attr_count = CTA_MAX,
1481 .cap_required = CAP_NET_ADMIN },
1482 [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack,
1483 .attr_count = CTA_MAX,
1484 .cap_required = CAP_NET_ADMIN },
1485 [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack,
1486 .attr_count = CTA_MAX,
1487 .cap_required = CAP_NET_ADMIN },
1488 [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack,
1489 .attr_count = CTA_MAX,
1490 .cap_required = CAP_NET_ADMIN },
1491};
1492
1493static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
1494 [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect,
1495 .attr_count = CTA_EXPECT_MAX,
1496 .cap_required = CAP_NET_ADMIN },
1497 [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect,
1498 .attr_count = CTA_EXPECT_MAX,
1499 .cap_required = CAP_NET_ADMIN },
1500 [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect,
1501 .attr_count = CTA_EXPECT_MAX,
1502 .cap_required = CAP_NET_ADMIN },
1503};
1504
1505static struct nfnetlink_subsystem ctnl_subsys = {
1506 .name = "conntrack",
1507 .subsys_id = NFNL_SUBSYS_CTNETLINK,
1508 .cb_count = IPCTNL_MSG_MAX,
1509 .cb = ctnl_cb,
1510};
1511
1512static struct nfnetlink_subsystem ctnl_exp_subsys = {
1513 .name = "conntrack_expect",
1514 .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP,
1515 .cb_count = IPCTNL_MSG_EXP_MAX,
1516 .cb = ctnl_exp_cb,
1517};
1518
1519static int __init ctnetlink_init(void)
1520{
1521 int ret;
1522
1523 printk("ctnetlink v%s: registering with nfnetlink.\n", version);
1524 ret = nfnetlink_subsys_register(&ctnl_subsys);
1525 if (ret < 0) {
1526 printk("ctnetlink_init: cannot register with nfnetlink.\n");
1527 goto err_out;
1528 }
1529
1530 ret = nfnetlink_subsys_register(&ctnl_exp_subsys);
1531 if (ret < 0) {
1532 printk("ctnetlink_init: cannot register exp with nfnetlink.\n");
1533 goto err_unreg_subsys;
1534 }
1535
1536#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
1537 ret = ip_conntrack_register_notifier(&ctnl_notifier);
1538 if (ret < 0) {
1539 printk("ctnetlink_init: cannot register notifier.\n");
1540 goto err_unreg_exp_subsys;
1541 }
1542
1543 ret = ip_conntrack_expect_register_notifier(&ctnl_notifier_exp);
1544 if (ret < 0) {
1545		printk("ctnetlink_init: cannot register expect notifier.\n");
1546 goto err_unreg_notifier;
1547 }
1548#endif
1549
1550 return 0;
1551
1552#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
1553err_unreg_notifier:
1554 ip_conntrack_unregister_notifier(&ctnl_notifier);
1555err_unreg_exp_subsys:
1556 nfnetlink_subsys_unregister(&ctnl_exp_subsys);
1557#endif
1558err_unreg_subsys:
1559 nfnetlink_subsys_unregister(&ctnl_subsys);
1560err_out:
1561 return ret;
1562}
1563
1564static void __exit ctnetlink_exit(void)
1565{
1566 printk("ctnetlink: unregistering from nfnetlink.\n");
1567
1568#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
1569	ip_conntrack_expect_unregister_notifier(&ctnl_notifier_exp);
1570 ip_conntrack_unregister_notifier(&ctnl_notifier);
1571#endif
1572
1573 nfnetlink_subsys_unregister(&ctnl_exp_subsys);
1574 nfnetlink_subsys_unregister(&ctnl_subsys);
1576}
1577
1578module_init(ctnetlink_init);
1579module_exit(ctnetlink_exit);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 602c74db3252..838d1d69b36e 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -102,22 +102,24 @@ static int icmp_packet(struct ip_conntrack *ct,
 		ct->timeout.function((unsigned long)ct);
 	} else {
 		atomic_inc(&ct->proto.icmp.count);
+		ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
 		ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
 	}
 
 	return NF_ACCEPT;
 }
 
+static u_int8_t valid_new[] = {
+	[ICMP_ECHO] = 1,
+	[ICMP_TIMESTAMP] = 1,
+	[ICMP_INFO_REQUEST] = 1,
+	[ICMP_ADDRESS] = 1
+};
+
 /* Called when a new connection for this protocol found. */
 static int icmp_new(struct ip_conntrack *conntrack,
 		    const struct sk_buff *skb)
 {
-	static u_int8_t valid_new[]
-		= { [ICMP_ECHO] = 1,
-		    [ICMP_TIMESTAMP] = 1,
-		    [ICMP_INFO_REQUEST] = 1,
-		    [ICMP_ADDRESS] = 1 };
-
 	if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
 	    || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
 		/* Can't create a new ICMP `conn' with this. */
@@ -158,11 +160,12 @@ icmp_error_message(struct sk_buff *skb,
 		return NF_ACCEPT;
 	}
 
-	innerproto = ip_ct_find_proto(inside->ip.protocol);
+	innerproto = ip_conntrack_proto_find_get(inside->ip.protocol);
 	dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp) + inside->ip.ihl*4;
 	/* Are they talking about one of our connections? */
 	if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) {
 		DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol);
+		ip_conntrack_proto_put(innerproto);
 		return NF_ACCEPT;
 	}
 
@@ -170,8 +173,10 @@ icmp_error_message(struct sk_buff *skb,
 	   been preserved inside the ICMP. */
 	if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
 		DEBUGP("icmp_error_track: Can't invert tuple\n");
+		ip_conntrack_proto_put(innerproto);
 		return NF_ACCEPT;
 	}
+	ip_conntrack_proto_put(innerproto);
 
 	*ctinfo = IP_CT_RELATED;
 
@@ -212,7 +217,7 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
 	icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
 	if (icmph == NULL) {
 		if (LOG_INVALID(IPPROTO_ICMP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				      "ip_ct_icmp: short packet ");
 		return -NF_ACCEPT;
 	}
@@ -226,13 +231,13 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
 		if (!(u16)csum_fold(skb->csum))
 			break;
 		if (LOG_INVALID(IPPROTO_ICMP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				      "ip_ct_icmp: bad HW ICMP checksum ");
 		return -NF_ACCEPT;
 	case CHECKSUM_NONE:
 		if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) {
 			if (LOG_INVALID(IPPROTO_ICMP))
-				nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+				nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 					      "ip_ct_icmp: bad ICMP checksum ");
 			return -NF_ACCEPT;
 		}
@@ -249,7 +254,7 @@ checksum_skipped:
 	 */
 	if (icmph->type > NR_ICMP_TYPES) {
 		if (LOG_INVALID(IPPROTO_ICMP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				      "ip_ct_icmp: invalid ICMP type ");
 		return -NF_ACCEPT;
 	}
@@ -265,6 +270,47 @@ checksum_skipped:
 		return icmp_error_message(skb, ctinfo, hooknum);
 }
 
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+static int icmp_tuple_to_nfattr(struct sk_buff *skb,
+				const struct ip_conntrack_tuple *t)
+{
+	NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t),
+		&t->src.u.icmp.id);
+	NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
+		&t->dst.u.icmp.type);
+	NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
+		&t->dst.u.icmp.code);
+
+	if (t->dst.u.icmp.type >= sizeof(valid_new)
+	    || !valid_new[t->dst.u.icmp.type])
+		return -EINVAL;
+
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static int icmp_nfattr_to_tuple(struct nfattr *tb[],
+				struct ip_conntrack_tuple *tuple)
+{
+	if (!tb[CTA_PROTO_ICMP_TYPE-1]
+	    || !tb[CTA_PROTO_ICMP_CODE-1]
+	    || !tb[CTA_PROTO_ICMP_ID-1])
+		return -1;
+
+	tuple->dst.u.icmp.type =
+		*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
+	tuple->dst.u.icmp.code =
+		*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
+	tuple->src.u.icmp.id =
+		*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
+
+	return 0;
+}
+#endif
+
 struct ip_conntrack_protocol ip_conntrack_protocol_icmp =
 {
 	.proto			= IPPROTO_ICMP,
@@ -276,4 +322,9 @@ struct ip_conntrack_protocol ip_conntrack_protocol_icmp =
 	.packet			= icmp_packet,
 	.new			= icmp_new,
 	.error			= icmp_error,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+	.tuple_to_nfattr	= icmp_tuple_to_nfattr,
+	.nfattr_to_tuple	= icmp_nfattr_to_tuple,
+#endif
 };
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 31d75390bf12..a875f35e576d 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -404,6 +404,8 @@ static int sctp_packet(struct ip_conntrack *conntrack,
 	}
 
 	conntrack->proto.sctp.state = newconntrack;
+	if (oldsctpstate != newconntrack)
+		ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
 	write_unlock_bh(&sctp_lock);
 	}
 
@@ -503,7 +505,12 @@ static struct ip_conntrack_protocol ip_conntrack_protocol_sctp = {
 	.packet		= sctp_packet,
 	.new		= sctp_new,
 	.destroy	= NULL,
-	.me		= THIS_MODULE
+	.me		= THIS_MODULE,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+	.tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
+	.nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
+#endif
 };
 
 #ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 809dfed766d4..f23ef1f88c46 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -336,6 +336,23 @@ static int tcp_print_conntrack(struct seq_file *s,
 	return seq_printf(s, "%s ", tcp_conntrack_names[state]);
 }
 
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
+			 const struct ip_conntrack *ct)
+{
+	read_lock_bh(&tcp_lock);
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
+		&ct->proto.tcp.state);
+	read_unlock_bh(&tcp_lock);
+
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+#endif
+
 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
 {
 	if (tcph->rst) return TCP_RST_SET;
@@ -699,7 +716,7 @@ static int tcp_in_window(struct ip_ct_tcp *state,
 		res = 1;
 	} else {
 		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				"ip_ct_tcp: %s ",
 			    before(seq, sender->td_maxend + 1) ?
 			    after(end, sender->td_end - receiver->td_maxwin - 1) ?
@@ -798,7 +815,7 @@ static int tcp_error(struct sk_buff *skb,
 				sizeof(_tcph), &_tcph);
 	if (th == NULL) {
 		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				"ip_ct_tcp: short packet ");
 		return -NF_ACCEPT;
 	}
@@ -806,7 +823,7 @@ static int tcp_error(struct sk_buff *skb,
 	/* Not whole TCP header or malformed packet */
 	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
 		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				"ip_ct_tcp: truncated/malformed packet ");
 		return -NF_ACCEPT;
 	}
@@ -823,7 +840,7 @@ static int tcp_error(struct sk_buff *skb,
 	      skb->ip_summed == CHECKSUM_HW ? skb->csum
 	      : skb_checksum(skb, iph->ihl*4, tcplen, 0))) {
 		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				"ip_ct_tcp: bad TCP checksum ");
 		return -NF_ACCEPT;
 	}
@@ -832,7 +849,7 @@ static int tcp_error(struct sk_buff *skb,
 	tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
 	if (!tcp_valid_flags[tcpflags]) {
 		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				"ip_ct_tcp: invalid TCP flag combination ");
 		return -NF_ACCEPT;
 	}
@@ -880,8 +897,9 @@ static int tcp_packet(struct ip_conntrack *conntrack,
 			 */
 			write_unlock_bh(&tcp_lock);
 			if (LOG_INVALID(IPPROTO_TCP))
 				nf_log_packet(PF_INET, 0, skb, NULL, NULL,
-					"ip_ct_tcp: killing out of sync session ");
+					      NULL, "ip_ct_tcp: "
+					      "killing out of sync session ");
 			if (del_timer(&conntrack->timeout))
 				conntrack->timeout.function((unsigned long)
 							    conntrack);
@@ -895,7 +913,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
 
 		write_unlock_bh(&tcp_lock);
 		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				      "ip_ct_tcp: invalid packet ignored ");
 		return NF_ACCEPT;
 	case TCP_CONNTRACK_MAX:
@@ -905,7 +923,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
 			 old_state);
 		write_unlock_bh(&tcp_lock);
 		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				      "ip_ct_tcp: invalid state ");
 		return -NF_ACCEPT;
 	case TCP_CONNTRACK_SYN_SENT:
@@ -926,7 +944,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
 			write_unlock_bh(&tcp_lock);
 			if (LOG_INVALID(IPPROTO_TCP))
 				nf_log_packet(PF_INET, 0, skb, NULL, NULL,
-					"ip_ct_tcp: invalid SYN");
+					      NULL, "ip_ct_tcp: invalid SYN");
 			return -NF_ACCEPT;
 		}
 	case TCP_CONNTRACK_CLOSE:
@@ -973,6 +991,10 @@ static int tcp_packet(struct ip_conntrack *conntrack,
 	   ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
 	write_unlock_bh(&tcp_lock);
 
+	ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+	if (new_state != old_state)
+		ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
+
 	if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
 		/* If only reply is a RST, we can consider ourselves not to
 		   have an established connection: this is a fairly common
@@ -1096,4 +1118,10 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
 	.packet			= tcp_packet,
 	.new			= tcp_new,
 	.error			= tcp_error,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+	.to_nfattr		= tcp_to_nfattr,
+	.tuple_to_nfattr	= ip_ct_port_tuple_to_nfattr,
+	.nfattr_to_tuple	= ip_ct_port_nfattr_to_tuple,
+#endif
 };
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 8c1eaba098d4..f2dcac7c7660 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -73,7 +73,8 @@ static int udp_packet(struct ip_conntrack *conntrack,
 		ip_ct_refresh_acct(conntrack, ctinfo, skb,
 				   ip_ct_udp_timeout_stream);
 		/* Also, more likely to be important, and not a probe */
-		set_bit(IPS_ASSURED_BIT, &conntrack->status);
+		if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
+			ip_conntrack_event_cache(IPCT_STATUS, skb);
 	} else
 		ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
 
@@ -97,7 +98,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
 	hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr);
 	if (hdr == NULL) {
 		if (LOG_INVALID(IPPROTO_UDP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				      "ip_ct_udp: short packet ");
 		return -NF_ACCEPT;
 	}
@@ -105,7 +106,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
 	/* Truncated/malformed packets */
 	if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
 		if (LOG_INVALID(IPPROTO_UDP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				      "ip_ct_udp: truncated/malformed packet ");
 		return -NF_ACCEPT;
 	}
@@ -125,7 +126,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
 	      skb->ip_summed == CHECKSUM_HW ? skb->csum
 	      : skb_checksum(skb, iph->ihl*4, udplen, 0))) {
 		if (LOG_INVALID(IPPROTO_UDP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				      "ip_ct_udp: bad UDP checksum ");
 		return -NF_ACCEPT;
 	}
@@ -144,4 +145,9 @@ struct ip_conntrack_protocol ip_conntrack_protocol_udp =
 	.packet			= udp_packet,
 	.new			= udp_new,
 	.error			= udp_error,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+	.tuple_to_nfattr	= ip_ct_port_tuple_to_nfattr,
+	.nfattr_to_tuple	= ip_ct_port_nfattr_to_tuple,
+#endif
 };
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 42dc95102873..ee5895afd0c3 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -5,7 +5,7 @@
 */
 
 /* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
@@ -147,8 +147,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 	if (DIRECTION(hash))
 		return 0;
 
-	proto = ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
-			       .tuple.dst.protonum);
+	proto = __ip_conntrack_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
 	IP_NF_ASSERT(proto);
 
 	if (seq_printf(s, "%-8s %u %ld ",
@@ -185,7 +184,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 		return -ENOSPC;
 
 #if defined(CONFIG_IP_NF_CONNTRACK_MARK)
-	if (seq_printf(s, "mark=%lu ", conntrack->mark))
+	if (seq_printf(s, "mark=%u ", conntrack->mark))
 		return -ENOSPC;
 #endif
 
@@ -283,7 +282,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
 	seq_printf(s, "proto=%u ", expect->tuple.dst.protonum);
 
 	print_tuple(s, &expect->tuple,
-		    ip_ct_find_proto(expect->tuple.dst.protonum));
+		    __ip_conntrack_proto_find(expect->tuple.dst.protonum));
 	return seq_putc(s, '\n');
 }
 
@@ -432,6 +431,13 @@ static unsigned int ip_conntrack_defrag(unsigned int hooknum,
 					  const struct net_device *out,
 					  int (*okfn)(struct sk_buff *))
 {
+#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
+	/* Previously seen (loopback)?  Ignore.  Do this before
+	   fragment check. */
+	if ((*pskb)->nfct)
+		return NF_ACCEPT;
+#endif
+
 	/* Gather fragments. */
 	if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
 		*pskb = ip_ct_gather_frags(*pskb,
@@ -882,6 +888,7 @@ static int init_or_cleanup(int init)
 	return ret;
 
  cleanup:
+	synchronize_net();
 #ifdef CONFIG_SYSCTL
 	unregister_sysctl_table(ip_ct_sysctl_header);
  cleanup_localinops:
@@ -964,6 +971,14 @@ void need_ip_conntrack(void)
 {
 }
 
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+EXPORT_SYMBOL_GPL(ip_conntrack_chain);
+EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain);
+EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier);
+EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier);
+EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init);
+EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
+#endif
 EXPORT_SYMBOL(ip_conntrack_protocol_register);
 EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
 EXPORT_SYMBOL(ip_ct_get_tuple);
@@ -975,12 +990,16 @@ EXPORT_SYMBOL(ip_conntrack_helper_register);
 EXPORT_SYMBOL(ip_conntrack_helper_unregister);
 EXPORT_SYMBOL(ip_ct_iterate_cleanup);
 EXPORT_SYMBOL(ip_ct_refresh_acct);
-EXPORT_SYMBOL(ip_ct_protos);
-EXPORT_SYMBOL(ip_ct_find_proto);
+
 EXPORT_SYMBOL(ip_conntrack_expect_alloc);
-EXPORT_SYMBOL(ip_conntrack_expect_free);
+EXPORT_SYMBOL(ip_conntrack_expect_put);
+EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get);
 EXPORT_SYMBOL(ip_conntrack_expect_related);
 EXPORT_SYMBOL(ip_conntrack_unexpect_related);
+EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
+EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
+EXPORT_SYMBOL_GPL(__ip_ct_expect_unlink_destroy);
+
 EXPORT_SYMBOL(ip_conntrack_tuple_taken);
 EXPORT_SYMBOL(ip_ct_gather_frags);
 EXPORT_SYMBOL(ip_conntrack_htable_size);
@@ -988,7 +1007,28 @@ EXPORT_SYMBOL(ip_conntrack_lock);
 EXPORT_SYMBOL(ip_conntrack_hash);
 EXPORT_SYMBOL(ip_conntrack_untracked);
 EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
-EXPORT_SYMBOL_GPL(ip_conntrack_put);
 #ifdef CONFIG_IP_NF_NAT_NEEDED
 EXPORT_SYMBOL(ip_conntrack_tcp_update);
 #endif
+
+EXPORT_SYMBOL_GPL(ip_conntrack_flush);
+EXPORT_SYMBOL_GPL(__ip_conntrack_find);
+
+EXPORT_SYMBOL_GPL(ip_conntrack_alloc);
+EXPORT_SYMBOL_GPL(ip_conntrack_free);
+EXPORT_SYMBOL_GPL(ip_conntrack_hash_insert);
+
+EXPORT_SYMBOL_GPL(ip_ct_remove_expectations);
+
+EXPORT_SYMBOL_GPL(ip_conntrack_helper_find_get);
+EXPORT_SYMBOL_GPL(ip_conntrack_helper_put);
+EXPORT_SYMBOL_GPL(__ip_conntrack_helper_find_byname);
+
+EXPORT_SYMBOL_GPL(ip_conntrack_proto_find_get);
+EXPORT_SYMBOL_GPL(ip_conntrack_proto_put);
+EXPORT_SYMBOL_GPL(__ip_conntrack_proto_find);
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+EXPORT_SYMBOL_GPL(ip_ct_port_tuple_to_nfattr);
+EXPORT_SYMBOL_GPL(ip_ct_port_nfattr_to_tuple);
+#endif
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
index 992fac3e36ee..f8ff170f390a 100644
--- a/net/ipv4/netfilter/ip_conntrack_tftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_tftp.c
@@ -65,7 +65,7 @@ static int tftp_help(struct sk_buff **pskb,
65 DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 65 DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
66 DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); 66 DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
67 67
68 exp = ip_conntrack_expect_alloc(); 68 exp = ip_conntrack_expect_alloc(ct);
69 if (exp == NULL) 69 if (exp == NULL)
70 return NF_DROP; 70 return NF_DROP;
71 71
@@ -75,17 +75,15 @@ static int tftp_help(struct sk_buff **pskb,
75 exp->mask.dst.u.udp.port = 0xffff; 75 exp->mask.dst.u.udp.port = 0xffff;
76 exp->mask.dst.protonum = 0xff; 76 exp->mask.dst.protonum = 0xff;
77 exp->expectfn = NULL; 77 exp->expectfn = NULL;
78 exp->master = ct;
79 78
80 DEBUGP("expect: "); 79 DEBUGP("expect: ");
81 DUMP_TUPLE(&exp->tuple); 80 DUMP_TUPLE(&exp->tuple);
82 DUMP_TUPLE(&exp->mask); 81 DUMP_TUPLE(&exp->mask);
83 if (ip_nat_tftp_hook) 82 if (ip_nat_tftp_hook)
84 ret = ip_nat_tftp_hook(pskb, ctinfo, exp); 83 ret = ip_nat_tftp_hook(pskb, ctinfo, exp);
85 else if (ip_conntrack_expect_related(exp) != 0) { 84 else if (ip_conntrack_expect_related(exp) != 0)
86 ip_conntrack_expect_free(exp);
87 ret = NF_DROP; 85 ret = NF_DROP;
88 } 86 ip_conntrack_expect_put(exp);
89 break; 87 break;
90 case TFTP_OPCODE_DATA: 88 case TFTP_OPCODE_DATA:
91 case TFTP_OPCODE_ACK: 89 case TFTP_OPCODE_ACK:
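
The tftp hunk above is the template for every helper touched in this series: expectations are now refcounted, so ip_conntrack_expect_free() disappears in favour of ip_conntrack_expect_put(), and the master conntrack is passed to the allocator instead of being assigned by hand. A minimal sketch of the resulting calling convention, assuming only the signatures exported earlier in this patch:

    /* Hedged sketch, not a real helper: the alloc/related/put pattern
     * the conversion above establishes. */
    static int example_expect(struct sk_buff **pskb,
                              struct ip_conntrack *ct,
                              enum ip_conntrack_info ctinfo)
    {
            struct ip_conntrack_expect *exp;
            int ret = NF_ACCEPT;

            exp = ip_conntrack_expect_alloc(ct);    /* master set here now */
            if (exp == NULL)
                    return NF_DROP;

            /* ... fill in exp->tuple, exp->mask, exp->expectfn ... */

            if (ip_conntrack_expect_related(exp) != 0)
                    ret = NF_DROP;

            /* Always drop our reference; on success the conntrack core
             * holds its own, so there is no explicit free on the error
             * path any more. */
            ip_conntrack_expect_put(exp);
            return ret;
    }
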
diff --git a/net/ipv4/netfilter/ip_nat_amanda.c b/net/ipv4/netfilter/ip_nat_amanda.c
index da1f412583ed..706c8074f422 100644
--- a/net/ipv4/netfilter/ip_nat_amanda.c
+++ b/net/ipv4/netfilter/ip_nat_amanda.c
@@ -56,10 +56,8 @@ static unsigned int help(struct sk_buff **pskb,
56 break; 56 break;
57 } 57 }
58 58
59 if (port == 0) { 59 if (port == 0)
60 ip_conntrack_expect_free(exp);
61 return NF_DROP; 60 return NF_DROP;
62 }
63 61
64 sprintf(buffer, "%u", port); 62 sprintf(buffer, "%u", port);
65 ret = ip_nat_mangle_udp_packet(pskb, exp->master, ctinfo, 63 ret = ip_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 739b6dde1c82..1adedb743f60 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -47,8 +47,39 @@ DEFINE_RWLOCK(ip_nat_lock);
47static unsigned int ip_nat_htable_size; 47static unsigned int ip_nat_htable_size;
48 48
49static struct list_head *bysource; 49static struct list_head *bysource;
50
51#define MAX_IP_NAT_PROTO 256
50struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; 52struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
51 53
54static inline struct ip_nat_protocol *
55__ip_nat_proto_find(u_int8_t protonum)
56{
57 return ip_nat_protos[protonum];
58}
59
60struct ip_nat_protocol *
61ip_nat_proto_find_get(u_int8_t protonum)
62{
63 struct ip_nat_protocol *p;
64
65 /* we need to disable preemption to make sure 'p' doesn't get
66 * removed until we've grabbed the reference */
67 preempt_disable();
68 p = __ip_nat_proto_find(protonum);
69 if (p) {
70 if (!try_module_get(p->me))
71 p = &ip_nat_unknown_protocol;
72 }
73 preempt_enable();
74
75 return p;
76}
77
78void
79ip_nat_proto_put(struct ip_nat_protocol *p)
80{
81 module_put(p->me);
82}
52 83
53/* We keep an extra hash for each conntrack, for fast searching. */ 84/* We keep an extra hash for each conntrack, for fast searching. */
54static inline unsigned int 85static inline unsigned int
@@ -103,7 +134,8 @@ static int
103in_range(const struct ip_conntrack_tuple *tuple, 134in_range(const struct ip_conntrack_tuple *tuple,
104 const struct ip_nat_range *range) 135 const struct ip_nat_range *range)
105{ 136{
106 struct ip_nat_protocol *proto = ip_nat_find_proto(tuple->dst.protonum); 137 struct ip_nat_protocol *proto =
138 __ip_nat_proto_find(tuple->dst.protonum);
107 139
108 /* If we are supposed to map IPs, then we must be in the 140 /* If we are supposed to map IPs, then we must be in the
109 range specified, otherwise let this drag us onto a new src IP. */ 141 range specified, otherwise let this drag us onto a new src IP. */
@@ -216,8 +248,7 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
216 struct ip_conntrack *conntrack, 248 struct ip_conntrack *conntrack,
217 enum ip_nat_manip_type maniptype) 249 enum ip_nat_manip_type maniptype)
218{ 250{
219 struct ip_nat_protocol *proto 251 struct ip_nat_protocol *proto;
220 = ip_nat_find_proto(orig_tuple->dst.protonum);
221 252
222 /* 1) If this srcip/proto/src-proto-part is currently mapped, 253 /* 1) If this srcip/proto/src-proto-part is currently mapped,
223 and that same mapping gives a unique tuple within the given 254 and that same mapping gives a unique tuple within the given
@@ -242,14 +273,20 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
242 /* 3) The per-protocol part of the manip is made to map into 273 /* 3) The per-protocol part of the manip is made to map into
243 the range to make a unique tuple. */ 274 the range to make a unique tuple. */
244 275
276 proto = ip_nat_proto_find_get(orig_tuple->dst.protonum);
277
245 /* Only bother mapping if it's not already in range and unique */ 278 /* Only bother mapping if it's not already in range and unique */
246 if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) 279 if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
247 || proto->in_range(tuple, maniptype, &range->min, &range->max)) 280 || proto->in_range(tuple, maniptype, &range->min, &range->max))
248 && !ip_nat_used_tuple(tuple, conntrack)) 281 && !ip_nat_used_tuple(tuple, conntrack)) {
282 ip_nat_proto_put(proto);
249 return; 283 return;
284 }
250 285
 251 /* Last chance: get protocol to try to obtain unique tuple. */ 286
252 proto->unique_tuple(tuple, range, maniptype, conntrack); 287 proto->unique_tuple(tuple, range, maniptype, conntrack);
288
289 ip_nat_proto_put(proto);
253} 290}
254 291
255unsigned int 292unsigned int
@@ -320,17 +357,20 @@ manip_pkt(u_int16_t proto,
320 enum ip_nat_manip_type maniptype) 357 enum ip_nat_manip_type maniptype)
321{ 358{
322 struct iphdr *iph; 359 struct iphdr *iph;
360 struct ip_nat_protocol *p;
323 361
324 (*pskb)->nfcache |= NFC_ALTERED; 362 if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
325 if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph)))
326 return 0; 363 return 0;
327 364
328 iph = (void *)(*pskb)->data + iphdroff; 365 iph = (void *)(*pskb)->data + iphdroff;
329 366
 330 /* Manipulate protocol part. */ 367
331 if (!ip_nat_find_proto(proto)->manip_pkt(pskb, iphdroff, 368 p = ip_nat_proto_find_get(proto);
332 target, maniptype)) 369 if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) {
370 ip_nat_proto_put(p);
333 return 0; 371 return 0;
372 }
373 ip_nat_proto_put(p);
334 374
335 iph = (void *)(*pskb)->data + iphdroff; 375 iph = (void *)(*pskb)->data + iphdroff;
336 376
@@ -391,7 +431,7 @@ int icmp_reply_translation(struct sk_buff **pskb,
391 struct ip_conntrack_tuple inner, target; 431 struct ip_conntrack_tuple inner, target;
392 int hdrlen = (*pskb)->nh.iph->ihl * 4; 432 int hdrlen = (*pskb)->nh.iph->ihl * 4;
393 433
394 if (!skb_ip_make_writable(pskb, hdrlen + sizeof(*inside))) 434 if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
395 return 0; 435 return 0;
396 436
397 inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; 437 inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
@@ -426,7 +466,8 @@ int icmp_reply_translation(struct sk_buff **pskb,
426 466
427 if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 + 467 if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
428 sizeof(struct icmphdr) + inside->ip.ihl*4, 468 sizeof(struct icmphdr) + inside->ip.ihl*4,
429 &inner, ip_ct_find_proto(inside->ip.protocol))) 469 &inner,
470 __ip_conntrack_proto_find(inside->ip.protocol)))
430 return 0; 471 return 0;
431 472
432 /* Change inner back to look like incoming packet. We do the 473 /* Change inner back to look like incoming packet. We do the
@@ -496,6 +537,49 @@ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
496 synchronize_net(); 537 synchronize_net();
497} 538}
498 539
540#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
541 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
542int
543ip_nat_port_range_to_nfattr(struct sk_buff *skb,
544 const struct ip_nat_range *range)
545{
546 NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(u_int16_t),
547 &range->min.tcp.port);
548 NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(u_int16_t),
549 &range->max.tcp.port);
550
551 return 0;
552
553nfattr_failure:
554 return -1;
555}
556
557int
558ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
559{
560 int ret = 0;
561
562 /* we have to return whether we actually parsed something or not */
563
564 if (tb[CTA_PROTONAT_PORT_MIN-1]) {
565 ret = 1;
566 range->min.tcp.port =
567 *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
568 }
569
570 if (!tb[CTA_PROTONAT_PORT_MAX-1]) {
571 if (ret)
572 range->max.tcp.port = range->min.tcp.port;
573 } else {
574 ret = 1;
575 range->max.tcp.port =
576 *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
577 }
578
579 return ret;
580}
581#endif
582
499int __init ip_nat_init(void) 583int __init ip_nat_init(void)
500{ 584{
501 size_t i; 585 size_t i;
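
Two things are worth noting about the lookup helpers introduced above: __ip_nat_proto_find() is the bare fast path for callers that already run in a context where the protocol cannot be unregistered underneath them, while ip_nat_proto_find_get() never returns NULL, falling back to ip_nat_unknown_protocol when try_module_get() fails. A sketch of the pairing discipline every caller must follow, assuming the signatures defined above:

    /* Hedged sketch: every find_get must be matched by a put on every
     * return path, because the reference pins the protocol module. */
    static int example_manip(struct sk_buff **pskb, u_int8_t protonum,
                             unsigned int iphdroff,
                             const struct ip_conntrack_tuple *target,
                             enum ip_nat_manip_type maniptype)
    {
            struct ip_nat_protocol *p = ip_nat_proto_find_get(protonum);
            int ok = p->manip_pkt(pskb, iphdroff, target, maniptype);

            ip_nat_proto_put(p);    /* paired put, success or failure */
            return ok;
    }
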
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
index c6000e794ad6..d83757a70d9f 100644
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -143,10 +143,8 @@ static unsigned int ip_nat_ftp(struct sk_buff **pskb,
143 break; 143 break;
144 } 144 }
145 145
146 if (port == 0) { 146 if (port == 0)
147 ip_conntrack_expect_free(exp);
148 return NF_DROP; 147 return NF_DROP;
149 }
150 148
151 if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo, 149 if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo,
152 seq)) { 150 seq)) {
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index 158f34f32c04..d2dd5d313556 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -168,7 +168,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
168 struct tcphdr *tcph; 168 struct tcphdr *tcph;
169 int datalen; 169 int datalen;
170 170
171 if (!skb_ip_make_writable(pskb, (*pskb)->len)) 171 if (!skb_make_writable(pskb, (*pskb)->len))
172 return 0; 172 return 0;
173 173
174 if (rep_len > match_len 174 if (rep_len > match_len
@@ -228,7 +228,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
228 match_offset + match_len) 228 match_offset + match_len)
229 return 0; 229 return 0;
230 230
231 if (!skb_ip_make_writable(pskb, (*pskb)->len)) 231 if (!skb_make_writable(pskb, (*pskb)->len))
232 return 0; 232 return 0;
233 233
234 if (rep_len > match_len 234 if (rep_len > match_len
@@ -315,7 +315,7 @@ ip_nat_sack_adjust(struct sk_buff **pskb,
315 optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr); 315 optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
316 optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4; 316 optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
317 317
318 if (!skb_ip_make_writable(pskb, optend)) 318 if (!skb_make_writable(pskb, optend))
319 return 0; 319 return 0;
320 320
321 dir = CTINFO2DIR(ctinfo); 321 dir = CTINFO2DIR(ctinfo);
@@ -363,7 +363,7 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
363 this_way = &ct->nat.info.seq[dir]; 363 this_way = &ct->nat.info.seq[dir];
364 other_way = &ct->nat.info.seq[!dir]; 364 other_way = &ct->nat.info.seq[!dir];
365 365
366 if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) 366 if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
367 return 0; 367 return 0;
368 368
369 tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; 369 tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
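
The rename from skb_ip_make_writable() to skb_make_writable() recurs throughout this series; the contract is unchanged. A sketch of the usage pattern, assuming the renamed helper behaves like its predecessor:

    /* Hedged sketch: make the first 'len' bytes writable (this may
     * unshare or reallocate the skb), then re-derive cached pointers. */
    static int example_mangle(struct sk_buff **pskb, unsigned int len)
    {
            struct iphdr *iph;

            if (!skb_make_writable(pskb, len))
                    return 0;                /* allocation failed: bail */

            iph = (*pskb)->nh.iph;           /* re-fetch after unshare */
            /* ... modify header bytes below 'len' ... */
            return 1;
    }
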
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
index 9c1ca3381d56..de31942babe3 100644
--- a/net/ipv4/netfilter/ip_nat_irc.c
+++ b/net/ipv4/netfilter/ip_nat_irc.c
@@ -65,10 +65,8 @@ static unsigned int help(struct sk_buff **pskb,
65 break; 65 break;
66 } 66 }
67 67
68 if (port == 0) { 68 if (port == 0)
69 ip_conntrack_expect_free(exp);
70 return NF_DROP; 69 return NF_DROP;
71 }
72 70
73 /* strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27 71 /* strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27
74 * strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28 72 * strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
index a558cf0eee8a..938719043999 100644
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -35,16 +35,17 @@ icmp_unique_tuple(struct ip_conntrack_tuple *tuple,
35 const struct ip_conntrack *conntrack) 35 const struct ip_conntrack *conntrack)
36{ 36{
37 static u_int16_t id; 37 static u_int16_t id;
38 unsigned int range_size 38 unsigned int range_size;
39 = (unsigned int)range->max.icmp.id - range->min.icmp.id + 1;
40 unsigned int i; 39 unsigned int i;
41 40
41 range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1;
42 /* If no range specified... */ 42 /* If no range specified... */
43 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) 43 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
44 range_size = 0xFFFF; 44 range_size = 0xFFFF;
45 45
46 for (i = 0; i < range_size; i++, id++) { 46 for (i = 0; i < range_size; i++, id++) {
47 tuple->src.u.icmp.id = range->min.icmp.id + (id % range_size); 47 tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
48 (id % range_size));
48 if (!ip_nat_used_tuple(tuple, conntrack)) 49 if (!ip_nat_used_tuple(tuple, conntrack))
49 return 1; 50 return 1;
50 } 51 }
@@ -61,7 +62,7 @@ icmp_manip_pkt(struct sk_buff **pskb,
61 struct icmphdr *hdr; 62 struct icmphdr *hdr;
62 unsigned int hdroff = iphdroff + iph->ihl*4; 63 unsigned int hdroff = iphdroff + iph->ihl*4;
63 64
64 if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr))) 65 if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
65 return 0; 66 return 0;
66 67
67 hdr = (struct icmphdr *)((*pskb)->data + hdroff); 68 hdr = (struct icmphdr *)((*pskb)->data + hdroff);
@@ -105,11 +106,18 @@ icmp_print_range(char *buffer, const struct ip_nat_range *range)
105 else return 0; 106 else return 0;
106} 107}
107 108
108struct ip_nat_protocol ip_nat_protocol_icmp 109struct ip_nat_protocol ip_nat_protocol_icmp = {
109= { "ICMP", IPPROTO_ICMP, 110 .name = "ICMP",
110 icmp_manip_pkt, 111 .protonum = IPPROTO_ICMP,
111 icmp_in_range, 112 .me = THIS_MODULE,
112 icmp_unique_tuple, 113 .manip_pkt = icmp_manip_pkt,
113 icmp_print, 114 .in_range = icmp_in_range,
114 icmp_print_range 115 .unique_tuple = icmp_unique_tuple,
116 .print = icmp_print,
117 .print_range = icmp_print_range,
118#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
119 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
120 .range_to_nfattr = ip_nat_port_range_to_nfattr,
121 .nfattr_to_range = ip_nat_port_nfattr_to_range,
122#endif
115}; 123};
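
The icmp_unique_tuple() change above is a byte-order fix, not a restructuring: range->min/max.icmp.id are stored in network byte order, so subtracting them directly yields a nonsense range size on little-endian hosts. A standalone demonstration (userspace C, little-endian host assumed):

    #include <stdio.h>
    #include <arpa/inet.h>

    int main(void)
    {
            unsigned short min = htons(1), max = htons(100);

            /* old computation: arithmetic on network-order values */
            unsigned int wrong = (unsigned int)max - min + 1;
            /* fixed computation: convert to host order first */
            unsigned int right = ntohs(max) - ntohs(min) + 1;

            printf("wrong=%u right=%u\n", wrong, right);
            return 0;       /* prints wrong=25345 right=100 on x86 */
    }
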
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index a91cfceff272..1d381bf68574 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -12,6 +12,7 @@
12#include <linux/ip.h> 12#include <linux/ip.h>
13#include <linux/tcp.h> 13#include <linux/tcp.h>
14#include <linux/if.h> 14#include <linux/if.h>
15#include <linux/netfilter/nfnetlink_conntrack.h>
15#include <linux/netfilter_ipv4/ip_nat.h> 16#include <linux/netfilter_ipv4/ip_nat.h>
16#include <linux/netfilter_ipv4/ip_nat_rule.h> 17#include <linux/netfilter_ipv4/ip_nat_rule.h>
17#include <linux/netfilter_ipv4/ip_nat_protocol.h> 18#include <linux/netfilter_ipv4/ip_nat_protocol.h>
@@ -40,7 +41,8 @@ tcp_unique_tuple(struct ip_conntrack_tuple *tuple,
40 enum ip_nat_manip_type maniptype, 41 enum ip_nat_manip_type maniptype,
41 const struct ip_conntrack *conntrack) 42 const struct ip_conntrack *conntrack)
42{ 43{
43 static u_int16_t port, *portptr; 44 static u_int16_t port;
45 u_int16_t *portptr;
44 unsigned int range_size, min, i; 46 unsigned int range_size, min, i;
45 47
46 if (maniptype == IP_NAT_MANIP_SRC) 48 if (maniptype == IP_NAT_MANIP_SRC)
@@ -101,7 +103,7 @@ tcp_manip_pkt(struct sk_buff **pskb,
101 if ((*pskb)->len >= hdroff + sizeof(struct tcphdr)) 103 if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
102 hdrsize = sizeof(struct tcphdr); 104 hdrsize = sizeof(struct tcphdr);
103 105
104 if (!skb_ip_make_writable(pskb, hdroff + hdrsize)) 106 if (!skb_make_writable(pskb, hdroff + hdrsize))
105 return 0; 107 return 0;
106 108
107 iph = (struct iphdr *)((*pskb)->data + iphdroff); 109 iph = (struct iphdr *)((*pskb)->data + iphdroff);
@@ -168,11 +170,18 @@ tcp_print_range(char *buffer, const struct ip_nat_range *range)
168 else return 0; 170 else return 0;
169} 171}
170 172
171struct ip_nat_protocol ip_nat_protocol_tcp 173struct ip_nat_protocol ip_nat_protocol_tcp = {
172= { "TCP", IPPROTO_TCP, 174 .name = "TCP",
173 tcp_manip_pkt, 175 .protonum = IPPROTO_TCP,
174 tcp_in_range, 176 .me = THIS_MODULE,
175 tcp_unique_tuple, 177 .manip_pkt = tcp_manip_pkt,
176 tcp_print, 178 .in_range = tcp_in_range,
177 tcp_print_range 179 .unique_tuple = tcp_unique_tuple,
180 .print = tcp_print,
181 .print_range = tcp_print_range,
182#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
183 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
184 .range_to_nfattr = ip_nat_port_range_to_nfattr,
185 .nfattr_to_range = ip_nat_port_nfattr_to_range,
186#endif
178}; 187};
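
The one-line split of "static u_int16_t port, *portptr;" above fixes a classic declarator pitfall: a storage-class specifier applies to every declarator in the list, so portptr had silently been static (shared between concurrent callers) when only the round-robin port counter was meant to persist. A standalone illustration:

    #include <stdio.h>

    static void pick(unsigned short *out)
    {
            static unsigned short port;   /* persistent counter: intended */
            unsigned short *portptr;      /* per-call scratch: keep auto  */

            portptr = out;
            *portptr = ++port;
    }

    int main(void)
    {
            unsigned short a, b;
            pick(&a);
            pick(&b);
            printf("%u %u\n", a, b);      /* prints 1 2 */
            return 0;
    }
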
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
index c669e3b5f5d0..c4906e1aa24a 100644
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -41,7 +41,8 @@ udp_unique_tuple(struct ip_conntrack_tuple *tuple,
41 enum ip_nat_manip_type maniptype, 41 enum ip_nat_manip_type maniptype,
42 const struct ip_conntrack *conntrack) 42 const struct ip_conntrack *conntrack)
43{ 43{
44 static u_int16_t port, *portptr; 44 static u_int16_t port;
45 u_int16_t *portptr;
45 unsigned int range_size, min, i; 46 unsigned int range_size, min, i;
46 47
47 if (maniptype == IP_NAT_MANIP_SRC) 48 if (maniptype == IP_NAT_MANIP_SRC)
@@ -93,7 +94,7 @@ udp_manip_pkt(struct sk_buff **pskb,
93 u32 oldip, newip; 94 u32 oldip, newip;
94 u16 *portptr, newport; 95 u16 *portptr, newport;
95 96
96 if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr))) 97 if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
97 return 0; 98 return 0;
98 99
99 iph = (struct iphdr *)((*pskb)->data + iphdroff); 100 iph = (struct iphdr *)((*pskb)->data + iphdroff);
@@ -155,11 +156,18 @@ udp_print_range(char *buffer, const struct ip_nat_range *range)
155 else return 0; 156 else return 0;
156} 157}
157 158
158struct ip_nat_protocol ip_nat_protocol_udp 159struct ip_nat_protocol ip_nat_protocol_udp = {
159= { "UDP", IPPROTO_UDP, 160 .name = "UDP",
160 udp_manip_pkt, 161 .protonum = IPPROTO_UDP,
161 udp_in_range, 162 .me = THIS_MODULE,
162 udp_unique_tuple, 163 .manip_pkt = udp_manip_pkt,
163 udp_print, 164 .in_range = udp_in_range,
164 udp_print_range 165 .unique_tuple = udp_unique_tuple,
166 .print = udp_print,
167 .print_range = udp_print_range,
168#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
169 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
170 .range_to_nfattr = ip_nat_port_range_to_nfattr,
171 .nfattr_to_range = ip_nat_port_nfattr_to_range,
172#endif
165}; 173};
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
index f5525bd58d16..99bbef56f84e 100644
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -61,10 +61,11 @@ unknown_print_range(char *buffer, const struct ip_nat_range *range)
61} 61}
62 62
63struct ip_nat_protocol ip_nat_unknown_protocol = { 63struct ip_nat_protocol ip_nat_unknown_protocol = {
64 "unknown", 0, 64 .name = "unknown",
65 unknown_manip_pkt, 65 .me = THIS_MODULE,
66 unknown_in_range, 66 .manip_pkt = unknown_manip_pkt,
67 unknown_unique_tuple, 67 .in_range = unknown_in_range,
68 unknown_print, 68 .unique_tuple = unknown_unique_tuple,
69 unknown_print_range 69 .print = unknown_print,
70 .print_range = unknown_print_range
70}; 71};
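
All four ip_nat_protocol definitions in this series move from positional to C99 designated initializers, which is what makes it safe to grow the struct with .me and the conditional netlink callbacks: fields bind by name and anything unnamed is zeroed. A standalone illustration with a hypothetical struct:

    #include <stdio.h>

    struct proto {
            const char *name;
            unsigned int protonum;
            void *me;                     /* field added later */
            int (*in_range)(int);
    };

    static int always(int x) { (void)x; return 1; }

    static struct proto p = {
            .name     = "demo",
            .protonum = 6,
            .in_range = always,           /* unaffected by the new field */
    };

    int main(void)
    {
            printf("%s %u me=%p\n", p.name, p.protonum, p.me);
            return 0;                     /* me stays NULL */
    }
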
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index 2a48b6e635ae..93b2c5111bb2 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -1275,7 +1275,7 @@ static int help(struct sk_buff **pskb,
1275 return NF_DROP; 1275 return NF_DROP;
1276 } 1276 }
1277 1277
1278 if (!skb_ip_make_writable(pskb, (*pskb)->len)) 1278 if (!skb_make_writable(pskb, (*pskb)->len))
1279 return NF_DROP; 1279 return NF_DROP;
1280 1280
1281 spin_lock_bh(&snmp_lock); 1281 spin_lock_bh(&snmp_lock);
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index bc59d0d6e89e..89db052add81 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -73,8 +73,6 @@ ip_nat_fn(unsigned int hooknum,
73 IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off 73 IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
74 & htons(IP_MF|IP_OFFSET))); 74 & htons(IP_MF|IP_OFFSET)));
75 75
76 (*pskb)->nfcache |= NFC_UNKNOWN;
77
78 /* If we had a hardware checksum before, it's now invalid */ 76 /* If we had a hardware checksum before, it's now invalid */
79 if ((*pskb)->ip_summed == CHECKSUM_HW) 77 if ((*pskb)->ip_summed == CHECKSUM_HW)
80 if (skb_checksum_help(*pskb, (out == NULL))) 78 if (skb_checksum_help(*pskb, (out == NULL)))
@@ -102,6 +100,10 @@ ip_nat_fn(unsigned int hooknum,
102 return NF_ACCEPT; 100 return NF_ACCEPT;
103 } 101 }
104 102
103 /* Don't try to NAT if this packet is not conntracked */
104 if (ct == &ip_conntrack_untracked)
105 return NF_ACCEPT;
106
105 switch (ctinfo) { 107 switch (ctinfo) {
106 case IP_CT_RELATED: 108 case IP_CT_RELATED:
107 case IP_CT_RELATED+IP_CT_IS_REPLY: 109 case IP_CT_RELATED+IP_CT_IS_REPLY:
@@ -392,6 +394,8 @@ module_exit(fini);
392EXPORT_SYMBOL(ip_nat_setup_info); 394EXPORT_SYMBOL(ip_nat_setup_info);
393EXPORT_SYMBOL(ip_nat_protocol_register); 395EXPORT_SYMBOL(ip_nat_protocol_register);
394EXPORT_SYMBOL(ip_nat_protocol_unregister); 396EXPORT_SYMBOL(ip_nat_protocol_unregister);
397EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
398EXPORT_SYMBOL_GPL(ip_nat_proto_put);
395EXPORT_SYMBOL(ip_nat_cheat_check); 399EXPORT_SYMBOL(ip_nat_cheat_check);
396EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); 400EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
397EXPORT_SYMBOL(ip_nat_mangle_udp_packet); 401EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c
index 0343e0d64674..2215317c76b7 100644
--- a/net/ipv4/netfilter/ip_nat_tftp.c
+++ b/net/ipv4/netfilter/ip_nat_tftp.c
@@ -45,10 +45,8 @@ static unsigned int help(struct sk_buff **pskb,
45 exp->saved_proto.udp.port = exp->tuple.dst.u.tcp.port; 45 exp->saved_proto.udp.port = exp->tuple.dst.u.tcp.port;
46 exp->dir = IP_CT_DIR_REPLY; 46 exp->dir = IP_CT_DIR_REPLY;
47 exp->expectfn = ip_nat_follow_master; 47 exp->expectfn = ip_nat_follow_master;
48 if (ip_conntrack_expect_related(exp) != 0) { 48 if (ip_conntrack_expect_related(exp) != 0)
49 ip_conntrack_expect_free(exp);
50 return NF_DROP; 49 return NF_DROP;
51 }
52 return NF_ACCEPT; 50 return NF_ACCEPT;
53} 51}
54 52
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index eda1fba431a4..d54f14d926f6 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -43,17 +43,10 @@
43#define NET_IPQ_QMAX 2088 43#define NET_IPQ_QMAX 2088
44#define NET_IPQ_QMAX_NAME "ip_queue_maxlen" 44#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
45 45
46struct ipq_rt_info {
47 __u8 tos;
48 __u32 daddr;
49 __u32 saddr;
50};
51
52struct ipq_queue_entry { 46struct ipq_queue_entry {
53 struct list_head list; 47 struct list_head list;
54 struct nf_info *info; 48 struct nf_info *info;
55 struct sk_buff *skb; 49 struct sk_buff *skb;
56 struct ipq_rt_info rt_info;
57}; 50};
58 51
59typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); 52typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
@@ -214,6 +207,12 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
214 break; 207 break;
215 208
216 case IPQ_COPY_PACKET: 209 case IPQ_COPY_PACKET:
210 if (entry->skb->ip_summed == CHECKSUM_HW &&
211 (*errp = skb_checksum_help(entry->skb,
212 entry->info->outdev == NULL))) {
213 read_unlock_bh(&queue_lock);
214 return NULL;
215 }
217 if (copy_range == 0 || copy_range > entry->skb->len) 216 if (copy_range == 0 || copy_range > entry->skb->len)
218 data_len = entry->skb->len; 217 data_len = entry->skb->len;
219 else 218 else
@@ -241,8 +240,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
241 240
242 pmsg->packet_id = (unsigned long )entry; 241 pmsg->packet_id = (unsigned long )entry;
243 pmsg->data_len = data_len; 242 pmsg->data_len = data_len;
244 pmsg->timestamp_sec = entry->skb->stamp.tv_sec; 243 pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec;
245 pmsg->timestamp_usec = entry->skb->stamp.tv_usec; 244 pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec;
246 pmsg->mark = entry->skb->nfmark; 245 pmsg->mark = entry->skb->nfmark;
247 pmsg->hook = entry->info->hook; 246 pmsg->hook = entry->info->hook;
248 pmsg->hw_protocol = entry->skb->protocol; 247 pmsg->hw_protocol = entry->skb->protocol;
@@ -281,7 +280,8 @@ nlmsg_failure:
281} 280}
282 281
283static int 282static int
284ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) 283ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
284 unsigned int queuenum, void *data)
285{ 285{
286 int status = -EINVAL; 286 int status = -EINVAL;
287 struct sk_buff *nskb; 287 struct sk_buff *nskb;
@@ -299,14 +299,6 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
299 entry->info = info; 299 entry->info = info;
300 entry->skb = skb; 300 entry->skb = skb;
301 301
302 if (entry->info->hook == NF_IP_LOCAL_OUT) {
303 struct iphdr *iph = skb->nh.iph;
304
305 entry->rt_info.tos = iph->tos;
306 entry->rt_info.daddr = iph->daddr;
307 entry->rt_info.saddr = iph->saddr;
308 }
309
310 nskb = ipq_build_packet_message(entry, &status); 302 nskb = ipq_build_packet_message(entry, &status);
311 if (nskb == NULL) 303 if (nskb == NULL)
312 goto err_out_free; 304 goto err_out_free;
@@ -382,23 +374,11 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
382 } 374 }
383 skb_put(e->skb, diff); 375 skb_put(e->skb, diff);
384 } 376 }
385 if (!skb_ip_make_writable(&e->skb, v->data_len)) 377 if (!skb_make_writable(&e->skb, v->data_len))
386 return -ENOMEM; 378 return -ENOMEM;
387 memcpy(e->skb->data, v->payload, v->data_len); 379 memcpy(e->skb->data, v->payload, v->data_len);
388 e->skb->nfcache |= NFC_ALTERED; 380 e->skb->ip_summed = CHECKSUM_NONE;
389 381
390 /*
391 * Extra routing may needed on local out, as the QUEUE target never
392 * returns control to the table.
393 */
394 if (e->info->hook == NF_IP_LOCAL_OUT) {
395 struct iphdr *iph = e->skb->nh.iph;
396
397 if (!(iph->tos == e->rt_info.tos
398 && iph->daddr == e->rt_info.daddr
399 && iph->saddr == e->rt_info.saddr))
400 return ip_route_me_harder(&e->skb);
401 }
402 return 0; 382 return 0;
403} 383}
404 384
@@ -676,6 +656,11 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
676} 656}
677#endif /* CONFIG_PROC_FS */ 657#endif /* CONFIG_PROC_FS */
678 658
659static struct nf_queue_handler nfqh = {
660 .name = "ip_queue",
661 .outfn = &ipq_enqueue_packet,
662};
663
679static int 664static int
680init_or_cleanup(int init) 665init_or_cleanup(int init)
681{ 666{
@@ -686,7 +671,8 @@ init_or_cleanup(int init)
686 goto cleanup; 671 goto cleanup;
687 672
688 netlink_register_notifier(&ipq_nl_notifier); 673 netlink_register_notifier(&ipq_nl_notifier);
689 ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk); 674 ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk,
675 THIS_MODULE);
690 if (ipqnl == NULL) { 676 if (ipqnl == NULL) {
691 printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); 677 printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
692 goto cleanup_netlink_notifier; 678 goto cleanup_netlink_notifier;
@@ -703,7 +689,7 @@ init_or_cleanup(int init)
703 register_netdevice_notifier(&ipq_dev_notifier); 689 register_netdevice_notifier(&ipq_dev_notifier);
704 ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); 690 ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0);
705 691
706 status = nf_register_queue_handler(PF_INET, ipq_enqueue_packet, NULL); 692 status = nf_register_queue_handler(PF_INET, &nfqh);
707 if (status < 0) { 693 if (status < 0) {
708 printk(KERN_ERR "ip_queue: failed to register queue handler\n"); 694 printk(KERN_ERR "ip_queue: failed to register queue handler\n");
709 goto cleanup_sysctl; 695 goto cleanup_sysctl;
@@ -711,7 +697,7 @@ init_or_cleanup(int init)
711 return status; 697 return status;
712 698
713cleanup: 699cleanup:
714 nf_unregister_queue_handler(PF_INET); 700 nf_unregister_queue_handlers(&nfqh);
715 synchronize_net(); 701 synchronize_net();
716 ipq_flush(NF_DROP); 702 ipq_flush(NF_DROP);
717 703
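
ip_queue is converted from a bare function-pointer registration to the new struct nf_queue_handler interface, and the enqueue callback grows a queuenum argument so a single handler can serve several queues. A hedged sketch of the new convention; the handler name and callback body here are placeholders, not part of the patch:

    static int example_outfn(struct sk_buff *skb, struct nf_info *info,
                             unsigned int queuenum, void *data)
    {
            /* queue the packet for queue 'queuenum', or reinject it */
            return 0;
    }

    static struct nf_queue_handler example_qh = {
            .name  = "example_queue",     /* hypothetical */
            .outfn = &example_outfn,
    };

    static int example_register(void)
    {
            return nf_register_queue_handler(PF_INET, &example_qh);
    }

    static void example_unregister(void)
    {
            /* unregistration is now by handler rather than by family,
             * as the plural name suggests */
            nf_unregister_queue_handlers(&example_qh);
            synchronize_net();
    }
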
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index c88dfcd38c56..eef99a1b5de6 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -312,7 +312,6 @@ ipt_do_table(struct sk_buff **pskb,
312 do { 312 do {
313 IP_NF_ASSERT(e); 313 IP_NF_ASSERT(e);
314 IP_NF_ASSERT(back); 314 IP_NF_ASSERT(back);
315 (*pskb)->nfcache |= e->nfcache;
316 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { 315 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
317 struct ipt_entry_target *t; 316 struct ipt_entry_target *t;
318 317
@@ -341,8 +340,8 @@ ipt_do_table(struct sk_buff **pskb,
341 back->comefrom); 340 back->comefrom);
342 continue; 341 continue;
343 } 342 }
344 if (table_base + v 343 if (table_base + v != (void *)e + e->next_offset
345 != (void *)e + e->next_offset) { 344 && !(e->ip.flags & IPT_F_GOTO)) {
346 /* Save old back ptr in next entry */ 345 /* Save old back ptr in next entry */
347 struct ipt_entry *next 346 struct ipt_entry *next
348 = (void *)e + e->next_offset; 347 = (void *)e + e->next_offset;
diff --git a/net/ipv4/netfilter/ipt_CLASSIFY.c b/net/ipv4/netfilter/ipt_CLASSIFY.c
index 9842e6e23184..dab78d8bd494 100644
--- a/net/ipv4/netfilter/ipt_CLASSIFY.c
+++ b/net/ipv4/netfilter/ipt_CLASSIFY.c
@@ -32,10 +32,8 @@ target(struct sk_buff **pskb,
32{ 32{
33 const struct ipt_classify_target_info *clinfo = targinfo; 33 const struct ipt_classify_target_info *clinfo = targinfo;
34 34
35 if((*pskb)->priority != clinfo->priority) { 35 if((*pskb)->priority != clinfo->priority)
36 (*pskb)->priority = clinfo->priority; 36 (*pskb)->priority = clinfo->priority;
37 (*pskb)->nfcache |= NFC_ALTERED;
38 }
39 37
40 return IPT_CONTINUE; 38 return IPT_CONTINUE;
41} 39}
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 6706d3a1bc4f..7d38913754b1 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -144,7 +144,7 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
144 memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); 144 memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
145 c->num_total_nodes = i->num_total_nodes; 145 c->num_total_nodes = i->num_total_nodes;
146 c->num_local_nodes = i->num_local_nodes; 146 c->num_local_nodes = i->num_local_nodes;
147 memcpy(&c->local_nodes, &i->local_nodes, sizeof(&c->local_nodes)); 147 memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes));
148 c->hash_mode = i->hash_mode; 148 c->hash_mode = i->hash_mode;
149 c->hash_initval = i->hash_initval; 149 c->hash_initval = i->hash_initval;
150 atomic_set(&c->refcount, 1); 150 atomic_set(&c->refcount, 1);
@@ -367,7 +367,7 @@ target(struct sk_buff **pskb,
367#ifdef DEBUG_CLUSTERP 367#ifdef DEBUG_CLUSTERP
368 DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 368 DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
369#endif 369#endif
370 DEBUGP("hash=%u ct_hash=%lu ", hash, ct->mark); 370 DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark);
371 if (!clusterip_responsible(cipinfo->config, hash)) { 371 if (!clusterip_responsible(cipinfo->config, hash)) {
372 DEBUGP("not responsible\n"); 372 DEBUGP("not responsible\n");
373 return NF_DROP; 373 return NF_DROP;
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c
index 30ddd3e18eb7..134638021339 100644
--- a/net/ipv4/netfilter/ipt_CONNMARK.c
+++ b/net/ipv4/netfilter/ipt_CONNMARK.c
@@ -40,9 +40,9 @@ target(struct sk_buff **pskb,
40 void *userinfo) 40 void *userinfo)
41{ 41{
42 const struct ipt_connmark_target_info *markinfo = targinfo; 42 const struct ipt_connmark_target_info *markinfo = targinfo;
43 unsigned long diff; 43 u_int32_t diff;
44 unsigned long nfmark; 44 u_int32_t nfmark;
45 unsigned long newmark; 45 u_int32_t newmark;
46 46
47 enum ip_conntrack_info ctinfo; 47 enum ip_conntrack_info ctinfo;
48 struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo); 48 struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo);
@@ -61,10 +61,8 @@ target(struct sk_buff **pskb,
61 case IPT_CONNMARK_RESTORE: 61 case IPT_CONNMARK_RESTORE:
62 nfmark = (*pskb)->nfmark; 62 nfmark = (*pskb)->nfmark;
63 diff = (ct->mark ^ nfmark) & markinfo->mask; 63 diff = (ct->mark ^ nfmark) & markinfo->mask;
64 if (diff != 0) { 64 if (diff != 0)
65 (*pskb)->nfmark = nfmark ^ diff; 65 (*pskb)->nfmark = nfmark ^ diff;
66 (*pskb)->nfcache |= NFC_ALTERED;
67 }
68 break; 66 break;
69 } 67 }
70 } 68 }
@@ -94,6 +92,11 @@ checkentry(const char *tablename,
94 } 92 }
95 } 93 }
96 94
95 if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) {
96 printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n");
97 return 0;
98 }
99
97 return 1; 100 return 1;
98} 101}
99 102
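
The new checkentry test above exists because the mark in the userspace-visible info struct is an unsigned long, which is 64 bits on many hosts, while skb->nfmark and ct->mark are 32 bits after this patch, so oversized values must be rejected at rule-load time. A standalone illustration (unsigned long long is used so the demo behaves the same on 32-bit hosts):

    #include <stdio.h>

    int main(void)
    {
            unsigned long long mark = 0x1ffffffffULL;  /* needs 33 bits */

            if (mark > 0xffffffffULL)
                    printf("rejected: only 32bit mark supported\n");
            else
                    printf("accepted: 0x%llx\n", mark);
            return 0;
    }
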
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c
index 3ea4509099f9..6e319570a28c 100644
--- a/net/ipv4/netfilter/ipt_DSCP.c
+++ b/net/ipv4/netfilter/ipt_DSCP.c
@@ -39,7 +39,7 @@ target(struct sk_buff **pskb,
39 if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) { 39 if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) {
40 u_int16_t diffs[2]; 40 u_int16_t diffs[2];
41 41
42 if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) 42 if (!skb_make_writable(pskb, sizeof(struct iphdr)))
43 return NF_DROP; 43 return NF_DROP;
44 44
45 diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; 45 diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
@@ -51,7 +51,6 @@ target(struct sk_buff **pskb,
51 sizeof(diffs), 51 sizeof(diffs),
52 (*pskb)->nh.iph->check 52 (*pskb)->nh.iph->check
53 ^ 0xFFFF)); 53 ^ 0xFFFF));
54 (*pskb)->nfcache |= NFC_ALTERED;
55 } 54 }
56 return IPT_CONTINUE; 55 return IPT_CONTINUE;
57} 56}
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index ada9911118e9..a1319693f648 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -31,7 +31,7 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
31 != (einfo->ip_ect & IPT_ECN_IP_MASK)) { 31 != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
32 u_int16_t diffs[2]; 32 u_int16_t diffs[2];
33 33
34 if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) 34 if (!skb_make_writable(pskb, sizeof(struct iphdr)))
35 return 0; 35 return 0;
36 36
37 diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; 37 diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
@@ -43,7 +43,6 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
43 sizeof(diffs), 43 sizeof(diffs),
44 (*pskb)->nh.iph->check 44 (*pskb)->nh.iph->check
45 ^0xFFFF)); 45 ^0xFFFF));
46 (*pskb)->nfcache |= NFC_ALTERED;
47 } 46 }
48 return 1; 47 return 1;
49} 48}
@@ -61,16 +60,20 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
61 if (!tcph) 60 if (!tcph)
62 return 0; 61 return 0;
63 62
64 if (!(einfo->operation & IPT_ECN_OP_SET_ECE 63 if ((!(einfo->operation & IPT_ECN_OP_SET_ECE) ||
65 || tcph->ece == einfo->proto.tcp.ece) 64 tcph->ece == einfo->proto.tcp.ece) &&
66 && (!(einfo->operation & IPT_ECN_OP_SET_CWR 65 ((!(einfo->operation & IPT_ECN_OP_SET_CWR) ||
67 || tcph->cwr == einfo->proto.tcp.cwr))) 66 tcph->cwr == einfo->proto.tcp.cwr)))
68 return 1; 67 return 1;
69 68
70 if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) 69 if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
71 return 0; 70 return 0;
72 tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; 71 tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
73 72
73 if ((*pskb)->ip_summed == CHECKSUM_HW &&
74 skb_checksum_help(*pskb, inward))
75 return 0;
76
74 diffs[0] = ((u_int16_t *)tcph)[6]; 77 diffs[0] = ((u_int16_t *)tcph)[6];
75 if (einfo->operation & IPT_ECN_OP_SET_ECE) 78 if (einfo->operation & IPT_ECN_OP_SET_ECE)
76 tcph->ece = einfo->proto.tcp.ece; 79 tcph->ece = einfo->proto.tcp.ece;
@@ -79,14 +82,10 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
79 diffs[1] = ((u_int16_t *)tcph)[6]; 82 diffs[1] = ((u_int16_t *)tcph)[6];
80 diffs[0] = diffs[0] ^ 0xFFFF; 83 diffs[0] = diffs[0] ^ 0xFFFF;
81 84
82 if ((*pskb)->ip_summed != CHECKSUM_HW) 85 if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY)
83 tcph->check = csum_fold(csum_partial((char *)diffs, 86 tcph->check = csum_fold(csum_partial((char *)diffs,
84 sizeof(diffs), 87 sizeof(diffs),
85 tcph->check^0xFFFF)); 88 tcph->check^0xFFFF));
86 else
87 if (skb_checksum_help(*pskb, inward))
88 return 0;
89 (*pskb)->nfcache |= NFC_ALTERED;
90 return 1; 89 return 1;
91} 90}
92 91
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index ef08733d26da..92ed050fac69 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -27,10 +27,6 @@ MODULE_LICENSE("GPL");
27MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 27MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
28MODULE_DESCRIPTION("iptables syslog logging module"); 28MODULE_DESCRIPTION("iptables syslog logging module");
29 29
30static unsigned int nflog = 1;
31module_param(nflog, int, 0400);
32MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
33
34#if 0 30#if 0
35#define DEBUGP printk 31#define DEBUGP printk
36#else 32#else
@@ -41,11 +37,17 @@ MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
41static DEFINE_SPINLOCK(log_lock); 37static DEFINE_SPINLOCK(log_lock);
42 38
43/* One level of recursion won't kill us */ 39/* One level of recursion won't kill us */
44static void dump_packet(const struct ipt_log_info *info, 40static void dump_packet(const struct nf_loginfo *info,
45 const struct sk_buff *skb, 41 const struct sk_buff *skb,
46 unsigned int iphoff) 42 unsigned int iphoff)
47{ 43{
48 struct iphdr _iph, *ih; 44 struct iphdr _iph, *ih;
45 unsigned int logflags;
46
47 if (info->type == NF_LOG_TYPE_LOG)
48 logflags = info->u.log.logflags;
49 else
50 logflags = NF_LOG_MASK;
49 51
50 ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); 52 ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
51 if (ih == NULL) { 53 if (ih == NULL) {
@@ -76,7 +78,7 @@ static void dump_packet(const struct ipt_log_info *info,
76 if (ntohs(ih->frag_off) & IP_OFFSET) 78 if (ntohs(ih->frag_off) & IP_OFFSET)
77 printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); 79 printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
78 80
79 if ((info->logflags & IPT_LOG_IPOPT) 81 if ((logflags & IPT_LOG_IPOPT)
80 && ih->ihl * 4 > sizeof(struct iphdr)) { 82 && ih->ihl * 4 > sizeof(struct iphdr)) {
81 unsigned char _opt[4 * 15 - sizeof(struct iphdr)], *op; 83 unsigned char _opt[4 * 15 - sizeof(struct iphdr)], *op;
82 unsigned int i, optsize; 84 unsigned int i, optsize;
@@ -119,7 +121,7 @@ static void dump_packet(const struct ipt_log_info *info,
119 printk("SPT=%u DPT=%u ", 121 printk("SPT=%u DPT=%u ",
120 ntohs(th->source), ntohs(th->dest)); 122 ntohs(th->source), ntohs(th->dest));
121 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ 123 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
122 if (info->logflags & IPT_LOG_TCPSEQ) 124 if (logflags & IPT_LOG_TCPSEQ)
123 printk("SEQ=%u ACK=%u ", 125 printk("SEQ=%u ACK=%u ",
124 ntohl(th->seq), ntohl(th->ack_seq)); 126 ntohl(th->seq), ntohl(th->ack_seq));
125 /* Max length: 13 "WINDOW=65535 " */ 127 /* Max length: 13 "WINDOW=65535 " */
@@ -146,7 +148,7 @@ static void dump_packet(const struct ipt_log_info *info,
146 /* Max length: 11 "URGP=65535 " */ 148 /* Max length: 11 "URGP=65535 " */
147 printk("URGP=%u ", ntohs(th->urg_ptr)); 149 printk("URGP=%u ", ntohs(th->urg_ptr));
148 150
149 if ((info->logflags & IPT_LOG_TCPOPT) 151 if ((logflags & IPT_LOG_TCPOPT)
150 && th->doff * 4 > sizeof(struct tcphdr)) { 152 && th->doff * 4 > sizeof(struct tcphdr)) {
151 unsigned char _opt[4 * 15 - sizeof(struct tcphdr)]; 153 unsigned char _opt[4 * 15 - sizeof(struct tcphdr)];
152 unsigned char *op; 154 unsigned char *op;
@@ -328,7 +330,7 @@ static void dump_packet(const struct ipt_log_info *info,
328 } 330 }
329 331
330 /* Max length: 15 "UID=4294967295 " */ 332 /* Max length: 15 "UID=4294967295 " */
331 if ((info->logflags & IPT_LOG_UID) && !iphoff && skb->sk) { 333 if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
332 read_lock_bh(&skb->sk->sk_callback_lock); 334 read_lock_bh(&skb->sk->sk_callback_lock);
333 if (skb->sk->sk_socket && skb->sk->sk_socket->file) 335 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
334 printk("UID=%u ", skb->sk->sk_socket->file->f_uid); 336 printk("UID=%u ", skb->sk->sk_socket->file->f_uid);
@@ -349,19 +351,31 @@ static void dump_packet(const struct ipt_log_info *info,
349 /* maxlen = 230+ 91 + 230 + 252 = 803 */ 351 /* maxlen = 230+ 91 + 230 + 252 = 803 */
350} 352}
351 353
354struct nf_loginfo default_loginfo = {
355 .type = NF_LOG_TYPE_LOG,
356 .u = {
357 .log = {
358 .level = 0,
359 .logflags = NF_LOG_MASK,
360 },
361 },
362};
363
352static void 364static void
353ipt_log_packet(unsigned int hooknum, 365ipt_log_packet(unsigned int pf,
366 unsigned int hooknum,
354 const struct sk_buff *skb, 367 const struct sk_buff *skb,
355 const struct net_device *in, 368 const struct net_device *in,
356 const struct net_device *out, 369 const struct net_device *out,
357 const struct ipt_log_info *loginfo, 370 const struct nf_loginfo *loginfo,
358 const char *level_string,
359 const char *prefix) 371 const char *prefix)
360{ 372{
373 if (!loginfo)
374 loginfo = &default_loginfo;
375
361 spin_lock_bh(&log_lock); 376 spin_lock_bh(&log_lock);
362 printk(level_string); 377 printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
363 printk("%sIN=%s OUT=%s ", 378 prefix,
364 prefix == NULL ? loginfo->prefix : prefix,
365 in ? in->name : "", 379 in ? in->name : "",
366 out ? out->name : ""); 380 out ? out->name : "");
367#ifdef CONFIG_BRIDGE_NETFILTER 381#ifdef CONFIG_BRIDGE_NETFILTER
@@ -405,28 +419,15 @@ ipt_log_target(struct sk_buff **pskb,
405 void *userinfo) 419 void *userinfo)
406{ 420{
407 const struct ipt_log_info *loginfo = targinfo; 421 const struct ipt_log_info *loginfo = targinfo;
408 char level_string[4] = "< >"; 422 struct nf_loginfo li;
409 423
410 level_string[1] = '0' + (loginfo->level % 8); 424 li.type = NF_LOG_TYPE_LOG;
411 ipt_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL); 425 li.u.log.level = loginfo->level;
426 li.u.log.logflags = loginfo->logflags;
412 427
413 return IPT_CONTINUE; 428 nf_log_packet(PF_INET, hooknum, *pskb, in, out, &li, loginfo->prefix);
414}
415 429
416static void 430 return IPT_CONTINUE;
417ipt_logfn(unsigned int hooknum,
418 const struct sk_buff *skb,
419 const struct net_device *in,
420 const struct net_device *out,
421 const char *prefix)
422{
423 struct ipt_log_info loginfo = {
424 .level = 0,
425 .logflags = IPT_LOG_MASK,
426 .prefix = ""
427 };
428
429 ipt_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix);
430} 431}
431 432
432static int ipt_log_checkentry(const char *tablename, 433static int ipt_log_checkentry(const char *tablename,
@@ -464,20 +465,29 @@ static struct ipt_target ipt_log_reg = {
464 .me = THIS_MODULE, 465 .me = THIS_MODULE,
465}; 466};
466 467
468static struct nf_logger ipt_log_logger ={
469 .name = "ipt_LOG",
470 .logfn = &ipt_log_packet,
471 .me = THIS_MODULE,
472};
473
467static int __init init(void) 474static int __init init(void)
468{ 475{
469 if (ipt_register_target(&ipt_log_reg)) 476 if (ipt_register_target(&ipt_log_reg))
470 return -EINVAL; 477 return -EINVAL;
471 if (nflog) 478 if (nf_log_register(PF_INET, &ipt_log_logger) < 0) {
472 nf_log_register(PF_INET, &ipt_logfn); 479 printk(KERN_WARNING "ipt_LOG: not logging via system console "
480 "since somebody else already registered for PF_INET\n");
481 /* we cannot make module load fail here, since otherwise
482 * iptables userspace would abort */
483 }
473 484
474 return 0; 485 return 0;
475} 486}
476 487
477static void __exit fini(void) 488static void __exit fini(void)
478{ 489{
479 if (nflog) 490 nf_log_unregister_logger(&ipt_log_logger);
480 nf_log_unregister(PF_INET, &ipt_logfn);
481 ipt_unregister_target(&ipt_log_reg); 491 ipt_unregister_target(&ipt_log_reg);
482} 492}
483 493
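
The LOG target now splits into two halves: the iptables target packs its rule options into a struct nf_loginfo and calls nf_log_packet(), while the console printer registers as a struct nf_logger backend for PF_INET. A hedged sketch of the backend contract, with placeholder names; default_loginfo refers to the fallback defined in the hunk above:

    static void example_logfn(unsigned int pf, unsigned int hooknum,
                              const struct sk_buff *skb,
                              const struct net_device *in,
                              const struct net_device *out,
                              const struct nf_loginfo *li,
                              const char *prefix)
    {
            if (!li)
                    li = &default_loginfo;     /* callers may pass NULL */
            printk("<%d>%s...\n", li->u.log.level, prefix);
    }

    static struct nf_logger example_logger = {
            .name  = "example_LOG",            /* hypothetical */
            .logfn = &example_logfn,
            .me    = THIS_MODULE,
    };

    /* Only one backend owns a family at a time, so a failed
     * nf_log_register(PF_INET, &example_logger) is tolerated rather
     * than failing module load, exactly as init() above does. */
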
diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c
index 33c6f9b63b8d..52b4f2c296bf 100644
--- a/net/ipv4/netfilter/ipt_MARK.c
+++ b/net/ipv4/netfilter/ipt_MARK.c
@@ -29,10 +29,9 @@ target_v0(struct sk_buff **pskb,
29{ 29{
30 const struct ipt_mark_target_info *markinfo = targinfo; 30 const struct ipt_mark_target_info *markinfo = targinfo;
31 31
32 if((*pskb)->nfmark != markinfo->mark) { 32 if((*pskb)->nfmark != markinfo->mark)
33 (*pskb)->nfmark = markinfo->mark; 33 (*pskb)->nfmark = markinfo->mark;
34 (*pskb)->nfcache |= NFC_ALTERED; 34
35 }
36 return IPT_CONTINUE; 35 return IPT_CONTINUE;
37} 36}
38 37
@@ -61,10 +60,9 @@ target_v1(struct sk_buff **pskb,
61 break; 60 break;
62 } 61 }
63 62
64 if((*pskb)->nfmark != mark) { 63 if((*pskb)->nfmark != mark)
65 (*pskb)->nfmark = mark; 64 (*pskb)->nfmark = mark;
66 (*pskb)->nfcache |= NFC_ALTERED; 65
67 }
68 return IPT_CONTINUE; 66 return IPT_CONTINUE;
69} 67}
70 68
@@ -76,6 +74,8 @@ checkentry_v0(const char *tablename,
76 unsigned int targinfosize, 74 unsigned int targinfosize,
77 unsigned int hook_mask) 75 unsigned int hook_mask)
78{ 76{
77 struct ipt_mark_target_info *markinfo = targinfo;
78
79 if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) { 79 if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) {
80 printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n", 80 printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
81 targinfosize, 81 targinfosize,
@@ -88,6 +88,11 @@ checkentry_v0(const char *tablename,
88 return 0; 88 return 0;
89 } 89 }
90 90
91 if (markinfo->mark > 0xffffffff) {
92 printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
93 return 0;
94 }
95
91 return 1; 96 return 1;
92} 97}
93 98
@@ -120,6 +125,11 @@ checkentry_v1(const char *tablename,
120 return 0; 125 return 0;
121 } 126 }
122 127
128 if (markinfo->mark > 0xffffffff) {
129 printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
130 return 0;
131 }
132
123 return 1; 133 return 1;
124} 134}
125 135
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 91e74502c3d3..2f3e181c8e97 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -86,11 +86,6 @@ masquerade_target(struct sk_buff **pskb,
86 86
87 IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING); 87 IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
88 88
89 /* FIXME: For the moment, don't do local packets, breaks
90 testsuite for 2.3.49 --RR */
91 if ((*pskb)->sk)
92 return NF_ACCEPT;
93
94 ct = ip_conntrack_get(*pskb, &ctinfo); 89 ct = ip_conntrack_get(*pskb, &ctinfo);
95 IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED 90 IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
96 || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); 91 || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 06254b29d034..e6e7b6095363 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -46,7 +46,8 @@ check(const char *tablename,
46 DEBUGP(MODULENAME":check: size %u.\n", targinfosize); 46 DEBUGP(MODULENAME":check: size %u.\n", targinfosize);
47 return 0; 47 return 0;
48 } 48 }
49 if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING))) { 49 if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING) |
50 (1 << NF_IP_LOCAL_OUT))) {
50 DEBUGP(MODULENAME":check: bad hooks %x.\n", hook_mask); 51 DEBUGP(MODULENAME":check: bad hooks %x.\n", hook_mask);
51 return 0; 52 return 0;
52 } 53 }
@@ -76,12 +77,13 @@ target(struct sk_buff **pskb,
76 struct ip_nat_range newrange; 77 struct ip_nat_range newrange;
77 78
78 IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING 79 IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
79 || hooknum == NF_IP_POST_ROUTING); 80 || hooknum == NF_IP_POST_ROUTING
81 || hooknum == NF_IP_LOCAL_OUT);
80 ct = ip_conntrack_get(*pskb, &ctinfo); 82 ct = ip_conntrack_get(*pskb, &ctinfo);
81 83
82 netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); 84 netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
83 85
84 if (hooknum == NF_IP_PRE_ROUTING) 86 if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT)
85 new_ip = (*pskb)->nh.iph->daddr & ~netmask; 87 new_ip = (*pskb)->nh.iph->daddr & ~netmask;
86 else 88 else
87 new_ip = (*pskb)->nh.iph->saddr & ~netmask; 89 new_ip = (*pskb)->nh.iph->saddr & ~netmask;
diff --git a/net/ipv4/netfilter/ipt_NFQUEUE.c b/net/ipv4/netfilter/ipt_NFQUEUE.c
new file mode 100644
index 000000000000..3cedc9be8807
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_NFQUEUE.c
@@ -0,0 +1,70 @@
1/* iptables module for using new netfilter netlink queue
2 *
3 * (C) 2005 by Harald Welte <laforge@netfilter.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13
14#include <linux/netfilter.h>
15#include <linux/netfilter_ipv4/ip_tables.h>
16#include <linux/netfilter_ipv4/ipt_NFQUEUE.h>
17
18MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
19MODULE_DESCRIPTION("iptables NFQUEUE target");
20MODULE_LICENSE("GPL");
21
22static unsigned int
23target(struct sk_buff **pskb,
24 const struct net_device *in,
25 const struct net_device *out,
26 unsigned int hooknum,
27 const void *targinfo,
28 void *userinfo)
29{
30 const struct ipt_NFQ_info *tinfo = targinfo;
31
32 return NF_QUEUE_NR(tinfo->queuenum);
33}
34
35static int
36checkentry(const char *tablename,
37 const struct ipt_entry *e,
38 void *targinfo,
39 unsigned int targinfosize,
40 unsigned int hook_mask)
41{
42 if (targinfosize != IPT_ALIGN(sizeof(struct ipt_NFQ_info))) {
43 printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n",
44 targinfosize,
45 IPT_ALIGN(sizeof(struct ipt_NFQ_info)));
46 return 0;
47 }
48
49 return 1;
50}
51
52static struct ipt_target ipt_NFQ_reg = {
53 .name = "NFQUEUE",
54 .target = target,
55 .checkentry = checkentry,
56 .me = THIS_MODULE,
57};
58
59static int __init init(void)
60{
61 return ipt_register_target(&ipt_NFQ_reg);
62}
63
64static void __exit fini(void)
65{
66 ipt_unregister_target(&ipt_NFQ_reg);
67}
68
69module_init(init);
70module_exit(fini);
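
The interesting line in the new target is its return value: NF_QUEUE_NR(tinfo->queuenum) packs the queue number into the upper bits of the verdict word so the core can route the packet to the right nfnetlink queue. A standalone demo of the encoding; the constants mirror what linux/netfilter.h defines at this point in the series, which is an assumption worth checking:

    #include <stdio.h>

    #define NF_QUEUE          3
    #define NF_VERDICT_BITS   16
    #define NF_VERDICT_QMASK  0xffff0000U
    #define NF_QUEUE_NR(x) \
            ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE)

    int main(void)
    {
            unsigned int v = NF_QUEUE_NR(5);
            printf("verdict=0x%08x queue=%u\n", v, v >> NF_VERDICT_BITS);
            return 0;     /* verdict=0x00050003 queue=5 */
    }
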
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 915696446020..f115a84a4ac6 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -156,7 +156,6 @@ static void send_reset(struct sk_buff *oldskb, int hook)
156 156
157 /* This packet will not be the same as the other: clear nf fields */ 157 /* This packet will not be the same as the other: clear nf fields */
158 nf_reset(nskb); 158 nf_reset(nskb);
159 nskb->nfcache = 0;
160 nskb->nfmark = 0; 159 nskb->nfmark = 0;
161#ifdef CONFIG_BRIDGE_NETFILTER 160#ifdef CONFIG_BRIDGE_NETFILTER
162 nf_bridge_put(nskb->nf_bridge); 161 nf_bridge_put(nskb->nf_bridge);
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index 1049050b2bfb..8db70d6908c3 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -58,7 +58,11 @@ ipt_tcpmss_target(struct sk_buff **pskb,
58 unsigned int i; 58 unsigned int i;
59 u_int8_t *opt; 59 u_int8_t *opt;
60 60
61 if (!skb_ip_make_writable(pskb, (*pskb)->len)) 61 if (!skb_make_writable(pskb, (*pskb)->len))
62 return NF_DROP;
63
64 if ((*pskb)->ip_summed == CHECKSUM_HW &&
65 skb_checksum_help(*pskb, out == NULL))
62 return NF_DROP; 66 return NF_DROP;
63 67
64 iph = (*pskb)->nh.iph; 68 iph = (*pskb)->nh.iph;
@@ -186,10 +190,6 @@ ipt_tcpmss_target(struct sk_buff **pskb,
186 newmss); 190 newmss);
187 191
188 retmodified: 192 retmodified:
189 /* We never hw checksum SYN packets. */
190 BUG_ON((*pskb)->ip_summed == CHECKSUM_HW);
191
192 (*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED;
193 return IPT_CONTINUE; 193 return IPT_CONTINUE;
194} 194}
195 195
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 85c70d240f8b..deadb36d4428 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -33,7 +33,7 @@ target(struct sk_buff **pskb,
33 if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { 33 if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
34 u_int16_t diffs[2]; 34 u_int16_t diffs[2];
35 35
36 if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) 36 if (!skb_make_writable(pskb, sizeof(struct iphdr)))
37 return NF_DROP; 37 return NF_DROP;
38 38
39 diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; 39 diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
@@ -46,7 +46,6 @@ target(struct sk_buff **pskb,
46 sizeof(diffs), 46 sizeof(diffs),
47 (*pskb)->nh.iph->check 47 (*pskb)->nh.iph->check
48 ^0xFFFF)); 48 ^0xFFFF));
49 (*pskb)->nfcache |= NFC_ALTERED;
50 } 49 }
51 return IPT_CONTINUE; 50 return IPT_CONTINUE;
52} 51}
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
new file mode 100644
index 000000000000..b9ae6a9382f3
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -0,0 +1,119 @@
1/* TTL modification target for IP tables
2 * (C) 2000,2005 by Harald Welte <laforge@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 */
9
10#include <linux/module.h>
11#include <linux/skbuff.h>
12#include <linux/ip.h>
13#include <net/checksum.h>
14
15#include <linux/netfilter_ipv4/ip_tables.h>
16#include <linux/netfilter_ipv4/ipt_TTL.h>
17
18MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
19MODULE_DESCRIPTION("IP tables TTL modification module");
20MODULE_LICENSE("GPL");
21
22static unsigned int
23ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in,
24 const struct net_device *out, unsigned int hooknum,
25 const void *targinfo, void *userinfo)
26{
27 struct iphdr *iph;
28 const struct ipt_TTL_info *info = targinfo;
29 u_int16_t diffs[2];
30 int new_ttl;
31
32 if (!skb_make_writable(pskb, (*pskb)->len))
33 return NF_DROP;
34
35 iph = (*pskb)->nh.iph;
36
37 switch (info->mode) {
38 case IPT_TTL_SET:
39 new_ttl = info->ttl;
40 break;
41 case IPT_TTL_INC:
42 new_ttl = iph->ttl + info->ttl;
43 if (new_ttl > 255)
44 new_ttl = 255;
45 break;
46 case IPT_TTL_DEC:
47 new_ttl = iph->ttl - info->ttl;
48 if (new_ttl < 0)
49 new_ttl = 0;
50 break;
51 default:
52 new_ttl = iph->ttl;
53 break;
54 }
55
56 if (new_ttl != iph->ttl) {
57 diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF;
58 iph->ttl = new_ttl;
59 diffs[1] = htons(((unsigned)iph->ttl) << 8);
60 iph->check = csum_fold(csum_partial((char *)diffs,
61 sizeof(diffs),
62 iph->check^0xFFFF));
63 }
64
65 return IPT_CONTINUE;
66}
67
68static int ipt_ttl_checkentry(const char *tablename,
69 const struct ipt_entry *e,
70 void *targinfo,
71 unsigned int targinfosize,
72 unsigned int hook_mask)
73{
74 struct ipt_TTL_info *info = targinfo;
75
76 if (targinfosize != IPT_ALIGN(sizeof(struct ipt_TTL_info))) {
77 printk(KERN_WARNING "ipt_TTL: targinfosize %u != %Zu\n",
78 targinfosize,
79 IPT_ALIGN(sizeof(struct ipt_TTL_info)));
80 return 0;
81 }
82
83 if (strcmp(tablename, "mangle")) {
84 printk(KERN_WARNING "ipt_TTL: can only be called from "
85 "\"mangle\" table, not \"%s\"\n", tablename);
86 return 0;
87 }
88
89 if (info->mode > IPT_TTL_MAXMODE) {
90 printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n",
91 info->mode);
92 return 0;
93 }
94
95 if ((info->mode != IPT_TTL_SET) && (info->ttl == 0))
96 return 0;
97
98 return 1;
99}
100
101static struct ipt_target ipt_TTL = {
102 .name = "TTL",
103 .target = ipt_ttl_target,
104 .checkentry = ipt_ttl_checkentry,
105 .me = THIS_MODULE,
106};
107
108static int __init init(void)
109{
110 return ipt_register_target(&ipt_TTL);
111}
112
113static void __exit fini(void)
114{
115 ipt_unregister_target(&ipt_TTL);
116}
117
118module_init(init);
119module_exit(fini);
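
The csum_fold(csum_partial(diffs, ...)) dance in ipt_ttl_target() is the kernel idiom for an RFC 1624 incremental checksum update: rather than re-summing the whole IP header, fold the one's complement of the old 16-bit word and the new word into the existing checksum. A standalone model of the arithmetic (the TTL shares its 16-bit header word with the protocol field):

    #include <stdio.h>

    static unsigned short csum_update(unsigned short check,
                                      unsigned short old16,
                                      unsigned short new16)
    {
            unsigned long sum = (unsigned short)~check;

            sum += (unsigned short)~old16;    /* retract the old word  */
            sum += new16;                     /* account for the new   */
            while (sum >> 16)                 /* fold carries back in  */
                    sum = (sum & 0xffff) + (sum >> 16);
            return (unsigned short)~sum;
    }

    int main(void)
    {
            /* e.g. TTL 64 -> 63, protocol 6 (TCP) in the low byte */
            unsigned short old16 = (64 << 8) | 6;
            unsigned short new16 = (63 << 8) | 6;
            unsigned short check = 0xb1e6;    /* arbitrary prior checksum */

            printf("0x%04x -> 0x%04x\n", check,
                   csum_update(check, old16, new16));
            return 0;
    }
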
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 52a0076302a7..e2c14f3cb2fc 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -62,6 +62,7 @@
62MODULE_LICENSE("GPL"); 62MODULE_LICENSE("GPL");
63MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); 63MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
64MODULE_DESCRIPTION("iptables userspace logging module"); 64MODULE_DESCRIPTION("iptables userspace logging module");
65MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
65 66
66#define ULOG_NL_EVENT 111 /* Harald's favorite number */ 67#define ULOG_NL_EVENT 111 /* Harald's favorite number */
67#define ULOG_MAXNLGROUPS 32 /* number of nlgroups */ 68#define ULOG_MAXNLGROUPS 32 /* number of nlgroups */
@@ -115,10 +116,10 @@ static void ulog_send(unsigned int nlgroupnum)
115 if (ub->qlen > 1) 116 if (ub->qlen > 1)
116 ub->lastnlh->nlmsg_type = NLMSG_DONE; 117 ub->lastnlh->nlmsg_type = NLMSG_DONE;
117 118
118 NETLINK_CB(ub->skb).dst_groups = (1 << nlgroupnum); 119 NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;
119 DEBUGP("ipt_ULOG: throwing %d packets to netlink mask %u\n", 120 DEBUGP("ipt_ULOG: throwing %d packets to netlink group %u\n",
120 ub->qlen, nlgroupnum); 121 ub->qlen, nlgroupnum + 1);
121 netlink_broadcast(nflognl, ub->skb, 0, (1 << nlgroupnum), GFP_ATOMIC); 122 netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC);
122 123
123 ub->qlen = 0; 124 ub->qlen = 0;
124 ub->skb = NULL; 125 ub->skb = NULL;
@@ -219,13 +220,13 @@ static void ipt_ulog_packet(unsigned int hooknum,
219 pm = NLMSG_DATA(nlh); 220 pm = NLMSG_DATA(nlh);
220 221
221 /* We might not have a timestamp, get one */ 222 /* We might not have a timestamp, get one */
222 if (skb->stamp.tv_sec == 0) 223 if (skb->tstamp.off_sec == 0)
223 do_gettimeofday((struct timeval *)&skb->stamp); 224 __net_timestamp((struct sk_buff *)skb);
224 225
225 /* copy hook, prefix, timestamp, payload, etc. */ 226 /* copy hook, prefix, timestamp, payload, etc. */
226 pm->data_len = copy_len; 227 pm->data_len = copy_len;
227 pm->timestamp_sec = skb->stamp.tv_sec; 228 pm->timestamp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec;
228 pm->timestamp_usec = skb->stamp.tv_usec; 229 pm->timestamp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec;
229 pm->mark = skb->nfmark; 230 pm->mark = skb->nfmark;
230 pm->hook = hooknum; 231 pm->hook = hooknum;
231 if (prefix != NULL) 232 if (prefix != NULL)
@@ -303,18 +304,27 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb,
303 return IPT_CONTINUE; 304 return IPT_CONTINUE;
304} 305}
305 306
306static void ipt_logfn(unsigned int hooknum, 307static void ipt_logfn(unsigned int pf,
308 unsigned int hooknum,
307 const struct sk_buff *skb, 309 const struct sk_buff *skb,
308 const struct net_device *in, 310 const struct net_device *in,
309 const struct net_device *out, 311 const struct net_device *out,
312 const struct nf_loginfo *li,
310 const char *prefix) 313 const char *prefix)
311{ 314{
312 struct ipt_ulog_info loginfo = { 315 struct ipt_ulog_info loginfo;
313 .nl_group = ULOG_DEFAULT_NLGROUP, 316
314 .copy_range = 0, 317 if (!li || li->type != NF_LOG_TYPE_ULOG) {
315 .qthreshold = ULOG_DEFAULT_QTHRESHOLD, 318 loginfo.nl_group = ULOG_DEFAULT_NLGROUP;
316 .prefix = "" 319 loginfo.copy_range = 0;
317 }; 320 loginfo.qthreshold = ULOG_DEFAULT_QTHRESHOLD;
321 loginfo.prefix[0] = '\0';
322 } else {
323 loginfo.nl_group = li->u.ulog.group;
324 loginfo.copy_range = li->u.ulog.copy_len;
325 loginfo.qthreshold = li->u.ulog.qthreshold;
326 strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
327 }
318 328
319 ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); 329 ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
320} 330}
@@ -354,6 +364,12 @@ static struct ipt_target ipt_ulog_reg = {
354 .me = THIS_MODULE, 364 .me = THIS_MODULE,
355}; 365};
356 366
367static struct nf_logger ipt_ulog_logger = {
368 .name = "ipt_ULOG",
369 .logfn = &ipt_logfn,
370 .me = THIS_MODULE,
371};
372
357static int __init init(void) 373static int __init init(void)
358{ 374{
359 int i; 375 int i;
@@ -372,7 +388,8 @@ static int __init init(void)
372 ulog_buffers[i].timer.data = i; 388 ulog_buffers[i].timer.data = i;
373 } 389 }
374 390
375 nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL); 391 nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
392 THIS_MODULE);
376 if (!nflognl) 393 if (!nflognl)
377 return -ENOMEM; 394 return -ENOMEM;
378 395
@@ -381,7 +398,7 @@ static int __init init(void)
381 return -EINVAL; 398 return -EINVAL;
382 } 399 }
383 if (nflog) 400 if (nflog)
384 nf_log_register(PF_INET, &ipt_logfn); 401 nf_log_register(PF_INET, &ipt_ulog_logger);
385 402
386 return 0; 403 return 0;
387} 404}
@@ -394,7 +411,7 @@ static void __exit fini(void)
394 DEBUGP("ipt_ULOG: cleanup_module\n"); 411 DEBUGP("ipt_ULOG: cleanup_module\n");
395 412
396 if (nflog) 413 if (nflog)
397 nf_log_unregister(PF_INET, &ipt_logfn); 414 nf_log_unregister_logger(&ipt_ulog_logger);
398 ipt_unregister_target(&ipt_ulog_reg); 415 ipt_unregister_target(&ipt_ulog_reg);
399 sock_release(nflognl->sk_socket); 416 sock_release(nflognl->sk_socket);
400 417
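
These ULOG hunks track the netlink multicast rework: netlink_kernel_create() now takes the number of groups, and netlink_broadcast() takes a 1-based group number instead of a bitmask. Userspace subscribers still select groups through the bitmask in sockaddr_nl.nl_groups. A minimal listener sketch, assuming the rule uses netlink group 1 (the default; error handling trimmed):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <linux/netlink.h>

#ifndef NETLINK_NFLOG
#define NETLINK_NFLOG 5
#endif

int main(void)
{
	struct sockaddr_nl addr;
	char buf[65536];
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_NFLOG);

	if (fd < 0)
		return 1;

	memset(&addr, 0, sizeof(addr));
	addr.nl_family = AF_NETLINK;
	addr.nl_groups = 1 << (1 - 1);	/* bitmask: subscribe to group 1 */
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	for (;;) {
		ssize_t n = recv(fd, buf, sizeof(buf), 0);

		if (n <= 0)
			break;
		printf("received %zd bytes of ULOG data\n", n);
	}
	close(fd);
	return 0;
}
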
diff --git a/net/ipv4/netfilter/ipt_connbytes.c b/net/ipv4/netfilter/ipt_connbytes.c
new file mode 100644
index 000000000000..df4a42c6da22
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_connbytes.c
@@ -0,0 +1,162 @@
1/* Kernel module to match connection tracking byte counter.
2 * GPL (C) 2002 Martin Devera (devik@cdi.cz).
3 *
4 * 2004-07-20 Harald Welte <laforge@netfilter.org>
5 * - reimplemented to use per-connection accounting counters
6 * - add functionality to match number of packets
7 * - add functionality to match average packet size
8 * - add support to match directions separately
9 *
10 */
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/netfilter_ipv4/ip_conntrack.h>
14#include <linux/netfilter_ipv4/ip_tables.h>
15#include <linux/netfilter_ipv4/ipt_connbytes.h>
16
17#include <asm/div64.h>
18#include <asm/bitops.h>
19
20MODULE_LICENSE("GPL");
21MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
22MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection");
23
24/* 64bit divisor, dividend and result. dynamic precision */
25static u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
26{
27 u_int32_t d = divisor;
28
29 if (divisor > 0xffffffffULL) {
30 unsigned int shift = fls(divisor >> 32);
31
32 d = divisor >> shift;
33 dividend >>= shift;
34 }
35
36 do_div(dividend, d);
37 return dividend;
38}
39
40static int
41match(const struct sk_buff *skb,
42 const struct net_device *in,
43 const struct net_device *out,
44 const void *matchinfo,
45 int offset,
46 int *hotdrop)
47{
48 const struct ipt_connbytes_info *sinfo = matchinfo;
49 enum ip_conntrack_info ctinfo;
50 struct ip_conntrack *ct;
51 u_int64_t what = 0; /* initialize to make gcc happy */
52
53 if (!(ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo)))
54 return 0; /* no match */
55
56 switch (sinfo->what) {
57 case IPT_CONNBYTES_PKTS:
58 switch (sinfo->direction) {
59 case IPT_CONNBYTES_DIR_ORIGINAL:
60 what = ct->counters[IP_CT_DIR_ORIGINAL].packets;
61 break;
62 case IPT_CONNBYTES_DIR_REPLY:
63 what = ct->counters[IP_CT_DIR_REPLY].packets;
64 break;
65 case IPT_CONNBYTES_DIR_BOTH:
66 what = ct->counters[IP_CT_DIR_ORIGINAL].packets;
67 what += ct->counters[IP_CT_DIR_REPLY].packets;
68 break;
69 }
70 break;
71 case IPT_CONNBYTES_BYTES:
72 switch (sinfo->direction) {
73 case IPT_CONNBYTES_DIR_ORIGINAL:
74 what = ct->counters[IP_CT_DIR_ORIGINAL].bytes;
75 break;
76 case IPT_CONNBYTES_DIR_REPLY:
77 what = ct->counters[IP_CT_DIR_REPLY].bytes;
78 break;
79 case IPT_CONNBYTES_DIR_BOTH:
80 what = ct->counters[IP_CT_DIR_ORIGINAL].bytes;
81 what += ct->counters[IP_CT_DIR_REPLY].bytes;
82 break;
83 }
84 break;
85 case IPT_CONNBYTES_AVGPKT:
86 switch (sinfo->direction) {
87 case IPT_CONNBYTES_DIR_ORIGINAL:
88 what = div64_64(ct->counters[IP_CT_DIR_ORIGINAL].bytes,
89 ct->counters[IP_CT_DIR_ORIGINAL].packets);
90 break;
91 case IPT_CONNBYTES_DIR_REPLY:
92 what = div64_64(ct->counters[IP_CT_DIR_REPLY].bytes,
93 ct->counters[IP_CT_DIR_REPLY].packets);
94 break;
95 case IPT_CONNBYTES_DIR_BOTH:
96 {
97 u_int64_t bytes;
98 u_int64_t pkts;
99 bytes = ct->counters[IP_CT_DIR_ORIGINAL].bytes +
100 ct->counters[IP_CT_DIR_REPLY].bytes;
101 pkts = ct->counters[IP_CT_DIR_ORIGINAL].packets+
102 ct->counters[IP_CT_DIR_REPLY].packets;
103
104 /* FIXME_THEORETICAL: what to do if sum
105 * overflows ? */
106
107 what = div64_64(bytes, pkts);
108 }
109 break;
110 }
111 break;
112 }
113
114 if (sinfo->count.to)
115 return (what <= sinfo->count.to && what >= sinfo->count.from);
116 else
117 return (what >= sinfo->count.from);
118}
119
120static int check(const char *tablename,
121 const struct ipt_ip *ip,
122 void *matchinfo,
123 unsigned int matchsize,
124 unsigned int hook_mask)
125{
126 const struct ipt_connbytes_info *sinfo = matchinfo;
127
128 if (matchsize != IPT_ALIGN(sizeof(struct ipt_connbytes_info)))
129 return 0;
130
131 if (sinfo->what != IPT_CONNBYTES_PKTS &&
132 sinfo->what != IPT_CONNBYTES_BYTES &&
133 sinfo->what != IPT_CONNBYTES_AVGPKT)
134 return 0;
135
136 if (sinfo->direction != IPT_CONNBYTES_DIR_ORIGINAL &&
137 sinfo->direction != IPT_CONNBYTES_DIR_REPLY &&
138 sinfo->direction != IPT_CONNBYTES_DIR_BOTH)
139 return 0;
140
141 return 1;
142}
143
144static struct ipt_match state_match = {
145 .name = "connbytes",
146 .match = &match,
147 .checkentry = &check,
148 .me = THIS_MODULE
149};
150
151static int __init init(void)
152{
153 return ipt_register_match(&state_match);
154}
155
156static void __exit fini(void)
157{
158 ipt_unregister_match(&state_match);
159}
160
161module_init(init);
162module_exit(fini);
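
div64_64() above works around do_div() only accepting a 32-bit divisor: when the divisor is wider, both operands are shifted right until it fits, which keeps the quotient proportionally close at the cost of low-order precision. A user-space sketch with a worked example (the helper names are local to this example):

#include <stdint.h>
#include <stdio.h>

/* 1-based index of the highest set bit, like the kernel's fls(). */
static int fls32(uint32_t x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

static uint64_t div64_64(uint64_t dividend, uint64_t divisor)
{
	if (divisor > 0xffffffffULL) {
		unsigned int shift = fls32(divisor >> 32);

		divisor >>= shift;	/* now fits in 32 bits */
		dividend >>= shift;	/* keep the ratio, lose low bits */
	}
	return dividend / (uint32_t)divisor;	/* stands in for do_div() */
}

int main(void)
{
	/* Exact when the divisor already fits in 32 bits: 244140625. */
	printf("%llu\n", (unsigned long long)div64_64(1000000000000ULL, 4096));
	/* Approximate otherwise: prints 536870912, exact value is 536870911. */
	printf("%llu\n", (unsigned long long)div64_64(1ULL << 62,
						      (1ULL << 33) + 3));
	return 0;
}
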
diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/ipv4/netfilter/ipt_connmark.c
index 2706f96cea55..bf8de47ce004 100644
--- a/net/ipv4/netfilter/ipt_connmark.c
+++ b/net/ipv4/netfilter/ipt_connmark.c
@@ -54,9 +54,16 @@ checkentry(const char *tablename,
54 unsigned int matchsize, 54 unsigned int matchsize,
55 unsigned int hook_mask) 55 unsigned int hook_mask)
56{ 56{
57 struct ipt_connmark_info *cm =
58 (struct ipt_connmark_info *)matchinfo;
57 if (matchsize != IPT_ALIGN(sizeof(struct ipt_connmark_info))) 59 if (matchsize != IPT_ALIGN(sizeof(struct ipt_connmark_info)))
58 return 0; 60 return 0;
59 61
62 if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) {
63 printk(KERN_WARNING "connmark: only supports 32bit mark\n");
64 return 0;
65 }
66
60 return 1; 67 return 1;
61} 68}
62 69
diff --git a/net/ipv4/netfilter/ipt_dccp.c b/net/ipv4/netfilter/ipt_dccp.c
new file mode 100644
index 000000000000..ad3278bba6c1
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_dccp.c
@@ -0,0 +1,176 @@
1/*
2 * iptables module for DCCP protocol header matching
3 *
4 * (C) 2005 by Harald Welte <laforge@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/spinlock.h>
14#include <net/ip.h>
15#include <linux/dccp.h>
16
17#include <linux/netfilter_ipv4/ip_tables.h>
18#include <linux/netfilter_ipv4/ipt_dccp.h>
19
20#define DCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
21 || (!!((invflag) & (option)) ^ (cond)))
22
23static unsigned char *dccp_optbuf;
24static DEFINE_SPINLOCK(dccp_buflock);
25
26static inline int
27dccp_find_option(u_int8_t option,
28 const struct sk_buff *skb,
29 const struct dccp_hdr *dh,
30 int *hotdrop)
31{
32 /* dccph_doff is 8 bits, i.e. at most 255 * 4 bytes of header */
33 unsigned char *op;
34 unsigned int optoff = __dccp_hdr_len(dh);
35 unsigned int optlen = dh->dccph_doff*4 - __dccp_hdr_len(dh);
36 unsigned int i;
37
38 if (dh->dccph_doff * 4 < __dccp_hdr_len(dh)) {
39 *hotdrop = 1;
40 return 0;
41 }
42
43 if (!optlen)
44 return 0;
45
46 spin_lock_bh(&dccp_buflock);
47 op = skb_header_pointer(skb,
48 skb->nh.iph->ihl*4 + optoff,
49 optlen, dccp_optbuf);
50 if (op == NULL) {
51 /* If we don't have the whole header, drop packet. */
52 spin_unlock_bh(&dccp_buflock);
53 *hotdrop = 1;
54 return 0;
55 }
56
57 for (i = 0; i < optlen; ) {
58 if (op[i] == option) {
59 spin_unlock_bh(&dccp_buflock);
60 return 1;
61 }
62
63 if (op[i] < 2)
64 i++;
65 else
66 i += op[i+1]?:1;
67 }
68
69 spin_unlock_bh(&dccp_buflock);
70 return 0;
71}
72
73
74static inline int
75match_types(const struct dccp_hdr *dh, u_int16_t typemask)
76{
77 return (typemask & (1 << dh->dccph_type));
78}
79
80static inline int
81match_option(u_int8_t option, const struct sk_buff *skb,
82 const struct dccp_hdr *dh, int *hotdrop)
83{
84 return dccp_find_option(option, skb, dh, hotdrop);
85}
86
87static int
88match(const struct sk_buff *skb,
89 const struct net_device *in,
90 const struct net_device *out,
91 const void *matchinfo,
92 int offset,
93 int *hotdrop)
94{
95 const struct ipt_dccp_info *info =
96 (const struct ipt_dccp_info *)matchinfo;
97 struct dccp_hdr _dh, *dh;
98
99 if (offset)
100 return 0;
101
102 dh = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_dh), &_dh);
103 if (dh == NULL) {
104 *hotdrop = 1;
105 return 0;
106 }
107
108 return DCCHECK(((ntohs(dh->dccph_sport) >= info->spts[0])
109 && (ntohs(dh->dccph_sport) <= info->spts[1])),
110 IPT_DCCP_SRC_PORTS, info->flags, info->invflags)
111 && DCCHECK(((ntohs(dh->dccph_dport) >= info->dpts[0])
112 && (ntohs(dh->dccph_dport) <= info->dpts[1])),
113 IPT_DCCP_DEST_PORTS, info->flags, info->invflags)
114 && DCCHECK(match_types(dh, info->typemask),
115 IPT_DCCP_TYPE, info->flags, info->invflags)
116 && DCCHECK(match_option(info->option, skb, dh, hotdrop),
117 IPT_DCCP_OPTION, info->flags, info->invflags);
118}
119
120static int
121checkentry(const char *tablename,
122 const struct ipt_ip *ip,
123 void *matchinfo,
124 unsigned int matchsize,
125 unsigned int hook_mask)
126{
127 const struct ipt_dccp_info *info;
128
129 info = (const struct ipt_dccp_info *)matchinfo;
130
131 return ip->proto == IPPROTO_DCCP
132 && !(ip->invflags & IPT_INV_PROTO)
133 && matchsize == IPT_ALIGN(sizeof(struct ipt_dccp_info))
134 && !(info->flags & ~IPT_DCCP_VALID_FLAGS)
135 && !(info->invflags & ~IPT_DCCP_VALID_FLAGS)
136 && !(info->invflags & ~info->flags);
137}
138
139static struct ipt_match dccp_match =
140{
141 .name = "dccp",
142 .match = &match,
143 .checkentry = &checkentry,
144 .me = THIS_MODULE,
145};
146
147static int __init init(void)
148{
149 int ret;
150
151 /* doff is 8 bits, so the maximum option size is (4*256). Don't put
152 * this in BSS since DaveM is worried about locked TLBs for kernel
153 * BSS. */
154 dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL);
155 if (!dccp_optbuf)
156 return -ENOMEM;
157 ret = ipt_register_match(&dccp_match);
158 if (ret)
159 kfree(dccp_optbuf);
160
161 return ret;
162}
163
164static void __exit fini(void)
165{
166 ipt_unregister_match(&dccp_match);
167 kfree(dccp_optbuf);
168}
169
170module_init(init);
171module_exit(fini);
172
173MODULE_LICENSE("GPL");
174MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
175MODULE_DESCRIPTION("Match for DCCP protocol packets");
176
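
The option scan in dccp_find_option() is a standard TLV walk: DCCP option types 0 and 1 are single padding/mandatory bytes, everything else carries a length byte that counts the type and length octets themselves, and the op[i+1] ?: 1 idiom clamps a bogus zero length to 1 so a malformed packet cannot stall the loop. A user-space sketch of the same walk over well-formed options:

#include <stdint.h>
#include <stdio.h>

static int find_option(const uint8_t *op, unsigned int optlen, uint8_t option)
{
	unsigned int i = 0;

	while (i < optlen) {
		if (op[i] == option)
			return 1;
		if (op[i] < 2)		/* padding/mandatory: single byte */
			i++;
		else			/* TLV: advance by the length byte */
			i += op[i + 1] ? op[i + 1] : 1;
	}
	return 0;
}

int main(void)
{
	/* padding, option 32 (length 3: type, len, one data byte), padding */
	const uint8_t opts[] = { 0, 32, 3, 0xaa, 0 };

	printf("option 32: %d\n", find_option(opts, sizeof(opts), 32)); /* 1 */
	printf("option 36: %d\n", find_option(opts, sizeof(opts), 36)); /* 0 */
	return 0;
}
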
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index 564b49bfebcf..2dd1cccbdab9 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -94,7 +94,7 @@ struct ipt_hashlimit_htable {
94static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */ 94static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */
95static DECLARE_MUTEX(hlimit_mutex); /* additional checkentry protection */ 95static DECLARE_MUTEX(hlimit_mutex); /* additional checkentry protection */
96static HLIST_HEAD(hashlimit_htables); 96static HLIST_HEAD(hashlimit_htables);
97static kmem_cache_t *hashlimit_cachep; 97static kmem_cache_t *hashlimit_cachep __read_mostly;
98 98
99static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b) 99static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b)
100{ 100{
diff --git a/net/ipv4/netfilter/ipt_mark.c b/net/ipv4/netfilter/ipt_mark.c
index 8955728127b9..00bef6cdd3f8 100644
--- a/net/ipv4/netfilter/ipt_mark.c
+++ b/net/ipv4/netfilter/ipt_mark.c
@@ -37,9 +37,16 @@ checkentry(const char *tablename,
37 unsigned int matchsize, 37 unsigned int matchsize,
38 unsigned int hook_mask) 38 unsigned int hook_mask)
39{ 39{
40 struct ipt_mark_info *minfo = (struct ipt_mark_info *) matchinfo;
41
40 if (matchsize != IPT_ALIGN(sizeof(struct ipt_mark_info))) 42 if (matchsize != IPT_ALIGN(sizeof(struct ipt_mark_info)))
41 return 0; 43 return 0;
42 44
45 if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) {
46 printk(KERN_WARNING "mark: only supports 32bit mark\n");
47 return 0;
48 }
49
43 return 1; 50 return 1;
44} 51}
45 52
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 3b9065e06381..c1889f88262b 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -21,106 +21,6 @@ MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
21MODULE_DESCRIPTION("iptables owner match"); 21MODULE_DESCRIPTION("iptables owner match");
22 22
23static int 23static int
24match_comm(const struct sk_buff *skb, const char *comm)
25{
26 struct task_struct *g, *p;
27 struct files_struct *files;
28 int i;
29
30 read_lock(&tasklist_lock);
31 do_each_thread(g, p) {
32 if(strncmp(p->comm, comm, sizeof(p->comm)))
33 continue;
34
35 task_lock(p);
36 files = p->files;
37 if(files) {
38 spin_lock(&files->file_lock);
39 for (i=0; i < files->max_fds; i++) {
40 if (fcheck_files(files, i) ==
41 skb->sk->sk_socket->file) {
42 spin_unlock(&files->file_lock);
43 task_unlock(p);
44 read_unlock(&tasklist_lock);
45 return 1;
46 }
47 }
48 spin_unlock(&files->file_lock);
49 }
50 task_unlock(p);
51 } while_each_thread(g, p);
52 read_unlock(&tasklist_lock);
53 return 0;
54}
55
56static int
57match_pid(const struct sk_buff *skb, pid_t pid)
58{
59 struct task_struct *p;
60 struct files_struct *files;
61 int i;
62
63 read_lock(&tasklist_lock);
64 p = find_task_by_pid(pid);
65 if (!p)
66 goto out;
67 task_lock(p);
68 files = p->files;
69 if(files) {
70 spin_lock(&files->file_lock);
71 for (i=0; i < files->max_fds; i++) {
72 if (fcheck_files(files, i) ==
73 skb->sk->sk_socket->file) {
74 spin_unlock(&files->file_lock);
75 task_unlock(p);
76 read_unlock(&tasklist_lock);
77 return 1;
78 }
79 }
80 spin_unlock(&files->file_lock);
81 }
82 task_unlock(p);
83out:
84 read_unlock(&tasklist_lock);
85 return 0;
86}
87
88static int
89match_sid(const struct sk_buff *skb, pid_t sid)
90{
91 struct task_struct *g, *p;
92 struct file *file = skb->sk->sk_socket->file;
93 int i, found=0;
94
95 read_lock(&tasklist_lock);
96 do_each_thread(g, p) {
97 struct files_struct *files;
98 if (p->signal->session != sid)
99 continue;
100
101 task_lock(p);
102 files = p->files;
103 if (files) {
104 spin_lock(&files->file_lock);
105 for (i=0; i < files->max_fds; i++) {
106 if (fcheck_files(files, i) == file) {
107 found = 1;
108 break;
109 }
110 }
111 spin_unlock(&files->file_lock);
112 }
113 task_unlock(p);
114 if (found)
115 goto out;
116 } while_each_thread(g, p);
117out:
118 read_unlock(&tasklist_lock);
119
120 return found;
121}
122
123static int
124match(const struct sk_buff *skb, 24match(const struct sk_buff *skb,
125 const struct net_device *in, 25 const struct net_device *in,
126 const struct net_device *out, 26 const struct net_device *out,
@@ -145,24 +45,6 @@ match(const struct sk_buff *skb,
145 return 0; 45 return 0;
146 } 46 }
147 47
148 if(info->match & IPT_OWNER_PID) {
149 if (!match_pid(skb, info->pid) ^
150 !!(info->invert & IPT_OWNER_PID))
151 return 0;
152 }
153
154 if(info->match & IPT_OWNER_SID) {
155 if (!match_sid(skb, info->sid) ^
156 !!(info->invert & IPT_OWNER_SID))
157 return 0;
158 }
159
160 if(info->match & IPT_OWNER_COMM) {
161 if (!match_comm(skb, info->comm) ^
162 !!(info->invert & IPT_OWNER_COMM))
163 return 0;
164 }
165
166 return 1; 48 return 1;
167} 49}
168 50
@@ -173,6 +55,8 @@ checkentry(const char *tablename,
173 unsigned int matchsize, 55 unsigned int matchsize,
174 unsigned int hook_mask) 56 unsigned int hook_mask)
175{ 57{
58 const struct ipt_owner_info *info = matchinfo;
59
176 if (hook_mask 60 if (hook_mask
177 & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING))) { 61 & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING))) {
178 printk("ipt_owner: only valid for LOCAL_OUT or POST_ROUTING.\n"); 62 printk("ipt_owner: only valid for LOCAL_OUT or POST_ROUTING.\n");
@@ -184,15 +68,13 @@ checkentry(const char *tablename,
184 IPT_ALIGN(sizeof(struct ipt_owner_info))); 68 IPT_ALIGN(sizeof(struct ipt_owner_info)));
185 return 0; 69 return 0;
186 } 70 }
187#ifdef CONFIG_SMP 71
188 /* files->file_lock can not be used in a BH */ 72 if (info->match & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) {
189 if (((struct ipt_owner_info *)matchinfo)->match 73 printk("ipt_owner: pid, sid and command matching "
190 & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) { 74 "not supported anymore\n");
191 printk("ipt_owner: pid, sid and command matching is broken "
192 "on SMP.\n");
193 return 0; 75 return 0;
194 } 76 }
195#endif 77
196 return 1; 78 return 1;
197} 79}
198 80
diff --git a/net/ipv4/netfilter/ipt_string.c b/net/ipv4/netfilter/ipt_string.c
new file mode 100644
index 000000000000..b5def204d798
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_string.c
@@ -0,0 +1,91 @@
1/* String matching match for iptables
2 *
3 * (C) 2005 Pablo Neira Ayuso <pablo@eurodev.net>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/init.h>
11#include <linux/module.h>
12#include <linux/kernel.h>
13#include <linux/skbuff.h>
14#include <linux/netfilter_ipv4/ip_tables.h>
15#include <linux/netfilter_ipv4/ipt_string.h>
16#include <linux/textsearch.h>
17
18MODULE_AUTHOR("Pablo Neira Ayuso <pablo@eurodev.net>");
19MODULE_DESCRIPTION("IP tables string match module");
20MODULE_LICENSE("GPL");
21
22static int match(const struct sk_buff *skb,
23 const struct net_device *in,
24 const struct net_device *out,
25 const void *matchinfo,
26 int offset,
27 int *hotdrop)
28{
29 struct ts_state state;
30 struct ipt_string_info *conf = (struct ipt_string_info *) matchinfo;
31
32 memset(&state, 0, sizeof(struct ts_state));
33
34 return (skb_find_text((struct sk_buff *)skb, conf->from_offset,
35 conf->to_offset, conf->config, &state)
36 != UINT_MAX) && !conf->invert;
37}
38
39#define STRING_TEXT_PRIV(m) ((struct ipt_string_info *) m)
40
41static int checkentry(const char *tablename,
42 const struct ipt_ip *ip,
43 void *matchinfo,
44 unsigned int matchsize,
45 unsigned int hook_mask)
46{
47 struct ipt_string_info *conf = matchinfo;
48 struct ts_config *ts_conf;
49
50 if (matchsize != IPT_ALIGN(sizeof(struct ipt_string_info)))
51 return 0;
52
53 /* Damn, can't handle this case properly with iptables... */
54 if (conf->from_offset > conf->to_offset)
55 return 0;
56
57 ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen,
58 GFP_KERNEL, TS_AUTOLOAD);
59 if (IS_ERR(ts_conf))
60 return 0;
61
62 conf->config = ts_conf;
63
64 return 1;
65}
66
67static void destroy(void *matchinfo, unsigned int matchsize)
68{
69 textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
70}
71
72static struct ipt_match string_match = {
73 .name = "string",
74 .match = match,
75 .checkentry = checkentry,
76 .destroy = destroy,
77 .me = THIS_MODULE
78};
79
80static int __init init(void)
81{
82 return ipt_register_match(&string_match);
83}
84
85static void __exit fini(void)
86{
87 ipt_unregister_match(&string_match);
88}
89
90module_init(init);
91module_exit(fini);
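
ipt_string leans on the kernel textsearch API: textsearch_prepare() compiles the pattern once per rule in checkentry(), and skb_find_text() later runs the precompiled ts_config over the (possibly fragmented) skb. A kernel-style sketch of the same API over a plain linear buffer; a demonstration module, not part of the patch:

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/textsearch.h>

static int __init ts_demo_init(void)
{
	static const char haystack[] = "GET /index.html HTTP/1.0";
	struct ts_config *conf;
	struct ts_state state;
	unsigned int pos;

	/* Compile once, as ipt_string does at rule insertion time. */
	conf = textsearch_prepare("bm", "index", 5, GFP_KERNEL, TS_AUTOLOAD);
	if (IS_ERR(conf))
		return PTR_ERR(conf);

	/* Scan a linear buffer; skb_find_text() feeds skb fragments
	 * through the same state machine. */
	memset(&state, 0, sizeof(state));
	pos = textsearch_find_continuous(conf, &state, haystack,
					 sizeof(haystack) - 1);
	printk(KERN_INFO "ts_demo: match at %u\n", pos); /* UINT_MAX if absent */

	textsearch_destroy(conf);
	return 0;
}

static void __exit ts_demo_exit(void)
{
}

module_init(ts_demo_init);
module_exit(ts_demo_exit);
MODULE_LICENSE("GPL");
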
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 912bbcc7f415..f7943ba1f43c 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -59,13 +59,10 @@ static int fold_prot_inuse(struct proto *proto)
59 */ 59 */
60static int sockstat_seq_show(struct seq_file *seq, void *v) 60static int sockstat_seq_show(struct seq_file *seq, void *v)
61{ 61{
62 /* From net/socket.c */
63 extern void socket_seq_show(struct seq_file *seq);
64
65 socket_seq_show(seq); 62 socket_seq_show(seq);
66 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", 63 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
67 fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), 64 fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count),
68 tcp_tw_count, atomic_read(&tcp_sockets_allocated), 65 tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
69 atomic_read(&tcp_memory_allocated)); 66 atomic_read(&tcp_memory_allocated));
70 seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); 67 seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
71 seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); 68 seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 0db405a869f2..291831e792af 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -40,7 +40,6 @@
40#include <linux/timer.h> 40#include <linux/timer.h>
41#include <net/ip.h> 41#include <net/ip.h>
42#include <net/protocol.h> 42#include <net/protocol.h>
43#include <net/tcp.h>
44#include <linux/skbuff.h> 43#include <linux/skbuff.h>
45#include <net/sock.h> 44#include <net/sock.h>
46#include <net/icmp.h> 45#include <net/icmp.h>
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index d1835b1bc8c4..304bb0a1d4f0 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -59,7 +59,6 @@
59#include <linux/netdevice.h> 59#include <linux/netdevice.h>
60#include <linux/in_route.h> 60#include <linux/in_route.h>
61#include <linux/route.h> 61#include <linux/route.h>
62#include <linux/tcp.h>
63#include <linux/skbuff.h> 62#include <linux/skbuff.h>
64#include <net/dst.h> 63#include <net/dst.h>
65#include <net/sock.h> 64#include <net/sock.h>
@@ -71,6 +70,7 @@
71#include <net/udp.h> 70#include <net/udp.h>
72#include <net/raw.h> 71#include <net/raw.h>
73#include <net/snmp.h> 72#include <net/snmp.h>
73#include <net/tcp_states.h>
74#include <net/inet_common.h> 74#include <net/inet_common.h>
75#include <net/checksum.h> 75#include <net/checksum.h>
76#include <net/xfrm.h> 76#include <net/xfrm.h>
@@ -150,10 +150,11 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
150 * RFC 1122: SHOULD pass TOS value up to the transport layer. 150 * RFC 1122: SHOULD pass TOS value up to the transport layer.
151 * -> It does. And not only TOS, but all IP header. 151 * -> It does. And not only TOS, but all IP header.
152 */ 152 */
153void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) 153int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
154{ 154{
155 struct sock *sk; 155 struct sock *sk;
156 struct hlist_head *head; 156 struct hlist_head *head;
157 int delivered = 0;
157 158
158 read_lock(&raw_v4_lock); 159 read_lock(&raw_v4_lock);
159 head = &raw_v4_htable[hash]; 160 head = &raw_v4_htable[hash];
@@ -164,6 +165,7 @@ void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
164 skb->dev->ifindex); 165 skb->dev->ifindex);
165 166
166 while (sk) { 167 while (sk) {
168 delivered = 1;
167 if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) { 169 if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) {
168 struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); 170 struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
169 171
@@ -177,6 +179,7 @@ void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
177 } 179 }
178out: 180out:
179 read_unlock(&raw_v4_lock); 181 read_unlock(&raw_v4_lock);
182 return delivered;
180} 183}
181 184
182void raw_err (struct sock *sk, struct sk_buff *skb, u32 info) 185void raw_err (struct sock *sk, struct sk_buff *skb, u32 info)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 726ea5e8180a..8c0b14e3beec 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -240,7 +240,9 @@ static unsigned rt_hash_mask;
240static int rt_hash_log; 240static int rt_hash_log;
241static unsigned int rt_hash_rnd; 241static unsigned int rt_hash_rnd;
242 242
243struct rt_cache_stat *rt_cache_stat; 243static struct rt_cache_stat *rt_cache_stat;
244#define RT_CACHE_STAT_INC(field) \
245 (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++)
244 246
245static int rt_intern_hash(unsigned hash, struct rtable *rth, 247static int rt_intern_hash(unsigned hash, struct rtable *rth,
246 struct rtable **res); 248 struct rtable **res);
@@ -1685,7 +1687,7 @@ static void ip_handle_martian_source(struct net_device *dev,
1685 printk(KERN_WARNING "martian source %u.%u.%u.%u from " 1687 printk(KERN_WARNING "martian source %u.%u.%u.%u from "
1686 "%u.%u.%u.%u, on dev %s\n", 1688 "%u.%u.%u.%u, on dev %s\n",
1687 NIPQUAD(daddr), NIPQUAD(saddr), dev->name); 1689 NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
1688 if (dev->hard_header_len) { 1690 if (dev->hard_header_len && skb->mac.raw) {
1689 int i; 1691 int i;
1690 unsigned char *p = skb->mac.raw; 1692 unsigned char *p = skb->mac.raw;
1691 printk(KERN_WARNING "ll header: "); 1693 printk(KERN_WARNING "ll header: ");
@@ -2600,6 +2602,8 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
2600 return ip_route_output_slow(rp, flp); 2602 return ip_route_output_slow(rp, flp);
2601} 2603}
2602 2604
2605EXPORT_SYMBOL_GPL(__ip_route_output_key);
2606
2603int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags) 2607int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags)
2604{ 2608{
2605 int err; 2609 int err;
@@ -2618,6 +2622,8 @@ int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk,
2618 return 0; 2622 return 0;
2619} 2623}
2620 2624
2625EXPORT_SYMBOL_GPL(ip_route_output_flow);
2626
2621int ip_route_output_key(struct rtable **rp, struct flowi *flp) 2627int ip_route_output_key(struct rtable **rp, struct flowi *flp)
2622{ 2628{
2623 return ip_route_output_flow(rp, flp, NULL, 0); 2629 return ip_route_output_flow(rp, flp, NULL, 0);
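
rt_cache_stat goes static here and RT_CACHE_STAT_INC() bumps the calling CPU's private copy through per_cpu_ptr(), so the hot path needs no atomics or locks; readers pay instead by summing all copies. A sketch of that pattern with illustrative names (not the ones in route.c; the iterator name follows later kernels):

#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/cpumask.h>
#include <linux/errno.h>

struct demo_stat {
	unsigned int hits;
};

static struct demo_stat *demo_stat;	/* one instance per possible CPU */

/* Lockless on the increment side: each CPU touches only its own copy. */
#define DEMO_STAT_INC(field) \
	(per_cpu_ptr(demo_stat, raw_smp_processor_id())->field++)

static int demo_stat_init(void)
{
	demo_stat = alloc_percpu(struct demo_stat);
	return demo_stat ? 0 : -ENOMEM;
}

/* Readers sum across CPUs; the result is approximate while updates run. */
static unsigned int demo_stat_sum_hits(void)
{
	unsigned int sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += per_cpu_ptr(demo_stat, cpu)->hits;
	return sum;
}
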
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 72d014442185..a34e60ea48a1 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -169,8 +169,6 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
169 return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; 169 return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
170} 170}
171 171
172extern struct request_sock_ops tcp_request_sock_ops;
173
174static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, 172static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
175 struct request_sock *req, 173 struct request_sock *req,
176 struct dst_entry *dst) 174 struct dst_entry *dst)
@@ -180,7 +178,7 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
180 178
181 child = tp->af_specific->syn_recv_sock(sk, skb, req, dst); 179 child = tp->af_specific->syn_recv_sock(sk, skb, req, dst);
182 if (child) 180 if (child)
183 tcp_acceptq_queue(sk, req, child); 181 inet_csk_reqsk_queue_add(sk, req, child);
184 else 182 else
185 reqsk_free(req); 183 reqsk_free(req);
186 184
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e32894532416..652685623519 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -11,7 +11,9 @@
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/sysctl.h> 12#include <linux/sysctl.h>
13#include <linux/config.h> 13#include <linux/config.h>
14#include <linux/igmp.h>
14#include <net/snmp.h> 15#include <net/snmp.h>
16#include <net/icmp.h>
15#include <net/ip.h> 17#include <net/ip.h>
16#include <net/route.h> 18#include <net/route.h>
17#include <net/tcp.h> 19#include <net/tcp.h>
@@ -19,36 +21,6 @@
19/* From af_inet.c */ 21/* From af_inet.c */
20extern int sysctl_ip_nonlocal_bind; 22extern int sysctl_ip_nonlocal_bind;
21 23
22/* From icmp.c */
23extern int sysctl_icmp_echo_ignore_all;
24extern int sysctl_icmp_echo_ignore_broadcasts;
25extern int sysctl_icmp_ignore_bogus_error_responses;
26extern int sysctl_icmp_errors_use_inbound_ifaddr;
27
28/* From ip_fragment.c */
29extern int sysctl_ipfrag_low_thresh;
30extern int sysctl_ipfrag_high_thresh;
31extern int sysctl_ipfrag_time;
32extern int sysctl_ipfrag_secret_interval;
33
34/* From ip_output.c */
35extern int sysctl_ip_dynaddr;
36
37/* From icmp.c */
38extern int sysctl_icmp_ratelimit;
39extern int sysctl_icmp_ratemask;
40
41/* From igmp.c */
42extern int sysctl_igmp_max_memberships;
43extern int sysctl_igmp_max_msf;
44
45/* From inetpeer.c */
46extern int inet_peer_threshold;
47extern int inet_peer_minttl;
48extern int inet_peer_maxttl;
49extern int inet_peer_gc_mintime;
50extern int inet_peer_gc_maxtime;
51
52#ifdef CONFIG_SYSCTL 24#ifdef CONFIG_SYSCTL
53static int tcp_retr1_max = 255; 25static int tcp_retr1_max = 255;
54static int ip_local_port_range_min[] = { 1, 1 }; 26static int ip_local_port_range_min[] = { 1, 1 };
@@ -57,8 +29,6 @@ static int ip_local_port_range_max[] = { 65535, 65535 };
57 29
58struct ipv4_config ipv4_config; 30struct ipv4_config ipv4_config;
59 31
60extern ctl_table ipv4_route_table[];
61
62#ifdef CONFIG_SYSCTL 32#ifdef CONFIG_SYSCTL
63 33
64static 34static
@@ -136,10 +106,11 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file *
136 return ret; 106 return ret;
137} 107}
138 108
139int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, int nlen, 109static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
140 void __user *oldval, size_t __user *oldlenp, 110 int nlen, void __user *oldval,
141 void __user *newval, size_t newlen, 111 size_t __user *oldlenp,
142 void **context) 112 void __user *newval, size_t newlen,
113 void **context)
143{ 114{
144 char val[TCP_CA_NAME_MAX]; 115 char val[TCP_CA_NAME_MAX];
145 ctl_table tbl = { 116 ctl_table tbl = {
@@ -259,7 +230,7 @@ ctl_table ipv4_table[] = {
259 { 230 {
260 .ctl_name = NET_TCP_MAX_TW_BUCKETS, 231 .ctl_name = NET_TCP_MAX_TW_BUCKETS,
261 .procname = "tcp_max_tw_buckets", 232 .procname = "tcp_max_tw_buckets",
262 .data = &sysctl_tcp_max_tw_buckets, 233 .data = &tcp_death_row.sysctl_max_tw_buckets,
263 .maxlen = sizeof(int), 234 .maxlen = sizeof(int),
264 .mode = 0644, 235 .mode = 0644,
265 .proc_handler = &proc_dointvec 236 .proc_handler = &proc_dointvec
@@ -363,7 +334,7 @@ ctl_table ipv4_table[] = {
363 { 334 {
364 .ctl_name = NET_TCP_TW_RECYCLE, 335 .ctl_name = NET_TCP_TW_RECYCLE,
365 .procname = "tcp_tw_recycle", 336 .procname = "tcp_tw_recycle",
366 .data = &sysctl_tcp_tw_recycle, 337 .data = &tcp_death_row.sysctl_tw_recycle,
367 .maxlen = sizeof(int), 338 .maxlen = sizeof(int),
368 .mode = 0644, 339 .mode = 0644,
369 .proc_handler = &proc_dointvec 340 .proc_handler = &proc_dointvec
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ddb6ce4ecff2..f3f0013a9580 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -269,13 +269,12 @@
269 269
270int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; 270int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
271 271
272DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); 272DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly;
273
274kmem_cache_t *tcp_bucket_cachep;
275kmem_cache_t *tcp_timewait_cachep;
276 273
277atomic_t tcp_orphan_count = ATOMIC_INIT(0); 274atomic_t tcp_orphan_count = ATOMIC_INIT(0);
278 275
276EXPORT_SYMBOL_GPL(tcp_orphan_count);
277
279int sysctl_tcp_mem[3]; 278int sysctl_tcp_mem[3];
280int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 }; 279int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
281int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 }; 280int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
@@ -311,15 +310,6 @@ void tcp_enter_memory_pressure(void)
311EXPORT_SYMBOL(tcp_enter_memory_pressure); 310EXPORT_SYMBOL(tcp_enter_memory_pressure);
312 311
313/* 312/*
314 * LISTEN is a special case for poll..
315 */
316static __inline__ unsigned int tcp_listen_poll(struct sock *sk,
317 poll_table *wait)
318{
319 return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0;
320}
321
322/*
323 * Wait for a TCP event. 313 * Wait for a TCP event.
324 * 314 *
325 * Note that we don't need to lock the socket, as the upper poll layers 315 * Note that we don't need to lock the socket, as the upper poll layers
@@ -334,7 +324,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
334 324
335 poll_wait(file, sk->sk_sleep, wait); 325 poll_wait(file, sk->sk_sleep, wait);
336 if (sk->sk_state == TCP_LISTEN) 326 if (sk->sk_state == TCP_LISTEN)
337 return tcp_listen_poll(sk, wait); 327 return inet_csk_listen_poll(sk);
338 328
339 /* Socket is not locked. We are protected from async events 329 /* Socket is not locked. We are protected from async events
340 by poll logic and correct handling of state changes 330 by poll logic and correct handling of state changes
@@ -457,109 +447,6 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
457 return put_user(answ, (int __user *)arg); 447 return put_user(answ, (int __user *)arg);
458} 448}
459 449
460
461int tcp_listen_start(struct sock *sk)
462{
463 struct inet_sock *inet = inet_sk(sk);
464 struct tcp_sock *tp = tcp_sk(sk);
465 int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE);
466
467 if (rc != 0)
468 return rc;
469
470 sk->sk_max_ack_backlog = 0;
471 sk->sk_ack_backlog = 0;
472 tcp_delack_init(tp);
473
474 /* There is race window here: we announce ourselves listening,
475 * but this transition is still not validated by get_port().
476 * It is OK, because this socket enters to hash table only
477 * after validation is complete.
478 */
479 sk->sk_state = TCP_LISTEN;
480 if (!sk->sk_prot->get_port(sk, inet->num)) {
481 inet->sport = htons(inet->num);
482
483 sk_dst_reset(sk);
484 sk->sk_prot->hash(sk);
485
486 return 0;
487 }
488
489 sk->sk_state = TCP_CLOSE;
490 reqsk_queue_destroy(&tp->accept_queue);
491 return -EADDRINUSE;
492}
493
494/*
495 * This routine closes sockets which have been at least partially
496 * opened, but not yet accepted.
497 */
498
499static void tcp_listen_stop (struct sock *sk)
500{
501 struct tcp_sock *tp = tcp_sk(sk);
502 struct listen_sock *lopt;
503 struct request_sock *acc_req;
504 struct request_sock *req;
505 int i;
506
507 tcp_delete_keepalive_timer(sk);
508
509 /* make all the listen_opt local to us */
510 lopt = reqsk_queue_yank_listen_sk(&tp->accept_queue);
511 acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue);
512
513 if (lopt->qlen) {
514 for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
515 while ((req = lopt->syn_table[i]) != NULL) {
516 lopt->syn_table[i] = req->dl_next;
517 lopt->qlen--;
518 reqsk_free(req);
519
520 /* Following specs, it would be better either to send FIN
521 * (and enter FIN-WAIT-1, it is normal close)
522 * or to send active reset (abort).
523 * Certainly, it is pretty dangerous while synflood, but it is
524 * bad justification for our negligence 8)
525 * To be honest, we are not able to make either
526 * of the variants now. --ANK
527 */
528 }
529 }
530 }
531 BUG_TRAP(!lopt->qlen);
532
533 kfree(lopt);
534
535 while ((req = acc_req) != NULL) {
536 struct sock *child = req->sk;
537
538 acc_req = req->dl_next;
539
540 local_bh_disable();
541 bh_lock_sock(child);
542 BUG_TRAP(!sock_owned_by_user(child));
543 sock_hold(child);
544
545 tcp_disconnect(child, O_NONBLOCK);
546
547 sock_orphan(child);
548
549 atomic_inc(&tcp_orphan_count);
550
551 tcp_destroy_sock(child);
552
553 bh_unlock_sock(child);
554 local_bh_enable();
555 sock_put(child);
556
557 sk_acceptq_removed(sk);
558 __reqsk_free(req);
559 }
560 BUG_TRAP(!sk->sk_ack_backlog);
561}
562
563static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) 450static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
564{ 451{
565 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; 452 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
@@ -584,7 +471,7 @@ static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
584 sk_charge_skb(sk, skb); 471 sk_charge_skb(sk, skb);
585 if (!sk->sk_send_head) 472 if (!sk->sk_send_head)
586 sk->sk_send_head = skb; 473 sk->sk_send_head = skb;
587 else if (tp->nonagle&TCP_NAGLE_PUSH) 474 if (tp->nonagle & TCP_NAGLE_PUSH)
588 tp->nonagle &= ~TCP_NAGLE_PUSH; 475 tp->nonagle &= ~TCP_NAGLE_PUSH;
589} 476}
590 477
@@ -665,8 +552,7 @@ new_segment:
665 tcp_mark_push(tp, skb); 552 tcp_mark_push(tp, skb);
666 goto new_segment; 553 goto new_segment;
667 } 554 }
668 if (sk->sk_forward_alloc < copy && 555 if (!sk_stream_wmem_schedule(sk, copy))
669 !sk_stream_mem_schedule(sk, copy, 0))
670 goto wait_for_memory; 556 goto wait_for_memory;
671 557
672 if (can_coalesce) { 558 if (can_coalesce) {
@@ -883,19 +769,23 @@ new_segment:
883 if (off == PAGE_SIZE) { 769 if (off == PAGE_SIZE) {
884 put_page(page); 770 put_page(page);
885 TCP_PAGE(sk) = page = NULL; 771 TCP_PAGE(sk) = page = NULL;
772 off = 0;
886 } 773 }
887 } 774 } else
775 off = 0;
776
777 if (copy > PAGE_SIZE - off)
778 copy = PAGE_SIZE - off;
779
780 if (!sk_stream_wmem_schedule(sk, copy))
781 goto wait_for_memory;
888 782
889 if (!page) { 783 if (!page) {
890 /* Allocate new cache page. */ 784 /* Allocate new cache page. */
891 if (!(page = sk_stream_alloc_page(sk))) 785 if (!(page = sk_stream_alloc_page(sk)))
892 goto wait_for_memory; 786 goto wait_for_memory;
893 off = 0;
894 } 787 }
895 788
896 if (copy > PAGE_SIZE - off)
897 copy = PAGE_SIZE - off;
898
899 /* Time to copy data. We are close to 789 /* Time to copy data. We are close to
900 * the end! */ 790 * the end! */
901 err = skb_copy_to_page(sk, from, skb, page, 791 err = skb_copy_to_page(sk, from, skb, page,
@@ -975,7 +865,7 @@ do_fault:
975 if (!skb->len) { 865 if (!skb->len) {
976 if (sk->sk_send_head == skb) 866 if (sk->sk_send_head == skb)
977 sk->sk_send_head = NULL; 867 sk->sk_send_head = NULL;
978 __skb_unlink(skb, skb->list); 868 __skb_unlink(skb, &sk->sk_write_queue);
979 sk_stream_free_skb(sk, skb); 869 sk_stream_free_skb(sk, skb);
980 } 870 }
981 871
@@ -1057,20 +947,21 @@ static void cleanup_rbuf(struct sock *sk, int copied)
1057 BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); 947 BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));
1058#endif 948#endif
1059 949
1060 if (tcp_ack_scheduled(tp)) { 950 if (inet_csk_ack_scheduled(sk)) {
951 const struct inet_connection_sock *icsk = inet_csk(sk);
1061 /* Delayed ACKs frequently hit locked sockets during bulk 952 /* Delayed ACKs frequently hit locked sockets during bulk
1062 * receive. */ 953 * receive. */
1063 if (tp->ack.blocked || 954 if (icsk->icsk_ack.blocked ||
1064 /* Once-per-two-segments ACK was not sent by tcp_input.c */ 955 /* Once-per-two-segments ACK was not sent by tcp_input.c */
1065 tp->rcv_nxt - tp->rcv_wup > tp->ack.rcv_mss || 956 tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
1066 /* 957 /*
1067 * If this read emptied read buffer, we send ACK, if 958 * If this read emptied read buffer, we send ACK, if
1068 * connection is not bidirectional, user drained 959 * connection is not bidirectional, user drained
1069 * receive buffer and there was a small segment 960 * receive buffer and there was a small segment
1070 * in queue. 961 * in queue.
1071 */ 962 */
1072 (copied > 0 && (tp->ack.pending & TCP_ACK_PUSHED) && 963 (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
1073 !tp->ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) 964 !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc)))
1074 time_to_ack = 1; 965 time_to_ack = 1;
1075 } 966 }
1076 967
@@ -1572,40 +1463,6 @@ void tcp_shutdown(struct sock *sk, int how)
1572 } 1463 }
1573} 1464}
1574 1465
1575/*
1576 * At this point, there should be no process reference to this
1577 * socket, and thus no user references at all. Therefore we
1578 * can assume the socket waitqueue is inactive and nobody will
1579 * try to jump onto it.
1580 */
1581void tcp_destroy_sock(struct sock *sk)
1582{
1583 BUG_TRAP(sk->sk_state == TCP_CLOSE);
1584 BUG_TRAP(sock_flag(sk, SOCK_DEAD));
1585
1586 /* It cannot be in hash table! */
1587 BUG_TRAP(sk_unhashed(sk));
1588
1589 /* If it has not 0 inet_sk(sk)->num, it must be bound */
1590 BUG_TRAP(!inet_sk(sk)->num || tcp_sk(sk)->bind_hash);
1591
1592 sk->sk_prot->destroy(sk);
1593
1594 sk_stream_kill_queues(sk);
1595
1596 xfrm_sk_free_policy(sk);
1597
1598#ifdef INET_REFCNT_DEBUG
1599 if (atomic_read(&sk->sk_refcnt) != 1) {
1600 printk(KERN_DEBUG "Destruction TCP %p delayed, c=%d\n",
1601 sk, atomic_read(&sk->sk_refcnt));
1602 }
1603#endif
1604
1605 atomic_dec(&tcp_orphan_count);
1606 sock_put(sk);
1607}
1608
1609void tcp_close(struct sock *sk, long timeout) 1466void tcp_close(struct sock *sk, long timeout)
1610{ 1467{
1611 struct sk_buff *skb; 1468 struct sk_buff *skb;
@@ -1618,7 +1475,7 @@ void tcp_close(struct sock *sk, long timeout)
1618 tcp_set_state(sk, TCP_CLOSE); 1475 tcp_set_state(sk, TCP_CLOSE);
1619 1476
1620 /* Special case. */ 1477 /* Special case. */
1621 tcp_listen_stop(sk); 1478 inet_csk_listen_stop(sk);
1622 1479
1623 goto adjudge_to_death; 1480 goto adjudge_to_death;
1624 } 1481 }
@@ -1721,12 +1578,12 @@ adjudge_to_death:
1721 tcp_send_active_reset(sk, GFP_ATOMIC); 1578 tcp_send_active_reset(sk, GFP_ATOMIC);
1722 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER); 1579 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER);
1723 } else { 1580 } else {
1724 int tmo = tcp_fin_time(tp); 1581 const int tmo = tcp_fin_time(sk);
1725 1582
1726 if (tmo > TCP_TIMEWAIT_LEN) { 1583 if (tmo > TCP_TIMEWAIT_LEN) {
1727 tcp_reset_keepalive_timer(sk, tcp_fin_time(tp)); 1584 inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk));
1728 } else { 1585 } else {
1729 atomic_inc(&tcp_orphan_count); 1586 atomic_inc(sk->sk_prot->orphan_count);
1730 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); 1587 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
1731 goto out; 1588 goto out;
1732 } 1589 }
@@ -1734,7 +1591,7 @@ adjudge_to_death:
1734 } 1591 }
1735 if (sk->sk_state != TCP_CLOSE) { 1592 if (sk->sk_state != TCP_CLOSE) {
1736 sk_stream_mem_reclaim(sk); 1593 sk_stream_mem_reclaim(sk);
1737 if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans || 1594 if (atomic_read(sk->sk_prot->orphan_count) > sysctl_tcp_max_orphans ||
1738 (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && 1595 (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
1739 atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) { 1596 atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
1740 if (net_ratelimit()) 1597 if (net_ratelimit())
@@ -1745,10 +1602,10 @@ adjudge_to_death:
1745 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY); 1602 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
1746 } 1603 }
1747 } 1604 }
1748 atomic_inc(&tcp_orphan_count); 1605 atomic_inc(sk->sk_prot->orphan_count);
1749 1606
1750 if (sk->sk_state == TCP_CLOSE) 1607 if (sk->sk_state == TCP_CLOSE)
1751 tcp_destroy_sock(sk); 1608 inet_csk_destroy_sock(sk);
1752 /* Otherwise, socket is reprieved until protocol close. */ 1609 /* Otherwise, socket is reprieved until protocol close. */
1753 1610
1754out: 1611out:
@@ -1769,6 +1626,7 @@ static inline int tcp_need_reset(int state)
1769int tcp_disconnect(struct sock *sk, int flags) 1626int tcp_disconnect(struct sock *sk, int flags)
1770{ 1627{
1771 struct inet_sock *inet = inet_sk(sk); 1628 struct inet_sock *inet = inet_sk(sk);
1629 struct inet_connection_sock *icsk = inet_csk(sk);
1772 struct tcp_sock *tp = tcp_sk(sk); 1630 struct tcp_sock *tp = tcp_sk(sk);
1773 int err = 0; 1631 int err = 0;
1774 int old_state = sk->sk_state; 1632 int old_state = sk->sk_state;
@@ -1778,7 +1636,7 @@ int tcp_disconnect(struct sock *sk, int flags)
1778 1636
1779 /* ABORT function of RFC793 */ 1637 /* ABORT function of RFC793 */
1780 if (old_state == TCP_LISTEN) { 1638 if (old_state == TCP_LISTEN) {
1781 tcp_listen_stop(sk); 1639 inet_csk_listen_stop(sk);
1782 } else if (tcp_need_reset(old_state) || 1640 } else if (tcp_need_reset(old_state) ||
1783 (tp->snd_nxt != tp->write_seq && 1641 (tp->snd_nxt != tp->write_seq &&
1784 (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { 1642 (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
@@ -1805,125 +1663,34 @@ int tcp_disconnect(struct sock *sk, int flags)
1805 tp->srtt = 0; 1663 tp->srtt = 0;
1806 if ((tp->write_seq += tp->max_window + 2) == 0) 1664 if ((tp->write_seq += tp->max_window + 2) == 0)
1807 tp->write_seq = 1; 1665 tp->write_seq = 1;
1808 tp->backoff = 0; 1666 icsk->icsk_backoff = 0;
1809 tp->snd_cwnd = 2; 1667 tp->snd_cwnd = 2;
1810 tp->probes_out = 0; 1668 icsk->icsk_probes_out = 0;
1811 tp->packets_out = 0; 1669 tp->packets_out = 0;
1812 tp->snd_ssthresh = 0x7fffffff; 1670 tp->snd_ssthresh = 0x7fffffff;
1813 tp->snd_cwnd_cnt = 0; 1671 tp->snd_cwnd_cnt = 0;
1814 tcp_set_ca_state(tp, TCP_CA_Open); 1672 tcp_set_ca_state(sk, TCP_CA_Open);
1815 tcp_clear_retrans(tp); 1673 tcp_clear_retrans(tp);
1816 tcp_delack_init(tp); 1674 inet_csk_delack_init(sk);
1817 sk->sk_send_head = NULL; 1675 sk->sk_send_head = NULL;
1818 tp->rx_opt.saw_tstamp = 0; 1676 tp->rx_opt.saw_tstamp = 0;
1819 tcp_sack_reset(&tp->rx_opt); 1677 tcp_sack_reset(&tp->rx_opt);
1820 __sk_dst_reset(sk); 1678 __sk_dst_reset(sk);
1821 1679
1822 BUG_TRAP(!inet->num || tp->bind_hash); 1680 BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
1823 1681
1824 sk->sk_error_report(sk); 1682 sk->sk_error_report(sk);
1825 return err; 1683 return err;
1826} 1684}
1827 1685
1828/* 1686/*
1829 * Wait for an incoming connection, avoid race
1830 * conditions. This must be called with the socket locked.
1831 */
1832static int wait_for_connect(struct sock *sk, long timeo)
1833{
1834 struct tcp_sock *tp = tcp_sk(sk);
1835 DEFINE_WAIT(wait);
1836 int err;
1837
1838 /*
1839 * True wake-one mechanism for incoming connections: only
1840 * one process gets woken up, not the 'whole herd'.
1841 * Since we do not 'race & poll' for established sockets
1842 * anymore, the common case will execute the loop only once.
1843 *
1844 * Subtle issue: "add_wait_queue_exclusive()" will be added
1845 * after any current non-exclusive waiters, and we know that
1846 * it will always _stay_ after any new non-exclusive waiters
1847 * because all non-exclusive waiters are added at the
1848 * beginning of the wait-queue. As such, it's ok to "drop"
1849 * our exclusiveness temporarily when we get woken up without
1850 * having to remove and re-insert us on the wait queue.
1851 */
1852 for (;;) {
1853 prepare_to_wait_exclusive(sk->sk_sleep, &wait,
1854 TASK_INTERRUPTIBLE);
1855 release_sock(sk);
1856 if (reqsk_queue_empty(&tp->accept_queue))
1857 timeo = schedule_timeout(timeo);
1858 lock_sock(sk);
1859 err = 0;
1860 if (!reqsk_queue_empty(&tp->accept_queue))
1861 break;
1862 err = -EINVAL;
1863 if (sk->sk_state != TCP_LISTEN)
1864 break;
1865 err = sock_intr_errno(timeo);
1866 if (signal_pending(current))
1867 break;
1868 err = -EAGAIN;
1869 if (!timeo)
1870 break;
1871 }
1872 finish_wait(sk->sk_sleep, &wait);
1873 return err;
1874}
1875
1876/*
1877 * This will accept the next outstanding connection.
1878 */
1879
1880struct sock *tcp_accept(struct sock *sk, int flags, int *err)
1881{
1882 struct tcp_sock *tp = tcp_sk(sk);
1883 struct sock *newsk;
1884 int error;
1885
1886 lock_sock(sk);
1887
1888 /* We need to make sure that this socket is listening,
1889 * and that it has something pending.
1890 */
1891 error = -EINVAL;
1892 if (sk->sk_state != TCP_LISTEN)
1893 goto out_err;
1894
1895 /* Find already established connection */
1896 if (reqsk_queue_empty(&tp->accept_queue)) {
1897 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
1898
1899 /* If this is a non blocking socket don't sleep */
1900 error = -EAGAIN;
1901 if (!timeo)
1902 goto out_err;
1903
1904 error = wait_for_connect(sk, timeo);
1905 if (error)
1906 goto out_err;
1907 }
1908
1909 newsk = reqsk_queue_get_child(&tp->accept_queue, sk);
1910 BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
1911out:
1912 release_sock(sk);
1913 return newsk;
1914out_err:
1915 newsk = NULL;
1916 *err = error;
1917 goto out;
1918}
1919
1920/*
1921 * Socket option code for TCP. 1687 * Socket option code for TCP.
1922 */ 1688 */
1923int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, 1689int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
1924 int optlen) 1690 int optlen)
1925{ 1691{
1926 struct tcp_sock *tp = tcp_sk(sk); 1692 struct tcp_sock *tp = tcp_sk(sk);
1693 struct inet_connection_sock *icsk = inet_csk(sk);
1927 int val; 1694 int val;
1928 int err = 0; 1695 int err = 0;
1929 1696
@@ -1945,7 +1712,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
1945 name[val] = 0; 1712 name[val] = 0;
1946 1713
1947 lock_sock(sk); 1714 lock_sock(sk);
1948 err = tcp_set_congestion_control(tp, name); 1715 err = tcp_set_congestion_control(sk, name);
1949 release_sock(sk); 1716 release_sock(sk);
1950 return err; 1717 return err;
1951 } 1718 }
@@ -2022,7 +1789,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
2022 elapsed = tp->keepalive_time - elapsed; 1789 elapsed = tp->keepalive_time - elapsed;
2023 else 1790 else
2024 elapsed = 0; 1791 elapsed = 0;
2025 tcp_reset_keepalive_timer(sk, elapsed); 1792 inet_csk_reset_keepalive_timer(sk, elapsed);
2026 } 1793 }
2027 } 1794 }
2028 break; 1795 break;
@@ -2042,7 +1809,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
2042 if (val < 1 || val > MAX_TCP_SYNCNT) 1809 if (val < 1 || val > MAX_TCP_SYNCNT)
2043 err = -EINVAL; 1810 err = -EINVAL;
2044 else 1811 else
2045 tp->syn_retries = val; 1812 icsk->icsk_syn_retries = val;
2046 break; 1813 break;
2047 1814
2048 case TCP_LINGER2: 1815 case TCP_LINGER2:
@@ -2055,15 +1822,15 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
2055 break; 1822 break;
2056 1823
2057 case TCP_DEFER_ACCEPT: 1824 case TCP_DEFER_ACCEPT:
2058 tp->defer_accept = 0; 1825 icsk->icsk_accept_queue.rskq_defer_accept = 0;
2059 if (val > 0) { 1826 if (val > 0) {
2060 /* Translate value in seconds to number of 1827 /* Translate value in seconds to number of
2061 * retransmits */ 1828 * retransmits */
2062 while (tp->defer_accept < 32 && 1829 while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
2063 val > ((TCP_TIMEOUT_INIT / HZ) << 1830 val > ((TCP_TIMEOUT_INIT / HZ) <<
2064 tp->defer_accept)) 1831 icsk->icsk_accept_queue.rskq_defer_accept))
2065 tp->defer_accept++; 1832 icsk->icsk_accept_queue.rskq_defer_accept++;
2066 tp->defer_accept++; 1833 icsk->icsk_accept_queue.rskq_defer_accept++;
2067 } 1834 }
2068 break; 1835 break;
2069 1836
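
The TCP_DEFER_ACCEPT hunk above converts a timeout in seconds into a count of SYN-ACK retransmits, since the retransmit timer backs off exponentially from TCP_TIMEOUT_INIT. A user-space sketch of the translation, assuming TCP_TIMEOUT_INIT/HZ is 3 seconds as in kernels of this era:

#include <stdio.h>

#define TIMEOUT_INIT_SECS 3	/* assumed TCP_TIMEOUT_INIT / HZ */

static int defer_accept_retrans(int val)
{
	int retrans = 0;

	/* Count doubling periods (3s, 6s, 12s, ...) until val is covered. */
	while (retrans < 32 && val > (TIMEOUT_INIT_SECS << retrans))
		retrans++;
	return retrans + 1;
}

int main(void)
{
	/* prints 1 3 6 for 1s, 10s and 60s respectively */
	printf("%d %d %d\n", defer_accept_retrans(1),
	       defer_accept_retrans(10), defer_accept_retrans(60));
	return 0;
}
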
@@ -2081,16 +1848,16 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
2081 1848
2082 case TCP_QUICKACK: 1849 case TCP_QUICKACK:
2083 if (!val) { 1850 if (!val) {
2084 tp->ack.pingpong = 1; 1851 icsk->icsk_ack.pingpong = 1;
2085 } else { 1852 } else {
2086 tp->ack.pingpong = 0; 1853 icsk->icsk_ack.pingpong = 0;
2087 if ((1 << sk->sk_state) & 1854 if ((1 << sk->sk_state) &
2088 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && 1855 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
2089 tcp_ack_scheduled(tp)) { 1856 inet_csk_ack_scheduled(sk)) {
2090 tp->ack.pending |= TCP_ACK_PUSHED; 1857 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
2091 cleanup_rbuf(sk, 1); 1858 cleanup_rbuf(sk, 1);
2092 if (!(val & 1)) 1859 if (!(val & 1))
2093 tp->ack.pingpong = 1; 1860 icsk->icsk_ack.pingpong = 1;
2094 } 1861 }
2095 } 1862 }
2096 break; 1863 break;
@@ -2107,15 +1874,16 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
2107void tcp_get_info(struct sock *sk, struct tcp_info *info) 1874void tcp_get_info(struct sock *sk, struct tcp_info *info)
2108{ 1875{
2109 struct tcp_sock *tp = tcp_sk(sk); 1876 struct tcp_sock *tp = tcp_sk(sk);
1877 const struct inet_connection_sock *icsk = inet_csk(sk);
2110 u32 now = tcp_time_stamp; 1878 u32 now = tcp_time_stamp;
2111 1879
2112 memset(info, 0, sizeof(*info)); 1880 memset(info, 0, sizeof(*info));
2113 1881
2114 info->tcpi_state = sk->sk_state; 1882 info->tcpi_state = sk->sk_state;
2115 info->tcpi_ca_state = tp->ca_state; 1883 info->tcpi_ca_state = icsk->icsk_ca_state;
2116 info->tcpi_retransmits = tp->retransmits; 1884 info->tcpi_retransmits = icsk->icsk_retransmits;
2117 info->tcpi_probes = tp->probes_out; 1885 info->tcpi_probes = icsk->icsk_probes_out;
2118 info->tcpi_backoff = tp->backoff; 1886 info->tcpi_backoff = icsk->icsk_backoff;
2119 1887
2120 if (tp->rx_opt.tstamp_ok) 1888 if (tp->rx_opt.tstamp_ok)
2121 info->tcpi_options |= TCPI_OPT_TIMESTAMPS; 1889 info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
@@ -2130,10 +1898,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2130 if (tp->ecn_flags&TCP_ECN_OK) 1898 if (tp->ecn_flags&TCP_ECN_OK)
2131 info->tcpi_options |= TCPI_OPT_ECN; 1899 info->tcpi_options |= TCPI_OPT_ECN;
2132 1900
2133 info->tcpi_rto = jiffies_to_usecs(tp->rto); 1901 info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
2134 info->tcpi_ato = jiffies_to_usecs(tp->ack.ato); 1902 info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato);
2135 info->tcpi_snd_mss = tp->mss_cache; 1903 info->tcpi_snd_mss = tp->mss_cache;
2136 info->tcpi_rcv_mss = tp->ack.rcv_mss; 1904 info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
2137 1905
2138 info->tcpi_unacked = tp->packets_out; 1906 info->tcpi_unacked = tp->packets_out;
2139 info->tcpi_sacked = tp->sacked_out; 1907 info->tcpi_sacked = tp->sacked_out;
@@ -2142,7 +1910,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2142 info->tcpi_fackets = tp->fackets_out; 1910 info->tcpi_fackets = tp->fackets_out;
2143 1911
2144 info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); 1912 info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
2145 info->tcpi_last_data_recv = jiffies_to_msecs(now - tp->ack.lrcvtime); 1913 info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
2146 info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); 1914 info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
2147 1915
2148 info->tcpi_pmtu = tp->pmtu_cookie; 1916 info->tcpi_pmtu = tp->pmtu_cookie;
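
tcp_get_info() now reads the RTO, backoff, retransmit and delayed-ACK
fields out of the inet_connection_sock, but the struct tcp_info it fills
is unchanged, and user space still fetches it with getsockopt(TCP_INFO).
A small sketch of the consumer side (fd is assumed to be a connected TCP
socket; field names are from <netinet/tcp.h>):

    #include <stdio.h>
    #include <string.h>
    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <sys/socket.h>

    static void dump_tcp_info(int fd)
    {
            struct tcp_info info;
            socklen_t len = sizeof(info);

            memset(&info, 0, sizeof(info));
            if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &info, &len) == 0)
                    printf("rto=%uus ato=%uus snd_mss=%u rcv_mss=%u rexmits=%u\n",
                           info.tcpi_rto, info.tcpi_ato,
                           info.tcpi_snd_mss, info.tcpi_rcv_mss,
                           info.tcpi_retransmits);
    }
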
@@ -2165,6 +1933,7 @@ EXPORT_SYMBOL_GPL(tcp_get_info);
2165int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, 1933int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2166 int __user *optlen) 1934 int __user *optlen)
2167{ 1935{
1936 struct inet_connection_sock *icsk = inet_csk(sk);
2168 struct tcp_sock *tp = tcp_sk(sk); 1937 struct tcp_sock *tp = tcp_sk(sk);
2169 int val, len; 1938 int val, len;
2170 1939
@@ -2202,7 +1971,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2202 val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; 1971 val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
2203 break; 1972 break;
2204 case TCP_SYNCNT: 1973 case TCP_SYNCNT:
2205 val = tp->syn_retries ? : sysctl_tcp_syn_retries; 1974 val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
2206 break; 1975 break;
2207 case TCP_LINGER2: 1976 case TCP_LINGER2:
2208 val = tp->linger2; 1977 val = tp->linger2;
@@ -2210,8 +1979,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2210 val = (val ? : sysctl_tcp_fin_timeout) / HZ; 1979 val = (val ? : sysctl_tcp_fin_timeout) / HZ;
2211 break; 1980 break;
2212 case TCP_DEFER_ACCEPT: 1981 case TCP_DEFER_ACCEPT:
2213 val = !tp->defer_accept ? 0 : ((TCP_TIMEOUT_INIT / HZ) << 1982 val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
2214 (tp->defer_accept - 1)); 1983 ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
2215 break; 1984 break;
2216 case TCP_WINDOW_CLAMP: 1985 case TCP_WINDOW_CLAMP:
2217 val = tp->window_clamp; 1986 val = tp->window_clamp;
@@ -2232,7 +2001,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2232 return 0; 2001 return 0;
2233 } 2002 }
2234 case TCP_QUICKACK: 2003 case TCP_QUICKACK:
2235 val = !tp->ack.pingpong; 2004 val = !icsk->icsk_ack.pingpong;
2236 break; 2005 break;
2237 2006
2238 case TCP_CONGESTION: 2007 case TCP_CONGESTION:
@@ -2241,7 +2010,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2241 len = min_t(unsigned int, len, TCP_CA_NAME_MAX); 2010 len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
2242 if (put_user(len, optlen)) 2011 if (put_user(len, optlen))
2243 return -EFAULT; 2012 return -EFAULT;
2244 if (copy_to_user(optval, tp->ca_ops->name, len)) 2013 if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
2245 return -EFAULT; 2014 return -EFAULT;
2246 return 0; 2015 return 0;
2247 default: 2016 default:
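
The TCP_CONGESTION branch above reports the name of the socket's current
icsk_ca_ops, truncated to TCP_CA_NAME_MAX (16) bytes. A hedged sketch of
querying and switching the algorithm from user space -- it assumes the
installed headers expose TCP_CONGESTION (numeric value 13 in this kernel):

    #include <stdio.h>
    #include <string.h>
    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <sys/socket.h>

    #ifndef TCP_CONGESTION
    #define TCP_CONGESTION 13       /* assumed option number */
    #endif

    static void show_and_set_cc(int fd, const char *name)
    {
            char cur[16];           /* TCP_CA_NAME_MAX */
            socklen_t len = sizeof(cur);

            if (getsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, cur, &len) == 0)
                    printf("current: %.*s\n", (int)len, cur);
            if (setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION,
                           name, strlen(name)) < 0)
                    perror("TCP_CONGESTION");
    }
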
@@ -2278,79 +2047,72 @@ void __init tcp_init(void)
2278 __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb), 2047 __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
2279 sizeof(skb->cb)); 2048 sizeof(skb->cb));
2280 2049
2281 tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket", 2050 tcp_hashinfo.bind_bucket_cachep =
2282 sizeof(struct tcp_bind_bucket), 2051 kmem_cache_create("tcp_bind_bucket",
2283 0, SLAB_HWCACHE_ALIGN, 2052 sizeof(struct inet_bind_bucket), 0,
2284 NULL, NULL); 2053 SLAB_HWCACHE_ALIGN, NULL, NULL);
2285 if (!tcp_bucket_cachep) 2054 if (!tcp_hashinfo.bind_bucket_cachep)
2286 panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); 2055 panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
2287 2056
2288 tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket",
2289 sizeof(struct tcp_tw_bucket),
2290 0, SLAB_HWCACHE_ALIGN,
2291 NULL, NULL);
2292 if (!tcp_timewait_cachep)
2293 panic("tcp_init: Cannot alloc tcp_tw_bucket cache.");
2294
2295 /* Size and allocate the main established and bind bucket 2057 /* Size and allocate the main established and bind bucket
2296 * hash tables. 2058 * hash tables.
2297 * 2059 *
2298 * The methodology is similar to that of the buffer cache. 2060 * The methodology is similar to that of the buffer cache.
2299 */ 2061 */
2300 tcp_ehash = (struct tcp_ehash_bucket *) 2062 tcp_hashinfo.ehash =
2301 alloc_large_system_hash("TCP established", 2063 alloc_large_system_hash("TCP established",
2302 sizeof(struct tcp_ehash_bucket), 2064 sizeof(struct inet_ehash_bucket),
2303 thash_entries, 2065 thash_entries,
2304 (num_physpages >= 128 * 1024) ? 2066 (num_physpages >= 128 * 1024) ?
2305 (25 - PAGE_SHIFT) : 2067 (25 - PAGE_SHIFT) :
2306 (27 - PAGE_SHIFT), 2068 (27 - PAGE_SHIFT),
2307 HASH_HIGHMEM, 2069 HASH_HIGHMEM,
2308 &tcp_ehash_size, 2070 &tcp_hashinfo.ehash_size,
2309 NULL, 2071 NULL,
2310 0); 2072 0);
2311 tcp_ehash_size = (1 << tcp_ehash_size) >> 1; 2073 tcp_hashinfo.ehash_size = (1 << tcp_hashinfo.ehash_size) >> 1;
2312 for (i = 0; i < (tcp_ehash_size << 1); i++) { 2074 for (i = 0; i < (tcp_hashinfo.ehash_size << 1); i++) {
2313 rwlock_init(&tcp_ehash[i].lock); 2075 rwlock_init(&tcp_hashinfo.ehash[i].lock);
2314 INIT_HLIST_HEAD(&tcp_ehash[i].chain); 2076 INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain);
2315 } 2077 }
2316 2078
2317 tcp_bhash = (struct tcp_bind_hashbucket *) 2079 tcp_hashinfo.bhash =
2318 alloc_large_system_hash("TCP bind", 2080 alloc_large_system_hash("TCP bind",
2319 sizeof(struct tcp_bind_hashbucket), 2081 sizeof(struct inet_bind_hashbucket),
2320 tcp_ehash_size, 2082 tcp_hashinfo.ehash_size,
2321 (num_physpages >= 128 * 1024) ? 2083 (num_physpages >= 128 * 1024) ?
2322 (25 - PAGE_SHIFT) : 2084 (25 - PAGE_SHIFT) :
2323 (27 - PAGE_SHIFT), 2085 (27 - PAGE_SHIFT),
2324 HASH_HIGHMEM, 2086 HASH_HIGHMEM,
2325 &tcp_bhash_size, 2087 &tcp_hashinfo.bhash_size,
2326 NULL, 2088 NULL,
2327 64 * 1024); 2089 64 * 1024);
2328 tcp_bhash_size = 1 << tcp_bhash_size; 2090 tcp_hashinfo.bhash_size = 1 << tcp_hashinfo.bhash_size;
2329 for (i = 0; i < tcp_bhash_size; i++) { 2091 for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
2330 spin_lock_init(&tcp_bhash[i].lock); 2092 spin_lock_init(&tcp_hashinfo.bhash[i].lock);
2331 INIT_HLIST_HEAD(&tcp_bhash[i].chain); 2093 INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
2332 } 2094 }
2333 2095
2334 /* Try to be a bit smarter and adjust defaults depending 2096 /* Try to be a bit smarter and adjust defaults depending
2335 * on available memory. 2097 * on available memory.
2336 */ 2098 */
2337 for (order = 0; ((1 << order) << PAGE_SHIFT) < 2099 for (order = 0; ((1 << order) << PAGE_SHIFT) <
2338 (tcp_bhash_size * sizeof(struct tcp_bind_hashbucket)); 2100 (tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket));
2339 order++) 2101 order++)
2340 ; 2102 ;
2341 if (order >= 4) { 2103 if (order >= 4) {
2342 sysctl_local_port_range[0] = 32768; 2104 sysctl_local_port_range[0] = 32768;
2343 sysctl_local_port_range[1] = 61000; 2105 sysctl_local_port_range[1] = 61000;
2344 sysctl_tcp_max_tw_buckets = 180000; 2106 tcp_death_row.sysctl_max_tw_buckets = 180000;
2345 sysctl_tcp_max_orphans = 4096 << (order - 4); 2107 sysctl_tcp_max_orphans = 4096 << (order - 4);
2346 sysctl_max_syn_backlog = 1024; 2108 sysctl_max_syn_backlog = 1024;
2347 } else if (order < 3) { 2109 } else if (order < 3) {
2348 sysctl_local_port_range[0] = 1024 * (3 - order); 2110 sysctl_local_port_range[0] = 1024 * (3 - order);
2349 sysctl_tcp_max_tw_buckets >>= (3 - order); 2111 tcp_death_row.sysctl_max_tw_buckets >>= (3 - order);
2350 sysctl_tcp_max_orphans >>= (3 - order); 2112 sysctl_tcp_max_orphans >>= (3 - order);
2351 sysctl_max_syn_backlog = 128; 2113 sysctl_max_syn_backlog = 128;
2352 } 2114 }
2353 tcp_port_rover = sysctl_local_port_range[0] - 1; 2115 tcp_hashinfo.port_rover = sysctl_local_port_range[0] - 1;
2354 2116
2355 sysctl_tcp_mem[0] = 768 << order; 2117 sysctl_tcp_mem[0] = 768 << order;
2356 sysctl_tcp_mem[1] = 1024 << order; 2118 sysctl_tcp_mem[1] = 1024 << order;
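
The sizing logic above deserves a gloss: order is the smallest page order
whose span covers the bind-hash table, and it doubles as a crude estimate
of machine memory -- order >= 4 widens the ephemeral port range to
32768..61000 and raises the TIME-WAIT and orphan limits, while order < 3
scales them down. A stand-alone sketch of the order search (PAGE_SHIFT
assumed to be 12, i.e. 4 KiB pages):

    #include <stdio.h>

    #define PAGE_SHIFT 12   /* assumed 4 KiB pages */

    /* Smallest order such that (1 << order) pages cover the table. */
    static int table_order(unsigned long nbuckets, unsigned long bucket_size)
    {
            int order;

            for (order = 0;
                 ((1UL << order) << PAGE_SHIFT) < nbuckets * bucket_size;
                 order++)
                    ;
            return order;
    }

    int main(void)
    {
            /* e.g. 65536 buckets of 16 bytes -> 1 MiB -> order 8 */
            printf("order = %d\n", table_order(65536, 16));
            return 0;
    }
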
@@ -2365,14 +2127,12 @@ void __init tcp_init(void)
2365 2127
2366 printk(KERN_INFO "TCP: Hash tables configured " 2128 printk(KERN_INFO "TCP: Hash tables configured "
2367 "(established %d bind %d)\n", 2129 "(established %d bind %d)\n",
2368 tcp_ehash_size << 1, tcp_bhash_size); 2130 tcp_hashinfo.ehash_size << 1, tcp_hashinfo.bhash_size);
2369 2131
2370 tcp_register_congestion_control(&tcp_reno); 2132 tcp_register_congestion_control(&tcp_reno);
2371} 2133}
2372 2134
2373EXPORT_SYMBOL(tcp_accept);
2374EXPORT_SYMBOL(tcp_close); 2135EXPORT_SYMBOL(tcp_close);
2375EXPORT_SYMBOL(tcp_destroy_sock);
2376EXPORT_SYMBOL(tcp_disconnect); 2136EXPORT_SYMBOL(tcp_disconnect);
2377EXPORT_SYMBOL(tcp_getsockopt); 2137EXPORT_SYMBOL(tcp_getsockopt);
2378EXPORT_SYMBOL(tcp_ioctl); 2138EXPORT_SYMBOL(tcp_ioctl);
@@ -2384,4 +2144,3 @@ EXPORT_SYMBOL(tcp_sendpage);
2384EXPORT_SYMBOL(tcp_setsockopt); 2144EXPORT_SYMBOL(tcp_setsockopt);
2385EXPORT_SYMBOL(tcp_shutdown); 2145EXPORT_SYMBOL(tcp_shutdown);
2386EXPORT_SYMBOL(tcp_statistics); 2146EXPORT_SYMBOL(tcp_statistics);
2387EXPORT_SYMBOL(tcp_timewait_cachep);
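
Taken together, the net/ipv4/tcp.c changes drop the TCP-private
tcp_tw_bucket cache and the tcp_accept/tcp_destroy_sock exports because
accept-queue and TIME-WAIT handling now live in the generic
inet_connection_sock and inet_timewait layers, with all lookup state
gathered into tcp_hashinfo. The inet_csk()/tcp_sk() casts this relies on
are plain first-member layering; a toy user-space model (the struct
contents here are invented for illustration -- the real definitions live
in the inet headers):

    #include <stdio.h>

    /* Toy layering: each struct embeds the previous one first, so a
     * pointer to the outer object is also a valid pointer to every
     * inner layer. */
    struct sock { int sk_state; };
    struct inet_sock { struct sock sk; unsigned short sport; };
    struct inet_connection_sock {
            struct inet_sock icsk_inet;
            unsigned char    icsk_retransmits;
            unsigned long    icsk_ca_priv[16];  /* per-CA scratch area */
    };
    struct tcp_sock { struct inet_connection_sock inet_conn; int snd_cwnd; };

    static struct inet_connection_sock *inet_csk(struct sock *sk)
    {
            return (struct inet_connection_sock *)sk;
    }

    static void *inet_csk_ca(struct sock *sk)
    {
            return inet_csk(sk)->icsk_ca_priv;
    }

    int main(void)
    {
            static struct tcp_sock tp;
            struct sock *sk = (struct sock *)&tp;

            inet_csk(sk)->icsk_retransmits = 3;
            printf("%d %p\n", tp.inet_conn.icsk_retransmits, inet_csk_ca(sk));
            return 0;
    }
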
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index ec38d45d6649..b940346de4e7 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -86,11 +86,11 @@ static inline void bictcp_reset(struct bictcp *ca)
86 ca->delayed_ack = 2 << ACK_RATIO_SHIFT; 86 ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
87} 87}
88 88
89static void bictcp_init(struct tcp_sock *tp) 89static void bictcp_init(struct sock *sk)
90{ 90{
91 bictcp_reset(tcp_ca(tp)); 91 bictcp_reset(inet_csk_ca(sk));
92 if (initial_ssthresh) 92 if (initial_ssthresh)
93 tp->snd_ssthresh = initial_ssthresh; 93 tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
94} 94}
95 95
96/* 96/*
@@ -156,9 +156,10 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
156 156
157 157
158/* Detect low utilization in congestion avoidance */ 158/* Detect low utilization in congestion avoidance */
159static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag) 159static inline void bictcp_low_utilization(struct sock *sk, int flag)
160{ 160{
161 struct bictcp *ca = tcp_ca(tp); 161 const struct tcp_sock *tp = tcp_sk(sk);
162 struct bictcp *ca = inet_csk_ca(sk);
162 u32 dist, delay; 163 u32 dist, delay;
163 164
164 /* No time stamp */ 165 /* No time stamp */
@@ -208,12 +209,13 @@ static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag)
208 209
209} 210}
210 211
211static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack, 212static void bictcp_cong_avoid(struct sock *sk, u32 ack,
212 u32 seq_rtt, u32 in_flight, int data_acked) 213 u32 seq_rtt, u32 in_flight, int data_acked)
213{ 214{
214 struct bictcp *ca = tcp_ca(tp); 215 struct tcp_sock *tp = tcp_sk(sk);
216 struct bictcp *ca = inet_csk_ca(sk);
215 217
216 bictcp_low_utilization(tp, data_acked); 218 bictcp_low_utilization(sk, data_acked);
217 219
218 if (in_flight < tp->snd_cwnd) 220 if (in_flight < tp->snd_cwnd)
219 return; 221 return;
@@ -242,9 +244,10 @@ static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack,
242 * behave like Reno until low_window is reached, 244 * behave like Reno until low_window is reached,
243 * then increase congestion window slowly 245 * then increase congestion window slowly
244 */ 246 */
245static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp) 247static u32 bictcp_recalc_ssthresh(struct sock *sk)
246{ 248{
247 struct bictcp *ca = tcp_ca(tp); 249 const struct tcp_sock *tp = tcp_sk(sk);
250 struct bictcp *ca = inet_csk_ca(sk);
248 251
249 ca->epoch_start = 0; /* end of epoch */ 252 ca->epoch_start = 0; /* end of epoch */
250 253
@@ -269,31 +272,34 @@ static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp)
269 return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); 272 return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
270} 273}
271 274
272static u32 bictcp_undo_cwnd(struct tcp_sock *tp) 275static u32 bictcp_undo_cwnd(struct sock *sk)
273{ 276{
274 struct bictcp *ca = tcp_ca(tp); 277 const struct tcp_sock *tp = tcp_sk(sk);
275 278 const struct bictcp *ca = inet_csk_ca(sk);
276 return max(tp->snd_cwnd, ca->last_max_cwnd); 279 return max(tp->snd_cwnd, ca->last_max_cwnd);
277} 280}
278 281
279static u32 bictcp_min_cwnd(struct tcp_sock *tp) 282static u32 bictcp_min_cwnd(struct sock *sk)
280{ 283{
284 const struct tcp_sock *tp = tcp_sk(sk);
281 return tp->snd_ssthresh; 285 return tp->snd_ssthresh;
282} 286}
283 287
284static void bictcp_state(struct tcp_sock *tp, u8 new_state) 288static void bictcp_state(struct sock *sk, u8 new_state)
285{ 289{
286 if (new_state == TCP_CA_Loss) 290 if (new_state == TCP_CA_Loss)
287 bictcp_reset(tcp_ca(tp)); 291 bictcp_reset(inet_csk_ca(sk));
288} 292}
289 293
290/* Track delayed acknowledgement ratio using sliding window 294/* Track delayed acknowledgement ratio using sliding window
291 * ratio = (15*ratio + sample) / 16 295 * ratio = (15*ratio + sample) / 16
292 */ 296 */
293static void bictcp_acked(struct tcp_sock *tp, u32 cnt) 297static void bictcp_acked(struct sock *sk, u32 cnt)
294{ 298{
295 if (cnt > 0 && tp->ca_state == TCP_CA_Open) { 299 const struct inet_connection_sock *icsk = inet_csk(sk);
296 struct bictcp *ca = tcp_ca(tp); 300
301 if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) {
302 struct bictcp *ca = inet_csk_ca(sk);
297 cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; 303 cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
298 ca->delayed_ack += cnt; 304 ca->delayed_ack += cnt;
299 } 305 }
@@ -314,7 +320,7 @@ static struct tcp_congestion_ops bictcp = {
314 320
315static int __init bictcp_register(void) 321static int __init bictcp_register(void)
316{ 322{
317 BUG_ON(sizeof(struct bictcp) > TCP_CA_PRIV_SIZE); 323 BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
318 return tcp_register_congestion_control(&bictcp); 324 return tcp_register_congestion_control(&bictcp);
319} 325}
320 326
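
The tcp_bic.c conversion is mechanical but sets the template: every hook
now receives a struct sock * and digs out the tcp_sock and its private
area itself, and the registration guard switches from TCP_CA_PRIV_SIZE to
ICSK_CA_PRIV_SIZE because the scratch space moved into the connection
sock. The resulting hook shape, as a declaration-only sketch with
stand-in typedefs (the congestion modules below get the identical
treatment):

    typedef unsigned int  u32;
    typedef unsigned char u8;
    struct sock;

    /* Post-conversion congestion-control hooks: all take struct sock *,
     * where they used to take struct tcp_sock *. */
    struct cc_ops_sketch {
            void (*init)(struct sock *sk);
            u32  (*ssthresh)(struct sock *sk);
            u32  (*min_cwnd)(struct sock *sk);
            u32  (*undo_cwnd)(struct sock *sk);
            void (*cong_avoid)(struct sock *sk, u32 ack, u32 rtt,
                               u32 in_flight, int data_acked);
            void (*set_state)(struct sock *sk, u8 new_state);
            void (*pkts_acked)(struct sock *sk, u32 cnt);
            const char *name;
    };
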
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 4970d10a7785..bbf2d6624e89 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -73,33 +73,36 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
73EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); 73EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
74 74
75/* Assign choice of congestion control. */ 75/* Assign choice of congestion control. */
76void tcp_init_congestion_control(struct tcp_sock *tp) 76void tcp_init_congestion_control(struct sock *sk)
77{ 77{
78 struct inet_connection_sock *icsk = inet_csk(sk);
78 struct tcp_congestion_ops *ca; 79 struct tcp_congestion_ops *ca;
79 80
80 if (tp->ca_ops != &tcp_init_congestion_ops) 81 if (icsk->icsk_ca_ops != &tcp_init_congestion_ops)
81 return; 82 return;
82 83
83 rcu_read_lock(); 84 rcu_read_lock();
84 list_for_each_entry_rcu(ca, &tcp_cong_list, list) { 85 list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
85 if (try_module_get(ca->owner)) { 86 if (try_module_get(ca->owner)) {
86 tp->ca_ops = ca; 87 icsk->icsk_ca_ops = ca;
87 break; 88 break;
88 } 89 }
89 90
90 } 91 }
91 rcu_read_unlock(); 92 rcu_read_unlock();
92 93
93 if (tp->ca_ops->init) 94 if (icsk->icsk_ca_ops->init)
94 tp->ca_ops->init(tp); 95 icsk->icsk_ca_ops->init(sk);
95} 96}
96 97
97/* Manage refcounts on socket close. */ 98/* Manage refcounts on socket close. */
98void tcp_cleanup_congestion_control(struct tcp_sock *tp) 99void tcp_cleanup_congestion_control(struct sock *sk)
99{ 100{
100 if (tp->ca_ops->release) 101 struct inet_connection_sock *icsk = inet_csk(sk);
101 tp->ca_ops->release(tp); 102
102 module_put(tp->ca_ops->owner); 103 if (icsk->icsk_ca_ops->release)
104 icsk->icsk_ca_ops->release(sk);
105 module_put(icsk->icsk_ca_ops->owner);
103} 106}
104 107
105/* Used by sysctl to change default congestion control */ 108/* Used by sysctl to change default congestion control */
@@ -143,14 +146,15 @@ void tcp_get_default_congestion_control(char *name)
143} 146}
144 147
145/* Change congestion control for socket */ 148/* Change congestion control for socket */
146int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) 149int tcp_set_congestion_control(struct sock *sk, const char *name)
147{ 150{
151 struct inet_connection_sock *icsk = inet_csk(sk);
148 struct tcp_congestion_ops *ca; 152 struct tcp_congestion_ops *ca;
149 int err = 0; 153 int err = 0;
150 154
151 rcu_read_lock(); 155 rcu_read_lock();
152 ca = tcp_ca_find(name); 156 ca = tcp_ca_find(name);
153 if (ca == tp->ca_ops) 157 if (ca == icsk->icsk_ca_ops)
154 goto out; 158 goto out;
155 159
156 if (!ca) 160 if (!ca)
@@ -160,10 +164,10 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name)
160 err = -EBUSY; 164 err = -EBUSY;
161 165
162 else { 166 else {
163 tcp_cleanup_congestion_control(tp); 167 tcp_cleanup_congestion_control(sk);
164 tp->ca_ops = ca; 168 icsk->icsk_ca_ops = ca;
165 if (tp->ca_ops->init) 169 if (icsk->icsk_ca_ops->init)
166 tp->ca_ops->init(tp); 170 icsk->icsk_ca_ops->init(sk);
167 } 171 }
168 out: 172 out:
169 rcu_read_unlock(); 173 rcu_read_unlock();
@@ -177,9 +181,11 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name)
177/* This is Jacobson's slow start and congestion avoidance. 181/* This is Jacobson's slow start and congestion avoidance.
178 * SIGCOMM '88, p. 328. 182 * SIGCOMM '88, p. 328.
179 */ 183 */
180void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, 184void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
181 int flag) 185 int flag)
182{ 186{
187 struct tcp_sock *tp = tcp_sk(sk);
188
183 if (in_flight < tp->snd_cwnd) 189 if (in_flight < tp->snd_cwnd)
184 return; 190 return;
185 191
@@ -202,15 +208,17 @@ void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight,
202EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); 208EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
203 209
204/* Slow start threshold is half the congestion window (min 2) */ 210/* Slow start threshold is half the congestion window (min 2) */
205u32 tcp_reno_ssthresh(struct tcp_sock *tp) 211u32 tcp_reno_ssthresh(struct sock *sk)
206{ 212{
213 const struct tcp_sock *tp = tcp_sk(sk);
207 return max(tp->snd_cwnd >> 1U, 2U); 214 return max(tp->snd_cwnd >> 1U, 2U);
208} 215}
209EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); 216EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
210 217
211/* Lower bound on congestion window. */ 218/* Lower bound on congestion window. */
212u32 tcp_reno_min_cwnd(struct tcp_sock *tp) 219u32 tcp_reno_min_cwnd(struct sock *sk)
213{ 220{
221 const struct tcp_sock *tp = tcp_sk(sk);
214 return tp->snd_ssthresh/2; 222 return tp->snd_ssthresh/2;
215} 223}
216EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); 224EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
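
tcp_cong.c keeps its lifecycle intact -- look the ops up under
rcu_read_lock(), release the old module's reference through
tcp_cleanup_congestion_control(), install and init the new one -- merely
keyed off the icsk now. The exported Reno fallbacks are one-liners of
integer arithmetic; a stand-alone restatement:

    #include <stdio.h>

    /* Slow-start threshold: half the congestion window, minimum 2. */
    static unsigned reno_ssthresh(unsigned snd_cwnd)
    {
            unsigned half = snd_cwnd >> 1;
            return half > 2 ? half : 2;
    }

    /* Lower bound on the congestion window after loss. */
    static unsigned reno_min_cwnd(unsigned snd_ssthresh)
    {
            return snd_ssthresh / 2;
    }

    int main(void)
    {
            printf("cwnd 10 -> ssthresh %u, min_cwnd %u\n",
                   reno_ssthresh(10), reno_min_cwnd(reno_ssthresh(10)));
            return 0;
    }
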
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index f66945cb158f..c148c1081880 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * tcp_diag.c Module for monitoring TCP sockets. 2 * tcp_diag.c Module for monitoring TCP transport protocols sockets.
3 * 3 *
4 * Version: $Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ 4 * Version: $Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $
5 * 5 *
@@ -12,779 +12,43 @@
12 */ 12 */
13 13
14#include <linux/config.h> 14#include <linux/config.h>
15#include <linux/module.h>
16#include <linux/types.h>
17#include <linux/fcntl.h>
18#include <linux/random.h>
19#include <linux/cache.h>
20#include <linux/init.h>
21#include <linux/time.h>
22
23#include <net/icmp.h>
24#include <net/tcp.h>
25#include <net/ipv6.h>
26#include <net/inet_common.h>
27
28#include <linux/inet.h>
29#include <linux/stddef.h>
30
31#include <linux/tcp_diag.h>
32 15
33struct tcpdiag_entry 16#include <linux/module.h>
34{ 17#include <linux/inet_diag.h>
35 u32 *saddr;
36 u32 *daddr;
37 u16 sport;
38 u16 dport;
39 u16 family;
40 u16 userlocks;
41};
42 18
43static struct sock *tcpnl; 19#include <linux/tcp.h>
44 20
45#define TCPDIAG_PUT(skb, attrtype, attrlen) \ 21#include <net/tcp.h>
46 RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
47 22
48static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, 23static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
49 int ext, u32 pid, u32 seq, u16 nlmsg_flags) 24 void *_info)
50{ 25{
51 struct inet_sock *inet = inet_sk(sk); 26 const struct tcp_sock *tp = tcp_sk(sk);
52 struct tcp_sock *tp = tcp_sk(sk); 27 struct tcp_info *info = _info;
53 struct tcpdiagmsg *r;
54 struct nlmsghdr *nlh;
55 struct tcp_info *info = NULL;
56 struct tcpdiag_meminfo *minfo = NULL;
57 unsigned char *b = skb->tail;
58
59 nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r));
60 nlh->nlmsg_flags = nlmsg_flags;
61 r = NLMSG_DATA(nlh);
62 if (sk->sk_state != TCP_TIME_WAIT) {
63 if (ext & (1<<(TCPDIAG_MEMINFO-1)))
64 minfo = TCPDIAG_PUT(skb, TCPDIAG_MEMINFO, sizeof(*minfo));
65 if (ext & (1<<(TCPDIAG_INFO-1)))
66 info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info));
67
68 if (ext & (1<<(TCPDIAG_CONG-1))) {
69 size_t len = strlen(tp->ca_ops->name);
70 strcpy(TCPDIAG_PUT(skb, TCPDIAG_CONG, len+1),
71 tp->ca_ops->name);
72 }
73 }
74 r->tcpdiag_family = sk->sk_family;
75 r->tcpdiag_state = sk->sk_state;
76 r->tcpdiag_timer = 0;
77 r->tcpdiag_retrans = 0;
78
79 r->id.tcpdiag_if = sk->sk_bound_dev_if;
80 r->id.tcpdiag_cookie[0] = (u32)(unsigned long)sk;
81 r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
82
83 if (r->tcpdiag_state == TCP_TIME_WAIT) {
84 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket*)sk;
85 long tmo = tw->tw_ttd - jiffies;
86 if (tmo < 0)
87 tmo = 0;
88
89 r->id.tcpdiag_sport = tw->tw_sport;
90 r->id.tcpdiag_dport = tw->tw_dport;
91 r->id.tcpdiag_src[0] = tw->tw_rcv_saddr;
92 r->id.tcpdiag_dst[0] = tw->tw_daddr;
93 r->tcpdiag_state = tw->tw_substate;
94 r->tcpdiag_timer = 3;
95 r->tcpdiag_expires = (tmo*1000+HZ-1)/HZ;
96 r->tcpdiag_rqueue = 0;
97 r->tcpdiag_wqueue = 0;
98 r->tcpdiag_uid = 0;
99 r->tcpdiag_inode = 0;
100#ifdef CONFIG_IP_TCPDIAG_IPV6
101 if (r->tcpdiag_family == AF_INET6) {
102 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
103 &tw->tw_v6_rcv_saddr);
104 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
105 &tw->tw_v6_daddr);
106 }
107#endif
108 nlh->nlmsg_len = skb->tail - b;
109 return skb->len;
110 }
111
112 r->id.tcpdiag_sport = inet->sport;
113 r->id.tcpdiag_dport = inet->dport;
114 r->id.tcpdiag_src[0] = inet->rcv_saddr;
115 r->id.tcpdiag_dst[0] = inet->daddr;
116
117#ifdef CONFIG_IP_TCPDIAG_IPV6
118 if (r->tcpdiag_family == AF_INET6) {
119 struct ipv6_pinfo *np = inet6_sk(sk);
120
121 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
122 &np->rcv_saddr);
123 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
124 &np->daddr);
125 }
126#endif
127
128#define EXPIRES_IN_MS(tmo) ((tmo-jiffies)*1000+HZ-1)/HZ
129
130 if (tp->pending == TCP_TIME_RETRANS) {
131 r->tcpdiag_timer = 1;
132 r->tcpdiag_retrans = tp->retransmits;
133 r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout);
134 } else if (tp->pending == TCP_TIME_PROBE0) {
135 r->tcpdiag_timer = 4;
136 r->tcpdiag_retrans = tp->probes_out;
137 r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout);
138 } else if (timer_pending(&sk->sk_timer)) {
139 r->tcpdiag_timer = 2;
140 r->tcpdiag_retrans = tp->probes_out;
141 r->tcpdiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires);
142 } else {
143 r->tcpdiag_timer = 0;
144 r->tcpdiag_expires = 0;
145 }
146#undef EXPIRES_IN_MS
147 28
148 r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; 29 r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq;
149 r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; 30 r->idiag_wqueue = tp->write_seq - tp->snd_una;
150 r->tcpdiag_uid = sock_i_uid(sk); 31 if (info != NULL)
151 r->tcpdiag_inode = sock_i_ino(sk);
152
153 if (minfo) {
154 minfo->tcpdiag_rmem = atomic_read(&sk->sk_rmem_alloc);
155 minfo->tcpdiag_wmem = sk->sk_wmem_queued;
156 minfo->tcpdiag_fmem = sk->sk_forward_alloc;
157 minfo->tcpdiag_tmem = atomic_read(&sk->sk_wmem_alloc);
158 }
159
160 if (info)
161 tcp_get_info(sk, info); 32 tcp_get_info(sk, info);
162
163 if (sk->sk_state < TCP_TIME_WAIT && tp->ca_ops->get_info)
164 tp->ca_ops->get_info(tp, ext, skb);
165
166 nlh->nlmsg_len = skb->tail - b;
167 return skb->len;
168
169rtattr_failure:
170nlmsg_failure:
171 skb_trim(skb, b - skb->data);
172 return -1;
173}
174
175extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport,
176 int dif);
177#ifdef CONFIG_IP_TCPDIAG_IPV6
178extern struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
179 struct in6_addr *daddr, u16 dport,
180 int dif);
181#else
182static inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
183 struct in6_addr *daddr, u16 dport,
184 int dif)
185{
186 return NULL;
187}
188#endif
189
190static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh)
191{
192 int err;
193 struct sock *sk;
194 struct tcpdiagreq *req = NLMSG_DATA(nlh);
195 struct sk_buff *rep;
196
197 if (req->tcpdiag_family == AF_INET) {
198 sk = tcp_v4_lookup(req->id.tcpdiag_dst[0], req->id.tcpdiag_dport,
199 req->id.tcpdiag_src[0], req->id.tcpdiag_sport,
200 req->id.tcpdiag_if);
201 }
202#ifdef CONFIG_IP_TCPDIAG_IPV6
203 else if (req->tcpdiag_family == AF_INET6) {
204 sk = tcp_v6_lookup((struct in6_addr*)req->id.tcpdiag_dst, req->id.tcpdiag_dport,
205 (struct in6_addr*)req->id.tcpdiag_src, req->id.tcpdiag_sport,
206 req->id.tcpdiag_if);
207 }
208#endif
209 else {
210 return -EINVAL;
211 }
212
213 if (sk == NULL)
214 return -ENOENT;
215
216 err = -ESTALE;
217 if ((req->id.tcpdiag_cookie[0] != TCPDIAG_NOCOOKIE ||
218 req->id.tcpdiag_cookie[1] != TCPDIAG_NOCOOKIE) &&
219 ((u32)(unsigned long)sk != req->id.tcpdiag_cookie[0] ||
220 (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.tcpdiag_cookie[1]))
221 goto out;
222
223 err = -ENOMEM;
224 rep = alloc_skb(NLMSG_SPACE(sizeof(struct tcpdiagmsg)+
225 sizeof(struct tcpdiag_meminfo)+
226 sizeof(struct tcp_info)+64), GFP_KERNEL);
227 if (!rep)
228 goto out;
229
230 if (tcpdiag_fill(rep, sk, req->tcpdiag_ext,
231 NETLINK_CB(in_skb).pid,
232 nlh->nlmsg_seq, 0) <= 0)
233 BUG();
234
235 err = netlink_unicast(tcpnl, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
236 if (err > 0)
237 err = 0;
238
239out:
240 if (sk) {
241 if (sk->sk_state == TCP_TIME_WAIT)
242 tcp_tw_put((struct tcp_tw_bucket*)sk);
243 else
244 sock_put(sk);
245 }
246 return err;
247}
248
249static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
250{
251 int words = bits >> 5;
252
253 bits &= 0x1f;
254
255 if (words) {
256 if (memcmp(a1, a2, words << 2))
257 return 0;
258 }
259 if (bits) {
260 __u32 w1, w2;
261 __u32 mask;
262
263 w1 = a1[words];
264 w2 = a2[words];
265
266 mask = htonl((0xffffffff) << (32 - bits));
267
268 if ((w1 ^ w2) & mask)
269 return 0;
270 }
271
272 return 1;
273}
274
275
276static int tcpdiag_bc_run(const void *bc, int len,
277 const struct tcpdiag_entry *entry)
278{
279 while (len > 0) {
280 int yes = 1;
281 const struct tcpdiag_bc_op *op = bc;
282
283 switch (op->code) {
284 case TCPDIAG_BC_NOP:
285 break;
286 case TCPDIAG_BC_JMP:
287 yes = 0;
288 break;
289 case TCPDIAG_BC_S_GE:
290 yes = entry->sport >= op[1].no;
291 break;
292 case TCPDIAG_BC_S_LE:
293 yes = entry->dport <= op[1].no;
294 break;
295 case TCPDIAG_BC_D_GE:
296 yes = entry->dport >= op[1].no;
297 break;
298 case TCPDIAG_BC_D_LE:
299 yes = entry->dport <= op[1].no;
300 break;
301 case TCPDIAG_BC_AUTO:
302 yes = !(entry->userlocks & SOCK_BINDPORT_LOCK);
303 break;
304 case TCPDIAG_BC_S_COND:
305 case TCPDIAG_BC_D_COND:
306 {
307 struct tcpdiag_hostcond *cond = (struct tcpdiag_hostcond*)(op+1);
308 u32 *addr;
309
310 if (cond->port != -1 &&
311 cond->port != (op->code == TCPDIAG_BC_S_COND ?
312 entry->sport : entry->dport)) {
313 yes = 0;
314 break;
315 }
316
317 if (cond->prefix_len == 0)
318 break;
319
320 if (op->code == TCPDIAG_BC_S_COND)
321 addr = entry->saddr;
322 else
323 addr = entry->daddr;
324
325 if (bitstring_match(addr, cond->addr, cond->prefix_len))
326 break;
327 if (entry->family == AF_INET6 &&
328 cond->family == AF_INET) {
329 if (addr[0] == 0 && addr[1] == 0 &&
330 addr[2] == htonl(0xffff) &&
331 bitstring_match(addr+3, cond->addr, cond->prefix_len))
332 break;
333 }
334 yes = 0;
335 break;
336 }
337 }
338
339 if (yes) {
340 len -= op->yes;
341 bc += op->yes;
342 } else {
343 len -= op->no;
344 bc += op->no;
345 }
346 }
347 return (len == 0);
348}
349
350static int valid_cc(const void *bc, int len, int cc)
351{
352 while (len >= 0) {
353 const struct tcpdiag_bc_op *op = bc;
354
355 if (cc > len)
356 return 0;
357 if (cc == len)
358 return 1;
359 if (op->yes < 4)
360 return 0;
361 len -= op->yes;
362 bc += op->yes;
363 }
364 return 0;
365}
366
367static int tcpdiag_bc_audit(const void *bytecode, int bytecode_len)
368{
369 const unsigned char *bc = bytecode;
370 int len = bytecode_len;
371
372 while (len > 0) {
373 struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op*)bc;
374
375//printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len);
376 switch (op->code) {
377 case TCPDIAG_BC_AUTO:
378 case TCPDIAG_BC_S_COND:
379 case TCPDIAG_BC_D_COND:
380 case TCPDIAG_BC_S_GE:
381 case TCPDIAG_BC_S_LE:
382 case TCPDIAG_BC_D_GE:
383 case TCPDIAG_BC_D_LE:
384 if (op->yes < 4 || op->yes > len+4)
385 return -EINVAL;
386 case TCPDIAG_BC_JMP:
387 if (op->no < 4 || op->no > len+4)
388 return -EINVAL;
389 if (op->no < len &&
390 !valid_cc(bytecode, bytecode_len, len-op->no))
391 return -EINVAL;
392 break;
393 case TCPDIAG_BC_NOP:
394 if (op->yes < 4 || op->yes > len+4)
395 return -EINVAL;
396 break;
397 default:
398 return -EINVAL;
399 }
400 bc += op->yes;
401 len -= op->yes;
402 }
403 return len == 0 ? 0 : -EINVAL;
404}
405
406static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk,
407 struct netlink_callback *cb)
408{
409 struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
410
411 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
412 struct tcpdiag_entry entry;
413 struct rtattr *bc = (struct rtattr *)(r + 1);
414 struct inet_sock *inet = inet_sk(sk);
415
416 entry.family = sk->sk_family;
417#ifdef CONFIG_IP_TCPDIAG_IPV6
418 if (entry.family == AF_INET6) {
419 struct ipv6_pinfo *np = inet6_sk(sk);
420
421 entry.saddr = np->rcv_saddr.s6_addr32;
422 entry.daddr = np->daddr.s6_addr32;
423 } else
424#endif
425 {
426 entry.saddr = &inet->rcv_saddr;
427 entry.daddr = &inet->daddr;
428 }
429 entry.sport = inet->num;
430 entry.dport = ntohs(inet->dport);
431 entry.userlocks = sk->sk_userlocks;
432
433 if (!tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
434 return 0;
435 }
436
437 return tcpdiag_fill(skb, sk, r->tcpdiag_ext, NETLINK_CB(cb->skb).pid,
438 cb->nlh->nlmsg_seq, NLM_F_MULTI);
439} 33}
440 34
441static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, 35static struct inet_diag_handler tcp_diag_handler = {
442 struct request_sock *req, 36 .idiag_hashinfo = &tcp_hashinfo,
443 u32 pid, u32 seq) 37 .idiag_get_info = tcp_diag_get_info,
444{ 38 .idiag_type = TCPDIAG_GETSOCK,
445 const struct inet_request_sock *ireq = inet_rsk(req); 39 .idiag_info_size = sizeof(struct tcp_info),
446 struct inet_sock *inet = inet_sk(sk); 40};
447 unsigned char *b = skb->tail;
448 struct tcpdiagmsg *r;
449 struct nlmsghdr *nlh;
450 long tmo;
451
452 nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r));
453 nlh->nlmsg_flags = NLM_F_MULTI;
454 r = NLMSG_DATA(nlh);
455
456 r->tcpdiag_family = sk->sk_family;
457 r->tcpdiag_state = TCP_SYN_RECV;
458 r->tcpdiag_timer = 1;
459 r->tcpdiag_retrans = req->retrans;
460
461 r->id.tcpdiag_if = sk->sk_bound_dev_if;
462 r->id.tcpdiag_cookie[0] = (u32)(unsigned long)req;
463 r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1);
464
465 tmo = req->expires - jiffies;
466 if (tmo < 0)
467 tmo = 0;
468
469 r->id.tcpdiag_sport = inet->sport;
470 r->id.tcpdiag_dport = ireq->rmt_port;
471 r->id.tcpdiag_src[0] = ireq->loc_addr;
472 r->id.tcpdiag_dst[0] = ireq->rmt_addr;
473 r->tcpdiag_expires = jiffies_to_msecs(tmo),
474 r->tcpdiag_rqueue = 0;
475 r->tcpdiag_wqueue = 0;
476 r->tcpdiag_uid = sock_i_uid(sk);
477 r->tcpdiag_inode = 0;
478#ifdef CONFIG_IP_TCPDIAG_IPV6
479 if (r->tcpdiag_family == AF_INET6) {
480 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
481 &tcp6_rsk(req)->loc_addr);
482 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
483 &tcp6_rsk(req)->rmt_addr);
484 }
485#endif
486 nlh->nlmsg_len = skb->tail - b;
487
488 return skb->len;
489
490nlmsg_failure:
491 skb_trim(skb, b - skb->data);
492 return -1;
493}
494
495static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
496 struct netlink_callback *cb)
497{
498 struct tcpdiag_entry entry;
499 struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
500 struct tcp_sock *tp = tcp_sk(sk);
501 struct listen_sock *lopt;
502 struct rtattr *bc = NULL;
503 struct inet_sock *inet = inet_sk(sk);
504 int j, s_j;
505 int reqnum, s_reqnum;
506 int err = 0;
507
508 s_j = cb->args[3];
509 s_reqnum = cb->args[4];
510
511 if (s_j > 0)
512 s_j--;
513
514 entry.family = sk->sk_family;
515
516 read_lock_bh(&tp->accept_queue.syn_wait_lock);
517
518 lopt = tp->accept_queue.listen_opt;
519 if (!lopt || !lopt->qlen)
520 goto out;
521
522 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
523 bc = (struct rtattr *)(r + 1);
524 entry.sport = inet->num;
525 entry.userlocks = sk->sk_userlocks;
526 }
527
528 for (j = s_j; j < TCP_SYNQ_HSIZE; j++) {
529 struct request_sock *req, *head = lopt->syn_table[j];
530
531 reqnum = 0;
532 for (req = head; req; reqnum++, req = req->dl_next) {
533 struct inet_request_sock *ireq = inet_rsk(req);
534
535 if (reqnum < s_reqnum)
536 continue;
537 if (r->id.tcpdiag_dport != ireq->rmt_port &&
538 r->id.tcpdiag_dport)
539 continue;
540
541 if (bc) {
542 entry.saddr =
543#ifdef CONFIG_IP_TCPDIAG_IPV6
544 (entry.family == AF_INET6) ?
545 tcp6_rsk(req)->loc_addr.s6_addr32 :
546#endif
547 &ireq->loc_addr;
548 entry.daddr =
549#ifdef CONFIG_IP_TCPDIAG_IPV6
550 (entry.family == AF_INET6) ?
551 tcp6_rsk(req)->rmt_addr.s6_addr32 :
552#endif
553 &ireq->rmt_addr;
554 entry.dport = ntohs(ireq->rmt_port);
555
556 if (!tcpdiag_bc_run(RTA_DATA(bc),
557 RTA_PAYLOAD(bc), &entry))
558 continue;
559 }
560
561 err = tcpdiag_fill_req(skb, sk, req,
562 NETLINK_CB(cb->skb).pid,
563 cb->nlh->nlmsg_seq);
564 if (err < 0) {
565 cb->args[3] = j + 1;
566 cb->args[4] = reqnum;
567 goto out;
568 }
569 }
570
571 s_reqnum = 0;
572 }
573
574out:
575 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
576
577 return err;
578}
579
580static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
581{
582 int i, num;
583 int s_i, s_num;
584 struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
585
586 s_i = cb->args[1];
587 s_num = num = cb->args[2];
588
589 if (cb->args[0] == 0) {
590 if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV)))
591 goto skip_listen_ht;
592 tcp_listen_lock();
593 for (i = s_i; i < TCP_LHTABLE_SIZE; i++) {
594 struct sock *sk;
595 struct hlist_node *node;
596
597 num = 0;
598 sk_for_each(sk, node, &tcp_listening_hash[i]) {
599 struct inet_sock *inet = inet_sk(sk);
600
601 if (num < s_num) {
602 num++;
603 continue;
604 }
605
606 if (r->id.tcpdiag_sport != inet->sport &&
607 r->id.tcpdiag_sport)
608 goto next_listen;
609
610 if (!(r->tcpdiag_states&TCPF_LISTEN) ||
611 r->id.tcpdiag_dport ||
612 cb->args[3] > 0)
613 goto syn_recv;
614
615 if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
616 tcp_listen_unlock();
617 goto done;
618 }
619
620syn_recv:
621 if (!(r->tcpdiag_states&TCPF_SYN_RECV))
622 goto next_listen;
623
624 if (tcpdiag_dump_reqs(skb, sk, cb) < 0) {
625 tcp_listen_unlock();
626 goto done;
627 }
628
629next_listen:
630 cb->args[3] = 0;
631 cb->args[4] = 0;
632 ++num;
633 }
634
635 s_num = 0;
636 cb->args[3] = 0;
637 cb->args[4] = 0;
638 }
639 tcp_listen_unlock();
640skip_listen_ht:
641 cb->args[0] = 1;
642 s_i = num = s_num = 0;
643 }
644
645 if (!(r->tcpdiag_states&~(TCPF_LISTEN|TCPF_SYN_RECV)))
646 return skb->len;
647
648 for (i = s_i; i < tcp_ehash_size; i++) {
649 struct tcp_ehash_bucket *head = &tcp_ehash[i];
650 struct sock *sk;
651 struct hlist_node *node;
652
653 if (i > s_i)
654 s_num = 0;
655
656 read_lock_bh(&head->lock);
657
658 num = 0;
659 sk_for_each(sk, node, &head->chain) {
660 struct inet_sock *inet = inet_sk(sk);
661
662 if (num < s_num)
663 goto next_normal;
664 if (!(r->tcpdiag_states & (1 << sk->sk_state)))
665 goto next_normal;
666 if (r->id.tcpdiag_sport != inet->sport &&
667 r->id.tcpdiag_sport)
668 goto next_normal;
669 if (r->id.tcpdiag_dport != inet->dport && r->id.tcpdiag_dport)
670 goto next_normal;
671 if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
672 read_unlock_bh(&head->lock);
673 goto done;
674 }
675next_normal:
676 ++num;
677 }
678
679 if (r->tcpdiag_states&TCPF_TIME_WAIT) {
680 sk_for_each(sk, node,
681 &tcp_ehash[i + tcp_ehash_size].chain) {
682 struct inet_sock *inet = inet_sk(sk);
683
684 if (num < s_num)
685 goto next_dying;
686 if (r->id.tcpdiag_sport != inet->sport &&
687 r->id.tcpdiag_sport)
688 goto next_dying;
689 if (r->id.tcpdiag_dport != inet->dport &&
690 r->id.tcpdiag_dport)
691 goto next_dying;
692 if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
693 read_unlock_bh(&head->lock);
694 goto done;
695 }
696next_dying:
697 ++num;
698 }
699 }
700 read_unlock_bh(&head->lock);
701 }
702
703done:
704 cb->args[1] = i;
705 cb->args[2] = num;
706 return skb->len;
707}
708
709static int tcpdiag_dump_done(struct netlink_callback *cb)
710{
711 return 0;
712}
713
714
715static __inline__ int
716tcpdiag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
717{
718 if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
719 return 0;
720
721 if (nlh->nlmsg_type != TCPDIAG_GETSOCK)
722 goto err_inval;
723
724 if (NLMSG_LENGTH(sizeof(struct tcpdiagreq)) > skb->len)
725 goto err_inval;
726
727 if (nlh->nlmsg_flags&NLM_F_DUMP) {
728 if (nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(struct tcpdiagreq))) {
729 struct rtattr *rta = (struct rtattr*)(NLMSG_DATA(nlh) + sizeof(struct tcpdiagreq));
730 if (rta->rta_type != TCPDIAG_REQ_BYTECODE ||
731 rta->rta_len < 8 ||
732 rta->rta_len > nlh->nlmsg_len - NLMSG_SPACE(sizeof(struct tcpdiagreq)))
733 goto err_inval;
734 if (tcpdiag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta)))
735 goto err_inval;
736 }
737 return netlink_dump_start(tcpnl, skb, nlh,
738 tcpdiag_dump,
739 tcpdiag_dump_done);
740 } else {
741 return tcpdiag_get_exact(skb, nlh);
742 }
743
744err_inval:
745 return -EINVAL;
746}
747
748
749static inline void tcpdiag_rcv_skb(struct sk_buff *skb)
750{
751 int err;
752 struct nlmsghdr * nlh;
753
754 if (skb->len >= NLMSG_SPACE(0)) {
755 nlh = (struct nlmsghdr *)skb->data;
756 if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
757 return;
758 err = tcpdiag_rcv_msg(skb, nlh);
759 if (err || nlh->nlmsg_flags & NLM_F_ACK)
760 netlink_ack(skb, nlh, err);
761 }
762}
763
764static void tcpdiag_rcv(struct sock *sk, int len)
765{
766 struct sk_buff *skb;
767 unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
768
769 while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) {
770 tcpdiag_rcv_skb(skb);
771 kfree_skb(skb);
772 }
773}
774 41
775static int __init tcpdiag_init(void) 42static int __init tcp_diag_init(void)
776{ 43{
777 tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv); 44 return inet_diag_register(&tcp_diag_handler);
778 if (tcpnl == NULL)
779 return -ENOMEM;
780 return 0;
781} 45}
782 46
783static void __exit tcpdiag_exit(void) 47static void __exit tcp_diag_exit(void)
784{ 48{
785 sock_release(tcpnl->sk_socket); 49 inet_diag_unregister(&tcp_diag_handler);
786} 50}
787 51
788module_init(tcpdiag_init); 52module_init(tcp_diag_init);
789module_exit(tcpdiag_exit); 53module_exit(tcp_diag_exit);
790MODULE_LICENSE("GPL"); 54MODULE_LICENSE("GPL");
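
The tcp_diag.c diff is the centrepiece of the series: roughly 750 lines
of netlink dump, request-socket walk and bytecode-filter machinery leave
this file for the new generic inet_diag module, and what remains is a
handler contributing only TCP's queue accounting plus tcp_get_info().
Another transport would plug in the same way; a sketch of the pattern --
the "mydiag" names are hypothetical, and as kernel code the fragment
only builds inside the tree:

    static void mydiag_get_info(struct sock *sk, struct inet_diag_msg *r,
                                void *_info)
    {
            /* fill r->idiag_rqueue, r->idiag_wqueue and *_info here */
    }

    static struct inet_diag_handler mydiag_handler = {
            .idiag_hashinfo  = &tcp_hashinfo,   /* transport's hash tables */
            .idiag_get_info  = mydiag_get_info,
            .idiag_type      = TCPDIAG_GETSOCK, /* netlink message type */
            .idiag_info_size = sizeof(struct tcp_info),
    };

    static int __init mydiag_init(void)
    {
            return inet_diag_register(&mydiag_handler);
    }

    static void __exit mydiag_exit(void)
    {
            inet_diag_unregister(&mydiag_handler);
    }
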
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 36c51f8136bf..6acc04bde080 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -98,9 +98,10 @@ struct hstcp {
98 u32 ai; 98 u32 ai;
99}; 99};
100 100
101static void hstcp_init(struct tcp_sock *tp) 101static void hstcp_init(struct sock *sk)
102{ 102{
103 struct hstcp *ca = tcp_ca(tp); 103 struct tcp_sock *tp = tcp_sk(sk);
104 struct hstcp *ca = inet_csk_ca(sk);
104 105
105 ca->ai = 0; 106 ca->ai = 0;
106 107
@@ -109,10 +110,11 @@ static void hstcp_init(struct tcp_sock *tp)
109 tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); 110 tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
110} 111}
111 112
112static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt, 113static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
113 u32 in_flight, int good) 114 u32 in_flight, int good)
114{ 115{
115 struct hstcp *ca = tcp_ca(tp); 116 struct tcp_sock *tp = tcp_sk(sk);
117 struct hstcp *ca = inet_csk_ca(sk);
116 118
117 if (in_flight < tp->snd_cwnd) 119 if (in_flight < tp->snd_cwnd)
118 return; 120 return;
@@ -143,9 +145,10 @@ static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt,
143 } 145 }
144} 146}
145 147
146static u32 hstcp_ssthresh(struct tcp_sock *tp) 148static u32 hstcp_ssthresh(struct sock *sk)
147{ 149{
148 struct hstcp *ca = tcp_ca(tp); 150 const struct tcp_sock *tp = tcp_sk(sk);
151 const struct hstcp *ca = inet_csk_ca(sk);
149 152
150 /* Do multiplicative decrease */ 153 /* Do multiplicative decrease */
151 return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); 154 return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
@@ -164,7 +167,7 @@ static struct tcp_congestion_ops tcp_highspeed = {
164 167
165static int __init hstcp_register(void) 168static int __init hstcp_register(void)
166{ 169{
167 BUG_ON(sizeof(struct hstcp) > TCP_CA_PRIV_SIZE); 170 BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE);
168 return tcp_register_congestion_control(&tcp_highspeed); 171 return tcp_register_congestion_control(&tcp_highspeed);
169} 172}
170 173
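
HighSpeed TCP's table-driven decrease survives the conversion untouched:
hstcp_ssthresh() cuts cwnd by hstcp_aimd_vals[ca->ai].md, a fraction kept
in 1/256 fixed point (hence the >> 8). A worked restatement with an
illustrative md value:

    #include <stdio.h>

    /* Multiplicative decrease with md as a 1/256 fixed-point fraction;
     * md_256 = 128 (a 50% cut) is illustrative, not a value taken from
     * hstcp_aimd_vals[]. */
    static unsigned hstcp_md(unsigned cwnd, unsigned md_256)
    {
            unsigned decreased = cwnd - ((cwnd * md_256) >> 8);
            return decreased > 2 ? decreased : 2;
    }

    int main(void)
    {
            printf("cwnd 100, md 128/256 -> %u\n", hstcp_md(100, 128));
            return 0;
    }
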
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 40168275acf9..e47b37984e95 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -55,18 +55,21 @@ static inline void htcp_reset(struct htcp *ca)
55 ca->snd_cwnd_cnt2 = 0; 55 ca->snd_cwnd_cnt2 = 0;
56} 56}
57 57
58static u32 htcp_cwnd_undo(struct tcp_sock *tp) 58static u32 htcp_cwnd_undo(struct sock *sk)
59{ 59{
60 struct htcp *ca = tcp_ca(tp); 60 const struct tcp_sock *tp = tcp_sk(sk);
61 struct htcp *ca = inet_csk_ca(sk);
61 ca->ccount = ca->undo_ccount; 62 ca->ccount = ca->undo_ccount;
62 ca->maxRTT = ca->undo_maxRTT; 63 ca->maxRTT = ca->undo_maxRTT;
63 ca->old_maxB = ca->undo_old_maxB; 64 ca->old_maxB = ca->undo_old_maxB;
64 return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta); 65 return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta);
65} 66}
66 67
67static inline void measure_rtt(struct tcp_sock *tp) 68static inline void measure_rtt(struct sock *sk)
68{ 69{
69 struct htcp *ca = tcp_ca(tp); 70 const struct inet_connection_sock *icsk = inet_csk(sk);
71 const struct tcp_sock *tp = tcp_sk(sk);
72 struct htcp *ca = inet_csk_ca(sk);
70 u32 srtt = tp->srtt>>3; 73 u32 srtt = tp->srtt>>3;
71 74
72 /* keep track of minimum RTT seen so far, minRTT is zero at first */ 75 /* keep track of minimum RTT seen so far, minRTT is zero at first */
@@ -74,7 +77,7 @@ static inline void measure_rtt(struct tcp_sock *tp)
74 ca->minRTT = srtt; 77 ca->minRTT = srtt;
75 78
76 /* max RTT */ 79 /* max RTT */
77 if (tp->ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) { 80 if (icsk->icsk_ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) {
78 if (ca->maxRTT < ca->minRTT) 81 if (ca->maxRTT < ca->minRTT)
79 ca->maxRTT = ca->minRTT; 82 ca->maxRTT = ca->minRTT;
80 if (ca->maxRTT < srtt && srtt <= ca->maxRTT+HZ/50) 83 if (ca->maxRTT < srtt && srtt <= ca->maxRTT+HZ/50)
@@ -82,13 +85,16 @@ static inline void measure_rtt(struct tcp_sock *tp)
82 } 85 }
83} 86}
84 87
85static void measure_achieved_throughput(struct tcp_sock *tp, u32 pkts_acked) 88static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked)
86{ 89{
87 struct htcp *ca = tcp_ca(tp); 90 const struct inet_connection_sock *icsk = inet_csk(sk);
91 const struct tcp_sock *tp = tcp_sk(sk);
92 struct htcp *ca = inet_csk_ca(sk);
88 u32 now = tcp_time_stamp; 93 u32 now = tcp_time_stamp;
89 94
90 /* achieved throughput calculations */ 95 /* achieved throughput calculations */
91 if (tp->ca_state != TCP_CA_Open && tp->ca_state != TCP_CA_Disorder) { 96 if (icsk->icsk_ca_state != TCP_CA_Open &&
97 icsk->icsk_ca_state != TCP_CA_Disorder) {
92 ca->packetcount = 0; 98 ca->packetcount = 0;
93 ca->lasttime = now; 99 ca->lasttime = now;
94 return; 100 return;
@@ -173,9 +179,9 @@ static inline void htcp_alpha_update(struct htcp *ca)
173 * that point do we really have a real sense of maxRTT (the queues en route 179 * that point do we really have a real sense of maxRTT (the queues en route
174 * were getting just too full now). 180 * were getting just too full now).
175 */ 181 */
176static void htcp_param_update(struct tcp_sock *tp) 182static void htcp_param_update(struct sock *sk)
177{ 183{
178 struct htcp *ca = tcp_ca(tp); 184 struct htcp *ca = inet_csk_ca(sk);
179 u32 minRTT = ca->minRTT; 185 u32 minRTT = ca->minRTT;
180 u32 maxRTT = ca->maxRTT; 186 u32 maxRTT = ca->maxRTT;
181 187
@@ -187,17 +193,19 @@ static void htcp_param_update(struct tcp_sock *tp)
187 ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100; 193 ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100;
188} 194}
189 195
190static u32 htcp_recalc_ssthresh(struct tcp_sock *tp) 196static u32 htcp_recalc_ssthresh(struct sock *sk)
191{ 197{
192 struct htcp *ca = tcp_ca(tp); 198 const struct tcp_sock *tp = tcp_sk(sk);
193 htcp_param_update(tp); 199 const struct htcp *ca = inet_csk_ca(sk);
200 htcp_param_update(sk);
194 return max((tp->snd_cwnd * ca->beta) >> 7, 2U); 201 return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
195} 202}
196 203
197static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, 204static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
198 u32 in_flight, int data_acked) 205 u32 in_flight, int data_acked)
199{ 206{
200 struct htcp *ca = tcp_ca(tp); 207 struct tcp_sock *tp = tcp_sk(sk);
208 struct htcp *ca = inet_csk_ca(sk);
201 209
202 if (in_flight < tp->snd_cwnd) 210 if (in_flight < tp->snd_cwnd)
203 return; 211 return;
@@ -207,7 +215,7 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
207 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 215 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
208 tp->snd_cwnd++; 216 tp->snd_cwnd++;
209 } else { 217 } else {
210 measure_rtt(tp); 218 measure_rtt(sk);
211 219
212 /* keep track of number of round-trip times since last backoff event */ 220 /* keep track of number of round-trip times since last backoff event */
213 if (ca->snd_cwnd_cnt2++ > tp->snd_cwnd) { 221 if (ca->snd_cwnd_cnt2++ > tp->snd_cwnd) {
@@ -229,28 +237,29 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
229} 237}
230 238
231/* Lower bound on congestion window. */ 239/* Lower bound on congestion window. */
232static u32 htcp_min_cwnd(struct tcp_sock *tp) 240static u32 htcp_min_cwnd(struct sock *sk)
233{ 241{
242 const struct tcp_sock *tp = tcp_sk(sk);
234 return tp->snd_ssthresh; 243 return tp->snd_ssthresh;
235} 244}
236 245
237 246
238static void htcp_init(struct tcp_sock *tp) 247static void htcp_init(struct sock *sk)
239{ 248{
240 struct htcp *ca = tcp_ca(tp); 249 struct htcp *ca = inet_csk_ca(sk);
241 250
242 memset(ca, 0, sizeof(struct htcp)); 251 memset(ca, 0, sizeof(struct htcp));
243 ca->alpha = ALPHA_BASE; 252 ca->alpha = ALPHA_BASE;
244 ca->beta = BETA_MIN; 253 ca->beta = BETA_MIN;
245} 254}
246 255
247static void htcp_state(struct tcp_sock *tp, u8 new_state) 256static void htcp_state(struct sock *sk, u8 new_state)
248{ 257{
249 switch (new_state) { 258 switch (new_state) {
250 case TCP_CA_CWR: 259 case TCP_CA_CWR:
251 case TCP_CA_Recovery: 260 case TCP_CA_Recovery:
252 case TCP_CA_Loss: 261 case TCP_CA_Loss:
253 htcp_reset(tcp_ca(tp)); 262 htcp_reset(inet_csk_ca(sk));
254 break; 263 break;
255 } 264 }
256} 265}
@@ -269,7 +278,7 @@ static struct tcp_congestion_ops htcp = {
269 278
270static int __init htcp_register(void) 279static int __init htcp_register(void)
271{ 280{
272 BUG_ON(sizeof(struct htcp) > TCP_CA_PRIV_SIZE); 281 BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
273 BUILD_BUG_ON(BETA_MIN >= BETA_MAX); 282 BUILD_BUG_ON(BETA_MIN >= BETA_MAX);
274 if (!use_bandwidth_switch) 283 if (!use_bandwidth_switch)
275 htcp.pkts_acked = NULL; 284 htcp.pkts_acked = NULL;
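
H-TCP keeps its beta in 1/128 fixed point (the << 7 / >> 7 pairs above):
htcp_recalc_ssthresh() scales cwnd by beta/128, and htcp_cwnd_undo()
inverts the same scaling from snd_ssthresh. A numeric sketch with an
illustrative beta of 102/128 (roughly 0.8):

    #include <stdio.h>

    /* ssthresh = max(cwnd * beta / 128, 2), beta in 1/128 fixed point. */
    static unsigned htcp_ssthresh(unsigned cwnd, unsigned beta_128)
    {
            unsigned s = (cwnd * beta_128) >> 7;
            return s > 2 ? s : 2;
    }

    int main(void)
    {
            printf("cwnd 100, beta 102/128 -> ssthresh %u\n",
                   htcp_ssthresh(100, 102));
            return 0;
    }
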
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 13a66342c304..77add63623df 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -33,19 +33,20 @@ MODULE_PARM_DESC(rtt0, "reference round trip time (ms)");
33 33
34 34
35/* This is called to refresh values for hybla parameters */ 35/* This is called to refresh values for hybla parameters */
36static inline void hybla_recalc_param (struct tcp_sock *tp) 36static inline void hybla_recalc_param (struct sock *sk)
37{ 37{
38 struct hybla *ca = tcp_ca(tp); 38 struct hybla *ca = inet_csk_ca(sk);
39 39
40 ca->rho_3ls = max_t(u32, tp->srtt / msecs_to_jiffies(rtt0), 8); 40 ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8);
41 ca->rho = ca->rho_3ls >> 3; 41 ca->rho = ca->rho_3ls >> 3;
42 ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1; 42 ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1;
43 ca->rho2 = ca->rho2_7ls >>7; 43 ca->rho2 = ca->rho2_7ls >>7;
44} 44}
45 45
46static void hybla_init(struct tcp_sock *tp) 46static void hybla_init(struct sock *sk)
47{ 47{
48 struct hybla *ca = tcp_ca(tp); 48 struct tcp_sock *tp = tcp_sk(sk);
49 struct hybla *ca = inet_csk_ca(sk);
49 50
50 ca->rho = 0; 51 ca->rho = 0;
51 ca->rho2 = 0; 52 ca->rho2 = 0;
@@ -57,17 +58,16 @@ static void hybla_init(struct tcp_sock *tp)
57 tp->snd_cwnd_clamp = 65535; 58 tp->snd_cwnd_clamp = 65535;
58 59
59 /* 1st Rho measurement based on initial srtt */ 60 /* 1st Rho measurement based on initial srtt */
60 hybla_recalc_param(tp); 61 hybla_recalc_param(sk);
61 62
62 /* set minimum rtt as this is the 1st ever seen */ 63 /* set minimum rtt as this is the 1st ever seen */
63 ca->minrtt = tp->srtt; 64 ca->minrtt = tp->srtt;
64 tp->snd_cwnd = ca->rho; 65 tp->snd_cwnd = ca->rho;
65} 66}
66 67
67static void hybla_state(struct tcp_sock *tp, u8 ca_state) 68static void hybla_state(struct sock *sk, u8 ca_state)
68{ 69{
69 struct hybla *ca = tcp_ca(tp); 70 struct hybla *ca = inet_csk_ca(sk);
70
71 ca->hybla_en = (ca_state == TCP_CA_Open); 71 ca->hybla_en = (ca_state == TCP_CA_Open);
72} 72}
73 73
@@ -86,27 +86,28 @@ static inline u32 hybla_fraction(u32 odds)
86 * o Give cwnd a new value based on the model proposed 86 * o Give cwnd a new value based on the model proposed
87 * o remember increments <1 87 * o remember increments <1
88 */ 88 */
89static void hybla_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, 89static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
90 u32 in_flight, int flag) 90 u32 in_flight, int flag)
91{ 91{
92 struct hybla *ca = tcp_ca(tp); 92 struct tcp_sock *tp = tcp_sk(sk);
93 struct hybla *ca = inet_csk_ca(sk);
93 u32 increment, odd, rho_fractions; 94 u32 increment, odd, rho_fractions;
94 int is_slowstart = 0; 95 int is_slowstart = 0;
95 96
96 /* Recalculate rho only if this srtt is the lowest */ 97 /* Recalculate rho only if this srtt is the lowest */
97 if (tp->srtt < ca->minrtt){ 98 if (tp->srtt < ca->minrtt){
98 hybla_recalc_param(tp); 99 hybla_recalc_param(sk);
99 ca->minrtt = tp->srtt; 100 ca->minrtt = tp->srtt;
100 } 101 }
101 102
102 if (!ca->hybla_en) 103 if (!ca->hybla_en)
103 return tcp_reno_cong_avoid(tp, ack, rtt, in_flight, flag); 104 return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag);
104 105
105 if (in_flight < tp->snd_cwnd) 106 if (in_flight < tp->snd_cwnd)
106 return; 107 return;
107 108
108 if (ca->rho == 0) 109 if (ca->rho == 0)
109 hybla_recalc_param(tp); 110 hybla_recalc_param(sk);
110 111
111 rho_fractions = ca->rho_3ls - (ca->rho << 3); 112 rho_fractions = ca->rho_3ls - (ca->rho << 3);
112 113
@@ -170,7 +171,7 @@ static struct tcp_congestion_ops tcp_hybla = {
170 171
171static int __init hybla_register(void) 172static int __init hybla_register(void)
172{ 173{
173 BUG_ON(sizeof(struct hybla) > TCP_CA_PRIV_SIZE); 174 BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE);
174 return tcp_register_congestion_control(&tcp_hybla); 175 return tcp_register_congestion_control(&tcp_hybla);
175} 176}
176 177
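
Hybla's rho -- the connection's smoothed RTT over the reference rtt0
module parameter (milliseconds) -- is held in 3-bit fixed point as
rho_3ls and floored at 8, i.e. rho >= 1, so short-RTT flows fall back to
Reno-like growth. A stand-alone sketch of the computation (the jiffies
conversion is elided, inputs are in milliseconds, and the 25 ms rtt0 is
illustrative):

    #include <stdio.h>

    /* rho * 8, floored at 8 (rho >= 1), mirroring hybla_recalc_param(). */
    static unsigned rho_3ls(unsigned srtt_ms, unsigned rtt0_ms)
    {
            unsigned r = (srtt_ms << 3) / rtt0_ms;
            return r > 8 ? r : 8;
    }

    int main(void)
    {
            /* a 300 ms satellite path against a 25 ms reference */
            unsigned r = rho_3ls(300, 25);
            printf("rho_3ls = %u, rho = %u\n", r, r >> 3);
            return 0;
    }
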
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 53a8a5399f1e..29222b964951 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -114,20 +114,21 @@ int sysctl_tcp_moderate_rcvbuf = 1;
114/* Adapt the MSS value used to make delayed ack decision to the 114/* Adapt the MSS value used to make delayed ack decision to the
115 * real world. 115 * real world.
116 */ 116 */
117static inline void tcp_measure_rcv_mss(struct tcp_sock *tp, 117static inline void tcp_measure_rcv_mss(struct sock *sk,
118 struct sk_buff *skb) 118 const struct sk_buff *skb)
119{ 119{
120 unsigned int len, lss; 120 struct inet_connection_sock *icsk = inet_csk(sk);
121 const unsigned int lss = icsk->icsk_ack.last_seg_size;
122 unsigned int len;
121 123
122 lss = tp->ack.last_seg_size; 124 icsk->icsk_ack.last_seg_size = 0;
123 tp->ack.last_seg_size = 0;
124 125
125 /* skb->len may jitter because of SACKs, even if peer 126 /* skb->len may jitter because of SACKs, even if peer
126 * sends good full-sized frames. 127 * sends good full-sized frames.
127 */ 128 */
128 len = skb->len; 129 len = skb->len;
129 if (len >= tp->ack.rcv_mss) { 130 if (len >= icsk->icsk_ack.rcv_mss) {
130 tp->ack.rcv_mss = len; 131 icsk->icsk_ack.rcv_mss = len;
131 } else { 132 } else {
132 /* Otherwise, we make more careful check taking into account, 133 /* Otherwise, we make more careful check taking into account,
133 * that SACKs block is variable. 134 * that SACKs block is variable.
@@ -147,41 +148,44 @@ static inline void tcp_measure_rcv_mss(struct tcp_sock *tp,
147 * tcp header plus fixed timestamp option length. 148 * tcp header plus fixed timestamp option length.
148 * Resulting "len" is MSS free of SACK jitter. 149 * Resulting "len" is MSS free of SACK jitter.
149 */ 150 */
150 len -= tp->tcp_header_len; 151 len -= tcp_sk(sk)->tcp_header_len;
151 tp->ack.last_seg_size = len; 152 icsk->icsk_ack.last_seg_size = len;
152 if (len == lss) { 153 if (len == lss) {
153 tp->ack.rcv_mss = len; 154 icsk->icsk_ack.rcv_mss = len;
154 return; 155 return;
155 } 156 }
156 } 157 }
157 tp->ack.pending |= TCP_ACK_PUSHED; 158 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
158 } 159 }
159} 160}
160 161
161static void tcp_incr_quickack(struct tcp_sock *tp) 162static void tcp_incr_quickack(struct sock *sk)
162{ 163{
163 unsigned quickacks = tp->rcv_wnd/(2*tp->ack.rcv_mss); 164 struct inet_connection_sock *icsk = inet_csk(sk);
165 unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
164 166
165 if (quickacks==0) 167 if (quickacks==0)
166 quickacks=2; 168 quickacks=2;
167 if (quickacks > tp->ack.quick) 169 if (quickacks > icsk->icsk_ack.quick)
168 tp->ack.quick = min(quickacks, TCP_MAX_QUICKACKS); 170 icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
169} 171}
170 172
171void tcp_enter_quickack_mode(struct tcp_sock *tp) 173void tcp_enter_quickack_mode(struct sock *sk)
172{ 174{
173 tcp_incr_quickack(tp); 175 struct inet_connection_sock *icsk = inet_csk(sk);
174 tp->ack.pingpong = 0; 176 tcp_incr_quickack(sk);
175 tp->ack.ato = TCP_ATO_MIN; 177 icsk->icsk_ack.pingpong = 0;
178 icsk->icsk_ack.ato = TCP_ATO_MIN;
176} 179}
177 180
178/* Send ACKs quickly, if "quick" count is not exhausted 181/* Send ACKs quickly, if "quick" count is not exhausted
179 * and the session is not interactive. 182 * and the session is not interactive.
180 */ 183 */
181 184
182static __inline__ int tcp_in_quickack_mode(struct tcp_sock *tp) 185static inline int tcp_in_quickack_mode(const struct sock *sk)
183{ 186{
184 return (tp->ack.quick && !tp->ack.pingpong); 187 const struct inet_connection_sock *icsk = inet_csk(sk);
188 return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
185} 189}
186 190
187/* Buffer size and advertised window tuning. 191/* Buffer size and advertised window tuning.
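
tcp_incr_quickack(), just converted above, sizes the quick-ACK credit as
half a receive window's worth of MSS-sized segments, floored at 2 and
capped at TCP_MAX_QUICKACKS; while the credit lasts (and pingpong is off,
per tcp_in_quickack_mode()) ACKs go out without delay. A stand-alone
restatement, with TCP_MAX_QUICKACKS assumed to be 16, its conventional
value:

    #include <stdio.h>

    #define TCP_MAX_QUICKACKS 16    /* assumed */

    static unsigned quickack_credit(unsigned rcv_wnd, unsigned rcv_mss)
    {
            unsigned q = rcv_wnd / (2 * rcv_mss);

            if (q == 0)
                    q = 2;
            return q < TCP_MAX_QUICKACKS ? q : TCP_MAX_QUICKACKS;
    }

    int main(void)
    {
            /* 64 KiB window, 1460-byte segments: 22, capped at 16 */
            printf("%u\n", quickack_credit(65536, 1460));
            return 0;
    }
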
@@ -224,8 +228,8 @@ static void tcp_fixup_sndbuf(struct sock *sk)
224 */ 228 */
225 229
226/* Slow part of check#2. */ 230/* Slow part of check#2. */
227static int __tcp_grow_window(struct sock *sk, struct tcp_sock *tp, 231static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
228 struct sk_buff *skb) 232 const struct sk_buff *skb)
229{ 233{
230 /* Optimize this! */ 234 /* Optimize this! */
231 int truesize = tcp_win_from_space(skb->truesize)/2; 235 int truesize = tcp_win_from_space(skb->truesize)/2;
@@ -233,7 +237,7 @@ static int __tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
233 237
234 while (tp->rcv_ssthresh <= window) { 238 while (tp->rcv_ssthresh <= window) {
235 if (truesize <= skb->len) 239 if (truesize <= skb->len)
236 return 2*tp->ack.rcv_mss; 240 return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
237 241
238 truesize >>= 1; 242 truesize >>= 1;
239 window >>= 1; 243 window >>= 1;
@@ -260,7 +264,7 @@ static inline void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
260 264
261 if (incr) { 265 if (incr) {
262 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp); 266 tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp);
263 tp->ack.quick |= 1; 267 inet_csk(sk)->icsk_ack.quick |= 1;
264 } 268 }
265 } 269 }
266} 270}
@@ -321,11 +325,12 @@ static void tcp_init_buffer_space(struct sock *sk)
321/* 5. Recalculate window clamp after socket hit its memory bounds. */ 325/* 5. Recalculate window clamp after socket hit its memory bounds. */
322static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) 326static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
323{ 327{
328 struct inet_connection_sock *icsk = inet_csk(sk);
324 struct sk_buff *skb; 329 struct sk_buff *skb;
325 unsigned int app_win = tp->rcv_nxt - tp->copied_seq; 330 unsigned int app_win = tp->rcv_nxt - tp->copied_seq;
326 int ofo_win = 0; 331 int ofo_win = 0;
327 332
328 tp->ack.quick = 0; 333 icsk->icsk_ack.quick = 0;
329 334
330 skb_queue_walk(&tp->out_of_order_queue, skb) { 335 skb_queue_walk(&tp->out_of_order_queue, skb) {
331 ofo_win += skb->len; 336 ofo_win += skb->len;
@@ -346,8 +351,8 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
346 app_win += ofo_win; 351 app_win += ofo_win;
347 if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf) 352 if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf)
348 app_win >>= 1; 353 app_win >>= 1;
349 if (app_win > tp->ack.rcv_mss) 354 if (app_win > icsk->icsk_ack.rcv_mss)
350 app_win -= tp->ack.rcv_mss; 355 app_win -= icsk->icsk_ack.rcv_mss;
351 app_win = max(app_win, 2U*tp->advmss); 356 app_win = max(app_win, 2U*tp->advmss);
352 357
353 if (!ofo_win) 358 if (!ofo_win)
@@ -415,11 +420,12 @@ new_measure:
415 tp->rcv_rtt_est.time = tcp_time_stamp; 420 tp->rcv_rtt_est.time = tcp_time_stamp;
416} 421}
417 422
418static inline void tcp_rcv_rtt_measure_ts(struct tcp_sock *tp, struct sk_buff *skb) 423static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb)
419{ 424{
425 struct tcp_sock *tp = tcp_sk(sk);
420 if (tp->rx_opt.rcv_tsecr && 426 if (tp->rx_opt.rcv_tsecr &&
421 (TCP_SKB_CB(skb)->end_seq - 427 (TCP_SKB_CB(skb)->end_seq -
422 TCP_SKB_CB(skb)->seq >= tp->ack.rcv_mss)) 428 TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
423 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0); 429 tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
424} 430}
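
tcp_rcv_rtt_measure_ts() accepts a timestamp-echo RTT sample only when the segment carries at least rcv_mss bytes, so pure ACKs and runt segments do not skew the receiver-side estimate. A hedged sketch of that guard, with a hypothetical flattened context struct standing in for tcp_sock/icsk.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical, flattened view of the fields the kernel consults. */
struct rtt_ctx {
	uint32_t rcv_tsecr; /* echoed timestamp from the peer */
	uint32_t rcv_mss;   /* estimated peer MSS             */
};

/* Returns an RTT sample in ticks, or -1 when the segment must be
 * ignored (no echoed timestamp, or payload smaller than one MSS).
 */
static long rcv_rtt_sample(const struct rtt_ctx *c, uint32_t now,
			   uint32_t seq, uint32_t end_seq)
{
	if (c->rcv_tsecr && end_seq - seq >= c->rcv_mss)
		return (long)(now - c->rcv_tsecr);
	return -1;
}

int main(void)
{
	struct rtt_ctx c = { .rcv_tsecr = 1000, .rcv_mss = 1448 };

	printf("full segment: %ld\n", rcv_rtt_sample(&c, 1042, 1, 1449));
	printf("pure ack:     %ld\n", rcv_rtt_sample(&c, 1042, 1, 1));
	return 0;
}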
425 431
@@ -492,41 +498,42 @@ new_measure:
492 */ 498 */
493static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) 499static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
494{ 500{
501 struct inet_connection_sock *icsk = inet_csk(sk);
495 u32 now; 502 u32 now;
496 503
497 tcp_schedule_ack(tp); 504 inet_csk_schedule_ack(sk);
498 505
499 tcp_measure_rcv_mss(tp, skb); 506 tcp_measure_rcv_mss(sk, skb);
500 507
501 tcp_rcv_rtt_measure(tp); 508 tcp_rcv_rtt_measure(tp);
502 509
503 now = tcp_time_stamp; 510 now = tcp_time_stamp;
504 511
505 if (!tp->ack.ato) { 512 if (!icsk->icsk_ack.ato) {
506 /* The _first_ data packet received, initialize 513 /* The _first_ data packet received, initialize
507 * delayed ACK engine. 514 * delayed ACK engine.
508 */ 515 */
509 tcp_incr_quickack(tp); 516 tcp_incr_quickack(sk);
510 tp->ack.ato = TCP_ATO_MIN; 517 icsk->icsk_ack.ato = TCP_ATO_MIN;
511 } else { 518 } else {
512 int m = now - tp->ack.lrcvtime; 519 int m = now - icsk->icsk_ack.lrcvtime;
513 520
514 if (m <= TCP_ATO_MIN/2) { 521 if (m <= TCP_ATO_MIN/2) {
515 /* The fastest case is the first. */ 522 /* The fastest case is the first. */
516 tp->ack.ato = (tp->ack.ato>>1) + TCP_ATO_MIN/2; 523 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
517 } else if (m < tp->ack.ato) { 524 } else if (m < icsk->icsk_ack.ato) {
518 tp->ack.ato = (tp->ack.ato>>1) + m; 525 icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
519 if (tp->ack.ato > tp->rto) 526 if (icsk->icsk_ack.ato > icsk->icsk_rto)
520 tp->ack.ato = tp->rto; 527 icsk->icsk_ack.ato = icsk->icsk_rto;
521 } else if (m > tp->rto) { 528 } else if (m > icsk->icsk_rto) {
 522 /* Too long gap. Apparently sender failed to 529 /* Too long gap. Apparently sender failed to
523 * restart window, so that we send ACKs quickly. 530 * restart window, so that we send ACKs quickly.
524 */ 531 */
525 tcp_incr_quickack(tp); 532 tcp_incr_quickack(sk);
526 sk_stream_mem_reclaim(sk); 533 sk_stream_mem_reclaim(sk);
527 } 534 }
528 } 535 }
529 tp->ack.lrcvtime = now; 536 icsk->icsk_ack.lrcvtime = now;
530 537
531 TCP_ECN_check_ce(tp, skb); 538 TCP_ECN_check_ce(tp, skb);
532 539
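
The else-branch above adapts the delayed-ACK timeout (ato) to the observed inter-arrival gap m with a shift-based moving average, decaying toward TCP_ATO_MIN for fast senders and never exceeding the RTO. A standalone sketch of just that update; the 40-tick ATO_MIN is an assumed stand-in for the kernel's HZ/25.

#include <stdio.h>

#define ATO_MIN 40 /* assumed ~40ms in ticks, like the kernel's HZ/25 */

/* One step of the ato adaptation from tcp_event_data_recv():
 * m is the gap since the previous data arrival, rto the current RTO.
 * A gap between ato and rto leaves ato unchanged; a gap beyond the
 * RTO is handled elsewhere (the kernel re-enters quickack mode).
 */
static unsigned ato_update(unsigned ato, unsigned m, unsigned rto)
{
	if (m <= ATO_MIN / 2) {
		ato = (ato >> 1) + ATO_MIN / 2;    /* fast sender: decay */
	} else if (m < ato) {
		ato = (ato >> 1) + m;              /* track shorter gaps */
		if (ato > rto)
			ato = rto;                 /* never beyond RTO   */
	}
	return ato;
}

int main(void)
{
	unsigned ato = 200, rto = 300;
	unsigned gaps[] = { 10, 10, 150, 10 };

	for (unsigned i = 0; i < 4; i++) {
		ato = ato_update(ato, gaps[i], rto);
		printf("gap=%u -> ato=%u\n", gaps[i], ato);
	}
	return 0;
}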
@@ -543,8 +550,10 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
543 * To save cycles in the RFC 1323 implementation it was better to break 550 * To save cycles in the RFC 1323 implementation it was better to break
544 * it up into three procedures. -- erics 551 * it up into three procedures. -- erics
545 */ 552 */
546static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt) 553static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
547{ 554{
555 struct tcp_sock *tp = tcp_sk(sk);
556 const struct inet_connection_sock *icsk = inet_csk(sk);
548 long m = mrtt; /* RTT */ 557 long m = mrtt; /* RTT */
549 558
550 /* The following amusing code comes from Jacobson's 559 /* The following amusing code comes from Jacobson's
@@ -604,15 +613,16 @@ static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt)
604 tp->rtt_seq = tp->snd_nxt; 613 tp->rtt_seq = tp->snd_nxt;
605 } 614 }
606 615
607 if (tp->ca_ops->rtt_sample) 616 if (icsk->icsk_ca_ops->rtt_sample)
608 tp->ca_ops->rtt_sample(tp, *usrtt); 617 icsk->icsk_ca_ops->rtt_sample(sk, *usrtt);
609} 618}
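
tcp_rtt_estimator()'s body is elided by the hunk above; what it implements is the classic Jacobson/Karels smoothing in fixed point (srtt stored <<3, mdev <<2, so shifts give the 1/8 and 1/4 gains). The sketch below shows only that well-known core and omits the kernel's extra damping of shrinking deviations and its mdev_max/rttvar tracking, so treat it as an approximation, not the kernel routine.

#include <stdio.h>

struct rtt_est {
	long srtt; /* smoothed RTT, <<3    */
	long mdev; /* mean deviation, <<2  */
};

static void rtt_update(struct rtt_est *e, long m /* measured RTT */)
{
	if (e->srtt == 0) {             /* first sample seeds both */
		e->srtt = m << 3;
		e->mdev = m << 1;
		return;
	}
	m -= (e->srtt >> 3);            /* m is now the error      */
	e->srtt += m;                   /* srtt += error / 8       */
	if (m < 0)
		m = -m;
	m -= (e->mdev >> 2);
	e->mdev += m;                   /* mdev += |error| delta / 4 */
}

int main(void)
{
	struct rtt_est e = { 0, 0 };
	long samples[] = { 100, 120, 80, 110 };

	for (int i = 0; i < 4; i++) {
		rtt_update(&e, samples[i]);
		printf("m=%ld srtt=%ld mdev=%ld\n",
		       samples[i], e.srtt >> 3, e.mdev >> 2);
	}
	return 0;
}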
610 619
611/* Calculate rto without backoff. This is the second half of Van Jacobson's 620/* Calculate rto without backoff. This is the second half of Van Jacobson's
612 * routine referred to above. 621 * routine referred to above.
613 */ 622 */
614static inline void tcp_set_rto(struct tcp_sock *tp) 623static inline void tcp_set_rto(struct sock *sk)
615{ 624{
625 const struct tcp_sock *tp = tcp_sk(sk);
616 /* Old crap is replaced with new one. 8) 626 /* Old crap is replaced with new one. 8)
617 * 627 *
618 * More seriously: 628 * More seriously:
@@ -623,7 +633,7 @@ static inline void tcp_set_rto(struct tcp_sock *tp)
623 * is invisible. Actually, Linux-2.4 also generates erratic 633 * is invisible. Actually, Linux-2.4 also generates erratic
 624 * ACKs in some circumstances. 634 * ACKs in some circumstances.
625 */ 635 */
626 tp->rto = (tp->srtt >> 3) + tp->rttvar; 636 inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar;
627 637
628 /* 2. Fixups made earlier cannot be right. 638 /* 2. Fixups made earlier cannot be right.
629 * If we do not estimate RTO correctly without them, 639 * If we do not estimate RTO correctly without them,
@@ -635,10 +645,10 @@ static inline void tcp_set_rto(struct tcp_sock *tp)
635/* NOTE: clamping at TCP_RTO_MIN is not required, current algo 645/* NOTE: clamping at TCP_RTO_MIN is not required, current algo
636 * guarantees that rto is higher. 646 * guarantees that rto is higher.
637 */ 647 */
638static inline void tcp_bound_rto(struct tcp_sock *tp) 648static inline void tcp_bound_rto(struct sock *sk)
639{ 649{
640 if (tp->rto > TCP_RTO_MAX) 650 if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
641 tp->rto = TCP_RTO_MAX; 651 inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
642} 652}
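
tcp_set_rto()/tcp_bound_rto() now write icsk_rto, but the formula is unchanged: RTO = srtt/8 + rttvar, capped at TCP_RTO_MAX (no lower clamp is needed because, per the comment above, the estimator already keeps the result above TCP_RTO_MIN). A sketch in the same fixed-point convention; the 120-second cap is an assumption matching the usual kernel value.

#include <stdio.h>

#define TCP_RTO_MAX (120 * 1000) /* assumed 120s at 1000 ticks/s */

/* srtt is kept <<3, rttvar in plain ticks, as in the sketch above. */
static unsigned compute_rto(unsigned srtt_x8, unsigned rttvar)
{
	unsigned rto = (srtt_x8 >> 3) + rttvar;

	return rto > TCP_RTO_MAX ? TCP_RTO_MAX : rto;
}

int main(void)
{
	printf("srtt=100 rttvar=50 -> rto=%u\n", compute_rto(100 << 3, 50));
	return 0;
}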
643 653
644/* Save metrics learned by this TCP session. 654/* Save metrics learned by this TCP session.
@@ -656,9 +666,10 @@ void tcp_update_metrics(struct sock *sk)
656 dst_confirm(dst); 666 dst_confirm(dst);
657 667
658 if (dst && (dst->flags&DST_HOST)) { 668 if (dst && (dst->flags&DST_HOST)) {
669 const struct inet_connection_sock *icsk = inet_csk(sk);
659 int m; 670 int m;
660 671
661 if (tp->backoff || !tp->srtt) { 672 if (icsk->icsk_backoff || !tp->srtt) {
662 /* This session failed to estimate rtt. Why? 673 /* This session failed to estimate rtt. Why?
663 * Probably, no packets returned in time. 674 * Probably, no packets returned in time.
664 * Reset our results. 675 * Reset our results.
@@ -707,7 +718,7 @@ void tcp_update_metrics(struct sock *sk)
707 tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) 718 tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
708 dst->metrics[RTAX_CWND-1] = tp->snd_cwnd; 719 dst->metrics[RTAX_CWND-1] = tp->snd_cwnd;
709 } else if (tp->snd_cwnd > tp->snd_ssthresh && 720 } else if (tp->snd_cwnd > tp->snd_ssthresh &&
710 tp->ca_state == TCP_CA_Open) { 721 icsk->icsk_ca_state == TCP_CA_Open) {
711 /* Cong. avoidance phase, cwnd is reliable. */ 722 /* Cong. avoidance phase, cwnd is reliable. */
712 if (!dst_metric_locked(dst, RTAX_SSTHRESH)) 723 if (!dst_metric_locked(dst, RTAX_SSTHRESH))
713 dst->metrics[RTAX_SSTHRESH-1] = 724 dst->metrics[RTAX_SSTHRESH-1] =
@@ -801,9 +812,9 @@ static void tcp_init_metrics(struct sock *sk)
801 tp->mdev = dst_metric(dst, RTAX_RTTVAR); 812 tp->mdev = dst_metric(dst, RTAX_RTTVAR);
802 tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); 813 tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
803 } 814 }
804 tcp_set_rto(tp); 815 tcp_set_rto(sk);
805 tcp_bound_rto(tp); 816 tcp_bound_rto(sk);
806 if (tp->rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) 817 if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
807 goto reset; 818 goto reset;
808 tp->snd_cwnd = tcp_init_cwnd(tp, dst); 819 tp->snd_cwnd = tcp_init_cwnd(tp, dst);
809 tp->snd_cwnd_stamp = tcp_time_stamp; 820 tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -817,12 +828,14 @@ reset:
817 if (!tp->rx_opt.saw_tstamp && tp->srtt) { 828 if (!tp->rx_opt.saw_tstamp && tp->srtt) {
818 tp->srtt = 0; 829 tp->srtt = 0;
819 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; 830 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
820 tp->rto = TCP_TIMEOUT_INIT; 831 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
821 } 832 }
822} 833}
823 834
824static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts) 835static void tcp_update_reordering(struct sock *sk, const int metric,
836 const int ts)
825{ 837{
838 struct tcp_sock *tp = tcp_sk(sk);
826 if (metric > tp->reordering) { 839 if (metric > tp->reordering) {
827 tp->reordering = min(TCP_MAX_REORDERING, metric); 840 tp->reordering = min(TCP_MAX_REORDERING, metric);
828 841
@@ -837,7 +850,7 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts)
837 NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER); 850 NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER);
838#if FASTRETRANS_DEBUG > 1 851#if FASTRETRANS_DEBUG > 1
839 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", 852 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
840 tp->rx_opt.sack_ok, tp->ca_state, 853 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
841 tp->reordering, 854 tp->reordering,
842 tp->fackets_out, 855 tp->fackets_out,
843 tp->sacked_out, 856 tp->sacked_out,
@@ -899,6 +912,7 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts)
899static int 912static int
900tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una) 913tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
901{ 914{
915 const struct inet_connection_sock *icsk = inet_csk(sk);
902 struct tcp_sock *tp = tcp_sk(sk); 916 struct tcp_sock *tp = tcp_sk(sk);
903 unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; 917 unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
904 struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); 918 struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
@@ -909,14 +923,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
909 int flag = 0; 923 int flag = 0;
910 int i; 924 int i;
911 925
912 /* So, SACKs for already sent large segments will be lost.
913 * Not good, but alternative is to resegment the queue. */
914 if (sk->sk_route_caps & NETIF_F_TSO) {
915 sk->sk_route_caps &= ~NETIF_F_TSO;
916 sock_set_flag(sk, SOCK_NO_LARGESEND);
917 tp->mss_cache = tp->mss_cache;
918 }
919
920 if (!tp->sacked_out) 926 if (!tp->sacked_out)
921 tp->fackets_out = 0; 927 tp->fackets_out = 0;
922 prior_fackets = tp->fackets_out; 928 prior_fackets = tp->fackets_out;
@@ -964,20 +970,40 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
964 flag |= FLAG_DATA_LOST; 970 flag |= FLAG_DATA_LOST;
965 971
966 sk_stream_for_retrans_queue(skb, sk) { 972 sk_stream_for_retrans_queue(skb, sk) {
967 u8 sacked = TCP_SKB_CB(skb)->sacked; 973 int in_sack, pcount;
968 int in_sack; 974 u8 sacked;
969 975
970 /* The retransmission queue is always in order, so 976 /* The retransmission queue is always in order, so
971 * we can short-circuit the walk early. 977 * we can short-circuit the walk early.
972 */ 978 */
973 if(!before(TCP_SKB_CB(skb)->seq, end_seq)) 979 if (!before(TCP_SKB_CB(skb)->seq, end_seq))
974 break; 980 break;
975 981
976 fack_count += tcp_skb_pcount(skb); 982 pcount = tcp_skb_pcount(skb);
983
984 if (pcount > 1 &&
985 (after(start_seq, TCP_SKB_CB(skb)->seq) ||
986 before(end_seq, TCP_SKB_CB(skb)->end_seq))) {
987 unsigned int pkt_len;
988
989 if (after(start_seq, TCP_SKB_CB(skb)->seq))
990 pkt_len = (start_seq -
991 TCP_SKB_CB(skb)->seq);
992 else
993 pkt_len = (end_seq -
994 TCP_SKB_CB(skb)->seq);
995 if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->tso_size))
996 break;
997 pcount = tcp_skb_pcount(skb);
998 }
999
1000 fack_count += pcount;
977 1001
978 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && 1002 in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
979 !before(end_seq, TCP_SKB_CB(skb)->end_seq); 1003 !before(end_seq, TCP_SKB_CB(skb)->end_seq);
980 1004
1005 sacked = TCP_SKB_CB(skb)->sacked;
1006
981 /* Account D-SACK for retransmitted packet. */ 1007 /* Account D-SACK for retransmitted packet. */
982 if ((dup_sack && in_sack) && 1008 if ((dup_sack && in_sack) &&
983 (sacked & TCPCB_RETRANS) && 1009 (sacked & TCPCB_RETRANS) &&
@@ -1064,7 +1090,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1064 * we have to account for reordering! Ugly, 1090 * we have to account for reordering! Ugly,
1065 * but should help. 1091 * but should help.
1066 */ 1092 */
1067 if (lost_retrans && tp->ca_state == TCP_CA_Recovery) { 1093 if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) {
1068 struct sk_buff *skb; 1094 struct sk_buff *skb;
1069 1095
1070 sk_stream_for_retrans_queue(skb, sk) { 1096 sk_stream_for_retrans_queue(skb, sk) {
@@ -1093,8 +1119,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1093 1119
1094 tp->left_out = tp->sacked_out + tp->lost_out; 1120 tp->left_out = tp->sacked_out + tp->lost_out;
1095 1121
1096 if ((reord < tp->fackets_out) && tp->ca_state != TCP_CA_Loss) 1122 if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss)
1097 tcp_update_reordering(tp, ((tp->fackets_out + 1) - reord), 0); 1123 tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0);
1098 1124
1099#if FASTRETRANS_DEBUG > 0 1125#if FASTRETRANS_DEBUG > 0
1100 BUG_TRAP((int)tp->sacked_out >= 0); 1126 BUG_TRAP((int)tp->sacked_out >= 0);
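
The new block added to the tag loop in the previous hunk replaces the old blanket TSO-disable: when a SACK edge falls inside a multi-segment skb (pcount > 1), the skb is split at that edge with tcp_fragment() so per-segment tagging stays exact. Below is a sketch of the boundary arithmetic alone, with an explicit overlap guard that the kernel leaves implicit in the surrounding loop; the names are illustrative, not kernel API.

#include <stdint.h>
#include <stdio.h>

/* Wrap-safe sequence comparisons, as in the kernel's before()/after(). */
static int before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
static int after(uint32_t a, uint32_t b)  { return before(b, a); }

/* Given an skb covering [seq, end_seq) and a SACK block [start, end),
 * return the byte offset at which the skb should be split so the SACK
 * edge lands on an skb boundary, or 0 when no split is needed.
 */
static uint32_t sack_split_len(uint32_t seq, uint32_t end_seq,
			       uint32_t start, uint32_t end)
{
	if (!before(start, end_seq) || !after(end, seq))
		return 0;                /* no overlap: nothing to split */
	if (after(start, seq))
		return start - seq;      /* SACK begins inside the skb */
	if (before(end, end_seq))
		return end - seq;        /* SACK ends inside the skb   */
	return 0;                        /* skb fully covered          */
}

int main(void)
{
	/* skb covers 1000..4000; SACK block covers 2000..5000. */
	printf("split at %u bytes\n",
	       (unsigned)sack_split_len(1000, 4000, 2000, 5000));
	return 0;
}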
@@ -1111,17 +1137,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1111 */ 1137 */
1112void tcp_enter_frto(struct sock *sk) 1138void tcp_enter_frto(struct sock *sk)
1113{ 1139{
1140 const struct inet_connection_sock *icsk = inet_csk(sk);
1114 struct tcp_sock *tp = tcp_sk(sk); 1141 struct tcp_sock *tp = tcp_sk(sk);
1115 struct sk_buff *skb; 1142 struct sk_buff *skb;
1116 1143
1117 tp->frto_counter = 1; 1144 tp->frto_counter = 1;
1118 1145
1119 if (tp->ca_state <= TCP_CA_Disorder || 1146 if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
1120 tp->snd_una == tp->high_seq || 1147 tp->snd_una == tp->high_seq ||
1121 (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) { 1148 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
1122 tp->prior_ssthresh = tcp_current_ssthresh(tp); 1149 tp->prior_ssthresh = tcp_current_ssthresh(sk);
1123 tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); 1150 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1124 tcp_ca_event(tp, CA_EVENT_FRTO); 1151 tcp_ca_event(sk, CA_EVENT_FRTO);
1125 } 1152 }
1126 1153
1127 /* Have to clear retransmission markers here to keep the bookkeeping 1154 /* Have to clear retransmission markers here to keep the bookkeeping
@@ -1138,7 +1165,7 @@ void tcp_enter_frto(struct sock *sk)
1138 } 1165 }
1139 tcp_sync_left_out(tp); 1166 tcp_sync_left_out(tp);
1140 1167
1141 tcp_set_ca_state(tp, TCP_CA_Open); 1168 tcp_set_ca_state(sk, TCP_CA_Open);
1142 tp->frto_highmark = tp->snd_nxt; 1169 tp->frto_highmark = tp->snd_nxt;
1143} 1170}
1144 1171
@@ -1184,7 +1211,7 @@ static void tcp_enter_frto_loss(struct sock *sk)
1184 1211
1185 tp->reordering = min_t(unsigned int, tp->reordering, 1212 tp->reordering = min_t(unsigned int, tp->reordering,
1186 sysctl_tcp_reordering); 1213 sysctl_tcp_reordering);
1187 tcp_set_ca_state(tp, TCP_CA_Loss); 1214 tcp_set_ca_state(sk, TCP_CA_Loss);
1188 tp->high_seq = tp->frto_highmark; 1215 tp->high_seq = tp->frto_highmark;
1189 TCP_ECN_queue_cwr(tp); 1216 TCP_ECN_queue_cwr(tp);
1190} 1217}
@@ -1208,16 +1235,17 @@ void tcp_clear_retrans(struct tcp_sock *tp)
1208 */ 1235 */
1209void tcp_enter_loss(struct sock *sk, int how) 1236void tcp_enter_loss(struct sock *sk, int how)
1210{ 1237{
1238 const struct inet_connection_sock *icsk = inet_csk(sk);
1211 struct tcp_sock *tp = tcp_sk(sk); 1239 struct tcp_sock *tp = tcp_sk(sk);
1212 struct sk_buff *skb; 1240 struct sk_buff *skb;
1213 int cnt = 0; 1241 int cnt = 0;
1214 1242
1215 /* Reduce ssthresh if it has not yet been made inside this window. */ 1243 /* Reduce ssthresh if it has not yet been made inside this window. */
1216 if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || 1244 if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
1217 (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) { 1245 (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
1218 tp->prior_ssthresh = tcp_current_ssthresh(tp); 1246 tp->prior_ssthresh = tcp_current_ssthresh(sk);
1219 tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); 1247 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1220 tcp_ca_event(tp, CA_EVENT_LOSS); 1248 tcp_ca_event(sk, CA_EVENT_LOSS);
1221 } 1249 }
1222 tp->snd_cwnd = 1; 1250 tp->snd_cwnd = 1;
1223 tp->snd_cwnd_cnt = 0; 1251 tp->snd_cwnd_cnt = 0;
@@ -1248,12 +1276,12 @@ void tcp_enter_loss(struct sock *sk, int how)
1248 1276
1249 tp->reordering = min_t(unsigned int, tp->reordering, 1277 tp->reordering = min_t(unsigned int, tp->reordering,
1250 sysctl_tcp_reordering); 1278 sysctl_tcp_reordering);
1251 tcp_set_ca_state(tp, TCP_CA_Loss); 1279 tcp_set_ca_state(sk, TCP_CA_Loss);
1252 tp->high_seq = tp->snd_nxt; 1280 tp->high_seq = tp->snd_nxt;
1253 TCP_ECN_queue_cwr(tp); 1281 TCP_ECN_queue_cwr(tp);
1254} 1282}
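
tcp_enter_loss() shows the same conversion pattern: ca_state, the retransmit counter, and the ssthresh hook move behind icsk, while the Loss-state side effects stay as they were. A toy sketch of those side effects; the max(cwnd/2, 2) ssthresh rule is an assumption standing in for the congestion-control module's ssthresh() op, and the state test is collapsed from the kernel's three-way condition.

#include <stdio.h>

struct toy_conn {
	unsigned cwnd, ssthresh, prior_ssthresh;
	unsigned retransmits;
	int state; /* 0 = Open, 3 = Loss (illustrative numbering) */
};

/* Entering loss: save ssthresh if it has not already been reduced
 * in this window, collapse cwnd to 1, move to the Loss state.
 */
static void enter_loss(struct toy_conn *c)
{
	if (c->state == 0 || !c->retransmits) {
		c->prior_ssthresh = c->ssthresh;
		c->ssthresh = c->cwnd / 2 > 2 ? c->cwnd / 2 : 2;
	}
	c->cwnd = 1;
	c->state = 3;
}

int main(void)
{
	struct toy_conn c = { .cwnd = 20, .ssthresh = 64, .state = 0 };

	enter_loss(&c);
	printf("cwnd=%u ssthresh=%u state=%d\n", c.cwnd, c.ssthresh, c.state);
	return 0;
}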
1255 1283
1256static int tcp_check_sack_reneging(struct sock *sk, struct tcp_sock *tp) 1284static int tcp_check_sack_reneging(struct sock *sk)
1257{ 1285{
1258 struct sk_buff *skb; 1286 struct sk_buff *skb;
1259 1287
@@ -1265,12 +1293,14 @@ static int tcp_check_sack_reneging(struct sock *sk, struct tcp_sock *tp)
1265 */ 1293 */
1266 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL && 1294 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL &&
1267 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { 1295 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
1296 struct inet_connection_sock *icsk = inet_csk(sk);
1268 NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); 1297 NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
1269 1298
1270 tcp_enter_loss(sk, 1); 1299 tcp_enter_loss(sk, 1);
1271 tp->retransmits++; 1300 icsk->icsk_retransmits++;
1272 tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); 1301 tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
1273 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); 1302 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
1303 icsk->icsk_rto, TCP_RTO_MAX);
1274 return 1; 1304 return 1;
1275 } 1305 }
1276 return 0; 1306 return 0;
@@ -1281,15 +1311,15 @@ static inline int tcp_fackets_out(struct tcp_sock *tp)
1281 return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out; 1311 return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out;
1282} 1312}
1283 1313
1284static inline int tcp_skb_timedout(struct tcp_sock *tp, struct sk_buff *skb) 1314static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
1285{ 1315{
1286 return (tcp_time_stamp - TCP_SKB_CB(skb)->when > tp->rto); 1316 return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
1287} 1317}
1288 1318
1289static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp) 1319static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
1290{ 1320{
1291 return tp->packets_out && 1321 return tp->packets_out &&
1292 tcp_skb_timedout(tp, skb_peek(&sk->sk_write_queue)); 1322 tcp_skb_timedout(sk, skb_peek(&sk->sk_write_queue));
1293} 1323}
1294 1324
1295/* Linux NewReno/SACK/FACK/ECN state machine. 1325/* Linux NewReno/SACK/FACK/ECN state machine.
@@ -1423,8 +1453,9 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
 1423 * on the assumption of no reordering, interpret this as reordering. 1453 * on the assumption of no reordering, interpret this as reordering.
 1424 * The only other reason could be a bug in the receiver's TCP. 1454 * The only other reason could be a bug in the receiver's TCP.
1425 */ 1455 */
1426static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend) 1456static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1427{ 1457{
1458 struct tcp_sock *tp = tcp_sk(sk);
1428 u32 holes; 1459 u32 holes;
1429 1460
1430 holes = max(tp->lost_out, 1U); 1461 holes = max(tp->lost_out, 1U);
@@ -1432,16 +1463,17 @@ static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend)
1432 1463
1433 if ((tp->sacked_out + holes) > tp->packets_out) { 1464 if ((tp->sacked_out + holes) > tp->packets_out) {
1434 tp->sacked_out = tp->packets_out - holes; 1465 tp->sacked_out = tp->packets_out - holes;
1435 tcp_update_reordering(tp, tp->packets_out+addend, 0); 1466 tcp_update_reordering(sk, tp->packets_out + addend, 0);
1436 } 1467 }
1437} 1468}
1438 1469
1439/* Emulate SACKs for SACKless connection: account for a new dupack. */ 1470/* Emulate SACKs for SACKless connection: account for a new dupack. */
1440 1471
1441static void tcp_add_reno_sack(struct tcp_sock *tp) 1472static void tcp_add_reno_sack(struct sock *sk)
1442{ 1473{
1474 struct tcp_sock *tp = tcp_sk(sk);
1443 tp->sacked_out++; 1475 tp->sacked_out++;
1444 tcp_check_reno_reordering(tp, 0); 1476 tcp_check_reno_reordering(sk, 0);
1445 tcp_sync_left_out(tp); 1477 tcp_sync_left_out(tp);
1446} 1478}
1447 1479
@@ -1456,7 +1488,7 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acke
1456 else 1488 else
1457 tp->sacked_out -= acked-1; 1489 tp->sacked_out -= acked-1;
1458 } 1490 }
1459 tcp_check_reno_reordering(tp, acked); 1491 tcp_check_reno_reordering(sk, acked);
1460 tcp_sync_left_out(tp); 1492 tcp_sync_left_out(tp);
1461} 1493}
1462 1494
@@ -1509,7 +1541,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
1509 struct sk_buff *skb; 1541 struct sk_buff *skb;
1510 1542
1511 sk_stream_for_retrans_queue(skb, sk) { 1543 sk_stream_for_retrans_queue(skb, sk) {
1512 if (tcp_skb_timedout(tp, skb) && 1544 if (tcp_skb_timedout(sk, skb) &&
1513 !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { 1545 !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
1514 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1546 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1515 tp->lost_out += tcp_skb_pcount(skb); 1547 tp->lost_out += tcp_skb_pcount(skb);
@@ -1530,14 +1562,16 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
1530} 1562}
1531 1563
1532/* Decrease cwnd each second ack. */ 1564/* Decrease cwnd each second ack. */
1533static void tcp_cwnd_down(struct tcp_sock *tp) 1565static void tcp_cwnd_down(struct sock *sk)
1534{ 1566{
1567 const struct inet_connection_sock *icsk = inet_csk(sk);
1568 struct tcp_sock *tp = tcp_sk(sk);
1535 int decr = tp->snd_cwnd_cnt + 1; 1569 int decr = tp->snd_cwnd_cnt + 1;
1536 1570
1537 tp->snd_cwnd_cnt = decr&1; 1571 tp->snd_cwnd_cnt = decr&1;
1538 decr >>= 1; 1572 decr >>= 1;
1539 1573
1540 if (decr && tp->snd_cwnd > tp->ca_ops->min_cwnd(tp)) 1574 if (decr && tp->snd_cwnd > icsk->icsk_ca_ops->min_cwnd(sk))
1541 tp->snd_cwnd -= decr; 1575 tp->snd_cwnd -= decr;
1542 1576
1543 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1); 1577 tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1);
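
tcp_cwnd_down() shaves one segment off cwnd on every second ACK during CWR, floored at the CC module's min_cwnd and capped at packets in flight plus one. A sketch with min_cwnd passed in as a plain number rather than the icsk_ca_ops->min_cwnd(sk) hook.

#include <stdio.h>

/* One tcp_cwnd_down() step: snd_cwnd_cnt counts ACKs; every second
 * ACK shaves one segment off cwnd, floored at min_cwnd and capped
 * at in_flight + 1 so we do not hold a window we cannot use.
 */
static void cwnd_down(unsigned *cwnd, unsigned *cwnd_cnt,
		      unsigned min_cwnd, unsigned in_flight)
{
	unsigned decr = *cwnd_cnt + 1;

	*cwnd_cnt = decr & 1;
	decr >>= 1;

	if (decr && *cwnd > min_cwnd)
		*cwnd -= decr;

	if (*cwnd > in_flight + 1)
		*cwnd = in_flight + 1;
}

int main(void)
{
	unsigned cwnd = 10, cnt = 0;

	for (int ack = 0; ack < 4; ack++) {
		cwnd_down(&cwnd, &cnt, 4, 20);
		printf("ack %d -> cwnd=%u\n", ack + 1, cwnd);
	}
	return 0;
}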
@@ -1571,11 +1605,15 @@ static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg)
1571#define DBGUNDO(x...) do { } while (0) 1605#define DBGUNDO(x...) do { } while (0)
1572#endif 1606#endif
1573 1607
1574static void tcp_undo_cwr(struct tcp_sock *tp, int undo) 1608static void tcp_undo_cwr(struct sock *sk, const int undo)
1575{ 1609{
1610 struct tcp_sock *tp = tcp_sk(sk);
1611
1576 if (tp->prior_ssthresh) { 1612 if (tp->prior_ssthresh) {
1577 if (tp->ca_ops->undo_cwnd) 1613 const struct inet_connection_sock *icsk = inet_csk(sk);
1578 tp->snd_cwnd = tp->ca_ops->undo_cwnd(tp); 1614
1615 if (icsk->icsk_ca_ops->undo_cwnd)
1616 tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
1579 else 1617 else
1580 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1); 1618 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);
1581 1619
@@ -1603,9 +1641,9 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
1603 /* Happy end! We did not retransmit anything 1641 /* Happy end! We did not retransmit anything
1604 * or our original transmission succeeded. 1642 * or our original transmission succeeded.
1605 */ 1643 */
1606 DBGUNDO(sk, tp, tp->ca_state == TCP_CA_Loss ? "loss" : "retrans"); 1644 DBGUNDO(sk, tp, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
1607 tcp_undo_cwr(tp, 1); 1645 tcp_undo_cwr(sk, 1);
1608 if (tp->ca_state == TCP_CA_Loss) 1646 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
1609 NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); 1647 NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
1610 else 1648 else
1611 NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO); 1649 NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO);
@@ -1618,7 +1656,7 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
1618 tcp_moderate_cwnd(tp); 1656 tcp_moderate_cwnd(tp);
1619 return 1; 1657 return 1;
1620 } 1658 }
1621 tcp_set_ca_state(tp, TCP_CA_Open); 1659 tcp_set_ca_state(sk, TCP_CA_Open);
1622 return 0; 1660 return 0;
1623} 1661}
1624 1662
@@ -1627,7 +1665,7 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp)
1627{ 1665{
1628 if (tp->undo_marker && !tp->undo_retrans) { 1666 if (tp->undo_marker && !tp->undo_retrans) {
1629 DBGUNDO(sk, tp, "D-SACK"); 1667 DBGUNDO(sk, tp, "D-SACK");
1630 tcp_undo_cwr(tp, 1); 1668 tcp_undo_cwr(sk, 1);
1631 tp->undo_marker = 0; 1669 tp->undo_marker = 0;
1632 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO); 1670 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO);
1633 } 1671 }
@@ -1648,10 +1686,10 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
1648 if (tp->retrans_out == 0) 1686 if (tp->retrans_out == 0)
1649 tp->retrans_stamp = 0; 1687 tp->retrans_stamp = 0;
1650 1688
1651 tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1); 1689 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
1652 1690
1653 DBGUNDO(sk, tp, "Hoe"); 1691 DBGUNDO(sk, tp, "Hoe");
1654 tcp_undo_cwr(tp, 0); 1692 tcp_undo_cwr(sk, 0);
1655 NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO); 1693 NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO);
1656 1694
1657 /* So... Do not make Hoe's retransmit yet. 1695 /* So... Do not make Hoe's retransmit yet.
@@ -1674,22 +1712,23 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
1674 DBGUNDO(sk, tp, "partial loss"); 1712 DBGUNDO(sk, tp, "partial loss");
1675 tp->lost_out = 0; 1713 tp->lost_out = 0;
1676 tp->left_out = tp->sacked_out; 1714 tp->left_out = tp->sacked_out;
1677 tcp_undo_cwr(tp, 1); 1715 tcp_undo_cwr(sk, 1);
1678 NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); 1716 NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
1679 tp->retransmits = 0; 1717 inet_csk(sk)->icsk_retransmits = 0;
1680 tp->undo_marker = 0; 1718 tp->undo_marker = 0;
1681 if (!IsReno(tp)) 1719 if (!IsReno(tp))
1682 tcp_set_ca_state(tp, TCP_CA_Open); 1720 tcp_set_ca_state(sk, TCP_CA_Open);
1683 return 1; 1721 return 1;
1684 } 1722 }
1685 return 0; 1723 return 0;
1686} 1724}
1687 1725
1688static inline void tcp_complete_cwr(struct tcp_sock *tp) 1726static inline void tcp_complete_cwr(struct sock *sk)
1689{ 1727{
1728 struct tcp_sock *tp = tcp_sk(sk);
1690 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); 1729 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
1691 tp->snd_cwnd_stamp = tcp_time_stamp; 1730 tp->snd_cwnd_stamp = tcp_time_stamp;
1692 tcp_ca_event(tp, CA_EVENT_COMPLETE_CWR); 1731 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
1693} 1732}
1694 1733
1695static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) 1734static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
@@ -1700,21 +1739,21 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
1700 tp->retrans_stamp = 0; 1739 tp->retrans_stamp = 0;
1701 1740
1702 if (flag&FLAG_ECE) 1741 if (flag&FLAG_ECE)
1703 tcp_enter_cwr(tp); 1742 tcp_enter_cwr(sk);
1704 1743
1705 if (tp->ca_state != TCP_CA_CWR) { 1744 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
1706 int state = TCP_CA_Open; 1745 int state = TCP_CA_Open;
1707 1746
1708 if (tp->left_out || tp->retrans_out || tp->undo_marker) 1747 if (tp->left_out || tp->retrans_out || tp->undo_marker)
1709 state = TCP_CA_Disorder; 1748 state = TCP_CA_Disorder;
1710 1749
1711 if (tp->ca_state != state) { 1750 if (inet_csk(sk)->icsk_ca_state != state) {
1712 tcp_set_ca_state(tp, state); 1751 tcp_set_ca_state(sk, state);
1713 tp->high_seq = tp->snd_nxt; 1752 tp->high_seq = tp->snd_nxt;
1714 } 1753 }
1715 tcp_moderate_cwnd(tp); 1754 tcp_moderate_cwnd(tp);
1716 } else { 1755 } else {
1717 tcp_cwnd_down(tp); 1756 tcp_cwnd_down(sk);
1718 } 1757 }
1719} 1758}
1720 1759
@@ -1733,6 +1772,7 @@ static void
1733tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, 1772tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
1734 int prior_packets, int flag) 1773 int prior_packets, int flag)
1735{ 1774{
1775 struct inet_connection_sock *icsk = inet_csk(sk);
1736 struct tcp_sock *tp = tcp_sk(sk); 1776 struct tcp_sock *tp = tcp_sk(sk);
1737 int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP)); 1777 int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP));
1738 1778
@@ -1750,13 +1790,13 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
1750 tp->prior_ssthresh = 0; 1790 tp->prior_ssthresh = 0;
1751 1791
1752 /* B. In all the states check for reneging SACKs. */ 1792 /* B. In all the states check for reneging SACKs. */
1753 if (tp->sacked_out && tcp_check_sack_reneging(sk, tp)) 1793 if (tp->sacked_out && tcp_check_sack_reneging(sk))
1754 return; 1794 return;
1755 1795
1756 /* C. Process data loss notification, provided it is valid. */ 1796 /* C. Process data loss notification, provided it is valid. */
1757 if ((flag&FLAG_DATA_LOST) && 1797 if ((flag&FLAG_DATA_LOST) &&
1758 before(tp->snd_una, tp->high_seq) && 1798 before(tp->snd_una, tp->high_seq) &&
1759 tp->ca_state != TCP_CA_Open && 1799 icsk->icsk_ca_state != TCP_CA_Open &&
1760 tp->fackets_out > tp->reordering) { 1800 tp->fackets_out > tp->reordering) {
1761 tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq); 1801 tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
1762 NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); 1802 NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
@@ -1767,14 +1807,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
1767 1807
1768 /* E. Check state exit conditions. State can be terminated 1808 /* E. Check state exit conditions. State can be terminated
1769 * when high_seq is ACKed. */ 1809 * when high_seq is ACKed. */
1770 if (tp->ca_state == TCP_CA_Open) { 1810 if (icsk->icsk_ca_state == TCP_CA_Open) {
1771 if (!sysctl_tcp_frto) 1811 if (!sysctl_tcp_frto)
1772 BUG_TRAP(tp->retrans_out == 0); 1812 BUG_TRAP(tp->retrans_out == 0);
1773 tp->retrans_stamp = 0; 1813 tp->retrans_stamp = 0;
1774 } else if (!before(tp->snd_una, tp->high_seq)) { 1814 } else if (!before(tp->snd_una, tp->high_seq)) {
1775 switch (tp->ca_state) { 1815 switch (icsk->icsk_ca_state) {
1776 case TCP_CA_Loss: 1816 case TCP_CA_Loss:
1777 tp->retransmits = 0; 1817 icsk->icsk_retransmits = 0;
1778 if (tcp_try_undo_recovery(sk, tp)) 1818 if (tcp_try_undo_recovery(sk, tp))
1779 return; 1819 return;
1780 break; 1820 break;
@@ -1783,8 +1823,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 1783 /* CWR is to be held until something *above* high_seq 1823 /* CWR is to be held until something *above* high_seq
 1784 * is ACKed, so the CWR bit reaches the receiver. */ 1824 * is ACKed, so the CWR bit reaches the receiver. */
1785 if (tp->snd_una != tp->high_seq) { 1825 if (tp->snd_una != tp->high_seq) {
1786 tcp_complete_cwr(tp); 1826 tcp_complete_cwr(sk);
1787 tcp_set_ca_state(tp, TCP_CA_Open); 1827 tcp_set_ca_state(sk, TCP_CA_Open);
1788 } 1828 }
1789 break; 1829 break;
1790 1830
@@ -1795,7 +1835,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
1795 * catching for all duplicate ACKs. */ 1835 * catching for all duplicate ACKs. */
1796 IsReno(tp) || tp->snd_una != tp->high_seq) { 1836 IsReno(tp) || tp->snd_una != tp->high_seq) {
1797 tp->undo_marker = 0; 1837 tp->undo_marker = 0;
1798 tcp_set_ca_state(tp, TCP_CA_Open); 1838 tcp_set_ca_state(sk, TCP_CA_Open);
1799 } 1839 }
1800 break; 1840 break;
1801 1841
@@ -1804,17 +1844,17 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
1804 tcp_reset_reno_sack(tp); 1844 tcp_reset_reno_sack(tp);
1805 if (tcp_try_undo_recovery(sk, tp)) 1845 if (tcp_try_undo_recovery(sk, tp))
1806 return; 1846 return;
1807 tcp_complete_cwr(tp); 1847 tcp_complete_cwr(sk);
1808 break; 1848 break;
1809 } 1849 }
1810 } 1850 }
1811 1851
1812 /* F. Process state. */ 1852 /* F. Process state. */
1813 switch (tp->ca_state) { 1853 switch (icsk->icsk_ca_state) {
1814 case TCP_CA_Recovery: 1854 case TCP_CA_Recovery:
1815 if (prior_snd_una == tp->snd_una) { 1855 if (prior_snd_una == tp->snd_una) {
1816 if (IsReno(tp) && is_dupack) 1856 if (IsReno(tp) && is_dupack)
1817 tcp_add_reno_sack(tp); 1857 tcp_add_reno_sack(sk);
1818 } else { 1858 } else {
1819 int acked = prior_packets - tp->packets_out; 1859 int acked = prior_packets - tp->packets_out;
1820 if (IsReno(tp)) 1860 if (IsReno(tp))
@@ -1824,13 +1864,13 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
1824 break; 1864 break;
1825 case TCP_CA_Loss: 1865 case TCP_CA_Loss:
1826 if (flag&FLAG_DATA_ACKED) 1866 if (flag&FLAG_DATA_ACKED)
1827 tp->retransmits = 0; 1867 icsk->icsk_retransmits = 0;
1828 if (!tcp_try_undo_loss(sk, tp)) { 1868 if (!tcp_try_undo_loss(sk, tp)) {
1829 tcp_moderate_cwnd(tp); 1869 tcp_moderate_cwnd(tp);
1830 tcp_xmit_retransmit_queue(sk); 1870 tcp_xmit_retransmit_queue(sk);
1831 return; 1871 return;
1832 } 1872 }
1833 if (tp->ca_state != TCP_CA_Open) 1873 if (icsk->icsk_ca_state != TCP_CA_Open)
1834 return; 1874 return;
1835 /* Loss is undone; fall through to processing in Open state. */ 1875 /* Loss is undone; fall through to processing in Open state. */
1836 default: 1876 default:
@@ -1838,10 +1878,10 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
1838 if (tp->snd_una != prior_snd_una) 1878 if (tp->snd_una != prior_snd_una)
1839 tcp_reset_reno_sack(tp); 1879 tcp_reset_reno_sack(tp);
1840 if (is_dupack) 1880 if (is_dupack)
1841 tcp_add_reno_sack(tp); 1881 tcp_add_reno_sack(sk);
1842 } 1882 }
1843 1883
1844 if (tp->ca_state == TCP_CA_Disorder) 1884 if (icsk->icsk_ca_state == TCP_CA_Disorder)
1845 tcp_try_undo_dsack(sk, tp); 1885 tcp_try_undo_dsack(sk, tp);
1846 1886
1847 if (!tcp_time_to_recover(sk, tp)) { 1887 if (!tcp_time_to_recover(sk, tp)) {
@@ -1861,30 +1901,28 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
1861 tp->undo_marker = tp->snd_una; 1901 tp->undo_marker = tp->snd_una;
1862 tp->undo_retrans = tp->retrans_out; 1902 tp->undo_retrans = tp->retrans_out;
1863 1903
1864 if (tp->ca_state < TCP_CA_CWR) { 1904 if (icsk->icsk_ca_state < TCP_CA_CWR) {
1865 if (!(flag&FLAG_ECE)) 1905 if (!(flag&FLAG_ECE))
1866 tp->prior_ssthresh = tcp_current_ssthresh(tp); 1906 tp->prior_ssthresh = tcp_current_ssthresh(sk);
1867 tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); 1907 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1868 TCP_ECN_queue_cwr(tp); 1908 TCP_ECN_queue_cwr(tp);
1869 } 1909 }
1870 1910
1871 tp->snd_cwnd_cnt = 0; 1911 tp->snd_cwnd_cnt = 0;
1872 tcp_set_ca_state(tp, TCP_CA_Recovery); 1912 tcp_set_ca_state(sk, TCP_CA_Recovery);
1873 } 1913 }
1874 1914
1875 if (is_dupack || tcp_head_timedout(sk, tp)) 1915 if (is_dupack || tcp_head_timedout(sk, tp))
1876 tcp_update_scoreboard(sk, tp); 1916 tcp_update_scoreboard(sk, tp);
1877 tcp_cwnd_down(tp); 1917 tcp_cwnd_down(sk);
1878 tcp_xmit_retransmit_queue(sk); 1918 tcp_xmit_retransmit_queue(sk);
1879} 1919}
1880 1920
1881/* Read draft-ietf-tcplw-high-performance before mucking 1921/* Read draft-ietf-tcplw-high-performance before mucking
 1882 * with this code. (Supersedes RFC1323) 1922 * with this code. (Supersedes RFC1323)
1883 */ 1923 */
1884static void tcp_ack_saw_tstamp(struct tcp_sock *tp, u32 *usrtt, int flag) 1924static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
1885{ 1925{
1886 __u32 seq_rtt;
1887
1888 /* RTTM Rule: A TSecr value received in a segment is used to 1926 /* RTTM Rule: A TSecr value received in a segment is used to
1889 * update the averaged RTT measurement only if the segment 1927 * update the averaged RTT measurement only if the segment
1890 * acknowledges some new data, i.e., only if it advances the 1928 * acknowledges some new data, i.e., only if it advances the
@@ -1900,14 +1938,15 @@ static void tcp_ack_saw_tstamp(struct tcp_sock *tp, u32 *usrtt, int flag)
1900 * answer arrives rto becomes 120 seconds! If at least one of segments 1938 * answer arrives rto becomes 120 seconds! If at least one of segments
1901 * in window is lost... Voila. --ANK (010210) 1939 * in window is lost... Voila. --ANK (010210)
1902 */ 1940 */
1903 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; 1941 struct tcp_sock *tp = tcp_sk(sk);
1904 tcp_rtt_estimator(tp, seq_rtt, usrtt); 1942 const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
1905 tcp_set_rto(tp); 1943 tcp_rtt_estimator(sk, seq_rtt, usrtt);
1906 tp->backoff = 0; 1944 tcp_set_rto(sk);
1907 tcp_bound_rto(tp); 1945 inet_csk(sk)->icsk_backoff = 0;
1946 tcp_bound_rto(sk);
1908} 1947}
1909 1948
1910static void tcp_ack_no_tstamp(struct tcp_sock *tp, u32 seq_rtt, u32 *usrtt, int flag) 1949static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag)
1911{ 1950{
1912 /* We don't have a timestamp. Can only use 1951 /* We don't have a timestamp. Can only use
1913 * packets that are not retransmitted to determine 1952 * packets that are not retransmitted to determine
@@ -1921,27 +1960,29 @@ static void tcp_ack_no_tstamp(struct tcp_sock *tp, u32 seq_rtt, u32 *usrtt, int
1921 if (flag & FLAG_RETRANS_DATA_ACKED) 1960 if (flag & FLAG_RETRANS_DATA_ACKED)
1922 return; 1961 return;
1923 1962
1924 tcp_rtt_estimator(tp, seq_rtt, usrtt); 1963 tcp_rtt_estimator(sk, seq_rtt, usrtt);
1925 tcp_set_rto(tp); 1964 tcp_set_rto(sk);
1926 tp->backoff = 0; 1965 inet_csk(sk)->icsk_backoff = 0;
1927 tcp_bound_rto(tp); 1966 tcp_bound_rto(sk);
1928} 1967}
1929 1968
1930static inline void tcp_ack_update_rtt(struct tcp_sock *tp, 1969static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
1931 int flag, s32 seq_rtt, u32 *usrtt) 1970 const s32 seq_rtt, u32 *usrtt)
1932{ 1971{
1972 const struct tcp_sock *tp = tcp_sk(sk);
1933 /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ 1973 /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
1934 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) 1974 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
1935 tcp_ack_saw_tstamp(tp, usrtt, flag); 1975 tcp_ack_saw_tstamp(sk, usrtt, flag);
1936 else if (seq_rtt >= 0) 1976 else if (seq_rtt >= 0)
1937 tcp_ack_no_tstamp(tp, seq_rtt, usrtt, flag); 1977 tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag);
1938} 1978}
1939 1979
1940static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, 1980static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
1941 u32 in_flight, int good) 1981 u32 in_flight, int good)
1942{ 1982{
1943 tp->ca_ops->cong_avoid(tp, ack, rtt, in_flight, good); 1983 const struct inet_connection_sock *icsk = inet_csk(sk);
1944 tp->snd_cwnd_stamp = tcp_time_stamp; 1984 icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good);
1985 tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
1945} 1986}
1946 1987
1947/* Restart timer after forward progress on connection. 1988/* Restart timer after forward progress on connection.
@@ -1951,9 +1992,9 @@ static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
1951static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) 1992static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
1952{ 1993{
1953 if (!tp->packets_out) { 1994 if (!tp->packets_out) {
1954 tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS); 1995 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
1955 } else { 1996 } else {
1956 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); 1997 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
1957 } 1998 }
1958} 1999}
1959 2000
@@ -2068,9 +2109,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
2068 seq_rtt = -1; 2109 seq_rtt = -1;
2069 } else if (seq_rtt < 0) 2110 } else if (seq_rtt < 0)
2070 seq_rtt = now - scb->when; 2111 seq_rtt = now - scb->when;
2071 if (seq_usrtt) 2112 if (seq_usrtt) {
2072 *seq_usrtt = (usnow.tv_sec - skb->stamp.tv_sec) * 1000000 2113 struct timeval tv;
2073 + (usnow.tv_usec - skb->stamp.tv_usec); 2114
2115 skb_get_timestamp(skb, &tv);
2116 *seq_usrtt = (usnow.tv_sec - tv.tv_sec) * 1000000
2117 + (usnow.tv_usec - tv.tv_usec);
2118 }
2074 2119
2075 if (sacked & TCPCB_SACKED_ACKED) 2120 if (sacked & TCPCB_SACKED_ACKED)
2076 tp->sacked_out -= tcp_skb_pcount(skb); 2121 tp->sacked_out -= tcp_skb_pcount(skb);
@@ -2085,16 +2130,17 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
2085 seq_rtt = now - scb->when; 2130 seq_rtt = now - scb->when;
2086 tcp_dec_pcount_approx(&tp->fackets_out, skb); 2131 tcp_dec_pcount_approx(&tp->fackets_out, skb);
2087 tcp_packets_out_dec(tp, skb); 2132 tcp_packets_out_dec(tp, skb);
2088 __skb_unlink(skb, skb->list); 2133 __skb_unlink(skb, &sk->sk_write_queue);
2089 sk_stream_free_skb(sk, skb); 2134 sk_stream_free_skb(sk, skb);
2090 } 2135 }
2091 2136
2092 if (acked&FLAG_ACKED) { 2137 if (acked&FLAG_ACKED) {
2093 tcp_ack_update_rtt(tp, acked, seq_rtt, seq_usrtt); 2138 const struct inet_connection_sock *icsk = inet_csk(sk);
2139 tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt);
2094 tcp_ack_packets_out(sk, tp); 2140 tcp_ack_packets_out(sk, tp);
2095 2141
2096 if (tp->ca_ops->pkts_acked) 2142 if (icsk->icsk_ca_ops->pkts_acked)
2097 tp->ca_ops->pkts_acked(tp, pkts_acked); 2143 icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked);
2098 } 2144 }
2099 2145
2100#if FASTRETRANS_DEBUG > 0 2146#if FASTRETRANS_DEBUG > 0
@@ -2102,19 +2148,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
2102 BUG_TRAP((int)tp->lost_out >= 0); 2148 BUG_TRAP((int)tp->lost_out >= 0);
2103 BUG_TRAP((int)tp->retrans_out >= 0); 2149 BUG_TRAP((int)tp->retrans_out >= 0);
2104 if (!tp->packets_out && tp->rx_opt.sack_ok) { 2150 if (!tp->packets_out && tp->rx_opt.sack_ok) {
2151 const struct inet_connection_sock *icsk = inet_csk(sk);
2105 if (tp->lost_out) { 2152 if (tp->lost_out) {
2106 printk(KERN_DEBUG "Leak l=%u %d\n", 2153 printk(KERN_DEBUG "Leak l=%u %d\n",
2107 tp->lost_out, tp->ca_state); 2154 tp->lost_out, icsk->icsk_ca_state);
2108 tp->lost_out = 0; 2155 tp->lost_out = 0;
2109 } 2156 }
2110 if (tp->sacked_out) { 2157 if (tp->sacked_out) {
2111 printk(KERN_DEBUG "Leak s=%u %d\n", 2158 printk(KERN_DEBUG "Leak s=%u %d\n",
2112 tp->sacked_out, tp->ca_state); 2159 tp->sacked_out, icsk->icsk_ca_state);
2113 tp->sacked_out = 0; 2160 tp->sacked_out = 0;
2114 } 2161 }
2115 if (tp->retrans_out) { 2162 if (tp->retrans_out) {
2116 printk(KERN_DEBUG "Leak r=%u %d\n", 2163 printk(KERN_DEBUG "Leak r=%u %d\n",
2117 tp->retrans_out, tp->ca_state); 2164 tp->retrans_out, icsk->icsk_ca_state);
2118 tp->retrans_out = 0; 2165 tp->retrans_out = 0;
2119 } 2166 }
2120 } 2167 }
@@ -2125,40 +2172,43 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
2125 2172
2126static void tcp_ack_probe(struct sock *sk) 2173static void tcp_ack_probe(struct sock *sk)
2127{ 2174{
2128 struct tcp_sock *tp = tcp_sk(sk); 2175 const struct tcp_sock *tp = tcp_sk(sk);
2176 struct inet_connection_sock *icsk = inet_csk(sk);
2129 2177
2130 /* Was it a usable window open? */ 2178 /* Was it a usable window open? */
2131 2179
2132 if (!after(TCP_SKB_CB(sk->sk_send_head)->end_seq, 2180 if (!after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
2133 tp->snd_una + tp->snd_wnd)) { 2181 tp->snd_una + tp->snd_wnd)) {
2134 tp->backoff = 0; 2182 icsk->icsk_backoff = 0;
2135 tcp_clear_xmit_timer(sk, TCP_TIME_PROBE0); 2183 inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
2136 /* Socket must be waked up by subsequent tcp_data_snd_check(). 2184 /* Socket must be waked up by subsequent tcp_data_snd_check().
2137 * This function is not for random using! 2185 * This function is not for random using!
2138 */ 2186 */
2139 } else { 2187 } else {
2140 tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, 2188 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
2141 min(tp->rto << tp->backoff, TCP_RTO_MAX)); 2189 min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
2190 TCP_RTO_MAX);
2142 } 2191 }
2143} 2192}
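
tcp_ack_probe() either cancels the zero-window probe timer (the window opened) or re-arms it with exponential backoff, rto << backoff capped at TCP_RTO_MAX; note the converted inet_csk_reset_xmit_timer() now takes that cap explicitly. A sketch of the timeout series, with an assumed 120-second cap.

#include <stdio.h>

#define TCP_RTO_MAX (120 * 1000) /* assumed, in ms */

static unsigned probe_timeout(unsigned rto, unsigned backoff)
{
	unsigned long t = (unsigned long)rto << backoff;

	return t > TCP_RTO_MAX ? TCP_RTO_MAX : (unsigned)t;
}

int main(void)
{
	for (unsigned b = 0; b < 8; b++)
		printf("backoff=%u -> %ums\n", b, probe_timeout(3000, b));
	return 0;
}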
2144 2193
2145static inline int tcp_ack_is_dubious(struct tcp_sock *tp, int flag) 2194static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
2146{ 2195{
2147 return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) || 2196 return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
2148 tp->ca_state != TCP_CA_Open); 2197 inet_csk(sk)->icsk_ca_state != TCP_CA_Open);
2149} 2198}
2150 2199
2151static inline int tcp_may_raise_cwnd(struct tcp_sock *tp, int flag) 2200static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
2152{ 2201{
2202 const struct tcp_sock *tp = tcp_sk(sk);
2153 return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && 2203 return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
2154 !((1<<tp->ca_state)&(TCPF_CA_Recovery|TCPF_CA_CWR)); 2204 !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
2155} 2205}
2156 2206
2157/* Check that window update is acceptable. 2207/* Check that window update is acceptable.
2158 * The function assumes that snd_una<=ack<=snd_next. 2208 * The function assumes that snd_una<=ack<=snd_next.
2159 */ 2209 */
2160static inline int tcp_may_update_window(struct tcp_sock *tp, u32 ack, 2210static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack,
2161 u32 ack_seq, u32 nwin) 2211 const u32 ack_seq, const u32 nwin)
2162{ 2212{
2163 return (after(ack, tp->snd_una) || 2213 return (after(ack, tp->snd_una) ||
2164 after(ack_seq, tp->snd_wl1) || 2214 after(ack_seq, tp->snd_wl1) ||
@@ -2241,6 +2291,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
2241/* This routine deals with incoming acks, but not outgoing ones. */ 2291/* This routine deals with incoming acks, but not outgoing ones. */
2242static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) 2292static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
2243{ 2293{
2294 struct inet_connection_sock *icsk = inet_csk(sk);
2244 struct tcp_sock *tp = tcp_sk(sk); 2295 struct tcp_sock *tp = tcp_sk(sk);
2245 u32 prior_snd_una = tp->snd_una; 2296 u32 prior_snd_una = tp->snd_una;
2246 u32 ack_seq = TCP_SKB_CB(skb)->seq; 2297 u32 ack_seq = TCP_SKB_CB(skb)->seq;
@@ -2268,7 +2319,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
2268 tp->snd_una = ack; 2319 tp->snd_una = ack;
2269 flag |= FLAG_WIN_UPDATE; 2320 flag |= FLAG_WIN_UPDATE;
2270 2321
2271 tcp_ca_event(tp, CA_EVENT_FAST_ACK); 2322 tcp_ca_event(sk, CA_EVENT_FAST_ACK);
2272 2323
2273 NET_INC_STATS_BH(LINUX_MIB_TCPHPACKS); 2324 NET_INC_STATS_BH(LINUX_MIB_TCPHPACKS);
2274 } else { 2325 } else {
@@ -2285,7 +2336,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
2285 if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th)) 2336 if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th))
2286 flag |= FLAG_ECE; 2337 flag |= FLAG_ECE;
2287 2338
2288 tcp_ca_event(tp, CA_EVENT_SLOW_ACK); 2339 tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
2289 } 2340 }
2290 2341
2291 /* We passed data and got it acked, remove any soft error 2342 /* We passed data and got it acked, remove any soft error
@@ -2301,19 +2352,19 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
2301 2352
2302 /* See if we can take anything off of the retransmit queue. */ 2353 /* See if we can take anything off of the retransmit queue. */
2303 flag |= tcp_clean_rtx_queue(sk, &seq_rtt, 2354 flag |= tcp_clean_rtx_queue(sk, &seq_rtt,
2304 tp->ca_ops->rtt_sample ? &seq_usrtt : NULL); 2355 icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL);
2305 2356
2306 if (tp->frto_counter) 2357 if (tp->frto_counter)
2307 tcp_process_frto(sk, prior_snd_una); 2358 tcp_process_frto(sk, prior_snd_una);
2308 2359
2309 if (tcp_ack_is_dubious(tp, flag)) { 2360 if (tcp_ack_is_dubious(sk, flag)) {
 2310 /* Advance CWND, if state allows this. */ 2361 /* Advance CWND, if state allows this. */
2311 if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(tp, flag)) 2362 if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
2312 tcp_cong_avoid(tp, ack, seq_rtt, prior_in_flight, 0); 2363 tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0);
2313 tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); 2364 tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
2314 } else { 2365 } else {
2315 if ((flag & FLAG_DATA_ACKED)) 2366 if ((flag & FLAG_DATA_ACKED))
2316 tcp_cong_avoid(tp, ack, seq_rtt, prior_in_flight, 1); 2367 tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1);
2317 } 2368 }
2318 2369
2319 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP)) 2370 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))
@@ -2322,7 +2373,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
2322 return 1; 2373 return 1;
2323 2374
2324no_queue: 2375no_queue:
2325 tp->probes_out = 0; 2376 icsk->icsk_probes_out = 0;
2326 2377
2327 /* If this ack opens up a zero window, clear backoff. It was 2378 /* If this ack opens up a zero window, clear backoff. It was
2328 * being used to time the probes, and is probably far higher than 2379 * being used to time the probes, and is probably far higher than
@@ -2500,8 +2551,9 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
2500 * up to bandwidth of 18Gigabit/sec. 8) ] 2551 * up to bandwidth of 18Gigabit/sec. 8) ]
2501 */ 2552 */
2502 2553
2503static int tcp_disordered_ack(struct tcp_sock *tp, struct sk_buff *skb) 2554static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
2504{ 2555{
2556 struct tcp_sock *tp = tcp_sk(sk);
2505 struct tcphdr *th = skb->h.th; 2557 struct tcphdr *th = skb->h.th;
2506 u32 seq = TCP_SKB_CB(skb)->seq; 2558 u32 seq = TCP_SKB_CB(skb)->seq;
2507 u32 ack = TCP_SKB_CB(skb)->ack_seq; 2559 u32 ack = TCP_SKB_CB(skb)->ack_seq;
@@ -2516,14 +2568,15 @@ static int tcp_disordered_ack(struct tcp_sock *tp, struct sk_buff *skb)
2516 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) && 2568 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
2517 2569
2518 /* 4. ... and sits in replay window. */ 2570 /* 4. ... and sits in replay window. */
2519 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (tp->rto*1024)/HZ); 2571 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
2520} 2572}
2521 2573
2522static inline int tcp_paws_discard(struct tcp_sock *tp, struct sk_buff *skb) 2574static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *skb)
2523{ 2575{
2576 const struct tcp_sock *tp = tcp_sk(sk);
2524 return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && 2577 return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
2525 xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && 2578 xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
2526 !tcp_disordered_ack(tp, skb)); 2579 !tcp_disordered_ack(sk, skb));
2527} 2580}
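
tcp_paws_discard() drops a segment whose timestamp lags the last accepted one by more than TCP_PAWS_WINDOW, provided the stored stamp is under 24 days old and the segment is not a benignly disordered ACK per tcp_disordered_ack(). A wrap-safe sketch of the core comparison; the disordered-ACK exemption is omitted, and the constants mirror the kernel's.

#include <stdint.h>
#include <stdio.h>

#define PAWS_WINDOW  1                    /* ticks, as in the kernel */
#define PAWS_24DAYS  (60 * 60 * 24 * 24)  /* seconds */

/* Core PAWS check: the received tsval must not lag the last accepted
 * ts_recent by more than the window, using signed 32-bit arithmetic
 * so timestamp wraparound behaves. State older than 24 days is
 * considered stale and never grounds for rejection.
 */
static int paws_reject(uint32_t ts_recent, uint32_t rcv_tsval,
		       long now_sec, long ts_recent_stamp)
{
	return (int32_t)(ts_recent - rcv_tsval) > PAWS_WINDOW &&
	       now_sec < ts_recent_stamp + PAWS_24DAYS;
}

int main(void)
{
	printf("fresh tsval: reject=%d\n", paws_reject(1000, 1005, 100, 90));
	printf("old tsval:   reject=%d\n", paws_reject(1000,  990, 100, 90));
	return 0;
}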
2528 2581
2529/* Check segment sequence number for validity. 2582/* Check segment sequence number for validity.
@@ -2586,7 +2639,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 {
     struct tcp_sock *tp = tcp_sk(sk);

-    tcp_schedule_ack(tp);
+    inet_csk_schedule_ack(sk);

     sk->sk_shutdown |= RCV_SHUTDOWN;
     sock_set_flag(sk, SOCK_DONE);
@@ -2596,7 +2649,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
     case TCP_ESTABLISHED:
         /* Move to CLOSE_WAIT */
         tcp_set_state(sk, TCP_CLOSE_WAIT);
-        tp->ack.pingpong = 1;
+        inet_csk(sk)->icsk_ack.pingpong = 1;
         break;

     case TCP_CLOSE_WAIT:
@@ -2694,7 +2747,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
     if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
         before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
         NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
-        tcp_enter_quickack_mode(tp);
+        tcp_enter_quickack_mode(sk);

         if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) {
             u32 end_seq = TCP_SKB_CB(skb)->end_seq;
@@ -2853,7 +2906,7 @@ static void tcp_ofo_queue(struct sock *sk)

         if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
             SOCK_DEBUG(sk, "ofo packet was already received \n");
-            __skb_unlink(skb, skb->list);
+            __skb_unlink(skb, &tp->out_of_order_queue);
             __kfree_skb(skb);
             continue;
         }
@@ -2861,7 +2914,7 @@ static void tcp_ofo_queue(struct sock *sk)
                    tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
                    TCP_SKB_CB(skb)->end_seq);

-        __skb_unlink(skb, skb->list);
+        __skb_unlink(skb, &tp->out_of_order_queue);
         __skb_queue_tail(&sk->sk_receive_queue, skb);
         tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
         if(skb->h.th->fin)
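Every __skb_unlink() call in this file gains an explicit queue argument: sk_buff no longer carries a back-pointer (skb->list) to the queue it sits on, so the caller must name the queue. A sketch of the new helper, consistent with these call sites but written from memory rather than copied from the tree:

    static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
    {
        struct sk_buff *next = skb->next, *prev = skb->prev;

        list->qlen--;                   /* queue length lives in the head */
        next->prev = prev;              /* splice skb out of the ring */
        prev->next = next;
        skb->next = skb->prev = NULL;   /* skb no longer remembers a queue */
    }

Dropping the per-skb list pointer shrinks every sk_buff at the cost of threading the queue through a handful of call sites, which is what most of the hunks below do.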
@@ -2942,7 +2995,7 @@ queue_and_out:
              * gap in queue is filled.
              */
             if (skb_queue_empty(&tp->out_of_order_queue))
-                tp->ack.pingpong = 0;
+                inet_csk(sk)->icsk_ack.pingpong = 0;
         }

         if (tp->rx_opt.num_sacks)
@@ -2963,8 +3016,8 @@ queue_and_out:
         tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);

 out_of_window:
-        tcp_enter_quickack_mode(tp);
-        tcp_schedule_ack(tp);
+        tcp_enter_quickack_mode(sk);
+        inet_csk_schedule_ack(sk);
 drop:
         __kfree_skb(skb);
         return;
@@ -2974,7 +3027,7 @@ drop:
     if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
         goto out_of_window;

-    tcp_enter_quickack_mode(tp);
+    tcp_enter_quickack_mode(sk);

     if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
         /* Partial packet, seq < rcv_next < end_seq */
@@ -3003,7 +3056,7 @@ drop:

     /* Disable header prediction. */
     tp->pred_flags = 0;
-    tcp_schedule_ack(tp);
+    inet_csk_schedule_ack(sk);

     SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
                tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
@@ -3027,7 +3080,7 @@ drop:
         u32 end_seq = TCP_SKB_CB(skb)->end_seq;

         if (seq == TCP_SKB_CB(skb1)->end_seq) {
-            __skb_append(skb1, skb);
+            __skb_append(skb1, skb, &tp->out_of_order_queue);

             if (!tp->rx_opt.num_sacks ||
                 tp->selective_acks[0].end_seq != seq)
@@ -3071,7 +3124,7 @@ drop:
                 tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq);
                 break;
             }
-            __skb_unlink(skb1, skb1->list);
+            __skb_unlink(skb1, &tp->out_of_order_queue);
             tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq);
             __kfree_skb(skb1);
         }
@@ -3088,8 +3141,9 @@ add_sack:
  * simplifies code)
  */
 static void
-tcp_collapse(struct sock *sk, struct sk_buff *head,
-             struct sk_buff *tail, u32 start, u32 end)
+tcp_collapse(struct sock *sk, struct sk_buff_head *list,
+             struct sk_buff *head, struct sk_buff *tail,
+             u32 start, u32 end)
 {
     struct sk_buff *skb;

@@ -3099,7 +3153,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head,
     /* No new bits? It is possible on ofo queue. */
     if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
         struct sk_buff *next = skb->next;
-        __skb_unlink(skb, skb->list);
+        __skb_unlink(skb, list);
         __kfree_skb(skb);
         NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
         skb = next;
@@ -3145,7 +3199,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head,
         nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head);
         memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
         TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
-        __skb_insert(nskb, skb->prev, skb, skb->list);
+        __skb_insert(nskb, skb->prev, skb, list);
         sk_stream_set_owner_r(nskb, sk);

         /* Copy data, releasing collapsed skbs. */
@@ -3164,7 +3218,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head,
         }
         if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
             struct sk_buff *next = skb->next;
-            __skb_unlink(skb, skb->list);
+            __skb_unlink(skb, list);
             __kfree_skb(skb);
             NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
             skb = next;
@@ -3200,7 +3254,8 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
         if (skb == (struct sk_buff *)&tp->out_of_order_queue ||
             after(TCP_SKB_CB(skb)->seq, end) ||
             before(TCP_SKB_CB(skb)->end_seq, start)) {
-            tcp_collapse(sk, head, skb, start, end);
+            tcp_collapse(sk, &tp->out_of_order_queue,
+                         head, skb, start, end);
             head = skb;
             if (skb == (struct sk_buff *)&tp->out_of_order_queue)
                 break;
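tcp_collapse() now receives the owning queue explicitly for the same reason as __skb_unlink(): with skb->list gone, the [head, tail) skb window alone no longer identifies the queue. The two call shapes after this change, taken directly from the call sites in this file (here and in tcp_prune_queue() below):

    /* Collapse a stretch of the out-of-order queue ... */
    tcp_collapse(sk, &tp->out_of_order_queue, head, skb, start, end);

    /* ... or the whole receive queue, from copied_seq to rcv_nxt. */
    tcp_collapse(sk, &sk->sk_receive_queue, sk->sk_receive_queue.next,
                 (struct sk_buff *)&sk->sk_receive_queue,
                 tp->copied_seq, tp->rcv_nxt);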
@@ -3237,7 +3292,8 @@ static int tcp_prune_queue(struct sock *sk)
     tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);

     tcp_collapse_ofo_queue(sk);
-    tcp_collapse(sk, sk->sk_receive_queue.next,
+    tcp_collapse(sk, &sk->sk_receive_queue,
+                 sk->sk_receive_queue.next,
                  (struct sk_buff*)&sk->sk_receive_queue,
                  tp->copied_seq, tp->rcv_nxt);
     sk_stream_mem_reclaim(sk);
@@ -3286,12 +3342,12 @@ void tcp_cwnd_application_limited(struct sock *sk)
 {
     struct tcp_sock *tp = tcp_sk(sk);

-    if (tp->ca_state == TCP_CA_Open &&
+    if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
         sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
         /* Limited by application or receiver window. */
         u32 win_used = max(tp->snd_cwnd_used, 2U);
         if (win_used < tp->snd_cwnd) {
-            tp->snd_ssthresh = tcp_current_ssthresh(tp);
+            tp->snd_ssthresh = tcp_current_ssthresh(sk);
             tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
         }
         tp->snd_cwnd_used = 0;
@@ -3370,13 +3426,13 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
     struct tcp_sock *tp = tcp_sk(sk);

     /* More than one full frame received... */
-    if (((tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss
+    if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss
          /* ... and right edge of window advances far enough.
          * (tcp_recvmsg() will send ACK otherwise). Or...
          */
         && __tcp_select_window(sk) >= tp->rcv_wnd) ||
         /* We ACK each frame or... */
-        tcp_in_quickack_mode(tp) ||
+        tcp_in_quickack_mode(sk) ||
         /* We have out of order data. */
         (ofo_possible &&
          skb_peek(&tp->out_of_order_queue))) {
@@ -3390,8 +3446,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)

 static __inline__ void tcp_ack_snd_check(struct sock *sk)
 {
-    struct tcp_sock *tp = tcp_sk(sk);
-    if (!tcp_ack_scheduled(tp)) {
+    if (!inet_csk_ack_scheduled(sk)) {
         /* We sent a data segment already. */
         return;
     }
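tcp_ack_scheduled(tp) becomes inet_csk_ack_scheduled(sk) because the delayed-ACK bookkeeping moved into icsk_ack. A sketch of the pair of helpers, assuming the ICSK_ACK_SCHED flag mirrors the old TCP_ACK_SCHED bit:

    static inline void inet_csk_schedule_ack(struct sock *sk)
    {
        inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED;
    }

    static inline int inet_csk_ack_scheduled(const struct sock *sk)
    {
        return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED;
    }

Making these sk-based rather than tp-based is what lets the same delayed-ACK machinery serve DCCP as well as TCP.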
@@ -3462,7 +3517,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
         struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
         tp->copied_seq++;
         if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
-            __skb_unlink(skb, skb->list);
+            __skb_unlink(skb, &sk->sk_receive_queue);
             __kfree_skb(skb);
         }
     }
@@ -3645,7 +3700,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                 tp->rcv_nxt == tp->rcv_wup)
                 tcp_store_ts_recent(tp);

-            tcp_rcv_rtt_measure_ts(tp, skb);
+            tcp_rcv_rtt_measure_ts(sk, skb);

             /* We know that such packets are checksummed
              * on entry.
@@ -3678,7 +3733,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                     tp->rcv_nxt == tp->rcv_wup)
                     tcp_store_ts_recent(tp);

-                tcp_rcv_rtt_measure_ts(tp, skb);
+                tcp_rcv_rtt_measure_ts(sk, skb);

                 __skb_pull(skb, tcp_header_len);
                 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
@@ -3699,7 +3754,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                 tp->rcv_nxt == tp->rcv_wup)
                 tcp_store_ts_recent(tp);

-            tcp_rcv_rtt_measure_ts(tp, skb);
+            tcp_rcv_rtt_measure_ts(sk, skb);

             if ((int)skb->truesize > sk->sk_forward_alloc)
                 goto step5;
@@ -3719,7 +3774,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
             /* Well, only one small jumplet in fast path... */
             tcp_ack(sk, skb, FLAG_DATA);
             tcp_data_snd_check(sk, tp);
-            if (!tcp_ack_scheduled(tp))
+            if (!inet_csk_ack_scheduled(sk))
                 goto no_ack;
         }

@@ -3741,7 +3796,7 @@ slow_path:
      * RFC1323: H1. Apply PAWS check first.
      */
     if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
-        tcp_paws_discard(tp, skb)) {
+        tcp_paws_discard(sk, skb)) {
         if (!th->rst) {
             NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
             tcp_send_dupack(sk, skb);
@@ -3788,7 +3843,7 @@ step5:
     if(th->ack)
         tcp_ack(sk, skb, FLAG_SLOWPATH);

-    tcp_rcv_rtt_measure_ts(tp, skb);
+    tcp_rcv_rtt_measure_ts(sk, skb);

     /* Process urgent data. */
     tcp_urg(sk, skb, th);
@@ -3817,6 +3872,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
     tcp_parse_options(skb, &tp->rx_opt, 0);

     if (th->ack) {
+        struct inet_connection_sock *icsk;
         /* rfc793:
          * "If the state is SYN-SENT then
          *    first check the ACK bit
@@ -3920,7 +3976,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,

         tcp_init_metrics(sk);

-        tcp_init_congestion_control(tp);
+        tcp_init_congestion_control(sk);

         /* Prevent spurious tcp_cwnd_restart() on first data
          * packet.
@@ -3930,7 +3986,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
         tcp_init_buffer_space(sk);

         if (sock_flag(sk, SOCK_KEEPOPEN))
-            tcp_reset_keepalive_timer(sk, keepalive_time_when(tp));
+            inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));

         if (!tp->rx_opt.snd_wscale)
             __tcp_fast_path_on(tp, tp->snd_wnd);
@@ -3942,7 +3998,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
             sk_wake_async(sk, 0, POLL_OUT);
         }

-        if (sk->sk_write_pending || tp->defer_accept || tp->ack.pingpong) {
+        icsk = inet_csk(sk);
+
+        if (sk->sk_write_pending ||
+            icsk->icsk_accept_queue.rskq_defer_accept ||
+            icsk->icsk_ack.pingpong) {
             /* Save one ACK. Data will be ready after
              * several ticks, if write_pending is set.
              *
@@ -3950,12 +4010,13 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
              * look so _wonderfully_ clever, that I was not able
              * to stand against the temptation 8)     --ANK
              */
-            tcp_schedule_ack(tp);
-            tp->ack.lrcvtime = tcp_time_stamp;
-            tp->ack.ato = TCP_ATO_MIN;
-            tcp_incr_quickack(tp);
-            tcp_enter_quickack_mode(tp);
-            tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX);
+            inet_csk_schedule_ack(sk);
+            icsk->icsk_ack.lrcvtime = tcp_time_stamp;
+            icsk->icsk_ack.ato = TCP_ATO_MIN;
+            tcp_incr_quickack(sk);
+            tcp_enter_quickack_mode(sk);
+            inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+                                      TCP_DELACK_MAX, TCP_RTO_MAX);

 discard:
             __kfree_skb(skb);
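tcp_reset_xmit_timer() grows into inet_csk_reset_xmit_timer(), which takes the per-protocol ceiling explicitly (TCP_RTO_MAX here) instead of hard-coding TCP's. A condensed sketch of the delayed-ACK branch exercised above; the retransmit/probe branch is elided and the body is reconstructed from how the call sites use it, not quoted verbatim:

    static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
                                                 unsigned long when,
                                                 const unsigned long max_when)
    {
        struct inet_connection_sock *icsk = inet_csk(sk);

        if (when > max_when)    /* clamp to the caller-supplied ceiling */
            when = max_when;

        if (what == ICSK_TIME_DACK) {
            icsk->icsk_ack.pending |= ICSK_ACK_TIMER;
            icsk->icsk_ack.timeout = jiffies + when;
            sk_reset_timer(sk, &icsk->icsk_delack_timer,
                           icsk->icsk_ack.timeout);
        }
        /* ICSK_TIME_RETRANS / ICSK_TIME_PROBE0 arm
         * icsk_retransmit_timer analogously (elided). */
    }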
@@ -4111,7 +4172,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
     }

     if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
-        tcp_paws_discard(tp, skb)) {
+        tcp_paws_discard(sk, skb)) {
         if (!th->rst) {
             NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
             tcp_send_dupack(sk, skb);
@@ -4180,7 +4241,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
              */
             if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
                 !tp->srtt)
-                tcp_ack_saw_tstamp(tp, 0, 0);
+                tcp_ack_saw_tstamp(sk, NULL, 0);

             if (tp->rx_opt.tstamp_ok)
                 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
@@ -4192,7 +4253,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,

             tcp_init_metrics(sk);

-            tcp_init_congestion_control(tp);
+            tcp_init_congestion_control(sk);

             /* Prevent spurious tcp_cwnd_restart() on
              * first data packet.
@@ -4227,9 +4288,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                 return 1;
             }

-            tmo = tcp_fin_time(tp);
+            tmo = tcp_fin_time(sk);
             if (tmo > TCP_TIMEWAIT_LEN) {
-                tcp_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
+                inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
             } else if (th->fin || sock_owned_by_user(sk)) {
                 /* Bad case. We could lose such FIN otherwise.
                  * It is not a big problem, but it looks confusing
@@ -4237,7 +4298,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                  * if it spins in bh_lock_sock(), but it is really
                  * marginal case.
                  */
-                tcp_reset_keepalive_timer(sk, tmo);
+                inet_csk_reset_keepalive_timer(sk, tmo);
             } else {
                 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
                 goto discard;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 62f62bb05c2a..13dfb391cdf1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -64,7 +64,9 @@
 #include <linux/times.h>

 #include <net/icmp.h>
+#include <net/inet_hashtables.h>
 #include <net/tcp.h>
+#include <net/transp_v6.h>
 #include <net/ipv6.h>
 #include <net/inet_common.h>
 #include <net/xfrm.h>
@@ -75,7 +77,6 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>

-extern int sysctl_ip_dynaddr;
 int sysctl_tcp_tw_reuse;
 int sysctl_tcp_low_latency;

@@ -88,458 +89,29 @@ static struct socket *tcp_socket;
 void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
                        struct sk_buff *skb);

-struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = {
-    .__tcp_lhash_lock     = RW_LOCK_UNLOCKED,
-    .__tcp_lhash_users    = ATOMIC_INIT(0),
-    .__tcp_lhash_wait
-      = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait),
-    .__tcp_portalloc_lock = SPIN_LOCK_UNLOCKED
-};
+struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
+    .lhash_lock     = RW_LOCK_UNLOCKED,
+    .lhash_users    = ATOMIC_INIT(0),
+    .lhash_wait     = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
+    .portalloc_lock = SPIN_LOCK_UNLOCKED,
+    .port_rover     = 1024 - 1,
+};

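The TCP-private struct tcp_hashinfo becomes the protocol-neutral struct inet_hashinfo, so another connection-oriented protocol (DCCP, added elsewhere in this tree) can own an identical set of bind/established/listen tables. An abbreviated sketch of the structure the initializer above fills in - field order and the listening-table size macro are as I recall them, not quoted from the header:

    struct inet_hashinfo {
        struct inet_ehash_bucket    *ehash;     /* established + timewait */
        struct inet_bind_hashbucket *bhash;     /* bound local ports */
        int                         ehash_size;
        int                         bhash_size;
        struct hlist_head           listening_hash[INET_LHTABLE_SIZE];
        rwlock_t                    lhash_lock; /* protects listening_hash */
        atomic_t                    lhash_users;
        wait_queue_head_t           lhash_wait;
        spinlock_t                  portalloc_lock;
        kmem_cache_t                *bind_bucket_cachep;
        int                         port_rover; /* was tcp_port_rover */
    };

Note that port_rover, previously the file-scope global tcp_port_rover removed below, now travels with the table it indexes.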
-/*
- * This array holds the first and last local port number.
- * For high-usage systems, use sysctl to change this to
- * 32768-61000
- */
-int sysctl_local_port_range[2] = { 1024, 4999 };
-int tcp_port_rover = 1024 - 1;
-
-static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport,
-                                 __u32 faddr, __u16 fport)
-{
-    int h = (laddr ^ lport) ^ (faddr ^ fport);
-    h ^= h >> 16;
-    h ^= h >> 8;
-    return h & (tcp_ehash_size - 1);
-}
-
-static __inline__ int tcp_sk_hashfn(struct sock *sk)
-{
-    struct inet_sock *inet = inet_sk(sk);
-    __u32 laddr = inet->rcv_saddr;
-    __u16 lport = inet->num;
-    __u32 faddr = inet->daddr;
-    __u16 fport = inet->dport;
-
-    return tcp_hashfn(laddr, lport, faddr, fport);
-}
-
-/* Allocate and initialize a new TCP local port bind bucket.
- * The bindhash mutex for snum's hash chain must be held here.
- */
-struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
-                                          unsigned short snum)
-{
-    struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep,
-                                                  SLAB_ATOMIC);
-    if (tb) {
-        tb->port = snum;
-        tb->fastreuse = 0;
-        INIT_HLIST_HEAD(&tb->owners);
-        hlist_add_head(&tb->node, &head->chain);
-    }
-    return tb;
-}
-
-/* Caller must hold hashbucket lock for this tb with local BH disabled */
-void tcp_bucket_destroy(struct tcp_bind_bucket *tb)
-{
-    if (hlist_empty(&tb->owners)) {
-        __hlist_del(&tb->node);
-        kmem_cache_free(tcp_bucket_cachep, tb);
-    }
-}
-
-/* Caller must disable local BH processing. */
-static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
-{
-    struct tcp_bind_hashbucket *head =
-                &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)];
-    struct tcp_bind_bucket *tb;
-
-    spin_lock(&head->lock);
-    tb = tcp_sk(sk)->bind_hash;
-    sk_add_bind_node(child, &tb->owners);
-    tcp_sk(child)->bind_hash = tb;
-    spin_unlock(&head->lock);
-}
-
-inline void tcp_inherit_port(struct sock *sk, struct sock *child)
-{
-    local_bh_disable();
-    __tcp_inherit_port(sk, child);
-    local_bh_enable();
-}
-
-void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
-                   unsigned short snum)
-{
-    inet_sk(sk)->num = snum;
-    sk_add_bind_node(sk, &tb->owners);
-    tcp_sk(sk)->bind_hash = tb;
-}
-
-static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
-{
-    const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk);
-    struct sock *sk2;
-    struct hlist_node *node;
-    int reuse = sk->sk_reuse;
-
-    sk_for_each_bound(sk2, node, &tb->owners) {
-        if (sk != sk2 &&
-            !tcp_v6_ipv6only(sk2) &&
-            (!sk->sk_bound_dev_if ||
-             !sk2->sk_bound_dev_if ||
-             sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
-            if (!reuse || !sk2->sk_reuse ||
-                sk2->sk_state == TCP_LISTEN) {
-                const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2);
-                if (!sk2_rcv_saddr || !sk_rcv_saddr ||
-                    sk2_rcv_saddr == sk_rcv_saddr)
-                    break;
-            }
-        }
-    }
-    return node != NULL;
-}
-
-/* Obtain a reference to a local port for the given sock,
- * if snum is zero it means select any available local port.
- */
 static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
 {
-    struct tcp_bind_hashbucket *head;
-    struct hlist_node *node;
-    struct tcp_bind_bucket *tb;
-    int ret;
-
-    local_bh_disable();
-    if (!snum) {
-        int low = sysctl_local_port_range[0];
-        int high = sysctl_local_port_range[1];
-        int remaining = (high - low) + 1;
-        int rover;
-
-        spin_lock(&tcp_portalloc_lock);
-        if (tcp_port_rover < low)
-            rover = low;
-        else
-            rover = tcp_port_rover;
-        do {
-            rover++;
-            if (rover > high)
-                rover = low;
-            head = &tcp_bhash[tcp_bhashfn(rover)];
-            spin_lock(&head->lock);
-            tb_for_each(tb, node, &head->chain)
-                if (tb->port == rover)
-                    goto next;
-            break;
-        next:
-            spin_unlock(&head->lock);
-        } while (--remaining > 0);
-        tcp_port_rover = rover;
-        spin_unlock(&tcp_portalloc_lock);
-
-        /* Exhausted local port range during search? */
-        ret = 1;
-        if (remaining <= 0)
-            goto fail;
-
-        /* OK, here is the one we will use. HEAD is
-         * non-NULL and we hold it's mutex.
-         */
-        snum = rover;
-    } else {
-        head = &tcp_bhash[tcp_bhashfn(snum)];
-        spin_lock(&head->lock);
-        tb_for_each(tb, node, &head->chain)
-            if (tb->port == snum)
-                goto tb_found;
-    }
-    tb = NULL;
-    goto tb_not_found;
-tb_found:
-    if (!hlist_empty(&tb->owners)) {
-        if (sk->sk_reuse > 1)
-            goto success;
-        if (tb->fastreuse > 0 &&
-            sk->sk_reuse && sk->sk_state != TCP_LISTEN) {
-            goto success;
-        } else {
-            ret = 1;
-            if (tcp_bind_conflict(sk, tb))
-                goto fail_unlock;
-        }
-    }
-tb_not_found:
-    ret = 1;
-    if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
-        goto fail_unlock;
-    if (hlist_empty(&tb->owners)) {
-        if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
-            tb->fastreuse = 1;
-        else
-            tb->fastreuse = 0;
-    } else if (tb->fastreuse &&
-               (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
-        tb->fastreuse = 0;
-success:
-    if (!tcp_sk(sk)->bind_hash)
-        tcp_bind_hash(sk, tb, snum);
-    BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
-    ret = 0;
-
-fail_unlock:
-    spin_unlock(&head->lock);
-fail:
-    local_bh_enable();
-    return ret;
+    return inet_csk_get_port(&tcp_hashinfo, sk, snum);
-}
-
-/* Get rid of any references to a local port held by the
- * given sock.
- */
-static void __tcp_put_port(struct sock *sk)
-{
-    struct inet_sock *inet = inet_sk(sk);
-    struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)];
-    struct tcp_bind_bucket *tb;
-
-    spin_lock(&head->lock);
-    tb = tcp_sk(sk)->bind_hash;
-    __sk_del_bind_node(sk);
-    tcp_sk(sk)->bind_hash = NULL;
-    inet->num = 0;
-    tcp_bucket_destroy(tb);
-    spin_unlock(&head->lock);
-}
-
-void tcp_put_port(struct sock *sk)
-{
-    local_bh_disable();
-    __tcp_put_port(sk);
-    local_bh_enable();
-}
-
-/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
- * Look, when several writers sleep and reader wakes them up, all but one
- * immediately hit write lock and grab all the cpus. Exclusive sleep solves
- * this, _but_ remember, it adds useless work on UP machines (wake up each
- * exclusive lock release). It should be ifdefed really.
- */
-
-void tcp_listen_wlock(void)
-{
-    write_lock(&tcp_lhash_lock);
-
-    if (atomic_read(&tcp_lhash_users)) {
-        DEFINE_WAIT(wait);
-
-        for (;;) {
-            prepare_to_wait_exclusive(&tcp_lhash_wait,
-                                      &wait, TASK_UNINTERRUPTIBLE);
-            if (!atomic_read(&tcp_lhash_users))
-                break;
-            write_unlock_bh(&tcp_lhash_lock);
-            schedule();
-            write_lock_bh(&tcp_lhash_lock);
-        }
-
-        finish_wait(&tcp_lhash_wait, &wait);
-    }
-}
-
-static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
-{
-    struct hlist_head *list;
-    rwlock_t *lock;
-
-    BUG_TRAP(sk_unhashed(sk));
-    if (listen_possible && sk->sk_state == TCP_LISTEN) {
-        list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
-        lock = &tcp_lhash_lock;
-        tcp_listen_wlock();
-    } else {
-        list = &tcp_ehash[(sk->sk_hashent = tcp_sk_hashfn(sk))].chain;
-        lock = &tcp_ehash[sk->sk_hashent].lock;
-        write_lock(lock);
-    }
-    __sk_add_node(sk, list);
-    sock_prot_inc_use(sk->sk_prot);
-    write_unlock(lock);
-    if (listen_possible && sk->sk_state == TCP_LISTEN)
-        wake_up(&tcp_lhash_wait);
 }

 static void tcp_v4_hash(struct sock *sk)
 {
-    if (sk->sk_state != TCP_CLOSE) {
-        local_bh_disable();
-        __tcp_v4_hash(sk, 1);
-        local_bh_enable();
-    }
+    inet_hash(&tcp_hashinfo, sk);
 }

 void tcp_unhash(struct sock *sk)
 {
-    rwlock_t *lock;
-
-    if (sk_unhashed(sk))
-        goto ende;
-
-    if (sk->sk_state == TCP_LISTEN) {
-        local_bh_disable();
-        tcp_listen_wlock();
-        lock = &tcp_lhash_lock;
-    } else {
-        struct tcp_ehash_bucket *head = &tcp_ehash[sk->sk_hashent];
-        lock = &head->lock;
-        write_lock_bh(&head->lock);
-    }
-
-    if (__sk_del_node_init(sk))
-        sock_prot_dec_use(sk->sk_prot);
-    write_unlock_bh(lock);
-
- ende:
-    if (sk->sk_state == TCP_LISTEN)
-        wake_up(&tcp_lhash_wait);
-}
+    inet_unhash(&tcp_hashinfo, sk);
-
-/* Don't inline this cruft. Here are some nice properties to
- * exploit here. The BSD API does not allow a listening TCP
- * to specify the remote port nor the remote address for the
- * connection. So always assume those are both wildcarded
- * during the search since they can never be otherwise.
- */
-static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr,
-                                             unsigned short hnum, int dif)
-{
-    struct sock *result = NULL, *sk;
-    struct hlist_node *node;
-    int score, hiscore;
-
-    hiscore=-1;
-    sk_for_each(sk, node, head) {
-        struct inet_sock *inet = inet_sk(sk);
-
-        if (inet->num == hnum && !ipv6_only_sock(sk)) {
-            __u32 rcv_saddr = inet->rcv_saddr;
-
-            score = (sk->sk_family == PF_INET ? 1 : 0);
-            if (rcv_saddr) {
-                if (rcv_saddr != daddr)
-                    continue;
-                score+=2;
-            }
-            if (sk->sk_bound_dev_if) {
-                if (sk->sk_bound_dev_if != dif)
-                    continue;
-                score+=2;
-            }
-            if (score == 5)
-                return sk;
-            if (score > hiscore) {
-                hiscore = score;
-                result = sk;
-            }
-        }
-    }
-    return result;
-}
-
-/* Optimize the common listener case. */
-static inline struct sock *tcp_v4_lookup_listener(u32 daddr,
-                                                  unsigned short hnum, int dif)
-{
-    struct sock *sk = NULL;
-    struct hlist_head *head;
-
-    read_lock(&tcp_lhash_lock);
-    head = &tcp_listening_hash[tcp_lhashfn(hnum)];
-    if (!hlist_empty(head)) {
-        struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
-
-        if (inet->num == hnum && !sk->sk_node.next &&
-            (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
-            (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
-            !sk->sk_bound_dev_if)
-            goto sherry_cache;
-        sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif);
-    }
-    if (sk) {
-sherry_cache:
-        sock_hold(sk);
-    }
-    read_unlock(&tcp_lhash_lock);
-    return sk;
-}
-
-/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
- * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
- *
- * Local BH must be disabled here.
- */
-
-static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport,
-                                                       u32 daddr, u16 hnum,
-                                                       int dif)
-{
-    struct tcp_ehash_bucket *head;
-    TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
-    __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
-    struct sock *sk;
-    struct hlist_node *node;
-    /* Optimize here for direct hit, only listening connections can
-     * have wildcards anyways.
-     */
-    int hash = tcp_hashfn(daddr, hnum, saddr, sport);
-    head = &tcp_ehash[hash];
-    read_lock(&head->lock);
-    sk_for_each(sk, node, &head->chain) {
-        if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
-            goto hit; /* You sunk my battleship! */
-    }
-
-    /* Must check for a TIME_WAIT'er before going to listener hash. */
-    sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
-        if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
-            goto hit;
-    }
-    sk = NULL;
-out:
-    read_unlock(&head->lock);
-    return sk;
-hit:
-    sock_hold(sk);
-    goto out;
-}
-
-static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport,
-                                           u32 daddr, u16 hnum, int dif)
-{
-    struct sock *sk = __tcp_v4_lookup_established(saddr, sport,
-                                                  daddr, hnum, dif);
-
-    return sk ? : tcp_v4_lookup_listener(daddr, hnum, dif);
-}
-
-inline struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr,
-                                  u16 dport, int dif)
-{
-    struct sock *sk;
-
-    local_bh_disable();
-    sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif);
-    local_bh_enable();
-
-    return sk;
 }

-EXPORT_SYMBOL_GPL(tcp_v4_lookup);
-
 static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
 {
     return secure_tcp_sequence_number(skb->nh.iph->daddr,
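All three wrappers above now delegate to generic inet helpers parameterised by the hashinfo. A sketch of what the generalised inet_unhash() looks like - essentially the removed tcp_unhash() body with the tcp_* file-scope tables replaced by the hashinfo argument (reconstructed from the removed code above, not quoted from net/ipv4/inet_hashtables.c):

    void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk)
    {
        rwlock_t *lock;

        if (sk_unhashed(sk))
            goto out;

        if (sk->sk_state == TCP_LISTEN) {
            local_bh_disable();
            inet_listen_wlock(hashinfo);       /* was tcp_listen_wlock() */
            lock = &hashinfo->lhash_lock;
        } else {
            lock = &hashinfo->ehash[sk->sk_hashent].lock;
            write_lock_bh(lock);
        }

        if (__sk_del_node_init(sk))
            sock_prot_dec_use(sk->sk_prot);
        write_unlock_bh(lock);
    out:
        if (sk->sk_state == TCP_LISTEN)
            wake_up(&hashinfo->lhash_wait);
    }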
@@ -550,27 +122,28 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)

 /* called with local bh disabled */
 static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
-                                      struct tcp_tw_bucket **twp)
+                                      struct inet_timewait_sock **twp)
 {
     struct inet_sock *inet = inet_sk(sk);
     u32 daddr = inet->rcv_saddr;
     u32 saddr = inet->daddr;
     int dif = sk->sk_bound_dev_if;
-    TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
-    __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
-    int hash = tcp_hashfn(daddr, lport, saddr, inet->dport);
-    struct tcp_ehash_bucket *head = &tcp_ehash[hash];
+    INET_ADDR_COOKIE(acookie, saddr, daddr)
+    const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+    const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size);
+    struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
     struct sock *sk2;
-    struct hlist_node *node;
-    struct tcp_tw_bucket *tw;
+    const struct hlist_node *node;
+    struct inet_timewait_sock *tw;

     write_lock(&head->lock);

     /* Check TIME-WAIT sockets first. */
-    sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
-        tw = (struct tcp_tw_bucket *)sk2;
+    sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
+        tw = inet_twsk(sk2);

-        if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
+        if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
+            const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
             struct tcp_sock *tp = tcp_sk(sk);

             /* With PAWS, it is safe from the viewpoint
@@ -587,15 +160,15 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
                fall back to VJ's scheme and use initial
                timestamp retrieved from peer table.
              */
-            if (tw->tw_ts_recent_stamp &&
+            if (tcptw->tw_ts_recent_stamp &&
                 (!twp || (sysctl_tcp_tw_reuse &&
                           xtime.tv_sec -
-                          tw->tw_ts_recent_stamp > 1))) {
-                if ((tp->write_seq =
-                        tw->tw_snd_nxt + 65535 + 2) == 0)
+                          tcptw->tw_ts_recent_stamp > 1))) {
+                tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
+                if (tp->write_seq == 0)
                     tp->write_seq = 1;
-                tp->rx_opt.ts_recent = tw->tw_ts_recent;
-                tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
+                tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
+                tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
                 sock_hold(sk2);
                 goto unique;
             } else
@@ -606,7 +179,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,

     /* And established part... */
     sk_for_each(sk2, node, &head->chain) {
-        if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif))
+        if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
             goto not_unique;
     }

@@ -626,10 +199,10 @@ unique:
         NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
     } else if (tw) {
         /* Silly. Should hash-dance instead... */
-        tcp_tw_deschedule(tw);
+        inet_twsk_deschedule(tw, &tcp_death_row);
         NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

-        tcp_tw_put(tw);
+        inet_twsk_put(tw);
     }

     return 0;
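inet_ehashfn() is the old tcp_hashfn() (removed earlier in this file) with the table size passed in rather than read from the TCP-private global tcp_ehash_size. A sketch that simply parameterises that removed code:

    static inline int inet_ehashfn(const __u32 laddr, const __u16 lport,
                                   const __u32 faddr, const __u16 fport,
                                   const int ehash_size)
    {
        int h = (laddr ^ lport) ^ (faddr ^ fport);
        h ^= h >> 16;                 /* fold the high bits in, as before */
        h ^= h >> 8;
        return h & (ehash_size - 1);  /* ehash_size is a power of two */
    }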
@@ -652,9 +225,9 @@ static inline u32 connect_port_offset(const struct sock *sk)
  */
 static inline int tcp_v4_hash_connect(struct sock *sk)
 {
-    unsigned short snum = inet_sk(sk)->num;
-    struct tcp_bind_hashbucket *head;
-    struct tcp_bind_bucket *tb;
+    const unsigned short snum = inet_sk(sk)->num;
+    struct inet_bind_hashbucket *head;
+    struct inet_bind_bucket *tb;
     int ret;

     if (!snum) {
@@ -666,19 +239,19 @@ static inline int tcp_v4_hash_connect(struct sock *sk)
         static u32 hint;
         u32 offset = hint + connect_port_offset(sk);
         struct hlist_node *node;
-        struct tcp_tw_bucket *tw = NULL;
+        struct inet_timewait_sock *tw = NULL;

         local_bh_disable();
         for (i = 1; i <= range; i++) {
             port = low + (i + offset) % range;
-            head = &tcp_bhash[tcp_bhashfn(port)];
+            head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
             spin_lock(&head->lock);

             /* Does not bother with rcv_saddr checks,
              * because the established check is already
              * unique enough.
              */
-            tb_for_each(tb, node, &head->chain) {
+            inet_bind_bucket_for_each(tb, node, &head->chain) {
                 if (tb->port == port) {
                     BUG_TRAP(!hlist_empty(&tb->owners));
                     if (tb->fastreuse >= 0)
@@ -691,7 +264,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk)
                 }
             }

-            tb = tcp_bucket_create(head, port);
+            tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
             if (!tb) {
                 spin_unlock(&head->lock);
                 break;
@@ -710,27 +283,27 @@ ok:
         hint += i;

         /* Head lock still held and bh's disabled */
-        tcp_bind_hash(sk, tb, port);
+        inet_bind_hash(sk, tb, port);
         if (sk_unhashed(sk)) {
             inet_sk(sk)->sport = htons(port);
-            __tcp_v4_hash(sk, 0);
+            __inet_hash(&tcp_hashinfo, sk, 0);
         }
         spin_unlock(&head->lock);

         if (tw) {
-            tcp_tw_deschedule(tw);
-            tcp_tw_put(tw);
+            inet_twsk_deschedule(tw, &tcp_death_row);;
+            inet_twsk_put(tw);
         }

         ret = 0;
         goto out;
     }

-    head = &tcp_bhash[tcp_bhashfn(snum)];
-    tb = tcp_sk(sk)->bind_hash;
+    head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
+    tb = inet_csk(sk)->icsk_bind_hash;
     spin_lock_bh(&head->lock);
     if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-        __tcp_v4_hash(sk, 0);
+        __inet_hash(&tcp_hashinfo, sk, 0);
         spin_unlock_bh(&head->lock);
         return 0;
     } else {
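inet_bind_bucket_create() is likewise the old tcp_bucket_create() (removed earlier in this file) with the slab cache made a parameter, so that each hashinfo owns its own bucket cache - visible in the call above, which passes tcp_hashinfo.bind_bucket_cachep. A sketch following that removed code:

    struct inet_bind_bucket *
    inet_bind_bucket_create(kmem_cache_t *cachep,
                            struct inet_bind_hashbucket *head,
                            const unsigned short snum)
    {
        struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC);

        if (tb) {
            tb->port      = snum;       /* the local port this bucket tracks */
            tb->fastreuse = 0;
            INIT_HLIST_HEAD(&tb->owners);
            hlist_add_head(&tb->node, &head->chain);
        }
        return tb;
    }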
@@ -793,7 +366,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
     tp->write_seq = 0;
     }

-    if (sysctl_tcp_tw_recycle &&
+    if (tcp_death_row.sysctl_tw_recycle &&
         !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
         struct inet_peer *peer = rt_get_peer(rt);

@@ -832,8 +405,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
         goto failure;

     /* OK, now commit destination to socket. */
-    __sk_dst_set(sk, &rt->u.dst);
-    tcp_v4_setup_caps(sk, &rt->u.dst);
+    sk_setup_caps(sk, &rt->u.dst);

     if (!tp->write_seq)
         tp->write_seq = secure_tcp_sequence_number(inet->saddr,
@@ -859,53 +431,6 @@ failure:
     return err;
 }

-static __inline__ int tcp_v4_iif(struct sk_buff *skb)
-{
-    return ((struct rtable *)skb->dst)->rt_iif;
-}
-
-static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
-{
-    return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
-}
-
-static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp,
-                                              struct request_sock ***prevp,
-                                              __u16 rport,
-                                              __u32 raddr, __u32 laddr)
-{
-    struct listen_sock *lopt = tp->accept_queue.listen_opt;
-    struct request_sock *req, **prev;
-
-    for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
-         (req = *prev) != NULL;
-         prev = &req->dl_next) {
-        const struct inet_request_sock *ireq = inet_rsk(req);
-
-        if (ireq->rmt_port == rport &&
-            ireq->rmt_addr == raddr &&
-            ireq->loc_addr == laddr &&
-            TCP_INET_FAMILY(req->rsk_ops->family)) {
-            BUG_TRAP(!req->sk);
-            *prevp = prev;
-            break;
-        }
-    }
-
-    return req;
-}
-
-static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req)
-{
-    struct tcp_sock *tp = tcp_sk(sk);
-    struct listen_sock *lopt = tp->accept_queue.listen_opt;
-    u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
-
-    reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
-    tcp_synq_added(sk);
-}
-
-
 /*
  * This routine does path mtu discovery as defined in RFC1191.
  */
@@ -988,14 +513,14 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
         return;
     }

-    sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr,
-                       th->source, tcp_v4_iif(skb));
+    sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
+                     th->source, inet_iif(skb));
     if (!sk) {
         ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
         return;
     }
     if (sk->sk_state == TCP_TIME_WAIT) {
-        tcp_tw_put((struct tcp_tw_bucket *)sk);
+        inet_twsk_put((struct inet_timewait_sock *)sk);
         return;
     }

@@ -1049,8 +574,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
         if (sock_owned_by_user(sk))
             goto out;

-        req = tcp_v4_search_req(tp, &prev, th->dest,
-                                iph->daddr, iph->saddr);
+        req = inet_csk_search_req(sk, &prev, th->dest,
+                                  iph->daddr, iph->saddr);
         if (!req)
             goto out;

@@ -1070,7 +595,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
          * created socket, and POSIX does not want network
          * errors returned from accept().
          */
-        tcp_synq_drop(sk, req, prev);
+        inet_csk_reqsk_queue_drop(sk, req, prev);
         goto out;

     case TCP_SYN_SENT:
@@ -1240,12 +765,13 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,

 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 {
-    struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
+    struct inet_timewait_sock *tw = inet_twsk(sk);
+    const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

-    tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
-                    tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
+    tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+                    tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent);

-    tcp_tw_put(tw);
+    inet_twsk_put(tw);
 }

 static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
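The split visible above - generic fields read through tw, TCP-only ones through tcptw - reflects the new two-level timewait object: a protocol-neutral inet_timewait_sock (which keeps, for example, tw_rcv_wscale) extended by a TCP part. A sketch of the layering, restricted to the fields this function touches:

    struct tcp_timewait_sock {
        struct inet_timewait_sock tw_sk;   /* generic part, must be first */
        u32  tw_rcv_nxt;
        u32  tw_snd_nxt;
        u32  tw_rcv_wnd;
        u32  tw_ts_recent;
        long tw_ts_recent_stamp;
    };

    static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
    {
        /* same cast trick as inet_csk(): the generic part comes first */
        return (struct tcp_timewait_sock *)sk;
    }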
@@ -1254,36 +780,6 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
                     req->ts_recent);
 }

-static struct dst_entry* tcp_v4_route_req(struct sock *sk,
-                                          struct request_sock *req)
-{
-    struct rtable *rt;
-    const struct inet_request_sock *ireq = inet_rsk(req);
-    struct ip_options *opt = inet_rsk(req)->opt;
-    struct flowi fl = { .oif = sk->sk_bound_dev_if,
-                        .nl_u = { .ip4_u =
-                                  { .daddr = ((opt && opt->srr) ?
-                                              opt->faddr :
-                                              ireq->rmt_addr),
-                                    .saddr = ireq->loc_addr,
-                                    .tos = RT_CONN_FLAGS(sk) } },
-                        .proto = IPPROTO_TCP,
-                        .uli_u = { .ports =
-                                   { .sport = inet_sk(sk)->sport,
-                                     .dport = ireq->rmt_port } } };
-
-    if (ip_route_output_flow(&rt, &fl, sk, 0)) {
-        IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
-        return NULL;
-    }
-    if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
-        ip_rt_put(rt);
-        IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
-        return NULL;
-    }
-    return &rt->u.dst;
-}
-
 /*
  * Send a SYN-ACK after having received an ACK.
  * This still operates on a request_sock only, not on a big
@@ -1297,7 +793,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
     struct sk_buff * skb;

     /* First, grab a route. */
-    if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
+    if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
         goto out;

     skb = tcp_make_synack(sk, dst, req);
@@ -1399,7 +895,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
      * limitations, they conserve resources and peer is
      * evidently real one.
      */
-    if (tcp_synq_is_full(sk) && !isn) {
+    if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
 #ifdef CONFIG_SYN_COOKIES
         if (sysctl_tcp_syncookies) {
             want_cookie = 1;
@@ -1413,7 +909,7 @@
      * clogging syn queue with openreqs with exponentially increasing
      * timeout.
      */
-    if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
+    if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
         goto drop;

     req = reqsk_alloc(&tcp_request_sock_ops);
@@ -1469,8 +965,8 @@
          * are made in the function processing timewait state.
          */
         if (tmp_opt.saw_tstamp &&
-            sysctl_tcp_tw_recycle &&
-            (dst = tcp_v4_route_req(sk, req)) != NULL &&
+            tcp_death_row.sysctl_tw_recycle &&
+            (dst = inet_csk_route_req(sk, req)) != NULL &&
             (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
             peer->v4daddr == saddr) {
             if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
@@ -1483,7 +979,7 @@
         }
         /* Kill the following clause, if you dislike this way. */
         else if (!sysctl_tcp_syncookies &&
-                 (sysctl_max_syn_backlog - tcp_synq_len(sk) <
+                 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
                   (sysctl_max_syn_backlog >> 2)) &&
                  (!peer || !peer->tcp_ts_stamp) &&
                  (!dst || !dst_metric(dst, RTAX_RTT))) {
@@ -1494,12 +990,10 @@
              * to destinations, already remembered
              * to the moment of synflood.
              */
-            NETDEBUG(if (net_ratelimit()) \
-                    printk(KERN_DEBUG "TCP: drop open "
-                                      "request from %u.%u."
-                                      "%u.%u/%u\n", \
-                           NIPQUAD(saddr),
-                           ntohs(skb->h.th->source)));
+            LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
+                           "request from %u.%u.%u.%u/%u\n",
+                           NIPQUAD(saddr),
+                           ntohs(skb->h.th->source));
             dst_release(dst);
             goto drop_and_free;
         }
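LIMIT_NETDEBUG folds the open-coded NETDEBUG/net_ratelimit()/printk() combination into a single macro. Something along these lines (a sketch from memory, not quoted from the header):

    /* Rate-limited debug printk, replacing
     * NETDEBUG(if (net_ratelimit()) printk(...)) at both sites in
     * this file. */
    #define LIMIT_NETDEBUG(fmt, args...) \
        do { if (net_ratelimit()) printk(fmt, ##args); } while (0)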
@@ -1514,7 +1008,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
     if (want_cookie) {
         reqsk_free(req);
     } else {
-        tcp_v4_synq_add(sk, req);
+        inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
     }
     return 0;

@@ -1542,15 +1036,14 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
     if (sk_acceptq_is_full(sk))
         goto exit_overflow;

-    if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
+    if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
         goto exit;

     newsk = tcp_create_openreq_child(sk, req, skb);
     if (!newsk)
         goto exit;

-    newsk->sk_dst_cache = dst;
-    tcp_v4_setup_caps(newsk, dst);
+    sk_setup_caps(newsk, dst);

     newtp = tcp_sk(newsk);
     newinet = inet_sk(newsk);
@@ -1560,7 +1053,7 @@
     newinet->saddr = ireq->loc_addr;
     newinet->opt = ireq->opt;
     ireq->opt = NULL;
-    newinet->mc_index = tcp_v4_iif(skb);
+    newinet->mc_index = inet_iif(skb);
     newinet->mc_ttl = skb->nh.iph->ttl;
     newtp->ext_header_len = 0;
     if (newinet->opt)
@@ -1571,8 +1064,8 @@
     newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
     tcp_initialize_rcv_mss(newsk);

-    __tcp_v4_hash(newsk, 0);
-    __tcp_inherit_port(sk, newsk);
+    __inet_hash(&tcp_hashinfo, newsk, 0);
+    __inet_inherit_port(&tcp_hashinfo, sk, newsk);

     return newsk;

@@ -1588,27 +1081,24 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 {
     struct tcphdr *th = skb->h.th;
     struct iphdr *iph = skb->nh.iph;
-    struct tcp_sock *tp = tcp_sk(sk);
     struct sock *nsk;
     struct request_sock **prev;
     /* Find possible connection requests. */
-    struct request_sock *req = tcp_v4_search_req(tp, &prev, th->source,
-                                                 iph->saddr, iph->daddr);
+    struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
                                                    iph->saddr, iph->daddr);
     if (req)
         return tcp_check_req(sk, skb, req, prev);

-    nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr,
-                                      th->source,
-                                      skb->nh.iph->daddr,
-                                      ntohs(th->dest),
-                                      tcp_v4_iif(skb));
+    nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
+                                    th->source, skb->nh.iph->daddr,
+                                    ntohs(th->dest), inet_iif(skb));

     if (nsk) {
         if (nsk->sk_state != TCP_TIME_WAIT) {
             bh_lock_sock(nsk);
             return nsk;
         }
-        tcp_tw_put((struct tcp_tw_bucket *)nsk);
+        inet_twsk_put((struct inet_timewait_sock *)nsk);
         return NULL;
     }

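__inet_lookup_established() and inet_lookup_listener() are the removed __tcp_v4_lookup_established()/tcp_v4_lookup_listener() with the hash tables passed in. The combined helper keeps the shape of the removed __tcp_v4_lookup(); a sketch, assuming the argument order visible at the call sites in this file:

    static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo,
                                             const u32 saddr, const u16 sport,
                                             const u32 daddr, const u16 hnum,
                                             const int dif)
    {
        struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport,
                                                    daddr, hnum, dif);

        /* Established (and timewait) entries first; only fall back to
         * the listening hash on a miss, exactly as before. */
        return sk ? : inet_lookup_listener(hashinfo, daddr, hnum, dif);
    }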
@@ -1627,8 +1117,7 @@ static int tcp_v4_checksum_init(struct sk_buff *skb)
                               skb->nh.iph->daddr, skb->csum))
             return 0;

-        NETDEBUG(if (net_ratelimit())
-                printk(KERN_DEBUG "hw tcp v4 csum failed\n"));
+        LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v4 csum failed\n");
         skb->ip_summed = CHECKSUM_NONE;
     }
     if (skb->len <= 76) {
@@ -1744,9 +1233,9 @@ int tcp_v4_rcv(struct sk_buff *skb)
     TCP_SKB_CB(skb)->flags = skb->nh.iph->tos;
     TCP_SKB_CB(skb)->sacked = 0;

-    sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source,
-                         skb->nh.iph->daddr, ntohs(th->dest),
-                         tcp_v4_iif(skb));
+    sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
+                       skb->nh.iph->daddr, ntohs(th->dest),
+                       inet_iif(skb));

     if (!sk)
         goto no_tcp_socket;
@@ -1798,24 +1287,26 @@ discard_and_relse:

 do_time_wait:
     if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-        tcp_tw_put((struct tcp_tw_bucket *) sk);
+        inet_twsk_put((struct inet_timewait_sock *) sk);
         goto discard_it;
     }

     if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
         TCP_INC_STATS_BH(TCP_MIB_INERRS);
-        tcp_tw_put((struct tcp_tw_bucket *) sk);
+        inet_twsk_put((struct inet_timewait_sock *) sk);
         goto discard_it;
     }
-    switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
-                                       skb, th, skb->len)) {
+    switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
+                                       skb, th)) {
     case TCP_TW_SYN: {
-        struct sock *sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr,
-                                                  ntohs(th->dest),
-                                                  tcp_v4_iif(skb));
+        struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
+                                                skb->nh.iph->daddr,
+                                                ntohs(th->dest),
+                                                inet_iif(skb));
         if (sk2) {
-            tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
-            tcp_tw_put((struct tcp_tw_bucket *)sk);
+            inet_twsk_deschedule((struct inet_timewait_sock *)sk,
+                                 &tcp_death_row);
+            inet_twsk_put((struct inet_timewait_sock *)sk);
             sk = sk2;
             goto process;
         }
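Timewait reaping state (the recycling timers and slots that used to be TCP file-scope globals) now travels in tcp_death_row, so inet_twsk_deschedule() has to be told which death row the socket sits on. A hypothetical caller (the function name is illustrative), mirroring the pattern used throughout this hunk:

    /* Take the timewait socket off its death row, then drop the
     * reference that the lookup took. */
    static void kill_tw(struct sock *sk)
    {
        struct inet_timewait_sock *tw = inet_twsk(sk);

        inet_twsk_deschedule(tw, &tcp_death_row);
        inet_twsk_put(tw);
    }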
@@ -1831,112 +1322,6 @@ do_time_wait:
     goto discard_it;
 }

-/* With per-bucket locks this operation is not-atomic, so that
- * this version is not worse.
- */
-static void __tcp_v4_rehash(struct sock *sk)
-{
-    sk->sk_prot->unhash(sk);
-    sk->sk_prot->hash(sk);
-}
-
-static int tcp_v4_reselect_saddr(struct sock *sk)
-{
-    struct inet_sock *inet = inet_sk(sk);
-    int err;
-    struct rtable *rt;
-    __u32 old_saddr = inet->saddr;
-    __u32 new_saddr;
-    __u32 daddr = inet->daddr;
-
-    if (inet->opt && inet->opt->srr)
-        daddr = inet->opt->faddr;
-
-    /* Query new route. */
-    err = ip_route_connect(&rt, daddr, 0,
-                           RT_CONN_FLAGS(sk),
-                           sk->sk_bound_dev_if,
-                           IPPROTO_TCP,
-                           inet->sport, inet->dport, sk);
-    if (err)
-        return err;
-
-    __sk_dst_set(sk, &rt->u.dst);
-    tcp_v4_setup_caps(sk, &rt->u.dst);
-
-    new_saddr = rt->rt_src;
-
-    if (new_saddr == old_saddr)
-        return 0;
-
-    if (sysctl_ip_dynaddr > 1) {
-        printk(KERN_INFO "tcp_v4_rebuild_header(): shifting inet->"
-                         "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n",
-               NIPQUAD(old_saddr),
-               NIPQUAD(new_saddr));
-    }
-
-    inet->saddr = new_saddr;
-    inet->rcv_saddr = new_saddr;
-
-    /* XXX The only one ugly spot where we need to
-     * XXX really change the sockets identity after
-     * XXX it has entered the hashes. -DaveM
-     *
-     * Besides that, it does not check for connection
-     * uniqueness. Wait for troubles.
-     */
-    __tcp_v4_rehash(sk);
-    return 0;
-}
-
-int tcp_v4_rebuild_header(struct sock *sk)
-{
-    struct inet_sock *inet = inet_sk(sk);
-    struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
-    u32 daddr;
-    int err;
-
-    /* Route is OK, nothing to do. */
-    if (rt)
-        return 0;
-
-    /* Reroute. */
-    daddr = inet->daddr;
-    if (inet->opt && inet->opt->srr)
-        daddr = inet->opt->faddr;
-
-    {
-        struct flowi fl = { .oif = sk->sk_bound_dev_if,
-                            .nl_u = { .ip4_u =
-                                      { .daddr = daddr,
-                                        .saddr = inet->saddr,
-                                        .tos = RT_CONN_FLAGS(sk) } },
-                            .proto = IPPROTO_TCP,
-                            .uli_u = { .ports =
-                                       { .sport = inet->sport,
-                                         .dport = inet->dport } } };
-
-        err = ip_route_output_flow(&rt, &fl, sk, 0);
-    }
-    if (!err) {
-        __sk_dst_set(sk, &rt->u.dst);
-        tcp_v4_setup_caps(sk, &rt->u.dst);
-        return 0;
-    }
-
-    /* Routing failed... */
-    sk->sk_route_caps = 0;
-
-    if (!sysctl_ip_dynaddr ||
-        sk->sk_state != TCP_SYN_SENT ||
-        (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
-        (err = tcp_v4_reselect_saddr(sk)) != 0)
-        sk->sk_err_soft = -err;
-
-    return err;
-}
-
 static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
 {
     struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
@@ -1985,18 +1370,18 @@ int tcp_v4_remember_stamp(struct sock *sk)
1985 return 0; 1370 return 0;
1986} 1371}
1987 1372
1988int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw) 1373int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1989{ 1374{
1990 struct inet_peer *peer = NULL; 1375 struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1991
1992 peer = inet_getpeer(tw->tw_daddr, 1);
1993 1376
1994 if (peer) { 1377 if (peer) {
1995 if ((s32)(peer->tcp_ts - tw->tw_ts_recent) <= 0 || 1378 const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1379
1380 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1996 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && 1381 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
1997 peer->tcp_ts_stamp <= tw->tw_ts_recent_stamp)) { 1382 peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1998 peer->tcp_ts_stamp = tw->tw_ts_recent_stamp; 1383 peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1999 peer->tcp_ts = tw->tw_ts_recent; 1384 peer->tcp_ts = tcptw->tw_ts_recent;
2000 } 1385 }
2001 inet_putpeer(peer); 1386 inet_putpeer(peer);
2002 return 1; 1387 return 1;
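
Aside: the (s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 test above is serial-number arithmetic, not a plain comparison; subtracting in 32 bits and testing the sign of the result stays correct when the timestamp counter wraps. A self-contained sketch of the difference (the values are made up):

#include <stdio.h>
#include <stdint.h>

/* "a is not newer than b", valid across 32-bit wraparound */
static int ts_before_eq(uint32_t a, uint32_t b)
{
        return (int32_t)(a - b) <= 0;
}

int main(void)
{
        uint32_t older = 0xfffffff0u;   /* just before the counter wraps */
        uint32_t newer = 0x00000010u;   /* just after it wraps */

        printf("naive older <= newer: %d\n", older <= newer);  /* 0, wrong */
        printf("ts_before_eq(older, newer): %d\n",
               ts_before_eq(older, newer));                    /* 1, right */
        return 0;
}

The same wraparound-safe idiom underlies the before()/after() sequence-number helpers used throughout the TCP code.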
@@ -2008,7 +1393,7 @@ int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw)
2008struct tcp_func ipv4_specific = { 1393struct tcp_func ipv4_specific = {
2009 .queue_xmit = ip_queue_xmit, 1394 .queue_xmit = ip_queue_xmit,
2010 .send_check = tcp_v4_send_check, 1395 .send_check = tcp_v4_send_check,
2011 .rebuild_header = tcp_v4_rebuild_header, 1396 .rebuild_header = inet_sk_rebuild_header,
2012 .conn_request = tcp_v4_conn_request, 1397 .conn_request = tcp_v4_conn_request,
2013 .syn_recv_sock = tcp_v4_syn_recv_sock, 1398 .syn_recv_sock = tcp_v4_syn_recv_sock,
2014 .remember_stamp = tcp_v4_remember_stamp, 1399 .remember_stamp = tcp_v4_remember_stamp,
@@ -2024,13 +1409,14 @@ struct tcp_func ipv4_specific = {
2024 */ 1409 */
2025static int tcp_v4_init_sock(struct sock *sk) 1410static int tcp_v4_init_sock(struct sock *sk)
2026{ 1411{
1412 struct inet_connection_sock *icsk = inet_csk(sk);
2027 struct tcp_sock *tp = tcp_sk(sk); 1413 struct tcp_sock *tp = tcp_sk(sk);
2028 1414
2029 skb_queue_head_init(&tp->out_of_order_queue); 1415 skb_queue_head_init(&tp->out_of_order_queue);
2030 tcp_init_xmit_timers(sk); 1416 tcp_init_xmit_timers(sk);
2031 tcp_prequeue_init(tp); 1417 tcp_prequeue_init(tp);
2032 1418
2033 tp->rto = TCP_TIMEOUT_INIT; 1419 icsk->icsk_rto = TCP_TIMEOUT_INIT;
2034 tp->mdev = TCP_TIMEOUT_INIT; 1420 tp->mdev = TCP_TIMEOUT_INIT;
2035 1421
2036 /* So many TCP implementations out there (incorrectly) count the 1422 /* So many TCP implementations out there (incorrectly) count the
@@ -2048,7 +1434,7 @@ static int tcp_v4_init_sock(struct sock *sk)
2048 tp->mss_cache = 536; 1434 tp->mss_cache = 536;
2049 1435
2050 tp->reordering = sysctl_tcp_reordering; 1436 tp->reordering = sysctl_tcp_reordering;
2051 tp->ca_ops = &tcp_init_congestion_ops; 1437 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
2052 1438
2053 sk->sk_state = TCP_CLOSE; 1439 sk->sk_state = TCP_CLOSE;
2054 1440
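
Aside: inet_csk(sk) can coexist with tcp_sk(sk) on the same pointer because struct tcp_sock begins with a struct inet_connection_sock, which itself begins with the common socket state, so one allocation is viewable at any layer. A toy model of the containment (the layouts are trimmed to two fields each for illustration; the real structs are much larger):

#include <stdio.h>

struct inet_connection_sock {
        unsigned long icsk_rto;                 /* retransmission timeout */
};

struct tcp_sock {
        struct inet_connection_sock icsk;       /* must be the first member */
        unsigned long mdev;                     /* RTT deviation estimate */
};

/* Safe: a struct and its first member share the same address in C. */
static struct inet_connection_sock *inet_csk(struct tcp_sock *tp)
{
        return (struct inet_connection_sock *)tp;
}

int main(void)
{
        struct tcp_sock tp = { .icsk.icsk_rto = 3, .mdev = 3 };

        inet_csk(&tp)->icsk_rto *= 2;
        printf("rto=%lu mdev=%lu\n", tp.icsk.icsk_rto, tp.mdev); /* rto=6 mdev=3 */
        return 0;
}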
@@ -2071,7 +1457,7 @@ int tcp_v4_destroy_sock(struct sock *sk)
2071 1457
2072 tcp_clear_xmit_timers(sk); 1458 tcp_clear_xmit_timers(sk);
2073 1459
2074 tcp_cleanup_congestion_control(tp); 1460 tcp_cleanup_congestion_control(sk);
2075 1461
2076 /* Clean up the write buffer. */ 1462
2077 sk_stream_writequeue_purge(sk); 1463 sk_stream_writequeue_purge(sk);
@@ -2083,8 +1469,8 @@ int tcp_v4_destroy_sock(struct sock *sk)
2083 __skb_queue_purge(&tp->ucopy.prequeue); 1469 __skb_queue_purge(&tp->ucopy.prequeue);
2084 1470
2085 /* Clean up a referenced TCP bind bucket. */ 1471 /* Clean up a referenced TCP bind bucket. */
2086 if (tp->bind_hash) 1472 if (inet_csk(sk)->icsk_bind_hash)
2087 tcp_put_port(sk); 1473 inet_put_port(&tcp_hashinfo, sk);
2088 1474
2089 /* 1475 /*
2090 * If sendmsg cached page exists, toss it. 1476 * If sendmsg cached page exists, toss it.
@@ -2104,13 +1490,13 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
2104#ifdef CONFIG_PROC_FS 1490#ifdef CONFIG_PROC_FS
2105/* Proc filesystem TCP sock list dumping. */ 1491/* Proc filesystem TCP sock list dumping. */
2106 1492
2107static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head) 1493static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
2108{ 1494{
2109 return hlist_empty(head) ? NULL : 1495 return hlist_empty(head) ? NULL :
2110 list_entry(head->first, struct tcp_tw_bucket, tw_node); 1496 list_entry(head->first, struct inet_timewait_sock, tw_node);
2111} 1497}
2112 1498
2113static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw) 1499static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
2114{ 1500{
2115 return tw->tw_node.next ? 1501 return tw->tw_node.next ?
2116 hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; 1502 hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
@@ -2118,14 +1504,14 @@ static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw)
2118 1504
2119static void *listening_get_next(struct seq_file *seq, void *cur) 1505static void *listening_get_next(struct seq_file *seq, void *cur)
2120{ 1506{
2121 struct tcp_sock *tp; 1507 struct inet_connection_sock *icsk;
2122 struct hlist_node *node; 1508 struct hlist_node *node;
2123 struct sock *sk = cur; 1509 struct sock *sk = cur;
2124 struct tcp_iter_state* st = seq->private; 1510 struct tcp_iter_state* st = seq->private;
2125 1511
2126 if (!sk) { 1512 if (!sk) {
2127 st->bucket = 0; 1513 st->bucket = 0;
2128 sk = sk_head(&tcp_listening_hash[0]); 1514 sk = sk_head(&tcp_hashinfo.listening_hash[0]);
2129 goto get_sk; 1515 goto get_sk;
2130 } 1516 }
2131 1517
@@ -2134,7 +1520,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
2134 if (st->state == TCP_SEQ_STATE_OPENREQ) { 1520 if (st->state == TCP_SEQ_STATE_OPENREQ) {
2135 struct request_sock *req = cur; 1521 struct request_sock *req = cur;
2136 1522
2137 tp = tcp_sk(st->syn_wait_sk); 1523 icsk = inet_csk(st->syn_wait_sk);
2138 req = req->dl_next; 1524 req = req->dl_next;
2139 while (1) { 1525 while (1) {
2140 while (req) { 1526 while (req) {
@@ -2147,17 +1533,17 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
2147 if (++st->sbucket >= TCP_SYNQ_HSIZE) 1533 if (++st->sbucket >= TCP_SYNQ_HSIZE)
2148 break; 1534 break;
2149get_req: 1535get_req:
2150 req = tp->accept_queue.listen_opt->syn_table[st->sbucket]; 1536 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2151 } 1537 }
2152 sk = sk_next(st->syn_wait_sk); 1538 sk = sk_next(st->syn_wait_sk);
2153 st->state = TCP_SEQ_STATE_LISTENING; 1539 st->state = TCP_SEQ_STATE_LISTENING;
2154 read_unlock_bh(&tp->accept_queue.syn_wait_lock); 1540 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2155 } else { 1541 } else {
2156 tp = tcp_sk(sk); 1542 icsk = inet_csk(sk);
2157 read_lock_bh(&tp->accept_queue.syn_wait_lock); 1543 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2158 if (reqsk_queue_len(&tp->accept_queue)) 1544 if (reqsk_queue_len(&icsk->icsk_accept_queue))
2159 goto start_req; 1545 goto start_req;
2160 read_unlock_bh(&tp->accept_queue.syn_wait_lock); 1546 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2161 sk = sk_next(sk); 1547 sk = sk_next(sk);
2162 } 1548 }
2163get_sk: 1549get_sk:
@@ -2166,9 +1552,9 @@ get_sk:
2166 cur = sk; 1552 cur = sk;
2167 goto out; 1553 goto out;
2168 } 1554 }
2169 tp = tcp_sk(sk); 1555 icsk = inet_csk(sk);
2170 read_lock_bh(&tp->accept_queue.syn_wait_lock); 1556 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2171 if (reqsk_queue_len(&tp->accept_queue)) { 1557 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2172start_req: 1558start_req:
2173 st->uid = sock_i_uid(sk); 1559 st->uid = sock_i_uid(sk);
2174 st->syn_wait_sk = sk; 1560 st->syn_wait_sk = sk;
@@ -2176,10 +1562,10 @@ start_req:
2176 st->sbucket = 0; 1562 st->sbucket = 0;
2177 goto get_req; 1563 goto get_req;
2178 } 1564 }
2179 read_unlock_bh(&tp->accept_queue.syn_wait_lock); 1565 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2180 } 1566 }
2181 if (++st->bucket < TCP_LHTABLE_SIZE) { 1567 if (++st->bucket < INET_LHTABLE_SIZE) {
2182 sk = sk_head(&tcp_listening_hash[st->bucket]); 1568 sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
2183 goto get_sk; 1569 goto get_sk;
2184 } 1570 }
2185 cur = NULL; 1571 cur = NULL;
@@ -2203,16 +1589,16 @@ static void *established_get_first(struct seq_file *seq)
2203 struct tcp_iter_state* st = seq->private; 1589 struct tcp_iter_state* st = seq->private;
2204 void *rc = NULL; 1590 void *rc = NULL;
2205 1591
2206 for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) { 1592 for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
2207 struct sock *sk; 1593 struct sock *sk;
2208 struct hlist_node *node; 1594 struct hlist_node *node;
2209 struct tcp_tw_bucket *tw; 1595 struct inet_timewait_sock *tw;
2210 1596
2211 /* We can reschedule _before_ having picked the target: */ 1597 /* We can reschedule _before_ having picked the target: */
2212 cond_resched_softirq(); 1598 cond_resched_softirq();
2213 1599
2214 read_lock(&tcp_ehash[st->bucket].lock); 1600 read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
2215 sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) { 1601 sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2216 if (sk->sk_family != st->family) { 1602 if (sk->sk_family != st->family) {
2217 continue; 1603 continue;
2218 } 1604 }
@@ -2220,15 +1606,15 @@ static void *established_get_first(struct seq_file *seq)
2220 goto out; 1606 goto out;
2221 } 1607 }
2222 st->state = TCP_SEQ_STATE_TIME_WAIT; 1608 st->state = TCP_SEQ_STATE_TIME_WAIT;
2223 tw_for_each(tw, node, 1609 inet_twsk_for_each(tw, node,
2224 &tcp_ehash[st->bucket + tcp_ehash_size].chain) { 1610 &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
2225 if (tw->tw_family != st->family) { 1611 if (tw->tw_family != st->family) {
2226 continue; 1612 continue;
2227 } 1613 }
2228 rc = tw; 1614 rc = tw;
2229 goto out; 1615 goto out;
2230 } 1616 }
2231 read_unlock(&tcp_ehash[st->bucket].lock); 1617 read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2232 st->state = TCP_SEQ_STATE_ESTABLISHED; 1618 st->state = TCP_SEQ_STATE_ESTABLISHED;
2233 } 1619 }
2234out: 1620out:
@@ -2238,7 +1624,7 @@ out:
2238static void *established_get_next(struct seq_file *seq, void *cur) 1624static void *established_get_next(struct seq_file *seq, void *cur)
2239{ 1625{
2240 struct sock *sk = cur; 1626 struct sock *sk = cur;
2241 struct tcp_tw_bucket *tw; 1627 struct inet_timewait_sock *tw;
2242 struct hlist_node *node; 1628 struct hlist_node *node;
2243 struct tcp_iter_state* st = seq->private; 1629 struct tcp_iter_state* st = seq->private;
2244 1630
@@ -2255,15 +1641,15 @@ get_tw:
2255 cur = tw; 1641 cur = tw;
2256 goto out; 1642 goto out;
2257 } 1643 }
2258 read_unlock(&tcp_ehash[st->bucket].lock); 1644 read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2259 st->state = TCP_SEQ_STATE_ESTABLISHED; 1645 st->state = TCP_SEQ_STATE_ESTABLISHED;
2260 1646
2261 /* We can reschedule between buckets: */ 1647 /* We can reschedule between buckets: */
2262 cond_resched_softirq(); 1648 cond_resched_softirq();
2263 1649
2264 if (++st->bucket < tcp_ehash_size) { 1650 if (++st->bucket < tcp_hashinfo.ehash_size) {
2265 read_lock(&tcp_ehash[st->bucket].lock); 1651 read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
2266 sk = sk_head(&tcp_ehash[st->bucket].chain); 1652 sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
2267 } else { 1653 } else {
2268 cur = NULL; 1654 cur = NULL;
2269 goto out; 1655 goto out;
@@ -2277,7 +1663,7 @@ get_tw:
2277 } 1663 }
2278 1664
2279 st->state = TCP_SEQ_STATE_TIME_WAIT; 1665 st->state = TCP_SEQ_STATE_TIME_WAIT;
2280 tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain); 1666 tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
2281 goto get_tw; 1667 goto get_tw;
2282found: 1668found:
2283 cur = sk; 1669 cur = sk;
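
Aside: the st->bucket + tcp_hashinfo.ehash_size indexing above works because the established hash table is allocated at twice its nominal size, with live connections in the first half and their TIME-WAIT shadows in the mirrored second half, one lock covering each chain pair. A sketch of the mirrored indexing (EHASH_SIZE is an assumed toy value):

#include <stdio.h>

#define EHASH_SIZE 4    /* nominal size; the real table holds 2 * EHASH_SIZE chains */

/* Chain holding the TIME-WAIT entries that mirror established chain b. */
static unsigned int tw_chain(unsigned int b)
{
        return b + EHASH_SIZE;
}

int main(void)
{
        for (unsigned int b = 0; b < EHASH_SIZE; b++)
                printf("established[%u] <-> timewait[%u]\n", b, tw_chain(b));
        return 0;
}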
@@ -2301,12 +1687,12 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2301 void *rc; 1687 void *rc;
2302 struct tcp_iter_state* st = seq->private; 1688 struct tcp_iter_state* st = seq->private;
2303 1689
2304 tcp_listen_lock(); 1690 inet_listen_lock(&tcp_hashinfo);
2305 st->state = TCP_SEQ_STATE_LISTENING; 1691 st->state = TCP_SEQ_STATE_LISTENING;
2306 rc = listening_get_idx(seq, &pos); 1692 rc = listening_get_idx(seq, &pos);
2307 1693
2308 if (!rc) { 1694 if (!rc) {
2309 tcp_listen_unlock(); 1695 inet_listen_unlock(&tcp_hashinfo);
2310 local_bh_disable(); 1696 local_bh_disable();
2311 st->state = TCP_SEQ_STATE_ESTABLISHED; 1697 st->state = TCP_SEQ_STATE_ESTABLISHED;
2312 rc = established_get_idx(seq, pos); 1698 rc = established_get_idx(seq, pos);
@@ -2339,7 +1725,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2339 case TCP_SEQ_STATE_LISTENING: 1725 case TCP_SEQ_STATE_LISTENING:
2340 rc = listening_get_next(seq, v); 1726 rc = listening_get_next(seq, v);
2341 if (!rc) { 1727 if (!rc) {
2342 tcp_listen_unlock(); 1728 inet_listen_unlock(&tcp_hashinfo);
2343 local_bh_disable(); 1729 local_bh_disable();
2344 st->state = TCP_SEQ_STATE_ESTABLISHED; 1730 st->state = TCP_SEQ_STATE_ESTABLISHED;
2345 rc = established_get_first(seq); 1731 rc = established_get_first(seq);
@@ -2362,17 +1748,17 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
2362 switch (st->state) { 1748 switch (st->state) {
2363 case TCP_SEQ_STATE_OPENREQ: 1749 case TCP_SEQ_STATE_OPENREQ:
2364 if (v) { 1750 if (v) {
2365 struct tcp_sock *tp = tcp_sk(st->syn_wait_sk); 1751 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2366 read_unlock_bh(&tp->accept_queue.syn_wait_lock); 1752 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2367 } 1753 }
2368 case TCP_SEQ_STATE_LISTENING: 1754 case TCP_SEQ_STATE_LISTENING:
2369 if (v != SEQ_START_TOKEN) 1755 if (v != SEQ_START_TOKEN)
2370 tcp_listen_unlock(); 1756 inet_listen_unlock(&tcp_hashinfo);
2371 break; 1757 break;
2372 case TCP_SEQ_STATE_TIME_WAIT: 1758 case TCP_SEQ_STATE_TIME_WAIT:
2373 case TCP_SEQ_STATE_ESTABLISHED: 1759 case TCP_SEQ_STATE_ESTABLISHED:
2374 if (v) 1760 if (v)
2375 read_unlock(&tcp_ehash[st->bucket].lock); 1761 read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2376 local_bh_enable(); 1762 local_bh_enable();
2377 break; 1763 break;
2378 } 1764 }
@@ -2469,18 +1855,19 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
2469 int timer_active; 1855 int timer_active;
2470 unsigned long timer_expires; 1856 unsigned long timer_expires;
2471 struct tcp_sock *tp = tcp_sk(sp); 1857 struct tcp_sock *tp = tcp_sk(sp);
1858 const struct inet_connection_sock *icsk = inet_csk(sp);
2472 struct inet_sock *inet = inet_sk(sp); 1859 struct inet_sock *inet = inet_sk(sp);
2473 unsigned int dest = inet->daddr; 1860 unsigned int dest = inet->daddr;
2474 unsigned int src = inet->rcv_saddr; 1861 unsigned int src = inet->rcv_saddr;
2475 __u16 destp = ntohs(inet->dport); 1862 __u16 destp = ntohs(inet->dport);
2476 __u16 srcp = ntohs(inet->sport); 1863 __u16 srcp = ntohs(inet->sport);
2477 1864
2478 if (tp->pending == TCP_TIME_RETRANS) { 1865 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2479 timer_active = 1; 1866 timer_active = 1;
2480 timer_expires = tp->timeout; 1867 timer_expires = icsk->icsk_timeout;
2481 } else if (tp->pending == TCP_TIME_PROBE0) { 1868 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2482 timer_active = 4; 1869 timer_active = 4;
2483 timer_expires = tp->timeout; 1870 timer_expires = icsk->icsk_timeout;
2484 } else if (timer_pending(&sp->sk_timer)) { 1871 } else if (timer_pending(&sp->sk_timer)) {
2485 timer_active = 2; 1872 timer_active = 2;
2486 timer_expires = sp->sk_timer.expires; 1873 timer_expires = sp->sk_timer.expires;
@@ -2495,17 +1882,19 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
2495 tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq, 1882 tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
2496 timer_active, 1883 timer_active,
2497 jiffies_to_clock_t(timer_expires - jiffies), 1884 jiffies_to_clock_t(timer_expires - jiffies),
2498 tp->retransmits, 1885 icsk->icsk_retransmits,
2499 sock_i_uid(sp), 1886 sock_i_uid(sp),
2500 tp->probes_out, 1887 icsk->icsk_probes_out,
2501 sock_i_ino(sp), 1888 sock_i_ino(sp),
2502 atomic_read(&sp->sk_refcnt), sp, 1889 atomic_read(&sp->sk_refcnt), sp,
2503 tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong, 1890 icsk->icsk_rto,
1891 icsk->icsk_ack.ato,
1892 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2504 tp->snd_cwnd, 1893 tp->snd_cwnd,
2505 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh); 1894 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
2506} 1895}
2507 1896
2508static void get_timewait4_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i) 1897static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i)
2509{ 1898{
2510 unsigned int dest, src; 1899 unsigned int dest, src;
2511 __u16 destp, srcp; 1900 __u16 destp, srcp;
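
Aside: the timer_active codes assigned above end up in the timer field of each /proc/net/tcp line: 1 for a pending retransmit timer, 4 for a zero-window probe, 2 for the keepalive sk_timer, 0 when nothing is armed. A decoder sketch restricted to the values this function emits (other codes may appear elsewhere in the kernel):

#include <stdio.h>

static const char *timer_name(int timer_active)
{
        switch (timer_active) {
        case 1:  return "retransmit";           /* ICSK_TIME_RETRANS */
        case 4:  return "zero-window probe";    /* ICSK_TIME_PROBE0 */
        case 2:  return "keepalive";            /* sk_timer pending */
        default: return "none";
        }
}

int main(void)
{
        int codes[] = { 0, 1, 2, 4 };

        for (int i = 0; i < 4; i++)
                printf("%d -> %s\n", codes[i], timer_name(codes[i]));
        return 0;
}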
@@ -2585,7 +1974,7 @@ struct proto tcp_prot = {
2585 .close = tcp_close, 1974 .close = tcp_close,
2586 .connect = tcp_v4_connect, 1975 .connect = tcp_v4_connect,
2587 .disconnect = tcp_disconnect, 1976 .disconnect = tcp_disconnect,
2588 .accept = tcp_accept, 1977 .accept = inet_csk_accept,
2589 .ioctl = tcp_ioctl, 1978 .ioctl = tcp_ioctl,
2590 .init = tcp_v4_init_sock, 1979 .init = tcp_v4_init_sock,
2591 .destroy = tcp_v4_destroy_sock, 1980 .destroy = tcp_v4_destroy_sock,
@@ -2600,6 +1989,7 @@ struct proto tcp_prot = {
2600 .get_port = tcp_v4_get_port, 1989 .get_port = tcp_v4_get_port,
2601 .enter_memory_pressure = tcp_enter_memory_pressure, 1990 .enter_memory_pressure = tcp_enter_memory_pressure,
2602 .sockets_allocated = &tcp_sockets_allocated, 1991 .sockets_allocated = &tcp_sockets_allocated,
1992 .orphan_count = &tcp_orphan_count,
2603 .memory_allocated = &tcp_memory_allocated, 1993 .memory_allocated = &tcp_memory_allocated,
2604 .memory_pressure = &tcp_memory_pressure, 1994 .memory_pressure = &tcp_memory_pressure,
2605 .sysctl_mem = sysctl_tcp_mem, 1995 .sysctl_mem = sysctl_tcp_mem,
@@ -2607,6 +1997,7 @@ struct proto tcp_prot = {
2607 .sysctl_rmem = sysctl_tcp_rmem, 1997 .sysctl_rmem = sysctl_tcp_rmem,
2608 .max_header = MAX_TCP_HEADER, 1998 .max_header = MAX_TCP_HEADER,
2609 .obj_size = sizeof(struct tcp_sock), 1999 .obj_size = sizeof(struct tcp_sock),
2000 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
2610 .rsk_prot = &tcp_request_sock_ops, 2001 .rsk_prot = &tcp_request_sock_ops,
2611}; 2002};
2612 2003
@@ -2628,19 +2019,13 @@ void __init tcp_v4_init(struct net_proto_family *ops)
2628} 2019}
2629 2020
2630EXPORT_SYMBOL(ipv4_specific); 2021EXPORT_SYMBOL(ipv4_specific);
2631EXPORT_SYMBOL(tcp_bind_hash); 2022EXPORT_SYMBOL(inet_bind_bucket_create);
2632EXPORT_SYMBOL(tcp_bucket_create);
2633EXPORT_SYMBOL(tcp_hashinfo); 2023EXPORT_SYMBOL(tcp_hashinfo);
2634EXPORT_SYMBOL(tcp_inherit_port);
2635EXPORT_SYMBOL(tcp_listen_wlock);
2636EXPORT_SYMBOL(tcp_port_rover);
2637EXPORT_SYMBOL(tcp_prot); 2024EXPORT_SYMBOL(tcp_prot);
2638EXPORT_SYMBOL(tcp_put_port);
2639EXPORT_SYMBOL(tcp_unhash); 2025EXPORT_SYMBOL(tcp_unhash);
2640EXPORT_SYMBOL(tcp_v4_conn_request); 2026EXPORT_SYMBOL(tcp_v4_conn_request);
2641EXPORT_SYMBOL(tcp_v4_connect); 2027EXPORT_SYMBOL(tcp_v4_connect);
2642EXPORT_SYMBOL(tcp_v4_do_rcv); 2028EXPORT_SYMBOL(tcp_v4_do_rcv);
2643EXPORT_SYMBOL(tcp_v4_rebuild_header);
2644EXPORT_SYMBOL(tcp_v4_remember_stamp); 2029EXPORT_SYMBOL(tcp_v4_remember_stamp);
2645EXPORT_SYMBOL(tcp_v4_send_check); 2030EXPORT_SYMBOL(tcp_v4_send_check);
2646EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 2031EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f42a284164b7..a88db28b0af7 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -35,13 +35,27 @@
35#define SYNC_INIT 1 35#define SYNC_INIT 1
36#endif 36#endif
37 37
38int sysctl_tcp_tw_recycle;
39int sysctl_tcp_max_tw_buckets = NR_FILE*2;
40
41int sysctl_tcp_syncookies = SYNC_INIT; 38int sysctl_tcp_syncookies = SYNC_INIT;
42int sysctl_tcp_abort_on_overflow; 39int sysctl_tcp_abort_on_overflow;
43 40
44static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo); 41struct inet_timewait_death_row tcp_death_row = {
42 .sysctl_max_tw_buckets = NR_FILE * 2,
43 .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
44 .death_lock = SPIN_LOCK_UNLOCKED,
45 .hashinfo = &tcp_hashinfo,
46 .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
47 (unsigned long)&tcp_death_row),
48 .twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work,
49 inet_twdr_twkill_work,
50 &tcp_death_row),
51/* Short-time timewait calendar */
52
53 .twcal_hand = -1,
54 .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
55 (unsigned long)&tcp_death_row),
56};
57
58EXPORT_SYMBOL_GPL(tcp_death_row);
45 59
46static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) 60static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
47{ 61{
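
Aside: the .period initializer above fixes the slow-timer cadence at TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, so the kill wheel sweeps every slot exactly once per TIME-WAIT lifetime. Checking the arithmetic standalone (HZ and the slot count are assumed to be 1000 and 8 here):

#include <stdio.h>

#define HZ                      1000            /* assumed tick rate */
#define TCP_TIMEWAIT_LEN        (60 * HZ)       /* 60 seconds, in jiffies */
#define INET_TWDR_TWKILL_SLOTS  8

int main(void)
{
        unsigned long period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS;

        /* one slot fires every 7.5 s; 8 firings cover the 60 s lifetime */
        printf("slot period: %lu jiffies (%.1f s)\n",
               period, (double)period / HZ);
        return 0;
}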
@@ -52,47 +66,6 @@ static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
52 return (seq == e_win && seq == end_seq); 66 return (seq == e_win && seq == end_seq);
53} 67}
54 68
55/* New-style handling of TIME_WAIT sockets. */
56
57int tcp_tw_count;
58
59
60/* Must be called with locally disabled BHs. */
61static void tcp_timewait_kill(struct tcp_tw_bucket *tw)
62{
63 struct tcp_ehash_bucket *ehead;
64 struct tcp_bind_hashbucket *bhead;
65 struct tcp_bind_bucket *tb;
66
67 /* Unlink from established hashes. */
68 ehead = &tcp_ehash[tw->tw_hashent];
69 write_lock(&ehead->lock);
70 if (hlist_unhashed(&tw->tw_node)) {
71 write_unlock(&ehead->lock);
72 return;
73 }
74 __hlist_del(&tw->tw_node);
75 sk_node_init(&tw->tw_node);
76 write_unlock(&ehead->lock);
77
78 /* Disassociate with bind bucket. */
79 bhead = &tcp_bhash[tcp_bhashfn(tw->tw_num)];
80 spin_lock(&bhead->lock);
81 tb = tw->tw_tb;
82 __hlist_del(&tw->tw_bind_node);
83 tw->tw_tb = NULL;
84 tcp_bucket_destroy(tb);
85 spin_unlock(&bhead->lock);
86
87#ifdef INET_REFCNT_DEBUG
88 if (atomic_read(&tw->tw_refcnt) != 1) {
89 printk(KERN_DEBUG "tw_bucket %p refcnt=%d\n", tw,
90 atomic_read(&tw->tw_refcnt));
91 }
92#endif
93 tcp_tw_put(tw);
94}
95
96/* 69/*
97 * * Main purpose of TIME-WAIT state is to close connection gracefully, 70 * * Main purpose of TIME-WAIT state is to close connection gracefully,
98 * when one of the ends sits in LAST-ACK or CLOSING retransmitting FIN 71
@@ -122,19 +95,20 @@ static void tcp_timewait_kill(struct tcp_tw_bucket *tw)
122 * to avoid misread sequence numbers, states etc. --ANK 95 * to avoid misread sequence numbers, states etc. --ANK
123 */ 96 */
124enum tcp_tw_status 97enum tcp_tw_status
125tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, 98tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
126 struct tcphdr *th, unsigned len) 99 const struct tcphdr *th)
127{ 100{
101 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
128 struct tcp_options_received tmp_opt; 102 struct tcp_options_received tmp_opt;
129 int paws_reject = 0; 103 int paws_reject = 0;
130 104
131 tmp_opt.saw_tstamp = 0; 105 tmp_opt.saw_tstamp = 0;
132 if (th->doff > (sizeof(struct tcphdr) >> 2) && tw->tw_ts_recent_stamp) { 106 if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
133 tcp_parse_options(skb, &tmp_opt, 0); 107 tcp_parse_options(skb, &tmp_opt, 0);
134 108
135 if (tmp_opt.saw_tstamp) { 109 if (tmp_opt.saw_tstamp) {
136 tmp_opt.ts_recent = tw->tw_ts_recent; 110 tmp_opt.ts_recent = tcptw->tw_ts_recent;
137 tmp_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; 111 tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
138 paws_reject = tcp_paws_check(&tmp_opt, th->rst); 112 paws_reject = tcp_paws_check(&tmp_opt, th->rst);
139 } 113 }
140 } 114 }
@@ -145,20 +119,20 @@ tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
145 /* Out of window, send ACK */ 119 /* Out of window, send ACK */
146 if (paws_reject || 120 if (paws_reject ||
147 !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, 121 !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
148 tw->tw_rcv_nxt, 122 tcptw->tw_rcv_nxt,
149 tw->tw_rcv_nxt + tw->tw_rcv_wnd)) 123 tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd))
150 return TCP_TW_ACK; 124 return TCP_TW_ACK;
151 125
152 if (th->rst) 126 if (th->rst)
153 goto kill; 127 goto kill;
154 128
155 if (th->syn && !before(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt)) 129 if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt))
156 goto kill_with_rst; 130 goto kill_with_rst;
157 131
158 /* Dup ACK? */ 132 /* Dup ACK? */
159 if (!after(TCP_SKB_CB(skb)->end_seq, tw->tw_rcv_nxt) || 133 if (!after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||
160 TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) { 134 TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
161 tcp_tw_put(tw); 135 inet_twsk_put(tw);
162 return TCP_TW_SUCCESS; 136 return TCP_TW_SUCCESS;
163 } 137 }
164 138
@@ -166,19 +140,19 @@ tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
166 * reset. 140 * reset.
167 */ 141 */
168 if (!th->fin || 142 if (!th->fin ||
169 TCP_SKB_CB(skb)->end_seq != tw->tw_rcv_nxt + 1) { 143 TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) {
170kill_with_rst: 144kill_with_rst:
171 tcp_tw_deschedule(tw); 145 inet_twsk_deschedule(tw, &tcp_death_row);
172 tcp_tw_put(tw); 146 inet_twsk_put(tw);
173 return TCP_TW_RST; 147 return TCP_TW_RST;
174 } 148 }
175 149
176 /* FIN arrived, enter true time-wait state. */ 150 /* FIN arrived, enter true time-wait state. */
177 tw->tw_substate = TCP_TIME_WAIT; 151 tw->tw_substate = TCP_TIME_WAIT;
178 tw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; 152 tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
179 if (tmp_opt.saw_tstamp) { 153 if (tmp_opt.saw_tstamp) {
180 tw->tw_ts_recent_stamp = xtime.tv_sec; 154 tcptw->tw_ts_recent_stamp = xtime.tv_sec;
181 tw->tw_ts_recent = tmp_opt.rcv_tsval; 155 tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
182 } 156 }
183 157
184 /* I am shamed, but failed to make it more elegant. 158 /* I am shamed, but failed to make it more elegant.
@@ -187,11 +161,13 @@ kill_with_rst:
187 * do not understand recycling in any case, it is not 161
188 * a big problem in practice. --ANK */ 162 * a big problem in practice. --ANK */
189 if (tw->tw_family == AF_INET && 163 if (tw->tw_family == AF_INET &&
190 sysctl_tcp_tw_recycle && tw->tw_ts_recent_stamp && 164 tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
191 tcp_v4_tw_remember_stamp(tw)) 165 tcp_v4_tw_remember_stamp(tw))
192 tcp_tw_schedule(tw, tw->tw_timeout); 166 inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout,
167 TCP_TIMEWAIT_LEN);
193 else 168 else
194 tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); 169 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
170 TCP_TIMEWAIT_LEN);
195 return TCP_TW_ACK; 171 return TCP_TW_ACK;
196 } 172 }
197 173
@@ -213,7 +189,7 @@ kill_with_rst:
213 */ 189 */
214 190
215 if (!paws_reject && 191 if (!paws_reject &&
216 (TCP_SKB_CB(skb)->seq == tw->tw_rcv_nxt && 192 (TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt &&
217 (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) { 193 (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) {
218 /* In window segment, it may be only reset or bare ack. */ 194 /* In window segment, it may be only reset or bare ack. */
219 195
@@ -224,19 +200,20 @@ kill_with_rst:
224 */ 200 */
225 if (sysctl_tcp_rfc1337 == 0) { 201 if (sysctl_tcp_rfc1337 == 0) {
226kill: 202kill:
227 tcp_tw_deschedule(tw); 203 inet_twsk_deschedule(tw, &tcp_death_row);
228 tcp_tw_put(tw); 204 inet_twsk_put(tw);
229 return TCP_TW_SUCCESS; 205 return TCP_TW_SUCCESS;
230 } 206 }
231 } 207 }
232 tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); 208 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
209 TCP_TIMEWAIT_LEN);
233 210
234 if (tmp_opt.saw_tstamp) { 211 if (tmp_opt.saw_tstamp) {
235 tw->tw_ts_recent = tmp_opt.rcv_tsval; 212 tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
236 tw->tw_ts_recent_stamp = xtime.tv_sec; 213 tcptw->tw_ts_recent_stamp = xtime.tv_sec;
237 } 214 }
238 215
239 tcp_tw_put(tw); 216 inet_twsk_put(tw);
240 return TCP_TW_SUCCESS; 217 return TCP_TW_SUCCESS;
241 } 218 }
242 219
@@ -258,9 +235,10 @@ kill:
258 */ 235 */
259 236
260 if (th->syn && !th->rst && !th->ack && !paws_reject && 237 if (th->syn && !th->rst && !th->ack && !paws_reject &&
261 (after(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt) || 238 (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) ||
262 (tmp_opt.saw_tstamp && (s32)(tw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) { 239 (tmp_opt.saw_tstamp &&
263 u32 isn = tw->tw_snd_nxt + 65535 + 2; 240 (s32)(tcptw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) {
241 u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
264 if (isn == 0) 242 if (isn == 0)
265 isn++; 243 isn++;
266 TCP_SKB_CB(skb)->when = isn; 244 TCP_SKB_CB(skb)->when = isn;
@@ -278,107 +256,57 @@ kill:
278 * Do not reschedule in the last case. 256 * Do not reschedule in the last case.
279 */ 257 */
280 if (paws_reject || th->ack) 258 if (paws_reject || th->ack)
281 tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); 259 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
260 TCP_TIMEWAIT_LEN);
282 261
283 /* Send ACK. Note, we do not put the bucket, 262 /* Send ACK. Note, we do not put the bucket,
284 * it will be released by caller. 263 * it will be released by caller.
285 */ 264 */
286 return TCP_TW_ACK; 265 return TCP_TW_ACK;
287 } 266 }
288 tcp_tw_put(tw); 267 inet_twsk_put(tw);
289 return TCP_TW_SUCCESS; 268 return TCP_TW_SUCCESS;
290} 269}
291 270
292/* Enter the time wait state. This is called with locally disabled BH.
293 * Essentially we whip up a timewait bucket, copy the
294 * relevant info into it from the SK, and mess with hash chains
295 * and list linkage.
296 */
297static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw)
298{
299 struct tcp_ehash_bucket *ehead = &tcp_ehash[sk->sk_hashent];
300 struct tcp_bind_hashbucket *bhead;
301
302 /* Step 1: Put TW into bind hash. Original socket stays there too.
303 Note that any socket with inet_sk(sk)->num != 0 MUST be bound in
304 binding cache, even if it is closed.
305 */
306 bhead = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)];
307 spin_lock(&bhead->lock);
308 tw->tw_tb = tcp_sk(sk)->bind_hash;
309 BUG_TRAP(tcp_sk(sk)->bind_hash);
310 tw_add_bind_node(tw, &tw->tw_tb->owners);
311 spin_unlock(&bhead->lock);
312
313 write_lock(&ehead->lock);
314
315 /* Step 2: Remove SK from established hash. */
316 if (__sk_del_node_init(sk))
317 sock_prot_dec_use(sk->sk_prot);
318
319 /* Step 3: Hash TW into TIMEWAIT half of established hash table. */
320 tw_add_node(tw, &(ehead + tcp_ehash_size)->chain);
321 atomic_inc(&tw->tw_refcnt);
322
323 write_unlock(&ehead->lock);
324}
325
326/* 271/*
327 * Move a socket to time-wait or dead fin-wait-2 state. 272 * Move a socket to time-wait or dead fin-wait-2 state.
328 */ 273 */
329void tcp_time_wait(struct sock *sk, int state, int timeo) 274void tcp_time_wait(struct sock *sk, int state, int timeo)
330{ 275{
331 struct tcp_tw_bucket *tw = NULL; 276 struct inet_timewait_sock *tw = NULL;
332 struct tcp_sock *tp = tcp_sk(sk); 277 const struct tcp_sock *tp = tcp_sk(sk);
333 int recycle_ok = 0; 278 int recycle_ok = 0;
334 279
335 if (sysctl_tcp_tw_recycle && tp->rx_opt.ts_recent_stamp) 280 if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
336 recycle_ok = tp->af_specific->remember_stamp(sk); 281 recycle_ok = tp->af_specific->remember_stamp(sk);
337 282
338 if (tcp_tw_count < sysctl_tcp_max_tw_buckets) 283 if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
339 tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC); 284 tw = inet_twsk_alloc(sk, state);
340
341 if(tw != NULL) {
342 struct inet_sock *inet = inet_sk(sk);
343 int rto = (tp->rto<<2) - (tp->rto>>1);
344
345 /* Give us an identity. */
346 tw->tw_daddr = inet->daddr;
347 tw->tw_rcv_saddr = inet->rcv_saddr;
348 tw->tw_bound_dev_if = sk->sk_bound_dev_if;
349 tw->tw_num = inet->num;
350 tw->tw_state = TCP_TIME_WAIT;
351 tw->tw_substate = state;
352 tw->tw_sport = inet->sport;
353 tw->tw_dport = inet->dport;
354 tw->tw_family = sk->sk_family;
355 tw->tw_reuse = sk->sk_reuse;
356 tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
357 atomic_set(&tw->tw_refcnt, 1);
358 285
359 tw->tw_hashent = sk->sk_hashent; 286 if (tw != NULL) {
360 tw->tw_rcv_nxt = tp->rcv_nxt; 287 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
361 tw->tw_snd_nxt = tp->snd_nxt; 288 const struct inet_connection_sock *icsk = inet_csk(sk);
362 tw->tw_rcv_wnd = tcp_receive_window(tp); 289 const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
363 tw->tw_ts_recent = tp->rx_opt.ts_recent; 290
364 tw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; 291 tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
365 tw_dead_node_init(tw); 292 tcptw->tw_rcv_nxt = tp->rcv_nxt;
293 tcptw->tw_snd_nxt = tp->snd_nxt;
294 tcptw->tw_rcv_wnd = tcp_receive_window(tp);
295 tcptw->tw_ts_recent = tp->rx_opt.ts_recent;
296 tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
366 297
367#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 298#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
368 if (tw->tw_family == PF_INET6) { 299 if (tw->tw_family == PF_INET6) {
369 struct ipv6_pinfo *np = inet6_sk(sk); 300 struct ipv6_pinfo *np = inet6_sk(sk);
301 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
370 302
371 ipv6_addr_copy(&tw->tw_v6_daddr, &np->daddr); 303 ipv6_addr_copy(&tcp6tw->tw_v6_daddr, &np->daddr);
372 ipv6_addr_copy(&tw->tw_v6_rcv_saddr, &np->rcv_saddr); 304 ipv6_addr_copy(&tcp6tw->tw_v6_rcv_saddr, &np->rcv_saddr);
373 tw->tw_v6_ipv6only = np->ipv6only; 305 tw->tw_ipv6only = np->ipv6only;
374 } else {
375 memset(&tw->tw_v6_daddr, 0, sizeof(tw->tw_v6_daddr));
376 memset(&tw->tw_v6_rcv_saddr, 0, sizeof(tw->tw_v6_rcv_saddr));
377 tw->tw_v6_ipv6only = 0;
378 } 306 }
379#endif 307#endif
380 /* Linkage updates. */ 308 /* Linkage updates. */
381 __tcp_tw_hashdance(sk, tw); 309 __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
382 310
383 /* Get the TIME_WAIT timeout firing. */ 311 /* Get the TIME_WAIT timeout firing. */
384 if (timeo < rto) 312 if (timeo < rto)
@@ -392,8 +320,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
392 timeo = TCP_TIMEWAIT_LEN; 320 timeo = TCP_TIMEWAIT_LEN;
393 } 321 }
394 322
395 tcp_tw_schedule(tw, timeo); 323 inet_twsk_schedule(tw, &tcp_death_row, timeo,
396 tcp_tw_put(tw); 324 TCP_TIMEWAIT_LEN);
325 inet_twsk_put(tw);
397 } else { 326 } else {
398 /* Sorry, if we're out of memory, just CLOSE this 327 /* Sorry, if we're out of memory, just CLOSE this
399 * socket up. We've got bigger problems than 328 * socket up. We've got bigger problems than
@@ -407,277 +336,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
407 tcp_done(sk); 336 tcp_done(sk);
408} 337}
409 338
410/* Kill off TIME_WAIT sockets once their lifetime has expired. */
411static int tcp_tw_death_row_slot;
412
413static void tcp_twkill(unsigned long);
414
415/* TIME_WAIT reaping mechanism. */
416#define TCP_TWKILL_SLOTS 8 /* Please keep this a power of 2. */
417#define TCP_TWKILL_PERIOD (TCP_TIMEWAIT_LEN/TCP_TWKILL_SLOTS)
418
419#define TCP_TWKILL_QUOTA 100
420
421static struct hlist_head tcp_tw_death_row[TCP_TWKILL_SLOTS];
422static DEFINE_SPINLOCK(tw_death_lock);
423static struct timer_list tcp_tw_timer = TIMER_INITIALIZER(tcp_twkill, 0, 0);
424static void twkill_work(void *);
425static DECLARE_WORK(tcp_twkill_work, twkill_work, NULL);
426static u32 twkill_thread_slots;
427
428/* Returns non-zero if quota exceeded. */
429static int tcp_do_twkill_work(int slot, unsigned int quota)
430{
431 struct tcp_tw_bucket *tw;
432 struct hlist_node *node;
433 unsigned int killed;
434 int ret;
435
436 /* NOTE: compare this to previous version where lock
437 * was released after detaching chain. It was racy,
438 * because tw buckets are scheduled in not serialized context
439 * because tw buckets are scheduled in a non-serialized context
440 * soft irqs are not sequenced.
441 */
442 killed = 0;
443 ret = 0;
444rescan:
445 tw_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) {
446 __tw_del_dead_node(tw);
447 spin_unlock(&tw_death_lock);
448 tcp_timewait_kill(tw);
449 tcp_tw_put(tw);
450 killed++;
451 spin_lock(&tw_death_lock);
452 if (killed > quota) {
453 ret = 1;
454 break;
455 }
456
457 /* While we dropped tw_death_lock, another cpu may have
458 * killed off the next TW bucket in the list, therefore
459 * do a fresh re-read of the hlist head node with the
460 * lock reacquired. We still use the hlist traversal
461 * macro in order to get the prefetches.
462 */
463 goto rescan;
464 }
465
466 tcp_tw_count -= killed;
467 NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);
468
469 return ret;
470}
471
472static void tcp_twkill(unsigned long dummy)
473{
474 int need_timer, ret;
475
476 spin_lock(&tw_death_lock);
477
478 if (tcp_tw_count == 0)
479 goto out;
480
481 need_timer = 0;
482 ret = tcp_do_twkill_work(tcp_tw_death_row_slot, TCP_TWKILL_QUOTA);
483 if (ret) {
484 twkill_thread_slots |= (1 << tcp_tw_death_row_slot);
485 mb();
486 schedule_work(&tcp_twkill_work);
487 need_timer = 1;
488 } else {
489 /* We purged the entire slot, anything left? */
490 if (tcp_tw_count)
491 need_timer = 1;
492 }
493 tcp_tw_death_row_slot =
494 ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
495 if (need_timer)
496 mod_timer(&tcp_tw_timer, jiffies + TCP_TWKILL_PERIOD);
497out:
498 spin_unlock(&tw_death_lock);
499}
500
501extern void twkill_slots_invalid(void);
502
503static void twkill_work(void *dummy)
504{
505 int i;
506
507 if ((TCP_TWKILL_SLOTS - 1) > (sizeof(twkill_thread_slots) * 8))
508 twkill_slots_invalid();
509
510 while (twkill_thread_slots) {
511 spin_lock_bh(&tw_death_lock);
512 for (i = 0; i < TCP_TWKILL_SLOTS; i++) {
513 if (!(twkill_thread_slots & (1 << i)))
514 continue;
515
516 while (tcp_do_twkill_work(i, TCP_TWKILL_QUOTA) != 0) {
517 if (need_resched()) {
518 spin_unlock_bh(&tw_death_lock);
519 schedule();
520 spin_lock_bh(&tw_death_lock);
521 }
522 }
523
524 twkill_thread_slots &= ~(1 << i);
525 }
526 spin_unlock_bh(&tw_death_lock);
527 }
528}
529
530/* These are always called from BH context. See callers in
531 * tcp_input.c to verify this.
532 */
533
534/* This is for handling early-kills of TIME_WAIT sockets. */
535void tcp_tw_deschedule(struct tcp_tw_bucket *tw)
536{
537 spin_lock(&tw_death_lock);
538 if (tw_del_dead_node(tw)) {
539 tcp_tw_put(tw);
540 if (--tcp_tw_count == 0)
541 del_timer(&tcp_tw_timer);
542 }
543 spin_unlock(&tw_death_lock);
544 tcp_timewait_kill(tw);
545}
546
547/* Short-time timewait calendar */
548
549static int tcp_twcal_hand = -1;
550static int tcp_twcal_jiffie;
551static void tcp_twcal_tick(unsigned long);
552static struct timer_list tcp_twcal_timer =
553 TIMER_INITIALIZER(tcp_twcal_tick, 0, 0);
554static struct hlist_head tcp_twcal_row[TCP_TW_RECYCLE_SLOTS];
555
556static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo)
557{
558 struct hlist_head *list;
559 int slot;
560
561 /* timeout := RTO * 3.5
562 *
563 * 3.5 = 1+2+0.5 to wait for two retransmits.
564 *
565 * RATIONALE: if FIN arrived and we entered TIME-WAIT state,
566 * our ACK acking that FIN can be lost. If N subsequent retransmitted
567 * FINs (or previous segments) are lost (the probability of such an
568 * event is p^(N+1), where p is the probability of losing a single
569 * packet and the time to detect the loss is about RTO*(2^N - 1) with
570 * exponential backoff). The normal timewait length is calculated so
571 * that we wait at least for one retransmitted FIN (maximal RTO is 120sec).
572 * [ BTW Linux, following BSD, violates this requirement, waiting
573 * only for 60sec; we should wait at least 240 secs.
574 * Well, 240 consumes too many resources 8)
575 * ]
576 * This interval is not reduced to catch old duplicates and
577 * responses to our wandering segments living for two MSLs.
578 * However, if we use PAWS to detect
579 * old duplicates, we can reduce the interval to the bounds required
580 * by RTO, rather than MSL. So, if the peer understands PAWS, we
581 * kill the tw bucket after 3.5*RTO (it is important that this number
582 * is greater than the TS tick!) and detect old duplicates with the
583 * help of PAWS.
584 */
585 slot = (timeo + (1<<TCP_TW_RECYCLE_TICK) - 1) >> TCP_TW_RECYCLE_TICK;
586
587 spin_lock(&tw_death_lock);
588
589 /* Unlink it, if it was scheduled */
590 if (tw_del_dead_node(tw))
591 tcp_tw_count--;
592 else
593 atomic_inc(&tw->tw_refcnt);
594
595 if (slot >= TCP_TW_RECYCLE_SLOTS) {
596 /* Schedule to slow timer */
597 if (timeo >= TCP_TIMEWAIT_LEN) {
598 slot = TCP_TWKILL_SLOTS-1;
599 } else {
600 slot = (timeo + TCP_TWKILL_PERIOD-1) / TCP_TWKILL_PERIOD;
601 if (slot >= TCP_TWKILL_SLOTS)
602 slot = TCP_TWKILL_SLOTS-1;
603 }
604 tw->tw_ttd = jiffies + timeo;
605 slot = (tcp_tw_death_row_slot + slot) & (TCP_TWKILL_SLOTS - 1);
606 list = &tcp_tw_death_row[slot];
607 } else {
608 tw->tw_ttd = jiffies + (slot << TCP_TW_RECYCLE_TICK);
609
610 if (tcp_twcal_hand < 0) {
611 tcp_twcal_hand = 0;
612 tcp_twcal_jiffie = jiffies;
613 tcp_twcal_timer.expires = tcp_twcal_jiffie + (slot<<TCP_TW_RECYCLE_TICK);
614 add_timer(&tcp_twcal_timer);
615 } else {
616 if (time_after(tcp_twcal_timer.expires, jiffies + (slot<<TCP_TW_RECYCLE_TICK)))
617 mod_timer(&tcp_twcal_timer, jiffies + (slot<<TCP_TW_RECYCLE_TICK));
618 slot = (tcp_twcal_hand + slot)&(TCP_TW_RECYCLE_SLOTS-1);
619 }
620 list = &tcp_twcal_row[slot];
621 }
622
623 hlist_add_head(&tw->tw_death_node, list);
624
625 if (tcp_tw_count++ == 0)
626 mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD);
627 spin_unlock(&tw_death_lock);
628}
629
630void tcp_twcal_tick(unsigned long dummy)
631{
632 int n, slot;
633 unsigned long j;
634 unsigned long now = jiffies;
635 int killed = 0;
636 int adv = 0;
637
638 spin_lock(&tw_death_lock);
639 if (tcp_twcal_hand < 0)
640 goto out;
641
642 slot = tcp_twcal_hand;
643 j = tcp_twcal_jiffie;
644
645 for (n=0; n<TCP_TW_RECYCLE_SLOTS; n++) {
646 if (time_before_eq(j, now)) {
647 struct hlist_node *node, *safe;
648 struct tcp_tw_bucket *tw;
649
650 tw_for_each_inmate_safe(tw, node, safe,
651 &tcp_twcal_row[slot]) {
652 __tw_del_dead_node(tw);
653 tcp_timewait_kill(tw);
654 tcp_tw_put(tw);
655 killed++;
656 }
657 } else {
658 if (!adv) {
659 adv = 1;
660 tcp_twcal_jiffie = j;
661 tcp_twcal_hand = slot;
662 }
663
664 if (!hlist_empty(&tcp_twcal_row[slot])) {
665 mod_timer(&tcp_twcal_timer, j);
666 goto out;
667 }
668 }
669 j += (1<<TCP_TW_RECYCLE_TICK);
670 slot = (slot+1)&(TCP_TW_RECYCLE_SLOTS-1);
671 }
672 tcp_twcal_hand = -1;
673
674out:
675 if ((tcp_tw_count -= killed) == 0)
676 del_timer(&tcp_tw_timer);
677 NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
678 spin_unlock(&tw_death_lock);
679}
680
681/* This is not only more efficient than what we used to do, it eliminates 339/* This is not only more efficient than what we used to do, it eliminates
682 * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM 340 * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
683 * 341 *
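
Aside: the removed tcp_tw_schedule() lives on as inet_twsk_schedule(), and its slot computation is a ceiling division by the recycle tick, so a PAWS-capable peer's bucket is parked for roughly 3.5 * RTO rounded up to a whole calendar slot. A userspace re-derivation (TCP_TW_RECYCLE_TICK is HZ-dependent in the kernel; 6 is an assumed value):

#include <stdio.h>

#define TCP_TW_RECYCLE_TICK 6   /* assumed: slot granularity of 64 jiffies */

/* ceil(timeo / 2^TICK): round the timeout up to a whole calendar slot */
static int recycle_slot(int timeo)
{
        return (timeo + (1 << TCP_TW_RECYCLE_TICK) - 1) >> TCP_TW_RECYCLE_TICK;
}

int main(void)
{
        int rto = 200;                          /* jiffies */
        int timeo = (rto << 2) - (rto >> 1);    /* 3.5 * RTO = 700 */

        printf("timeo=%d -> slot=%d\n", timeo, recycle_slot(timeo)); /* slot=11 */
        return 0;
}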
@@ -686,75 +344,27 @@ out:
686 */ 344 */
687struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb) 345struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
688{ 346{
689 /* allocate the newsk from the same slab of the master sock, 347 struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
690 * if not, at sk_free time we'll try to free it from the wrong
691 * slabcache (i.e. is it TCPv4 or v6?), this is handled thru sk->sk_prot -acme */
692 struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, sk->sk_prot, 0);
693 348
694 if(newsk != NULL) { 349 if (newsk != NULL) {
695 struct inet_request_sock *ireq = inet_rsk(req); 350 const struct inet_request_sock *ireq = inet_rsk(req);
696 struct tcp_request_sock *treq = tcp_rsk(req); 351 struct tcp_request_sock *treq = tcp_rsk(req);
352 struct inet_connection_sock *newicsk = inet_csk(sk);
697 struct tcp_sock *newtp; 353 struct tcp_sock *newtp;
698 struct sk_filter *filter;
699
700 memcpy(newsk, sk, sizeof(struct tcp_sock));
701 newsk->sk_state = TCP_SYN_RECV;
702
703 /* SANITY */
704 sk_node_init(&newsk->sk_node);
705 tcp_sk(newsk)->bind_hash = NULL;
706
707 /* Clone the TCP header template */
708 inet_sk(newsk)->dport = ireq->rmt_port;
709
710 sock_lock_init(newsk);
711 bh_lock_sock(newsk);
712
713 rwlock_init(&newsk->sk_dst_lock);
714 atomic_set(&newsk->sk_rmem_alloc, 0);
715 skb_queue_head_init(&newsk->sk_receive_queue);
716 atomic_set(&newsk->sk_wmem_alloc, 0);
717 skb_queue_head_init(&newsk->sk_write_queue);
718 atomic_set(&newsk->sk_omem_alloc, 0);
719 newsk->sk_wmem_queued = 0;
720 newsk->sk_forward_alloc = 0;
721
722 sock_reset_flag(newsk, SOCK_DONE);
723 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
724 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
725 newsk->sk_send_head = NULL;
726 rwlock_init(&newsk->sk_callback_lock);
727 skb_queue_head_init(&newsk->sk_error_queue);
728 newsk->sk_write_space = sk_stream_write_space;
729
730 if ((filter = newsk->sk_filter) != NULL)
731 sk_filter_charge(newsk, filter);
732
733 if (unlikely(xfrm_sk_clone_policy(newsk))) {
734 /* It is still raw copy of parent, so invalidate
735 * destructor and make plain sk_free() */
736 newsk->sk_destruct = NULL;
737 sk_free(newsk);
738 return NULL;
739 }
740 354
741 /* Now setup tcp_sock */ 355 /* Now setup tcp_sock */
742 newtp = tcp_sk(newsk); 356 newtp = tcp_sk(newsk);
743 newtp->pred_flags = 0; 357 newtp->pred_flags = 0;
744 newtp->rcv_nxt = treq->rcv_isn + 1; 358 newtp->rcv_nxt = treq->rcv_isn + 1;
745 newtp->snd_nxt = treq->snt_isn + 1; 359 newtp->snd_nxt = newtp->snd_una = newtp->snd_sml = treq->snt_isn + 1;
746 newtp->snd_una = treq->snt_isn + 1;
747 newtp->snd_sml = treq->snt_isn + 1;
748 360
749 tcp_prequeue_init(newtp); 361 tcp_prequeue_init(newtp);
750 362
751 tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn); 363 tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn);
752 364
753 newtp->retransmits = 0;
754 newtp->backoff = 0;
755 newtp->srtt = 0; 365 newtp->srtt = 0;
756 newtp->mdev = TCP_TIMEOUT_INIT; 366 newtp->mdev = TCP_TIMEOUT_INIT;
757 newtp->rto = TCP_TIMEOUT_INIT; 367 newicsk->icsk_rto = TCP_TIMEOUT_INIT;
758 368
759 newtp->packets_out = 0; 369 newtp->packets_out = 0;
760 newtp->left_out = 0; 370 newtp->left_out = 0;
@@ -774,9 +384,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
774 newtp->frto_counter = 0; 384 newtp->frto_counter = 0;
775 newtp->frto_highmark = 0; 385 newtp->frto_highmark = 0;
776 386
777 newtp->ca_ops = &tcp_reno; 387 newicsk->icsk_ca_ops = &tcp_reno;
778 388
779 tcp_set_ca_state(newtp, TCP_CA_Open); 389 tcp_set_ca_state(newsk, TCP_CA_Open);
780 tcp_init_xmit_timers(newsk); 390 tcp_init_xmit_timers(newsk);
781 skb_queue_head_init(&newtp->out_of_order_queue); 391 skb_queue_head_init(&newtp->out_of_order_queue);
782 newtp->rcv_wup = treq->rcv_isn + 1; 392 newtp->rcv_wup = treq->rcv_isn + 1;
@@ -789,26 +399,12 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
789 newtp->rx_opt.dsack = 0; 399 newtp->rx_opt.dsack = 0;
790 newtp->rx_opt.eff_sacks = 0; 400 newtp->rx_opt.eff_sacks = 0;
791 401
792 newtp->probes_out = 0;
793 newtp->rx_opt.num_sacks = 0; 402 newtp->rx_opt.num_sacks = 0;
794 newtp->urg_data = 0; 403 newtp->urg_data = 0;
795 /* Deinitialize accept_queue to trap illegal accesses. */
796 memset(&newtp->accept_queue, 0, sizeof(newtp->accept_queue));
797
798 /* Back to base struct sock members. */
799 newsk->sk_err = 0;
800 newsk->sk_priority = 0;
801 atomic_set(&newsk->sk_refcnt, 2);
802#ifdef INET_REFCNT_DEBUG
803 atomic_inc(&inet_sock_nr);
804#endif
805 atomic_inc(&tcp_sockets_allocated);
806 404
807 if (sock_flag(newsk, SOCK_KEEPOPEN)) 405 if (sock_flag(newsk, SOCK_KEEPOPEN))
808 tcp_reset_keepalive_timer(newsk, 406 inet_csk_reset_keepalive_timer(newsk,
809 keepalive_time_when(newtp)); 407 keepalive_time_when(newtp));
810 newsk->sk_socket = NULL;
811 newsk->sk_sleep = NULL;
812 408
813 newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; 409 newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
814 if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { 410 if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
@@ -838,7 +434,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
838 newtp->tcp_header_len = sizeof(struct tcphdr); 434 newtp->tcp_header_len = sizeof(struct tcphdr);
839 } 435 }
840 if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len) 436 if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len)
841 newtp->ack.last_seg_size = skb->len-newtp->tcp_header_len; 437 newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
842 newtp->rx_opt.mss_clamp = req->mss; 438 newtp->rx_opt.mss_clamp = req->mss;
843 TCP_ECN_openreq_child(newtp, req); 439 TCP_ECN_openreq_child(newtp, req);
844 if (newtp->ecn_flags&TCP_ECN_OK) 440 if (newtp->ecn_flags&TCP_ECN_OK)
@@ -934,9 +530,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
934 does a sequence test, SYN is truncated, and thus we consider 530
935 it a bare ACK. 531 it a bare ACK.
936 532
937 If tp->defer_accept, we silently drop this bare ACK. Otherwise, 533 If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
938 we create an established connection. Both ends (listening sockets) 534 bare ACK. Otherwise, we create an established connection. Both
939 accept the new incoming connection and try to talk to each other. 8-) 535 ends (listening sockets) accept the new incoming connection and try
536 to talk to each other. 8-)
940 537
941 Note: This case is both harmless and rare. The possibility is about the 538
942 same as us discovering intelligent life on another planet tomorrow. 539
@@ -1003,7 +600,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
1003 return NULL; 600 return NULL;
1004 601
1005 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ 602 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
1006 if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { 603 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
604 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
1007 inet_rsk(req)->acked = 1; 605 inet_rsk(req)->acked = 1;
1008 return NULL; 606 return NULL;
1009 } 607 }
@@ -1018,10 +616,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
1018 if (child == NULL) 616 if (child == NULL)
1019 goto listen_overflow; 617 goto listen_overflow;
1020 618
1021 tcp_synq_unlink(tp, req, prev); 619 inet_csk_reqsk_queue_unlink(sk, req, prev);
1022 tcp_synq_removed(sk, req); 620 inet_csk_reqsk_queue_removed(sk, req);
1023 621
1024 tcp_acceptq_queue(sk, req, child); 622 inet_csk_reqsk_queue_add(sk, req, child);
1025 return child; 623 return child;
1026 624
1027 listen_overflow: 625 listen_overflow:
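
Aside: the unlink/removed/add triple above is the final-ACK handoff, moving a request from the listener's SYN queue to the accept queue where accept() will find the child. A toy model of the two-queue move (the layout is simplified to a plain FIFO; the kernel's syn_table is hashed):

#include <stdio.h>
#include <stdlib.h>

struct req {
        int id;
        struct req *next;
};

struct queue {
        struct req *head, **tail;
        int len;
};

static void queue_init(struct queue *q)
{
        q->head = NULL;
        q->tail = &q->head;
        q->len = 0;
}

static void queue_add(struct queue *q, struct req *r)
{
        r->next = NULL;
        *q->tail = r;
        q->tail = &r->next;
        q->len++;
}

static struct req *queue_pop(struct queue *q)
{
        struct req *r = q->head;

        if (r != NULL) {
                q->head = r->next;
                if (q->head == NULL)
                        q->tail = &q->head;
                q->len--;
        }
        return r;
}

int main(void)
{
        struct queue synq, acceptq;
        struct req r = { .id = 7 };
        struct req *child;

        queue_init(&synq);
        queue_init(&acceptq);
        queue_add(&synq, &r);           /* SYN arrived, request queued */
        child = queue_pop(&synq);       /* final ACK: unlink from SYN queue */
        if (child != NULL)
                queue_add(&acceptq, child);
        printf("syn=%d accept=%d\n", synq.len, acceptq.len); /* syn=0 accept=1 */
        return 0;
}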
@@ -1035,7 +633,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
1035 if (!(flg & TCP_FLAG_RST)) 633 if (!(flg & TCP_FLAG_RST))
1036 req->rsk_ops->send_reset(skb); 634 req->rsk_ops->send_reset(skb);
1037 635
1038 tcp_synq_drop(sk, req, prev); 636 inet_csk_reqsk_queue_drop(sk, req, prev);
1039 return NULL; 637 return NULL;
1040} 638}
1041 639
@@ -1074,4 +672,3 @@ EXPORT_SYMBOL(tcp_check_req);
1074EXPORT_SYMBOL(tcp_child_process); 672EXPORT_SYMBOL(tcp_child_process);
1075EXPORT_SYMBOL(tcp_create_openreq_child); 673EXPORT_SYMBOL(tcp_create_openreq_child);
1076EXPORT_SYMBOL(tcp_timewait_state_process); 674EXPORT_SYMBOL(tcp_timewait_state_process);
1077EXPORT_SYMBOL(tcp_tw_deschedule);
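
Aside: the large deletion in tcp_create_openreq_child() is possible because inet_csk_clone() now centralizes the copy-the-listener-then-reinitialize dance that every protocol needs. The pattern, reduced to userspace (all names and field choices here are illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct sock {
        int state;
        int refcnt;
        unsigned int rcv_nxt;
};

/* Clone the listener, then reset what a child must never inherit. */
static struct sock *csk_clone(const struct sock *listener)
{
        struct sock *child = malloc(sizeof(*child));

        if (child != NULL) {
                memcpy(child, listener, sizeof(*child));
                child->state = 2;       /* e.g. SYN_RECV rather than LISTEN */
                child->refcnt = 2;      /* one for the hash, one for the caller */
        }
        return child;
}

int main(void)
{
        struct sock listener = { .state = 1, .refcnt = 1, .rcv_nxt = 0 };
        struct sock *child = csk_clone(&listener);

        if (child != NULL) {
                child->rcv_nxt = 12345 + 1;     /* protocol fixup, as newtp gets */
                printf("child state=%d refcnt=%d\n", child->state, child->refcnt);
                free(child);
        }
        return 0;
}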
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e3f8ea1bfa9c..6094db5e11be 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -105,18 +105,19 @@ static __u16 tcp_advertise_mss(struct sock *sk)
105 105
106/* RFC2861. Reset CWND after idle period longer RTO to "restart window". 106/* RFC2861. Reset CWND after idle period longer RTO to "restart window".
107 * This is the first part of cwnd validation mechanism. */ 107 * This is the first part of cwnd validation mechanism. */
108static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst) 108static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
109{ 109{
110 struct tcp_sock *tp = tcp_sk(sk);
110 s32 delta = tcp_time_stamp - tp->lsndtime; 111 s32 delta = tcp_time_stamp - tp->lsndtime;
111 u32 restart_cwnd = tcp_init_cwnd(tp, dst); 112 u32 restart_cwnd = tcp_init_cwnd(tp, dst);
112 u32 cwnd = tp->snd_cwnd; 113 u32 cwnd = tp->snd_cwnd;
113 114
114 tcp_ca_event(tp, CA_EVENT_CWND_RESTART); 115 tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
115 116
116 tp->snd_ssthresh = tcp_current_ssthresh(tp); 117 tp->snd_ssthresh = tcp_current_ssthresh(sk);
117 restart_cwnd = min(restart_cwnd, cwnd); 118 restart_cwnd = min(restart_cwnd, cwnd);
118 119
119 while ((delta -= tp->rto) > 0 && cwnd > restart_cwnd) 120 while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
120 cwnd >>= 1; 121 cwnd >>= 1;
121 tp->snd_cwnd = max(cwnd, restart_cwnd); 122 tp->snd_cwnd = max(cwnd, restart_cwnd);
122 tp->snd_cwnd_stamp = tcp_time_stamp; 123 tp->snd_cwnd_stamp = tcp_time_stamp;
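
Aside: only the RTO lookup changes in this hunk (tp->rto becomes inet_csk(sk)->icsk_rto); the RFC 2861 decay itself is untouched: halve cwnd once per RTO of idle time, never dropping below the restart window. The loop, re-derived standalone with made-up numbers:

#include <stdio.h>

/* Halve cwnd once per idle RTO, floored at restart_cwnd (RFC 2861). */
static unsigned int cwnd_after_idle(unsigned int cwnd,
                                    unsigned int restart_cwnd,
                                    long idle, long rto)
{
        while ((idle -= rto) > 0 && cwnd > restart_cwnd)
                cwnd >>= 1;
        return cwnd > restart_cwnd ? cwnd : restart_cwnd;
}

int main(void)
{
        /* 40-segment window, restart window of 4, idle for ~3.5 RTOs */
        printf("cwnd after idle: %u\n", cwnd_after_idle(40, 4, 700, 200)); /* 10 */
        return 0;
}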
@@ -126,26 +127,25 @@ static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst)
 static inline void tcp_event_data_sent(struct tcp_sock *tp,
 				       struct sk_buff *skb, struct sock *sk)
 {
-	u32 now = tcp_time_stamp;
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	const u32 now = tcp_time_stamp;
 
-	if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto)
-		tcp_cwnd_restart(tp, __sk_dst_get(sk));
+	if (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto)
+		tcp_cwnd_restart(sk, __sk_dst_get(sk));
 
 	tp->lsndtime = now;
 
 	/* If it is a reply for ato after last received
 	 * packet, enter pingpong mode.
 	 */
-	if ((u32)(now - tp->ack.lrcvtime) < tp->ack.ato)
-		tp->ack.pingpong = 1;
+	if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+		icsk->icsk_ack.pingpong = 1;
 }
 
 static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	tcp_dec_quickack_mode(tp, pkts);
-	tcp_clear_xmit_timer(sk, TCP_TIME_DACK);
+	tcp_dec_quickack_mode(sk, pkts);
+	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 }
 
 /* Determine a window scaling and initial window to offer.
@@ -265,6 +265,7 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
 static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 {
 	if (skb != NULL) {
+		const struct inet_connection_sock *icsk = inet_csk(sk);
 		struct inet_sock *inet = inet_sk(sk);
 		struct tcp_sock *tp = tcp_sk(sk);
 		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -280,8 +281,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 #define SYSCTL_FLAG_SACK	0x4
 
 		/* If congestion control is doing timestamping */
-		if (tp->ca_ops->rtt_sample)
-			do_gettimeofday(&skb->stamp);
+		if (icsk->icsk_ca_ops->rtt_sample)
+			__net_timestamp(skb);
 
 		sysctl_flags = 0;
 		if (tcb->flags & TCPCB_FLAG_SYN) {
@@ -308,7 +309,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 		}
 
 		if (tcp_packets_in_flight(tp) == 0)
-			tcp_ca_event(tp, CA_EVENT_TX_START);
+			tcp_ca_event(sk, CA_EVENT_TX_START);
 
 		th = (struct tcphdr *) skb_push(skb, tcp_header_size);
 		skb->h.th = th;
@@ -366,7 +367,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 		if (err <= 0)
 			return err;
 
-		tcp_enter_cwr(tp);
+		tcp_enter_cwr(sk);
 
 		/* NET_XMIT_CN is special. It does not guarantee,
 		 * that this packet is lost. It tells that device
@@ -403,11 +404,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 	sk->sk_send_head = skb;
 }
 
-static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
+static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (skb->len <= tp->mss_cache ||
+	if (skb->len <= mss_now ||
 	    !(sk->sk_route_caps & NETIF_F_TSO)) {
 		/* Avoid the costly divide in the normal
 		 * non-TSO case.
@@ -417,10 +416,10 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
 	} else {
 		unsigned int factor;
 
-		factor = skb->len + (tp->mss_cache - 1);
-		factor /= tp->mss_cache;
+		factor = skb->len + (mss_now - 1);
+		factor /= mss_now;
 		skb_shinfo(skb)->tso_segs = factor;
-		skb_shinfo(skb)->tso_size = tp->mss_cache;
+		skb_shinfo(skb)->tso_size = mss_now;
 	}
 }
 
@@ -429,11 +428,11 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
  * packet to the list. This won't be called frequently, I hope.
  * Remember, these are still headerless SKBs at this point.
  */
-static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
+int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
-	int nsize;
+	int nsize, old_factor;
 	u16 flags;
 
 	nsize = skb_headlen(skb) - len;
@@ -484,30 +483,41 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
 	 * skbs, which it never sent before. --ANK
 	 */
 	TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
-	buff->stamp = skb->stamp;
+	buff->tstamp = skb->tstamp;
 
 	if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
 		tp->lost_out -= tcp_skb_pcount(skb);
 		tp->left_out -= tcp_skb_pcount(skb);
 	}
 
-	/* Fix up tso_factor for both original and new SKB. */
-	tcp_set_skb_tso_segs(sk, skb);
-	tcp_set_skb_tso_segs(sk, buff);
+	old_factor = tcp_skb_pcount(skb);
 
-	if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
-		tp->lost_out += tcp_skb_pcount(skb);
-		tp->left_out += tcp_skb_pcount(skb);
-	}
+	/* Fix up tso_factor for both original and new SKB. */
+	tcp_set_skb_tso_segs(sk, skb, mss_now);
+	tcp_set_skb_tso_segs(sk, buff, mss_now);
 
-	if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) {
-		tp->lost_out += tcp_skb_pcount(buff);
-		tp->left_out += tcp_skb_pcount(buff);
+	/* If this packet has been sent out already, we must
+	 * adjust the various packet counters.
+	 */
+	if (after(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
+		int diff = old_factor - tcp_skb_pcount(skb) -
+			tcp_skb_pcount(buff);
+
+		tp->packets_out -= diff;
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
+			tp->lost_out -= diff;
+			tp->left_out -= diff;
+		}
+		if (diff > 0) {
+			tp->fackets_out -= diff;
+			if ((int)tp->fackets_out < 0)
+				tp->fackets_out = 0;
+		}
 	}
 
 	/* Link BUFF into the send queue. */
 	skb_header_release(buff);
-	__skb_append(skb, buff);
+	__skb_append(skb, buff, &sk->sk_write_queue);
 
 	return 0;
 }
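
The new accounting block in tcp_fragment() exists because splitting an already-transmitted TSO skb changes how many MSS-sized segments are outstanding: each half is rounded up to whole segments separately. A toy calculation showing why diff can be negative (the numbers are illustrative only):

    /* Toy illustration of the accounting the new tcp_fragment() hunk
     * performs when an already-sent TSO skb is split.
     */
    #include <stdio.h>

    static unsigned int pcount(unsigned int len, unsigned int mss)
    {
    	return (len + mss - 1) / mss;	/* ceil(len / mss) */
    }

    int main(void)
    {
    	unsigned int mss = 1448, len = 4000, split = 1000;
    	int old_factor = pcount(len, mss);		/* 3 segments   */
    	int new_factor = pcount(split, mss)		/* 1 segment    */
    		       + pcount(len - split, mss);	/* + 3 segments */
    	int diff = old_factor - new_factor;		/* 3 - 4 = -1   */

    	/* packets_out -= diff  =>  one extra segment now in flight */
    	printf("old=%d new=%d diff=%d\n", old_factor, new_factor, diff);
    	return 0;
    }
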
@@ -569,7 +579,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 	 * factor and mss.
 	 */
 	if (tcp_skb_pcount(skb) > 1)
-		tcp_set_skb_tso_segs(sk, skb);
+		tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1));
 
 	return 0;
 }
@@ -698,7 +708,7 @@ static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
 		if (tp->packets_out > tp->snd_cwnd_used)
 			tp->snd_cwnd_used = tp->packets_out;
 
-		if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
+		if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
 			tcp_cwnd_application_limited(sk);
 	}
 }
@@ -734,12 +744,14 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk
 /* This must be invoked the first time we consider transmitting
  * SKB onto the wire.
  */
-static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb)
+static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
 {
 	int tso_segs = tcp_skb_pcount(skb);
 
-	if (!tso_segs) {
-		tcp_set_skb_tso_segs(sk, skb);
+	if (!tso_segs ||
+	    (tso_segs > 1 &&
+	     skb_shinfo(skb)->tso_size != mss_now)) {
+		tcp_set_skb_tso_segs(sk, skb, mss_now);
 		tso_segs = tcp_skb_pcount(skb);
 	}
 	return tso_segs;
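
tcp_init_tso_segs() now re-marks an skb not only when it has never been segmented, but also when a multi-segment skb was sized for a different MSS, e.g. after a path-MTU change. A minimal user-space model of that condition (fake_skb is an illustrative stand-in, not the kernel struct):

    /* Sketch of the re-segmentation condition added above: a cached
     * TSO factor is only reusable if it was computed for the current
     * MSS; otherwise the skb must be re-marked.
     */
    struct fake_skb { unsigned int len, tso_segs, tso_size; };

    static void set_tso_segs(struct fake_skb *skb, unsigned int mss_now)
    {
    	if (skb->len <= mss_now) {
    		skb->tso_segs = 1;
    		skb->tso_size = 0;
    	} else {
    		skb->tso_segs = (skb->len + mss_now - 1) / mss_now;
    		skb->tso_size = mss_now;
    	}
    }

    static unsigned int init_tso_segs(struct fake_skb *skb, unsigned int mss_now)
    {
    	/* Recompute when unset, or when a multi-segment skb was sized
    	 * for a different MSS (e.g. after a PMTU change).
    	 */
    	if (!skb->tso_segs ||
    	    (skb->tso_segs > 1 && skb->tso_size != mss_now))
    		set_tso_segs(skb, mss_now);
    	return skb->tso_segs;
    }
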
@@ -817,7 +829,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int cwnd_quota;
 
-	tcp_init_tso_segs(sk, skb);
+	tcp_init_tso_segs(sk, skb, cur_mss);
 
 	if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
 		return 0;
@@ -854,14 +866,15 @@ int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
  * know that all the data is in scatter-gather pages, and that the
  * packet has never been sent out before (and thus is not cloned).
  */
-static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len)
+static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now)
 {
 	struct sk_buff *buff;
 	int nlen = skb->len - len;
 	u16 flags;
 
 	/* All of a TSO frame must be composed of paged data.  */
-	BUG_ON(skb->len != skb->data_len);
+	if (skb->len != skb->data_len)
+		return tcp_fragment(sk, skb, len, mss_now);
 
 	buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC);
 	if (unlikely(buff == NULL))
@@ -887,12 +900,12 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len)
 	skb_split(skb, buff, len);
 
 	/* Fix up tso_factor for both original and new SKB. */
-	tcp_set_skb_tso_segs(sk, skb);
-	tcp_set_skb_tso_segs(sk, buff);
+	tcp_set_skb_tso_segs(sk, skb, mss_now);
+	tcp_set_skb_tso_segs(sk, buff, mss_now);
 
 	/* Link BUFF into the send queue. */
 	skb_header_release(buff);
-	__skb_append(skb, buff);
+	__skb_append(skb, buff, &sk->sk_write_queue);
 
 	return 0;
 }
@@ -904,12 +917,13 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len)
  */
 static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
 {
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 send_win, cong_win, limit, in_flight;
 
 	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
 		return 0;
 
-	if (tp->ca_state != TCP_CA_Open)
+	if (icsk->icsk_ca_state != TCP_CA_Open)
 		return 0;
 
 	in_flight = tcp_packets_in_flight(tp);
@@ -924,10 +938,6 @@ static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_
 
 	limit = min(send_win, cong_win);
 
-	/* If sk_send_head can be sent fully now, just do it.  */
-	if (skb->len <= limit)
-		return 0;
-
 	if (sysctl_tcp_tso_win_divisor) {
 		u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
 
@@ -972,19 +982,20 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 	if (unlikely(sk->sk_state == TCP_CLOSE))
 		return 0;
 
-	skb = sk->sk_send_head;
-	if (unlikely(!skb))
-		return 0;
-
-	tso_segs = tcp_init_tso_segs(sk, skb);
-	cwnd_quota = tcp_cwnd_test(tp, skb);
-	if (unlikely(!cwnd_quota))
-		goto out;
-
 	sent_pkts = 0;
-	while (likely(tcp_snd_wnd_test(tp, skb, mss_now))) {
+	while ((skb = sk->sk_send_head)) {
+		unsigned int limit;
+
+		tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
 		BUG_ON(!tso_segs);
 
+		cwnd_quota = tcp_cwnd_test(tp, skb);
+		if (!cwnd_quota)
+			break;
+
+		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
+			break;
+
 		if (tso_segs == 1) {
 			if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
 						     (tcp_skb_is_last(sk, skb) ?
@@ -995,9 +1006,10 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 				break;
 		}
 
+		limit = mss_now;
 		if (tso_segs > 1) {
-			u32 limit = tcp_window_allows(tp, skb,
+			limit = tcp_window_allows(tp, skb,
 						      mss_now, cwnd_quota);
 
 			if (skb->len < limit) {
 				unsigned int trim = skb->len % mss_now;
@@ -1005,15 +1017,12 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 				if (trim)
 					limit = skb->len - trim;
 			}
-			if (skb->len > limit) {
-				if (tso_fragment(sk, skb, limit))
-					break;
-			}
-		} else if (unlikely(skb->len > mss_now)) {
-			if (unlikely(tcp_fragment(sk, skb, mss_now)))
-				break;
 		}
 
+		if (skb->len > limit &&
+		    unlikely(tso_fragment(sk, skb, limit, mss_now)))
+			break;
+
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
 		if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))))
@@ -1026,27 +1035,12 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 
 		tcp_minshall_update(tp, mss_now, skb);
 		sent_pkts++;
-
-		/* Do not optimize this to use tso_segs. If we chopped up
-		 * the packet above, tso_segs will no longer be valid.
-		 */
-		cwnd_quota -= tcp_skb_pcount(skb);
-
-		BUG_ON(cwnd_quota < 0);
-		if (!cwnd_quota)
-			break;
-
-		skb = sk->sk_send_head;
-		if (!skb)
-			break;
-		tso_segs = tcp_init_tso_segs(sk, skb);
 	}
 
 	if (likely(sent_pkts)) {
 		tcp_cwnd_validate(sk, tp);
 		return 0;
 	}
-out:
 	return !tp->packets_out && sk->sk_send_head;
 }
 
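
The restructured tcp_write_xmit() above re-reads the queue head and re-evaluates the congestion and receive-window quota on every iteration, because fragmenting an skb inside the loop invalidates any cached segment counts — exactly what the deleted "do not optimize this to use tso_segs" block was guarding against. A toy model of that control flow (all names are hypothetical stand-ins, not kernel APIs):

    /* Re-check the quota per queued buffer instead of hoisting the
     * checks out of the loop. Numbers are illustrative.
     */
    #include <stdio.h>

    #define QLEN 4

    static int queue[QLEN] = { 3000, 1448, 1448, 500 }; /* bytes per skb */
    static int head;
    static int cwnd_quota = 3;	/* segments we may still send */

    static int send_one(int len, int mss)
    {
    	int segs = (len + mss - 1) / mss;

    	if (segs > cwnd_quota)
    		return 0;		/* quota exhausted: stop */
    	cwnd_quota -= segs;
    	printf("sent %d bytes (%d segs), quota left %d\n",
    	       len, segs, cwnd_quota);
    	return 1;
    }

    int main(void)
    {
    	const int mss = 1448;

    	while (head < QLEN) {		/* re-read the head each pass */
    		if (!send_one(queue[head], mss))
    			break;		/* try again on the next ACK */
    		head++;
    	}
    	return 0;
    }
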
@@ -1076,15 +1070,18 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
 
 	BUG_ON(!skb || skb->len < mss_now);
 
-	tso_segs = tcp_init_tso_segs(sk, skb);
+	tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
 	cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH);
 
 	if (likely(cwnd_quota)) {
+		unsigned int limit;
+
 		BUG_ON(!tso_segs);
 
+		limit = mss_now;
 		if (tso_segs > 1) {
-			u32 limit = tcp_window_allows(tp, skb,
+			limit = tcp_window_allows(tp, skb,
 						      mss_now, cwnd_quota);
 
 			if (skb->len < limit) {
 				unsigned int trim = skb->len % mss_now;
@@ -1092,15 +1089,12 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
 				if (trim)
 					limit = skb->len - trim;
 			}
-			if (skb->len > limit) {
-				if (unlikely(tso_fragment(sk, skb, limit)))
-					return;
-			}
-		} else if (unlikely(skb->len > mss_now)) {
-			if (unlikely(tcp_fragment(sk, skb, mss_now)))
-				return;
 		}
 
+		if (skb->len > limit &&
+		    unlikely(tso_fragment(sk, skb, limit, mss_now)))
+			return;
+
 		/* Send it out now. */
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
@@ -1166,6 +1160,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
  */
 u32 __tcp_select_window(struct sock *sk)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	/* MSS for the peer's data.  Previous verions used mss_clamp
 	 * here.  I don't know if the value based on our guesses
@@ -1173,7 +1168,7 @@ u32 __tcp_select_window(struct sock *sk)
 	 * but may be worse for the performance because of rcv_mss
 	 * fluctuations.  --SAW  1998/11/1
 	 */
-	int mss = tp->ack.rcv_mss;
+	int mss = icsk->icsk_ack.rcv_mss;
 	int free_space = tcp_space(sk);
 	int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
 	int window;
@@ -1182,7 +1177,7 @@ u32 __tcp_select_window(struct sock *sk)
 		mss = full_space;
 
 	if (free_space < full_space/2) {
-		tp->ack.quick = 0;
+		icsk->icsk_ack.quick = 0;
 
 		if (tcp_memory_pressure)
 			tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
@@ -1257,7 +1252,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		       tcp_skb_pcount(next_skb) != 1);
 
 	/* Ok.	We will be able to collapse the packet. */
-	__skb_unlink(next_skb, next_skb->list);
+	__skb_unlink(next_skb, &sk->sk_write_queue);
 
 	memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
 
@@ -1305,6 +1300,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
  */
 void tcp_simple_retransmit(struct sock *sk)
 {
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	unsigned int mss = tcp_current_mss(sk, 0);
@@ -1335,12 +1331,12 @@ void tcp_simple_retransmit(struct sock *sk)
 	 * in network, but units changed and effective
 	 * cwnd/ssthresh really reduced now.
 	 */
-	if (tp->ca_state != TCP_CA_Loss) {
+	if (icsk->icsk_ca_state != TCP_CA_Loss) {
 		tp->high_seq = tp->snd_nxt;
-		tp->snd_ssthresh = tcp_current_ssthresh(tp);
+		tp->snd_ssthresh = tcp_current_ssthresh(sk);
 		tp->prior_ssthresh = 0;
 		tp->undo_marker = 0;
-		tcp_set_ca_state(tp, TCP_CA_Loss);
+		tcp_set_ca_state(sk, TCP_CA_Loss);
 	}
 	tcp_xmit_retransmit_queue(sk);
 }
@@ -1365,12 +1361,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
 		if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
 			BUG();
-
-		if (sk->sk_route_caps & NETIF_F_TSO) {
-			sk->sk_route_caps &= ~NETIF_F_TSO;
-			sock_set_flag(sk, SOCK_NO_LARGESEND);
-		}
-
 		if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
 			return -ENOMEM;
 	}
@@ -1385,16 +1375,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		return -EAGAIN;
 
 	if (skb->len > cur_mss) {
-		int old_factor = tcp_skb_pcount(skb);
-		int new_factor;
-
-		if (tcp_fragment(sk, skb, cur_mss))
+		if (tcp_fragment(sk, skb, cur_mss, cur_mss))
 			return -ENOMEM; /* We'll try again later. */
-
-		/* New SKB created, account for it. */
-		new_factor = tcp_skb_pcount(skb);
-		tp->packets_out -= old_factor - new_factor;
-		tp->packets_out += tcp_skb_pcount(skb->next);
 	}
 
 	/* Collapse two adjacent packets if worthwhile and we can. */
@@ -1474,6 +1456,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
  */
 void tcp_xmit_retransmit_queue(struct sock *sk)
 {
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	int packet_cnt = tp->lost_out;
@@ -1497,14 +1480,16 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
 				if (tcp_retransmit_skb(sk, skb))
 					return;
-				if (tp->ca_state != TCP_CA_Loss)
+				if (icsk->icsk_ca_state != TCP_CA_Loss)
 					NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
 				else
 					NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
 
 				if (skb ==
 				    skb_peek(&sk->sk_write_queue))
-					tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+					inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+								  inet_csk(sk)->icsk_rto,
+								  TCP_RTO_MAX);
 			}
 
 			packet_cnt -= tcp_skb_pcount(skb);
@@ -1517,7 +1502,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	/* OK, demanded retransmission is finished. */
 
 	/* Forward retransmissions are possible only during Recovery. */
-	if (tp->ca_state != TCP_CA_Recovery)
+	if (icsk->icsk_ca_state != TCP_CA_Recovery)
 		return;
 
 	/* No forward retransmissions in Reno are possible. */
@@ -1557,7 +1542,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			break;
 
 		if (skb == skb_peek(&sk->sk_write_queue))
-			tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  inet_csk(sk)->icsk_rto,
						  TCP_RTO_MAX);
 
 		NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS);
 	}
@@ -1586,7 +1573,7 @@ void tcp_send_fin(struct sock *sk)
 	} else {
 		/* Socket is locked, keep trying until memory is available. */
 		for (;;) {
-			skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL);
+			skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL);
 			if (skb)
 				break;
 			yield();
@@ -1793,8 +1780,8 @@ static inline void tcp_connect_init(struct sock *sk)
 	tp->rcv_wup = 0;
 	tp->copied_seq = 0;
 
-	tp->rto = TCP_TIMEOUT_INIT;
-	tp->retransmits = 0;
+	inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
+	inet_csk(sk)->icsk_retransmits = 0;
 	tcp_clear_retrans(tp);
 }
 
@@ -1808,7 +1795,7 @@ int tcp_connect(struct sock *sk)
 
 	tcp_connect_init(sk);
 
-	buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation);
+	buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
 	if (unlikely(buff == NULL))
 		return -ENOBUFS;
 
@@ -1837,7 +1824,8 @@ int tcp_connect(struct sock *sk)
 	TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the SYN until an answer. */
-	tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+				  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
 	return 0;
 }
 
@@ -1847,20 +1835,21 @@ int tcp_connect(struct sock *sk)
  */
 void tcp_send_delayed_ack(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	int ato = tp->ack.ato;
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	int ato = icsk->icsk_ack.ato;
 	unsigned long timeout;
 
 	if (ato > TCP_DELACK_MIN) {
+		const struct tcp_sock *tp = tcp_sk(sk);
 		int max_ato = HZ/2;
 
-		if (tp->ack.pingpong || (tp->ack.pending&TCP_ACK_PUSHED))
+		if (icsk->icsk_ack.pingpong || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
 			max_ato = TCP_DELACK_MAX;
 
 		/* Slow path, intersegment interval is "high". */
 
 		/* If some rtt estimate is known, use it to bound delayed ack.
-		 * Do not use tp->rto here, use results of rtt measurements
+		 * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements
 		 * directly.
 		 */
 		if (tp->srtt) {
@@ -1877,21 +1866,22 @@ void tcp_send_delayed_ack(struct sock *sk)
 	timeout = jiffies + ato;
 
 	/* Use new timeout only if there wasn't a older one earlier. */
-	if (tp->ack.pending&TCP_ACK_TIMER) {
+	if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
 		/* If delack timer was blocked or is about to expire,
 		 * send ACK now.
 		 */
-		if (tp->ack.blocked || time_before_eq(tp->ack.timeout, jiffies+(ato>>2))) {
+		if (icsk->icsk_ack.blocked ||
+		    time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
 			tcp_send_ack(sk);
 			return;
 		}
 
-		if (!time_before(timeout, tp->ack.timeout))
-			timeout = tp->ack.timeout;
+		if (!time_before(timeout, icsk->icsk_ack.timeout))
+			timeout = icsk->icsk_ack.timeout;
 	}
-	tp->ack.pending |= TCP_ACK_SCHED|TCP_ACK_TIMER;
-	tp->ack.timeout = timeout;
-	sk_reset_timer(sk, &tp->delack_timer, timeout);
+	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
+	icsk->icsk_ack.timeout = timeout;
+	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
 }
 
 /* This routine sends an ack and also updates the window. */
@@ -1908,9 +1898,10 @@ void tcp_send_ack(struct sock *sk)
 	 */
 	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
 	if (buff == NULL) {
-		tcp_schedule_ack(tp);
-		tp->ack.ato = TCP_ATO_MIN;
-		tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX);
+		inet_csk_schedule_ack(sk);
+		inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+					  TCP_DELACK_MAX, TCP_RTO_MAX);
 		return;
 	}
 
@@ -1991,16 +1982,10 @@ int tcp_write_wakeup(struct sock *sk)
 		    skb->len > mss) {
 			seg_size = min(seg_size, mss);
 			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
-			if (tcp_fragment(sk, skb, seg_size))
+			if (tcp_fragment(sk, skb, seg_size, mss))
 				return -1;
-			/* SWS override triggered forced fragmentation.
-			 * Disable TSO, the connection is too sick. */
-			if (sk->sk_route_caps & NETIF_F_TSO) {
-				sock_set_flag(sk, SOCK_NO_LARGESEND);
-				sk->sk_route_caps &= ~NETIF_F_TSO;
-			}
 		} else if (!tcp_skb_pcount(skb))
-			tcp_set_skb_tso_segs(sk, skb);
+			tcp_set_skb_tso_segs(sk, skb, mss);
 
 		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
@@ -2024,6 +2009,7 @@ int tcp_write_wakeup(struct sock *sk)
  */
 void tcp_send_probe0(struct sock *sk)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int err;
 
@@ -2031,28 +2017,31 @@ void tcp_send_probe0(struct sock *sk)
 
 	if (tp->packets_out || !sk->sk_send_head) {
 		/* Cancel probe timer, if it is not required. */
-		tp->probes_out = 0;
-		tp->backoff = 0;
+		icsk->icsk_probes_out = 0;
+		icsk->icsk_backoff = 0;
 		return;
 	}
 
 	if (err <= 0) {
-		if (tp->backoff < sysctl_tcp_retries2)
-			tp->backoff++;
-		tp->probes_out++;
-		tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0,
-				      min(tp->rto << tp->backoff, TCP_RTO_MAX));
+		if (icsk->icsk_backoff < sysctl_tcp_retries2)
+			icsk->icsk_backoff++;
+		icsk->icsk_probes_out++;
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
+					  min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
+					  TCP_RTO_MAX);
 	} else {
 		/* If packet was not sent due to local congestion,
-		 * do not backoff and do not remember probes_out.
+		 * do not backoff and do not remember icsk_probes_out.
		 * Let local senders to fight for local resources.
 		 *
 		 * Use accumulated backoff yet.
 		 */
-		if (!tp->probes_out)
-			tp->probes_out=1;
-		tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0,
-				      min(tp->rto << tp->backoff, TCP_RESOURCE_PROBE_INTERVAL));
+		if (!icsk->icsk_probes_out)
+			icsk->icsk_probes_out = 1;
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
+					  min(icsk->icsk_rto << icsk->icsk_backoff,
+					      TCP_RESOURCE_PROBE_INTERVAL),
+					  TCP_RTO_MAX);
 	}
 }
 
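
tcp_send_probe0() keeps its exponential backoff; only the state moved into inet_connection_sock and the timer call gained an explicit maximum. The schedule it produces, sketched stand-alone (the time unit and clamp value here are illustrative assumptions, not the kernel's jiffies math):

    /* Sketch of the zero-window probe backoff used above: each failed
     * probe doubles the wait, clamped to the maximum RTO.
     */
    #include <stdio.h>

    #define RTO_MAX_MS (120 * 1000)	/* 120 s, illustrative */

    int main(void)
    {
    	unsigned int rto = 200;		/* 200 ms base RTO */
    	unsigned int backoff;

    	for (backoff = 0; backoff < 12; backoff++) {
    		unsigned int when = rto << backoff;

    		if (when > RTO_MAX_MS)
    			when = RTO_MAX_MS;
    		printf("backoff=%2u -> next probe in %u ms\n", backoff, when);
    	}
    	return 0;
    }
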
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 70e108e15c71..327770bf5522 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -16,9 +16,10 @@
 #define TCP_SCALABLE_AI_CNT	 50U
 #define TCP_SCALABLE_MD_SCALE	3
 
-static void tcp_scalable_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
+static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 				    u32 in_flight, int flag)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	if (in_flight < tp->snd_cwnd)
 		return;
 
@@ -35,8 +36,9 @@ static void tcp_scalable_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt,
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-static u32 tcp_scalable_ssthresh(struct tcp_sock *tp)
+static u32 tcp_scalable_ssthresh(struct sock *sk)
 {
+	const struct tcp_sock *tp = tcp_sk(sk);
 	return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
 }
 
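
The Scalable TCP hunks change only the callback signatures to take struct sock; the algorithm is untouched. Its multiplicative decrease, restated on its own: back off by cwnd/8 (MD_SCALE = 3), floored at two segments:

    /* Standalone restatement of the Scalable TCP ssthresh rule above. */
    static unsigned int scalable_ssthresh(unsigned int snd_cwnd)
    {
    	unsigned int reduced = snd_cwnd - (snd_cwnd >> 3);	/* -12.5% */

    	return reduced > 2 ? reduced : 2;
    }

So a 100-segment window backs off to 88 rather than Reno's 50, which is what makes the algorithm "scalable" at large windows.
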
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 0084227438c2..415ee47ac1c5 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -36,49 +36,13 @@ static void tcp_write_timer(unsigned long);
 static void tcp_delack_timer(unsigned long);
 static void tcp_keepalive_timer (unsigned long data);
 
-#ifdef TCP_DEBUG
-const char tcp_timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
-EXPORT_SYMBOL(tcp_timer_bug_msg);
-#endif
-
-/*
- * Using different timers for retransmit, delayed acks and probes
- * We may wish use just one timer maintaining a list of expire jiffies
- * to optimize.
- */
-
 void tcp_init_xmit_timers(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	init_timer(&tp->retransmit_timer);
-	tp->retransmit_timer.function=&tcp_write_timer;
-	tp->retransmit_timer.data = (unsigned long) sk;
-	tp->pending = 0;
-
-	init_timer(&tp->delack_timer);
-	tp->delack_timer.function=&tcp_delack_timer;
-	tp->delack_timer.data = (unsigned long) sk;
-	tp->ack.pending = 0;
-
-	init_timer(&sk->sk_timer);
-	sk->sk_timer.function = &tcp_keepalive_timer;
-	sk->sk_timer.data = (unsigned long)sk;
+	inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
+				  &tcp_keepalive_timer);
 }
 
-void tcp_clear_xmit_timers(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	tp->pending = 0;
-	sk_stop_timer(sk, &tp->retransmit_timer);
-
-	tp->ack.pending = 0;
-	tp->ack.blocked = 0;
-	sk_stop_timer(sk, &tp->delack_timer);
-
-	sk_stop_timer(sk, &sk->sk_timer);
-}
+EXPORT_SYMBOL(tcp_init_xmit_timers);
 
 static void tcp_write_err(struct sock *sk)
 {
@@ -155,15 +119,15 @@ static int tcp_orphan_retries(struct sock *sk, int alive)
 /* A write timeout has occurred. Process the after effects. */
 static int tcp_write_timeout(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	int retry_until;
 
 	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
-		if (tp->retransmits)
+		if (icsk->icsk_retransmits)
 			dst_negative_advice(&sk->sk_dst_cache);
-		retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries;
+		retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
 	} else {
-		if (tp->retransmits >= sysctl_tcp_retries1) {
+		if (icsk->icsk_retransmits >= sysctl_tcp_retries1) {
 			/* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black
 			   hole detection. :-(
 
@@ -189,16 +153,16 @@ static int tcp_write_timeout(struct sock *sk)
 
 		retry_until = sysctl_tcp_retries2;
 		if (sock_flag(sk, SOCK_DEAD)) {
-			int alive = (tp->rto < TCP_RTO_MAX);
+			const int alive = (icsk->icsk_rto < TCP_RTO_MAX);
 
 			retry_until = tcp_orphan_retries(sk, alive);
 
-			if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until))
+			if (tcp_out_of_resources(sk, alive || icsk->icsk_retransmits < retry_until))
 				return 1;
 		}
 	}
 
-	if (tp->retransmits >= retry_until) {
+	if (icsk->icsk_retransmits >= retry_until) {
 		/* Has it gone just too far? */
 		tcp_write_err(sk);
 		return 1;
@@ -210,26 +174,27 @@ static void tcp_delack_timer(unsigned long data)
 {
 	struct sock *sk = (struct sock*)data;
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
 		/* Try again later. */
-		tp->ack.blocked = 1;
+		icsk->icsk_ack.blocked = 1;
 		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
-		sk_reset_timer(sk, &tp->delack_timer, jiffies + TCP_DELACK_MIN);
+		sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN);
 		goto out_unlock;
 	}
 
 	sk_stream_mem_reclaim(sk);
 
-	if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER))
+	if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
 		goto out;
 
-	if (time_after(tp->ack.timeout, jiffies)) {
-		sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
+	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
+		sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
 		goto out;
 	}
-	tp->ack.pending &= ~TCP_ACK_TIMER;
+	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
 
 	if (!skb_queue_empty(&tp->ucopy.prequeue)) {
 		struct sk_buff *skb;
@@ -242,16 +207,16 @@ static void tcp_delack_timer(unsigned long data)
 		tp->ucopy.memory = 0;
 	}
 
-	if (tcp_ack_scheduled(tp)) {
-		if (!tp->ack.pingpong) {
+	if (inet_csk_ack_scheduled(sk)) {
+		if (!icsk->icsk_ack.pingpong) {
 			/* Delayed ACK missed: inflate ATO. */
-			tp->ack.ato = min(tp->ack.ato << 1, tp->rto);
+			icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
 		} else {
 			/* Delayed ACK missed: leave pingpong mode and
 			 * deflate ATO.
 			 */
-			tp->ack.pingpong = 0;
-			tp->ack.ato = TCP_ATO_MIN;
+			icsk->icsk_ack.pingpong = 0;
+			icsk->icsk_ack.ato = TCP_ATO_MIN;
 		}
 		tcp_send_ack(sk);
 		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
@@ -268,11 +233,12 @@ out_unlock:
 
 static void tcp_probe_timer(struct sock *sk)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int max_probes;
 
 	if (tp->packets_out || !sk->sk_send_head) {
-		tp->probes_out = 0;
+		icsk->icsk_probes_out = 0;
 		return;
 	}
 
@@ -283,7 +249,7 @@ static void tcp_probe_timer(struct sock *sk)
 	 * FIXME: We ought not to do it, Solaris 2.5 actually has fixing
 	 * this behaviour in Solaris down as a bug fix. [AC]
 	 *
-	 * Let me to explain. probes_out is zeroed by incoming ACKs
+	 * Let me to explain. icsk_probes_out is zeroed by incoming ACKs
 	 * even if they advertise zero window. Hence, connection is killed only
 	 * if we received no ACKs for normal connection timeout. It is not killed
 	 * only because window stays zero for some time, window may be zero
@@ -294,15 +260,15 @@ static void tcp_probe_timer(struct sock *sk)
 	max_probes = sysctl_tcp_retries2;
 
 	if (sock_flag(sk, SOCK_DEAD)) {
-		int alive = ((tp->rto<<tp->backoff) < TCP_RTO_MAX);
+		const int alive = ((icsk->icsk_rto << icsk->icsk_backoff) < TCP_RTO_MAX);
 
 		max_probes = tcp_orphan_retries(sk, alive);
 
-		if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes))
+		if (tcp_out_of_resources(sk, alive || icsk->icsk_probes_out <= max_probes))
 			return;
 	}
 
-	if (tp->probes_out > max_probes) {
+	if (icsk->icsk_probes_out > max_probes) {
 		tcp_write_err(sk);
 	} else {
 		/* Only send another probe if we didn't close things up. */
@@ -317,6 +283,7 @@ static void tcp_probe_timer(struct sock *sk)
 static void tcp_retransmit_timer(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 
 	if (!tp->packets_out)
 		goto out;
@@ -351,20 +318,21 @@ static void tcp_retransmit_timer(struct sock *sk)
 	if (tcp_write_timeout(sk))
 		goto out;
 
-	if (tp->retransmits == 0) {
-		if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) {
+	if (icsk->icsk_retransmits == 0) {
+		if (icsk->icsk_ca_state == TCP_CA_Disorder ||
+		    icsk->icsk_ca_state == TCP_CA_Recovery) {
 			if (tp->rx_opt.sack_ok) {
-				if (tp->ca_state == TCP_CA_Recovery)
+				if (icsk->icsk_ca_state == TCP_CA_Recovery)
 					NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
 				else
 					NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES);
 			} else {
-				if (tp->ca_state == TCP_CA_Recovery)
+				if (icsk->icsk_ca_state == TCP_CA_Recovery)
 					NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL);
 				else
 					NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES);
 			}
-		} else if (tp->ca_state == TCP_CA_Loss) {
+		} else if (icsk->icsk_ca_state == TCP_CA_Loss) {
 			NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES);
 		} else {
 			NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS);
@@ -381,10 +349,11 @@ static void tcp_retransmit_timer(struct sock *sk)
 		/* Retransmission failed because of local congestion,
 		 * do not backoff.
 		 */
-		if (!tp->retransmits)
-			tp->retransmits=1;
-		tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS,
-				     min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL));
+		if (!icsk->icsk_retransmits)
+			icsk->icsk_retransmits = 1;
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+					  min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL),
+					  TCP_RTO_MAX);
 		goto out;
 	}
 
@@ -403,13 +372,13 @@ static void tcp_retransmit_timer(struct sock *sk)
 	 * implemented ftp to mars will work nicely. We will have to fix
 	 * the 120 second clamps though!
 	 */
-	tp->backoff++;
-	tp->retransmits++;
+	icsk->icsk_backoff++;
+	icsk->icsk_retransmits++;
 
 out_reset_timer:
-	tp->rto = min(tp->rto << 1, TCP_RTO_MAX);
-	tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
-	if (tp->retransmits > sysctl_tcp_retries1)
+	icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
+	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
+	if (icsk->icsk_retransmits > sysctl_tcp_retries1)
 		__sk_dst_reset(sk);
 
 out:;
@@ -418,32 +387,32 @@ out:;
 static void tcp_write_timer(unsigned long data)
 {
 	struct sock *sk = (struct sock*)data;
-	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	int event;
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
 		/* Try again later */
-		sk_reset_timer(sk, &tp->retransmit_timer, jiffies + (HZ / 20));
+		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20));
 		goto out_unlock;
 	}
 
-	if (sk->sk_state == TCP_CLOSE || !tp->pending)
+	if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending)
 		goto out;
 
-	if (time_after(tp->timeout, jiffies)) {
-		sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout);
+	if (time_after(icsk->icsk_timeout, jiffies)) {
+		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
 		goto out;
 	}
 
-	event = tp->pending;
-	tp->pending = 0;
+	event = icsk->icsk_pending;
+	icsk->icsk_pending = 0;
 
 	switch (event) {
-	case TCP_TIME_RETRANS:
+	case ICSK_TIME_RETRANS:
 		tcp_retransmit_timer(sk);
 		break;
-	case TCP_TIME_PROBE0:
+	case ICSK_TIME_PROBE0:
 		tcp_probe_timer(sk);
 		break;
 	}
@@ -462,96 +431,8 @@ out_unlock:
 
 static void tcp_synack_timer(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct listen_sock *lopt = tp->accept_queue.listen_opt;
-	int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
-	int thresh = max_retries;
-	unsigned long now = jiffies;
-	struct request_sock **reqp, *req;
-	int i, budget;
-
-	if (lopt == NULL || lopt->qlen == 0)
-		return;
-
-	/* Normally all the openreqs are young and become mature
-	 * (i.e. converted to established socket) for first timeout.
-	 * If synack was not acknowledged for 3 seconds, it means
-	 * one of the following things: synack was lost, ack was lost,
-	 * rtt is high or nobody planned to ack (i.e. synflood).
-	 * When server is a bit loaded, queue is populated with old
-	 * open requests, reducing effective size of queue.
-	 * When server is well loaded, queue size reduces to zero
-	 * after several minutes of work. It is not synflood,
-	 * it is normal operation. The solution is pruning
-	 * too old entries overriding normal timeout, when
-	 * situation becomes dangerous.
-	 *
-	 * Essentially, we reserve half of room for young
-	 * embrions; and abort old ones without pity, if old
-	 * ones are about to clog our table.
-	 */
-	if (lopt->qlen>>(lopt->max_qlen_log-1)) {
-		int young = (lopt->qlen_young<<1);
-
-		while (thresh > 2) {
-			if (lopt->qlen < young)
-				break;
-			thresh--;
-			young <<= 1;
-		}
-	}
-
-	if (tp->defer_accept)
-		max_retries = tp->defer_accept;
-
-	budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL));
-	i = lopt->clock_hand;
-
-	do {
-		reqp=&lopt->syn_table[i];
-		while ((req = *reqp) != NULL) {
-			if (time_after_eq(now, req->expires)) {
-				if ((req->retrans < thresh ||
-				     (inet_rsk(req)->acked && req->retrans < max_retries))
-				    && !req->rsk_ops->rtx_syn_ack(sk, req, NULL)) {
-					unsigned long timeo;
-
-					if (req->retrans++ == 0)
-						lopt->qlen_young--;
-					timeo = min((TCP_TIMEOUT_INIT << req->retrans),
-						    TCP_RTO_MAX);
-					req->expires = now + timeo;
-					reqp = &req->dl_next;
-					continue;
-				}
-
-				/* Drop this request */
-				tcp_synq_unlink(tp, req, reqp);
-				reqsk_queue_removed(&tp->accept_queue, req);
-				reqsk_free(req);
-				continue;
-			}
-			reqp = &req->dl_next;
-		}
-
-		i = (i+1)&(TCP_SYNQ_HSIZE-1);
-
-	} while (--budget > 0);
-
-	lopt->clock_hand = i;
-
-	if (lopt->qlen)
-		tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL);
-}
-
-void tcp_delete_keepalive_timer (struct sock *sk)
-{
-	sk_stop_timer(sk, &sk->sk_timer);
-}
-
-void tcp_reset_keepalive_timer (struct sock *sk, unsigned long len)
-{
-	sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
+	inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL,
+				   TCP_TIMEOUT_INIT, TCP_RTO_MAX);
 }
 
 void tcp_set_keepalive(struct sock *sk, int val)
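
The entire SYN-ACK pruning loop deleted above moved, essentially verbatim, into the generic inet_csk_reqsk_queue_prune(). Its core heuristic is worth restating: once the queue is over half full, lower the retransmit threshold until young (never-retransmitted) entries outnumber old ones, so half the table stays reserved for fresh connection attempts. A direct sketch of just that threshold computation, lifted out of the deleted code:

    /* Threshold heuristic from the removed tcp_synack_timer() body:
     * standalone restatement, not a kernel API.
     */
    static int prune_thresh(int qlen, int qlen_young, int max_qlen_log,
    			int max_retries)
    {
    	int thresh = max_retries;

    	if (qlen >> (max_qlen_log - 1)) {	/* over half full */
    		int young = qlen_young << 1;

    		while (thresh > 2) {
    			if (qlen < young)
    				break;
    			thresh--;	/* be harsher on old entries */
    			young <<= 1;
    		}
    	}
    	return thresh;
    }
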
@@ -560,15 +441,16 @@ void tcp_set_keepalive(struct sock *sk, int val)
 		return;
 
 	if (val && !sock_flag(sk, SOCK_KEEPOPEN))
-		tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
+		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
 	else if (!val)
-		tcp_delete_keepalive_timer(sk);
+		inet_csk_delete_keepalive_timer(sk);
 }
 
 
 static void tcp_keepalive_timer (unsigned long data)
 {
 	struct sock *sk = (struct sock *) data;
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 elapsed;
 
@@ -576,7 +458,7 @@ static void tcp_keepalive_timer (unsigned long data)
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
 		/* Try again later. */
-		tcp_reset_keepalive_timer (sk, HZ/20);
+		inet_csk_reset_keepalive_timer (sk, HZ/20);
 		goto out;
 	}
 
@@ -587,7 +469,7 @@ static void tcp_keepalive_timer (unsigned long data)
 
 	if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
 		if (tp->linger2 >= 0) {
-			int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN;
+			const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
 
 			if (tmo > 0) {
 				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
@@ -610,14 +492,14 @@ static void tcp_keepalive_timer (unsigned long data)
 	elapsed = tcp_time_stamp - tp->rcv_tstamp;
 
 	if (elapsed >= keepalive_time_when(tp)) {
-		if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) ||
-		    (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) {
+		if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) ||
+		    (tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) {
 			tcp_send_active_reset(sk, GFP_ATOMIC);
 			tcp_write_err(sk);
 			goto out;
 		}
 		if (tcp_write_wakeup(sk) <= 0) {
-			tp->probes_out++;
+			icsk->icsk_probes_out++;
 			elapsed = keepalive_intvl_when(tp);
 		} else {
 			/* If keepalive was lost due to local congestion,
@@ -634,7 +516,7 @@ static void tcp_keepalive_timer (unsigned long data)
 	sk_stream_mem_reclaim(sk);
 
 resched:
-	tcp_reset_keepalive_timer (sk, elapsed);
+	inet_csk_reset_keepalive_timer (sk, elapsed);
 	goto out;
 
 death:
@@ -644,8 +526,3 @@ out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
 }
-
-EXPORT_SYMBOL(tcp_clear_xmit_timers);
-EXPORT_SYMBOL(tcp_delete_keepalive_timer);
-EXPORT_SYMBOL(tcp_init_xmit_timers);
-EXPORT_SYMBOL(tcp_reset_keepalive_timer);
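
With tcp_init_xmit_timers() reduced to a call into inet_csk_init_xmit_timers(), the retransmit and zero-window-probe paths share one timer and one handler that dispatches on a pending-event field, as tcp_write_timer() above shows. A compact model of that dispatch pattern (names are illustrative, not kernel symbols):

    /* One timer, two meanings: the pending field records which event
     * the timer was armed for, and the handler consumes it.
     */
    #include <stdio.h>

    enum { TIME_NONE, TIME_RETRANS, TIME_PROBE0 };

    static void on_write_timer(int *pending)
    {
    	int event = *pending;

    	*pending = TIME_NONE;		/* consume the event */
    	switch (event) {
    	case TIME_RETRANS:
    		printf("retransmit path\n");
    		break;
    	case TIME_PROBE0:
    		printf("zero-window probe path\n");
    		break;
    	}
    }

    int main(void)
    {
    	int pending = TIME_RETRANS;

    	on_write_timer(&pending);
    	pending = TIME_PROBE0;
    	on_write_timer(&pending);
    	return 0;
    }
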
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 9bd443db5193..93c5f92070f9 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -35,7 +35,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
-#include <linux/tcp_diag.h>
+#include <linux/inet_diag.h>
 
 #include <net/tcp.h>
 
@@ -82,9 +82,10 @@ struct vegas {
  * Instead we must wait until the completion of an RTT during
  * which we actually receive ACKs.
  */
-static inline void vegas_enable(struct tcp_sock *tp)
+static inline void vegas_enable(struct sock *sk)
 {
-	struct vegas *vegas = tcp_ca(tp);
+	const struct tcp_sock *tp = tcp_sk(sk);
+	struct vegas *vegas = inet_csk_ca(sk);
 
 	/* Begin taking Vegas samples next time we send something. */
 	vegas->doing_vegas_now = 1;
@@ -97,19 +98,19 @@ static inline void vegas_enable(struct tcp_sock *tp)
 }
 
 /* Stop taking Vegas samples for now. */
-static inline void vegas_disable(struct tcp_sock *tp)
+static inline void vegas_disable(struct sock *sk)
 {
-	struct vegas *vegas = tcp_ca(tp);
+	struct vegas *vegas = inet_csk_ca(sk);
 
 	vegas->doing_vegas_now = 0;
 }
 
-static void tcp_vegas_init(struct tcp_sock *tp)
+static void tcp_vegas_init(struct sock *sk)
 {
-	struct vegas *vegas = tcp_ca(tp);
+	struct vegas *vegas = inet_csk_ca(sk);
 
 	vegas->baseRTT = 0x7fffffff;
-	vegas_enable(tp);
+	vegas_enable(sk);
 }
 
 /* Do RTT sampling needed for Vegas.
@@ -120,9 +121,9 @@ static void tcp_vegas_init(struct tcp_sock *tp)
  *   o min-filter RTT samples from a much longer window (forever for now)
  *     to find the propagation delay (baseRTT)
  */
-static void tcp_vegas_rtt_calc(struct tcp_sock *tp, u32 usrtt)
+static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
 {
-	struct vegas *vegas = tcp_ca(tp);
+	struct vegas *vegas = inet_csk_ca(sk);
 	u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
 
 	/* Filter to find propagation delay: */
@@ -136,13 +137,13 @@ static void tcp_vegas_rtt_calc(struct tcp_sock *tp, u32 usrtt)
 	vegas->cntRTT++;
 }
 
-static void tcp_vegas_state(struct tcp_sock *tp, u8 ca_state)
+static void tcp_vegas_state(struct sock *sk, u8 ca_state)
 {
 
 	if (ca_state == TCP_CA_Open)
-		vegas_enable(tp);
+		vegas_enable(sk);
 	else
-		vegas_disable(tp);
+		vegas_disable(sk);
 }
 
 /*
@@ -154,20 +155,21 @@ static void tcp_vegas_state(struct tcp_sock *tp, u8 ca_state)
154 * packets, _then_ we can make Vegas calculations 155 * packets, _then_ we can make Vegas calculations
155 * again. 156 * again.
156 */ 157 */
157static void tcp_vegas_cwnd_event(struct tcp_sock *tp, enum tcp_ca_event event) 158static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
158{ 159{
159 if (event == CA_EVENT_CWND_RESTART || 160 if (event == CA_EVENT_CWND_RESTART ||
160 event == CA_EVENT_TX_START) 161 event == CA_EVENT_TX_START)
161 tcp_vegas_init(tp); 162 tcp_vegas_init(sk);
162} 163}
163 164
164static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack, 165static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
165 u32 seq_rtt, u32 in_flight, int flag) 166 u32 seq_rtt, u32 in_flight, int flag)
166{ 167{
167 struct vegas *vegas = tcp_ca(tp); 168 struct tcp_sock *tp = tcp_sk(sk);
169 struct vegas *vegas = inet_csk_ca(sk);
168 170
169 if (!vegas->doing_vegas_now) 171 if (!vegas->doing_vegas_now)
170 return tcp_reno_cong_avoid(tp, ack, seq_rtt, in_flight, flag); 172 return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag);
171 173
172 /* The key players are v_beg_snd_una and v_beg_snd_nxt. 174 /* The key players are v_beg_snd_una and v_beg_snd_nxt.
173 * 175 *
@@ -219,7 +221,7 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack,
219 * but that's not too awful, since we're taking the min, 221 * but that's not too awful, since we're taking the min,
220 * rather than averaging. 222 * rather than averaging.
221 */ 223 */
222 tcp_vegas_rtt_calc(tp, seq_rtt*1000); 224 tcp_vegas_rtt_calc(sk, seq_rtt * 1000);
223 225
224 /* We do the Vegas calculations only if we got enough RTT 226 /* We do the Vegas calculations only if we got enough RTT
225 * samples that we can be reasonably sure that we got 227 * samples that we can be reasonably sure that we got
@@ -359,14 +361,14 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack,
359} 361}
360 362
361/* Extract info for Tcp socket info provided via netlink. */ 363/* Extract info for Tcp socket info provided via netlink. */
362static void tcp_vegas_get_info(struct tcp_sock *tp, u32 ext, 364static void tcp_vegas_get_info(struct sock *sk, u32 ext,
363 struct sk_buff *skb) 365 struct sk_buff *skb)
364{ 366{
365 const struct vegas *ca = tcp_ca(tp); 367 const struct vegas *ca = inet_csk_ca(sk);
366 if (ext & (1<<(TCPDIAG_VEGASINFO-1))) { 368 if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
367 struct tcpvegas_info *info; 369 struct tcpvegas_info *info;
368 370
369 info = RTA_DATA(__RTA_PUT(skb, TCPDIAG_VEGASINFO, 371 info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO,
370 sizeof(*info))); 372 sizeof(*info)));
371 373
372 info->tcpv_enabled = ca->doing_vegas_now; 374 info->tcpv_enabled = ca->doing_vegas_now;
@@ -393,7 +395,7 @@ static struct tcp_congestion_ops tcp_vegas = {
393 395
394static int __init tcp_vegas_register(void) 396static int __init tcp_vegas_register(void)
395{ 397{
396 BUG_ON(sizeof(struct vegas) > TCP_CA_PRIV_SIZE); 398 BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);
397 tcp_register_congestion_control(&tcp_vegas); 399 tcp_register_congestion_control(&tcp_vegas);
398 return 0; 400 return 0;
399} 401}
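
The hunks above switch every Vegas callback from taking a struct tcp_sock * to the generic struct sock *, with per-connection algorithm state now reached through inet_csk_ca() instead of tcp_ca() and bounded by ICSK_CA_PRIV_SIZE at registration time. A minimal user-space model of that convention follows; every name in it (struct conn, conn_ca, ca_init and so on) is a hypothetical stand-in for illustration, not kernel API:

    #include <stdio.h>

    /* Model of the conversion above: callbacks take the generic
     * connection object, and per-algorithm state lives in a fixed-size
     * private area inside it, as inet_csk_ca() provides. */
    #define CA_PRIV_SIZE 16

    struct conn {                        /* stands in for struct sock */
        unsigned int snd_una;            /* a "TCP view" field */
        unsigned char ca_priv[CA_PRIV_SIZE];
    };

    static void *conn_ca(struct conn *c) { return c->ca_priv; }

    struct vegas_like { int doing_now; unsigned int base_rtt; };

    static void ca_init(struct conn *c)  /* was: init(struct tcp_sock *) */
    {
        struct vegas_like *ca = conn_ca(c);

        ca->base_rtt = 0x7fffffff;
        ca->doing_now = 1;
    }

    int main(void)
    {
        /* The fixed private area bounds algorithm state, hence the
         * BUG_ON(sizeof(...) > ICSK_CA_PRIV_SIZE) checks at register
         * time in the hunks above. */
        _Static_assert(sizeof(struct vegas_like) <= CA_PRIV_SIZE, "fits");

        struct conn c = { .snd_una = 1 };
        ca_init(&c);
        printf("baseRTT=0x%x\n",
               ((struct vegas_like *)conn_ca(&c))->base_rtt);
        return 0;
    }

Keeping the private area inside the generic connection object is what allows one ops table to serve callbacks that never touch TCP-specific fields, while those that do (tcp_vegas_cong_avoid above) recover the TCP view locally with tcp_sk(sk).
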
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index ef827242c940..0c340c3756c2 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -8,7 +8,7 @@
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/skbuff.h> 10#include <linux/skbuff.h>
11#include <linux/tcp_diag.h> 11#include <linux/inet_diag.h>
12#include <net/tcp.h> 12#include <net/tcp.h>
13 13
14/* TCP Westwood structure */ 14/* TCP Westwood structure */
@@ -40,9 +40,9 @@ struct westwood {
40 * way as soon as possible. It will reasonably happen within the first 40 * way as soon as possible. It will reasonably happen within the first
41 * RTT period of the connection lifetime. 41 * RTT period of the connection lifetime.
42 */ 42 */
43static void tcp_westwood_init(struct tcp_sock *tp) 43static void tcp_westwood_init(struct sock *sk)
44{ 44{
45 struct westwood *w = tcp_ca(tp); 45 struct westwood *w = inet_csk_ca(sk);
46 46
47 w->bk = 0; 47 w->bk = 0;
48 w->bw_ns_est = 0; 48 w->bw_ns_est = 0;
@@ -51,7 +51,7 @@ static void tcp_westwood_init(struct tcp_sock *tp)
51 w->cumul_ack = 0; 51 w->cumul_ack = 0;
52 w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT; 52 w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT;
53 w->rtt_win_sx = tcp_time_stamp; 53 w->rtt_win_sx = tcp_time_stamp;
54 w->snd_una = tp->snd_una; 54 w->snd_una = tcp_sk(sk)->snd_una;
55} 55}
56 56
57/* 57/*
@@ -74,11 +74,11 @@ static inline void westwood_filter(struct westwood *w, u32 delta)
74 * Called after processing group of packets. 74 * Called after processing group of packets.
75 * but all westwood needs is the last sample of srtt. 75 * but all westwood needs is the last sample of srtt.
76 */ 76 */
77static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt) 77static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt)
78{ 78{
79 struct westwood *w = tcp_ca(tp); 79 struct westwood *w = inet_csk_ca(sk);
80 if (cnt > 0) 80 if (cnt > 0)
81 w->rtt = tp->srtt >> 3; 81 w->rtt = tcp_sk(sk)->srtt >> 3;
82} 82}
83 83
84/* 84/*
@@ -86,9 +86,9 @@ static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt)
86 * It updates RTT evaluation window if it is the right moment to do 86 * It updates RTT evaluation window if it is the right moment to do
87 * it. If so it calls filter for evaluating bandwidth. 87 * it. If so it calls filter for evaluating bandwidth.
88 */ 88 */
89static void westwood_update_window(struct tcp_sock *tp) 89static void westwood_update_window(struct sock *sk)
90{ 90{
91 struct westwood *w = tcp_ca(tp); 91 struct westwood *w = inet_csk_ca(sk);
92 s32 delta = tcp_time_stamp - w->rtt_win_sx; 92 s32 delta = tcp_time_stamp - w->rtt_win_sx;
93 93
94 /* 94 /*
@@ -114,11 +114,12 @@ static void westwood_update_window(struct tcp_sock *tp)
114 * header prediction is successful. In such case in fact update is 114 * header prediction is successful. In such case in fact update is
115 * straight forward and doesn't need any particular care. 115 * straight forward and doesn't need any particular care.
116 */ 116 */
117static inline void westwood_fast_bw(struct tcp_sock *tp) 117static inline void westwood_fast_bw(struct sock *sk)
118{ 118{
119 struct westwood *w = tcp_ca(tp); 119 const struct tcp_sock *tp = tcp_sk(sk);
120 struct westwood *w = inet_csk_ca(sk);
120 121
121 westwood_update_window(tp); 122 westwood_update_window(sk);
122 123
123 w->bk += tp->snd_una - w->snd_una; 124 w->bk += tp->snd_una - w->snd_una;
124 w->snd_una = tp->snd_una; 125 w->snd_una = tp->snd_una;
@@ -130,9 +131,10 @@ static inline void westwood_fast_bw(struct tcp_sock *tp)
130 * This function evaluates cumul_ack for evaluating bk in case of 131 * This function evaluates cumul_ack for evaluating bk in case of
131 * delayed or partial acks. 132 * delayed or partial acks.
132 */ 133 */
133static inline u32 westwood_acked_count(struct tcp_sock *tp) 134static inline u32 westwood_acked_count(struct sock *sk)
134{ 135{
135 struct westwood *w = tcp_ca(tp); 136 const struct tcp_sock *tp = tcp_sk(sk);
137 struct westwood *w = inet_csk_ca(sk);
136 138
137 w->cumul_ack = tp->snd_una - w->snd_una; 139 w->cumul_ack = tp->snd_una - w->snd_una;
138 140
@@ -160,9 +162,10 @@ static inline u32 westwood_acked_count(struct tcp_sock *tp)
160 return w->cumul_ack; 162 return w->cumul_ack;
161} 163}
162 164
163static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp) 165static inline u32 westwood_bw_rttmin(const struct sock *sk)
164{ 166{
165 struct westwood *w = tcp_ca(tp); 167 const struct tcp_sock *tp = tcp_sk(sk);
168 const struct westwood *w = inet_csk_ca(sk);
166 return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2); 169 return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2);
167} 170}
168 171
@@ -172,31 +175,32 @@ static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp)
172 * in packets we use mss_cache). Rttmin is guaranteed to be >= 2 175 * in packets we use mss_cache). Rttmin is guaranteed to be >= 2
173 * so avoids ever returning 0. 176 * so avoids ever returning 0.
174 */ 177 */
175static u32 tcp_westwood_cwnd_min(struct tcp_sock *tp) 178static u32 tcp_westwood_cwnd_min(struct sock *sk)
176{ 179{
177 return westwood_bw_rttmin(tp); 180 return westwood_bw_rttmin(sk);
178} 181}
179 182
180static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event) 183static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
181{ 184{
182 struct westwood *w = tcp_ca(tp); 185 struct tcp_sock *tp = tcp_sk(sk);
186 struct westwood *w = inet_csk_ca(sk);
183 187
184 switch(event) { 188 switch(event) {
185 case CA_EVENT_FAST_ACK: 189 case CA_EVENT_FAST_ACK:
186 westwood_fast_bw(tp); 190 westwood_fast_bw(sk);
187 break; 191 break;
188 192
189 case CA_EVENT_COMPLETE_CWR: 193 case CA_EVENT_COMPLETE_CWR:
190 tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(tp); 194 tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(sk);
191 break; 195 break;
192 196
193 case CA_EVENT_FRTO: 197 case CA_EVENT_FRTO:
194 tp->snd_ssthresh = westwood_bw_rttmin(tp); 198 tp->snd_ssthresh = westwood_bw_rttmin(sk);
195 break; 199 break;
196 200
197 case CA_EVENT_SLOW_ACK: 201 case CA_EVENT_SLOW_ACK:
198 westwood_update_window(tp); 202 westwood_update_window(sk);
199 w->bk += westwood_acked_count(tp); 203 w->bk += westwood_acked_count(sk);
200 w->rtt_min = min(w->rtt, w->rtt_min); 204 w->rtt_min = min(w->rtt, w->rtt_min);
201 break; 205 break;
202 206
@@ -208,15 +212,15 @@ static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event)
208 212
209 213
210/* Extract info for Tcp socket info provided via netlink. */ 214/* Extract info for Tcp socket info provided via netlink. */
211static void tcp_westwood_info(struct tcp_sock *tp, u32 ext, 215static void tcp_westwood_info(struct sock *sk, u32 ext,
212 struct sk_buff *skb) 216 struct sk_buff *skb)
213{ 217{
214 const struct westwood *ca = tcp_ca(tp); 218 const struct westwood *ca = inet_csk_ca(sk);
215 if (ext & (1<<(TCPDIAG_VEGASINFO-1))) { 219 if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
216 struct rtattr *rta; 220 struct rtattr *rta;
217 struct tcpvegas_info *info; 221 struct tcpvegas_info *info;
218 222
219 rta = __RTA_PUT(skb, TCPDIAG_VEGASINFO, sizeof(*info)); 223 rta = __RTA_PUT(skb, INET_DIAG_VEGASINFO, sizeof(*info));
220 info = RTA_DATA(rta); 224 info = RTA_DATA(rta);
221 info->tcpv_enabled = 1; 225 info->tcpv_enabled = 1;
222 info->tcpv_rttcnt = 0; 226 info->tcpv_rttcnt = 0;
@@ -242,7 +246,7 @@ static struct tcp_congestion_ops tcp_westwood = {
242 246
243static int __init tcp_westwood_register(void) 247static int __init tcp_westwood_register(void)
244{ 248{
245 BUG_ON(sizeof(struct westwood) > TCP_CA_PRIV_SIZE); 249 BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE);
246 return tcp_register_congestion_control(&tcp_westwood); 250 return tcp_register_congestion_control(&tcp_westwood);
247} 251}
248 252
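
westwood_bw_rttmin() in the hunks above turns the estimated bandwidth-delay product into a congestion-window floor measured in packets: bw_est * rtt_min gives bytes in flight, dividing by mss_cache converts to segments, and the max_t(..., 2) clamp keeps the window from ever collapsing below two segments. A runnable user-space sketch of the same arithmetic, with illustrative units (bytes per tick, ticks) rather than the kernel's actual fixed-point scaling:

    #include <stdio.h>
    #include <stdint.h>

    /* Illustrative only: bandwidth in bytes/tick, RTT in ticks. */
    static uint32_t bw_rttmin(uint32_t bw_est, uint32_t rtt_min, uint32_t mss)
    {
        uint32_t cwnd = (bw_est * rtt_min) / mss;

        return cwnd > 2 ? cwnd : 2;   /* never shrink below two segments */
    }

    int main(void)
    {
        /* 1448-byte MSS, 100-tick min RTT, 30 kB/tick estimated rate. */
        printf("cwnd floor = %u packets\n", bw_rttmin(30000, 100, 1448));

        /* A degenerate estimate still yields the clamp value of 2. */
        printf("cwnd floor = %u packets\n", bw_rttmin(1, 1, 1448));
        return 0;
    }
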
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7c24e64b443f..e5beca7de86c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -95,7 +95,8 @@
95#include <linux/ipv6.h> 95#include <linux/ipv6.h>
96#include <linux/netdevice.h> 96#include <linux/netdevice.h>
97#include <net/snmp.h> 97#include <net/snmp.h>
98#include <net/tcp.h> 98#include <net/ip.h>
99#include <net/tcp_states.h>
99#include <net/protocol.h> 100#include <net/protocol.h>
100#include <linux/skbuff.h> 101#include <linux/skbuff.h>
101#include <linux/proc_fs.h> 102#include <linux/proc_fs.h>
@@ -112,7 +113,7 @@
112 * Snmp MIB for the UDP layer 113 * Snmp MIB for the UDP layer
113 */ 114 */
114 115
115DEFINE_SNMP_STAT(struct udp_mib, udp_statistics); 116DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
116 117
117struct hlist_head udp_hash[UDP_HTABLE_SIZE]; 118struct hlist_head udp_hash[UDP_HTABLE_SIZE];
118DEFINE_RWLOCK(udp_hash_lock); 119DEFINE_RWLOCK(udp_hash_lock);
@@ -628,7 +629,7 @@ back_from_confirm:
628 /* ... which is an evident application bug. --ANK */ 629 /* ... which is an evident application bug. --ANK */
629 release_sock(sk); 630 release_sock(sk);
630 631
631 NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 2\n")); 632 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
632 err = -EINVAL; 633 err = -EINVAL;
633 goto out; 634 goto out;
634 } 635 }
@@ -693,7 +694,7 @@ static int udp_sendpage(struct sock *sk, struct page *page, int offset,
693 if (unlikely(!up->pending)) { 694 if (unlikely(!up->pending)) {
694 release_sock(sk); 695 release_sock(sk);
695 696
696 NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 3\n")); 697 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
697 return -EINVAL; 698 return -EINVAL;
698 } 699 }
699 700
@@ -1102,7 +1103,7 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
1102 skb->ip_summed = CHECKSUM_UNNECESSARY; 1103 skb->ip_summed = CHECKSUM_UNNECESSARY;
1103 if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) 1104 if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
1104 return 0; 1105 return 0;
1105 NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp v4 hw csum failure.\n")); 1106 LIMIT_NETDEBUG(KERN_DEBUG "udp v4 hw csum failure.\n");
1106 skb->ip_summed = CHECKSUM_NONE; 1107 skb->ip_summed = CHECKSUM_NONE;
1107 } 1108 }
1108 if (skb->ip_summed != CHECKSUM_UNNECESSARY) 1109 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
@@ -1181,14 +1182,13 @@ int udp_rcv(struct sk_buff *skb)
1181 return(0); 1182 return(0);
1182 1183
1183short_packet: 1184short_packet:
1184 NETDEBUG(if (net_ratelimit()) 1185 LIMIT_NETDEBUG(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
1185 printk(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", 1186 NIPQUAD(saddr),
1186 NIPQUAD(saddr), 1187 ntohs(uh->source),
1187 ntohs(uh->source), 1188 ulen,
1188 ulen, 1189 len,
1189 len, 1190 NIPQUAD(daddr),
1190 NIPQUAD(daddr), 1191 ntohs(uh->dest));
1191 ntohs(uh->dest)));
1192no_header: 1192no_header:
1193 UDP_INC_STATS_BH(UDP_MIB_INERRORS); 1193 UDP_INC_STATS_BH(UDP_MIB_INERRORS);
1194 kfree_skb(skb); 1194 kfree_skb(skb);
@@ -1199,13 +1199,12 @@ csum_error:
1199 * RFC1122: OK. Discards the bad packet silently (as far as 1199 * RFC1122: OK. Discards the bad packet silently (as far as
1200 * the network is concerned, anyway) as per 4.1.3.4 (MUST). 1200 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
1201 */ 1201 */
1202 NETDEBUG(if (net_ratelimit()) 1202 LIMIT_NETDEBUG(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
1203 printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", 1203 NIPQUAD(saddr),
1204 NIPQUAD(saddr), 1204 ntohs(uh->source),
1205 ntohs(uh->source), 1205 NIPQUAD(daddr),
1206 NIPQUAD(daddr), 1206 ntohs(uh->dest),
1207 ntohs(uh->dest), 1207 ulen);
1208 ulen));
1209drop: 1208drop:
1210 UDP_INC_STATS_BH(UDP_MIB_INERRORS); 1209 UDP_INC_STATS_BH(UDP_MIB_INERRORS);
1211 kfree_skb(skb); 1210 kfree_skb(skb);
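
The udp.c hunks replace the open-coded NETDEBUG(if (net_ratelimit()) printk(...)) pattern with LIMIT_NETDEBUG(fmt, ...), folding the rate-limit test into the macro so call sites shrink to a plain format string and arguments. A self-contained user-space sketch of such a macro, assuming a toy once-per-second limiter in place of net_ratelimit() (names here are hypothetical):

    #include <stdio.h>
    #include <time.h>

    /* Hypothetical stand-in for net_ratelimit(): allow at most one
     * message per second of wall-clock time. */
    static int ratelimit(void)
    {
        static time_t last;
        time_t now = time(NULL);

        if (now == last)
            return 0;
        last = now;
        return 1;
    }

    /* Same shape as the call sites above: the rate check lives in one
     * place instead of being repeated at every debug message. */
    #define LIMIT_DEBUG(fmt, ...) \
        do { if (ratelimit()) fprintf(stderr, fmt, ##__VA_ARGS__); } while (0)

    int main(void)
    {
        for (int i = 0; i < 5; i++)
            LIMIT_DEBUG("udp cork app bug %d\n", i); /* prints once/sec */
        return 0;
    }
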
diff --git a/net/ipv4/utils.c b/net/ipv4/utils.c
deleted file mode 100644
index 6aecd7a43534..000000000000
--- a/net/ipv4/utils.c
+++ /dev/null
@@ -1,59 +0,0 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Various kernel-resident INET utility functions; mainly
7 * for format conversion and debugging output.
8 *
9 * Version: $Id: utils.c,v 1.8 2000/10/03 07:29:01 anton Exp $
10 *
11 * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 *
13 * Fixes:
14 * Alan Cox : verify_area check.
15 * Alan Cox : removed old debugging.
16 * Andi Kleen : add net_ratelimit()
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version.
22 */
23
24#include <linux/module.h>
25#include <linux/types.h>
26#include <asm/byteorder.h>
27
28/*
29 * Convert an ASCII string to binary IP.
30 */
31
32__u32 in_aton(const char *str)
33{
34 unsigned long l;
35 unsigned int val;
36 int i;
37
38 l = 0;
39 for (i = 0; i < 4; i++)
40 {
41 l <<= 8;
42 if (*str != '\0')
43 {
44 val = 0;
45 while (*str != '\0' && *str != '.')
46 {
47 val *= 10;
48 val += *str - '0';
49 str++;
50 }
51 l |= val;
52 if (*str != '\0')
53 str++;
54 }
55 }
56 return(htonl(l));
57}
58
59EXPORT_SYMBOL(in_aton);
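
The deleted in_aton() above parses dotted-quad notation with a bare accumulate-and-shift loop: each octet is built up decimally, shifted into a 32-bit host-order word, and the whole result converted to network byte order at the end. Note that the loop performs no validation, so malformed input is the caller's problem. A self-contained, runnable user-space equivalent of the removed function:

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>   /* htonl(), ntohl() */

    static uint32_t my_in_aton(const char *str)
    {
        uint32_t l = 0;

        for (int i = 0; i < 4; i++) {
            l <<= 8;
            if (*str != '\0') {
                uint32_t val = 0;

                while (*str != '\0' && *str != '.') {
                    val = val * 10 + (uint32_t)(*str - '0');
                    str++;
                }
                l |= val;
                if (*str != '\0')
                    str++;   /* skip the dot */
            }
        }
        return htonl(l);
    }

    int main(void)
    {
        uint32_t a = my_in_aton("192.168.0.1");

        printf("0x%08x\n", ntohl(a));   /* prints 0xc0a80001 */
        return 0;
    }
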
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 050611d7a967..d23e07fc81fa 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -128,8 +128,10 @@ void __init xfrm4_state_init(void)
128 xfrm_state_register_afinfo(&xfrm4_state_afinfo); 128 xfrm_state_register_afinfo(&xfrm4_state_afinfo);
129} 129}
130 130
131#if 0
131void __exit xfrm4_state_fini(void) 132void __exit xfrm4_state_fini(void)
132{ 133{
133 xfrm_state_unregister_afinfo(&xfrm4_state_afinfo); 134 xfrm_state_unregister_afinfo(&xfrm4_state_afinfo);
134} 135}
136#endif /* 0 */
135 137
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index e1fe360ed27a..afbb0d4cc305 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -78,10 +78,9 @@ static int ipip_rcv(struct sk_buff *skb)
78static void ipip_err(struct sk_buff *skb, u32 info) 78static void ipip_err(struct sk_buff *skb, u32 info)
79{ 79{
80 struct xfrm_tunnel *handler = ipip_handler; 80 struct xfrm_tunnel *handler = ipip_handler;
81 u32 arg = info;
82 81
83 if (handler) 82 if (handler)
84 handler->err_handler(skb, &arg); 83 handler->err_handler(skb, info);
85} 84}
86 85
87static int ipip_init_state(struct xfrm_state *x) 86static int ipip_init_state(struct xfrm_state *x)
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index e66ca9381cfd..ab7a9124f985 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -1,6 +1,26 @@
1# 1#
2# IPv6 configuration 2# IPv6 configuration
3# 3#
4
5# IPv6 as module will cause a CRASH if you try to unload it
6config IPV6
7 tristate "The IPv6 protocol"
8 default m
9 select CRYPTO if IPV6_PRIVACY
10 select CRYPTO_MD5 if IPV6_PRIVACY
11 ---help---
12 This is complemental support for the IP version 6.
13 You will still be able to do traditional IPv4 networking as well.
14
15 For general information about IPv6, see
16 <http://playground.sun.com/pub/ipng/html/ipng-main.html>.
17 For Linux IPv6 development information, see <http://www.linux-ipv6.org>.
18 For specific information about IPv6 under Linux, read the HOWTO at
19 <http://www.bieringer.de/linux/IPv6/>.
20
21 To compile this protocol support as a module, choose M here: the
22 module will be called ipv6.
23
4config IPV6_PRIVACY 24config IPV6_PRIVACY
5 bool "IPv6: Privacy Extensions (RFC 3041) support" 25 bool "IPv6: Privacy Extensions (RFC 3041) support"
6 depends on IPV6 26 depends on IPV6
@@ -71,7 +91,6 @@ config INET6_TUNNEL
71config IPV6_TUNNEL 91config IPV6_TUNNEL
72 tristate "IPv6: IPv6-in-IPv6 tunnel" 92 tristate "IPv6: IPv6-in-IPv6 tunnel"
73 depends on IPV6 93 depends on IPV6
74 select INET6_TUNNEL
75 ---help--- 94 ---help---
76 Support for IPv6-in-IPv6 tunnels described in RFC 2473. 95 Support for IPv6-in-IPv6 tunnels described in RFC 2473.
77 96
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index b39e04940590..6460eec834b7 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -8,7 +8,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
8 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ 8 route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \
9 protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ 9 protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
10 exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ 10 exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
11 ip6_flowlabel.o ipv6_syms.o 11 ip6_flowlabel.o ipv6_syms.o netfilter.o
12 12
13ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ 13ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
14 xfrm6_output.o 14 xfrm6_output.o
@@ -23,3 +23,5 @@ obj-$(CONFIG_NETFILTER) += netfilter/
23obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o 23obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
24 24
25obj-y += exthdrs_core.o 25obj-y += exthdrs_core.o
26
27obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 77004b9456c0..6d6fb74f3b52 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1041,9 +1041,9 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
1041 const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; 1041 const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
1042 const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2); 1042 const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2);
1043 u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; 1043 u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
1044 u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2); 1044 u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
1045 int sk_ipv6only = ipv6_only_sock(sk); 1045 int sk_ipv6only = ipv6_only_sock(sk);
1046 int sk2_ipv6only = tcp_v6_ipv6only(sk2); 1046 int sk2_ipv6only = inet_v6_ipv6only(sk2);
1047 int addr_type = ipv6_addr_type(sk_rcv_saddr6); 1047 int addr_type = ipv6_addr_type(sk_rcv_saddr6);
1048 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; 1048 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
1049 1049
@@ -1126,7 +1126,7 @@ void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr)
1126 __ipv6_dev_mc_dec(idev, &maddr); 1126 __ipv6_dev_mc_dec(idev, &maddr);
1127} 1127}
1128 1128
1129void addrconf_join_anycast(struct inet6_ifaddr *ifp) 1129static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
1130{ 1130{
1131 struct in6_addr addr; 1131 struct in6_addr addr;
1132 ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); 1132 ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -1135,7 +1135,7 @@ void addrconf_join_anycast(struct inet6_ifaddr *ifp)
1135 ipv6_dev_ac_inc(ifp->idev->dev, &addr); 1135 ipv6_dev_ac_inc(ifp->idev->dev, &addr);
1136} 1136}
1137 1137
1138void addrconf_leave_anycast(struct inet6_ifaddr *ifp) 1138static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
1139{ 1139{
1140 struct in6_addr addr; 1140 struct in6_addr addr;
1141 ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); 1141 ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -2858,16 +2858,16 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
2858 2858
2859 skb = alloc_skb(size, GFP_ATOMIC); 2859 skb = alloc_skb(size, GFP_ATOMIC);
2860 if (!skb) { 2860 if (!skb) {
2861 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS); 2861 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS);
2862 return; 2862 return;
2863 } 2863 }
2864 if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { 2864 if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
2865 kfree_skb(skb); 2865 kfree_skb(skb);
2866 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL); 2866 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL);
2867 return; 2867 return;
2868 } 2868 }
2869 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFADDR; 2869 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFADDR;
2870 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFADDR, GFP_ATOMIC); 2870 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC);
2871} 2871}
2872 2872
2873static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, 2873static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
@@ -2994,16 +2994,16 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
2994 2994
2995 skb = alloc_skb(size, GFP_ATOMIC); 2995 skb = alloc_skb(size, GFP_ATOMIC);
2996 if (!skb) { 2996 if (!skb) {
2997 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS); 2997 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS);
2998 return; 2998 return;
2999 } 2999 }
3000 if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) { 3000 if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) {
3001 kfree_skb(skb); 3001 kfree_skb(skb);
3002 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL); 3002 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL);
3003 return; 3003 return;
3004 } 3004 }
3005 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFINFO; 3005 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFINFO;
3006 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFINFO, GFP_ATOMIC); 3006 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC);
3007} 3007}
3008 3008
3009static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, 3009static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
@@ -3054,16 +3054,16 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
3054 3054
3055 skb = alloc_skb(size, GFP_ATOMIC); 3055 skb = alloc_skb(size, GFP_ATOMIC);
3056 if (!skb) { 3056 if (!skb) {
3057 netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS); 3057 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS);
3058 return; 3058 return;
3059 } 3059 }
3060 if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) { 3060 if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) {
3061 kfree_skb(skb); 3061 kfree_skb(skb);
3062 netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL); 3062 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL);
3063 return; 3063 return;
3064 } 3064 }
3065 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_PREFIX; 3065 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_PREFIX;
3066 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_PREFIX, GFP_ATOMIC); 3066 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_PREFIX, GFP_ATOMIC);
3067} 3067}
3068 3068
3069static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { 3069static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
@@ -3593,10 +3593,8 @@ void __exit addrconf_cleanup(void)
3593 rtnl_unlock(); 3593 rtnl_unlock();
3594 3594
3595#ifdef CONFIG_IPV6_PRIVACY 3595#ifdef CONFIG_IPV6_PRIVACY
3596 if (likely(md5_tfm != NULL)) { 3596 crypto_free_tfm(md5_tfm);
3597 crypto_free_tfm(md5_tfm); 3597 md5_tfm = NULL;
3598 md5_tfm = NULL;
3599 }
3600#endif 3598#endif
3601 3599
3602#ifdef CONFIG_PROC_FS 3600#ifdef CONFIG_PROC_FS
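
The addrconf.c hunks move the IPv6 notifiers from the old RTMGRP_* bitmask constants to numbered RTNLGRP_* groups, and NETLINK_CB's dst_groups mask becomes a single dst_group index. The practical difference is that a 32-bit bitmask caps rtnetlink at 32 multicast groups, while group numbers do not; legacy listeners that still think in masks convert with 1 << (group - 1). A small demonstration of the two representations (the enum values below are illustrative, not copied from the uapi headers):

    #include <stdio.h>
    #include <stdint.h>

    /* Illustrative group numbers in the numbered (RTNLGRP-style) scheme. */
    enum { GRP_IPV6_IFADDR = 9, GRP_IPV6_IFINFO = 12, GRP_IPV6_PREFIX = 18 };

    /* Legacy bitmask view, valid only while group <= 32. */
    static uint32_t group_to_mask(unsigned int group)
    {
        return group ? 1u << (group - 1) : 0;
    }

    int main(void)
    {
        printf("IFADDR: group %u -> mask 0x%08x\n",
               GRP_IPV6_IFADDR, group_to_mask(GRP_IPV6_IFADDR));
        printf("PREFIX: group %u -> mask 0x%08x\n",
               GRP_IPV6_PREFIX, group_to_mask(GRP_IPV6_PREFIX));
        return 0;
    }
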
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 28d9bcab0970..4f8795af2edb 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -44,6 +44,7 @@
44#include <linux/netdevice.h> 44#include <linux/netdevice.h>
45#include <linux/icmpv6.h> 45#include <linux/icmpv6.h>
46#include <linux/smp_lock.h> 46#include <linux/smp_lock.h>
47#include <linux/netfilter_ipv6.h>
47 48
48#include <net/ip.h> 49#include <net/ip.h>
49#include <net/ipv6.h> 50#include <net/ipv6.h>
@@ -66,45 +67,14 @@ MODULE_AUTHOR("Cast of dozens");
66MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); 67MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
67MODULE_LICENSE("GPL"); 68MODULE_LICENSE("GPL");
68 69
69/* IPv6 procfs goodies... */
70
71#ifdef CONFIG_PROC_FS
72extern int raw6_proc_init(void);
73extern void raw6_proc_exit(void);
74extern int tcp6_proc_init(void);
75extern void tcp6_proc_exit(void);
76extern int udp6_proc_init(void);
77extern void udp6_proc_exit(void);
78extern int ipv6_misc_proc_init(void);
79extern void ipv6_misc_proc_exit(void);
80extern int ac6_proc_init(void);
81extern void ac6_proc_exit(void);
82extern int if6_proc_init(void);
83extern void if6_proc_exit(void);
84#endif
85
86int sysctl_ipv6_bindv6only; 70int sysctl_ipv6_bindv6only;
87 71
88#ifdef INET_REFCNT_DEBUG
89atomic_t inet6_sock_nr;
90EXPORT_SYMBOL(inet6_sock_nr);
91#endif
92
93/* The inetsw table contains everything that inet_create needs to 72/* The inetsw table contains everything that inet_create needs to
94 * build a new socket. 73 * build a new socket.
95 */ 74 */
96static struct list_head inetsw6[SOCK_MAX]; 75static struct list_head inetsw6[SOCK_MAX];
97static DEFINE_SPINLOCK(inetsw6_lock); 76static DEFINE_SPINLOCK(inetsw6_lock);
98 77
99static void inet6_sock_destruct(struct sock *sk)
100{
101 inet_sock_destruct(sk);
102
103#ifdef INET_REFCNT_DEBUG
104 atomic_dec(&inet6_sock_nr);
105#endif
106}
107
108static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) 78static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
109{ 79{
110 const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo); 80 const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
@@ -185,7 +155,7 @@ static int inet6_create(struct socket *sock, int protocol)
185 inet->hdrincl = 1; 155 inet->hdrincl = 1;
186 } 156 }
187 157
188 sk->sk_destruct = inet6_sock_destruct; 158 sk->sk_destruct = inet_sock_destruct;
189 sk->sk_family = PF_INET6; 159 sk->sk_family = PF_INET6;
190 sk->sk_protocol = protocol; 160 sk->sk_protocol = protocol;
191 161
@@ -212,12 +182,17 @@ static int inet6_create(struct socket *sock, int protocol)
212 inet->pmtudisc = IP_PMTUDISC_DONT; 182 inet->pmtudisc = IP_PMTUDISC_DONT;
213 else 183 else
214 inet->pmtudisc = IP_PMTUDISC_WANT; 184 inet->pmtudisc = IP_PMTUDISC_WANT;
185 /*
186 * Increment only the relevant sk_prot->socks debug field, this changes
187 * the previous behaviour of incrementing both the equivalent to
188 * answer->prot->socks (inet6_sock_nr) and inet_sock_nr.
189 *
190 * This allows better debug granularity as we'll know exactly how many
191 * UDPv6, TCPv6, etc socks were allocated, not the sum of all IPv6
192 * transport protocol socks. -acme
193 */
194 sk_refcnt_debug_inc(sk);
215 195
216
217#ifdef INET_REFCNT_DEBUG
218 atomic_inc(&inet6_sock_nr);
219 atomic_inc(&inet_sock_nr);
220#endif
221 if (inet->num) { 196 if (inet->num) {
222 /* It assumes that any protocol which allows 197 /* It assumes that any protocol which allows
223 * the user to assign a number at socket 198 * the user to assign a number at socket
@@ -513,11 +488,6 @@ static struct net_proto_family inet6_family_ops = {
513 .owner = THIS_MODULE, 488 .owner = THIS_MODULE,
514}; 489};
515 490
516#ifdef CONFIG_SYSCTL
517extern void ipv6_sysctl_register(void);
518extern void ipv6_sysctl_unregister(void);
519#endif
520
521/* Same as inet6_dgram_ops, sans udp_poll. */ 491/* Same as inet6_dgram_ops, sans udp_poll. */
522static struct proto_ops inet6_sockraw_ops = { 492static struct proto_ops inet6_sockraw_ops = {
523 .family = PF_INET6, 493 .family = PF_INET6,
@@ -684,8 +654,6 @@ static void cleanup_ipv6_mibs(void)
684 snmp6_mib_free((void **)udp_stats_in6); 654 snmp6_mib_free((void **)udp_stats_in6);
685} 655}
686 656
687extern int ipv6_misc_proc_init(void);
688
689static int __init inet6_init(void) 657static int __init inet6_init(void)
690{ 658{
691 struct sk_buff *dummy_skb; 659 struct sk_buff *dummy_skb;
@@ -757,6 +725,9 @@ static int __init inet6_init(void)
757 err = igmp6_init(&inet6_family_ops); 725 err = igmp6_init(&inet6_family_ops);
758 if (err) 726 if (err)
759 goto igmp_fail; 727 goto igmp_fail;
728 err = ipv6_netfilter_init();
729 if (err)
730 goto netfilter_fail;
760 /* Create /proc/foo6 entries. */ 731 /* Create /proc/foo6 entries. */
761#ifdef CONFIG_PROC_FS 732#ifdef CONFIG_PROC_FS
762 err = -ENOMEM; 733 err = -ENOMEM;
@@ -813,6 +784,8 @@ proc_tcp6_fail:
813 raw6_proc_exit(); 784 raw6_proc_exit();
814proc_raw6_fail: 785proc_raw6_fail:
815#endif 786#endif
787 ipv6_netfilter_fini();
788netfilter_fail:
816 igmp6_cleanup(); 789 igmp6_cleanup();
817igmp_fail: 790igmp_fail:
818 ndisc_cleanup(); 791 ndisc_cleanup();
@@ -852,6 +825,7 @@ static void __exit inet6_exit(void)
852 ip6_route_cleanup(); 825 ip6_route_cleanup();
853 ipv6_packet_cleanup(); 826 ipv6_packet_cleanup();
854 igmp6_cleanup(); 827 igmp6_cleanup();
828 ipv6_netfilter_fini();
855 ndisc_cleanup(); 829 ndisc_cleanup();
856 icmpv6_cleanup(); 830 icmpv6_cleanup();
857#ifdef CONFIG_SYSCTL 831#ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 986fdfdccbcd..f3629730eb15 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -131,10 +131,10 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
131 case NEXTHDR_HOP: 131 case NEXTHDR_HOP:
132 case NEXTHDR_DEST: 132 case NEXTHDR_DEST:
133 if (!zero_out_mutable_opts(exthdr.opth)) { 133 if (!zero_out_mutable_opts(exthdr.opth)) {
134 LIMIT_NETDEBUG(printk( 134 LIMIT_NETDEBUG(
135 KERN_WARNING "overrun %sopts\n", 135 KERN_WARNING "overrun %sopts\n",
136 nexthdr == NEXTHDR_HOP ? 136 nexthdr == NEXTHDR_HOP ?
137 "hop" : "dest")); 137 "hop" : "dest");
138 return -EINVAL; 138 return -EINVAL;
139 } 139 }
140 break; 140 break;
@@ -293,8 +293,7 @@ static int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struc
293 skb_push(skb, skb->data - skb->nh.raw); 293 skb_push(skb, skb->data - skb->nh.raw);
294 ahp->icv(ahp, skb, ah->auth_data); 294 ahp->icv(ahp, skb, ah->auth_data);
295 if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) { 295 if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
296 LIMIT_NETDEBUG( 296 LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n");
297 printk(KERN_WARNING "ipsec ah authentication error\n"));
298 x->stats.integrity_failed++; 297 x->stats.integrity_failed++;
299 goto free_out; 298 goto free_out;
300 } 299 }
@@ -332,9 +331,9 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
332 if (!x) 331 if (!x)
333 return; 332 return;
334 333
335 NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/" 334 NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/"
336 "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", 335 "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
337 ntohl(ah->spi), NIP6(iph->daddr))); 336 ntohl(ah->spi), NIP6(iph->daddr));
338 337
339 xfrm_state_put(x); 338 xfrm_state_put(x);
340} 339}
@@ -402,10 +401,8 @@ static int ah6_init_state(struct xfrm_state *x)
402 401
403error: 402error:
404 if (ahp) { 403 if (ahp) {
405 if (ahp->work_icv) 404 kfree(ahp->work_icv);
406 kfree(ahp->work_icv); 405 crypto_free_tfm(ahp->tfm);
407 if (ahp->tfm)
408 crypto_free_tfm(ahp->tfm);
409 kfree(ahp); 406 kfree(ahp);
410 } 407 }
411 return -EINVAL; 408 return -EINVAL;
@@ -418,14 +415,10 @@ static void ah6_destroy(struct xfrm_state *x)
418 if (!ahp) 415 if (!ahp)
419 return; 416 return;
420 417
421 if (ahp->work_icv) { 418 kfree(ahp->work_icv);
422 kfree(ahp->work_icv); 419 ahp->work_icv = NULL;
423 ahp->work_icv = NULL; 420 crypto_free_tfm(ahp->tfm);
424 } 421 ahp->tfm = NULL;
425 if (ahp->tfm) {
426 crypto_free_tfm(ahp->tfm);
427 ahp->tfm = NULL;
428 }
429 kfree(ahp); 422 kfree(ahp);
430} 423}
431 424
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 5229365cd8b4..01468fab3d3d 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -29,6 +29,7 @@
29#include <net/addrconf.h> 29#include <net/addrconf.h>
30#include <net/transp_v6.h> 30#include <net/transp_v6.h>
31#include <net/ip6_route.h> 31#include <net/ip6_route.h>
32#include <net/tcp_states.h>
32 33
33#include <linux/errqueue.h> 34#include <linux/errqueue.h>
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
@@ -588,8 +589,8 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
588 break; 589 break;
589 590
590 default: 591 default:
591 LIMIT_NETDEBUG( 592 LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n",
592 printk(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type)); 593 cmsg->cmsg_type);
593 err = -EINVAL; 594 err = -EINVAL;
594 break; 595 break;
595 }; 596 };
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 324db62515a2..9b27460f0cc7 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -212,8 +212,7 @@ static int esp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, stru
212 212
213 padlen = nexthdr[0]; 213 padlen = nexthdr[0];
214 if (padlen+2 >= elen) { 214 if (padlen+2 >= elen) {
215 LIMIT_NETDEBUG( 215 LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen);
216 printk(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen));
217 ret = -EINVAL; 216 ret = -EINVAL;
218 goto out; 217 goto out;
219 } 218 }
@@ -277,22 +276,14 @@ static void esp6_destroy(struct xfrm_state *x)
277 if (!esp) 276 if (!esp)
278 return; 277 return;
279 278
280 if (esp->conf.tfm) { 279 crypto_free_tfm(esp->conf.tfm);
281 crypto_free_tfm(esp->conf.tfm); 280 esp->conf.tfm = NULL;
282 esp->conf.tfm = NULL; 281 kfree(esp->conf.ivec);
283 } 282 esp->conf.ivec = NULL;
284 if (esp->conf.ivec) { 283 crypto_free_tfm(esp->auth.tfm);
285 kfree(esp->conf.ivec); 284 esp->auth.tfm = NULL;
286 esp->conf.ivec = NULL; 285 kfree(esp->auth.work_icv);
287 } 286 esp->auth.work_icv = NULL;
288 if (esp->auth.tfm) {
289 crypto_free_tfm(esp->auth.tfm);
290 esp->auth.tfm = NULL;
291 }
292 if (esp->auth.work_icv) {
293 kfree(esp->auth.work_icv);
294 esp->auth.work_icv = NULL;
295 }
296 kfree(esp); 287 kfree(esp);
297} 288}
298 289
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index e0839eafc3a9..5be6da2584ee 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -424,8 +424,8 @@ static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
424 IP6CB(skb)->ra = optoff; 424 IP6CB(skb)->ra = optoff;
425 return 1; 425 return 1;
426 } 426 }
427 LIMIT_NETDEBUG( 427 LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
428 printk(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", skb->nh.raw[optoff+1])); 428 skb->nh.raw[optoff+1]);
429 kfree_skb(skb); 429 kfree_skb(skb);
430 return 0; 430 return 0;
431} 431}
@@ -437,8 +437,8 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
437 u32 pkt_len; 437 u32 pkt_len;
438 438
439 if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) { 439 if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
440 LIMIT_NETDEBUG( 440 LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
441 printk(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", skb->nh.raw[optoff+1])); 441 skb->nh.raw[optoff+1]);
442 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); 442 IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
443 goto drop; 443 goto drop;
444 } 444 }
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index ff3ec9822e36..fa8f1bb0aa52 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -67,7 +67,7 @@
67#include <asm/uaccess.h> 67#include <asm/uaccess.h>
68#include <asm/system.h> 68#include <asm/system.h>
69 69
70DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); 70DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
71 71
72/* 72/*
73 * The ICMP socket(s). This is the most convenient way to flow control 73 * The ICMP socket(s). This is the most convenient way to flow control
@@ -332,8 +332,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
332 * for now we don't know that. 332 * for now we don't know that.
333 */ 333 */
334 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { 334 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
335 LIMIT_NETDEBUG( 335 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
336 printk(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n"));
337 return; 336 return;
338 } 337 }
339 338
@@ -341,8 +340,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
341 * Never answer to a ICMP packet. 340 * Never answer to a ICMP packet.
342 */ 341 */
343 if (is_ineligible(skb)) { 342 if (is_ineligible(skb)) {
344 LIMIT_NETDEBUG( 343 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
345 printk(KERN_DEBUG "icmpv6_send: no reply to icmp error\n"));
346 return; 344 return;
347 } 345 }
348 346
@@ -393,8 +391,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
393 len = skb->len - msg.offset; 391 len = skb->len - msg.offset;
394 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr)); 392 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
395 if (len < 0) { 393 if (len < 0) {
396 LIMIT_NETDEBUG( 394 LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
397 printk(KERN_DEBUG "icmp: len problem\n"));
398 goto out_dst_release; 395 goto out_dst_release;
399 } 396 }
400 397
@@ -551,7 +548,8 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
551 548
552 read_lock(&raw_v6_lock); 549 read_lock(&raw_v6_lock);
553 if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) { 550 if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
554 while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr))) { 551 while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr,
552 IP6CB(skb)->iif))) {
555 rawv6_err(sk, skb, NULL, type, code, inner_offset, info); 553 rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
556 sk = sk_next(sk); 554 sk = sk_next(sk);
557 } 555 }
@@ -583,17 +581,15 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
583 skb->ip_summed = CHECKSUM_UNNECESSARY; 581 skb->ip_summed = CHECKSUM_UNNECESSARY;
584 if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 582 if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
585 skb->csum)) { 583 skb->csum)) {
586 LIMIT_NETDEBUG( 584 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 hw checksum failed\n");
587 printk(KERN_DEBUG "ICMPv6 hw checksum failed\n"));
588 skb->ip_summed = CHECKSUM_NONE; 585 skb->ip_summed = CHECKSUM_NONE;
589 } 586 }
590 } 587 }
591 if (skb->ip_summed == CHECKSUM_NONE) { 588 if (skb->ip_summed == CHECKSUM_NONE) {
592 if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 589 if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
593 skb_checksum(skb, 0, skb->len, 0))) { 590 skb_checksum(skb, 0, skb->len, 0))) {
594 LIMIT_NETDEBUG( 591 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
595 printk(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", 592 NIP6(*saddr), NIP6(*daddr));
596 NIP6(*saddr), NIP6(*daddr)));
597 goto discard_it; 593 goto discard_it;
598 } 594 }
599 } 595 }
@@ -669,8 +665,7 @@ static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
669 break; 665 break;
670 666
671 default: 667 default:
672 LIMIT_NETDEBUG( 668 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
673 printk(KERN_DEBUG "icmpv6: msg of unknown type\n"));
674 669
675 /* informational */ 670 /* informational */
676 if (type & ICMPV6_INFOMSG_MASK) 671 if (type & ICMPV6_INFOMSG_MASK)
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
new file mode 100644
index 000000000000..01d5f46d4e40
--- /dev/null
+++ b/net/ipv6/inet6_hashtables.c
@@ -0,0 +1,81 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Generic INET6 transport hashtables
7 *
8 * Authors: Lotsa people, from code originally in tcp
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16#include <linux/config.h>
17
18#include <linux/module.h>
19
20#include <net/inet_connection_sock.h>
21#include <net/inet_hashtables.h>
22#include <net/inet6_hashtables.h>
23
24struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
25 const struct in6_addr *daddr,
26 const unsigned short hnum, const int dif)
27{
28 struct sock *sk;
29 const struct hlist_node *node;
30 struct sock *result = NULL;
31 int score, hiscore = 0;
32
33 read_lock(&hashinfo->lhash_lock);
34 sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
35 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
36 const struct ipv6_pinfo *np = inet6_sk(sk);
37
38 score = 1;
39 if (!ipv6_addr_any(&np->rcv_saddr)) {
40 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
41 continue;
42 score++;
43 }
44 if (sk->sk_bound_dev_if) {
45 if (sk->sk_bound_dev_if != dif)
46 continue;
47 score++;
48 }
49 if (score == 3) {
50 result = sk;
51 break;
52 }
53 if (score > hiscore) {
54 hiscore = score;
55 result = sk;
56 }
57 }
58 }
59 if (result)
60 sock_hold(result);
61 read_unlock(&hashinfo->lhash_lock);
62 return result;
63}
64
65EXPORT_SYMBOL_GPL(inet6_lookup_listener);
66
67struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
68 const struct in6_addr *saddr, const u16 sport,
69 const struct in6_addr *daddr, const u16 dport,
70 const int dif)
71{
72 struct sock *sk;
73
74 local_bh_disable();
75 sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
76 local_bh_enable();
77
78 return sk;
79}
80
81EXPORT_SYMBOL_GPL(inet6_lookup);
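
The listener walk in inet6_lookup_listener() above scores each candidate socket: one point for the port/family match that got it into the loop, one more for an exactly matching bound address, and one more for a matching bound device. A perfect score of 3 short-circuits the walk; otherwise the best partial match wins, so the most specific listener always beats a wildcard. A runnable toy model of that scoring (types and addresses are illustrative):

    #include <stdio.h>
    #include <string.h>

    /* Toy model of the scoring walk: wildcard fields ("any", ifindex 0)
     * match everything but score lower than exact matches. */
    struct listener { const char *addr; int ifindex; };

    static int score(const struct listener *l, const char *daddr, int dif)
    {
        int s = 1;                       /* port+family already matched */

        if (strcmp(l->addr, "any") != 0) {
            if (strcmp(l->addr, daddr) != 0)
                return 0;                /* bound elsewhere: no match */
            s++;
        }
        if (l->ifindex) {
            if (l->ifindex != dif)
                return 0;
            s++;
        }
        return s;
    }

    int main(void)
    {
        struct listener ls[] = { { "any", 0 }, { "2001:db8::1", 0 },
                                 { "2001:db8::1", 2 } };
        int best = -1, hi = 0;

        for (int i = 0; i < 3; i++) {
            int s = score(&ls[i], "2001:db8::1", 2);

            if (s == 3) { best = i; break; }   /* cannot be beaten */
            if (s > hi) { hi = s; best = i; }
        }
        printf("winner: listener %d\n", best); /* -> 2, most specific */
        return 0;
    }
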
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1b354aa97934..16af874c9e8f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -49,7 +49,7 @@
49 49
50struct rt6_statistics rt6_stats; 50struct rt6_statistics rt6_stats;
51 51
52static kmem_cache_t * fib6_node_kmem; 52static kmem_cache_t * fib6_node_kmem __read_mostly;
53 53
54enum fib_walk_state_t 54enum fib_walk_state_t
55{ 55{
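
The __read_mostly annotation added to fib6_node_kmem (and to several statistics pointers elsewhere in this series) places a variable that is written once at boot but read on every lookup into a dedicated section, keeping it off cache lines that see frequent writes and so avoiding false sharing on SMP. A user-space imitation of the idea, assuming a GCC/Clang toolchain targeting ELF; the section name is merely the conventional one:

    #include <stdio.h>

    /* Group rarely-written, hot-read data into its own section so it
     * shares cache lines only with other read-mostly values. */
    #define __read_mostly __attribute__((__section__(".data.read_mostly")))

    static int lookup_cache_ready __read_mostly = 1;

    int main(void)
    {
        printf("ready=%d\n", lookup_cache_ready);
        return 0;
    }
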
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 866f10726c58..6e3480426939 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -56,7 +56,7 @@ static inline int ip6_rcv_finish( struct sk_buff *skb)
56 return dst_input(skb); 56 return dst_input(skb);
57} 57}
58 58
59int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) 59int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
60{ 60{
61 struct ipv6hdr *hdr; 61 struct ipv6hdr *hdr;
62 u32 pkt_len; 62 u32 pkt_len;
@@ -166,8 +166,8 @@ resubmit:
166 nexthdr = skb->nh.raw[nhoff]; 166 nexthdr = skb->nh.raw[nhoff];
167 167
168 raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); 168 raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
169 if (raw_sk) 169 if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
170 ipv6_raw_deliver(skb, nexthdr); 170 raw_sk = NULL;
171 171
172 hash = nexthdr & (MAX_INET_PROTOS - 1); 172 hash = nexthdr & (MAX_INET_PROTOS - 1);
173 if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) { 173 if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
@@ -198,12 +198,13 @@ resubmit:
198 if (!raw_sk) { 198 if (!raw_sk) {
199 if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 199 if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
200 IP6_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS); 200 IP6_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS);
201 icmpv6_param_prob(skb, ICMPV6_UNK_NEXTHDR, nhoff); 201 icmpv6_send(skb, ICMPV6_PARAMPROB,
202 ICMPV6_UNK_NEXTHDR, nhoff,
203 skb->dev);
202 } 204 }
203 } else { 205 } else
204 IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); 206 IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
205 kfree_skb(skb); 207 kfree_skb(skb);
206 }
207 } 208 }
208 rcu_read_unlock(); 209 rcu_read_unlock();
209 return 0; 210 return 0;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1f2c2f9e353f..01ef94f7c7f1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -153,51 +153,6 @@ int ip6_output(struct sk_buff *skb)
153 return ip6_output2(skb); 153 return ip6_output2(skb);
154} 154}
155 155
156#ifdef CONFIG_NETFILTER
157int ip6_route_me_harder(struct sk_buff *skb)
158{
159 struct ipv6hdr *iph = skb->nh.ipv6h;
160 struct dst_entry *dst;
161 struct flowi fl = {
162 .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
163 .nl_u =
164 { .ip6_u =
165 { .daddr = iph->daddr,
166 .saddr = iph->saddr, } },
167 .proto = iph->nexthdr,
168 };
169
170 dst = ip6_route_output(skb->sk, &fl);
171
172 if (dst->error) {
173 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
174 LIMIT_NETDEBUG(
175 printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n"));
176 dst_release(dst);
177 return -EINVAL;
178 }
179
180 /* Drop old route. */
181 dst_release(skb->dst);
182
183 skb->dst = dst;
184 return 0;
185}
186#endif
187
188static inline int ip6_maybe_reroute(struct sk_buff *skb)
189{
190#ifdef CONFIG_NETFILTER
191 if (skb->nfcache & NFC_ALTERED){
192 if (ip6_route_me_harder(skb) != 0){
193 kfree_skb(skb);
194 return -EINVAL;
195 }
196 }
197#endif /* CONFIG_NETFILTER */
198 return dst_output(skb);
199}
200
201/* 156/*
202 * xmit an sk_buff (used by TCP) 157 * xmit an sk_buff (used by TCP)
203 */ 158 */
@@ -266,7 +221,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
266 mtu = dst_mtu(dst); 221 mtu = dst_mtu(dst);
267 if ((skb->len <= mtu) || ipfragok) { 222 if ((skb->len <= mtu) || ipfragok) {
268 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); 223 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
269 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute); 224 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
225 dst_output);
270 } 226 }
271 227
272 if (net_ratelimit()) 228 if (net_ratelimit())
@@ -321,7 +277,9 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
321 read_lock(&ip6_ra_lock); 277 read_lock(&ip6_ra_lock);
322 for (ra = ip6_ra_chain; ra; ra = ra->next) { 278 for (ra = ip6_ra_chain; ra; ra = ra->next) {
323 struct sock *sk = ra->sk; 279 struct sock *sk = ra->sk;
324 if (sk && ra->sel == sel) { 280 if (sk && ra->sel == sel &&
281 (!sk->sk_bound_dev_if ||
282 sk->sk_bound_dev_if == skb->dev->ifindex)) {
325 if (last) { 283 if (last) {
326 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 284 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
327 if (skb2) 285 if (skb2)
@@ -667,7 +625,7 @@ slow_path:
667 */ 625 */
668 626
669 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { 627 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
670 NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n")); 628 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
671 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS); 629 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
672 err = -ENOMEM; 630 err = -ENOMEM;
673 goto fail; 631 goto fail;
@@ -792,13 +750,8 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
792 if (ipv6_addr_any(&fl->fl6_src)) { 750 if (ipv6_addr_any(&fl->fl6_src)) {
793 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src); 751 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
794 752
795 if (err) { 753 if (err)
796#if IP6_DEBUG >= 2
797 printk(KERN_DEBUG "ip6_dst_lookup: "
798 "no available source address\n");
799#endif
800 goto out_err_release; 754 goto out_err_release;
801 }
802 } 755 }
803 756
804 return 0; 757 return 0;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index ba3b0c267f75..09613729404c 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1110,11 +1110,39 @@ ip6ip6_fb_tnl_dev_init(struct net_device *dev)
1110 return 0; 1110 return 0;
1111} 1111}
1112 1112
1113#ifdef CONFIG_INET6_TUNNEL
1113static struct xfrm6_tunnel ip6ip6_handler = { 1114static struct xfrm6_tunnel ip6ip6_handler = {
1114 .handler = ip6ip6_rcv, 1115 .handler = ip6ip6_rcv,
1115 .err_handler = ip6ip6_err, 1116 .err_handler = ip6ip6_err,
1116}; 1117};
1117 1118
1119static inline int ip6ip6_register(void)
1120{
1121 return xfrm6_tunnel_register(&ip6ip6_handler);
1122}
1123
1124static inline int ip6ip6_unregister(void)
1125{
1126 return xfrm6_tunnel_deregister(&ip6ip6_handler);
1127}
1128#else
1129static struct inet6_protocol xfrm6_tunnel_protocol = {
1130 .handler = ip6ip6_rcv,
1131 .err_handler = ip6ip6_err,
1132 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
1133};
1134
1135static inline int ip6ip6_register(void)
1136{
1137 return inet6_add_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6);
1138}
1139
1140static inline int ip6ip6_unregister(void)
1141{
1142 return inet6_del_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6);
1143}
1144#endif
1145
1118/** 1146/**
1119 * ip6_tunnel_init - register protocol and reserve needed resources 1147 * ip6_tunnel_init - register protocol and reserve needed resources
1120 * 1148 *
@@ -1125,7 +1153,7 @@ static int __init ip6_tunnel_init(void)
1125{ 1153{
1126 int err; 1154 int err;
1127 1155
1128 if (xfrm6_tunnel_register(&ip6ip6_handler) < 0) { 1156 if (ip6ip6_register() < 0) {
1129 printk(KERN_ERR "ip6ip6 init: can't register tunnel\n"); 1157 printk(KERN_ERR "ip6ip6 init: can't register tunnel\n");
1130 return -EAGAIN; 1158 return -EAGAIN;
1131 } 1159 }
@@ -1144,7 +1172,7 @@ static int __init ip6_tunnel_init(void)
1144 } 1172 }
1145 return 0; 1173 return 0;
1146fail: 1174fail:
1147 xfrm6_tunnel_deregister(&ip6ip6_handler); 1175 ip6ip6_unregister();
1148 return err; 1176 return err;
1149} 1177}
1150 1178
@@ -1154,7 +1182,7 @@ fail:
1154 1182
1155static void __exit ip6_tunnel_cleanup(void) 1183static void __exit ip6_tunnel_cleanup(void)
1156{ 1184{
1157 if (xfrm6_tunnel_deregister(&ip6ip6_handler) < 0) 1185 if (ip6ip6_unregister() < 0)
1158 printk(KERN_INFO "ip6ip6 close: can't deregister tunnel\n"); 1186 printk(KERN_INFO "ip6ip6 close: can't deregister tunnel\n");
1159 1187
1160 unregister_netdev(ip6ip6_fb_tnl_dev); 1188 unregister_netdev(ip6ip6_fb_tnl_dev);
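
Rather than sprinkle #ifdef CONFIG_INET6_TUNNEL through the init and exit paths, the ip6_tunnel.c hunks wrap the two registration strategies behind ip6ip6_register()/ip6ip6_unregister() inlines, so the callers stay identical whichever backend is compiled in. A compile-time-strategy sketch of the same pattern, with a hypothetical USE_XFRM_TUNNEL switch standing in for the kernel config symbol:

    #include <stdio.h>

    /* One call site, two compile-time strategies, selected once next
     * to the alternative definitions instead of at every caller. */
    #ifdef USE_XFRM_TUNNEL
    static inline int tunnel_register(void)
    {
        puts("registered via xfrm6_tunnel stub");
        return 0;
    }
    #else
    static inline int tunnel_register(void)
    {
        puts("registered via inet6 protocol stub");
        return 0;
    }
    #endif

    int main(void)
    {
        /* The caller never repeats the #ifdef. */
        return tunnel_register() < 0;
    }
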
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 423feb46ccc0..85bfbc69b2c3 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -341,8 +341,7 @@ static void ipcomp6_free_tfms(struct crypto_tfm **tfms)
341 341
342 for_each_cpu(cpu) { 342 for_each_cpu(cpu) {
343 struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu); 343 struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu);
344 if (tfm) 344 crypto_free_tfm(tfm);
345 crypto_free_tfm(tfm);
346 } 345 }
347 free_percpu(tfms); 346 free_percpu(tfms);
348} 347}
@@ -354,7 +353,7 @@ static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name)
354 int cpu; 353 int cpu;
355 354
356 /* This can be any valid CPU ID so we don't need locking. */ 355 /* This can be any valid CPU ID so we don't need locking. */
357 cpu = smp_processor_id(); 356 cpu = raw_smp_processor_id();
358 357
359 list_for_each_entry(pos, &ipcomp6_tfms_list, list) { 358 list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
360 struct crypto_tfm *tfm; 359 struct crypto_tfm *tfm;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index f3ef4c38d315..76466af8331e 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -55,7 +55,7 @@
55 55
56#include <asm/uaccess.h> 56#include <asm/uaccess.h>
57 57
58DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); 58DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly;
59 59
60static struct packet_type ipv6_packet_type = { 60static struct packet_type ipv6_packet_type = {
61 .type = __constant_htons(ETH_P_IPV6), 61 .type = __constant_htons(ETH_P_IPV6),
@@ -109,13 +109,6 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
109 return 0; 109 return 0;
110} 110}
111 111
112extern int ip6_mc_source(int add, int omode, struct sock *sk,
113 struct group_source_req *pgsr);
114extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf);
115extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
116 struct group_filter __user *optval, int __user *optlen);
117
118
119int ipv6_setsockopt(struct sock *sk, int level, int optname, 112int ipv6_setsockopt(struct sock *sk, int level, int optname,
120 char __user *optval, int optlen) 113 char __user *optval, int optlen)
121{ 114{
@@ -163,6 +156,13 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
163 fl6_free_socklist(sk); 156 fl6_free_socklist(sk);
164 ipv6_sock_mc_close(sk); 157 ipv6_sock_mc_close(sk);
165 158
159 /*
160 * Sock is moving from IPv6 to IPv4 (sk_prot), so
161 * remove it from the refcnt debug socks count in the
162 * original family...
163 */
164 sk_refcnt_debug_dec(sk);
165
166 if (sk->sk_protocol == IPPROTO_TCP) { 166 if (sk->sk_protocol == IPPROTO_TCP) {
167 struct tcp_sock *tp = tcp_sk(sk); 167 struct tcp_sock *tp = tcp_sk(sk);
168 168
@@ -192,9 +192,11 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
192 kfree_skb(pktopt); 192 kfree_skb(pktopt);
193 193
194 sk->sk_destruct = inet_sock_destruct; 194 sk->sk_destruct = inet_sock_destruct;
195#ifdef INET_REFCNT_DEBUG 195 /*
196 atomic_dec(&inet6_sock_nr); 196 * ... and add it to the refcnt debug socks count
197#endif 197 * in the new family. -acme
198 */
199 sk_refcnt_debug_inc(sk);
198 module_put(THIS_MODULE); 200 module_put(THIS_MODULE);
199 retv = 0; 201 retv = 0;
200 break; 202 break;
@@ -437,7 +439,6 @@ done:
437 } 439 }
438 case MCAST_MSFILTER: 440 case MCAST_MSFILTER:
439 { 441 {
440 extern int sysctl_optmem_max;
441 extern int sysctl_mld_max_msf; 442 extern int sysctl_mld_max_msf;
442 struct group_filter *gsf; 443 struct group_filter *gsf;
443 444
@@ -504,6 +505,9 @@ done:
504 break; 505 break;
505 case IPV6_IPSEC_POLICY: 506 case IPV6_IPSEC_POLICY:
506 case IPV6_XFRM_POLICY: 507 case IPV6_XFRM_POLICY:
508 retv = -EPERM;
509 if (!capable(CAP_NET_ADMIN))
510 break;
507 retv = xfrm_user_policy(sk, optname, optval, optlen); 511 retv = xfrm_user_policy(sk, optname, optval, optlen);
508 break; 512 break;
509 513
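
The two refcount-debug hunks above form a pair around the IPV6_ADDRFORM family switch; a minimal sketch of that pairing, with a hypothetical helper name (only sk_refcnt_debug_dec()/sk_refcnt_debug_inc() come from the patch):

	static void example_switch_family(struct sock *sk, struct proto *v4_prot)
	{
		sk_refcnt_debug_dec(sk);  /* leave the IPv6 debug socks count */
		sk->sk_prot = v4_prot;    /* socket now uses IPv4 proto ops   */
		sk_refcnt_debug_inc(sk);  /* join the IPv4 debug socks count  */
	}
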
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
index 5ade5a5d1990..37a4a99c9fe9 100644
--- a/net/ipv6/ipv6_syms.c
+++ b/net/ipv6/ipv6_syms.c
@@ -15,9 +15,6 @@ EXPORT_SYMBOL(ndisc_mc_map);
15EXPORT_SYMBOL(register_inet6addr_notifier); 15EXPORT_SYMBOL(register_inet6addr_notifier);
16EXPORT_SYMBOL(unregister_inet6addr_notifier); 16EXPORT_SYMBOL(unregister_inet6addr_notifier);
17EXPORT_SYMBOL(ip6_route_output); 17EXPORT_SYMBOL(ip6_route_output);
18#ifdef CONFIG_NETFILTER
19EXPORT_SYMBOL(ip6_route_me_harder);
20#endif
21EXPORT_SYMBOL(addrconf_lock); 18EXPORT_SYMBOL(addrconf_lock);
22EXPORT_SYMBOL(ipv6_setsockopt); 19EXPORT_SYMBOL(ipv6_setsockopt);
23EXPORT_SYMBOL(ipv6_getsockopt); 20EXPORT_SYMBOL(ipv6_getsockopt);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 7ae72d4c9bd2..a7eae30f4554 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -812,7 +812,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
812 if (ipv6_chk_acast_addr(dev, &msg->target) || 812 if (ipv6_chk_acast_addr(dev, &msg->target) ||
813 (idev->cnf.forwarding && 813 (idev->cnf.forwarding &&
814 pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) { 814 pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) {
815 if (skb->stamp.tv_sec != LOCALLY_ENQUEUED && 815 if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
816 skb->pkt_type != PACKET_HOST && 816 skb->pkt_type != PACKET_HOST &&
817 inc != 0 && 817 inc != 0 &&
818 idev->nd_parms->proxy_delay != 0) { 818 idev->nd_parms->proxy_delay != 0) {
@@ -1487,6 +1487,8 @@ int ndisc_rcv(struct sk_buff *skb)
1487 return 0; 1487 return 0;
1488 } 1488 }
1489 1489
1490 memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
1491
1490 switch (msg->icmph.icmp6_type) { 1492 switch (msg->icmph.icmp6_type) {
1491 case NDISC_NEIGHBOUR_SOLICITATION: 1493 case NDISC_NEIGHBOUR_SOLICITATION:
1492 ndisc_recv_ns(skb); 1494 ndisc_recv_ns(skb);
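
The ndisc change replaces the old trick of smuggling LOCALLY_ENQUEUED through skb->stamp with a proper per-packet control block that ndisc_rcv() zeroes on entry. A sketch of the producer side, for a caller that enqueues a locally generated solicitation (the helper is hypothetical; NEIGH_CB, struct neighbour_cb and LOCALLY_ENQUEUED are from the patch):

	static void example_mark_locally_enqueued(struct sk_buff *skb)
	{
		memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
		NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED; /* skip proxy delay */
	}
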
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
new file mode 100644
index 000000000000..f8626ebf90fd
--- /dev/null
+++ b/net/ipv6/netfilter.c
@@ -0,0 +1,104 @@
1#include <linux/config.h>
2#include <linux/init.h>
3
4#ifdef CONFIG_NETFILTER
5
6#include <linux/kernel.h>
7#include <linux/ipv6.h>
8#include <linux/netfilter.h>
9#include <linux/netfilter_ipv6.h>
10#include <net/dst.h>
11#include <net/ipv6.h>
12#include <net/ip6_route.h>
13
14int ip6_route_me_harder(struct sk_buff *skb)
15{
16 struct ipv6hdr *iph = skb->nh.ipv6h;
17 struct dst_entry *dst;
18 struct flowi fl = {
19 .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
20 .nl_u =
21 { .ip6_u =
22 { .daddr = iph->daddr,
23 .saddr = iph->saddr, } },
24 .proto = iph->nexthdr,
25 };
26
27 dst = ip6_route_output(skb->sk, &fl);
28
29 if (dst->error) {
30 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
31 LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
32 dst_release(dst);
33 return -EINVAL;
34 }
35
36 /* Drop old route. */
37 dst_release(skb->dst);
38
39 skb->dst = dst;
40 return 0;
41}
42EXPORT_SYMBOL(ip6_route_me_harder);
43
44/*
45 * Extra routing may be needed on local out, as the QUEUE target never
46 * returns control to the table.
47 */
48
49struct ip6_rt_info {
50 struct in6_addr daddr;
51 struct in6_addr saddr;
52};
53
54static void save(const struct sk_buff *skb, struct nf_info *info)
55{
56 struct ip6_rt_info *rt_info = nf_info_reroute(info);
57
58 if (info->hook == NF_IP6_LOCAL_OUT) {
59 struct ipv6hdr *iph = skb->nh.ipv6h;
60
61 rt_info->daddr = iph->daddr;
62 rt_info->saddr = iph->saddr;
63 }
64}
65
66static int reroute(struct sk_buff **pskb, const struct nf_info *info)
67{
68 struct ip6_rt_info *rt_info = nf_info_reroute(info);
69
70 if (info->hook == NF_IP6_LOCAL_OUT) {
71 struct ipv6hdr *iph = (*pskb)->nh.ipv6h;
72 if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
73 !ipv6_addr_equal(&iph->saddr, &rt_info->saddr))
74 return ip6_route_me_harder(*pskb);
75 }
76 return 0;
77}
78
79static struct nf_queue_rerouter ip6_reroute = {
80 .rer_size = sizeof(struct ip6_rt_info),
81 .save = &save,
82 .reroute = &reroute,
83};
84
85int __init ipv6_netfilter_init(void)
86{
87 return nf_register_queue_rerouter(PF_INET6, &ip6_reroute);
88}
89
90void ipv6_netfilter_fini(void)
91{
92 nf_unregister_queue_rerouter(PF_INET6);
93}
94
95#else /* CONFIG_NETFILTER */
96int __init ipv6_netfilter_init(void)
97{
98 return 0;
99}
100
101void ipv6_netfilter_fini(void)
102{
103}
104#endif /* CONFIG_NETFILTER */
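
To make the intent of ip6_route_me_harder() concrete: any LOCAL_OUT path that rewrites IPv6 addresses must re-route, because skb->dst was chosen for the old header. A hypothetical caller (the rewrite helper is invented for illustration; only ip6_route_me_harder() is from this file):

	static int example_rewrite_daddr(struct sk_buff *skb,
					 const struct in6_addr *new_daddr)
	{
		ipv6_addr_copy(&skb->nh.ipv6h->daddr, new_daddr);
		return ip6_route_me_harder(skb); /* route for the new daddr */
	}
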
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 77ec704c9ee3..216fbe1ac65c 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -10,13 +10,16 @@ menu "IPv6: Netfilter Configuration (EXPERIMENTAL)"
10# dep_tristate ' FTP protocol support' CONFIG_IP6_NF_FTP $CONFIG_IP6_NF_CONNTRACK 10# dep_tristate ' FTP protocol support' CONFIG_IP6_NF_FTP $CONFIG_IP6_NF_CONNTRACK
11#fi 11#fi
12config IP6_NF_QUEUE 12config IP6_NF_QUEUE
13 tristate "Userspace queueing via NETLINK" 13 tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)"
14 ---help--- 14 ---help---
15 15
16 This option adds a queue handler to the kernel for IPv6 16 This option adds a queue handler to the kernel for IPv6
17 packets which lets us to receive the filtered packets 17 packets which enables users to receive the filtered packets
18 with QUEUE target using libiptc as we can do with 18 with QUEUE target using libipq.
19 the IPv4 now. 19
 20 This option enables the old IPv6-only "ip6_queue" implementation
21 which has been obsoleted by the new "nfnetlink_queue" code (see
22 CONFIG_NETFILTER_NETLINK_QUEUE).
20 23
21 (C) Fernando Anton 2001 24 (C) Fernando Anton 2001
22 IPv64 Project - Work based in IPv64 draft by Arturo Azcorra. 25 IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
@@ -196,6 +199,16 @@ config IP6_NF_TARGET_LOG
196 199
197 To compile it as a module, choose M here. If unsure, say N. 200 To compile it as a module, choose M here. If unsure, say N.
198 201
202config IP6_NF_TARGET_REJECT
203 tristate "REJECT target support"
204 depends on IP6_NF_FILTER
205 help
206 The REJECT target allows a filtering rule to specify that an ICMPv6
207 error should be issued in response to an incoming packet, rather
208 than silently being dropped.
209
210 To compile it as a module, choose M here. If unsure, say N.
211
199# if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then 212# if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then
200# dep_tristate ' REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER 213# dep_tristate ' REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER
201# if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then 214# if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
@@ -226,6 +239,22 @@ config IP6_NF_TARGET_MARK
226 239
227 To compile it as a module, choose M here. If unsure, say N. 240 To compile it as a module, choose M here. If unsure, say N.
228 241
242config IP6_NF_TARGET_HL
243 tristate 'HL (hoplimit) target support'
244 depends on IP6_NF_MANGLE
245 help
246 This option adds a `HL' target, which enables the user to decrement
247 the hoplimit value of the IPv6 header or set it to a given (lower)
248 value.
249
250 While it is safe to decrement the hoplimit value, this option also
251 enables functionality to increment and set the hoplimit value of the
252 IPv6 header to arbitrary values. This is EXTREMELY DANGEROUS since
253 you can easily create immortal packets that loop forever on the
254 network.
255
256 To compile it as a module, choose M here. If unsure, say N.
257
229#dep_tristate ' LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES 258#dep_tristate ' LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES
230config IP6_NF_RAW 259config IP6_NF_RAW
231 tristate 'raw table support (required for TRACE)' 260 tristate 'raw table support (required for TRACE)'
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 2e51714953b6..bd9a16a5cbba 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -20,7 +20,10 @@ obj-$(CONFIG_IP6_NF_MATCH_PHYSDEV) += ip6t_physdev.o
20obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o 20obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
21obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o 21obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
22obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o 22obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o
23obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o
23obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o 24obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o
24obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o 25obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
25obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o 26obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
26obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o 27obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o
28obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
29obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ip6t_NFQUEUE.o
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 750943e2d34e..aa11cf366efa 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -47,16 +47,10 @@
47#define NET_IPQ_QMAX 2088 47#define NET_IPQ_QMAX 2088
48#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen" 48#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
49 49
50struct ipq_rt_info {
51 struct in6_addr daddr;
52 struct in6_addr saddr;
53};
54
55struct ipq_queue_entry { 50struct ipq_queue_entry {
56 struct list_head list; 51 struct list_head list;
57 struct nf_info *info; 52 struct nf_info *info;
58 struct sk_buff *skb; 53 struct sk_buff *skb;
59 struct ipq_rt_info rt_info;
60}; 54};
61 55
62typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); 56typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
@@ -76,7 +70,9 @@ static DECLARE_MUTEX(ipqnl_sem);
76static void 70static void
77ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict) 71ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
78{ 72{
73 local_bh_disable();
79 nf_reinject(entry->skb, entry->info, verdict); 74 nf_reinject(entry->skb, entry->info, verdict);
75 local_bh_enable();
80 kfree(entry); 76 kfree(entry);
81} 77}
82 78
@@ -209,6 +205,12 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
209 break; 205 break;
210 206
211 case IPQ_COPY_PACKET: 207 case IPQ_COPY_PACKET:
208 if (entry->skb->ip_summed == CHECKSUM_HW &&
209 (*errp = skb_checksum_help(entry->skb,
210 entry->info->outdev == NULL))) {
211 read_unlock_bh(&queue_lock);
212 return NULL;
213 }
212 if (copy_range == 0 || copy_range > entry->skb->len) 214 if (copy_range == 0 || copy_range > entry->skb->len)
213 data_len = entry->skb->len; 215 data_len = entry->skb->len;
214 else 216 else
@@ -236,8 +238,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
236 238
237 pmsg->packet_id = (unsigned long )entry; 239 pmsg->packet_id = (unsigned long )entry;
238 pmsg->data_len = data_len; 240 pmsg->data_len = data_len;
239 pmsg->timestamp_sec = entry->skb->stamp.tv_sec; 241 pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec;
240 pmsg->timestamp_usec = entry->skb->stamp.tv_usec; 242 pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec;
241 pmsg->mark = entry->skb->nfmark; 243 pmsg->mark = entry->skb->nfmark;
242 pmsg->hook = entry->info->hook; 244 pmsg->hook = entry->info->hook;
243 pmsg->hw_protocol = entry->skb->protocol; 245 pmsg->hw_protocol = entry->skb->protocol;
@@ -276,7 +278,8 @@ nlmsg_failure:
276} 278}
277 279
278static int 280static int
279ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) 281ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
282 unsigned int queuenum, void *data)
280{ 283{
281 int status = -EINVAL; 284 int status = -EINVAL;
282 struct sk_buff *nskb; 285 struct sk_buff *nskb;
@@ -294,13 +297,6 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
294 entry->info = info; 297 entry->info = info;
295 entry->skb = skb; 298 entry->skb = skb;
296 299
297 if (entry->info->hook == NF_IP_LOCAL_OUT) {
298 struct ipv6hdr *iph = skb->nh.ipv6h;
299
300 entry->rt_info.daddr = iph->daddr;
301 entry->rt_info.saddr = iph->saddr;
302 }
303
304 nskb = ipq_build_packet_message(entry, &status); 300 nskb = ipq_build_packet_message(entry, &status);
305 if (nskb == NULL) 301 if (nskb == NULL)
306 goto err_out_free; 302 goto err_out_free;
@@ -376,22 +372,11 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
376 } 372 }
377 skb_put(e->skb, diff); 373 skb_put(e->skb, diff);
378 } 374 }
379 if (!skb_ip_make_writable(&e->skb, v->data_len)) 375 if (!skb_make_writable(&e->skb, v->data_len))
380 return -ENOMEM; 376 return -ENOMEM;
381 memcpy(e->skb->data, v->payload, v->data_len); 377 memcpy(e->skb->data, v->payload, v->data_len);
382 e->skb->nfcache |= NFC_ALTERED; 378 e->skb->ip_summed = CHECKSUM_NONE;
383 379
384 /*
385 * Extra routing may needed on local out, as the QUEUE target never
386 * returns control to the table.
387 * Not a nice way to cmp, but works
388 */
389 if (e->info->hook == NF_IP_LOCAL_OUT) {
390 struct ipv6hdr *iph = e->skb->nh.ipv6h;
391 if (!ipv6_addr_equal(&iph->daddr, &e->rt_info.daddr) ||
392 !ipv6_addr_equal(&iph->saddr, &e->rt_info.saddr))
393 return ip6_route_me_harder(e->skb);
394 }
395 return 0; 380 return 0;
396} 381}
397 382
@@ -667,6 +652,11 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
667 return len; 652 return len;
668} 653}
669 654
655static struct nf_queue_handler nfqh = {
656 .name = "ip6_queue",
657 .outfn = &ipq_enqueue_packet,
658};
659
670static int 660static int
671init_or_cleanup(int init) 661init_or_cleanup(int init)
672{ 662{
@@ -677,7 +667,8 @@ init_or_cleanup(int init)
677 goto cleanup; 667 goto cleanup;
678 668
679 netlink_register_notifier(&ipq_nl_notifier); 669 netlink_register_notifier(&ipq_nl_notifier);
680 ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk); 670 ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk,
671 THIS_MODULE);
681 if (ipqnl == NULL) { 672 if (ipqnl == NULL) {
682 printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); 673 printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
683 goto cleanup_netlink_notifier; 674 goto cleanup_netlink_notifier;
@@ -694,7 +685,7 @@ init_or_cleanup(int init)
694 register_netdevice_notifier(&ipq_dev_notifier); 685 register_netdevice_notifier(&ipq_dev_notifier);
695 ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); 686 ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0);
696 687
697 status = nf_register_queue_handler(PF_INET6, ipq_enqueue_packet, NULL); 688 status = nf_register_queue_handler(PF_INET6, &nfqh);
698 if (status < 0) { 689 if (status < 0) {
699 printk(KERN_ERR "ip6_queue: failed to register queue handler\n"); 690 printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
700 goto cleanup_sysctl; 691 goto cleanup_sysctl;
@@ -702,7 +693,7 @@ init_or_cleanup(int init)
702 return status; 693 return status;
703 694
704cleanup: 695cleanup:
705 nf_unregister_queue_handler(PF_INET6); 696 nf_unregister_queue_handlers(&nfqh);
706 synchronize_net(); 697 synchronize_net();
707 ipq_flush(NF_DROP); 698 ipq_flush(NF_DROP);
708 699
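
The registration shape ip6_queue migrates to, restated: the queue handler is now a named struct rather than a bare function pointer, and the output callback gained a queue number so one handler can serve several queues. A minimal sketch mirroring only the calls visible above:

	static struct nf_queue_handler example_qh = {
		.name	= "example",
		.outfn	= &ipq_enqueue_packet, /* (skb, info, queuenum, data) */
	};

	static int __init example_register(void)
	{
		return nf_register_queue_handler(PF_INET6, &example_qh);
	}
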
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 73034511c8db..1cb8adb2787f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -401,7 +401,6 @@ ip6t_do_table(struct sk_buff **pskb,
401 do { 401 do {
402 IP_NF_ASSERT(e); 402 IP_NF_ASSERT(e);
403 IP_NF_ASSERT(back); 403 IP_NF_ASSERT(back);
404 (*pskb)->nfcache |= e->nfcache;
405 if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6, 404 if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6,
406 &protoff, &offset)) { 405 &protoff, &offset)) {
407 struct ip6t_entry_target *t; 406 struct ip6t_entry_target *t;
@@ -434,8 +433,8 @@ ip6t_do_table(struct sk_buff **pskb,
434 back->comefrom); 433 back->comefrom);
435 continue; 434 continue;
436 } 435 }
437 if (table_base + v 436 if (table_base + v != (void *)e + e->next_offset
438 != (void *)e + e->next_offset) { 437 && !(e->ipv6.flags & IP6T_F_GOTO)) {
439 /* Save old back ptr in next entry */ 438 /* Save old back ptr in next entry */
440 struct ip6t_entry *next 439 struct ip6t_entry *next
441 = (void *)e + e->next_offset; 440 = (void *)e + e->next_offset;
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
new file mode 100644
index 000000000000..8f5549b72720
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -0,0 +1,118 @@
1/*
2 * Hop Limit modification target for ip6tables
3 * Maciej Soltysiak <solt@dns.toxicfilms.tv>
4 * Based on HW's TTL module
5 *
6 * This software is distributed under the terms of GNU GPL
7 */
8
9#include <linux/module.h>
10#include <linux/skbuff.h>
11#include <linux/ip.h>
12
13#include <linux/netfilter_ipv6/ip6_tables.h>
14#include <linux/netfilter_ipv6/ip6t_HL.h>
15
16MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
17MODULE_DESCRIPTION("IP tables Hop Limit modification module");
18MODULE_LICENSE("GPL");
19
20static unsigned int ip6t_hl_target(struct sk_buff **pskb,
21 const struct net_device *in,
22 const struct net_device *out,
23 unsigned int hooknum,
24 const void *targinfo, void *userinfo)
25{
26 struct ipv6hdr *ip6h;
27 const struct ip6t_HL_info *info = targinfo;
28 u_int16_t diffs[2];
29 int new_hl;
30
31 if (!skb_make_writable(pskb, (*pskb)->len))
32 return NF_DROP;
33
34 ip6h = (*pskb)->nh.ipv6h;
35
36 switch (info->mode) {
37 case IP6T_HL_SET:
38 new_hl = info->hop_limit;
39 break;
40 case IP6T_HL_INC:
41 new_hl = ip6h->hop_limit + info->hop_limit;
42 if (new_hl > 255)
43 new_hl = 255;
44 break;
45 case IP6T_HL_DEC:
46 new_hl = ip6h->hop_limit - info->hop_limit;
47 if (new_hl < 0)
48 new_hl = 0;
49 break;
50 default:
51 new_hl = ip6h->hop_limit;
52 break;
53 }
54
55 if (new_hl != ip6h->hop_limit) {
56 diffs[0] = htons(((unsigned)ip6h->hop_limit) << 8) ^ 0xFFFF;
57 ip6h->hop_limit = new_hl;
58 diffs[1] = htons(((unsigned)ip6h->hop_limit) << 8);
59 }
60
61 return IP6T_CONTINUE;
62}
63
64static int ip6t_hl_checkentry(const char *tablename,
65 const struct ip6t_entry *e,
66 void *targinfo,
67 unsigned int targinfosize,
68 unsigned int hook_mask)
69{
70 struct ip6t_HL_info *info = targinfo;
71
72 if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_HL_info))) {
73 printk(KERN_WARNING "ip6t_HL: targinfosize %u != %Zu\n",
74 targinfosize,
75 IP6T_ALIGN(sizeof(struct ip6t_HL_info)));
76 return 0;
77 }
78
79 if (strcmp(tablename, "mangle")) {
80 printk(KERN_WARNING "ip6t_HL: can only be called from "
81 "\"mangle\" table, not \"%s\"\n", tablename);
82 return 0;
83 }
84
85 if (info->mode > IP6T_HL_MAXMODE) {
86 printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n",
87 info->mode);
88 return 0;
89 }
90
91 if ((info->mode != IP6T_HL_SET) && (info->hop_limit == 0)) {
92 printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't "
93 "make sense with value 0\n");
94 return 0;
95 }
96
97 return 1;
98}
99
100static struct ip6t_target ip6t_HL = {
101 .name = "HL",
102 .target = ip6t_hl_target,
103 .checkentry = ip6t_hl_checkentry,
104 .me = THIS_MODULE
105};
106
107static int __init init(void)
108{
109 return ip6t_register_target(&ip6t_HL);
110}
111
112static void __exit fini(void)
113{
114 ip6t_unregister_target(&ip6t_HL);
115}
116
117module_init(init);
118module_exit(fini);
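
Two notes on ip6t_HL.c: the diffs[] bookkeeping in ip6t_hl_target() looks vestigial, inherited from the IPv4 TTL module it is based on, since the IPv6 header carries no checksum to adjust; and the INC/DEC modes saturate rather than wrap. A standalone userspace check of the saturation arithmetic (mode constants renamed for self-containment):

	#include <stdio.h>

	enum { HL_SET, HL_INC, HL_DEC };

	static int clamp_hl(int mode, int cur, int arg)
	{
		switch (mode) {
		case HL_SET: return arg;
		case HL_INC: return cur + arg > 255 ? 255 : cur + arg;
		case HL_DEC: return cur - arg < 0 ? 0 : cur - arg;
		}
		return cur;
	}

	int main(void)
	{
		/* prints "255 0": both directions saturate */
		printf("%d %d\n", clamp_hl(HL_INC, 250, 10),
				  clamp_hl(HL_DEC, 3, 10));
		return 0;
	}
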
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index c44685e391b7..0cd1d1bd9033 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -26,10 +26,6 @@ MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
26MODULE_DESCRIPTION("IP6 tables LOG target module"); 26MODULE_DESCRIPTION("IP6 tables LOG target module");
27MODULE_LICENSE("GPL"); 27MODULE_LICENSE("GPL");
28 28
29static unsigned int nflog = 1;
30module_param(nflog, int, 0400);
31MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
32
33struct in_device; 29struct in_device;
34#include <net/route.h> 30#include <net/route.h>
35#include <linux/netfilter_ipv6/ip6t_LOG.h> 31#include <linux/netfilter_ipv6/ip6t_LOG.h>
@@ -44,7 +40,7 @@ struct in_device;
44static DEFINE_SPINLOCK(log_lock); 40static DEFINE_SPINLOCK(log_lock);
45 41
46/* One level of recursion won't kill us */ 42/* One level of recursion won't kill us */
47static void dump_packet(const struct ip6t_log_info *info, 43static void dump_packet(const struct nf_loginfo *info,
48 const struct sk_buff *skb, unsigned int ip6hoff, 44 const struct sk_buff *skb, unsigned int ip6hoff,
49 int recurse) 45 int recurse)
50{ 46{
@@ -53,6 +49,12 @@ static void dump_packet(const struct ip6t_log_info *info,
53 struct ipv6hdr _ip6h, *ih; 49 struct ipv6hdr _ip6h, *ih;
54 unsigned int ptr; 50 unsigned int ptr;
55 unsigned int hdrlen = 0; 51 unsigned int hdrlen = 0;
52 unsigned int logflags;
53
54 if (info->type == NF_LOG_TYPE_LOG)
55 logflags = info->u.log.logflags;
56 else
57 logflags = NF_LOG_MASK;
56 58
57 ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); 59 ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
58 if (ih == NULL) { 60 if (ih == NULL) {
@@ -84,7 +86,7 @@ static void dump_packet(const struct ip6t_log_info *info,
84 } 86 }
85 87
86 /* Max length: 48 "OPT (...) " */ 88 /* Max length: 48 "OPT (...) " */
87 if (info->logflags & IP6T_LOG_IPOPT) 89 if (logflags & IP6T_LOG_IPOPT)
88 printk("OPT ( "); 90 printk("OPT ( ");
89 91
90 switch (currenthdr) { 92 switch (currenthdr) {
@@ -119,7 +121,7 @@ static void dump_packet(const struct ip6t_log_info *info,
119 case IPPROTO_ROUTING: 121 case IPPROTO_ROUTING:
120 case IPPROTO_HOPOPTS: 122 case IPPROTO_HOPOPTS:
121 if (fragment) { 123 if (fragment) {
122 if (info->logflags & IP6T_LOG_IPOPT) 124 if (logflags & IP6T_LOG_IPOPT)
123 printk(")"); 125 printk(")");
124 return; 126 return;
125 } 127 }
@@ -127,7 +129,7 @@ static void dump_packet(const struct ip6t_log_info *info,
127 break; 129 break;
128 /* Max Length */ 130 /* Max Length */
129 case IPPROTO_AH: 131 case IPPROTO_AH:
130 if (info->logflags & IP6T_LOG_IPOPT) { 132 if (logflags & IP6T_LOG_IPOPT) {
131 struct ip_auth_hdr _ahdr, *ah; 133 struct ip_auth_hdr _ahdr, *ah;
132 134
133 /* Max length: 3 "AH " */ 135 /* Max length: 3 "AH " */
@@ -158,7 +160,7 @@ static void dump_packet(const struct ip6t_log_info *info,
158 hdrlen = (hp->hdrlen+2)<<2; 160 hdrlen = (hp->hdrlen+2)<<2;
159 break; 161 break;
160 case IPPROTO_ESP: 162 case IPPROTO_ESP:
161 if (info->logflags & IP6T_LOG_IPOPT) { 163 if (logflags & IP6T_LOG_IPOPT) {
162 struct ip_esp_hdr _esph, *eh; 164 struct ip_esp_hdr _esph, *eh;
163 165
164 /* Max length: 4 "ESP " */ 166 /* Max length: 4 "ESP " */
@@ -190,7 +192,7 @@ static void dump_packet(const struct ip6t_log_info *info,
190 printk("Unknown Ext Hdr %u", currenthdr); 192 printk("Unknown Ext Hdr %u", currenthdr);
191 return; 193 return;
192 } 194 }
193 if (info->logflags & IP6T_LOG_IPOPT) 195 if (logflags & IP6T_LOG_IPOPT)
194 printk(") "); 196 printk(") ");
195 197
196 currenthdr = hp->nexthdr; 198 currenthdr = hp->nexthdr;
@@ -218,7 +220,7 @@ static void dump_packet(const struct ip6t_log_info *info,
218 printk("SPT=%u DPT=%u ", 220 printk("SPT=%u DPT=%u ",
219 ntohs(th->source), ntohs(th->dest)); 221 ntohs(th->source), ntohs(th->dest));
220 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ 222 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
221 if (info->logflags & IP6T_LOG_TCPSEQ) 223 if (logflags & IP6T_LOG_TCPSEQ)
222 printk("SEQ=%u ACK=%u ", 224 printk("SEQ=%u ACK=%u ",
223 ntohl(th->seq), ntohl(th->ack_seq)); 225 ntohl(th->seq), ntohl(th->ack_seq));
224 /* Max length: 13 "WINDOW=65535 " */ 226 /* Max length: 13 "WINDOW=65535 " */
@@ -245,7 +247,7 @@ static void dump_packet(const struct ip6t_log_info *info,
245 /* Max length: 11 "URGP=65535 " */ 247 /* Max length: 11 "URGP=65535 " */
246 printk("URGP=%u ", ntohs(th->urg_ptr)); 248 printk("URGP=%u ", ntohs(th->urg_ptr));
247 249
248 if ((info->logflags & IP6T_LOG_TCPOPT) 250 if ((logflags & IP6T_LOG_TCPOPT)
249 && th->doff * 4 > sizeof(struct tcphdr)) { 251 && th->doff * 4 > sizeof(struct tcphdr)) {
250 u_int8_t _opt[60 - sizeof(struct tcphdr)], *op; 252 u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
251 unsigned int i; 253 unsigned int i;
@@ -349,7 +351,7 @@ static void dump_packet(const struct ip6t_log_info *info,
349 } 351 }
350 352
351 /* Max length: 15 "UID=4294967295 " */ 353 /* Max length: 15 "UID=4294967295 " */
352 if ((info->logflags & IP6T_LOG_UID) && recurse && skb->sk) { 354 if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) {
353 read_lock_bh(&skb->sk->sk_callback_lock); 355 read_lock_bh(&skb->sk->sk_callback_lock);
354 if (skb->sk->sk_socket && skb->sk->sk_socket->file) 356 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
355 printk("UID=%u ", skb->sk->sk_socket->file->f_uid); 357 printk("UID=%u ", skb->sk->sk_socket->file->f_uid);
@@ -357,25 +359,38 @@ static void dump_packet(const struct ip6t_log_info *info,
357 } 359 }
358} 360}
359 361
362static struct nf_loginfo default_loginfo = {
363 .type = NF_LOG_TYPE_LOG,
364 .u = {
365 .log = {
366 .level = 0,
367 .logflags = NF_LOG_MASK,
368 },
369 },
370};
371
360static void 372static void
361ip6t_log_packet(unsigned int hooknum, 373ip6t_log_packet(unsigned int pf,
374 unsigned int hooknum,
362 const struct sk_buff *skb, 375 const struct sk_buff *skb,
363 const struct net_device *in, 376 const struct net_device *in,
364 const struct net_device *out, 377 const struct net_device *out,
365 const struct ip6t_log_info *loginfo, 378 const struct nf_loginfo *loginfo,
366 const char *level_string,
367 const char *prefix) 379 const char *prefix)
368{ 380{
381 if (!loginfo)
382 loginfo = &default_loginfo;
383
369 spin_lock_bh(&log_lock); 384 spin_lock_bh(&log_lock);
370 printk(level_string); 385 printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
371 printk("%sIN=%s OUT=%s ", 386 prefix,
372 prefix == NULL ? loginfo->prefix : prefix,
373 in ? in->name : "", 387 in ? in->name : "",
374 out ? out->name : ""); 388 out ? out->name : "");
375 if (in && !out) { 389 if (in && !out) {
390 unsigned int len;
376 /* MAC logging for input chain only. */ 391 /* MAC logging for input chain only. */
377 printk("MAC="); 392 printk("MAC=");
378 if (skb->dev && skb->dev->hard_header_len && 393 if (skb->dev && (len = skb->dev->hard_header_len) &&
379 skb->mac.raw != skb->nh.raw) { 394 skb->mac.raw != skb->nh.raw) {
380 unsigned char *p = skb->mac.raw; 395 unsigned char *p = skb->mac.raw;
381 int i; 396 int i;
@@ -384,9 +399,11 @@ ip6t_log_packet(unsigned int hooknum,
384 (p -= ETH_HLEN) < skb->head) 399 (p -= ETH_HLEN) < skb->head)
385 p = NULL; 400 p = NULL;
386 401
387 if (p != NULL) 402 if (p != NULL) {
388 for (i = 0; i < skb->dev->hard_header_len; i++) 403 for (i = 0; i < len; i++)
389 printk("%02x", p[i]); 404 printk("%02x%s", p[i],
405 i == len - 1 ? "" : ":");
406 }
390 printk(" "); 407 printk(" ");
391 408
392 if (skb->dev->type == ARPHRD_SIT) { 409 if (skb->dev->type == ARPHRD_SIT) {
@@ -413,29 +430,17 @@ ip6t_log_target(struct sk_buff **pskb,
413 void *userinfo) 430 void *userinfo)
414{ 431{
415 const struct ip6t_log_info *loginfo = targinfo; 432 const struct ip6t_log_info *loginfo = targinfo;
416 char level_string[4] = "< >"; 433 struct nf_loginfo li;
434
435 li.type = NF_LOG_TYPE_LOG;
436 li.u.log.level = loginfo->level;
437 li.u.log.logflags = loginfo->logflags;
417 438
418 level_string[1] = '0' + (loginfo->level % 8); 439 nf_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, loginfo->prefix);
419 ip6t_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL);
420 440
421 return IP6T_CONTINUE; 441 return IP6T_CONTINUE;
422} 442}
423 443
424static void
425ip6t_logfn(unsigned int hooknum,
426 const struct sk_buff *skb,
427 const struct net_device *in,
428 const struct net_device *out,
429 const char *prefix)
430{
431 struct ip6t_log_info loginfo = {
432 .level = 0,
433 .logflags = IP6T_LOG_MASK,
434 .prefix = ""
435 };
436
437 ip6t_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix);
438}
439 444
440static int ip6t_log_checkentry(const char *tablename, 445static int ip6t_log_checkentry(const char *tablename,
441 const struct ip6t_entry *e, 446 const struct ip6t_entry *e,
@@ -472,20 +477,29 @@ static struct ip6t_target ip6t_log_reg = {
472 .me = THIS_MODULE, 477 .me = THIS_MODULE,
473}; 478};
474 479
480static struct nf_logger ip6t_logger = {
481 .name = "ip6t_LOG",
482 .logfn = &ip6t_log_packet,
483 .me = THIS_MODULE,
484};
485
475static int __init init(void) 486static int __init init(void)
476{ 487{
477 if (ip6t_register_target(&ip6t_log_reg)) 488 if (ip6t_register_target(&ip6t_log_reg))
478 return -EINVAL; 489 return -EINVAL;
479 if (nflog) 490 if (nf_log_register(PF_INET6, &ip6t_logger) < 0) {
480 nf_log_register(PF_INET6, &ip6t_logfn); 491 printk(KERN_WARNING "ip6t_LOG: not logging via system console "
492 "since somebody else already registered for PF_INET6\n");
493 /* we cannot make module load fail here, since otherwise
494 * ip6tables userspace would abort */
495 }
481 496
482 return 0; 497 return 0;
483} 498}
484 499
485static void __exit fini(void) 500static void __exit fini(void)
486{ 501{
487 if (nflog) 502 nf_log_unregister_logger(&ip6t_logger);
488 nf_log_unregister(PF_INET6, &ip6t_logfn);
489 ip6t_unregister_target(&ip6t_log_reg); 503 ip6t_unregister_target(&ip6t_log_reg);
490} 504}
491 505
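
The LOG rework above inverts the flow: the target no longer formats and prints on its own but fills an nf_loginfo and calls nf_log_packet(), which dispatches to whichever nf_logger is registered for the family (the console logger here, or nfnetlink_log if it won the registration). A sketch of a caller, mirroring ip6t_log_target() with made-up level and prefix:

	static void example_log(const struct sk_buff *skb, unsigned int hooknum,
				const struct net_device *in,
				const struct net_device *out)
	{
		struct nf_loginfo li = {
			.type = NF_LOG_TYPE_LOG,
			.u = { .log = { .level = 4, .logflags = NF_LOG_MASK } },
		};

		nf_log_packet(PF_INET6, hooknum, skb, in, out, &li, "example: ");
	}
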
diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c
index d09ceb05013a..81924fcc5857 100644
--- a/net/ipv6/netfilter/ip6t_MARK.c
+++ b/net/ipv6/netfilter/ip6t_MARK.c
@@ -28,10 +28,9 @@ target(struct sk_buff **pskb,
28{ 28{
29 const struct ip6t_mark_target_info *markinfo = targinfo; 29 const struct ip6t_mark_target_info *markinfo = targinfo;
30 30
31 if((*pskb)->nfmark != markinfo->mark) { 31 if((*pskb)->nfmark != markinfo->mark)
32 (*pskb)->nfmark = markinfo->mark; 32 (*pskb)->nfmark = markinfo->mark;
33 (*pskb)->nfcache |= NFC_ALTERED; 33
34 }
35 return IP6T_CONTINUE; 34 return IP6T_CONTINUE;
36} 35}
37 36
diff --git a/net/ipv6/netfilter/ip6t_NFQUEUE.c b/net/ipv6/netfilter/ip6t_NFQUEUE.c
new file mode 100644
index 000000000000..c6e3730e7409
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_NFQUEUE.c
@@ -0,0 +1,70 @@
1/* ip6tables module for using new netfilter netlink queue
2 *
3 * (C) 2005 by Harald Welte <laforge@netfilter.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 */
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13
14#include <linux/netfilter.h>
15#include <linux/netfilter_ipv6/ip6_tables.h>
16#include <linux/netfilter_ipv4/ipt_NFQUEUE.h>
17
18MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
19MODULE_DESCRIPTION("ip6tables NFQUEUE target");
20MODULE_LICENSE("GPL");
21
22static unsigned int
23target(struct sk_buff **pskb,
24 const struct net_device *in,
25 const struct net_device *out,
26 unsigned int hooknum,
27 const void *targinfo,
28 void *userinfo)
29{
30 const struct ipt_NFQ_info *tinfo = targinfo;
31
32 return NF_QUEUE_NR(tinfo->queuenum);
33}
34
35static int
36checkentry(const char *tablename,
37 const struct ip6t_entry *e,
38 void *targinfo,
39 unsigned int targinfosize,
40 unsigned int hook_mask)
41{
42 if (targinfosize != IP6T_ALIGN(sizeof(struct ipt_NFQ_info))) {
43 printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n",
44 targinfosize,
45 IP6T_ALIGN(sizeof(struct ipt_NFQ_info)));
46 return 0;
47 }
48
49 return 1;
50}
51
52static struct ip6t_target ipt_NFQ_reg = {
53 .name = "NFQUEUE",
54 .target = target,
55 .checkentry = checkentry,
56 .me = THIS_MODULE,
57};
58
59static int __init init(void)
60{
61 return ip6t_register_target(&ipt_NFQ_reg);
62}
63
64static void __exit fini(void)
65{
66 ip6t_unregister_target(&ipt_NFQ_reg);
67}
68
69module_init(init);
70module_exit(fini);
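
What the NFQUEUE target actually returns, unpacked: NF_QUEUE_NR() encodes the queue number into the high bits of an NF_QUEUE verdict so the core can route the packet to the right nfnetlink queue. The bit layout below is an assumption based on the era's netfilter headers, not something this diff shows:

	#include <stdio.h>

	#define NF_QUEUE	 3
	#define VERDICT_BITS	16	/* assumed split: low 16 verdict, high 16 queue */
	#define QUEUE_NR(x)	(((x) << VERDICT_BITS) | NF_QUEUE)

	int main(void)
	{
		unsigned int v = QUEUE_NR(5);
		printf("verdict=%#x queue=%u\n", v, v >> VERDICT_BITS);
		return 0;
	}
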
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
new file mode 100644
index 000000000000..14316c3ebde4
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -0,0 +1,284 @@
1/*
2 * IP6 tables REJECT target module
3 * Linux INET6 implementation
4 *
5 * Copyright (C)2003 USAGI/WIDE Project
6 *
7 * Authors:
8 * Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
9 *
10 * Based on net/ipv4/netfilter/ipt_REJECT.c
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/config.h>
19#include <linux/module.h>
20#include <linux/skbuff.h>
21#include <linux/icmpv6.h>
22#include <linux/netdevice.h>
23#include <net/ipv6.h>
24#include <net/tcp.h>
25#include <net/icmp.h>
26#include <net/ip6_checksum.h>
27#include <net/ip6_fib.h>
28#include <net/ip6_route.h>
29#include <net/flow.h>
30#include <linux/netfilter_ipv6/ip6_tables.h>
31#include <linux/netfilter_ipv6/ip6t_REJECT.h>
32
33MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
34MODULE_DESCRIPTION("IP6 tables REJECT target module");
35MODULE_LICENSE("GPL");
36
37#if 0
38#define DEBUGP printk
39#else
40#define DEBUGP(format, args...)
41#endif
42
43/* Send RST reply */
44static void send_reset(struct sk_buff *oldskb)
45{
46 struct sk_buff *nskb;
47 struct tcphdr otcph, *tcph;
48 unsigned int otcplen, hh_len;
49 int tcphoff, needs_ack;
50 struct ipv6hdr *oip6h = oldskb->nh.ipv6h, *ip6h;
51 struct dst_entry *dst = NULL;
52 u8 proto;
53 struct flowi fl;
54
55 if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
56 (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
57 DEBUGP("ip6t_REJECT: addr is not unicast.\n");
58 return;
59 }
60
61 proto = oip6h->nexthdr;
62 tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto);
63
64 if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
65 DEBUGP("ip6t_REJECT: Can't get TCP header.\n");
66 return;
67 }
68
69 otcplen = oldskb->len - tcphoff;
70
71 /* IP header checks: fragment, too short. */
72 if ((proto != IPPROTO_TCP) || (otcplen < sizeof(struct tcphdr))) {
73 DEBUGP("ip6t_REJECT: proto(%d) != IPPROTO_TCP, or too short. otcplen = %d\n",
74 proto, otcplen);
75 return;
76 }
77
78 if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr)))
79 BUG();
80
81 /* No RST for RST. */
82 if (otcph.rst) {
83 DEBUGP("ip6t_REJECT: RST is set\n");
84 return;
85 }
86
87 /* Check checksum. */
88 if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP,
89 skb_checksum(oldskb, tcphoff, otcplen, 0))) {
90 DEBUGP("ip6t_REJECT: TCP checksum is invalid\n");
91 return;
92 }
93
94 memset(&fl, 0, sizeof(fl));
95 fl.proto = IPPROTO_TCP;
96 ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr);
97 ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr);
98 fl.fl_ip_sport = otcph.dest;
99 fl.fl_ip_dport = otcph.source;
100 dst = ip6_route_output(NULL, &fl);
101 if (dst == NULL)
102 return;
103 if (dst->error ||
104 xfrm_lookup(&dst, &fl, NULL, 0)) {
105 dst_release(dst);
106 return;
107 }
108
109 hh_len = (dst->dev->hard_header_len + 15)&~15;
110 nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
111 + sizeof(struct tcphdr) + dst->trailer_len,
112 GFP_ATOMIC);
113
114 if (!nskb) {
115 if (net_ratelimit())
116 printk("ip6t_REJECT: Can't alloc skb\n");
117 dst_release(dst);
118 return;
119 }
120
121 nskb->dst = dst;
122
123 skb_reserve(nskb, hh_len + dst->header_len);
124
125 ip6h = nskb->nh.ipv6h = (struct ipv6hdr *)
126 skb_put(nskb, sizeof(struct ipv6hdr));
127 ip6h->version = 6;
128 ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT);
129 ip6h->nexthdr = IPPROTO_TCP;
130 ip6h->payload_len = htons(sizeof(struct tcphdr));
131 ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr);
132 ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr);
133
134 tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
135 /* Truncate to length (no data) */
136 tcph->doff = sizeof(struct tcphdr)/4;
137 tcph->source = otcph.dest;
138 tcph->dest = otcph.source;
139
140 if (otcph.ack) {
141 needs_ack = 0;
142 tcph->seq = otcph.ack_seq;
143 tcph->ack_seq = 0;
144 } else {
145 needs_ack = 1;
146 tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
147 + otcplen - (otcph.doff<<2));
148 tcph->seq = 0;
149 }
150
151 /* Reset flags */
152 ((u_int8_t *)tcph)[13] = 0;
153 tcph->rst = 1;
154 tcph->ack = needs_ack;
155 tcph->window = 0;
156 tcph->urg_ptr = 0;
157 tcph->check = 0;
158
159 /* Adjust TCP checksum */
160 tcph->check = csum_ipv6_magic(&nskb->nh.ipv6h->saddr,
161 &nskb->nh.ipv6h->daddr,
162 sizeof(struct tcphdr), IPPROTO_TCP,
163 csum_partial((char *)tcph,
164 sizeof(struct tcphdr), 0));
165
166 NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, nskb, NULL, nskb->dst->dev,
167 dst_output);
168}
169
170static inline void
171send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum)
172{
173 if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL)
174 skb_in->dev = &loopback_dev;
175
176 icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL);
177}
178
179static unsigned int reject6_target(struct sk_buff **pskb,
180 const struct net_device *in,
181 const struct net_device *out,
182 unsigned int hooknum,
183 const void *targinfo,
184 void *userinfo)
185{
186 const struct ip6t_reject_info *reject = targinfo;
187
188 DEBUGP(KERN_DEBUG "%s: medium point\n", __FUNCTION__);
189 /* WARNING: This code causes reentry within ip6tables.
190 This means that the ip6tables jump stack is now crap. We
191 must return an absolute verdict. --RR */
192 switch (reject->with) {
193 case IP6T_ICMP6_NO_ROUTE:
194 send_unreach(*pskb, ICMPV6_NOROUTE, hooknum);
195 break;
196 case IP6T_ICMP6_ADM_PROHIBITED:
197 send_unreach(*pskb, ICMPV6_ADM_PROHIBITED, hooknum);
198 break;
199 case IP6T_ICMP6_NOT_NEIGHBOUR:
200 send_unreach(*pskb, ICMPV6_NOT_NEIGHBOUR, hooknum);
201 break;
202 case IP6T_ICMP6_ADDR_UNREACH:
203 send_unreach(*pskb, ICMPV6_ADDR_UNREACH, hooknum);
204 break;
205 case IP6T_ICMP6_PORT_UNREACH:
206 send_unreach(*pskb, ICMPV6_PORT_UNREACH, hooknum);
207 break;
208 case IP6T_ICMP6_ECHOREPLY:
209 /* Do nothing */
210 break;
211 case IP6T_TCP_RESET:
212 send_reset(*pskb);
213 break;
214 default:
215 if (net_ratelimit())
216 printk(KERN_WARNING "ip6t_REJECT: case %u not handled yet\n", reject->with);
217 break;
218 }
219
220 return NF_DROP;
221}
222
223static int check(const char *tablename,
224 const struct ip6t_entry *e,
225 void *targinfo,
226 unsigned int targinfosize,
227 unsigned int hook_mask)
228{
229 const struct ip6t_reject_info *rejinfo = targinfo;
230
231 if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_reject_info))) {
232 DEBUGP("ip6t_REJECT: targinfosize %u != 0\n", targinfosize);
233 return 0;
234 }
235
236 /* Only allow these for packet filtering. */
237 if (strcmp(tablename, "filter") != 0) {
238 DEBUGP("ip6t_REJECT: bad table `%s'.\n", tablename);
239 return 0;
240 }
241
242 if ((hook_mask & ~((1 << NF_IP6_LOCAL_IN)
243 | (1 << NF_IP6_FORWARD)
244 | (1 << NF_IP6_LOCAL_OUT))) != 0) {
245 DEBUGP("ip6t_REJECT: bad hook mask %X\n", hook_mask);
246 return 0;
247 }
248
249 if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) {
250 printk("ip6t_REJECT: ECHOREPLY is not supported.\n");
251 return 0;
252 } else if (rejinfo->with == IP6T_TCP_RESET) {
253 /* Must specify that it's a TCP packet */
254 if (e->ipv6.proto != IPPROTO_TCP
255 || (e->ipv6.invflags & IP6T_INV_PROTO)) {
256 DEBUGP("ip6t_REJECT: TCP_RESET illegal for non-tcp\n");
257 return 0;
258 }
259 }
260
261 return 1;
262}
263
264static struct ip6t_target ip6t_reject_reg = {
265 .name = "REJECT",
266 .target = reject6_target,
267 .checkentry = check,
268 .me = THIS_MODULE
269};
270
271static int __init init(void)
272{
273 if (ip6t_register_target(&ip6t_reject_reg))
274 return -EINVAL;
275 return 0;
276}
277
278static void __exit fini(void)
279{
280 ip6t_unregister_target(&ip6t_reject_reg);
281}
282
283module_init(init);
284module_exit(fini);
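
A worked check of the RST sequence arithmetic in send_reset(): when the offending segment carried no ACK, the reset must acknowledge everything the peer sent, i.e. its seq plus one for SYN, one for FIN, plus the payload length (otcplen minus the header, doff<<2). Standalone, with made-up numbers:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t seq = 1000, payload = 0;
		int syn = 1, fin = 0;	/* a bare SYN, as from a blocked connect() */
		uint32_t ack_seq = seq + syn + fin + payload;

		printf("RST: seq=0 ack_seq=%u\n", ack_seq); /* 1001 */
		return 0;
	}
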
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
index ab0e32d3de46..9b91decbfddb 100644
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -20,71 +20,6 @@ MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
20MODULE_DESCRIPTION("IP6 tables owner matching module"); 20MODULE_DESCRIPTION("IP6 tables owner matching module");
21MODULE_LICENSE("GPL"); 21MODULE_LICENSE("GPL");
22 22
23static int
24match_pid(const struct sk_buff *skb, pid_t pid)
25{
26 struct task_struct *p;
27 struct files_struct *files;
28 int i;
29
30 read_lock(&tasklist_lock);
31 p = find_task_by_pid(pid);
32 if (!p)
33 goto out;
34 task_lock(p);
35 files = p->files;
36 if(files) {
37 spin_lock(&files->file_lock);
38 for (i=0; i < files->max_fds; i++) {
39 if (fcheck_files(files, i) == skb->sk->sk_socket->file) {
40 spin_unlock(&files->file_lock);
41 task_unlock(p);
42 read_unlock(&tasklist_lock);
43 return 1;
44 }
45 }
46 spin_unlock(&files->file_lock);
47 }
48 task_unlock(p);
49out:
50 read_unlock(&tasklist_lock);
51 return 0;
52}
53
54static int
55match_sid(const struct sk_buff *skb, pid_t sid)
56{
57 struct task_struct *g, *p;
58 struct file *file = skb->sk->sk_socket->file;
59 int i, found=0;
60
61 read_lock(&tasklist_lock);
62 do_each_thread(g, p) {
63 struct files_struct *files;
64 if (p->signal->session != sid)
65 continue;
66
67 task_lock(p);
68 files = p->files;
69 if (files) {
70 spin_lock(&files->file_lock);
71 for (i=0; i < files->max_fds; i++) {
72 if (fcheck_files(files, i) == file) {
73 found = 1;
74 break;
75 }
76 }
77 spin_unlock(&files->file_lock);
78 }
79 task_unlock(p);
80 if (found)
81 goto out;
82 } while_each_thread(g, p);
83out:
84 read_unlock(&tasklist_lock);
85
86 return found;
87}
88 23
89static int 24static int
90match(const struct sk_buff *skb, 25match(const struct sk_buff *skb,
@@ -112,18 +47,6 @@ match(const struct sk_buff *skb,
112 return 0; 47 return 0;
113 } 48 }
114 49
115 if(info->match & IP6T_OWNER_PID) {
116 if (!match_pid(skb, info->pid) ^
117 !!(info->invert & IP6T_OWNER_PID))
118 return 0;
119 }
120
121 if(info->match & IP6T_OWNER_SID) {
122 if (!match_sid(skb, info->sid) ^
123 !!(info->invert & IP6T_OWNER_SID))
124 return 0;
125 }
126
127 return 1; 50 return 1;
128} 51}
129 52
@@ -134,6 +57,8 @@ checkentry(const char *tablename,
134 unsigned int matchsize, 57 unsigned int matchsize,
135 unsigned int hook_mask) 58 unsigned int hook_mask)
136{ 59{
60 const struct ip6t_owner_info *info = matchinfo;
61
137 if (hook_mask 62 if (hook_mask
138 & ~((1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING))) { 63 & ~((1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING))) {
139 printk("ip6t_owner: only valid for LOCAL_OUT or POST_ROUTING.\n"); 64 printk("ip6t_owner: only valid for LOCAL_OUT or POST_ROUTING.\n");
@@ -142,14 +67,13 @@ checkentry(const char *tablename,
142 67
143 if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_owner_info))) 68 if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_owner_info)))
144 return 0; 69 return 0;
145#ifdef CONFIG_SMP 70
146 /* files->file_lock can not be used in a BH */ 71 if (info->match & (IP6T_OWNER_PID|IP6T_OWNER_SID)) {
 147 if (((struct ip6t_owner_info *)matchinfo)->match 72 printk("ip6t_owner: pid and sid matching "
148 & (IP6T_OWNER_PID|IP6T_OWNER_SID)) { 73 "not supported anymore\n");
149 printk("ip6t_owner: pid and sid matching is broken on SMP.\n");
150 return 0; 74 return 0;
151 } 75 }
152#endif 76
153 return 1; 77 return 1;
154} 78}
155 79
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index e2b848ec9851..ed3a76b30fd9 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -49,6 +49,7 @@
49#include <net/transp_v6.h> 49#include <net/transp_v6.h>
50#include <net/udp.h> 50#include <net/udp.h>
51#include <net/inet_common.h> 51#include <net/inet_common.h>
52#include <net/tcp_states.h>
52 53
53#include <net/rawv6.h> 54#include <net/rawv6.h>
54#include <net/xfrm.h> 55#include <net/xfrm.h>
@@ -81,7 +82,8 @@ static void raw_v6_unhash(struct sock *sk)
81 82
82/* Grumble... icmp and ip_input want to get at this... */ 83/* Grumble... icmp and ip_input want to get at this... */
83struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, 84struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
84 struct in6_addr *loc_addr, struct in6_addr *rmt_addr) 85 struct in6_addr *loc_addr, struct in6_addr *rmt_addr,
86 int dif)
85{ 87{
86 struct hlist_node *node; 88 struct hlist_node *node;
87 int is_multicast = ipv6_addr_is_multicast(loc_addr); 89 int is_multicast = ipv6_addr_is_multicast(loc_addr);
@@ -94,6 +96,9 @@ struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
94 !ipv6_addr_equal(&np->daddr, rmt_addr)) 96 !ipv6_addr_equal(&np->daddr, rmt_addr))
95 continue; 97 continue;
96 98
99 if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
100 continue;
101
97 if (!ipv6_addr_any(&np->rcv_saddr)) { 102 if (!ipv6_addr_any(&np->rcv_saddr)) {
98 if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) 103 if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
99 goto found; 104 goto found;
@@ -137,11 +142,12 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
137 * 142 *
138 * Caller owns SKB so we must make clones. 143 * Caller owns SKB so we must make clones.
139 */ 144 */
140void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) 145int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
141{ 146{
142 struct in6_addr *saddr; 147 struct in6_addr *saddr;
143 struct in6_addr *daddr; 148 struct in6_addr *daddr;
144 struct sock *sk; 149 struct sock *sk;
150 int delivered = 0;
145 __u8 hash; 151 __u8 hash;
146 152
147 saddr = &skb->nh.ipv6h->saddr; 153 saddr = &skb->nh.ipv6h->saddr;
@@ -160,9 +166,10 @@ void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
160 if (sk == NULL) 166 if (sk == NULL)
161 goto out; 167 goto out;
162 168
163 sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr); 169 sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
164 170
165 while (sk) { 171 while (sk) {
172 delivered = 1;
166 if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) { 173 if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) {
167 struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); 174 struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
168 175
@@ -170,10 +177,12 @@ void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
170 if (clone) 177 if (clone)
171 rawv6_rcv(sk, clone); 178 rawv6_rcv(sk, clone);
172 } 179 }
173 sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr); 180 sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr,
181 IP6CB(skb)->iif);
174 } 182 }
175out: 183out:
176 read_unlock(&raw_v6_lock); 184 read_unlock(&raw_v6_lock);
185 return delivered;
177} 186}
178 187
179/* This cleans up af_inet6 a bit. -DaveM */ 188/* This cleans up af_inet6 a bit. -DaveM */
@@ -328,12 +337,13 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
328 337
329 if (skb->ip_summed != CHECKSUM_UNNECESSARY) { 338 if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
330 if (skb->ip_summed == CHECKSUM_HW) { 339 if (skb->ip_summed == CHECKSUM_HW) {
340 skb_postpull_rcsum(skb, skb->nh.raw,
341 skb->h.raw - skb->nh.raw);
331 skb->ip_summed = CHECKSUM_UNNECESSARY; 342 skb->ip_summed = CHECKSUM_UNNECESSARY;
332 if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, 343 if (csum_ipv6_magic(&skb->nh.ipv6h->saddr,
333 &skb->nh.ipv6h->daddr, 344 &skb->nh.ipv6h->daddr,
334 skb->len, inet->num, skb->csum)) { 345 skb->len, inet->num, skb->csum)) {
335 LIMIT_NETDEBUG( 346 LIMIT_NETDEBUG(KERN_DEBUG "raw v6 hw csum failure.\n");
336 printk(KERN_DEBUG "raw v6 hw csum failure.\n"));
337 skb->ip_summed = CHECKSUM_NONE; 347 skb->ip_summed = CHECKSUM_NONE;
338 } 348 }
339 } 349 }
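
The raw.c lookup change adds an interface qualifier: a socket bound to a device only matches packets that arrived on it. The predicate, restated standalone (trivial, but it is the whole semantic of the two added lines):

	static int example_dev_match(int sk_bound_dev_if, int dif)
	{
		/* an unbound socket matches any interface */
		return sk_bound_dev_if == 0 || sk_bound_dev_if == dif;
	}
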
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 59e7c6317872..9d9e04344c77 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -562,7 +562,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
562 if (skb->dev) 562 if (skb->dev)
563 fq->iif = skb->dev->ifindex; 563 fq->iif = skb->dev->ifindex;
564 skb->dev = NULL; 564 skb->dev = NULL;
565 fq->stamp = skb->stamp; 565 skb_get_timestamp(skb, &fq->stamp);
566 fq->meat += skb->len; 566 fq->meat += skb->len;
567 atomic_add(skb->truesize, &ip6_frag_mem); 567 atomic_add(skb->truesize, &ip6_frag_mem);
568 568
@@ -664,7 +664,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
664 664
665 head->next = NULL; 665 head->next = NULL;
666 head->dev = dev; 666 head->dev = dev;
667 head->stamp = fq->stamp; 667 skb_set_timestamp(head, &fq->stamp);
668 head->nh.ipv6h->payload_len = htons(payload_len); 668 head->nh.ipv6h->payload_len = htons(payload_len);
669 669
670 *skb_in = head; 670 *skb_in = head;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 878789b3122d..5d5bbb49ec78 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1372,7 +1372,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1372 * Drop the packet on the floor 1372 * Drop the packet on the floor
1373 */ 1373 */
1374 1374
1375int ip6_pkt_discard(struct sk_buff *skb) 1375static int ip6_pkt_discard(struct sk_buff *skb)
1376{ 1376{
1377 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); 1377 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1378 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev); 1378 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
@@ -1380,7 +1380,7 @@ int ip6_pkt_discard(struct sk_buff *skb)
1380 return 0; 1380 return 0;
1381} 1381}
1382 1382
1383int ip6_pkt_discard_out(struct sk_buff *skb) 1383static int ip6_pkt_discard_out(struct sk_buff *skb)
1384{ 1384{
1385 skb->dev = skb->dst->dev; 1385 skb->dev = skb->dst->dev;
1386 return ip6_pkt_discard(skb); 1386 return ip6_pkt_discard(skb);
@@ -1850,16 +1850,16 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1850 1850
1851 skb = alloc_skb(size, gfp_any()); 1851 skb = alloc_skb(size, gfp_any());
1852 if (!skb) { 1852 if (!skb) {
1853 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS); 1853 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1854 return; 1854 return;
1855 } 1855 }
1856 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) { 1856 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1857 kfree_skb(skb); 1857 kfree_skb(skb);
1858 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL); 1858 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1859 return; 1859 return;
1860 } 1860 }
1861 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE; 1861 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1862 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any()); 1862 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1863} 1863}
1864 1864
1865/* 1865/*
@@ -1960,8 +1960,6 @@ static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1960 return arg.len; 1960 return arg.len;
1961} 1961}
1962 1962
1963extern struct rt6_statistics rt6_stats;
1964
1965static int rt6_stats_seq_show(struct seq_file *seq, void *v) 1963static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1966{ 1964{
1967 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 1965 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b788f55e139b..c3123c9e1a8e 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -195,7 +195,6 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int
195 dev_hold(dev); 195 dev_hold(dev);
196 196
197 ipip6_tunnel_link(nt); 197 ipip6_tunnel_link(nt);
198 /* Do not decrement MOD_USE_COUNT here. */
199 return nt; 198 return nt;
200 199
201failed: 200failed:
@@ -771,7 +770,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
771 return 0; 770 return 0;
772} 771}
773 772
774int __init ipip6_fb_tunnel_init(struct net_device *dev) 773static int __init ipip6_fb_tunnel_init(struct net_device *dev)
775{ 774{
776 struct ip_tunnel *tunnel = dev->priv; 775 struct ip_tunnel *tunnel = dev->priv;
777 struct iphdr *iph = &tunnel->parms.iph; 776 struct iphdr *iph = &tunnel->parms.iph;
@@ -794,10 +793,28 @@ static struct net_protocol sit_protocol = {
794 .err_handler = ipip6_err, 793 .err_handler = ipip6_err,
795}; 794};
796 795
796static void __exit sit_destroy_tunnels(void)
797{
798 int prio;
799
800 for (prio = 1; prio < 4; prio++) {
801 int h;
802 for (h = 0; h < HASH_SIZE; h++) {
803 struct ip_tunnel *t;
804 while ((t = tunnels[prio][h]) != NULL)
805 unregister_netdevice(t->dev);
806 }
807 }
808}
809
797void __exit sit_cleanup(void) 810void __exit sit_cleanup(void)
798{ 811{
799 inet_del_protocol(&sit_protocol, IPPROTO_IPV6); 812 inet_del_protocol(&sit_protocol, IPPROTO_IPV6);
800 unregister_netdev(ipip6_fb_tunnel_dev); 813
814 rtnl_lock();
815 sit_destroy_tunnels();
816 unregister_netdevice(ipip6_fb_tunnel_dev);
817 rtnl_unlock();
801} 818}
802 819
803int __init sit_init(void) 820int __init sit_init(void)
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 3a18e0e6ffed..8eff9fa1e983 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -14,9 +14,6 @@
14#include <net/ipv6.h> 14#include <net/ipv6.h>
15#include <net/addrconf.h> 15#include <net/addrconf.h>
16 16
17extern ctl_table ipv6_route_table[];
18extern ctl_table ipv6_icmp_table[];
19
20#ifdef CONFIG_SYSCTL 17#ifdef CONFIG_SYSCTL
21 18
22static ctl_table ipv6_table[] = { 19static ctl_table ipv6_table[] = {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f6e288dc116e..794734f1d230 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -47,6 +47,7 @@
47 47
48#include <net/tcp.h> 48#include <net/tcp.h>
49#include <net/ndisc.h> 49#include <net/ndisc.h>
50#include <net/inet6_hashtables.h>
50#include <net/ipv6.h> 51#include <net/ipv6.h>
51#include <net/transp_v6.h> 52#include <net/transp_v6.h>
52#include <net/addrconf.h> 53#include <net/addrconf.h>
@@ -75,34 +76,11 @@ static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
75static struct tcp_func ipv6_mapped; 76static struct tcp_func ipv6_mapped;
76static struct tcp_func ipv6_specific; 77static struct tcp_func ipv6_specific;
77 78
78/* I have no idea if this is a good hash for v6 or not. -DaveM */ 79static inline int tcp_v6_bind_conflict(const struct sock *sk,
79static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport, 80 const struct inet_bind_bucket *tb)
80 struct in6_addr *faddr, u16 fport)
81{ 81{
82 int hashent = (lport ^ fport); 82 const struct sock *sk2;
83 83 const struct hlist_node *node;
84 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
85 hashent ^= hashent>>16;
86 hashent ^= hashent>>8;
87 return (hashent & (tcp_ehash_size - 1));
88}
89
90static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
91{
92 struct inet_sock *inet = inet_sk(sk);
93 struct ipv6_pinfo *np = inet6_sk(sk);
94 struct in6_addr *laddr = &np->rcv_saddr;
95 struct in6_addr *faddr = &np->daddr;
96 __u16 lport = inet->num;
97 __u16 fport = inet->dport;
98 return tcp_v6_hashfn(laddr, lport, faddr, fport);
99}
100
101static inline int tcp_v6_bind_conflict(struct sock *sk,
102 struct tcp_bind_bucket *tb)
103{
104 struct sock *sk2;
105 struct hlist_node *node;
106 84
107 /* We must walk the whole port owner list in this case. -DaveM */ 85 /* We must walk the whole port owner list in this case. -DaveM */
108 sk_for_each_bound(sk2, node, &tb->owners) { 86 sk_for_each_bound(sk2, node, &tb->owners) {
@@ -126,8 +104,8 @@ static inline int tcp_v6_bind_conflict(struct sock *sk,
126 */ 104 */
127static int tcp_v6_get_port(struct sock *sk, unsigned short snum) 105static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
128{ 106{
129 struct tcp_bind_hashbucket *head; 107 struct inet_bind_hashbucket *head;
130 struct tcp_bind_bucket *tb; 108 struct inet_bind_bucket *tb;
131 struct hlist_node *node; 109 struct hlist_node *node;
132 int ret; 110 int ret;
133 111
@@ -138,37 +116,42 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
138 int remaining = (high - low) + 1; 116 int remaining = (high - low) + 1;
139 int rover; 117 int rover;
140 118
141 spin_lock(&tcp_portalloc_lock); 119 spin_lock(&tcp_hashinfo.portalloc_lock);
142 if (tcp_port_rover < low) 120 if (tcp_hashinfo.port_rover < low)
143 rover = low; 121 rover = low;
144 else 122 else
145 rover = tcp_port_rover; 123 rover = tcp_hashinfo.port_rover;
146 do { rover++; 124 do { rover++;
147 if (rover > high) 125 if (rover > high)
148 rover = low; 126 rover = low;
149 head = &tcp_bhash[tcp_bhashfn(rover)]; 127 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
150 spin_lock(&head->lock); 128 spin_lock(&head->lock);
151 tb_for_each(tb, node, &head->chain) 129 inet_bind_bucket_for_each(tb, node, &head->chain)
152 if (tb->port == rover) 130 if (tb->port == rover)
153 goto next; 131 goto next;
154 break; 132 break;
155 next: 133 next:
156 spin_unlock(&head->lock); 134 spin_unlock(&head->lock);
157 } while (--remaining > 0); 135 } while (--remaining > 0);
158 tcp_port_rover = rover; 136 tcp_hashinfo.port_rover = rover;
159 spin_unlock(&tcp_portalloc_lock); 137 spin_unlock(&tcp_hashinfo.portalloc_lock);
160 138
161 /* Exhausted local port range during search? */ 139 /* Exhausted local port range during search? It is not
140 * possible for us to be holding one of the bind hash
141 * locks if this test triggers, because if 'remaining'
142 * drops to zero, we broke out of the do/while loop at
143 * the top level, not from the 'break;' statement.
144 */
162 ret = 1; 145 ret = 1;
163 if (remaining <= 0) 146 if (unlikely(remaining <= 0))
164 goto fail; 147 goto fail;
165 148
166 /* OK, here is the one we will use. */ 149 /* OK, here is the one we will use. */
167 snum = rover; 150 snum = rover;
168 } else { 151 } else {
169 head = &tcp_bhash[tcp_bhashfn(snum)]; 152 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
170 spin_lock(&head->lock); 153 spin_lock(&head->lock);
171 tb_for_each(tb, node, &head->chain) 154 inet_bind_bucket_for_each(tb, node, &head->chain)
172 if (tb->port == snum) 155 if (tb->port == snum)
173 goto tb_found; 156 goto tb_found;
174 } 157 }
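/*
 * Editor's note: inet_bhashfn() takes over from the old tcp_bhashfn(),
 * with the table size now passed explicitly instead of read from a
 * global.  The body below is my assumption of what the shared helper
 * looks like (a plain power-of-two mask); check
 * include/net/inet_hashtables.h in this tree for the authoritative
 * version.
 */
static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
{
	return lport & (bhash_size - 1);
}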
@@ -187,8 +170,11 @@ tb_found:
187 } 170 }
188tb_not_found: 171tb_not_found:
189 ret = 1; 172 ret = 1;
190 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL) 173 if (tb == NULL) {
191 goto fail_unlock; 174 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
175 if (tb == NULL)
176 goto fail_unlock;
177 }
192 if (hlist_empty(&tb->owners)) { 178 if (hlist_empty(&tb->owners)) {
193 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) 179 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
194 tb->fastreuse = 1; 180 tb->fastreuse = 1;
@@ -199,9 +185,9 @@ tb_not_found:
199 tb->fastreuse = 0; 185 tb->fastreuse = 0;
200 186
201success: 187success:
202 if (!tcp_sk(sk)->bind_hash) 188 if (!inet_csk(sk)->icsk_bind_hash)
203 tcp_bind_hash(sk, tb, snum); 189 inet_bind_hash(sk, tb, snum);
204 BUG_TRAP(tcp_sk(sk)->bind_hash == tb); 190 BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
205 ret = 0; 191 ret = 0;
206 192
207fail_unlock: 193fail_unlock:
@@ -219,13 +205,13 @@ static __inline__ void __tcp_v6_hash(struct sock *sk)
219 BUG_TRAP(sk_unhashed(sk)); 205 BUG_TRAP(sk_unhashed(sk));
220 206
221 if (sk->sk_state == TCP_LISTEN) { 207 if (sk->sk_state == TCP_LISTEN) {
222 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; 208 list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
223 lock = &tcp_lhash_lock; 209 lock = &tcp_hashinfo.lhash_lock;
224 tcp_listen_wlock(); 210 inet_listen_wlock(&tcp_hashinfo);
225 } else { 211 } else {
226 sk->sk_hashent = tcp_v6_sk_hashfn(sk); 212 sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size);
227 list = &tcp_ehash[sk->sk_hashent].chain; 213 list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
228 lock = &tcp_ehash[sk->sk_hashent].lock; 214 lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
229 write_lock(lock); 215 write_lock(lock);
230 } 216 }
231 217
@@ -250,131 +236,11 @@ static void tcp_v6_hash(struct sock *sk)
250 } 236 }
251} 237}
252 238
253static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
254{
255 struct sock *sk;
256 struct hlist_node *node;
257 struct sock *result = NULL;
258 int score, hiscore;
259
260 hiscore=0;
261 read_lock(&tcp_lhash_lock);
262 sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
263 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
264 struct ipv6_pinfo *np = inet6_sk(sk);
265
266 score = 1;
267 if (!ipv6_addr_any(&np->rcv_saddr)) {
268 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
269 continue;
270 score++;
271 }
272 if (sk->sk_bound_dev_if) {
273 if (sk->sk_bound_dev_if != dif)
274 continue;
275 score++;
276 }
277 if (score == 3) {
278 result = sk;
279 break;
280 }
281 if (score > hiscore) {
282 hiscore = score;
283 result = sk;
284 }
285 }
286 }
287 if (result)
288 sock_hold(result);
289 read_unlock(&tcp_lhash_lock);
290 return result;
291}
292
293/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
294 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
295 *
296 * The sockhash lock must be held as a reader here.
297 */
298
299static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
300 struct in6_addr *daddr, u16 hnum,
301 int dif)
302{
303 struct tcp_ehash_bucket *head;
304 struct sock *sk;
305 struct hlist_node *node;
306 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
307 int hash;
308
309 /* Optimize here for direct hit, only listening connections can
310 * have wildcards anyways.
311 */
312 hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
313 head = &tcp_ehash[hash];
314 read_lock(&head->lock);
315 sk_for_each(sk, node, &head->chain) {
316 /* For IPV6 do the cheaper port and family tests first. */
317 if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
318 goto hit; /* You sunk my battleship! */
319 }
320 /* Must check for a TIME_WAIT'er before going to listener hash. */
321 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
322 /* FIXME: acme: check this... */
323 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
324
325 if(*((__u32 *)&(tw->tw_dport)) == ports &&
326 sk->sk_family == PF_INET6) {
327 if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
328 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
329 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
330 goto hit;
331 }
332 }
333 read_unlock(&head->lock);
334 return NULL;
335
336hit:
337 sock_hold(sk);
338 read_unlock(&head->lock);
339 return sk;
340}
341
342
343static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
344 struct in6_addr *daddr, u16 hnum,
345 int dif)
346{
347 struct sock *sk;
348
349 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
350
351 if (sk)
352 return sk;
353
354 return tcp_v6_lookup_listener(daddr, hnum, dif);
355}
356
357inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
358 struct in6_addr *daddr, u16 dport,
359 int dif)
360{
361 struct sock *sk;
362
363 local_bh_disable();
364 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
365 local_bh_enable();
366
367 return sk;
368}
369
370EXPORT_SYMBOL_GPL(tcp_v6_lookup);
371
372
373/* 239/*
374 * Open request hash tables. 240 * Open request hash tables.
375 */ 241 */
376 242
377static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd) 243static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
378{ 244{
379 u32 a, b, c; 245 u32 a, b, c;
380 246
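/*
 * Editor's sketch (not part of the patch): the scoring rule inside the
 * deleted tcp_v6_lookup_listener() above, which now lives in the shared
 * inet6_lookup_listener().  A wildcard listener scores 1; an exact
 * rcv_saddr match and a matching bound device each add a point, so a
 * score of 3 is a perfect match and the walk can stop early.
 */
static int listener_score(const struct sock *sk,
			  const struct in6_addr *daddr, const int dif)
{
	const struct ipv6_pinfo *np = inet6_sk(sk);
	int score = 1;

	if (!ipv6_addr_any(&np->rcv_saddr)) {
		if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
			return -1;	/* bound to a different address */
		score++;
	}
	if (sk->sk_bound_dev_if) {
		if (sk->sk_bound_dev_if != dif)
			return -1;	/* bound to a different device */
		score++;
	}
	return score;
}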
@@ -394,14 +260,15 @@ static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
394 return c & (TCP_SYNQ_HSIZE - 1); 260 return c & (TCP_SYNQ_HSIZE - 1);
395} 261}
396 262
397static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp, 263static struct request_sock *tcp_v6_search_req(const struct sock *sk,
398 struct request_sock ***prevp, 264 struct request_sock ***prevp,
399 __u16 rport, 265 __u16 rport,
400 struct in6_addr *raddr, 266 struct in6_addr *raddr,
401 struct in6_addr *laddr, 267 struct in6_addr *laddr,
402 int iif) 268 int iif)
403{ 269{
404 struct listen_sock *lopt = tp->accept_queue.listen_opt; 270 const struct inet_connection_sock *icsk = inet_csk(sk);
271 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
405 struct request_sock *req, **prev; 272 struct request_sock *req, **prev;
406 273
407 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; 274 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
@@ -446,44 +313,48 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
446 } 313 }
447} 314}
448 315
449static int __tcp_v6_check_established(struct sock *sk, __u16 lport, 316static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
450 struct tcp_tw_bucket **twp) 317 struct inet_timewait_sock **twp)
451{ 318{
452 struct inet_sock *inet = inet_sk(sk); 319 struct inet_sock *inet = inet_sk(sk);
453 struct ipv6_pinfo *np = inet6_sk(sk); 320 const struct ipv6_pinfo *np = inet6_sk(sk);
454 struct in6_addr *daddr = &np->rcv_saddr; 321 const struct in6_addr *daddr = &np->rcv_saddr;
455 struct in6_addr *saddr = &np->daddr; 322 const struct in6_addr *saddr = &np->daddr;
456 int dif = sk->sk_bound_dev_if; 323 const int dif = sk->sk_bound_dev_if;
457 u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); 324 const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
458 int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); 325 const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport,
459 struct tcp_ehash_bucket *head = &tcp_ehash[hash]; 326 tcp_hashinfo.ehash_size);
327 struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
460 struct sock *sk2; 328 struct sock *sk2;
461 struct hlist_node *node; 329 const struct hlist_node *node;
462 struct tcp_tw_bucket *tw; 330 struct inet_timewait_sock *tw;
463 331
464 write_lock(&head->lock); 332 write_lock(&head->lock);
465 333
466 /* Check TIME-WAIT sockets first. */ 334 /* Check TIME-WAIT sockets first. */
467 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) { 335 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
468 tw = (struct tcp_tw_bucket*)sk2; 336 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
337
338 tw = inet_twsk(sk2);
469 339
470 if(*((__u32 *)&(tw->tw_dport)) == ports && 340 if(*((__u32 *)&(tw->tw_dport)) == ports &&
471 sk2->sk_family == PF_INET6 && 341 sk2->sk_family == PF_INET6 &&
472 ipv6_addr_equal(&tw->tw_v6_daddr, saddr) && 342 ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
473 ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) && 343 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
474 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { 344 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
345 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
475 struct tcp_sock *tp = tcp_sk(sk); 346 struct tcp_sock *tp = tcp_sk(sk);
476 347
477 if (tw->tw_ts_recent_stamp && 348 if (tcptw->tw_ts_recent_stamp &&
478 (!twp || (sysctl_tcp_tw_reuse && 349 (!twp ||
479 xtime.tv_sec - 350 (sysctl_tcp_tw_reuse &&
480 tw->tw_ts_recent_stamp > 1))) { 351 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
481 /* See comment in tcp_ipv4.c */ 352 /* See comment in tcp_ipv4.c */
482 tp->write_seq = tw->tw_snd_nxt + 65535 + 2; 353 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
483 if (!tp->write_seq) 354 if (!tp->write_seq)
484 tp->write_seq = 1; 355 tp->write_seq = 1;
485 tp->rx_opt.ts_recent = tw->tw_ts_recent; 356 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
486 tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; 357 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
487 sock_hold(sk2); 358 sock_hold(sk2);
488 goto unique; 359 goto unique;
489 } else 360 } else
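/*
 * Editor's sketch (not part of the patch): the TIME-WAIT recycling test
 * above, pulled out as a predicate.  "now" stands for xtime.tv_sec and
 * "have_twp" for (twp != NULL); the one-second guard keeps us from
 * reusing a socket whose last timestamp might still be visible to the
 * old peer (see the comment in tcp_ipv4.c).
 */
static inline int tw_recyclable(const struct tcp_timewait_sock *tcptw,
				int have_twp, long now)
{
	return tcptw->tw_ts_recent_stamp &&
	       (!have_twp ||
		(sysctl_tcp_tw_reuse &&
		 now - tcptw->tw_ts_recent_stamp > 1));
}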
@@ -494,7 +365,7 @@ static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
494 365
495 /* And established part... */ 366 /* And established part... */
496 sk_for_each(sk2, node, &head->chain) { 367 sk_for_each(sk2, node, &head->chain) {
497 if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif)) 368 if (INET6_MATCH(sk2, saddr, daddr, ports, dif))
498 goto not_unique; 369 goto not_unique;
499 } 370 }
500 371
@@ -510,10 +381,10 @@ unique:
510 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); 381 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
511 } else if (tw) { 382 } else if (tw) {
512 /* Silly. Should hash-dance instead... */ 383 /* Silly. Should hash-dance instead... */
513 tcp_tw_deschedule(tw); 384 inet_twsk_deschedule(tw, &tcp_death_row);
514 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); 385 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
515 386
516 tcp_tw_put(tw); 387 inet_twsk_put(tw);
517 } 388 }
518 return 0; 389 return 0;
519 390
@@ -535,8 +406,8 @@ static inline u32 tcpv6_port_offset(const struct sock *sk)
535static int tcp_v6_hash_connect(struct sock *sk) 406static int tcp_v6_hash_connect(struct sock *sk)
536{ 407{
537 unsigned short snum = inet_sk(sk)->num; 408 unsigned short snum = inet_sk(sk)->num;
538 struct tcp_bind_hashbucket *head; 409 struct inet_bind_hashbucket *head;
539 struct tcp_bind_bucket *tb; 410 struct inet_bind_bucket *tb;
540 int ret; 411 int ret;
541 412
542 if (!snum) { 413 if (!snum) {
@@ -548,19 +419,19 @@ static int tcp_v6_hash_connect(struct sock *sk)
548 static u32 hint; 419 static u32 hint;
549 u32 offset = hint + tcpv6_port_offset(sk); 420 u32 offset = hint + tcpv6_port_offset(sk);
550 struct hlist_node *node; 421 struct hlist_node *node;
551 struct tcp_tw_bucket *tw = NULL; 422 struct inet_timewait_sock *tw = NULL;
552 423
553 local_bh_disable(); 424 local_bh_disable();
554 for (i = 1; i <= range; i++) { 425 for (i = 1; i <= range; i++) {
555 port = low + (i + offset) % range; 426 port = low + (i + offset) % range;
556 head = &tcp_bhash[tcp_bhashfn(port)]; 427 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
557 spin_lock(&head->lock); 428 spin_lock(&head->lock);
558 429
559 /* Does not bother with rcv_saddr checks, 430 /* Does not bother with rcv_saddr checks,
560 * because the established check is already 431 * because the established check is already
561 * unique enough. 432 * unique enough.
562 */ 433 */
563 tb_for_each(tb, node, &head->chain) { 434 inet_bind_bucket_for_each(tb, node, &head->chain) {
564 if (tb->port == port) { 435 if (tb->port == port) {
565 BUG_TRAP(!hlist_empty(&tb->owners)); 436 BUG_TRAP(!hlist_empty(&tb->owners));
566 if (tb->fastreuse >= 0) 437 if (tb->fastreuse >= 0)
@@ -573,7 +444,7 @@ static int tcp_v6_hash_connect(struct sock *sk)
573 } 444 }
574 } 445 }
575 446
576 tb = tcp_bucket_create(head, port); 447 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
577 if (!tb) { 448 if (!tb) {
578 spin_unlock(&head->lock); 449 spin_unlock(&head->lock);
579 break; 450 break;
@@ -592,7 +463,7 @@ ok:
592 hint += i; 463 hint += i;
593 464
594 /* Head lock still held and bh's disabled */ 465 /* Head lock still held and bh's disabled */
595 tcp_bind_hash(sk, tb, port); 466 inet_bind_hash(sk, tb, port);
596 if (sk_unhashed(sk)) { 467 if (sk_unhashed(sk)) {
597 inet_sk(sk)->sport = htons(port); 468 inet_sk(sk)->sport = htons(port);
598 __tcp_v6_hash(sk); 469 __tcp_v6_hash(sk);
@@ -600,16 +471,16 @@ ok:
600 spin_unlock(&head->lock); 471 spin_unlock(&head->lock);
601 472
602 if (tw) { 473 if (tw) {
603 tcp_tw_deschedule(tw); 474 inet_twsk_deschedule(tw, &tcp_death_row);
604 tcp_tw_put(tw); 475 inet_twsk_put(tw);
605 } 476 }
606 477
607 ret = 0; 478 ret = 0;
608 goto out; 479 goto out;
609 } 480 }
610 481
611 head = &tcp_bhash[tcp_bhashfn(snum)]; 482 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
612 tb = tcp_sk(sk)->bind_hash; 483 tb = inet_csk(sk)->icsk_bind_hash;
613 spin_lock_bh(&head->lock); 484 spin_lock_bh(&head->lock);
614 485
615 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { 486 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
@@ -626,11 +497,6 @@ out:
626 } 497 }
627} 498}
628 499
629static __inline__ int tcp_v6_iif(struct sk_buff *skb)
630{
631 return IP6CB(skb)->iif;
632}
633
634static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 500static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
635 int addr_len) 501 int addr_len)
636{ 502{
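/*
 * Editor's note: the tcp_v6_iif() helper removed above is replaced by
 * inet6_iif() throughout this file.  Judging from the deleted body, the
 * shared helper is presumably the same one-liner (hedged -- the patch
 * does not show its new home):
 */
static inline int inet6_iif(const struct sk_buff *skb)
{
	return IP6CB(skb)->iif;
}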
@@ -822,14 +688,15 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
822 int type, int code, int offset, __u32 info) 688 int type, int code, int offset, __u32 info)
823{ 689{
824 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; 690 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
825 struct tcphdr *th = (struct tcphdr *)(skb->data+offset); 691 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
826 struct ipv6_pinfo *np; 692 struct ipv6_pinfo *np;
827 struct sock *sk; 693 struct sock *sk;
828 int err; 694 int err;
829 struct tcp_sock *tp; 695 struct tcp_sock *tp;
830 __u32 seq; 696 __u32 seq;
831 697
832 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex); 698 sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
699 th->source, skb->dev->ifindex);
833 700
834 if (sk == NULL) { 701 if (sk == NULL) {
835 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); 702 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
@@ -837,7 +704,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
837 } 704 }
838 705
839 if (sk->sk_state == TCP_TIME_WAIT) { 706 if (sk->sk_state == TCP_TIME_WAIT) {
840 tcp_tw_put((struct tcp_tw_bucket*)sk); 707 inet_twsk_put((struct inet_timewait_sock *)sk);
841 return; 708 return;
842 } 709 }
843 710
@@ -915,8 +782,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
915 if (sock_owned_by_user(sk)) 782 if (sock_owned_by_user(sk))
916 goto out; 783 goto out;
917 784
918 req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr, 785 req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
919 &hdr->saddr, tcp_v6_iif(skb)); 786 &hdr->saddr, inet6_iif(skb));
920 if (!req) 787 if (!req)
921 goto out; 788 goto out;
922 789
@@ -930,7 +797,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
930 goto out; 797 goto out;
931 } 798 }
932 799
933 tcp_synq_drop(sk, req, prev); 800 inet_csk_reqsk_queue_drop(sk, req, prev);
934 goto out; 801 goto out;
935 802
936 case TCP_SYN_SENT: 803 case TCP_SYN_SENT:
@@ -1127,7 +994,7 @@ static void tcp_v6_send_reset(struct sk_buff *skb)
1127 buff->csum); 994 buff->csum);
1128 995
1129 fl.proto = IPPROTO_TCP; 996 fl.proto = IPPROTO_TCP;
1130 fl.oif = tcp_v6_iif(skb); 997 fl.oif = inet6_iif(skb);
1131 fl.fl_ip_dport = t1->dest; 998 fl.fl_ip_dport = t1->dest;
1132 fl.fl_ip_sport = t1->source; 999 fl.fl_ip_sport = t1->source;
1133 1000
@@ -1196,7 +1063,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
1196 buff->csum); 1063 buff->csum);
1197 1064
1198 fl.proto = IPPROTO_TCP; 1065 fl.proto = IPPROTO_TCP;
1199 fl.oif = tcp_v6_iif(skb); 1066 fl.oif = inet6_iif(skb);
1200 fl.fl_ip_dport = t1->dest; 1067 fl.fl_ip_dport = t1->dest;
1201 fl.fl_ip_sport = t1->source; 1068 fl.fl_ip_sport = t1->source;
1202 1069
@@ -1215,12 +1082,14 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
1215 1082
1216static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) 1083static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1217{ 1084{
1218 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; 1085 struct inet_timewait_sock *tw = inet_twsk(sk);
1086 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1219 1087
1220 tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt, 1088 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1221 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent); 1089 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1090 tcptw->tw_ts_recent);
1222 1091
1223 tcp_tw_put(tw); 1092 inet_twsk_put(tw);
1224} 1093}
1225 1094
1226static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) 1095static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
@@ -1232,28 +1101,25 @@ static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1232static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) 1101static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1233{ 1102{
1234 struct request_sock *req, **prev; 1103 struct request_sock *req, **prev;
1235 struct tcphdr *th = skb->h.th; 1104 const struct tcphdr *th = skb->h.th;
1236 struct tcp_sock *tp = tcp_sk(sk);
1237 struct sock *nsk; 1105 struct sock *nsk;
1238 1106
1239 /* Find possible connection requests. */ 1107 /* Find possible connection requests. */
1240 req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr, 1108 req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
1241 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb)); 1109 &skb->nh.ipv6h->daddr, inet6_iif(skb));
1242 if (req) 1110 if (req)
1243 return tcp_check_req(sk, skb, req, prev); 1111 return tcp_check_req(sk, skb, req, prev);
1244 1112
1245 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr, 1113 nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
1246 th->source, 1114 th->source, &skb->nh.ipv6h->daddr,
1247 &skb->nh.ipv6h->daddr, 1115 ntohs(th->dest), inet6_iif(skb));
1248 ntohs(th->dest),
1249 tcp_v6_iif(skb));
1250 1116
1251 if (nsk) { 1117 if (nsk) {
1252 if (nsk->sk_state != TCP_TIME_WAIT) { 1118 if (nsk->sk_state != TCP_TIME_WAIT) {
1253 bh_lock_sock(nsk); 1119 bh_lock_sock(nsk);
1254 return nsk; 1120 return nsk;
1255 } 1121 }
1256 tcp_tw_put((struct tcp_tw_bucket*)nsk); 1122 inet_twsk_put((struct inet_timewait_sock *)nsk);
1257 return NULL; 1123 return NULL;
1258 } 1124 }
1259 1125
@@ -1266,12 +1132,12 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1266 1132
1267static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req) 1133static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1268{ 1134{
1269 struct tcp_sock *tp = tcp_sk(sk); 1135 struct inet_connection_sock *icsk = inet_csk(sk);
1270 struct listen_sock *lopt = tp->accept_queue.listen_opt; 1136 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
1271 u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); 1137 const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1272 1138
1273 reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT); 1139 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
1274 tcp_synq_added(sk); 1140 inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
1275} 1141}
1276 1142
1277 1143
@@ -1296,13 +1162,13 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1296 /* 1162 /*
1297 * There are no SYN attacks on IPv6, yet... 1163 * There are no SYN attacks on IPv6, yet...
1298 */ 1164 */
1299 if (tcp_synq_is_full(sk) && !isn) { 1165 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1300 if (net_ratelimit()) 1166 if (net_ratelimit())
1301 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n"); 1167 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1302 goto drop; 1168 goto drop;
1303 } 1169 }
1304 1170
1305 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) 1171 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1306 goto drop; 1172 goto drop;
1307 1173
1308 req = reqsk_alloc(&tcp6_request_sock_ops); 1174 req = reqsk_alloc(&tcp6_request_sock_ops);
@@ -1334,7 +1200,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1334 /* So that link locals have meaning */ 1200 /* So that link locals have meaning */
1335 if (!sk->sk_bound_dev_if && 1201 if (!sk->sk_bound_dev_if &&
1336 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) 1202 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1337 treq->iif = tcp_v6_iif(skb); 1203 treq->iif = inet6_iif(skb);
1338 1204
1339 if (isn == 0) 1205 if (isn == 0)
1340 isn = tcp_v6_init_sequence(sk,skb); 1206 isn = tcp_v6_init_sequence(sk,skb);
@@ -1399,15 +1265,14 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1399 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 1265 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1400 newnp->pktoptions = NULL; 1266 newnp->pktoptions = NULL;
1401 newnp->opt = NULL; 1267 newnp->opt = NULL;
1402 newnp->mcast_oif = tcp_v6_iif(skb); 1268 newnp->mcast_oif = inet6_iif(skb);
1403 newnp->mcast_hops = skb->nh.ipv6h->hop_limit; 1269 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1404 1270
1405 /* Charge newly allocated IPv6 socket. Though it is mapped, 1271 /*
1406 * it is IPv6 yet. 1272 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1273 * here, tcp_create_openreq_child now does this for us, see the comment in
1274 * that function for the gory details. -acme
1407 */ 1275 */
1408#ifdef INET_REFCNT_DEBUG
1409 atomic_inc(&inet6_sock_nr);
1410#endif
1411 1276
1412 /* It is a tricky place. Until this moment IPv4 tcp 1277
1413 worked with IPv6 af_tcp.af_specific. 1278 worked with IPv6 af_tcp.af_specific.
@@ -1462,10 +1327,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1462 if (newsk == NULL) 1327 if (newsk == NULL)
1463 goto out; 1328 goto out;
1464 1329
1465 /* Charge newly allocated IPv6 socket */ 1330 /*
1466#ifdef INET_REFCNT_DEBUG 1331 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1467 atomic_inc(&inet6_sock_nr); 1332 * count here, tcp_create_openreq_child now does this for us, see the
1468#endif 1333 * comment in that function for the gory details. -acme
1334 */
1469 1335
1470 ip6_dst_store(newsk, dst, NULL); 1336 ip6_dst_store(newsk, dst, NULL);
1471 newsk->sk_route_caps = dst->dev->features & 1337 newsk->sk_route_caps = dst->dev->features &
@@ -1504,7 +1370,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1504 skb_set_owner_r(newnp->pktoptions, newsk); 1370 skb_set_owner_r(newnp->pktoptions, newsk);
1505 } 1371 }
1506 newnp->opt = NULL; 1372 newnp->opt = NULL;
1507 newnp->mcast_oif = tcp_v6_iif(skb); 1373 newnp->mcast_oif = inet6_iif(skb);
1508 newnp->mcast_hops = skb->nh.ipv6h->hop_limit; 1374 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1509 1375
1510 /* Clone native IPv6 options from listening socket (if any) 1376 /* Clone native IPv6 options from listening socket (if any)
@@ -1531,7 +1397,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1531 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; 1397 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1532 1398
1533 __tcp_v6_hash(newsk); 1399 __tcp_v6_hash(newsk);
1534 tcp_inherit_port(sk, newsk); 1400 inet_inherit_port(&tcp_hashinfo, sk, newsk);
1535 1401
1536 return newsk; 1402 return newsk;
1537 1403
@@ -1552,7 +1418,7 @@ static int tcp_v6_checksum_init(struct sk_buff *skb)
1552 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, 1418 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1553 &skb->nh.ipv6h->daddr,skb->csum)) 1419 &skb->nh.ipv6h->daddr,skb->csum))
1554 return 0; 1420 return 0;
1555 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n")); 1421 LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n");
1556 } 1422 }
1557 if (skb->len <= 76) { 1423 if (skb->len <= 76) {
1558 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, 1424 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
@@ -1679,7 +1545,7 @@ ipv6_pktoptions:
1679 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && 1545 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1680 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { 1546 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1681 if (np->rxopt.bits.rxinfo) 1547 if (np->rxopt.bits.rxinfo)
1682 np->mcast_oif = tcp_v6_iif(opt_skb); 1548 np->mcast_oif = inet6_iif(opt_skb);
1683 if (np->rxopt.bits.rxhlim) 1549 if (np->rxopt.bits.rxhlim)
1684 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit; 1550 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1685 if (ipv6_opt_accepted(sk, opt_skb)) { 1551 if (ipv6_opt_accepted(sk, opt_skb)) {
@@ -1734,8 +1600,9 @@ static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1734 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h); 1600 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1735 TCP_SKB_CB(skb)->sacked = 0; 1601 TCP_SKB_CB(skb)->sacked = 0;
1736 1602
1737 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source, 1603 sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
1738 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); 1604 &skb->nh.ipv6h->daddr, ntohs(th->dest),
1605 inet6_iif(skb));
1739 1606
1740 if (!sk) 1607 if (!sk)
1741 goto no_tcp_socket; 1608 goto no_tcp_socket;
@@ -1790,26 +1657,29 @@ discard_and_relse:
1790 1657
1791do_time_wait: 1658do_time_wait:
1792 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1659 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1793 tcp_tw_put((struct tcp_tw_bucket *) sk); 1660 inet_twsk_put((struct inet_timewait_sock *)sk);
1794 goto discard_it; 1661 goto discard_it;
1795 } 1662 }
1796 1663
1797 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { 1664 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1798 TCP_INC_STATS_BH(TCP_MIB_INERRS); 1665 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1799 tcp_tw_put((struct tcp_tw_bucket *) sk); 1666 inet_twsk_put((struct inet_timewait_sock *)sk);
1800 goto discard_it; 1667 goto discard_it;
1801 } 1668 }
1802 1669
1803 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk, 1670 switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1804 skb, th, skb->len)) { 1671 skb, th)) {
1805 case TCP_TW_SYN: 1672 case TCP_TW_SYN:
1806 { 1673 {
1807 struct sock *sk2; 1674 struct sock *sk2;
1808 1675
1809 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); 1676 sk2 = inet6_lookup_listener(&tcp_hashinfo,
1677 &skb->nh.ipv6h->daddr,
1678 ntohs(th->dest), inet6_iif(skb));
1810 if (sk2 != NULL) { 1679 if (sk2 != NULL) {
1811 tcp_tw_deschedule((struct tcp_tw_bucket *)sk); 1680 struct inet_timewait_sock *tw = inet_twsk(sk);
1812 tcp_tw_put((struct tcp_tw_bucket *)sk); 1681 inet_twsk_deschedule(tw, &tcp_death_row);
1682 inet_twsk_put(tw);
1813 sk = sk2; 1683 sk = sk2;
1814 goto process; 1684 goto process;
1815 } 1685 }
@@ -1978,7 +1848,7 @@ static struct tcp_func ipv6_specific = {
1978static struct tcp_func ipv6_mapped = { 1848static struct tcp_func ipv6_mapped = {
1979 .queue_xmit = ip_queue_xmit, 1849 .queue_xmit = ip_queue_xmit,
1980 .send_check = tcp_v4_send_check, 1850 .send_check = tcp_v4_send_check,
1981 .rebuild_header = tcp_v4_rebuild_header, 1851 .rebuild_header = inet_sk_rebuild_header,
1982 .conn_request = tcp_v6_conn_request, 1852 .conn_request = tcp_v6_conn_request,
1983 .syn_recv_sock = tcp_v6_syn_recv_sock, 1853 .syn_recv_sock = tcp_v6_syn_recv_sock,
1984 .remember_stamp = tcp_v4_remember_stamp, 1854 .remember_stamp = tcp_v4_remember_stamp,
@@ -1997,13 +1867,14 @@ static struct tcp_func ipv6_mapped = {
1997 */ 1867 */
1998static int tcp_v6_init_sock(struct sock *sk) 1868static int tcp_v6_init_sock(struct sock *sk)
1999{ 1869{
1870 struct inet_connection_sock *icsk = inet_csk(sk);
2000 struct tcp_sock *tp = tcp_sk(sk); 1871 struct tcp_sock *tp = tcp_sk(sk);
2001 1872
2002 skb_queue_head_init(&tp->out_of_order_queue); 1873 skb_queue_head_init(&tp->out_of_order_queue);
2003 tcp_init_xmit_timers(sk); 1874 tcp_init_xmit_timers(sk);
2004 tcp_prequeue_init(tp); 1875 tcp_prequeue_init(tp);
2005 1876
2006 tp->rto = TCP_TIMEOUT_INIT; 1877 icsk->icsk_rto = TCP_TIMEOUT_INIT;
2007 tp->mdev = TCP_TIMEOUT_INIT; 1878 tp->mdev = TCP_TIMEOUT_INIT;
2008 1879
2009 /* So many TCP implementations out there (incorrectly) count the 1880 /* So many TCP implementations out there (incorrectly) count the
@@ -2025,7 +1896,7 @@ static int tcp_v6_init_sock(struct sock *sk)
2025 sk->sk_state = TCP_CLOSE; 1896 sk->sk_state = TCP_CLOSE;
2026 1897
2027 tp->af_specific = &ipv6_specific; 1898 tp->af_specific = &ipv6_specific;
2028 tp->ca_ops = &tcp_init_congestion_ops; 1899 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
2029 sk->sk_write_space = sk_stream_write_space; 1900 sk->sk_write_space = sk_stream_write_space;
2030 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); 1901 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2031 1902
@@ -2039,8 +1910,6 @@ static int tcp_v6_init_sock(struct sock *sk)
2039 1910
2040static int tcp_v6_destroy_sock(struct sock *sk) 1911static int tcp_v6_destroy_sock(struct sock *sk)
2041{ 1912{
2042 extern int tcp_v4_destroy_sock(struct sock *sk);
2043
2044 tcp_v4_destroy_sock(sk); 1913 tcp_v4_destroy_sock(sk);
2045 return inet6_destroy_sock(sk); 1914 return inet6_destroy_sock(sk);
2046} 1915}
@@ -2086,18 +1955,20 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2086 unsigned long timer_expires; 1955 unsigned long timer_expires;
2087 struct inet_sock *inet = inet_sk(sp); 1956 struct inet_sock *inet = inet_sk(sp);
2088 struct tcp_sock *tp = tcp_sk(sp); 1957 struct tcp_sock *tp = tcp_sk(sp);
1958 const struct inet_connection_sock *icsk = inet_csk(sp);
2089 struct ipv6_pinfo *np = inet6_sk(sp); 1959 struct ipv6_pinfo *np = inet6_sk(sp);
2090 1960
2091 dest = &np->daddr; 1961 dest = &np->daddr;
2092 src = &np->rcv_saddr; 1962 src = &np->rcv_saddr;
2093 destp = ntohs(inet->dport); 1963 destp = ntohs(inet->dport);
2094 srcp = ntohs(inet->sport); 1964 srcp = ntohs(inet->sport);
2095 if (tp->pending == TCP_TIME_RETRANS) { 1965
1966 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2096 timer_active = 1; 1967 timer_active = 1;
2097 timer_expires = tp->timeout; 1968 timer_expires = icsk->icsk_timeout;
2098 } else if (tp->pending == TCP_TIME_PROBE0) { 1969 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2099 timer_active = 4; 1970 timer_active = 4;
2100 timer_expires = tp->timeout; 1971 timer_expires = icsk->icsk_timeout;
2101 } else if (timer_pending(&sp->sk_timer)) { 1972 } else if (timer_pending(&sp->sk_timer)) {
2102 timer_active = 2; 1973 timer_active = 2;
2103 timer_expires = sp->sk_timer.expires; 1974 timer_expires = sp->sk_timer.expires;
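/*
 * Editor's sketch (not part of the patch): the timer_active codes that
 * get_tcp6_sock() emits for /proc/net/tcp6, restated as a helper.  The
 * three cases are exactly those visible in the hunk above.
 */
static int timer_active_code(const struct inet_connection_sock *icsk,
			     const struct sock *sp)
{
	if (icsk->icsk_pending == ICSK_TIME_RETRANS)
		return 1;	/* retransmit timer armed */
	if (icsk->icsk_pending == ICSK_TIME_PROBE0)
		return 4;	/* zero-window probe timer armed */
	if (timer_pending(&sp->sk_timer))
		return 2;	/* sk_timer (keepalive etc.) armed */
	return 0;
}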
@@ -2118,28 +1989,31 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2118 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq, 1989 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2119 timer_active, 1990 timer_active,
2120 jiffies_to_clock_t(timer_expires - jiffies), 1991 jiffies_to_clock_t(timer_expires - jiffies),
2121 tp->retransmits, 1992 icsk->icsk_retransmits,
2122 sock_i_uid(sp), 1993 sock_i_uid(sp),
2123 tp->probes_out, 1994 icsk->icsk_probes_out,
2124 sock_i_ino(sp), 1995 sock_i_ino(sp),
2125 atomic_read(&sp->sk_refcnt), sp, 1996 atomic_read(&sp->sk_refcnt), sp,
2126 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong, 1997 icsk->icsk_rto,
1998 icsk->icsk_ack.ato,
1999 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
2127 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh 2000 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2128 ); 2001 );
2129} 2002}
2130 2003
2131static void get_timewait6_sock(struct seq_file *seq, 2004static void get_timewait6_sock(struct seq_file *seq,
2132 struct tcp_tw_bucket *tw, int i) 2005 struct inet_timewait_sock *tw, int i)
2133{ 2006{
2134 struct in6_addr *dest, *src; 2007 struct in6_addr *dest, *src;
2135 __u16 destp, srcp; 2008 __u16 destp, srcp;
2009 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
2136 int ttd = tw->tw_ttd - jiffies; 2010 int ttd = tw->tw_ttd - jiffies;
2137 2011
2138 if (ttd < 0) 2012 if (ttd < 0)
2139 ttd = 0; 2013 ttd = 0;
2140 2014
2141 dest = &tw->tw_v6_daddr; 2015 dest = &tcp6tw->tw_v6_daddr;
2142 src = &tw->tw_v6_rcv_saddr; 2016 src = &tcp6tw->tw_v6_rcv_saddr;
2143 destp = ntohs(tw->tw_dport); 2017 destp = ntohs(tw->tw_dport);
2144 srcp = ntohs(tw->tw_sport); 2018 srcp = ntohs(tw->tw_sport);
2145 2019
@@ -2214,7 +2088,7 @@ struct proto tcpv6_prot = {
2214 .close = tcp_close, 2088 .close = tcp_close,
2215 .connect = tcp_v6_connect, 2089 .connect = tcp_v6_connect,
2216 .disconnect = tcp_disconnect, 2090 .disconnect = tcp_disconnect,
2217 .accept = tcp_accept, 2091 .accept = inet_csk_accept,
2218 .ioctl = tcp_ioctl, 2092 .ioctl = tcp_ioctl,
2219 .init = tcp_v6_init_sock, 2093 .init = tcp_v6_init_sock,
2220 .destroy = tcp_v6_destroy_sock, 2094 .destroy = tcp_v6_destroy_sock,
@@ -2231,11 +2105,13 @@ struct proto tcpv6_prot = {
2231 .sockets_allocated = &tcp_sockets_allocated, 2105 .sockets_allocated = &tcp_sockets_allocated,
2232 .memory_allocated = &tcp_memory_allocated, 2106 .memory_allocated = &tcp_memory_allocated,
2233 .memory_pressure = &tcp_memory_pressure, 2107 .memory_pressure = &tcp_memory_pressure,
2108 .orphan_count = &tcp_orphan_count,
2234 .sysctl_mem = sysctl_tcp_mem, 2109 .sysctl_mem = sysctl_tcp_mem,
2235 .sysctl_wmem = sysctl_tcp_wmem, 2110 .sysctl_wmem = sysctl_tcp_wmem,
2236 .sysctl_rmem = sysctl_tcp_rmem, 2111 .sysctl_rmem = sysctl_tcp_rmem,
2237 .max_header = MAX_TCP_HEADER, 2112 .max_header = MAX_TCP_HEADER,
2238 .obj_size = sizeof(struct tcp6_sock), 2113 .obj_size = sizeof(struct tcp6_sock),
2114 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
2239 .rsk_prot = &tcp6_request_sock_ops, 2115 .rsk_prot = &tcp6_request_sock_ops,
2240}; 2116};
2241 2117
@@ -2245,8 +2121,6 @@ static struct inet6_protocol tcpv6_protocol = {
2245 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, 2121 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2246}; 2122};
2247 2123
2248extern struct proto_ops inet6_stream_ops;
2249
2250static struct inet_protosw tcpv6_protosw = { 2124static struct inet_protosw tcpv6_protosw = {
2251 .type = SOCK_STREAM, 2125 .type = SOCK_STREAM,
2252 .protocol = IPPROTO_TCP, 2126 .protocol = IPPROTO_TCP,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index eff050ac7049..390d750449ce 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -51,6 +51,7 @@
51#include <net/udp.h> 51#include <net/udp.h>
52#include <net/raw.h> 52#include <net/raw.h>
53#include <net/inet_common.h> 53#include <net/inet_common.h>
54#include <net/tcp_states.h>
54 55
55#include <net/ip6_checksum.h> 56#include <net/ip6_checksum.h>
56#include <net/xfrm.h> 57#include <net/xfrm.h>
@@ -58,7 +59,7 @@
58#include <linux/proc_fs.h> 59#include <linux/proc_fs.h>
59#include <linux/seq_file.h> 60#include <linux/seq_file.h>
60 61
61DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6); 62DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
62 63
63/* Grrr, addr_type already calculated by caller, but I don't want 64/* Grrr, addr_type already calculated by caller, but I don't want
64 * to add some silly "cookie" argument to this method just for that. 65 * to add some silly "cookie" argument to this method just for that.
@@ -477,8 +478,7 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
477 /* RFC 2460 section 8.1 says that we SHOULD log 478 /* RFC 2460 section 8.1 says that we SHOULD log
478 this error. Well, it is reasonable. 479 this error. Well, it is reasonable.
479 */ 480 */
480 LIMIT_NETDEBUG( 481 LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
481 printk(KERN_INFO "IPv6: udp checksum is 0\n"));
482 goto discard; 482 goto discard;
483 } 483 }
484 484
@@ -493,7 +493,7 @@ static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
493 if (skb->ip_summed==CHECKSUM_HW) { 493 if (skb->ip_summed==CHECKSUM_HW) {
494 skb->ip_summed = CHECKSUM_UNNECESSARY; 494 skb->ip_summed = CHECKSUM_UNNECESSARY;
495 if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) { 495 if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) {
496 LIMIT_NETDEBUG(printk(KERN_DEBUG "udp v6 hw csum failure.\n")); 496 LIMIT_NETDEBUG(KERN_DEBUG "udp v6 hw csum failure.\n");
497 skb->ip_summed = CHECKSUM_NONE; 497 skb->ip_summed = CHECKSUM_NONE;
498 } 498 }
499 } 499 }
@@ -825,7 +825,7 @@ back_from_confirm:
825 /* ... which is an evident application bug. --ANK */ 825 /* ... which is an evident application bug. --ANK */
826 release_sock(sk); 826 release_sock(sk);
827 827
828 LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 2\n")); 828 LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
829 err = -EINVAL; 829 err = -EINVAL;
830 goto out; 830 goto out;
831 } 831 }
@@ -1054,8 +1054,6 @@ struct proto udpv6_prot = {
1054 .obj_size = sizeof(struct udp6_sock), 1054 .obj_size = sizeof(struct udp6_sock),
1055}; 1055};
1056 1056
1057extern struct proto_ops inet6_dgram_ops;
1058
1059static struct inet_protosw udpv6_protosw = { 1057static struct inet_protosw udpv6_protosw = {
1060 .type = SOCK_DGRAM, 1058 .type = SOCK_DGRAM,
1061 .protocol = IPPROTO_UDP, 1059 .protocol = IPPROTO_UDP,
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 60c26c87277e..fbef7826a74f 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -79,7 +79,7 @@ static u32 xfrm6_tunnel_spi;
79#define XFRM6_TUNNEL_SPI_MIN 1 79#define XFRM6_TUNNEL_SPI_MIN 1
80#define XFRM6_TUNNEL_SPI_MAX 0xffffffff 80#define XFRM6_TUNNEL_SPI_MAX 0xffffffff
81 81
82static kmem_cache_t *xfrm6_tunnel_spi_kmem; 82static kmem_cache_t *xfrm6_tunnel_spi_kmem __read_mostly;
83 83
84#define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256 84#define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256
85#define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256 85#define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256
diff --git a/net/ipx/Kconfig b/net/ipx/Kconfig
index a16237c0e783..980a826f5d02 100644
--- a/net/ipx/Kconfig
+++ b/net/ipx/Kconfig
@@ -1,6 +1,39 @@
1# 1#
2# IPX configuration 2# IPX configuration
3# 3#
4config IPX
5 tristate "The IPX protocol"
6 select LLC
7 ---help---
8 This is support for the Novell networking protocol, IPX, commonly
9 used for local networks of Windows machines. You need it if you
10 want to access Novell NetWare file or print servers using the Linux
11 Novell client ncpfs (available from
12 <ftp://platan.vc.cvut.cz/pub/linux/ncpfs/>) or from
13 within the Linux DOS emulator DOSEMU (read the DOSEMU-HOWTO,
14 available from <http://www.tldp.org/docs.html#howto>). In order
15 to do the former, you'll also have to say Y to "NCP file system
16 support", below.
17
18 IPX is similar in scope to IP, while SPX, which runs on top of IPX,
19 is similar to TCP. There is also experimental support for SPX in
20 Linux (see "SPX networking", below).
21
22 To turn your Linux box into a fully featured NetWare file server and
23 IPX router, say Y here and fetch either lwared from
24 <ftp://ibiblio.org/pub/Linux/system/network/daemons/> or
25 mars_nwe from <ftp://www.compu-art.de/mars_nwe/>. For more
26 information, read the IPX-HOWTO available from
27 <http://www.tldp.org/docs.html#howto>.
28
29 General information about how to connect Linux, Windows machines and
30 Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>.
31
32 The IPX driver would enlarge your kernel by about 16 KB. To compile
33 this driver as a module, choose M here: the module will be called ipx.
34 Unless you want to integrate your Linux box with a local Novell
35 network, say N.
36
4config IPX_INTERN 37config IPX_INTERN
5 bool "IPX: Full internal IPX network" 38 bool "IPX: Full internal IPX network"
6 depends on IPX 39 depends on IPX
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 5a27e5df5886..34b3bb868409 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -44,7 +44,6 @@
44#include <linux/socket.h> 44#include <linux/socket.h>
45#include <linux/sockios.h> 45#include <linux/sockios.h>
46#include <linux/string.h> 46#include <linux/string.h>
47#include <linux/tcp.h>
48#include <linux/types.h> 47#include <linux/types.h>
49#include <linux/termios.h> 48#include <linux/termios.h>
50 49
@@ -52,6 +51,7 @@
52#include <net/p8022.h> 51#include <net/p8022.h>
53#include <net/psnap.h> 52#include <net/psnap.h>
54#include <net/sock.h> 53#include <net/sock.h>
54#include <net/tcp_states.h>
55 55
56#include <asm/uaccess.h> 56#include <asm/uaccess.h>
57 57
@@ -1627,7 +1627,7 @@ out:
1627 return rc; 1627 return rc;
1628} 1628}
1629 1629
1630static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) 1630static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
1631{ 1631{
1632 /* NULL here for pt means the packet was looped back */ 1632 /* NULL here for pt means the packet was looped back */
1633 struct ipx_interface *intrfc; 1633 struct ipx_interface *intrfc;
@@ -1796,8 +1796,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
1796 copied); 1796 copied);
1797 if (rc) 1797 if (rc)
1798 goto out_free; 1798 goto out_free;
1799 if (skb->stamp.tv_sec) 1799 if (skb->tstamp.off_sec)
1800 sk->sk_stamp = skb->stamp; 1800 skb_get_timestamp(skb, &sk->sk_stamp);
1801 1801
1802 msg->msg_namelen = sizeof(*sipx); 1802 msg->msg_namelen = sizeof(*sipx);
1803 1803
@@ -1940,9 +1940,7 @@ static struct notifier_block ipx_dev_notifier = {
1940}; 1940};
1941 1941
1942extern struct datalink_proto *make_EII_client(void); 1942extern struct datalink_proto *make_EII_client(void);
1943extern struct datalink_proto *make_8023_client(void);
1944extern void destroy_EII_client(struct datalink_proto *); 1943extern void destroy_EII_client(struct datalink_proto *);
1945extern void destroy_8023_client(struct datalink_proto *);
1946 1944
1947static unsigned char ipx_8022_type = 0xE0; 1945static unsigned char ipx_8022_type = 0xE0;
1948static unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 }; 1946static unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 };
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
index b6761913445a..1f73d9ea434d 100644
--- a/net/ipx/ipx_proc.c
+++ b/net/ipx/ipx_proc.c
@@ -10,7 +10,7 @@
10#include <linux/proc_fs.h> 10#include <linux/proc_fs.h>
11#include <linux/spinlock.h> 11#include <linux/spinlock.h>
12#include <linux/seq_file.h> 12#include <linux/seq_file.h>
13#include <linux/tcp.h> 13#include <net/tcp_states.h>
14#include <net/ipx.h> 14#include <net/ipx.h>
15 15
16static __inline__ struct ipx_interface *ipx_get_interface_idx(loff_t pos) 16static __inline__ struct ipx_interface *ipx_get_interface_idx(loff_t pos)
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 92c6e8d4e731..6f92f9c62990 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -56,7 +56,7 @@
56#include <asm/uaccess.h> 56#include <asm/uaccess.h>
57 57
58#include <net/sock.h> 58#include <net/sock.h>
59#include <net/tcp.h> 59#include <net/tcp_states.h>
60 60
61#include <net/irda/af_irda.h> 61#include <net/irda/af_irda.h>
62 62
diff --git a/net/irda/irlan/irlan_filter.c b/net/irda/irlan/irlan_filter.c
index 343c5d4a1a1d..ca7d358dab52 100644
--- a/net/irda/irlan/irlan_filter.c
+++ b/net/irda/irlan/irlan_filter.c
@@ -27,6 +27,7 @@
27#include <linux/seq_file.h> 27#include <linux/seq_file.h>
28 28
29#include <net/irda/irlan_common.h> 29#include <net/irda/irlan_common.h>
30#include <net/irda/irlan_filter.h>
30 31
31/* 32/*
32 * Function irlan_filter_request (self, skb) 33 * Function irlan_filter_request (self, skb)
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 6dafbb43b529..3e9a06abbdd0 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -988,9 +988,6 @@ void irlap_resend_rejected_frames(struct irlap_cb *self, int command)
988 IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__); 988 IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__);
989 return; 989 return;
990 } 990 }
991 /* Unlink tx_skb from list */
992 tx_skb->next = tx_skb->prev = NULL;
993 tx_skb->list = NULL;
994 991
995 /* Clear old Nr field + poll bit */ 992 /* Clear old Nr field + poll bit */
996 tx_skb->data[1] &= 0x0f; 993 tx_skb->data[1] &= 0x0f;
@@ -1063,9 +1060,6 @@ void irlap_resend_rejected_frame(struct irlap_cb *self, int command)
1063 IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__); 1060 IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__);
1064 return; 1061 return;
1065 } 1062 }
1066 /* Unlink tx_skb from list */
1067 tx_skb->next = tx_skb->prev = NULL;
1068 tx_skb->list = NULL;
1069 1063
1070 /* Clear old Nr field + poll bit */ 1064 /* Clear old Nr field + poll bit */
1071 tx_skb->data[1] &= 0x0f; 1065 tx_skb->data[1] &= 0x0f;
@@ -1309,7 +1303,7 @@ static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb,
1309 * Jean II 1303 * Jean II
1310 */ 1304 */
1311int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, 1305int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev,
1312 struct packet_type *ptype) 1306 struct packet_type *ptype, struct net_device *orig_dev)
1313{ 1307{
1314 struct irlap_info info; 1308 struct irlap_info info;
1315 struct irlap_cb *self; 1309 struct irlap_cb *self;
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index 7a4a4d7fbe66..c19e9ce05a3a 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -53,7 +53,6 @@ struct irlmp_cb *irlmp = NULL;
53/* These can be altered by the sysctl interface */ 53/* These can be altered by the sysctl interface */
54int sysctl_discovery = 0; 54int sysctl_discovery = 0;
55int sysctl_discovery_timeout = 3; /* 3 seconds by default */ 55int sysctl_discovery_timeout = 3; /* 3 seconds by default */
56EXPORT_SYMBOL(sysctl_discovery_timeout);
57int sysctl_discovery_slots = 6; /* 6 slots by default */ 56int sysctl_discovery_slots = 6; /* 6 slots by default */
58int sysctl_lap_keepalive_time = LM_IDLE_TIMEOUT * 1000 / HZ; 57int sysctl_lap_keepalive_time = LM_IDLE_TIMEOUT * 1000 / HZ;
59char sysctl_devname[65]; 58char sysctl_devname[65];
@@ -67,7 +66,6 @@ const char *irlmp_reasons[] = {
67 "LM_INIT_DISCONNECT", 66 "LM_INIT_DISCONNECT",
68 "ERROR, NOT USED", 67 "ERROR, NOT USED",
69}; 68};
70EXPORT_SYMBOL(irlmp_reasons);
71 69
72/* 70/*
73 * Function irlmp_init (void) 71 * Function irlmp_init (void)
@@ -675,7 +673,6 @@ struct lsap_cb *irlmp_dup(struct lsap_cb *orig, void *instance)
675 673
676 return new; 674 return new;
677} 675}
678EXPORT_SYMBOL(irlmp_dup);
679 676
680/* 677/*
681 * Function irlmp_disconnect_request (handle, userdata) 678 * Function irlmp_disconnect_request (handle, userdata)
diff --git a/net/irda/irmod.c b/net/irda/irmod.c
index 6ffaed4544e9..634901dd156f 100644
--- a/net/irda/irmod.c
+++ b/net/irda/irmod.c
@@ -54,7 +54,7 @@ extern int irsock_init(void);
54extern void irsock_cleanup(void); 54extern void irsock_cleanup(void);
55/* irlap_frame.c */ 55/* irlap_frame.c */
56extern int irlap_driver_rcv(struct sk_buff *, struct net_device *, 56extern int irlap_driver_rcv(struct sk_buff *, struct net_device *,
57 struct packet_type *); 57 struct packet_type *, struct net_device *);
58 58
59/* 59/*
60 * Module parameters 60 * Module parameters
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
index 9004f7349a76..b391cb3893d4 100644
--- a/net/irda/irnet/irnet.h
+++ b/net/irda/irnet/irnet.h
@@ -517,9 +517,6 @@ extern int
517 irda_irnet_init(void); /* Initialise IrDA part of IrNET */ 517 irda_irnet_init(void); /* Initialise IrDA part of IrNET */
518extern void 518extern void
519 irda_irnet_cleanup(void); /* Teardown IrDA part of IrNET */ 519 irda_irnet_cleanup(void); /* Teardown IrDA part of IrNET */
520/* ---------------------------- MODULE ---------------------------- */
521extern int
522 irnet_init(void); /* Initialise IrNET module */
523 520
524/**************************** VARIABLES ****************************/ 521/**************************** VARIABLES ****************************/
525 522
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index f8f984bb9922..e53bf9e0053e 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -1107,7 +1107,7 @@ ppp_irnet_cleanup(void)
1107/* 1107/*
1108 * Module main entry point 1108 * Module main entry point
1109 */ 1109 */
1110int __init 1110static int __init
1111irnet_init(void) 1111irnet_init(void)
1112{ 1112{
1113 int err; 1113 int err;
diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c
index b0dd3ea35999..1ba8c7106639 100644
--- a/net/irda/irqueue.c
+++ b/net/irda/irqueue.c
@@ -822,7 +822,6 @@ void* hashbin_find_next( hashbin_t* hashbin, long hashv, const char* name,
822 822
823 return entry; 823 return entry;
824} 824}
825EXPORT_SYMBOL(hashbin_find_next);
826 825
827/* 826/*
828 * Function hashbin_get_first (hashbin) 827 * Function hashbin_get_first (hashbin)
diff --git a/net/irda/qos.c b/net/irda/qos.c
index df732d56cc57..ddfb5c502a90 100644
--- a/net/irda/qos.c
+++ b/net/irda/qos.c
@@ -37,6 +37,7 @@
37#include <net/irda/parameters.h> 37#include <net/irda/parameters.h>
38#include <net/irda/qos.h> 38#include <net/irda/qos.h>
39#include <net/irda/irlap.h> 39#include <net/irda/irlap.h>
40#include <net/irda/irlap_frame.h>
40 41
41/* 42/*
42 * Maximum values of the baud rate we negociate with the other end. 43 * Maximum values of the baud rate we negociate with the other end.
diff --git a/net/lapb/Kconfig b/net/lapb/Kconfig
new file mode 100644
index 000000000000..f0b5efb31a00
--- /dev/null
+++ b/net/lapb/Kconfig
@@ -0,0 +1,22 @@
1#
2# LAPB Data Link Driver
3#
4
5config LAPB
6 tristate "LAPB Data Link Driver (EXPERIMENTAL)"
7 depends on EXPERIMENTAL
8 ---help---
9 Link Access Procedure, Balanced (LAPB) is the data link layer (i.e.
10 the lower) part of the X.25 protocol. It offers a reliable
11 connection service to exchange data frames with one other host, and
12 it is used to transport higher level protocols (mostly X.25 Packet
13 Layer, the higher part of X.25, but others are possible as well).
14 Usually, LAPB is used with specialized X.21 network cards, but Linux
15 currently supports LAPB only over Ethernet connections. If you want
16 to use LAPB connections over Ethernet, say Y here and to "LAPB over
17 Ethernet driver" below. Read
18 <file:Documentation/networking/lapb-module.txt> for technical
19 details.
20
21 To compile this driver as a module, choose M here: the
22 module will be called lapb. If unsure, say N.
diff --git a/net/lapb/lapb_subr.c b/net/lapb/lapb_subr.c
index 5de05a0bc0ff..8b5eefd70f03 100644
--- a/net/lapb/lapb_subr.c
+++ b/net/lapb/lapb_subr.c
@@ -78,7 +78,7 @@ void lapb_requeue_frames(struct lapb_cb *lapb)
78 if (!skb_prev) 78 if (!skb_prev)
79 skb_queue_head(&lapb->write_queue, skb); 79 skb_queue_head(&lapb->write_queue, skb);
80 else 80 else
81 skb_append(skb_prev, skb); 81 skb_append(skb_prev, skb, &lapb->write_queue);
82 skb_prev = skb; 82 skb_prev = skb;
83 } 83 }
84} 84}
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 20b4cfebd74c..66f55e514b56 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -23,13 +23,13 @@
23#include <linux/config.h> 23#include <linux/config.h>
24#include <linux/kernel.h> 24#include <linux/kernel.h>
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/tcp.h>
27#include <linux/rtnetlink.h> 26#include <linux/rtnetlink.h>
28#include <linux/init.h> 27#include <linux/init.h>
29#include <net/llc.h> 28#include <net/llc.h>
30#include <net/llc_sap.h> 29#include <net/llc_sap.h>
31#include <net/llc_pdu.h> 30#include <net/llc_pdu.h>
32#include <net/llc_conn.h> 31#include <net/llc_conn.h>
32#include <net/tcp_states.h>
33 33
34/* remember: uninitialized global data is zeroed because its in .bss */ 34/* remember: uninitialized global data is zeroed because its in .bss */
35static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START; 35static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START;
@@ -714,7 +714,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
714 if (uaddr) 714 if (uaddr)
715 memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); 715 memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr));
716 msg->msg_namelen = sizeof(*uaddr); 716 msg->msg_namelen = sizeof(*uaddr);
717 if (!skb->list) { 717 if (!skb->next) {
718dgram_free: 718dgram_free:
719 kfree_skb(skb); 719 kfree_skb(skb);
720 } 720 }
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index eba812a9c69c..4c644bc70eae 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -16,7 +16,7 @@
16#include <net/llc_sap.h> 16#include <net/llc_sap.h>
17#include <net/llc_conn.h> 17#include <net/llc_conn.h>
18#include <net/sock.h> 18#include <net/sock.h>
19#include <linux/tcp.h> 19#include <net/tcp_states.h>
20#include <net/llc_c_ev.h> 20#include <net/llc_c_ev.h>
21#include <net/llc_c_ac.h> 21#include <net/llc_c_ac.h>
22#include <net/llc_c_st.h> 22#include <net/llc_c_st.h>
@@ -71,7 +71,11 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
71 71
72 if (!ev->ind_prim && !ev->cfm_prim) { 72 if (!ev->ind_prim && !ev->cfm_prim) {
73 /* indicate or confirm not required */ 73 /* indicate or confirm not required */
74 if (!skb->list) 74 /* XXX this is not very pretty, perhaps we should store
75 * XXX indicate/confirm-needed state in the llc_conn_state_ev
76 * XXX control block of the SKB instead? -DaveM
77 */
78 if (!skb->next)
75 goto out_kfree_skb; 79 goto out_kfree_skb;
76 goto out_skb_put; 80 goto out_skb_put;
77 } 81 }
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 5ff02c080a0b..9727455bf0e7 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -103,7 +103,8 @@ out:
103struct llc_sap *llc_sap_open(unsigned char lsap, 103struct llc_sap *llc_sap_open(unsigned char lsap,
104 int (*func)(struct sk_buff *skb, 104 int (*func)(struct sk_buff *skb,
105 struct net_device *dev, 105 struct net_device *dev,
106 struct packet_type *pt)) 106 struct packet_type *pt,
107 struct net_device *orig_dev))
107{ 108{
108 struct llc_sap *sap = llc_sap_find(lsap); 109 struct llc_sap *sap = llc_sap_find(lsap);
109 110
diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c
index 0f9fc48aeaf9..0f84f66018e4 100644
--- a/net/llc/llc_if.c
+++ b/net/llc/llc_if.c
@@ -15,7 +15,6 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/netdevice.h> 17#include <linux/netdevice.h>
18#include <linux/tcp.h>
19#include <asm/errno.h> 18#include <asm/errno.h>
20#include <net/llc_if.h> 19#include <net/llc_if.h>
21#include <net/llc_sap.h> 20#include <net/llc_sap.h>
@@ -25,6 +24,7 @@
25#include <net/llc_c_ev.h> 24#include <net/llc_c_ev.h>
26#include <net/llc_c_ac.h> 25#include <net/llc_c_ac.h>
27#include <net/llc_c_st.h> 26#include <net/llc_c_st.h>
27#include <net/tcp_states.h>
28 28
29u8 llc_mac_null_var[IFHWADDRLEN]; 29u8 llc_mac_null_var[IFHWADDRLEN];
30 30
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index 4da6976efc9c..13b46240b7a1 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -132,7 +132,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb)
132 * data now), it queues this frame in the connection's backlog. 132 * data now), it queues this frame in the connection's backlog.
133 */ 133 */
134int llc_rcv(struct sk_buff *skb, struct net_device *dev, 134int llc_rcv(struct sk_buff *skb, struct net_device *dev,
135 struct packet_type *pt) 135 struct packet_type *pt, struct net_device *orig_dev)
136{ 136{
137 struct llc_sap *sap; 137 struct llc_sap *sap;
138 struct llc_pdu_sn *pdu; 138 struct llc_pdu_sn *pdu;
@@ -165,7 +165,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
165 * LLC functionality 165 * LLC functionality
166 */ 166 */
167 if (sap->rcv_func) { 167 if (sap->rcv_func) {
168 sap->rcv_func(skb, dev, pt); 168 sap->rcv_func(skb, dev, pt, orig_dev);
169 goto out; 169 goto out;
170 } 170 }
171 dest = llc_pdu_type(skb); 171 dest = llc_pdu_type(skb);
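
Both llc hunks above (llc_sap_open() and llc_rcv()) follow the same tree-wide change: packet receive functions gain a fourth argument, orig_dev, the device the frame originally arrived on. A minimal, hypothetical tap showing the new signature; all names are illustrative:

	#include <linux/if_ether.h>
	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	static int tap_rcv(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
	{
		/* dev may already have been rewritten (e.g. by bonding);
		 * orig_dev preserves the real ingress device */
		kfree_skb(skb);
		return 0;
	}

	static struct packet_type tap_pt = {
		.type = __constant_htons(ETH_P_ALL),
		.func = tap_rcv,
	};

	/* dev_add_pack(&tap_pt); ... dev_remove_pack(&tap_pt); */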
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 965c94eb4bbc..34228ef14985 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -21,7 +21,7 @@
21#include <net/llc_s_ev.h> 21#include <net/llc_s_ev.h>
22#include <net/llc_s_st.h> 22#include <net/llc_s_st.h>
23#include <net/sock.h> 23#include <net/sock.h>
24#include <linux/tcp.h> 24#include <net/tcp_states.h>
25#include <linux/llc.h> 25#include <linux/llc.h>
26 26
27/** 27/**
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
new file mode 100644
index 000000000000..8296b38bf270
--- /dev/null
+++ b/net/netfilter/Kconfig
@@ -0,0 +1,24 @@
1config NETFILTER_NETLINK
2 tristate "Netfilter netlink interface"
3 help
4 If this option is enabled, the kernel will include support
5 for the new netfilter netlink interface.
6
7config NETFILTER_NETLINK_QUEUE
8 tristate "Netfilter NFQUEUE over NFNETLINK interface"
9 depends on NETFILTER_NETLINK
10 help
11 If this option is enabled, the kernel will include support
12 for queueing packets via NFNETLINK.
13
14config NETFILTER_NETLINK_LOG
15 tristate "Netfilter LOG over NFNETLINK interface"
16 depends on NETFILTER_NETLINK
17 help
18 If this option is enabled, the kernel will include support
19 for logging packets via NFNETLINK.
20
21 This obsoletes the existing ipt_ULOG and ebt_ulog mechanisms,
22 and is also scheduled to replace the old syslog-based ipt_LOG
23 and ip6t_LOG modules.
24
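
For reference, enabling all three new options as modules amounts to a .config fragment like the following (illustrative only; the usual route is make menuconfig):

	CONFIG_NETFILTER_NETLINK=m
	CONFIG_NETFILTER_NETLINK_QUEUE=m
	CONFIG_NETFILTER_NETLINK_LOG=m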
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
new file mode 100644
index 000000000000..b3b44f8b415a
--- /dev/null
+++ b/net/netfilter/Makefile
@@ -0,0 +1,7 @@
1netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
2
3obj-$(CONFIG_NETFILTER) = netfilter.o
4
5obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
6obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
7obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
new file mode 100644
index 000000000000..1ceb1a6c254b
--- /dev/null
+++ b/net/netfilter/core.c
@@ -0,0 +1,216 @@
1/* netfilter.c: look after the filters for various protocols.
2 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
3 *
4 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
5 * way.
6 *
7 * Rusty Russell (C)2000 -- This code is GPL.
8 *
9 * February 2000: Modified by James Morris to have 1 queue per protocol.
10 * 15-Mar-2000: Added NF_REPEAT --RR.
11 * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik.
12 */
13#include <linux/config.h>
14#include <linux/kernel.h>
15#include <linux/netfilter.h>
16#include <net/protocol.h>
17#include <linux/init.h>
18#include <linux/skbuff.h>
19#include <linux/wait.h>
20#include <linux/module.h>
21#include <linux/interrupt.h>
22#include <linux/if.h>
23#include <linux/netdevice.h>
24#include <linux/inetdevice.h>
25#include <linux/proc_fs.h>
26#include <net/sock.h>
27
28#include "nf_internals.h"
29
30/* In this code, we can be waiting indefinitely for userspace to
31 * service a packet if a hook returns NF_QUEUE. We could keep a count
32 * of skbuffs queued for userspace, and not deregister a hook unless
33 * this is zero, but that sucks. Now, we simply check when the
34 * packets come back: if the hook is gone, the packet is discarded. */
35struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
36EXPORT_SYMBOL(nf_hooks);
37static DEFINE_SPINLOCK(nf_hook_lock);
38
39int nf_register_hook(struct nf_hook_ops *reg)
40{
41 struct list_head *i;
42
43 spin_lock_bh(&nf_hook_lock);
44 list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
45 if (reg->priority < ((struct nf_hook_ops *)i)->priority)
46 break;
47 }
48 list_add_rcu(&reg->list, i->prev);
49 spin_unlock_bh(&nf_hook_lock);
50
51 synchronize_net();
52 return 0;
53}
54EXPORT_SYMBOL(nf_register_hook);
55
56void nf_unregister_hook(struct nf_hook_ops *reg)
57{
58 spin_lock_bh(&nf_hook_lock);
59 list_del_rcu(&reg->list);
60 spin_unlock_bh(&nf_hook_lock);
61
62 synchronize_net();
63}
64EXPORT_SYMBOL(nf_unregister_hook);
65
66unsigned int nf_iterate(struct list_head *head,
67 struct sk_buff **skb,
68 int hook,
69 const struct net_device *indev,
70 const struct net_device *outdev,
71 struct list_head **i,
72 int (*okfn)(struct sk_buff *),
73 int hook_thresh)
74{
75 unsigned int verdict;
76
77 /*
78 * The caller must not block between calls to this
79 * function because of risk of continuing from deleted element.
80 */
81 list_for_each_continue_rcu(*i, head) {
82 struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
83
84 if (hook_thresh > elem->priority)
85 continue;
86
87 /* Optimization: we don't need to hold module
88 reference here, since function can't sleep. --RR */
89 verdict = elem->hook(hook, skb, indev, outdev, okfn);
90 if (verdict != NF_ACCEPT) {
91#ifdef CONFIG_NETFILTER_DEBUG
92 if (unlikely((verdict & NF_VERDICT_MASK)
93 > NF_MAX_VERDICT)) {
94 NFDEBUG("Evil return from %p(%u).\n",
95 elem->hook, hook);
96 continue;
97 }
98#endif
99 if (verdict != NF_REPEAT)
100 return verdict;
101 *i = (*i)->prev;
102 }
103 }
104 return NF_ACCEPT;
105}
106
107
108/* Returns 1 if okfn() needs to be executed by the caller,
109 * -EPERM for NF_DROP, 0 otherwise. */
110int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
111 struct net_device *indev,
112 struct net_device *outdev,
113 int (*okfn)(struct sk_buff *),
114 int hook_thresh)
115{
116 struct list_head *elem;
117 unsigned int verdict;
118 int ret = 0;
119
120 /* We may already have this, but read-locks nest anyway */
121 rcu_read_lock();
122
123 elem = &nf_hooks[pf][hook];
124next_hook:
125 verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
126 outdev, &elem, okfn, hook_thresh);
127 if (verdict == NF_ACCEPT || verdict == NF_STOP) {
128 ret = 1;
129 goto unlock;
130 } else if (verdict == NF_DROP) {
131 kfree_skb(*pskb);
132 ret = -EPERM;
133 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
134 NFDEBUG("nf_hook: Verdict = QUEUE.\n");
135 if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn,
136 verdict >> NF_VERDICT_BITS))
137 goto next_hook;
138 }
139unlock:
140 rcu_read_unlock();
141 return ret;
142}
143EXPORT_SYMBOL(nf_hook_slow);
144
145
146int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len)
147{
148 struct sk_buff *nskb;
149
150 if (writable_len > (*pskb)->len)
151 return 0;
152
153 /* Not exclusive use of packet? Must copy. */
154 if (skb_shared(*pskb) || skb_cloned(*pskb))
155 goto copy_skb;
156
157 return pskb_may_pull(*pskb, writable_len);
158
159copy_skb:
160 nskb = skb_copy(*pskb, GFP_ATOMIC);
161 if (!nskb)
162 return 0;
163 BUG_ON(skb_is_nonlinear(nskb));
164
165 /* Rest of kernel will get very unhappy if we pass it a
166 suddenly-orphaned skbuff */
167 if ((*pskb)->sk)
168 skb_set_owner_w(nskb, (*pskb)->sk);
169 kfree_skb(*pskb);
170 *pskb = nskb;
171 return 1;
172}
173EXPORT_SYMBOL(skb_make_writable);
174
175
176/* This does not belong here, but locally generated errors need it if connection
177 tracking in use: without this, connection may not be in hash table, and hence
178 manufactured ICMP or RST packets will not be associated with it. */
179void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
180EXPORT_SYMBOL(ip_ct_attach);
181
182void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
183{
184 void (*attach)(struct sk_buff *, struct sk_buff *);
185
186 if (skb->nfct && (attach = ip_ct_attach) != NULL) {
187 mb(); /* Just to be sure: must be read before executing this */
188 attach(new, skb);
189 }
190}
191EXPORT_SYMBOL(nf_ct_attach);
192
193#ifdef CONFIG_PROC_FS
194struct proc_dir_entry *proc_net_netfilter;
195EXPORT_SYMBOL(proc_net_netfilter);
196#endif
197
198void __init netfilter_init(void)
199{
200 int i, h;
201 for (i = 0; i < NPROTO; i++) {
202 for (h = 0; h < NF_MAX_HOOKS; h++)
203 INIT_LIST_HEAD(&nf_hooks[i][h]);
204 }
205
206#ifdef CONFIG_PROC_FS
207 proc_net_netfilter = proc_mkdir("netfilter", proc_net);
208 if (!proc_net_netfilter)
209 panic("cannot create netfilter proc entry");
210#endif
211
212 if (netfilter_queue_init() < 0)
213 panic("cannot initialize nf_queue");
214 if (netfilter_log_init() < 0)
215 panic("cannot initialize nf_log");
216}
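
To illustrate the registration API above, here is a minimal, hypothetical observer module; the names are illustrative, and the struct fields match those core.c actually uses (hook, owner, pf, hooknum, priority):

	#include <linux/module.h>
	#include <linux/netfilter.h>
	#include <linux/netfilter_ipv4.h>

	static unsigned int watch_hook(unsigned int hooknum,
				       struct sk_buff **pskb,
				       const struct net_device *in,
				       const struct net_device *out,
				       int (*okfn)(struct sk_buff *))
	{
		/* observe only; NF_ACCEPT lets nf_iterate() continue */
		return NF_ACCEPT;
	}

	static struct nf_hook_ops watch_ops = {
		.hook     = watch_hook,
		.owner    = THIS_MODULE,
		.pf       = PF_INET,
		.hooknum  = NF_IP_PRE_ROUTING,
		.priority = NF_IP_PRI_FIRST,
	};

	static int __init watch_init(void)
	{
		/* inserted in ascending .priority order by core.c */
		return nf_register_hook(&watch_ops);
	}

	static void __exit watch_exit(void)
	{
		nf_unregister_hook(&watch_ops);
	}

	module_init(watch_init);
	module_exit(watch_exit);
	MODULE_LICENSE("GPL");

Callers normally reach nf_hook_slow() through the NF_HOOK() wrapper, which runs okfn() itself when the function returns 1.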
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
new file mode 100644
index 000000000000..6bdee2910617
--- /dev/null
+++ b/net/netfilter/nf_internals.h
@@ -0,0 +1,39 @@
1#ifndef _NF_INTERNALS_H
2#define _NF_INTERNALS_H
3
4#include <linux/config.h>
5#include <linux/list.h>
6#include <linux/skbuff.h>
7#include <linux/netdevice.h>
8
9#ifdef CONFIG_NETFILTER_DEBUG
10#define NFDEBUG(format, args...) printk(format , ## args)
11#else
12#define NFDEBUG(format, args...)
13#endif
14
15
16/* core.c */
17extern unsigned int nf_iterate(struct list_head *head,
18 struct sk_buff **skb,
19 int hook,
20 const struct net_device *indev,
21 const struct net_device *outdev,
22 struct list_head **i,
23 int (*okfn)(struct sk_buff *),
24 int hook_thresh);
25
26/* nf_queue.c */
27extern int nf_queue(struct sk_buff **skb,
28 struct list_head *elem,
29 int pf, unsigned int hook,
30 struct net_device *indev,
31 struct net_device *outdev,
32 int (*okfn)(struct sk_buff *),
33 unsigned int queuenum);
34extern int __init netfilter_queue_init(void);
35
36/* nf_log.c */
37extern int __init netfilter_log_init(void);
38
39#endif
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
new file mode 100644
index 000000000000..3e76bd0824a2
--- /dev/null
+++ b/net/netfilter/nf_log.c
@@ -0,0 +1,178 @@
1#include <linux/config.h>
2#include <linux/kernel.h>
3#include <linux/init.h>
4#include <linux/module.h>
5#include <linux/proc_fs.h>
6#include <linux/skbuff.h>
7#include <linux/netfilter.h>
8#include <linux/seq_file.h>
9#include <net/protocol.h>
10
11#include "nf_internals.h"
12
13/* Internal logging interface, which relies on the real
14 LOG target modules */
15
16#define NF_LOG_PREFIXLEN 128
17
18static struct nf_logger *nf_logging[NPROTO]; /* = NULL */
19static DEFINE_SPINLOCK(nf_log_lock);
20
21/* return EBUSY if somebody else is registered, EEXIST if the same logger
22 * is registered, 0 on success. */
23int nf_log_register(int pf, struct nf_logger *logger)
24{
25 int ret = -EBUSY;
26
27 if (pf >= NPROTO)
28 return -EINVAL;
29
30 /* Any setup of logging members must be done before
31 * substituting pointer. */
32 spin_lock(&nf_log_lock);
33 if (!nf_logging[pf]) {
34 rcu_assign_pointer(nf_logging[pf], logger);
35 ret = 0;
36 } else if (nf_logging[pf] == logger)
37 ret = -EEXIST;
38
39 spin_unlock(&nf_log_lock);
40 return ret;
41}
42EXPORT_SYMBOL(nf_log_register);
43
44int nf_log_unregister_pf(int pf)
45{
46 if (pf >= NPROTO)
47 return -EINVAL;
48
49 spin_lock(&nf_log_lock);
50 nf_logging[pf] = NULL;
51 spin_unlock(&nf_log_lock);
52
53 /* Give time to concurrent readers. */
54 synchronize_net();
55
56 return 0;
57}
58EXPORT_SYMBOL(nf_log_unregister_pf);
59
60void nf_log_unregister_logger(struct nf_logger *logger)
61{
62 int i;
63
64 spin_lock(&nf_log_lock);
65 for (i = 0; i < NPROTO; i++) {
66 if (nf_logging[i] == logger)
67 nf_logging[i] = NULL;
68 }
69 spin_unlock(&nf_log_lock);
70
71 synchronize_net();
72}
73EXPORT_SYMBOL(nf_log_unregister_logger);
74
75void nf_log_packet(int pf,
76 unsigned int hooknum,
77 const struct sk_buff *skb,
78 const struct net_device *in,
79 const struct net_device *out,
80 struct nf_loginfo *loginfo,
81 const char *fmt, ...)
82{
83 va_list args;
84 char prefix[NF_LOG_PREFIXLEN];
85 struct nf_logger *logger;
86
87 rcu_read_lock();
88 logger = rcu_dereference(nf_logging[pf]);
89 if (logger) {
90 va_start(args, fmt);
91 vsnprintf(prefix, sizeof(prefix), fmt, args);
92 va_end(args);
93 /* We must read logging before nf_logfn[pf] */
94 logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix);
95 } else if (net_ratelimit()) {
96 printk(KERN_WARNING "nf_log_packet: can\'t log since "
97 "no backend logging module loaded in! Please either "
98 "load one, or disable logging explicitly\n");
99 }
100 rcu_read_unlock();
101}
102EXPORT_SYMBOL(nf_log_packet);
103
104#ifdef CONFIG_PROC_FS
105static void *seq_start(struct seq_file *seq, loff_t *pos)
106{
107 rcu_read_lock();
108
109 if (*pos >= NPROTO)
110 return NULL;
111
112 return pos;
113}
114
115static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
116{
117 (*pos)++;
118
119 if (*pos >= NPROTO)
120 return NULL;
121
122 return pos;
123}
124
125static void seq_stop(struct seq_file *s, void *v)
126{
127 rcu_read_unlock();
128}
129
130static int seq_show(struct seq_file *s, void *v)
131{
132 loff_t *pos = v;
133 const struct nf_logger *logger;
134
135 logger = rcu_dereference(nf_logging[*pos]);
136
137 if (!logger)
138 return seq_printf(s, "%2lld NONE\n", *pos);
139
140 return seq_printf(s, "%2lld %s\n", *pos, logger->name);
141}
142
143static struct seq_operations nflog_seq_ops = {
144 .start = seq_start,
145 .next = seq_next,
146 .stop = seq_stop,
147 .show = seq_show,
148};
149
150static int nflog_open(struct inode *inode, struct file *file)
151{
152 return seq_open(file, &nflog_seq_ops);
153}
154
155static struct file_operations nflog_file_ops = {
156 .owner = THIS_MODULE,
157 .open = nflog_open,
158 .read = seq_read,
159 .llseek = seq_lseek,
160 .release = seq_release,
161};
162
163#endif /* PROC_FS */
164
165
166int __init netfilter_log_init(void)
167{
168#ifdef CONFIG_PROC_FS
169 struct proc_dir_entry *pde;
170
171 pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter);
172 if (!pde)
173 return -1;
174
175 pde->proc_fops = &nflog_file_ops;
176#endif
177 return 0;
178}
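
nf_log.c is only a dispatcher; a backend such as nfnetlink_log (added below) supplies the actual logfn. A hypothetical printk-based backend, sketched against the struct nf_logger fields used above (name, logfn, me):

	#include <linux/module.h>
	#include <linux/netfilter.h>
	#include <linux/skbuff.h>

	static void printk_logfn(unsigned int pf, unsigned int hooknum,
				 const struct sk_buff *skb,
				 const struct net_device *in,
				 const struct net_device *out,
				 const struct nf_loginfo *li,
				 const char *prefix)
	{
		printk(KERN_INFO "%shook=%u len=%u\n",
		       prefix ? prefix : "", hooknum, skb->len);
	}

	static struct nf_logger printk_logger = {
		.name  = "printk_log",
		.logfn = &printk_logfn,
		.me    = THIS_MODULE,
	};

	/* nf_log_register(PF_INET, &printk_logger) returns -EBUSY if
	 * another backend already owns PF_INET, -EEXIST if we are the
	 * registered one; nf_log_unregister_logger() detaches us from
	 * every family on unload. */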
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
new file mode 100644
index 000000000000..d10d552d9c40
--- /dev/null
+++ b/net/netfilter/nf_queue.c
@@ -0,0 +1,343 @@
1#include <linux/config.h>
2#include <linux/kernel.h>
3#include <linux/init.h>
4#include <linux/module.h>
5#include <linux/proc_fs.h>
6#include <linux/skbuff.h>
7#include <linux/netfilter.h>
8#include <linux/seq_file.h>
9#include <net/protocol.h>
10
11#include "nf_internals.h"
12
13/*
14 * A queue handler may be registered for each protocol. Each is protected by
15 * a long term mutex. The handler must provide an outfn() to accept packets
16 * for queueing and must reinject all packets it receives, no matter what.
17 */
18static struct nf_queue_handler *queue_handler[NPROTO];
19static struct nf_queue_rerouter *queue_rerouter;
20
21static DEFINE_RWLOCK(queue_handler_lock);
22
23/* return EBUSY when somebody else is registered, return EEXIST if the
24 * same handler is registered, return 0 in case of success. */
25int nf_register_queue_handler(int pf, struct nf_queue_handler *qh)
26{
27 int ret;
28
29 if (pf >= NPROTO)
30 return -EINVAL;
31
32 write_lock_bh(&queue_handler_lock);
33 if (queue_handler[pf] == qh)
34 ret = -EEXIST;
35 else if (queue_handler[pf])
36 ret = -EBUSY;
37 else {
38 queue_handler[pf] = qh;
39 ret = 0;
40 }
41 write_unlock_bh(&queue_handler_lock);
42
43 return ret;
44}
45EXPORT_SYMBOL(nf_register_queue_handler);
46
47/* The caller must flush their queue before this */
48int nf_unregister_queue_handler(int pf)
49{
50 if (pf >= NPROTO)
51 return -EINVAL;
52
53 write_lock_bh(&queue_handler_lock);
54 queue_handler[pf] = NULL;
55 write_unlock_bh(&queue_handler_lock);
56
57 return 0;
58}
59EXPORT_SYMBOL(nf_unregister_queue_handler);
60
61int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer)
62{
63 if (pf >= NPROTO)
64 return -EINVAL;
65
66 write_lock_bh(&queue_handler_lock);
67 memcpy(&queue_rerouter[pf], rer, sizeof(queue_rerouter[pf]));
68 write_unlock_bh(&queue_handler_lock);
69
70 return 0;
71}
72EXPORT_SYMBOL_GPL(nf_register_queue_rerouter);
73
74int nf_unregister_queue_rerouter(int pf)
75{
76 if (pf >= NPROTO)
77 return -EINVAL;
78
79 write_lock_bh(&queue_handler_lock);
80 memset(&queue_rerouter[pf], 0, sizeof(queue_rerouter[pf]));
81 write_unlock_bh(&queue_handler_lock);
82 return 0;
83}
84EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter);
85
86void nf_unregister_queue_handlers(struct nf_queue_handler *qh)
87{
88 int pf;
89
90 write_lock_bh(&queue_handler_lock);
91 for (pf = 0; pf < NPROTO; pf++) {
92 if (queue_handler[pf] == qh)
93 queue_handler[pf] = NULL;
94 }
95 write_unlock_bh(&queue_handler_lock);
96}
97EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
98
99/*
100 * Any packet that leaves via this function must come back
101 * through nf_reinject().
102 */
103int nf_queue(struct sk_buff **skb,
104 struct list_head *elem,
105 int pf, unsigned int hook,
106 struct net_device *indev,
107 struct net_device *outdev,
108 int (*okfn)(struct sk_buff *),
109 unsigned int queuenum)
110{
111 int status;
112 struct nf_info *info;
113#ifdef CONFIG_BRIDGE_NETFILTER
114 struct net_device *physindev = NULL;
115 struct net_device *physoutdev = NULL;
116#endif
117
118 /* QUEUE == DROP if no one is waiting, to be safe. */
119 read_lock(&queue_handler_lock);
120 if (!queue_handler[pf]->outfn) {
121 read_unlock(&queue_handler_lock);
122 kfree_skb(*skb);
123 return 1;
124 }
125
126 info = kmalloc(sizeof(*info)+queue_rerouter[pf].rer_size, GFP_ATOMIC);
127 if (!info) {
128 if (net_ratelimit())
129 printk(KERN_ERR "OOM queueing packet %p\n",
130 *skb);
131 read_unlock(&queue_handler_lock);
132 kfree_skb(*skb);
133 return 1;
134 }
135
136 *info = (struct nf_info) {
137 (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
138
139 /* If it's going away, ignore hook. */
140 if (!try_module_get(info->elem->owner)) {
141 read_unlock(&queue_handler_lock);
142 kfree(info);
143 return 0;
144 }
145
146 /* Bump dev refs so they don't vanish while packet is out */
147 if (indev) dev_hold(indev);
148 if (outdev) dev_hold(outdev);
149
150#ifdef CONFIG_BRIDGE_NETFILTER
151 if ((*skb)->nf_bridge) {
152 physindev = (*skb)->nf_bridge->physindev;
153 if (physindev) dev_hold(physindev);
154 physoutdev = (*skb)->nf_bridge->physoutdev;
155 if (physoutdev) dev_hold(physoutdev);
156 }
157#endif
158 if (queue_rerouter[pf].save)
159 queue_rerouter[pf].save(*skb, info);
160
161 status = queue_handler[pf]->outfn(*skb, info, queuenum,
162 queue_handler[pf]->data);
163
164 if (status >= 0 && queue_rerouter[pf].reroute)
165 status = queue_rerouter[pf].reroute(skb, info);
166
167 read_unlock(&queue_handler_lock);
168
169 if (status < 0) {
170 /* James M doesn't say fuck enough. */
171 if (indev) dev_put(indev);
172 if (outdev) dev_put(outdev);
173#ifdef CONFIG_BRIDGE_NETFILTER
174 if (physindev) dev_put(physindev);
175 if (physoutdev) dev_put(physoutdev);
176#endif
177 module_put(info->elem->owner);
178 kfree(info);
179 kfree_skb(*skb);
180
181 return 1;
182 }
183
184 return 1;
185}
186
187void nf_reinject(struct sk_buff *skb, struct nf_info *info,
188 unsigned int verdict)
189{
190 struct list_head *elem = &info->elem->list;
191 struct list_head *i;
192
193 rcu_read_lock();
194
195 /* Release those devices we held, or Alexey will kill me. */
196 if (info->indev) dev_put(info->indev);
197 if (info->outdev) dev_put(info->outdev);
198#ifdef CONFIG_BRIDGE_NETFILTER
199 if (skb->nf_bridge) {
200 if (skb->nf_bridge->physindev)
201 dev_put(skb->nf_bridge->physindev);
202 if (skb->nf_bridge->physoutdev)
203 dev_put(skb->nf_bridge->physoutdev);
204 }
205#endif
206
207 /* Drop reference to owner of hook which queued us. */
208 module_put(info->elem->owner);
209
210 list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
211 if (i == elem)
212 break;
213 }
214
215 if (elem == &nf_hooks[info->pf][info->hook]) {
216 /* The module which sent it to userspace is gone. */
217 NFDEBUG("%s: module disappeared, dropping packet.\n",
218 __FUNCTION__);
219 verdict = NF_DROP;
220 }
221
222 /* Continue traversal iff userspace said ok... */
223 if (verdict == NF_REPEAT) {
224 elem = elem->prev;
225 verdict = NF_ACCEPT;
226 }
227
228 if (verdict == NF_ACCEPT) {
229 next_hook:
230 verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
231 &skb, info->hook,
232 info->indev, info->outdev, &elem,
233 info->okfn, INT_MIN);
234 }
235
236 switch (verdict & NF_VERDICT_MASK) {
237 case NF_ACCEPT:
238 info->okfn(skb);
239 break;
240
241 case NF_QUEUE:
242 if (!nf_queue(&skb, elem, info->pf, info->hook,
243 info->indev, info->outdev, info->okfn,
244 verdict >> NF_VERDICT_BITS))
245 goto next_hook;
246 break;
247 }
248 rcu_read_unlock();
249
250 if (verdict == NF_DROP)
251 kfree_skb(skb);
252
253 kfree(info);
254 return;
255}
256EXPORT_SYMBOL(nf_reinject);
257
258#ifdef CONFIG_PROC_FS
259static void *seq_start(struct seq_file *seq, loff_t *pos)
260{
261 if (*pos >= NPROTO)
262 return NULL;
263
264 return pos;
265}
266
267static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
268{
269 (*pos)++;
270
271 if (*pos >= NPROTO)
272 return NULL;
273
274 return pos;
275}
276
277static void seq_stop(struct seq_file *s, void *v)
278{
279
280}
281
282static int seq_show(struct seq_file *s, void *v)
283{
284 int ret;
285 loff_t *pos = v;
286 struct nf_queue_handler *qh;
287
288 read_lock_bh(&queue_handler_lock);
289 qh = queue_handler[*pos];
290 if (!qh)
291 ret = seq_printf(s, "%2lld NONE\n", *pos);
292 else
293 ret = seq_printf(s, "%2lld %s\n", *pos, qh->name);
294 read_unlock_bh(&queue_handler_lock);
295
296 return ret;
297}
298
299static struct seq_operations nfqueue_seq_ops = {
300 .start = seq_start,
301 .next = seq_next,
302 .stop = seq_stop,
303 .show = seq_show,
304};
305
306static int nfqueue_open(struct inode *inode, struct file *file)
307{
308 return seq_open(file, &nfqueue_seq_ops);
309}
310
311static struct file_operations nfqueue_file_ops = {
312 .owner = THIS_MODULE,
313 .open = nfqueue_open,
314 .read = seq_read,
315 .llseek = seq_lseek,
316 .release = seq_release,
317};
318#endif /* PROC_FS */
319
320
321int __init netfilter_queue_init(void)
322{
323#ifdef CONFIG_PROC_FS
324 struct proc_dir_entry *pde;
325#endif
326 queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter),
327 GFP_KERNEL);
328 if (!queue_rerouter)
329 return -ENOMEM;
330
331#ifdef CONFIG_PROC_FS
332 pde = create_proc_entry("nf_queue", S_IRUGO, proc_net_netfilter);
333 if (!pde) {
334 kfree(queue_rerouter);
335 return -1;
336 }
337 pde->proc_fops = &nfqueue_file_ops;
338#endif
339 memset(queue_rerouter, 0, NPROTO * sizeof(struct nf_queue_rerouter));
340
341 return 0;
342}
343
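Every packet a handler accepts through outfn() must eventually come back via nf_reinject(). A deliberately trivial handler honouring that contract (hypothetical names; a real handler defers reinjection to process context, and the synchronous call here is only safe while no rerouter is registered for the family):

	#include <linux/netfilter.h>
	#include <linux/skbuff.h>

	static int echo_outfn(struct sk_buff *skb, struct nf_info *info,
			      unsigned int queuenum, void *data)
	{
		/* a real handler would queue skb to userspace; we hand
		 * it straight back with an ACCEPT verdict */
		nf_reinject(skb, info, NF_ACCEPT);
		return 0;
	}

	static struct nf_queue_handler echo_qh = {
		.outfn = echo_outfn,
		.data  = NULL,
		.name  = "echo_queue",
	};

	/* nf_register_queue_handler(PF_INET, &echo_qh);
	 * ... flush the queue, then:
	 * nf_unregister_queue_handler(PF_INET); */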
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
new file mode 100644
index 000000000000..61a833a9caa6
--- /dev/null
+++ b/net/netfilter/nf_sockopt.c
@@ -0,0 +1,132 @@
1#include <linux/config.h>
2#include <linux/kernel.h>
3#include <linux/init.h>
4#include <linux/module.h>
5#include <linux/skbuff.h>
6#include <linux/netfilter.h>
7#include <net/sock.h>
8
9#include "nf_internals.h"
10
11/* Sockopts only registered and called from user context, so
12 net locking would be overkill. Also, [gs]etsockopt calls may
13 sleep. */
14static DECLARE_MUTEX(nf_sockopt_mutex);
15static LIST_HEAD(nf_sockopts);
16
17/* Do exclusive ranges overlap? */
18static inline int overlap(int min1, int max1, int min2, int max2)
19{
20 return max1 > min2 && min1 < max2;
21}
22
23/* Functions to register sockopt ranges (exclusive). */
24int nf_register_sockopt(struct nf_sockopt_ops *reg)
25{
26 struct list_head *i;
27 int ret = 0;
28
29 if (down_interruptible(&nf_sockopt_mutex) != 0)
30 return -EINTR;
31
32 list_for_each(i, &nf_sockopts) {
33 struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
34 if (ops->pf == reg->pf
35 && (overlap(ops->set_optmin, ops->set_optmax,
36 reg->set_optmin, reg->set_optmax)
37 || overlap(ops->get_optmin, ops->get_optmax,
38 reg->get_optmin, reg->get_optmax))) {
39 NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
40 ops->set_optmin, ops->set_optmax,
41 ops->get_optmin, ops->get_optmax,
42 reg->set_optmin, reg->set_optmax,
43 reg->get_optmin, reg->get_optmax);
44 ret = -EBUSY;
45 goto out;
46 }
47 }
48
49 list_add(&reg->list, &nf_sockopts);
50out:
51 up(&nf_sockopt_mutex);
52 return ret;
53}
54EXPORT_SYMBOL(nf_register_sockopt);
55
56void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
57{
58 /* No point being interruptible: we're probably in cleanup_module() */
59 restart:
60 down(&nf_sockopt_mutex);
61 if (reg->use != 0) {
62 /* To be woken by nf_sockopt call... */
63 /* FIXME: Stuart Young's name appears gratuitously. */
64 set_current_state(TASK_UNINTERRUPTIBLE);
65 reg->cleanup_task = current;
66 up(&nf_sockopt_mutex);
67 schedule();
68 goto restart;
69 }
70 list_del(&reg->list);
71 up(&nf_sockopt_mutex);
72}
73EXPORT_SYMBOL(nf_unregister_sockopt);
74
75/* Call get/setsockopt() */
76static int nf_sockopt(struct sock *sk, int pf, int val,
77 char __user *opt, int *len, int get)
78{
79 struct list_head *i;
80 struct nf_sockopt_ops *ops;
81 int ret;
82
83 if (down_interruptible(&nf_sockopt_mutex) != 0)
84 return -EINTR;
85
86 list_for_each(i, &nf_sockopts) {
87 ops = (struct nf_sockopt_ops *)i;
88 if (ops->pf == pf) {
89 if (get) {
90 if (val >= ops->get_optmin
91 && val < ops->get_optmax) {
92 ops->use++;
93 up(&nf_sockopt_mutex);
94 ret = ops->get(sk, val, opt, len);
95 goto out;
96 }
97 } else {
98 if (val >= ops->set_optmin
99 && val < ops->set_optmax) {
100 ops->use++;
101 up(&nf_sockopt_mutex);
102 ret = ops->set(sk, val, opt, *len);
103 goto out;
104 }
105 }
106 }
107 }
108 up(&nf_sockopt_mutex);
109 return -ENOPROTOOPT;
110
111 out:
112 down(&nf_sockopt_mutex);
113 ops->use--;
114 if (ops->cleanup_task)
115 wake_up_process(ops->cleanup_task);
116 up(&nf_sockopt_mutex);
117 return ret;
118}
119
120int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
121 int len)
122{
123 return nf_sockopt(sk, pf, val, opt, &len, 0);
124}
125EXPORT_SYMBOL(nf_setsockopt);
126
127int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
128{
129 return nf_sockopt(sk, pf, val, opt, len, 1);
130}
131EXPORT_SYMBOL(nf_getsockopt);
132
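Registration claims an exclusive half-open range [optmin, optmax) per protocol family. A hypothetical single-option user, with the callback signatures inferred from the call sites above:

	#include <linux/netfilter.h>
	#include <net/sock.h>

	#define EXAMPLE_SO_BASE 9999	/* hypothetical option number */

	static int example_set(struct sock *sk, int optval,
			       void __user *user, unsigned int len)
	{
		return 0;	/* parse len bytes of user data here */
	}

	static int example_get(struct sock *sk, int optval,
			       void __user *user, int *len)
	{
		return 0;	/* copy up to *len bytes back here */
	}

	static struct nf_sockopt_ops example_ops = {
		.pf         = PF_INET,
		.set_optmin = EXAMPLE_SO_BASE,
		.set_optmax = EXAMPLE_SO_BASE + 1,	/* exclusive bound */
		.set        = example_set,
		.get_optmin = EXAMPLE_SO_BASE,
		.get_optmax = EXAMPLE_SO_BASE + 1,
		.get        = example_get,
	};

	/* nf_register_sockopt(&example_ops) returns -EBUSY when the
	 * range overlaps an existing registration for the same pf. */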
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
new file mode 100644
index 000000000000..49a3900e3d32
--- /dev/null
+++ b/net/netfilter/nfnetlink.c
@@ -0,0 +1,376 @@
1/* Netfilter messages via netlink socket. Allows for user space
2 * protocol helpers and general trouble making from userspace.
3 *
4 * (C) 2001 by Jay Schulist <jschlst@samba.org>,
5 * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
6 * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net>
7 *
8 * Initial netfilter messages via netlink development funded and
9 * generally made possible by Network Robots, Inc. (www.networkrobots.com)
10 *
11 * Further development of this code funded by Astaro AG (http://www.astaro.com)
12 *
13 * This software may be used and distributed according to the terms
14 * of the GNU General Public License, incorporated herein by reference.
15 */
16
17#include <linux/config.h>
18#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/socket.h>
21#include <linux/kernel.h>
22#include <linux/major.h>
23#include <linux/sched.h>
24#include <linux/timer.h>
25#include <linux/string.h>
26#include <linux/sockios.h>
27#include <linux/net.h>
28#include <linux/fcntl.h>
29#include <linux/skbuff.h>
30#include <asm/uaccess.h>
31#include <asm/system.h>
32#include <net/sock.h>
33#include <linux/init.h>
34#include <linux/spinlock.h>
35
36#include <linux/netfilter.h>
37#include <linux/netlink.h>
38#include <linux/netfilter/nfnetlink.h>
39
40MODULE_LICENSE("GPL");
41MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
42MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
43
44static char __initdata nfversion[] = "0.30";
45
46#if 0
47#define DEBUGP(format, args...) \
48 printk(KERN_DEBUG "%s(%d):%s(): " format, __FILE__, \
49 __LINE__, __FUNCTION__, ## args)
50#else
51#define DEBUGP(format, args...)
52#endif
53
54static struct sock *nfnl = NULL;
55static struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT];
56DECLARE_MUTEX(nfnl_sem);
57
58void nfnl_lock(void)
59{
60 nfnl_shlock();
61}
62
63void nfnl_unlock(void)
64{
65 nfnl_shunlock();
66}
67
68int nfnetlink_subsys_register(struct nfnetlink_subsystem *n)
69{
70 DEBUGP("registering subsystem ID %u\n", n->subsys_id);
71
72 nfnl_lock();
73 if (subsys_table[n->subsys_id]) {
74 nfnl_unlock();
75 return -EBUSY;
76 }
77 subsys_table[n->subsys_id] = n;
78 nfnl_unlock();
79
80 return 0;
81}
82
83int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n)
84{
85 DEBUGP("unregistering subsystem ID %u\n", n->subsys_id);
86
87 nfnl_lock();
88 subsys_table[n->subsys_id] = NULL;
89 nfnl_unlock();
90
91 return 0;
92}
93
94static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type)
95{
96 u_int8_t subsys_id = NFNL_SUBSYS_ID(type);
97
98 if (subsys_id >= NFNL_SUBSYS_COUNT
99 || subsys_table[subsys_id] == NULL)
100 return NULL;
101
102 return subsys_table[subsys_id];
103}
104
105static inline struct nfnl_callback *
106nfnetlink_find_client(u_int16_t type, struct nfnetlink_subsystem *ss)
107{
108 u_int8_t cb_id = NFNL_MSG_TYPE(type);
109
110 if (cb_id >= ss->cb_count) {
111 DEBUGP("msgtype %u >= %u, returning\n", type, ss->cb_count);
112 return NULL;
113 }
114
115 return &ss->cb[cb_id];
116}
117
118void __nfa_fill(struct sk_buff *skb, int attrtype, int attrlen,
119 const void *data)
120{
121 struct nfattr *nfa;
122 int size = NFA_LENGTH(attrlen);
123
124 nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size));
125 nfa->nfa_type = attrtype;
126 nfa->nfa_len = size;
127 memcpy(NFA_DATA(nfa), data, attrlen);
128 memset(NFA_DATA(nfa) + attrlen, 0, NFA_ALIGN(size) - size);
129}
130
131int nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len)
132{
133 memset(tb, 0, sizeof(struct nfattr *) * maxattr);
134
135 while (NFA_OK(nfa, len)) {
136 unsigned flavor = nfa->nfa_type;
137 if (flavor && flavor <= maxattr)
138 tb[flavor-1] = nfa;
139 nfa = NFA_NEXT(nfa, len);
140 }
141
142 return 0;
143}
144
145/**
146 * nfnetlink_check_attributes - check and parse nfnetlink attributes
147 *
148 * subsys: nfnl subsystem for which this message is to be parsed
149 * nlh: netlink message to be checked/parsed
150 * cda: array of pointers, needs to be at least subsys->attr_count big
151 *
152 */
153static int
154nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys,
155 struct nlmsghdr *nlh, struct nfattr *cda[])
156{
157 int min_len;
158 u_int16_t attr_count;
159 u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
160
161 if (unlikely(cb_id >= subsys->cb_count)) {
162 DEBUGP("msgtype %u >= %u, returning\n",
163 cb_id, subsys->cb_count);
164 return -EINVAL;
165 }
166
167 min_len = NLMSG_ALIGN(sizeof(struct nfgenmsg));
168 if (unlikely(nlh->nlmsg_len < min_len))
169 return -EINVAL;
170
171 attr_count = subsys->cb[cb_id].attr_count;
172 memset(cda, 0, sizeof(struct nfattr *) * attr_count);
173
174 /* check attribute lengths. */
175 if (likely(nlh->nlmsg_len > min_len)) {
176 struct nfattr *attr = NFM_NFA(NLMSG_DATA(nlh));
177 int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
178
179 while (NFA_OK(attr, attrlen)) {
180 unsigned flavor = attr->nfa_type;
181 if (flavor) {
182 if (flavor > attr_count)
183 return -EINVAL;
184 cda[flavor - 1] = attr;
185 }
186 attr = NFA_NEXT(attr, attrlen);
187 }
188 }
189
190 /* implicit: if nlmsg_len == min_len, we return 0, and an empty
191 * (zeroed) cda[] array. The message is valid, but empty. */
192
193 return 0;
194}
195
196int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
197{
198 int allocation = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
199 int err = 0;
200
201 NETLINK_CB(skb).dst_group = group;
202 if (echo)
203 atomic_inc(&skb->users);
204 netlink_broadcast(nfnl, skb, pid, group, allocation);
205 if (echo)
206 err = netlink_unicast(nfnl, skb, pid, MSG_DONTWAIT);
207
208 return err;
209}
210
211int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags)
212{
213 return netlink_unicast(nfnl, skb, pid, flags);
214}
215
216/* Process one complete nfnetlink message. */
217static inline int nfnetlink_rcv_msg(struct sk_buff *skb,
218 struct nlmsghdr *nlh, int *errp)
219{
220 struct nfnl_callback *nc;
221 struct nfnetlink_subsystem *ss;
222 int type, err = 0;
223
224 DEBUGP("entered; subsys=%u, msgtype=%u\n",
225 NFNL_SUBSYS_ID(nlh->nlmsg_type),
226 NFNL_MSG_TYPE(nlh->nlmsg_type));
227
228 /* Only requests are handled by kernel now. */
229 if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
230 DEBUGP("received non-request message\n");
231 return 0;
232 }
233
234 /* All the messages must at least contain nfgenmsg */
235 if (nlh->nlmsg_len <
236 NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct nfgenmsg)))) {
237 DEBUGP("received message was too short\n");
238 return 0;
239 }
240
241 type = nlh->nlmsg_type;
242 ss = nfnetlink_get_subsys(type);
243 if (!ss) {
244#ifdef CONFIG_KMOD
245 /* don't call nfnl_shunlock, since it would reenter
246 * with further packet processing */
247 up(&nfnl_sem);
248 request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type));
249 nfnl_shlock();
250 ss = nfnetlink_get_subsys(type);
251 if (!ss)
252#endif
253 goto err_inval;
254 }
255
256 nc = nfnetlink_find_client(type, ss);
257 if (!nc) {
258 DEBUGP("unable to find client for type %d\n", type);
259 goto err_inval;
260 }
261
262 if (nc->cap_required &&
263 !cap_raised(NETLINK_CB(skb).eff_cap, nc->cap_required)) {
264 DEBUGP("permission denied for type %d\n", type);
265 *errp = -EPERM;
266 return -1;
267 }
268
269 {
270 u_int16_t attr_count =
271 ss->cb[NFNL_MSG_TYPE(nlh->nlmsg_type)].attr_count;
272 struct nfattr *cda[attr_count];
273
274 memset(cda, 0, sizeof(struct nfattr *) * attr_count);
275
276 err = nfnetlink_check_attributes(ss, nlh, cda);
277 if (err < 0)
278 goto err_inval;
279
280 DEBUGP("calling handler\n");
281 err = nc->call(nfnl, skb, nlh, cda, errp);
282 *errp = err;
283 return err;
284 }
285
286err_inval:
287 DEBUGP("returning -EINVAL\n");
288 *errp = -EINVAL;
289 return -1;
290}
291
292/* Process one packet of messages. */
293static inline int nfnetlink_rcv_skb(struct sk_buff *skb)
294{
295 int err;
296 struct nlmsghdr *nlh;
297
298 while (skb->len >= NLMSG_SPACE(0)) {
299 u32 rlen;
300
301 nlh = (struct nlmsghdr *)skb->data;
302 if (nlh->nlmsg_len < sizeof(struct nlmsghdr)
303 || skb->len < nlh->nlmsg_len)
304 return 0;
305 rlen = NLMSG_ALIGN(nlh->nlmsg_len);
306 if (rlen > skb->len)
307 rlen = skb->len;
308 if (nfnetlink_rcv_msg(skb, nlh, &err)) {
309 if (!err)
310 return -1;
311 netlink_ack(skb, nlh, err);
312 } else
313 if (nlh->nlmsg_flags & NLM_F_ACK)
314 netlink_ack(skb, nlh, 0);
315 skb_pull(skb, rlen);
316 }
317
318 return 0;
319}
320
321static void nfnetlink_rcv(struct sock *sk, int len)
322{
323 do {
324 struct sk_buff *skb;
325
326 if (nfnl_shlock_nowait())
327 return;
328
329 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
330 if (nfnetlink_rcv_skb(skb)) {
331 if (skb->len)
332 skb_queue_head(&sk->sk_receive_queue,
333 skb);
334 else
335 kfree_skb(skb);
336 break;
337 }
338 kfree_skb(skb);
339 }
340
341 /* don't call nfnl_shunlock, since it would reenter
342 * with further packet processing */
343 up(&nfnl_sem);
344 } while(nfnl && nfnl->sk_receive_queue.qlen);
345}
346
347static void __exit nfnetlink_exit(void)
348{
349 printk("Removing netfilter NETLINK layer.\n");
350 sock_release(nfnl->sk_socket);
351 return;
352}
353
354static int __init nfnetlink_init(void)
355{
356 printk("Netfilter messages via NETLINK v%s.\n", nfversion);
357
358 nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX,
359 nfnetlink_rcv, THIS_MODULE);
360 if (!nfnl) {
361 printk(KERN_ERR "cannot initialize nfnetlink!\n");
362 return -1;
363 }
364
365 return 0;
366}
367
368module_init(nfnetlink_init);
369module_exit(nfnetlink_exit);
370
371EXPORT_SYMBOL_GPL(nfnetlink_subsys_register);
372EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister);
373EXPORT_SYMBOL_GPL(nfnetlink_send);
374EXPORT_SYMBOL_GPL(nfnetlink_unicast);
375EXPORT_SYMBOL_GPL(nfattr_parse);
376EXPORT_SYMBOL_GPL(__nfa_fill);
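A subsystem hands nfnetlink a table of per-message callbacks indexed by NFNL_MSG_TYPE(). A skeletal registration, with the subsystem ID and all names purely hypothetical; the fields mirror the ones dereferenced above (subsys_id, cb_count, cb, call, attr_count, cap_required):

	#include <linux/capability.h>
	#include <linux/netlink.h>
	#include <linux/netfilter/nfnetlink.h>

	static int example_msg(struct sock *nl, struct sk_buff *skb,
			       struct nlmsghdr *nlh, struct nfattr *cda[],
			       int *errp)
	{
		/* cda[] arrives parsed and length-checked according to
		 * .attr_count below */
		return 0;
	}

	static struct nfnl_callback example_cb[] = {
		[0] = {
			.call         = example_msg,
			.attr_count   = 1,
			.cap_required = CAP_NET_ADMIN,	/* checked first */
		},
	};

	static struct nfnetlink_subsystem example_subsys = {
		.subsys_id = 2,	/* hypothetical NFNL_SUBSYS_* slot */
		.cb_count  = 1,
		.cb        = example_cb,
	};

	/* nfnetlink_subsys_register(&example_subsys) returns -EBUSY if
	 * the slot is taken; unregister on module exit. */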
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
new file mode 100644
index 000000000000..ff5601ceedcb
--- /dev/null
+++ b/net/netfilter/nfnetlink_log.c
@@ -0,0 +1,1055 @@
1/*
2 * This is a module which is used for logging packets to userspace via
3 * nfnetlink.
4 *
5 * (C) 2005 by Harald Welte <laforge@netfilter.org>
6 *
7 * Based on the old ipv4-only ipt_ULOG.c:
8 * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 *
14 */
15#include <linux/module.h>
16#include <linux/skbuff.h>
17#include <linux/init.h>
18#include <linux/ip.h>
19#include <linux/ipv6.h>
20#include <linux/netdevice.h>
21#include <linux/netfilter.h>
22#include <linux/netlink.h>
23#include <linux/netfilter/nfnetlink.h>
24#include <linux/netfilter/nfnetlink_log.h>
25#include <linux/spinlock.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/security.h>
29#include <linux/list.h>
30#include <linux/jhash.h>
31#include <linux/random.h>
32#include <net/sock.h>
33
34#include <asm/atomic.h>
35
36#ifdef CONFIG_BRIDGE_NETFILTER
37#include "../bridge/br_private.h"
38#endif
39
40#define NFULNL_NLBUFSIZ_DEFAULT 4096
41#define NFULNL_TIMEOUT_DEFAULT 100 /* every second */
42#define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */
43
44#define PRINTR(x, args...) do { if (net_ratelimit()) \
45 printk(x, ## args); } while (0);
46
47#if 0
48#define UDEBUG(x, args ...) printk(KERN_DEBUG "%s(%d):%s(): " x, \
49 __FILE__, __LINE__, __FUNCTION__, \
50 ## args)
51#else
52#define UDEBUG(x, ...)
53#endif
54
55struct nfulnl_instance {
56 struct hlist_node hlist; /* global list of instances */
57 spinlock_t lock;
58 atomic_t use; /* use count */
59
60 unsigned int qlen; /* number of nlmsgs in skb */
61 struct sk_buff *skb; /* pre-allocated skb */
62 struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */
63 struct timer_list timer;
64 int peer_pid; /* PID of the peer process */
65
66 /* configurable parameters */
67 unsigned int flushtimeout; /* timeout until queue flush */
68 unsigned int nlbufsiz; /* netlink buffer allocation size */
69 unsigned int qthreshold; /* threshold of the queue */
70 u_int32_t copy_range;
71 u_int16_t group_num; /* number of this queue */
72 u_int8_t copy_mode;
73};
74
75static DEFINE_RWLOCK(instances_lock);
76
77#define INSTANCE_BUCKETS 16
78static struct hlist_head instance_table[INSTANCE_BUCKETS];
79static unsigned int hash_init;
80
81static inline u_int8_t instance_hashfn(u_int16_t group_num)
82{
83 return ((group_num & 0xff) % INSTANCE_BUCKETS);
84}
85
86static struct nfulnl_instance *
87__instance_lookup(u_int16_t group_num)
88{
89 struct hlist_head *head;
90 struct hlist_node *pos;
91 struct nfulnl_instance *inst;
92
93 UDEBUG("entering (group_num=%u)\n", group_num);
94
95 head = &instance_table[instance_hashfn(group_num)];
96 hlist_for_each_entry(inst, pos, head, hlist) {
97 if (inst->group_num == group_num)
98 return inst;
99 }
100 return NULL;
101}
102
103static inline void
104instance_get(struct nfulnl_instance *inst)
105{
106 atomic_inc(&inst->use);
107}
108
109static struct nfulnl_instance *
110instance_lookup_get(u_int16_t group_num)
111{
112 struct nfulnl_instance *inst;
113
114 read_lock_bh(&instances_lock);
115 inst = __instance_lookup(group_num);
116 if (inst)
117 instance_get(inst);
118 read_unlock_bh(&instances_lock);
119
120 return inst;
121}
122
123static void
124instance_put(struct nfulnl_instance *inst)
125{
126 if (inst && atomic_dec_and_test(&inst->use)) {
127 UDEBUG("kfree(inst=%p)\n", inst);
128 kfree(inst);
129 }
130}
131
132static void nfulnl_timer(unsigned long data);
133
134static struct nfulnl_instance *
135instance_create(u_int16_t group_num, int pid)
136{
137 struct nfulnl_instance *inst;
138
139 UDEBUG("entering (group_num=%u, pid=%d)\n", group_num,
140 pid);
141
142 write_lock_bh(&instances_lock);
143 if (__instance_lookup(group_num)) {
144 inst = NULL;
145 UDEBUG("aborting, instance already exists\n");
146 goto out_unlock;
147 }
148
149 inst = kmalloc(sizeof(*inst), GFP_ATOMIC);
150 if (!inst)
151 goto out_unlock;
152
153 memset(inst, 0, sizeof(*inst));
154 INIT_HLIST_NODE(&inst->hlist);
155 inst->lock = SPIN_LOCK_UNLOCKED;
156 /* needs to be two, since we _put() after creation */
157 atomic_set(&inst->use, 2);
158
159 init_timer(&inst->timer);
160 inst->timer.function = nfulnl_timer;
161 inst->timer.data = (unsigned long)inst;
162 /* don't start timer yet. (re)start it with every packet */
163
164 inst->peer_pid = pid;
165 inst->group_num = group_num;
166
167 inst->qthreshold = NFULNL_QTHRESH_DEFAULT;
168 inst->flushtimeout = NFULNL_TIMEOUT_DEFAULT;
169 inst->nlbufsiz = NFULNL_NLBUFSIZ_DEFAULT;
170 inst->copy_mode = NFULNL_COPY_PACKET;
171 inst->copy_range = 0xffff;
172
173 if (!try_module_get(THIS_MODULE))
174 goto out_free;
175
176 hlist_add_head(&inst->hlist,
177 &instance_table[instance_hashfn(group_num)]);
178
179 UDEBUG("newly added node: %p, next=%p\n", &inst->hlist,
180 inst->hlist.next);
181
182 write_unlock_bh(&instances_lock);
183
184 return inst;
185
186out_free:
187 instance_put(inst);
188out_unlock:
189 write_unlock_bh(&instances_lock);
190 return NULL;
191}
192
193static int __nfulnl_send(struct nfulnl_instance *inst);
194
195static void
196_instance_destroy2(struct nfulnl_instance *inst, int lock)
197{
198 /* first pull it out of the global list */
199 if (lock)
200 write_lock_bh(&instances_lock);
201
202 UDEBUG("removing instance %p (queuenum=%u) from hash\n",
203 inst, inst->group_num);
204
205 hlist_del(&inst->hlist);
206
207 if (lock)
208 write_unlock_bh(&instances_lock);
209
210 /* then flush all pending packets from skb */
211
212 spin_lock_bh(&inst->lock);
213 if (inst->skb) {
214 if (inst->qlen)
215 __nfulnl_send(inst);
216 if (inst->skb) {
217 kfree_skb(inst->skb);
218 inst->skb = NULL;
219 }
220 }
221 spin_unlock_bh(&inst->lock);
222
223 /* and finally put the refcount */
224 instance_put(inst);
225
226 module_put(THIS_MODULE);
227}
228
229static inline void
230__instance_destroy(struct nfulnl_instance *inst)
231{
232 _instance_destroy2(inst, 0);
233}
234
235static inline void
236instance_destroy(struct nfulnl_instance *inst)
237{
238 _instance_destroy2(inst, 1);
239}
240
241static int
242nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode,
243 unsigned int range)
244{
245 int status = 0;
246
247 spin_lock_bh(&inst->lock);
248
249 switch (mode) {
250 case NFULNL_COPY_NONE:
251 case NFULNL_COPY_META:
252 inst->copy_mode = mode;
253 inst->copy_range = 0;
254 break;
255
256 case NFULNL_COPY_PACKET:
257 inst->copy_mode = mode;
258 /* we're using struct nfattr which has 16bit nfa_len */
259 if (range > 0xffff)
260 inst->copy_range = 0xffff;
261 else
262 inst->copy_range = range;
263 break;
264
265 default:
266 status = -EINVAL;
267 break;
268 }
269
270 spin_unlock_bh(&inst->lock);
271
272 return status;
273}
274
275static int
276nfulnl_set_nlbufsiz(struct nfulnl_instance *inst, u_int32_t nlbufsiz)
277{
278 int status;
279
280 spin_lock_bh(&inst->lock);
281 if (nlbufsiz < NFULNL_NLBUFSIZ_DEFAULT)
282 status = -ERANGE;
283 else if (nlbufsiz > 131072)
284 status = -ERANGE;
285 else {
286 inst->nlbufsiz = nlbufsiz;
287 status = 0;
288 }
289 spin_unlock_bh(&inst->lock);
290
291 return status;
292}
293
294static int
295nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout)
296{
297 spin_lock_bh(&inst->lock);
298 inst->flushtimeout = timeout;
299 spin_unlock_bh(&inst->lock);
300
301 return 0;
302}
303
304static int
305nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh)
306{
307 spin_lock_bh(&inst->lock);
308 inst->qthreshold = qthresh;
309 spin_unlock_bh(&inst->lock);
310
311 return 0;
312}
313
314static struct sk_buff *nfulnl_alloc_skb(unsigned int inst_size,
315 unsigned int pkt_size)
316{
317 struct sk_buff *skb;
318
319 UDEBUG("entered (%u, %u)\n", inst_size, pkt_size);
320
321 /* alloc skb which should be big enough for a whole multipart
322 * message. WARNING: has to be <= 128k due to slab restrictions */
323
324 skb = alloc_skb(inst_size, GFP_ATOMIC);
325 if (!skb) {
326 PRINTR("nfnetlink_log: can't alloc whole buffer (%u bytes)\n",
327 inst_size);
328
329 /* try to allocate only as much as we need for current
330 * packet */
331
332 skb = alloc_skb(pkt_size, GFP_ATOMIC);
333 if (!skb)
334 PRINTR("nfnetlink_log: can't even alloc %u bytes\n",
335 pkt_size);
336 }
337
338 return skb;
339}
340
341static int
342__nfulnl_send(struct nfulnl_instance *inst)
343{
344 int status;
345
346 if (timer_pending(&inst->timer))
347 del_timer(&inst->timer);
348
349 if (inst->qlen > 1)
350 inst->lastnlh->nlmsg_type = NLMSG_DONE;
351
352 status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT);
353 if (status < 0) {
354 UDEBUG("netlink_unicast() failed\n");
355 /* FIXME: statistics */
356 }
357
358 inst->qlen = 0;
359 inst->skb = NULL;
360 inst->lastnlh = NULL;
361
362 return status;
363}
364
365static void nfulnl_timer(unsigned long data)
366{
367 struct nfulnl_instance *inst = (struct nfulnl_instance *)data;
368
369 UDEBUG("timer function called, flushing buffer\n");
370
371 spin_lock_bh(&inst->lock);
372 __nfulnl_send(inst);
373 instance_put(inst);
374 spin_unlock_bh(&inst->lock);
375}
376
377static inline int
378__build_packet_message(struct nfulnl_instance *inst,
379 const struct sk_buff *skb,
380 unsigned int data_len,
381 unsigned int pf,
382 unsigned int hooknum,
383 const struct net_device *indev,
384 const struct net_device *outdev,
385 const struct nf_loginfo *li,
386 const char *prefix)
387{
388 unsigned char *old_tail;
389 struct nfulnl_msg_packet_hdr pmsg;
390 struct nlmsghdr *nlh;
391 struct nfgenmsg *nfmsg;
392 u_int32_t tmp_uint;
393
394 UDEBUG("entered\n");
395
396 old_tail = inst->skb->tail;
397 nlh = NLMSG_PUT(inst->skb, 0, 0,
398 NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET,
399 sizeof(struct nfgenmsg));
400 nfmsg = NLMSG_DATA(nlh);
401 nfmsg->nfgen_family = pf;
402 nfmsg->version = NFNETLINK_V0;
403 nfmsg->res_id = htons(inst->group_num);
404
405 pmsg.hw_protocol = htons(skb->protocol);
406 pmsg.hook = hooknum;
407
408 NFA_PUT(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg);
409
410 if (prefix) {
411 int slen = strlen(prefix);
412 if (slen > NFULNL_PREFIXLEN)
413 slen = NFULNL_PREFIXLEN;
414 NFA_PUT(inst->skb, NFULA_PREFIX, slen, prefix);
415 }
416
417 if (indev) {
418 tmp_uint = htonl(indev->ifindex);
419#ifndef CONFIG_BRIDGE_NETFILTER
420 NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint),
421 &tmp_uint);
422#else
423 if (pf == PF_BRIDGE) {
424 * Case 1: indev is physical input device, we need to
425 * look for bridge group (when called from
426 * netfilter_bridge) */
427 NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV,
428 sizeof(tmp_uint), &tmp_uint);
429 /* this is the bridge group "brX" */
430 tmp_uint = htonl(indev->br_port->br->dev->ifindex);
431 NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV,
432 sizeof(tmp_uint), &tmp_uint);
433 } else {
434 /* Case 2: indev is bridge group, we need to look for
435 * physical device (when called from ipv4) */
436 NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV,
437 sizeof(tmp_uint), &tmp_uint);
438 if (skb->nf_bridge && skb->nf_bridge->physindev) {
439 tmp_uint =
440 htonl(skb->nf_bridge->physindev->ifindex);
441 NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV,
442 sizeof(tmp_uint), &tmp_uint);
443 }
444 }
445#endif
446 }
447
448 if (outdev) {
449 tmp_uint = htonl(outdev->ifindex);
450#ifndef CONFIG_BRIDGE_NETFILTER
451 NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint),
452 &tmp_uint);
453#else
454 if (pf == PF_BRIDGE) {
455 /* Case 1: outdev is physical output device, we need to
456 * look for bridge group (when called from
457 * netfilter_bridge) */
458 NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
459 sizeof(tmp_uint), &tmp_uint);
460 /* this is the bridge group "brX" */
461 tmp_uint = htonl(outdev->br_port->br->dev->ifindex);
462 NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV,
463 sizeof(tmp_uint), &tmp_uint);
464 } else {
465 * Case 2: outdev is a bridge group, we need to look
466 * for physical device (when called from ipv4) */
467 NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV,
468 sizeof(tmp_uint), &tmp_uint);
469 if (skb->nf_bridge && skb->nf_bridge->physoutdev) {
470 tmp_uint =
471 htonl(skb->nf_bridge->physoutdev->ifindex);
472 NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
473 sizeof(tmp_uint), &tmp_uint);
474 }
475 }
476#endif
477 }
478
479 if (skb->nfmark) {
480 tmp_uint = htonl(skb->nfmark);
481 NFA_PUT(inst->skb, NFULA_MARK, sizeof(tmp_uint), &tmp_uint);
482 }
483
484 if (indev && skb->dev && skb->dev->hard_header_parse) {
485 struct nfulnl_msg_packet_hw phw;
486
487 phw.hw_addrlen =
488 skb->dev->hard_header_parse((struct sk_buff *)skb,
489 phw.hw_addr);
490 phw.hw_addrlen = htons(phw.hw_addrlen);
491 NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw);
492 }
493
494 if (skb->tstamp.off_sec) {
495 struct nfulnl_msg_packet_timestamp ts;
496
497 ts.sec = cpu_to_be64(skb_tv_base.tv_sec + skb->tstamp.off_sec);
498 ts.usec = cpu_to_be64(skb_tv_base.tv_usec + skb->tstamp.off_usec);
499
500 NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts);
501 }
502
503 /* UID */
504 if (skb->sk) {
505 read_lock_bh(&skb->sk->sk_callback_lock);
506 if (skb->sk->sk_socket && skb->sk->sk_socket->file) {
507 u_int32_t uid = htonl(skb->sk->sk_socket->file->f_uid);
508 /* need to unlock here since NFA_PUT may goto */
509 read_unlock_bh(&skb->sk->sk_callback_lock);
510 NFA_PUT(inst->skb, NFULA_UID, sizeof(uid), &uid);
511 } else
512 read_unlock_bh(&skb->sk->sk_callback_lock);
513 }
514
515 if (data_len) {
516 struct nfattr *nfa;
517 int size = NFA_LENGTH(data_len);
518
519 if (skb_tailroom(inst->skb) < (int)NFA_SPACE(data_len)) {
520 printk(KERN_WARNING "nfnetlink_log: no tailroom!\n");
521 goto nlmsg_failure;
522 }
523
524 nfa = (struct nfattr *)skb_put(inst->skb, NFA_ALIGN(size));
525 nfa->nfa_type = NFULA_PAYLOAD;
526 nfa->nfa_len = size;
527
528 if (skb_copy_bits(skb, 0, NFA_DATA(nfa), data_len))
529 BUG();
530 }
531
532 nlh->nlmsg_len = inst->skb->tail - old_tail;
533 return 0;
534
535nlmsg_failure:
536 UDEBUG("nlmsg_failure\n");
537nfattr_failure:
538 PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n");
539 return -1;
540}
541
542#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
543
544static struct nf_loginfo default_loginfo = {
545 .type = NF_LOG_TYPE_ULOG,
546 .u = {
547 .ulog = {
548 .copy_len = 0xffff,
549 .group = 0,
550 .qthreshold = 1,
551 },
552 },
553};
554
555/* log handler for internal netfilter logging api */
556static void
557nfulnl_log_packet(unsigned int pf,
558 unsigned int hooknum,
559 const struct sk_buff *skb,
560 const struct net_device *in,
561 const struct net_device *out,
562 const struct nf_loginfo *li_user,
563 const char *prefix)
564{
565 unsigned int size, data_len;
566 struct nfulnl_instance *inst;
567 const struct nf_loginfo *li;
568 unsigned int qthreshold;
569 unsigned int nlbufsiz;
570
571 if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
572 li = li_user;
573 else
574 li = &default_loginfo;
575
576 inst = instance_lookup_get(li->u.ulog.group);
577 if (!inst)
578 inst = instance_lookup_get(0);
579 if (!inst) {
580 PRINTR("nfnetlink_log: trying to log packet, "
581 "but no instance for group %u\n", li->u.ulog.group);
582 return;
583 }
584
585 /* all macros expand to constant values at compile time */
586 /* FIXME: do we want to make the size calculation conditional based on
587 * what is actually present? way more branches and checks, but more
588 * memory efficient... */
589 size = NLMSG_SPACE(sizeof(struct nfgenmsg))
590 + NFA_SPACE(sizeof(struct nfulnl_msg_packet_hdr))
591 + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */
592 + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */
593#ifdef CONFIG_BRIDGE_NETFILTER
594 + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */
595 + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */
596#endif
597 + NFA_SPACE(sizeof(u_int32_t)) /* mark */
598 + NFA_SPACE(sizeof(u_int32_t)) /* uid */
599 + NFA_SPACE(NFULNL_PREFIXLEN) /* prefix */
600 + NFA_SPACE(sizeof(struct nfulnl_msg_packet_hw))
601 + NFA_SPACE(sizeof(struct nfulnl_msg_packet_timestamp));
602
603 UDEBUG("initial size=%u\n", size);
604
605 spin_lock_bh(&inst->lock);
606
607 qthreshold = inst->qthreshold;
608 /* per-rule qthreshold overrides per-instance */
609 if (qthreshold > li->u.ulog.qthreshold)
610 qthreshold = li->u.ulog.qthreshold;
611
612 switch (inst->copy_mode) {
613 case NFULNL_COPY_META:
614 case NFULNL_COPY_NONE:
615 data_len = 0;
616 break;
617
618 case NFULNL_COPY_PACKET:
619 if (inst->copy_range == 0
620 || inst->copy_range > skb->len)
621 data_len = skb->len;
622 else
623 data_len = inst->copy_range;
624
625 size += NFA_SPACE(data_len);
626 UDEBUG("copy_packet, therefore size now %u\n", size);
627 break;
628
629 default:
630 spin_unlock_bh(&inst->lock);
631 instance_put(inst);
632 return;
633 }
634
635 if (size > inst->nlbufsiz)
636 nlbufsiz = size;
637 else
638 nlbufsiz = inst->nlbufsiz;
639
640 if (!inst->skb) {
641 if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) {
642 UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n",
643 inst->nlbufsiz, size);
644 goto alloc_failure;
645 }
646 } else if (inst->qlen >= qthreshold ||
647 size > skb_tailroom(inst->skb)) {
648 /* either the queue len is too high or we don't have
649 * enough room in the skb left. flush to userspace. */
650 UDEBUG("flushing old skb\n");
651
652 __nfulnl_send(inst);
653
654 if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) {
655 UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n",
656 inst->nlbufsiz, size);
657 goto alloc_failure;
658 }
659 }
660
661 UDEBUG("qlen %d, qthreshold %d\n", inst->qlen, qthreshold);
662 inst->qlen++;
663
664 __build_packet_message(inst, skb, data_len, pf,
665 hooknum, in, out, li, prefix);
666
667 /* timer_pending always called within inst->lock, so there
668 * is no chance of a race here */
669 if (!timer_pending(&inst->timer)) {
670 instance_get(inst);
671 inst->timer.expires = jiffies + (inst->flushtimeout*HZ/100);
672 add_timer(&inst->timer);
673 }
674 spin_unlock_bh(&inst->lock);
675
676 return;
677
678alloc_failure:
679 spin_unlock_bh(&inst->lock);
680 instance_put(inst);
681 UDEBUG("error allocating skb\n");
682 /* FIXME: statistics */
683}
684
685static int
686nfulnl_rcv_nl_event(struct notifier_block *this,
687 unsigned long event, void *ptr)
688{
689 struct netlink_notify *n = ptr;
690
691 if (event == NETLINK_URELEASE &&
692 n->protocol == NETLINK_NETFILTER && n->pid) {
693 int i;
694
695 /* destroy all instances for this pid */
696 write_lock_bh(&instances_lock);
697 for (i = 0; i < INSTANCE_BUCKETS; i++) {
698 struct hlist_node *tmp, *t2;
699 struct nfulnl_instance *inst;
700 struct hlist_head *head = &instance_table[i];
701
702 hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
703 UDEBUG("node = %p\n", inst);
704 if (n->pid == inst->peer_pid)
705 __instance_destroy(inst);
706 }
707 }
708 write_unlock_bh(&instances_lock);
709 }
710 return NOTIFY_DONE;
711}
712
713static struct notifier_block nfulnl_rtnl_notifier = {
714 .notifier_call = nfulnl_rcv_nl_event,
715};
716
717static int
718nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
719 struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
720{
721 return -ENOTSUPP;
722}
723
724static struct nf_logger nfulnl_logger = {
725 .name = "nfnetlink_log",
726 .logfn = &nfulnl_log_packet,
727 .me = THIS_MODULE,
728};
729
730static const int nfula_min[NFULA_MAX] = {
731 [NFULA_PACKET_HDR-1] = sizeof(struct nfulnl_msg_packet_hdr),
732 [NFULA_MARK-1] = sizeof(u_int32_t),
733 [NFULA_TIMESTAMP-1] = sizeof(struct nfulnl_msg_packet_timestamp),
734 [NFULA_IFINDEX_INDEV-1] = sizeof(u_int32_t),
735 [NFULA_IFINDEX_OUTDEV-1]= sizeof(u_int32_t),
736 [NFULA_HWADDR-1] = sizeof(struct nfulnl_msg_packet_hw),
737 [NFULA_PAYLOAD-1] = 0,
738 [NFULA_PREFIX-1] = 0,
739 [NFULA_UID-1] = sizeof(u_int32_t),
740};
741
742static const int nfula_cfg_min[NFULA_CFG_MAX] = {
743 [NFULA_CFG_CMD-1] = sizeof(struct nfulnl_msg_config_cmd),
744 [NFULA_CFG_MODE-1] = sizeof(struct nfulnl_msg_config_mode),
745 [NFULA_CFG_TIMEOUT-1] = sizeof(u_int32_t),
746 [NFULA_CFG_QTHRESH-1] = sizeof(u_int32_t),
747 [NFULA_CFG_NLBUFSIZ-1] = sizeof(u_int32_t),
748};
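
Both tables above feed nfattr_bad_size(), which rejects a message when any attribute that is present carries a payload shorter than its declared minimum. A simplified restatement of that check, not the kernel's exact code; attr_len and present are stand-ins for the parsed attribute table:

	#include <stddef.h>

	int attrs_bad_size(const size_t *attr_len, const int *present,
			   int maxtype, const int *min)
	{
		int i;

		for (i = 0; i < maxtype; i++) {
			if (present[i] && attr_len[i] < (size_t)min[i])
				return 1;	/* reject the whole message */
		}
		return 0;
	}
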
749
750static int
751nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
752 struct nlmsghdr *nlh, struct nfattr *nfula[], int *errp)
753{
754 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
755 u_int16_t group_num = ntohs(nfmsg->res_id);
756 struct nfulnl_instance *inst;
757 int ret = 0;
758
759 UDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));
760
761 if (nfattr_bad_size(nfula, NFULA_CFG_MAX, nfula_cfg_min)) {
762 UDEBUG("bad attribute size\n");
763 return -EINVAL;
764 }
765
766 inst = instance_lookup_get(group_num);
767 if (nfula[NFULA_CFG_CMD-1]) {
768 u_int8_t pf = nfmsg->nfgen_family;
769 struct nfulnl_msg_config_cmd *cmd;
770 cmd = NFA_DATA(nfula[NFULA_CFG_CMD-1]);
771		UDEBUG("found CFG_CMD\n");
772
773 switch (cmd->command) {
774 case NFULNL_CFG_CMD_BIND:
775 if (inst) {
776 ret = -EBUSY;
777 goto out_put;
778 }
779
780 inst = instance_create(group_num,
781 NETLINK_CB(skb).pid);
782 if (!inst) {
783 ret = -EINVAL;
784 goto out_put;
785 }
786 break;
787 case NFULNL_CFG_CMD_UNBIND:
788 if (!inst) {
789 ret = -ENODEV;
790 goto out_put;
791 }
792
793 if (inst->peer_pid != NETLINK_CB(skb).pid) {
794 ret = -EPERM;
795 goto out_put;
796 }
797
798 instance_destroy(inst);
799 break;
800 case NFULNL_CFG_CMD_PF_BIND:
801 UDEBUG("registering log handler for pf=%u\n", pf);
802 ret = nf_log_register(pf, &nfulnl_logger);
803 break;
804 case NFULNL_CFG_CMD_PF_UNBIND:
805 UDEBUG("unregistering log handler for pf=%u\n", pf);
806 /* This is a bug and a feature. We cannot unregister
807 * other handlers, like nfnetlink_inst can */
808 nf_log_unregister_pf(pf);
809 break;
810 default:
811 ret = -EINVAL;
812 break;
813 }
814 } else {
815 if (!inst) {
816 UDEBUG("no config command, and no instance for "
817			       "group=%u pid=%u => ENOENT\n",
818 group_num, NETLINK_CB(skb).pid);
819 ret = -ENOENT;
820 goto out_put;
821 }
822
823 if (inst->peer_pid != NETLINK_CB(skb).pid) {
824 UDEBUG("no config command, and wrong pid\n");
825 ret = -EPERM;
826 goto out_put;
827 }
828 }
829
830 if (nfula[NFULA_CFG_MODE-1]) {
831 struct nfulnl_msg_config_mode *params;
832 params = NFA_DATA(nfula[NFULA_CFG_MODE-1]);
833
834 nfulnl_set_mode(inst, params->copy_mode,
835 ntohs(params->copy_range));
836 }
837
838 if (nfula[NFULA_CFG_TIMEOUT-1]) {
839 u_int32_t timeout =
840 *(u_int32_t *)NFA_DATA(nfula[NFULA_CFG_TIMEOUT-1]);
841
842 nfulnl_set_timeout(inst, ntohl(timeout));
843 }
844
845 if (nfula[NFULA_CFG_NLBUFSIZ-1]) {
846 u_int32_t nlbufsiz =
847 *(u_int32_t *)NFA_DATA(nfula[NFULA_CFG_NLBUFSIZ-1]);
848
849 nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz));
850 }
851
852 if (nfula[NFULA_CFG_QTHRESH-1]) {
853 u_int32_t qthresh =
854			*(u_int32_t *)NFA_DATA(nfula[NFULA_CFG_QTHRESH-1]);
855
856 nfulnl_set_qthresh(inst, ntohl(qthresh));
857 }
858
859out_put:
860 instance_put(inst);
861 return ret;
862}
863
864static struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = {
865 [NFULNL_MSG_PACKET] = { .call = nfulnl_recv_unsupp,
866 .attr_count = NFULA_MAX,
867 .cap_required = CAP_NET_ADMIN, },
868 [NFULNL_MSG_CONFIG] = { .call = nfulnl_recv_config,
869 .attr_count = NFULA_CFG_MAX,
870 .cap_required = CAP_NET_ADMIN },
871};
872
873static struct nfnetlink_subsystem nfulnl_subsys = {
874 .name = "log",
875 .subsys_id = NFNL_SUBSYS_ULOG,
876 .cb_count = NFULNL_MSG_MAX,
877 .cb = nfulnl_cb,
878};
879
880#ifdef CONFIG_PROC_FS
881struct iter_state {
882 unsigned int bucket;
883};
884
885static struct hlist_node *get_first(struct seq_file *seq)
886{
887 struct iter_state *st = seq->private;
888
889 if (!st)
890 return NULL;
891
892 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
893 if (!hlist_empty(&instance_table[st->bucket]))
894 return instance_table[st->bucket].first;
895 }
896 return NULL;
897}
898
899static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
900{
901 struct iter_state *st = seq->private;
902
903 h = h->next;
904 while (!h) {
905 if (++st->bucket >= INSTANCE_BUCKETS)
906 return NULL;
907
908 h = instance_table[st->bucket].first;
909 }
910 return h;
911}
912
913static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
914{
915 struct hlist_node *head;
916 head = get_first(seq);
917
918 if (head)
919 while (pos && (head = get_next(seq, head)))
920 pos--;
921 return pos ? NULL : head;
922}
923
924static void *seq_start(struct seq_file *seq, loff_t *pos)
925{
926 read_lock_bh(&instances_lock);
927 return get_idx(seq, *pos);
928}
929
930static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
931{
932 (*pos)++;
933 return get_next(s, v);
934}
935
936static void seq_stop(struct seq_file *s, void *v)
937{
938 read_unlock_bh(&instances_lock);
939}
940
941static int seq_show(struct seq_file *s, void *v)
942{
943 const struct nfulnl_instance *inst = v;
944
945 return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n",
946 inst->group_num,
947 inst->peer_pid, inst->qlen,
948 inst->copy_mode, inst->copy_range,
949 inst->flushtimeout, atomic_read(&inst->use));
950}
951
952static struct seq_operations nful_seq_ops = {
953 .start = seq_start,
954 .next = seq_next,
955 .stop = seq_stop,
956 .show = seq_show,
957};
958
959static int nful_open(struct inode *inode, struct file *file)
960{
961 struct seq_file *seq;
962 struct iter_state *is;
963 int ret;
964
965 is = kmalloc(sizeof(*is), GFP_KERNEL);
966 if (!is)
967 return -ENOMEM;
968 memset(is, 0, sizeof(*is));
969 ret = seq_open(file, &nful_seq_ops);
970 if (ret < 0)
971 goto out_free;
972 seq = file->private_data;
973 seq->private = is;
974 return ret;
975out_free:
976 kfree(is);
977 return ret;
978}
979
980static struct file_operations nful_file_ops = {
981 .owner = THIS_MODULE,
982 .open = nful_open,
983 .read = seq_read,
984 .llseek = seq_lseek,
985 .release = seq_release_private,
986};
987
988#endif /* PROC_FS */
989
990static int
991init_or_cleanup(int init)
992{
993 int i, status = -ENOMEM;
994#ifdef CONFIG_PROC_FS
995 struct proc_dir_entry *proc_nful;
996#endif
997
998 if (!init)
999 goto cleanup;
1000
1001 for (i = 0; i < INSTANCE_BUCKETS; i++)
1002 INIT_HLIST_HEAD(&instance_table[i]);
1003
1004 /* it's not really all that important to have a random value, so
1005 * we can do this from the init function, even if there hasn't
1006 * been that much entropy yet */
1007 get_random_bytes(&hash_init, sizeof(hash_init));
1008
1009 netlink_register_notifier(&nfulnl_rtnl_notifier);
1010 status = nfnetlink_subsys_register(&nfulnl_subsys);
1011 if (status < 0) {
1012 printk(KERN_ERR "log: failed to create netlink socket\n");
1013 goto cleanup_netlink_notifier;
1014 }
1015
1016#ifdef CONFIG_PROC_FS
1017 proc_nful = create_proc_entry("nfnetlink_log", 0440,
1018 proc_net_netfilter);
1019 if (!proc_nful)
1020 goto cleanup_subsys;
1021 proc_nful->proc_fops = &nful_file_ops;
1022#endif
1023
1024 return status;
1025
1026cleanup:
1027 nf_log_unregister_logger(&nfulnl_logger);
1028#ifdef CONFIG_PROC_FS
1029 remove_proc_entry("nfnetlink_log", proc_net_netfilter);
1030cleanup_subsys:
1031#endif
1032 nfnetlink_subsys_unregister(&nfulnl_subsys);
1033cleanup_netlink_notifier:
1034 netlink_unregister_notifier(&nfulnl_rtnl_notifier);
1035 return status;
1036}
1037
1038static int __init init(void)
1039{
1040
1041 return init_or_cleanup(1);
1042}
1043
1044static void __exit fini(void)
1045{
1046 init_or_cleanup(0);
1047}
1048
1049MODULE_DESCRIPTION("netfilter userspace logging");
1050MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
1051MODULE_LICENSE("GPL");
1052MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG);
1053
1054module_init(init);
1055module_exit(fini);
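
For context, the intended userspace peer of this module is the separate libnetfilter_log library (an assumption here, not part of this patch). A minimal reader that binds group 0, exercising the NFULNL_CFG_CMD_PF_BIND and NFULNL_CFG_CMD_BIND paths above, and prints each logged packet's prefix might look like this; build with gcc log.c -lnetfilter_log:

	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/socket.h>
	#include <libnetfilter_log/libnetfilter_log.h>

	static int cb(struct nflog_g_handle *gh, struct nfgenmsg *nfmsg,
		      struct nflog_data *nfa, void *data)
	{
		char *prefix = nflog_get_prefix(nfa);
		char *payload;
		int len = nflog_get_payload(nfa, &payload);

		printf("prefix '%s', %d payload bytes\n",
		       prefix ? prefix : "", len);
		return 0;
	}

	int main(void)
	{
		struct nflog_handle *h = nflog_open();
		struct nflog_g_handle *gh;
		char buf[4096];
		int rv;

		if (!h)
			exit(1);
		nflog_bind_pf(h, AF_INET);	/* NFULNL_CFG_CMD_PF_BIND */
		gh = nflog_bind_group(h, 0);	/* NFULNL_CFG_CMD_BIND */
		nflog_set_mode(gh, NFULNL_COPY_PACKET, 0xffff);
		nflog_callback_register(gh, &cb, NULL);

		while ((rv = recv(nflog_fd(h), buf, sizeof(buf), 0)) > 0)
			nflog_handle_packet(h, buf, rv);	/* dispatches to cb */

		nflog_unbind_group(gh);
		nflog_close(h);
		return 0;
	}
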
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
new file mode 100644
index 000000000000..249bddb28acd
--- /dev/null
+++ b/net/netfilter/nfnetlink_queue.c
@@ -0,0 +1,1121 @@
1/*
2 * This is a module which is used for queueing packets and communicating with
3 * userspace via nfnetlink.
4 *
5 * (C) 2005 by Harald Welte <laforge@netfilter.org>
6 *
7 * Based on the old ipv4-only ip_queue.c:
8 * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
9 * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 *
15 */
16#include <linux/module.h>
17#include <linux/skbuff.h>
18#include <linux/init.h>
19#include <linux/spinlock.h>
20#include <linux/notifier.h>
21#include <linux/netdevice.h>
22#include <linux/netfilter.h>
23#include <linux/proc_fs.h>
24#include <linux/netfilter_ipv4.h>
25#include <linux/netfilter_ipv6.h>
26#include <linux/netfilter/nfnetlink.h>
27#include <linux/netfilter/nfnetlink_queue.h>
28#include <linux/list.h>
29#include <net/sock.h>
30
31#include <asm/atomic.h>
32
33#ifdef CONFIG_BRIDGE_NETFILTER
34#include "../bridge/br_private.h"
35#endif
36
37#define NFQNL_QMAX_DEFAULT 1024
38
39#if 0
40#define QDEBUG(x, args ...) printk(KERN_DEBUG "%s(%d):%s(): " x, \
41 __FILE__, __LINE__, __FUNCTION__, \
42 ## args)
43#else
44#define QDEBUG(x, ...)
45#endif
46
47struct nfqnl_queue_entry {
48 struct list_head list;
49 struct nf_info *info;
50 struct sk_buff *skb;
51 unsigned int id;
52};
53
54struct nfqnl_instance {
55 struct hlist_node hlist; /* global list of queues */
56 atomic_t use;
57
58 int peer_pid;
59 unsigned int queue_maxlen;
60 unsigned int copy_range;
61 unsigned int queue_total;
62 unsigned int queue_dropped;
63 unsigned int queue_user_dropped;
64
65 atomic_t id_sequence; /* 'sequence' of pkt ids */
66
67 u_int16_t queue_num; /* number of this queue */
68 u_int8_t copy_mode;
69
70 spinlock_t lock;
71
72 struct list_head queue_list; /* packets in queue */
73};
74
75typedef int (*nfqnl_cmpfn)(struct nfqnl_queue_entry *, unsigned long);
76
77static DEFINE_RWLOCK(instances_lock);
78
79#define INSTANCE_BUCKETS 16
80static struct hlist_head instance_table[INSTANCE_BUCKETS];
81
82static inline u_int8_t instance_hashfn(u_int16_t queue_num)
83{
84 return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS;
85}
86
87static struct nfqnl_instance *
88__instance_lookup(u_int16_t queue_num)
89{
90 struct hlist_head *head;
91 struct hlist_node *pos;
92 struct nfqnl_instance *inst;
93
94 head = &instance_table[instance_hashfn(queue_num)];
95 hlist_for_each_entry(inst, pos, head, hlist) {
96 if (inst->queue_num == queue_num)
97 return inst;
98 }
99 return NULL;
100}
101
102static struct nfqnl_instance *
103instance_lookup_get(u_int16_t queue_num)
104{
105 struct nfqnl_instance *inst;
106
107 read_lock_bh(&instances_lock);
108 inst = __instance_lookup(queue_num);
109 if (inst)
110 atomic_inc(&inst->use);
111 read_unlock_bh(&instances_lock);
112
113 return inst;
114}
115
116static void
117instance_put(struct nfqnl_instance *inst)
118{
119 if (inst && atomic_dec_and_test(&inst->use)) {
120 QDEBUG("kfree(inst=%p)\n", inst);
121 kfree(inst);
122 }
123}
124
125static struct nfqnl_instance *
126instance_create(u_int16_t queue_num, int pid)
127{
128 struct nfqnl_instance *inst;
129
130 QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid);
131
132 write_lock_bh(&instances_lock);
133 if (__instance_lookup(queue_num)) {
134 inst = NULL;
135 QDEBUG("aborting, instance already exists\n");
136 goto out_unlock;
137 }
138
139 inst = kmalloc(sizeof(*inst), GFP_ATOMIC);
140 if (!inst)
141 goto out_unlock;
142
143 memset(inst, 0, sizeof(*inst));
144 inst->queue_num = queue_num;
145 inst->peer_pid = pid;
146 inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
147	inst->copy_range = 0xffff;
148 inst->copy_mode = NFQNL_COPY_NONE;
149 atomic_set(&inst->id_sequence, 0);
150 /* needs to be two, since we _put() after creation */
151 atomic_set(&inst->use, 2);
152 inst->lock = SPIN_LOCK_UNLOCKED;
153 INIT_LIST_HEAD(&inst->queue_list);
154
155 if (!try_module_get(THIS_MODULE))
156 goto out_free;
157
158 hlist_add_head(&inst->hlist,
159 &instance_table[instance_hashfn(queue_num)]);
160
161 write_unlock_bh(&instances_lock);
162
163 QDEBUG("successfully created new instance\n");
164
165 return inst;
166
167out_free:
168 kfree(inst);
169out_unlock:
170 write_unlock_bh(&instances_lock);
171 return NULL;
172}
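
The use count of two in instance_create() encodes a common convention: one reference is owned by the global hash table, the other by the creating caller, who drops it via instance_put() once done with the pointer. The same convention restated as a self-contained userspace sketch (names illustrative, not the kernel's):

	#include <stdatomic.h>
	#include <stdlib.h>

	struct obj {
		atomic_int use;
	};

	struct obj *obj_create(void)
	{
		struct obj *o = calloc(1, sizeof(*o));

		if (o)
			atomic_store(&o->use, 2);	/* table ref + caller ref */
		return o;
	}

	void obj_put(struct obj *o)
	{
		/* free on the 1 -> 0 transition, like atomic_dec_and_test() */
		if (o && atomic_fetch_sub(&o->use, 1) == 1)
			free(o);
	}
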
173
174static void nfqnl_flush(struct nfqnl_instance *queue, int verdict);
175
176static void
177_instance_destroy2(struct nfqnl_instance *inst, int lock)
178{
179 /* first pull it out of the global list */
180 if (lock)
181 write_lock_bh(&instances_lock);
182
183 QDEBUG("removing instance %p (queuenum=%u) from hash\n",
184 inst, inst->queue_num);
185 hlist_del(&inst->hlist);
186
187 if (lock)
188 write_unlock_bh(&instances_lock);
189
190 /* then flush all pending skbs from the queue */
191 nfqnl_flush(inst, NF_DROP);
192
193 /* and finally put the refcount */
194 instance_put(inst);
195
196 module_put(THIS_MODULE);
197}
198
199static inline void
200__instance_destroy(struct nfqnl_instance *inst)
201{
202 _instance_destroy2(inst, 0);
203}
204
205static inline void
206instance_destroy(struct nfqnl_instance *inst)
207{
208 _instance_destroy2(inst, 1);
209}
210
211
212
213static void
214issue_verdict(struct nfqnl_queue_entry *entry, int verdict)
215{
216 QDEBUG("entering for entry %p, verdict %u\n", entry, verdict);
217
218	/* The TCP input path (and probably other bits) assumes it is
219	 * called from softirq context, not from a syscall the way
220	 * issue_verdict is called. It can deadlock on locks taken from
221	 * the timer softirq, e.g. We emulate this via local_bh_disable() */
222
223 local_bh_disable();
224 nf_reinject(entry->skb, entry->info, verdict);
225 local_bh_enable();
226
227 kfree(entry);
228}
229
230static inline void
231__enqueue_entry(struct nfqnl_instance *queue,
232 struct nfqnl_queue_entry *entry)
233{
234 list_add(&entry->list, &queue->queue_list);
235 queue->queue_total++;
236}
237
238/*
239 * Find and return a queued entry matched by cmpfn, or return the last
240 * entry if cmpfn is NULL.
241 */
242static inline struct nfqnl_queue_entry *
243__find_entry(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
244 unsigned long data)
245{
246 struct list_head *p;
247
248 list_for_each_prev(p, &queue->queue_list) {
249 struct nfqnl_queue_entry *entry = (struct nfqnl_queue_entry *)p;
250
251 if (!cmpfn || cmpfn(entry, data))
252 return entry;
253 }
254 return NULL;
255}
256
257static inline void
258__dequeue_entry(struct nfqnl_instance *q, struct nfqnl_queue_entry *entry)
259{
260 list_del(&entry->list);
261 q->queue_total--;
262}
263
264static inline struct nfqnl_queue_entry *
265__find_dequeue_entry(struct nfqnl_instance *queue,
266 nfqnl_cmpfn cmpfn, unsigned long data)
267{
268 struct nfqnl_queue_entry *entry;
269
270 entry = __find_entry(queue, cmpfn, data);
271 if (entry == NULL)
272 return NULL;
273
274 __dequeue_entry(queue, entry);
275 return entry;
276}
277
278
279static inline void
280__nfqnl_flush(struct nfqnl_instance *queue, int verdict)
281{
282 struct nfqnl_queue_entry *entry;
283
284 while ((entry = __find_dequeue_entry(queue, NULL, 0)))
285 issue_verdict(entry, verdict);
286}
287
288static inline int
289__nfqnl_set_mode(struct nfqnl_instance *queue,
290 unsigned char mode, unsigned int range)
291{
292 int status = 0;
293
294 switch (mode) {
295 case NFQNL_COPY_NONE:
296 case NFQNL_COPY_META:
297 queue->copy_mode = mode;
298 queue->copy_range = 0;
299 break;
300
301 case NFQNL_COPY_PACKET:
302 queue->copy_mode = mode;
303 /* we're using struct nfattr which has 16bit nfa_len */
304 if (range > 0xffff)
305 queue->copy_range = 0xffff;
306 else
307 queue->copy_range = range;
308 break;
309
310 default:
311 status = -EINVAL;
312
313 }
314 return status;
315}
316
317static struct nfqnl_queue_entry *
318find_dequeue_entry(struct nfqnl_instance *queue,
319 nfqnl_cmpfn cmpfn, unsigned long data)
320{
321 struct nfqnl_queue_entry *entry;
322
323 spin_lock_bh(&queue->lock);
324 entry = __find_dequeue_entry(queue, cmpfn, data);
325 spin_unlock_bh(&queue->lock);
326
327 return entry;
328}
329
330static void
331nfqnl_flush(struct nfqnl_instance *queue, int verdict)
332{
333 spin_lock_bh(&queue->lock);
334 __nfqnl_flush(queue, verdict);
335 spin_unlock_bh(&queue->lock);
336}
337
338static struct sk_buff *
339nfqnl_build_packet_message(struct nfqnl_instance *queue,
340 struct nfqnl_queue_entry *entry, int *errp)
341{
342 unsigned char *old_tail;
343 size_t size;
344 size_t data_len = 0;
345 struct sk_buff *skb;
346 struct nfqnl_msg_packet_hdr pmsg;
347 struct nlmsghdr *nlh;
348 struct nfgenmsg *nfmsg;
349 unsigned int tmp_uint;
350
351 QDEBUG("entered\n");
352
353 /* all macros expand to constant values at compile time */
354 size = NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hdr))
355		+ NLMSG_SPACE(sizeof(u_int32_t))	/* indev ifindex */
356		+ NLMSG_SPACE(sizeof(u_int32_t))	/* outdev ifindex */
357#ifdef CONFIG_BRIDGE_NETFILTER
358		+ NLMSG_SPACE(sizeof(u_int32_t))	/* physindev ifindex */
359		+ NLMSG_SPACE(sizeof(u_int32_t))	/* physoutdev ifindex */
360#endif
361 + NLMSG_SPACE(sizeof(u_int32_t)) /* mark */
362 + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hw))
363 + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_timestamp));
364
365 spin_lock_bh(&queue->lock);
366
367 switch (queue->copy_mode) {
368 case NFQNL_COPY_META:
369 case NFQNL_COPY_NONE:
370 data_len = 0;
371 break;
372
373 case NFQNL_COPY_PACKET:
374 if (queue->copy_range == 0
375 || queue->copy_range > entry->skb->len)
376 data_len = entry->skb->len;
377 else
378 data_len = queue->copy_range;
379
380 size += NLMSG_SPACE(data_len);
381 break;
382
383 default:
384 *errp = -EINVAL;
385 spin_unlock_bh(&queue->lock);
386 return NULL;
387 }
388
389 spin_unlock_bh(&queue->lock);
390
391 skb = alloc_skb(size, GFP_ATOMIC);
392 if (!skb)
393 goto nlmsg_failure;
394
395	old_tail = skb->tail;
396 nlh = NLMSG_PUT(skb, 0, 0,
397 NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
398 sizeof(struct nfgenmsg));
399 nfmsg = NLMSG_DATA(nlh);
400 nfmsg->nfgen_family = entry->info->pf;
401 nfmsg->version = NFNETLINK_V0;
402 nfmsg->res_id = htons(queue->queue_num);
403
404 pmsg.packet_id = htonl(entry->id);
405	pmsg.hw_protocol	= entry->skb->protocol;
406 pmsg.hook = entry->info->hook;
407
408 NFA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg);
409
410 if (entry->info->indev) {
411 tmp_uint = htonl(entry->info->indev->ifindex);
412#ifndef CONFIG_BRIDGE_NETFILTER
413 NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint);
414#else
415 if (entry->info->pf == PF_BRIDGE) {
416 /* Case 1: indev is physical input device, we need to
417 * look for bridge group (when called from
418 * netfilter_bridge) */
419 NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint),
420 &tmp_uint);
421 /* this is the bridge group "brX" */
422 tmp_uint = htonl(entry->info->indev->br_port->br->dev->ifindex);
423 NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint),
424 &tmp_uint);
425 } else {
426 /* Case 2: indev is bridge group, we need to look for
427 * physical device (when called from ipv4) */
428 NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint),
429 &tmp_uint);
430 if (entry->skb->nf_bridge
431 && entry->skb->nf_bridge->physindev) {
432 tmp_uint = htonl(entry->skb->nf_bridge->physindev->ifindex);
433 NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV,
434 sizeof(tmp_uint), &tmp_uint);
435 }
436 }
437#endif
438 }
439
440 if (entry->info->outdev) {
441 tmp_uint = htonl(entry->info->outdev->ifindex);
442#ifndef CONFIG_BRIDGE_NETFILTER
443 NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint);
444#else
445 if (entry->info->pf == PF_BRIDGE) {
446 /* Case 1: outdev is physical output device, we need to
447 * look for bridge group (when called from
448 * netfilter_bridge) */
449 NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint),
450 &tmp_uint);
451 /* this is the bridge group "brX" */
452 tmp_uint = htonl(entry->info->outdev->br_port->br->dev->ifindex);
453 NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint),
454 &tmp_uint);
455 } else {
456 /* Case 2: outdev is bridge group, we need to look for
457 * physical output device (when called from ipv4) */
458 NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint),
459 &tmp_uint);
460 if (entry->skb->nf_bridge
461 && entry->skb->nf_bridge->physoutdev) {
462 tmp_uint = htonl(entry->skb->nf_bridge->physoutdev->ifindex);
463 NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV,
464 sizeof(tmp_uint), &tmp_uint);
465 }
466 }
467#endif
468 }
469
470 if (entry->skb->nfmark) {
471 tmp_uint = htonl(entry->skb->nfmark);
472 NFA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint);
473 }
474
475 if (entry->info->indev && entry->skb->dev
476 && entry->skb->dev->hard_header_parse) {
477 struct nfqnl_msg_packet_hw phw;
478
479 phw.hw_addrlen =
480 entry->skb->dev->hard_header_parse(entry->skb,
481 phw.hw_addr);
482 phw.hw_addrlen = htons(phw.hw_addrlen);
483 NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw);
484 }
485
486 if (entry->skb->tstamp.off_sec) {
487 struct nfqnl_msg_packet_timestamp ts;
488
489 ts.sec = cpu_to_be64(skb_tv_base.tv_sec + entry->skb->tstamp.off_sec);
490 ts.usec = cpu_to_be64(skb_tv_base.tv_usec + entry->skb->tstamp.off_usec);
491
492 NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts);
493 }
494
495 if (data_len) {
496 struct nfattr *nfa;
497 int size = NFA_LENGTH(data_len);
498
499 if (skb_tailroom(skb) < (int)NFA_SPACE(data_len)) {
500 printk(KERN_WARNING "nf_queue: no tailroom!\n");
501 goto nlmsg_failure;
502 }
503
504 nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size));
505 nfa->nfa_type = NFQA_PAYLOAD;
506 nfa->nfa_len = size;
507
508 if (skb_copy_bits(entry->skb, 0, NFA_DATA(nfa), data_len))
509 BUG();
510 }
511
512 nlh->nlmsg_len = skb->tail - old_tail;
513 return skb;
514
515nlmsg_failure:
516nfattr_failure:
517 if (skb)
518 kfree_skb(skb);
519 *errp = -EINVAL;
520 if (net_ratelimit())
521 printk(KERN_ERR "nf_queue: error creating packet message\n");
522 return NULL;
523}
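
All of the NFA_PUT() calls above append attributes in the usual netlink TLV layout: a 4-byte header (16-bit length, 16-bit type) followed by the payload, with everything padded to 4-byte alignment. A self-contained restatement of the size arithmetic; the macro bodies here are assumed to mirror the kernel's, following the familiar rtnetlink RTA_* convention:

	#include <stdint.h>
	#include <stdio.h>

	struct nfattr {
		uint16_t nfa_len;	/* header + payload, unpadded */
		uint16_t nfa_type;
	};

	#define NFA_ALIGNTO	4
	#define NFA_ALIGN(len)	(((len) + NFA_ALIGNTO - 1) & ~(NFA_ALIGNTO - 1))
	#define NFA_LENGTH(len)	(NFA_ALIGN(sizeof(struct nfattr)) + (len))
	#define NFA_SPACE(len)	NFA_ALIGN(NFA_LENGTH(len))

	int main(void)
	{
		/* 6-byte payload: 4-byte header + 6 bytes = 10, padded to 12 */
		printf("NFA_LENGTH(6) = %zu, NFA_SPACE(6) = %zu\n",
		       (size_t)NFA_LENGTH(6), (size_t)NFA_SPACE(6));
		return 0;
	}
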
524
525static int
526nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
527 unsigned int queuenum, void *data)
528{
529 int status = -EINVAL;
530 struct sk_buff *nskb;
531 struct nfqnl_instance *queue;
532 struct nfqnl_queue_entry *entry;
533
534 QDEBUG("entered\n");
535
536 queue = instance_lookup_get(queuenum);
537 if (!queue) {
538 QDEBUG("no queue instance matching\n");
539 return -EINVAL;
540 }
541
542 if (queue->copy_mode == NFQNL_COPY_NONE) {
543 QDEBUG("mode COPY_NONE, aborting\n");
544 status = -EAGAIN;
545 goto err_out_put;
546 }
547
548 entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
549 if (entry == NULL) {
550 if (net_ratelimit())
551 printk(KERN_ERR
552 "nf_queue: OOM in nfqnl_enqueue_packet()\n");
553 status = -ENOMEM;
554 goto err_out_put;
555 }
556
557 entry->info = info;
558 entry->skb = skb;
559 entry->id = atomic_inc_return(&queue->id_sequence);
560
561 nskb = nfqnl_build_packet_message(queue, entry, &status);
562 if (nskb == NULL)
563 goto err_out_free;
564
565 spin_lock_bh(&queue->lock);
566
567 if (!queue->peer_pid)
568 goto err_out_free_nskb;
569
570 if (queue->queue_total >= queue->queue_maxlen) {
571 queue->queue_dropped++;
572 status = -ENOSPC;
573 if (net_ratelimit())
574			printk(KERN_WARNING "nf_queue: full at %d entries, "
575			       "dropping packet(s). Dropped: %d\n",
576 queue->queue_total, queue->queue_dropped);
577 goto err_out_free_nskb;
578 }
579
580 /* nfnetlink_unicast will either free the nskb or add it to a socket */
581 status = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT);
582 if (status < 0) {
583 queue->queue_user_dropped++;
584 goto err_out_unlock;
585 }
586
587 __enqueue_entry(queue, entry);
588
589 spin_unlock_bh(&queue->lock);
590 instance_put(queue);
591 return status;
592
593err_out_free_nskb:
594 kfree_skb(nskb);
595
596err_out_unlock:
597 spin_unlock_bh(&queue->lock);
598
599err_out_free:
600 kfree(entry);
601err_out_put:
602 instance_put(queue);
603 return status;
604}
605
606static int
607nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
608{
609 int diff;
610
611 diff = data_len - e->skb->len;
612 if (diff < 0)
613 skb_trim(e->skb, data_len);
614 else if (diff > 0) {
615 if (data_len > 0xFFFF)
616 return -EINVAL;
617 if (diff > skb_tailroom(e->skb)) {
618 struct sk_buff *newskb;
619
620 newskb = skb_copy_expand(e->skb,
621 skb_headroom(e->skb),
622 diff,
623 GFP_ATOMIC);
624 if (newskb == NULL) {
625				printk(KERN_WARNING "nf_queue: OOM "
626 "in mangle, dropping packet\n");
627 return -ENOMEM;
628 }
629 if (e->skb->sk)
630 skb_set_owner_w(newskb, e->skb->sk);
631 kfree_skb(e->skb);
632 e->skb = newskb;
633 }
634 skb_put(e->skb, diff);
635 }
636 if (!skb_make_writable(&e->skb, data_len))
637 return -ENOMEM;
638 memcpy(e->skb->data, data, data_len);
639
640 return 0;
641}
642
643static inline int
644id_cmp(struct nfqnl_queue_entry *e, unsigned long id)
645{
646 return (id == e->id);
647}
648
649static int
650nfqnl_set_mode(struct nfqnl_instance *queue,
651 unsigned char mode, unsigned int range)
652{
653 int status;
654
655 spin_lock_bh(&queue->lock);
656 status = __nfqnl_set_mode(queue, mode, range);
657 spin_unlock_bh(&queue->lock);
658
659 return status;
660}
661
662static int
663dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex)
664{
665 if (entry->info->indev)
666 if (entry->info->indev->ifindex == ifindex)
667 return 1;
668
669 if (entry->info->outdev)
670 if (entry->info->outdev->ifindex == ifindex)
671 return 1;
672
673 return 0;
674}
675
676/* drop all packets with either indev or outdev == ifindex from all queue
677 * instances */
678static void
679nfqnl_dev_drop(int ifindex)
680{
681 int i;
682
683 QDEBUG("entering for ifindex %u\n", ifindex);
684
685	/* it only looks as if we had to hold the read lock for way too
686	 * long: issue_verdict(), nf_reinject(), ... - but we only ever
687	 * issue NF_DROP, which is processed directly in nf_reinject() */
688 read_lock_bh(&instances_lock);
689
690 for (i = 0; i < INSTANCE_BUCKETS; i++) {
691 struct hlist_node *tmp;
692 struct nfqnl_instance *inst;
693 struct hlist_head *head = &instance_table[i];
694
695 hlist_for_each_entry(inst, tmp, head, hlist) {
696 struct nfqnl_queue_entry *entry;
697 while ((entry = find_dequeue_entry(inst, dev_cmp,
698 ifindex)) != NULL)
699 issue_verdict(entry, NF_DROP);
700 }
701 }
702
703 read_unlock_bh(&instances_lock);
704}
705
706#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
707
708static int
709nfqnl_rcv_dev_event(struct notifier_block *this,
710 unsigned long event, void *ptr)
711{
712 struct net_device *dev = ptr;
713
714 /* Drop any packets associated with the downed device */
715 if (event == NETDEV_DOWN)
716 nfqnl_dev_drop(dev->ifindex);
717 return NOTIFY_DONE;
718}
719
720static struct notifier_block nfqnl_dev_notifier = {
721 .notifier_call = nfqnl_rcv_dev_event,
722};
723
724static int
725nfqnl_rcv_nl_event(struct notifier_block *this,
726 unsigned long event, void *ptr)
727{
728 struct netlink_notify *n = ptr;
729
730 if (event == NETLINK_URELEASE &&
731 n->protocol == NETLINK_NETFILTER && n->pid) {
732 int i;
733
734 /* destroy all instances for this pid */
735 write_lock_bh(&instances_lock);
736 for (i = 0; i < INSTANCE_BUCKETS; i++) {
737 struct hlist_node *tmp, *t2;
738 struct nfqnl_instance *inst;
739 struct hlist_head *head = &instance_table[i];
740
741 hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
742 if (n->pid == inst->peer_pid)
743 __instance_destroy(inst);
744 }
745 }
746 write_unlock_bh(&instances_lock);
747 }
748 return NOTIFY_DONE;
749}
750
751static struct notifier_block nfqnl_rtnl_notifier = {
752 .notifier_call = nfqnl_rcv_nl_event,
753};
754
755static const int nfqa_verdict_min[NFQA_MAX] = {
756 [NFQA_VERDICT_HDR-1] = sizeof(struct nfqnl_msg_verdict_hdr),
757 [NFQA_MARK-1] = sizeof(u_int32_t),
758 [NFQA_PAYLOAD-1] = 0,
759};
760
761static int
762nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
763 struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
764{
765 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
766 u_int16_t queue_num = ntohs(nfmsg->res_id);
767
768 struct nfqnl_msg_verdict_hdr *vhdr;
769 struct nfqnl_instance *queue;
770 unsigned int verdict;
771 struct nfqnl_queue_entry *entry;
772 int err;
773
774 if (nfattr_bad_size(nfqa, NFQA_MAX, nfqa_verdict_min)) {
775 QDEBUG("bad attribute size\n");
776 return -EINVAL;
777 }
778
779 queue = instance_lookup_get(queue_num);
780 if (!queue)
781 return -ENODEV;
782
783 if (queue->peer_pid != NETLINK_CB(skb).pid) {
784 err = -EPERM;
785 goto err_out_put;
786 }
787
788 if (!nfqa[NFQA_VERDICT_HDR-1]) {
789 err = -EINVAL;
790 goto err_out_put;
791 }
792
793 vhdr = NFA_DATA(nfqa[NFQA_VERDICT_HDR-1]);
794 verdict = ntohl(vhdr->verdict);
795
796 if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) {
797 err = -EINVAL;
798 goto err_out_put;
799 }
800
801 entry = find_dequeue_entry(queue, id_cmp, ntohl(vhdr->id));
802 if (entry == NULL) {
803 err = -ENOENT;
804 goto err_out_put;
805 }
806
807 if (nfqa[NFQA_PAYLOAD-1]) {
808 if (nfqnl_mangle(NFA_DATA(nfqa[NFQA_PAYLOAD-1]),
809 NFA_PAYLOAD(nfqa[NFQA_PAYLOAD-1]), entry) < 0)
810 verdict = NF_DROP;
811 }
812
813 if (nfqa[NFQA_MARK-1])
814		entry->skb->nfmark = ntohl(*(u_int32_t *)NFA_DATA(nfqa[NFQA_MARK-1]));
815
816 issue_verdict(entry, verdict);
817 instance_put(queue);
818 return 0;
819
820err_out_put:
821 instance_put(queue);
822 return err;
823}
824
825static int
826nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
827 struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
828{
829 return -ENOTSUPP;
830}
831
832static const int nfqa_cfg_min[NFQA_CFG_MAX] = {
833 [NFQA_CFG_CMD-1] = sizeof(struct nfqnl_msg_config_cmd),
834 [NFQA_CFG_PARAMS-1] = sizeof(struct nfqnl_msg_config_params),
835};
836
837static struct nf_queue_handler nfqh = {
838 .name = "nf_queue",
839 .outfn = &nfqnl_enqueue_packet,
840};
841
842static int
843nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
844 struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
845{
846 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
847 u_int16_t queue_num = ntohs(nfmsg->res_id);
848 struct nfqnl_instance *queue;
849 int ret = 0;
850
851 QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));
852
853 if (nfattr_bad_size(nfqa, NFQA_CFG_MAX, nfqa_cfg_min)) {
854 QDEBUG("bad attribute size\n");
855 return -EINVAL;
856 }
857
858 queue = instance_lookup_get(queue_num);
859 if (nfqa[NFQA_CFG_CMD-1]) {
860 struct nfqnl_msg_config_cmd *cmd;
861 cmd = NFA_DATA(nfqa[NFQA_CFG_CMD-1]);
862 QDEBUG("found CFG_CMD\n");
863
864 switch (cmd->command) {
865 case NFQNL_CFG_CMD_BIND:
866			if (queue) {
867				ret = -EBUSY; goto out_put; }
868
869 queue = instance_create(queue_num, NETLINK_CB(skb).pid);
870 if (!queue)
871 return -EINVAL;
872 break;
873 case NFQNL_CFG_CMD_UNBIND:
874 if (!queue)
875 return -ENODEV;
876
877 if (queue->peer_pid != NETLINK_CB(skb).pid) {
878 ret = -EPERM;
879 goto out_put;
880 }
881
882 instance_destroy(queue);
883 break;
884 case NFQNL_CFG_CMD_PF_BIND:
885 QDEBUG("registering queue handler for pf=%u\n",
886 ntohs(cmd->pf));
887 ret = nf_register_queue_handler(ntohs(cmd->pf), &nfqh);
888 break;
889 case NFQNL_CFG_CMD_PF_UNBIND:
890 QDEBUG("unregistering queue handler for pf=%u\n",
891 ntohs(cmd->pf));
892 /* This is a bug and a feature. We can unregister
893 * other handlers(!) */
894 ret = nf_unregister_queue_handler(ntohs(cmd->pf));
895 break;
896 default:
897 ret = -EINVAL;
898 break;
899 }
900 } else {
901 if (!queue) {
902 QDEBUG("no config command, and no instance ENOENT\n");
903 ret = -ENOENT;
904 goto out_put;
905 }
906
907 if (queue->peer_pid != NETLINK_CB(skb).pid) {
908 QDEBUG("no config command, and wrong pid\n");
909 ret = -EPERM;
910 goto out_put;
911 }
912 }
913
914 if (nfqa[NFQA_CFG_PARAMS-1]) {
915 struct nfqnl_msg_config_params *params;
916 params = NFA_DATA(nfqa[NFQA_CFG_PARAMS-1]);
917
918 nfqnl_set_mode(queue, params->copy_mode,
919 ntohl(params->copy_range));
920 }
921
922out_put:
923 instance_put(queue);
924 return ret;
925}
926
927static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
928 [NFQNL_MSG_PACKET] = { .call = nfqnl_recv_unsupp,
929 .attr_count = NFQA_MAX,
930 .cap_required = CAP_NET_ADMIN },
931 [NFQNL_MSG_VERDICT] = { .call = nfqnl_recv_verdict,
932 .attr_count = NFQA_MAX,
933 .cap_required = CAP_NET_ADMIN },
934 [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config,
935 .attr_count = NFQA_CFG_MAX,
936 .cap_required = CAP_NET_ADMIN },
937};
938
939static struct nfnetlink_subsystem nfqnl_subsys = {
940 .name = "nf_queue",
941 .subsys_id = NFNL_SUBSYS_QUEUE,
942 .cb_count = NFQNL_MSG_MAX,
943 .cb = nfqnl_cb,
944};
945
946#ifdef CONFIG_PROC_FS
947struct iter_state {
948 unsigned int bucket;
949};
950
951static struct hlist_node *get_first(struct seq_file *seq)
952{
953 struct iter_state *st = seq->private;
954
955 if (!st)
956 return NULL;
957
958 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
959 if (!hlist_empty(&instance_table[st->bucket]))
960 return instance_table[st->bucket].first;
961 }
962 return NULL;
963}
964
965static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
966{
967 struct iter_state *st = seq->private;
968
969 h = h->next;
970 while (!h) {
971 if (++st->bucket >= INSTANCE_BUCKETS)
972 return NULL;
973
974 h = instance_table[st->bucket].first;
975 }
976 return h;
977}
978
979static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
980{
981 struct hlist_node *head;
982 head = get_first(seq);
983
984 if (head)
985 while (pos && (head = get_next(seq, head)))
986 pos--;
987 return pos ? NULL : head;
988}
989
990static void *seq_start(struct seq_file *seq, loff_t *pos)
991{
992 read_lock_bh(&instances_lock);
993 return get_idx(seq, *pos);
994}
995
996static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
997{
998 (*pos)++;
999 return get_next(s, v);
1000}
1001
1002static void seq_stop(struct seq_file *s, void *v)
1003{
1004 read_unlock_bh(&instances_lock);
1005}
1006
1007static int seq_show(struct seq_file *s, void *v)
1008{
1009 const struct nfqnl_instance *inst = v;
1010
1011 return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n",
1012 inst->queue_num,
1013 inst->peer_pid, inst->queue_total,
1014 inst->copy_mode, inst->copy_range,
1015 inst->queue_dropped, inst->queue_user_dropped,
1016 atomic_read(&inst->id_sequence),
1017 atomic_read(&inst->use));
1018}
1019
1020static struct seq_operations nfqnl_seq_ops = {
1021 .start = seq_start,
1022 .next = seq_next,
1023 .stop = seq_stop,
1024 .show = seq_show,
1025};
1026
1027static int nfqnl_open(struct inode *inode, struct file *file)
1028{
1029 struct seq_file *seq;
1030 struct iter_state *is;
1031 int ret;
1032
1033 is = kmalloc(sizeof(*is), GFP_KERNEL);
1034 if (!is)
1035 return -ENOMEM;
1036 memset(is, 0, sizeof(*is));
1037 ret = seq_open(file, &nfqnl_seq_ops);
1038 if (ret < 0)
1039 goto out_free;
1040 seq = file->private_data;
1041 seq->private = is;
1042 return ret;
1043out_free:
1044 kfree(is);
1045 return ret;
1046}
1047
1048static struct file_operations nfqnl_file_ops = {
1049 .owner = THIS_MODULE,
1050 .open = nfqnl_open,
1051 .read = seq_read,
1052 .llseek = seq_lseek,
1053 .release = seq_release_private,
1054};
1055
1056#endif /* PROC_FS */
1057
1058static int
1059init_or_cleanup(int init)
1060{
1061 int i, status = -ENOMEM;
1062#ifdef CONFIG_PROC_FS
1063 struct proc_dir_entry *proc_nfqueue;
1064#endif
1065
1066 if (!init)
1067 goto cleanup;
1068
1069 for (i = 0; i < INSTANCE_BUCKETS; i++)
1070 INIT_HLIST_HEAD(&instance_table[i]);
1071
1072 netlink_register_notifier(&nfqnl_rtnl_notifier);
1073 status = nfnetlink_subsys_register(&nfqnl_subsys);
1074 if (status < 0) {
1075 printk(KERN_ERR "nf_queue: failed to create netlink socket\n");
1076 goto cleanup_netlink_notifier;
1077 }
1078
1079#ifdef CONFIG_PROC_FS
1080 proc_nfqueue = create_proc_entry("nfnetlink_queue", 0440,
1081 proc_net_netfilter);
1082 if (!proc_nfqueue)
1083 goto cleanup_subsys;
1084 proc_nfqueue->proc_fops = &nfqnl_file_ops;
1085#endif
1086
1087 register_netdevice_notifier(&nfqnl_dev_notifier);
1088
1089 return status;
1090
1091cleanup:
1092 nf_unregister_queue_handlers(&nfqh);
1093 unregister_netdevice_notifier(&nfqnl_dev_notifier);
1094#ifdef CONFIG_PROC_FS
1095 remove_proc_entry("nfnetlink_queue", proc_net_netfilter);
1096cleanup_subsys:
1097#endif
1098 nfnetlink_subsys_unregister(&nfqnl_subsys);
1099cleanup_netlink_notifier:
1100 netlink_unregister_notifier(&nfqnl_rtnl_notifier);
1101 return status;
1102}
1103
1104static int __init init(void)
1105{
1106
1107 return init_or_cleanup(1);
1108}
1109
1110static void __exit fini(void)
1111{
1112 init_or_cleanup(0);
1113}
1114
1115MODULE_DESCRIPTION("netfilter packet queue handler");
1116MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
1117MODULE_LICENSE("GPL");
1118MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE);
1119
1120module_init(init);
1121module_exit(fini);
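
For context, userspace is expected to drive this interface through the separate libnetfilter_queue library (an assumption here, not part of this patch). A minimal peer that binds queue 0 and accepts every packet, exercising the NFQNL_CFG_CMD_BIND, NFQNL_MSG_PACKET and NFQNL_MSG_VERDICT paths above, might look like this; build with gcc queue.c -lnetfilter_queue:

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>
	#include <arpa/inet.h>
	#include <sys/socket.h>
	#include <linux/netfilter.h>		/* NF_ACCEPT */
	#include <libnetfilter_queue/libnetfilter_queue.h>

	static int cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
		      struct nfq_data *nfa, void *data)
	{
		struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr(nfa);
		uint32_t id = ph ? ntohl(ph->packet_id) : 0;

		/* sends NFQNL_MSG_VERDICT, handled by nfqnl_recv_verdict() */
		return nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL);
	}

	int main(void)
	{
		struct nfq_handle *h = nfq_open();
		struct nfq_q_handle *qh;
		char buf[4096];
		int rv;

		if (!h)
			exit(1);
		nfq_bind_pf(h, AF_INET);		/* NFQNL_CFG_CMD_PF_BIND */
		qh = nfq_create_queue(h, 0, &cb, NULL);	/* NFQNL_CFG_CMD_BIND */
		nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff);

		while ((rv = recv(nfq_fd(h), buf, sizeof(buf), 0)) > 0)
			nfq_handle_packet(h, buf, rv);	/* dispatches to cb */

		nfq_destroy_queue(qh);
		nfq_close(h);
		return 0;
	}
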
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 3405fdf41b93..62435ffc6184 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -13,7 +13,12 @@
13 * added netlink_proto_exit 13 * added netlink_proto_exit
14 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br> 14 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
15 * use nlk_sk, as sk->protinfo is on a diet 8) 15 * use nlk_sk, as sk->protinfo is on a diet 8)
16 * 16 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
17 * - inc module use count of module that owns
18 * the kernel socket in case userspace opens
19 * socket of same protocol
20 * - remove all module support, since netlink is
21 * mandatory if CONFIG_NET=y these days
17 */ 22 */
18 23
19#include <linux/config.h> 24#include <linux/config.h>
@@ -55,21 +60,29 @@
55#include <net/scm.h> 60#include <net/scm.h>
56 61
57#define Nprintk(a...) 62#define Nprintk(a...)
63#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
58 64
59struct netlink_sock { 65struct netlink_sock {
60 /* struct sock has to be the first member of netlink_sock */ 66 /* struct sock has to be the first member of netlink_sock */
61 struct sock sk; 67 struct sock sk;
62 u32 pid; 68 u32 pid;
63 unsigned int groups;
64 u32 dst_pid; 69 u32 dst_pid;
65 unsigned int dst_groups; 70 u32 dst_group;
71 u32 flags;
72 u32 subscriptions;
73 u32 ngroups;
74 unsigned long *groups;
66 unsigned long state; 75 unsigned long state;
67 wait_queue_head_t wait; 76 wait_queue_head_t wait;
68 struct netlink_callback *cb; 77 struct netlink_callback *cb;
69 spinlock_t cb_lock; 78 spinlock_t cb_lock;
70 void (*data_ready)(struct sock *sk, int bytes); 79 void (*data_ready)(struct sock *sk, int bytes);
80 struct module *module;
71}; 81};
72 82
83#define NETLINK_KERNEL_SOCKET 0x1
84#define NETLINK_RECV_PKTINFO 0x2
85
73static inline struct netlink_sock *nlk_sk(struct sock *sk) 86static inline struct netlink_sock *nlk_sk(struct sock *sk)
74{ 87{
75 return (struct netlink_sock *)sk; 88 return (struct netlink_sock *)sk;
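
The change above replaces the old 32-bit groups mask with a dynamically sized bitmap, so a protocol can offer more than 32 multicast groups. NLGRPSZ() rounds the group count up to a whole number of unsigned longs and returns the size in bytes; a self-contained restatement of the arithmetic:

	#include <stdio.h>

	#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))
	#define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)

	int main(void)
	{
		/* on a 64-bit machine: 32 groups -> 8 bytes, 65 -> 16 bytes */
		printf("NLGRPSZ(32) = %zu, NLGRPSZ(65) = %zu\n",
		       NLGRPSZ(32), NLGRPSZ(65));
		return 0;
	}
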
@@ -92,6 +105,9 @@ struct netlink_table {
92 struct nl_pid_hash hash; 105 struct nl_pid_hash hash;
93 struct hlist_head mc_list; 106 struct hlist_head mc_list;
94 unsigned int nl_nonroot; 107 unsigned int nl_nonroot;
108 unsigned int groups;
109 struct module *module;
110 int registered;
95}; 111};
96 112
97static struct netlink_table *nl_table; 113static struct netlink_table *nl_table;
@@ -106,6 +122,11 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
106 122
107static struct notifier_block *netlink_chain; 123static struct notifier_block *netlink_chain;
108 124
125static u32 netlink_group_mask(u32 group)
126{
127 return group ? 1 << (group - 1) : 0;
128}
129
109static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) 130static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
110{ 131{
111 return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask]; 132 return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
@@ -122,6 +143,7 @@ static void netlink_sock_destruct(struct sock *sk)
122 BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); 143 BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
123 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); 144 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
124 BUG_TRAP(!nlk_sk(sk)->cb); 145 BUG_TRAP(!nlk_sk(sk)->cb);
146 BUG_TRAP(!nlk_sk(sk)->groups);
125} 147}
126 148
127/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on SMP. 149/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on SMP.
@@ -317,7 +339,7 @@ static void netlink_remove(struct sock *sk)
317 netlink_table_grab(); 339 netlink_table_grab();
318 if (sk_del_node_init(sk)) 340 if (sk_del_node_init(sk))
319 nl_table[sk->sk_protocol].hash.entries--; 341 nl_table[sk->sk_protocol].hash.entries--;
320 if (nlk_sk(sk)->groups) 342 if (nlk_sk(sk)->subscriptions)
321 __sk_del_bind_node(sk); 343 __sk_del_bind_node(sk);
322 netlink_table_ungrab(); 344 netlink_table_ungrab();
323} 345}
@@ -328,19 +350,11 @@ static struct proto netlink_proto = {
328 .obj_size = sizeof(struct netlink_sock), 350 .obj_size = sizeof(struct netlink_sock),
329}; 351};
330 352
331static int netlink_create(struct socket *sock, int protocol) 353static int __netlink_create(struct socket *sock, int protocol)
332{ 354{
333 struct sock *sk; 355 struct sock *sk;
334 struct netlink_sock *nlk; 356 struct netlink_sock *nlk;
335 357
336 sock->state = SS_UNCONNECTED;
337
338 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
339 return -ESOCKTNOSUPPORT;
340
341 if (protocol<0 || protocol >= MAX_LINKS)
342 return -EPROTONOSUPPORT;
343
344 sock->ops = &netlink_ops; 358 sock->ops = &netlink_ops;
345 359
346 sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); 360 sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1);
@@ -350,15 +364,67 @@ static int netlink_create(struct socket *sock, int protocol)
350 sock_init_data(sock, sk); 364 sock_init_data(sock, sk);
351 365
352 nlk = nlk_sk(sk); 366 nlk = nlk_sk(sk);
353
354 spin_lock_init(&nlk->cb_lock); 367 spin_lock_init(&nlk->cb_lock);
355 init_waitqueue_head(&nlk->wait); 368 init_waitqueue_head(&nlk->wait);
356 sk->sk_destruct = netlink_sock_destruct;
357 369
370 sk->sk_destruct = netlink_sock_destruct;
358 sk->sk_protocol = protocol; 371 sk->sk_protocol = protocol;
359 return 0; 372 return 0;
360} 373}
361 374
375static int netlink_create(struct socket *sock, int protocol)
376{
377 struct module *module = NULL;
378 struct netlink_sock *nlk;
379 unsigned int groups;
380 int err = 0;
381
382 sock->state = SS_UNCONNECTED;
383
384 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
385 return -ESOCKTNOSUPPORT;
386
387 if (protocol<0 || protocol >= MAX_LINKS)
388 return -EPROTONOSUPPORT;
389
390 netlink_lock_table();
391#ifdef CONFIG_KMOD
392 if (!nl_table[protocol].registered) {
393 netlink_unlock_table();
394 request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
395 netlink_lock_table();
396 }
397#endif
398 if (nl_table[protocol].registered &&
399 try_module_get(nl_table[protocol].module))
400 module = nl_table[protocol].module;
401 else
402 err = -EPROTONOSUPPORT;
403 groups = nl_table[protocol].groups;
404 netlink_unlock_table();
405
 406	if (err || (err = __netlink_create(sock, protocol)) < 0)
407 goto out_module;
408
409 nlk = nlk_sk(sock->sk);
410
411 nlk->groups = kmalloc(NLGRPSZ(groups), GFP_KERNEL);
412 if (nlk->groups == NULL) {
413 err = -ENOMEM;
414 goto out_module;
415 }
416 memset(nlk->groups, 0, NLGRPSZ(groups));
417 nlk->ngroups = groups;
418
419 nlk->module = module;
420out:
421 return err;
422
423out_module:
424 module_put(module);
425 goto out;
426}
427
362static int netlink_release(struct socket *sock) 428static int netlink_release(struct socket *sock)
363{ 429{
364 struct sock *sk = sock->sk; 430 struct sock *sk = sock->sk;
@@ -387,14 +453,27 @@ static int netlink_release(struct socket *sock)
387 453
388 skb_queue_purge(&sk->sk_write_queue); 454 skb_queue_purge(&sk->sk_write_queue);
389 455
390 if (nlk->pid && !nlk->groups) { 456 if (nlk->pid && !nlk->subscriptions) {
391 struct netlink_notify n = { 457 struct netlink_notify n = {
392 .protocol = sk->sk_protocol, 458 .protocol = sk->sk_protocol,
393 .pid = nlk->pid, 459 .pid = nlk->pid,
394 }; 460 };
395 notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n); 461 notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n);
396 } 462 }
397 463
464 if (nlk->module)
465 module_put(nlk->module);
466
467 if (nlk->flags & NETLINK_KERNEL_SOCKET) {
468 netlink_table_grab();
469 nl_table[sk->sk_protocol].module = NULL;
470 nl_table[sk->sk_protocol].registered = 0;
471 netlink_table_ungrab();
472 }
473
474 kfree(nlk->groups);
475 nlk->groups = NULL;
476
398 sock_put(sk); 477 sock_put(sk);
399 return 0; 478 return 0;
400} 479}
@@ -443,6 +522,18 @@ static inline int netlink_capable(struct socket *sock, unsigned int flag)
443 capable(CAP_NET_ADMIN); 522 capable(CAP_NET_ADMIN);
444} 523}
445 524
525static void
526netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
527{
528 struct netlink_sock *nlk = nlk_sk(sk);
529
530 if (nlk->subscriptions && !subscriptions)
531 __sk_del_bind_node(sk);
532 else if (!nlk->subscriptions && subscriptions)
533 sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
534 nlk->subscriptions = subscriptions;
535}
536
446static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) 537static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
447{ 538{
448 struct sock *sk = sock->sk; 539 struct sock *sk = sock->sk;
@@ -468,15 +559,14 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len
468 return err; 559 return err;
469 } 560 }
470 561
471 if (!nladdr->nl_groups && !nlk->groups) 562 if (!nladdr->nl_groups && !(u32)nlk->groups[0])
472 return 0; 563 return 0;
473 564
474 netlink_table_grab(); 565 netlink_table_grab();
475 if (nlk->groups && !nladdr->nl_groups) 566 netlink_update_subscriptions(sk, nlk->subscriptions +
476 __sk_del_bind_node(sk); 567 hweight32(nladdr->nl_groups) -
477 else if (!nlk->groups && nladdr->nl_groups) 568 hweight32(nlk->groups[0]));
478 sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list); 569 nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
479 nlk->groups = nladdr->nl_groups;
480 netlink_table_ungrab(); 570 netlink_table_ungrab();
481 571
482 return 0; 572 return 0;
@@ -493,7 +583,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
493 if (addr->sa_family == AF_UNSPEC) { 583 if (addr->sa_family == AF_UNSPEC) {
494 sk->sk_state = NETLINK_UNCONNECTED; 584 sk->sk_state = NETLINK_UNCONNECTED;
495 nlk->dst_pid = 0; 585 nlk->dst_pid = 0;
496 nlk->dst_groups = 0; 586 nlk->dst_group = 0;
497 return 0; 587 return 0;
498 } 588 }
499 if (addr->sa_family != AF_NETLINK) 589 if (addr->sa_family != AF_NETLINK)
@@ -509,7 +599,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
509 if (err == 0) { 599 if (err == 0) {
510 sk->sk_state = NETLINK_CONNECTED; 600 sk->sk_state = NETLINK_CONNECTED;
511 nlk->dst_pid = nladdr->nl_pid; 601 nlk->dst_pid = nladdr->nl_pid;
512 nlk->dst_groups = nladdr->nl_groups; 602 nlk->dst_group = ffs(nladdr->nl_groups);
513 } 603 }
514 604
515 return err; 605 return err;
@@ -527,10 +617,10 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr
527 617
528 if (peer) { 618 if (peer) {
529 nladdr->nl_pid = nlk->dst_pid; 619 nladdr->nl_pid = nlk->dst_pid;
530 nladdr->nl_groups = nlk->dst_groups; 620 nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
531 } else { 621 } else {
532 nladdr->nl_pid = nlk->pid; 622 nladdr->nl_pid = nlk->pid;
533 nladdr->nl_groups = nlk->groups; 623 nladdr->nl_groups = nlk->groups[0];
534 } 624 }
535 return 0; 625 return 0;
536} 626}
@@ -648,7 +738,8 @@ void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
648 sock_put(sk); 738 sock_put(sk);
649} 739}
650 740
651static inline struct sk_buff *netlink_trim(struct sk_buff *skb, int allocation) 741static inline struct sk_buff *netlink_trim(struct sk_buff *skb,
742 unsigned int __nocast allocation)
652{ 743{
653 int delta; 744 int delta;
654 745
@@ -717,7 +808,7 @@ struct netlink_broadcast_data {
717 int failure; 808 int failure;
718 int congested; 809 int congested;
719 int delivered; 810 int delivered;
720 int allocation; 811 unsigned int allocation;
721 struct sk_buff *skb, *skb2; 812 struct sk_buff *skb, *skb2;
722}; 813};
723 814
@@ -730,7 +821,8 @@ static inline int do_one_broadcast(struct sock *sk,
730 if (p->exclude_sk == sk) 821 if (p->exclude_sk == sk)
731 goto out; 822 goto out;
732 823
733 if (nlk->pid == p->pid || !(nlk->groups & p->group)) 824 if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
825 !test_bit(p->group - 1, nlk->groups))
734 goto out; 826 goto out;
735 827
736 if (p->failure) { 828 if (p->failure) {
@@ -769,7 +861,7 @@ out:
769} 861}
770 862
771int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, 863int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
772 u32 group, int allocation) 864 u32 group, unsigned int __nocast allocation)
773{ 865{
774 struct netlink_broadcast_data info; 866 struct netlink_broadcast_data info;
775 struct hlist_node *node; 867 struct hlist_node *node;
@@ -826,7 +918,8 @@ static inline int do_one_set_err(struct sock *sk,
826 if (sk == p->exclude_sk) 918 if (sk == p->exclude_sk)
827 goto out; 919 goto out;
828 920
829 if (nlk->pid == p->pid || !(nlk->groups & p->group)) 921 if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
922 !test_bit(p->group - 1, nlk->groups))
830 goto out; 923 goto out;
831 924
832 sk->sk_err = p->code; 925 sk->sk_err = p->code;
@@ -854,6 +947,94 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
854 read_unlock(&nl_table_lock); 947 read_unlock(&nl_table_lock);
855} 948}
856 949
950static int netlink_setsockopt(struct socket *sock, int level, int optname,
951 char __user *optval, int optlen)
952{
953 struct sock *sk = sock->sk;
954 struct netlink_sock *nlk = nlk_sk(sk);
955 int val = 0, err;
956
957 if (level != SOL_NETLINK)
958 return -ENOPROTOOPT;
959
960 if (optlen >= sizeof(int) &&
961 get_user(val, (int __user *)optval))
962 return -EFAULT;
963
964 switch (optname) {
965 case NETLINK_PKTINFO:
966 if (val)
967 nlk->flags |= NETLINK_RECV_PKTINFO;
968 else
969 nlk->flags &= ~NETLINK_RECV_PKTINFO;
970 err = 0;
971 break;
972 case NETLINK_ADD_MEMBERSHIP:
973 case NETLINK_DROP_MEMBERSHIP: {
974 unsigned int subscriptions;
975 int old, new = optname == NETLINK_ADD_MEMBERSHIP ? 1 : 0;
976
977 if (!netlink_capable(sock, NL_NONROOT_RECV))
978 return -EPERM;
979 if (!val || val - 1 >= nlk->ngroups)
980 return -EINVAL;
981 netlink_table_grab();
982 old = test_bit(val - 1, nlk->groups);
983 subscriptions = nlk->subscriptions - old + new;
984 if (new)
985 __set_bit(val - 1, nlk->groups);
986 else
987 __clear_bit(val - 1, nlk->groups);
988 netlink_update_subscriptions(sk, subscriptions);
989 netlink_table_ungrab();
990 err = 0;
991 break;
992 }
993 default:
994 err = -ENOPROTOOPT;
995 }
996 return err;
997}
998
999static int netlink_getsockopt(struct socket *sock, int level, int optname,
1000 char __user *optval, int __user *optlen)
1001{
1002 struct sock *sk = sock->sk;
1003 struct netlink_sock *nlk = nlk_sk(sk);
1004 int len, val, err;
1005
1006 if (level != SOL_NETLINK)
1007 return -ENOPROTOOPT;
1008
1009 if (get_user(len, optlen))
1010 return -EFAULT;
1011 if (len < 0)
1012 return -EINVAL;
1013
1014 switch (optname) {
1015 case NETLINK_PKTINFO:
1016 if (len < sizeof(int))
1017 return -EINVAL;
1018 len = sizeof(int);
1019 val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
1020 put_user(len, optlen);
1021 put_user(val, optval);
1022 err = 0;
1023 break;
1024 default:
1025 err = -ENOPROTOOPT;
1026 }
1027 return err;
1028}
1029
1030static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
1031{
1032 struct nl_pktinfo info;
1033
1034 info.group = NETLINK_CB(skb).dst_group;
1035 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
1036}
1037
857static inline void netlink_rcv_wake(struct sock *sk) 1038static inline void netlink_rcv_wake(struct sock *sk)
858{ 1039{
859 struct netlink_sock *nlk = nlk_sk(sk); 1040 struct netlink_sock *nlk = nlk_sk(sk);
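
With per-socket bitmaps in place, groups beyond the 32 that fit in sockaddr_nl.nl_groups are joined one at a time through the new NETLINK_ADD_MEMBERSHIP option added above. A minimal userspace sketch; the protocol and group number are arbitrary examples:

	#include <stdio.h>
	#include <sys/socket.h>
	#include <linux/netlink.h>

	#ifndef SOL_NETLINK
	#define SOL_NETLINK 270		/* from linux/socket.h */
	#endif

	int main(void)
	{
		int group = 33;		/* anything > 32 needs this interface */
		int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

		if (fd < 0)
			return 1;
		if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
			       &group, sizeof(group)) < 0)
			perror("NETLINK_ADD_MEMBERSHIP");
		return 0;
	}
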
@@ -872,7 +1053,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
872 struct netlink_sock *nlk = nlk_sk(sk); 1053 struct netlink_sock *nlk = nlk_sk(sk);
873 struct sockaddr_nl *addr=msg->msg_name; 1054 struct sockaddr_nl *addr=msg->msg_name;
874 u32 dst_pid; 1055 u32 dst_pid;
875 u32 dst_groups; 1056 u32 dst_group;
876 struct sk_buff *skb; 1057 struct sk_buff *skb;
877 int err; 1058 int err;
878 struct scm_cookie scm; 1059 struct scm_cookie scm;
@@ -890,12 +1071,12 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
890 if (addr->nl_family != AF_NETLINK) 1071 if (addr->nl_family != AF_NETLINK)
891 return -EINVAL; 1072 return -EINVAL;
892 dst_pid = addr->nl_pid; 1073 dst_pid = addr->nl_pid;
893 dst_groups = addr->nl_groups; 1074 dst_group = ffs(addr->nl_groups);
894 if (dst_groups && !netlink_capable(sock, NL_NONROOT_SEND)) 1075 if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
895 return -EPERM; 1076 return -EPERM;
896 } else { 1077 } else {
897 dst_pid = nlk->dst_pid; 1078 dst_pid = nlk->dst_pid;
898 dst_groups = nlk->dst_groups; 1079 dst_group = nlk->dst_group;
899 } 1080 }
900 1081
901 if (!nlk->pid) { 1082 if (!nlk->pid) {
@@ -913,9 +1094,8 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
913 goto out; 1094 goto out;
914 1095
915 NETLINK_CB(skb).pid = nlk->pid; 1096 NETLINK_CB(skb).pid = nlk->pid;
916 NETLINK_CB(skb).groups = nlk->groups;
917 NETLINK_CB(skb).dst_pid = dst_pid; 1097 NETLINK_CB(skb).dst_pid = dst_pid;
918 NETLINK_CB(skb).dst_groups = dst_groups; 1098 NETLINK_CB(skb).dst_group = dst_group;
919 NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context); 1099 NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context);
920 memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); 1100 memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
921 1101
@@ -937,9 +1117,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
937 goto out; 1117 goto out;
938 } 1118 }
939 1119
940 if (dst_groups) { 1120 if (dst_group) {
941 atomic_inc(&skb->users); 1121 atomic_inc(&skb->users);
942 netlink_broadcast(sk, skb, dst_pid, dst_groups, GFP_KERNEL); 1122 netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
943 } 1123 }
944 err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); 1124 err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
945 1125
@@ -985,7 +1165,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
985 addr->nl_family = AF_NETLINK; 1165 addr->nl_family = AF_NETLINK;
986 addr->nl_pad = 0; 1166 addr->nl_pad = 0;
987 addr->nl_pid = NETLINK_CB(skb).pid; 1167 addr->nl_pid = NETLINK_CB(skb).pid;
988 addr->nl_groups = NETLINK_CB(skb).dst_groups; 1168 addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
989 msg->msg_namelen = sizeof(*addr); 1169 msg->msg_namelen = sizeof(*addr);
990 } 1170 }
991 1171
@@ -1000,6 +1180,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1000 netlink_dump(sk); 1180 netlink_dump(sk);
1001 1181
1002 scm_recv(sock, msg, siocb->scm, flags); 1182 scm_recv(sock, msg, siocb->scm, flags);
1183 if (nlk->flags & NETLINK_RECV_PKTINFO)
1184 netlink_cmsg_recv_pktinfo(msg, skb);
1003 1185
1004out: 1186out:
1005 netlink_rcv_wake(sk); 1187 netlink_rcv_wake(sk);
@@ -1022,10 +1204,13 @@ static void netlink_data_ready(struct sock *sk, int len)
1022 */ 1204 */
1023 1205
1024struct sock * 1206struct sock *
1025netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)) 1207netlink_kernel_create(int unit, unsigned int groups,
1208 void (*input)(struct sock *sk, int len),
1209 struct module *module)
1026{ 1210{
1027 struct socket *sock; 1211 struct socket *sock;
1028 struct sock *sk; 1212 struct sock *sk;
1213 struct netlink_sock *nlk;
1029 1214
1030 if (!nl_table) 1215 if (!nl_table)
1031 return NULL; 1216 return NULL;
@@ -1036,20 +1221,31 @@ netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len))
1036 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) 1221 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
1037 return NULL; 1222 return NULL;
1038 1223
1039 if (netlink_create(sock, unit) < 0) { 1224 if (__netlink_create(sock, unit) < 0)
1040 sock_release(sock); 1225 goto out_sock_release;
1041 return NULL; 1226
1042 }
1043 sk = sock->sk; 1227 sk = sock->sk;
1044 sk->sk_data_ready = netlink_data_ready; 1228 sk->sk_data_ready = netlink_data_ready;
1045 if (input) 1229 if (input)
1046 nlk_sk(sk)->data_ready = input; 1230 nlk_sk(sk)->data_ready = input;
1047 1231
1048 if (netlink_insert(sk, 0)) { 1232 if (netlink_insert(sk, 0))
1049 sock_release(sock); 1233 goto out_sock_release;
1050 return NULL; 1234
1051 } 1235 nlk = nlk_sk(sk);
1236 nlk->flags |= NETLINK_KERNEL_SOCKET;
1237
1238 netlink_table_grab();
1239 nl_table[unit].groups = groups < 32 ? 32 : groups;
1240 nl_table[unit].module = module;
1241 nl_table[unit].registered = 1;
1242 netlink_table_ungrab();
1243
1052 return sk; 1244 return sk;
1245
1246out_sock_release:
1247 sock_release(sock);
1248 return NULL;
1053} 1249}
1054 1250
1055void netlink_set_nonroot(int protocol, unsigned int flags) 1251void netlink_set_nonroot(int protocol, unsigned int flags)
@@ -1287,7 +1483,8 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
1287 s, 1483 s,
1288 s->sk_protocol, 1484 s->sk_protocol,
1289 nlk->pid, 1485 nlk->pid,
1290 nlk->groups, 1486 nlk->flags & NETLINK_KERNEL_SOCKET ?
1487 0 : (unsigned int)nlk->groups[0],
1291 atomic_read(&s->sk_rmem_alloc), 1488 atomic_read(&s->sk_rmem_alloc),
1292 atomic_read(&s->sk_wmem_alloc), 1489 atomic_read(&s->sk_wmem_alloc),
1293 nlk->cb, 1490 nlk->cb,
@@ -1361,8 +1558,8 @@ static struct proto_ops netlink_ops = {
1361 .ioctl = sock_no_ioctl, 1558 .ioctl = sock_no_ioctl,
1362 .listen = sock_no_listen, 1559 .listen = sock_no_listen,
1363 .shutdown = sock_no_shutdown, 1560 .shutdown = sock_no_shutdown,
1364 .setsockopt = sock_no_setsockopt, 1561 .setsockopt = netlink_setsockopt,
1365 .getsockopt = sock_no_getsockopt, 1562 .getsockopt = netlink_getsockopt,
1366 .sendmsg = netlink_sendmsg, 1563 .sendmsg = netlink_sendmsg,
1367 .recvmsg = netlink_recvmsg, 1564 .recvmsg = netlink_recvmsg,
1368 .mmap = sock_no_mmap, 1565 .mmap = sock_no_mmap,
@@ -1437,21 +1634,7 @@ out:
1437 return err; 1634 return err;
1438} 1635}
1439 1636
1440static void __exit netlink_proto_exit(void)
1441{
1442 sock_unregister(PF_NETLINK);
1443 proc_net_remove("netlink");
1444 kfree(nl_table);
1445 nl_table = NULL;
1446 proto_unregister(&netlink_proto);
1447}
1448
1449core_initcall(netlink_proto_init); 1637core_initcall(netlink_proto_init);
1450module_exit(netlink_proto_exit);
1451
1452MODULE_LICENSE("GPL");
1453
1454MODULE_ALIAS_NETPROTO(PF_NETLINK);
1455 1638
1456EXPORT_SYMBOL(netlink_ack); 1639EXPORT_SYMBOL(netlink_ack);
1457EXPORT_SYMBOL(netlink_broadcast); 1640EXPORT_SYMBOL(netlink_broadcast);
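
Taken together, the hunks above change the in-kernel netlink API in two ways: netlink_kernel_create() now takes the number of multicast groups and the owning module, and NETLINK_CB(skb) carries a single destination group number (dst_group) rather than a 32-bit bitmask; ffs() converts the bitmask supplied by userspace on sendmsg, and netlink_group_mask() converts back for recvmsg. A minimal sketch of a kernel-side caller updated for the new signature follows; the protocol constant, group count, and input handler are hypothetical, not part of this patch:

    #include <linux/module.h>
    #include <linux/netlink.h>
    #include <net/sock.h>

    static struct sock *my_nl_sk;

    static void my_input(struct sock *sk, int len)
    {
            /* drain sk->sk_receive_queue here */
    }

    static int __init my_init(void)
    {
            /* group counts below 32 are rounded up to 32 internally */
            my_nl_sk = netlink_kernel_create(NETLINK_USERSOCK, 4,
                                             my_input, THIS_MODULE);
            return my_nl_sk ? 0 : -ENOMEM;
    }
    module_init(my_init);

Note that netlink_broadcast() now receives the group number itself, not a mask, as the sendmsg hunk above shows.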
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 31ed4a9a1d06..4b53de982114 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -39,7 +39,7 @@
39#include <linux/proc_fs.h> 39#include <linux/proc_fs.h>
40#include <linux/seq_file.h> 40#include <linux/seq_file.h>
41#include <net/ip.h> 41#include <net/ip.h>
42#include <net/tcp.h> 42#include <net/tcp_states.h>
43#include <net/arp.h> 43#include <net/arp.h>
44#include <linux/init.h> 44#include <linux/init.h>
45 45
@@ -459,12 +459,7 @@ static struct sock *nr_make_new(struct sock *osk)
459 sk->sk_sndbuf = osk->sk_sndbuf; 459 sk->sk_sndbuf = osk->sk_sndbuf;
460 sk->sk_state = TCP_ESTABLISHED; 460 sk->sk_state = TCP_ESTABLISHED;
461 sk->sk_sleep = osk->sk_sleep; 461 sk->sk_sleep = osk->sk_sleep;
462 462 sock_copy_flags(sk, osk);
463 if (sock_flag(osk, SOCK_ZAPPED))
464 sock_set_flag(sk, SOCK_ZAPPED);
465
466 if (sock_flag(osk, SOCK_DBG))
467 sock_set_flag(sk, SOCK_DBG);
468 463
469 skb_queue_head_init(&nr->ack_queue); 464 skb_queue_head_init(&nr->ack_queue);
470 skb_queue_head_init(&nr->reseq_queue); 465 skb_queue_head_init(&nr->reseq_queue);
@@ -541,7 +536,8 @@ static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
541 struct nr_sock *nr = nr_sk(sk); 536 struct nr_sock *nr = nr_sk(sk);
542 struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr; 537 struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr;
543 struct net_device *dev; 538 struct net_device *dev;
544 ax25_address *user, *source; 539 ax25_uid_assoc *user;
540 ax25_address *source;
545 541
546 lock_sock(sk); 542 lock_sock(sk);
547 if (!sock_flag(sk, SOCK_ZAPPED)) { 543 if (!sock_flag(sk, SOCK_ZAPPED)) {
@@ -580,16 +576,19 @@ static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
580 } else { 576 } else {
581 source = &addr->fsa_ax25.sax25_call; 577 source = &addr->fsa_ax25.sax25_call;
582 578
583 if ((user = ax25_findbyuid(current->euid)) == NULL) { 579 user = ax25_findbyuid(current->euid);
580 if (user) {
581 nr->user_addr = user->call;
582 ax25_uid_put(user);
583 } else {
584 if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) { 584 if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) {
585 release_sock(sk); 585 release_sock(sk);
586 dev_put(dev); 586 dev_put(dev);
587 return -EPERM; 587 return -EPERM;
588 } 588 }
589 user = source; 589 nr->user_addr = *source;
590 } 590 }
591 591
592 nr->user_addr = *user;
593 nr->source_addr = *source; 592 nr->source_addr = *source;
594 } 593 }
595 594
@@ -609,7 +608,8 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
609 struct sock *sk = sock->sk; 608 struct sock *sk = sock->sk;
610 struct nr_sock *nr = nr_sk(sk); 609 struct nr_sock *nr = nr_sk(sk);
611 struct sockaddr_ax25 *addr = (struct sockaddr_ax25 *)uaddr; 610 struct sockaddr_ax25 *addr = (struct sockaddr_ax25 *)uaddr;
612 ax25_address *user, *source = NULL; 611 ax25_address *source = NULL;
612 ax25_uid_assoc *user;
613 struct net_device *dev; 613 struct net_device *dev;
614 614
615 lock_sock(sk); 615 lock_sock(sk);
@@ -650,16 +650,19 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
650 } 650 }
651 source = (ax25_address *)dev->dev_addr; 651 source = (ax25_address *)dev->dev_addr;
652 652
653 if ((user = ax25_findbyuid(current->euid)) == NULL) { 653 user = ax25_findbyuid(current->euid);
654 if (user) {
655 nr->user_addr = user->call;
656 ax25_uid_put(user);
657 } else {
654 if (ax25_uid_policy && !capable(CAP_NET_ADMIN)) { 658 if (ax25_uid_policy && !capable(CAP_NET_ADMIN)) {
655 dev_put(dev); 659 dev_put(dev);
656 release_sock(sk); 660 release_sock(sk);
657 return -EPERM; 661 return -EPERM;
658 } 662 }
659 user = source; 663 nr->user_addr = *source;
660 } 664 }
661 665
662 nr->user_addr = *user;
663 nr->source_addr = *source; 666 nr->source_addr = *source;
664 nr->device = dev; 667 nr->device = dev;
665 668
@@ -855,17 +858,16 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
855 frametype = skb->data[19] & 0x0F; 858 frametype = skb->data[19] & 0x0F;
856 flags = skb->data[19] & 0xF0; 859 flags = skb->data[19] & 0xF0;
857 860
858#ifdef CONFIG_INET
859 /* 861 /*
860 * Check for an incoming IP over NET/ROM frame. 862 * Check for an incoming IP over NET/ROM frame.
861 */ 863 */
862 if (frametype == NR_PROTOEXT && circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) { 864 if (frametype == NR_PROTOEXT &&
865 circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) {
863 skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN); 866 skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
864 skb->h.raw = skb->data; 867 skb->h.raw = skb->data;
865 868
866 return nr_rx_ip(skb, dev); 869 return nr_rx_ip(skb, dev);
867 } 870 }
868#endif
869 871
870 /* 872 /*
871 * Find an existing socket connection, based on circuit ID, if it's 873 * Find an existing socket connection, based on circuit ID, if it's
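
ax25_findbyuid() now returns a refcounted ax25_uid_assoc instead of a bare ax25_address pointer, so every caller copies the call field out and drops the reference with ax25_uid_put(); the fallback to the device callsign is unchanged. The pattern the bind and connect hunks above apply, shown in isolation:

    ax25_uid_assoc *user;
    ax25_address call;

    user = ax25_findbyuid(current->euid);
    if (user) {
            call = user->call;      /* copy while the reference is held */
            ax25_uid_put(user);     /* balance the lookup's reference */
    } else {
            /* no uid mapping: fall back to the device/source address,
             * subject to ax25_uid_policy and capable() as above */
    }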
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 220bf7494f71..263da4c26494 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -38,8 +38,6 @@
38#include <net/ax25.h> 38#include <net/ax25.h>
39#include <net/netrom.h> 39#include <net/netrom.h>
40 40
41#ifdef CONFIG_INET
42
43/* 41/*
44 * Only allow IP over NET/ROM frames through if the netrom device is up. 42 * Only allow IP over NET/ROM frames through if the netrom device is up.
45 */ 43 */
@@ -64,11 +62,12 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev)
64 skb->nh.raw = skb->data; 62 skb->nh.raw = skb->data;
65 skb->pkt_type = PACKET_HOST; 63 skb->pkt_type = PACKET_HOST;
66 64
67 ip_rcv(skb, skb->dev, NULL); 65 netif_rx(skb);
68 66
69 return 1; 67 return 1;
70} 68}
71 69
70#ifdef CONFIG_INET
72 71
73static int nr_rebuild_header(struct sk_buff *skb) 72static int nr_rebuild_header(struct sk_buff *skb)
74{ 73{
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 9c44b3794126..64b81a796907 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -22,8 +22,7 @@
22#include <linux/netdevice.h> 22#include <linux/netdevice.h>
23#include <linux/skbuff.h> 23#include <linux/skbuff.h>
24#include <net/sock.h> 24#include <net/sock.h>
25#include <net/tcp.h> 25#include <net/tcp_states.h>
26#include <net/ip.h> /* For ip_rcv */
27#include <asm/uaccess.h> 26#include <asm/uaccess.h>
28#include <asm/system.h> 27#include <asm/system.h>
29#include <linux/fcntl.h> 28#include <linux/fcntl.h>
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
index 0627347b14b8..587bed2674bf 100644
--- a/net/netrom/nr_subr.c
+++ b/net/netrom/nr_subr.c
@@ -21,7 +21,7 @@
21#include <linux/netdevice.h> 21#include <linux/netdevice.h>
22#include <linux/skbuff.h> 22#include <linux/skbuff.h>
23#include <net/sock.h> 23#include <net/sock.h>
24#include <net/tcp.h> 24#include <net/tcp_states.h>
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <asm/system.h> 26#include <asm/system.h>
27#include <linux/fcntl.h> 27#include <linux/fcntl.h>
@@ -77,7 +77,7 @@ void nr_requeue_frames(struct sock *sk)
77 if (skb_prev == NULL) 77 if (skb_prev == NULL)
78 skb_queue_head(&sk->sk_write_queue, skb); 78 skb_queue_head(&sk->sk_write_queue, skb);
79 else 79 else
80 skb_append(skb_prev, skb); 80 skb_append(skb_prev, skb, &sk->sk_write_queue);
81 skb_prev = skb; 81 skb_prev = skb;
82 } 82 }
83} 83}
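
skb_append() grows a third argument naming the queue the buffer is linked into, so the helper can maintain the queue's length and locking itself rather than deriving the list from the skb (part of removing the list backpointer from struct sk_buff). The requeue loop's call pattern after this change:

    if (skb_prev == NULL)
            skb_queue_head(&sk->sk_write_queue, skb);
    else
            skb_append(skb_prev, skb, &sk->sk_write_queue);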
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index faabda8088be..75b72d389ba9 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -22,7 +22,7 @@
22#include <linux/netdevice.h> 22#include <linux/netdevice.h>
23#include <linux/skbuff.h> 23#include <linux/skbuff.h>
24#include <net/sock.h> 24#include <net/sock.h>
25#include <net/tcp.h> 25#include <net/tcp_states.h>
26#include <asm/uaccess.h> 26#include <asm/uaccess.h>
27#include <asm/system.h> 27#include <asm/system.h>
28#include <linux/fcntl.h> 28#include <linux/fcntl.h>
diff --git a/net/packet/Kconfig b/net/packet/Kconfig
new file mode 100644
index 000000000000..34ff93ff894d
--- /dev/null
+++ b/net/packet/Kconfig
@@ -0,0 +1,26 @@
1#
2# Packet configuration
3#
4
5config PACKET
6 tristate "Packet socket"
7 ---help---
8 The Packet protocol is used by applications which communicate
9 directly with network devices without an intermediate network
10 protocol implemented in the kernel, e.g. tcpdump. If you want them
11 to work, choose Y.
12
13 To compile this driver as a module, choose M here: the module will
14 be called af_packet.
15
16 If unsure, say Y.
17
18config PACKET_MMAP
19 bool "Packet socket: mmapped IO"
20 depends on PACKET
21 help
22 If you say Y here, the Packet protocol driver will use a ring
23 buffer mapped into user space, avoiding a copy per packet.
24
25 If unsure, say N.
26
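
For reference, a .config fragment enabling both of the options this new Kconfig file introduces; PACKET_MMAP only matters to capture tools built against the mmap packet ring:

    CONFIG_PACKET=y
    CONFIG_PACKET_MMAP=y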
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 0269616e75a1..ba997095f08f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -241,7 +241,7 @@ static struct proto_ops packet_ops;
241#ifdef CONFIG_SOCK_PACKET 241#ifdef CONFIG_SOCK_PACKET
242static struct proto_ops packet_ops_spkt; 242static struct proto_ops packet_ops_spkt;
243 243
244static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) 244static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
245{ 245{
246 struct sock *sk; 246 struct sock *sk;
247 struct sockaddr_pkt *spkt; 247 struct sockaddr_pkt *spkt;
@@ -274,6 +274,9 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct
274 dst_release(skb->dst); 274 dst_release(skb->dst);
275 skb->dst = NULL; 275 skb->dst = NULL;
276 276
277 /* drop conntrack reference */
278 nf_reset(skb);
279
277 spkt = (struct sockaddr_pkt*)skb->cb; 280 spkt = (struct sockaddr_pkt*)skb->cb;
278 281
279 skb_push(skb, skb->data-skb->mac.raw); 282 skb_push(skb, skb->data-skb->mac.raw);
@@ -438,7 +441,7 @@ static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned
438 we will not harm anyone. 441 we will not harm anyone.
439 */ 442 */
440 443
441static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) 444static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
442{ 445{
443 struct sock *sk; 446 struct sock *sk;
444 struct sockaddr_ll *sll; 447 struct sockaddr_ll *sll;
@@ -517,6 +520,9 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
517 dst_release(skb->dst); 520 dst_release(skb->dst);
518 skb->dst = NULL; 521 skb->dst = NULL;
519 522
523 /* drop conntrack reference */
524 nf_reset(skb);
525
520 spin_lock(&sk->sk_receive_queue.lock); 526 spin_lock(&sk->sk_receive_queue.lock);
521 po->stats.tp_packets++; 527 po->stats.tp_packets++;
522 __skb_queue_tail(&sk->sk_receive_queue, skb); 528 __skb_queue_tail(&sk->sk_receive_queue, skb);
@@ -540,7 +546,7 @@ drop:
540} 546}
541 547
542#ifdef CONFIG_PACKET_MMAP 548#ifdef CONFIG_PACKET_MMAP
543static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) 549static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
544{ 550{
545 struct sock *sk; 551 struct sock *sk;
546 struct packet_sock *po; 552 struct packet_sock *po;
@@ -629,12 +635,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct pack
629 h->tp_snaplen = snaplen; 635 h->tp_snaplen = snaplen;
630 h->tp_mac = macoff; 636 h->tp_mac = macoff;
631 h->tp_net = netoff; 637 h->tp_net = netoff;
632 if (skb->stamp.tv_sec == 0) { 638 if (skb->tstamp.off_sec == 0) {
633 do_gettimeofday(&skb->stamp); 639 __net_timestamp(skb);
634 sock_enable_timestamp(sk); 640 sock_enable_timestamp(sk);
635 } 641 }
636 h->tp_sec = skb->stamp.tv_sec; 642 h->tp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec;
637 h->tp_usec = skb->stamp.tv_usec; 643 h->tp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec;
638 644
639 sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); 645 sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
640 sll->sll_halen = 0; 646 sll->sll_halen = 0;
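
All three receive hooks above pick up the new packet_type handler signature: a fourth argument, orig_dev, names the device the frame originally arrived on, before anything (bonding, VLAN, and similar) rewrote skb->dev. A minimal sketch of an out-of-tree handler updated for the new prototype; the protocol value and body are illustrative only:

    #include <linux/if_ether.h>
    #include <linux/netdevice.h>
    #include <linux/skbuff.h>

    static int my_rcv(struct sk_buff *skb, struct net_device *dev,
                      struct packet_type *pt, struct net_device *orig_dev)
    {
            /* dev: current device; orig_dev: where the frame first landed */
            kfree_skb(skb);
            return 0;
    }

    static struct packet_type my_pt = {
            .type = __constant_htons(ETH_P_ALL),
            .func = my_rcv,
    };
    /* dev_add_pack(&my_pt) at init, dev_remove_pack(&my_pt) at exit */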
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 7eb6a5bf93ea..c6e59f84c3ae 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -41,7 +41,7 @@
41#include <net/rose.h> 41#include <net/rose.h>
42#include <linux/proc_fs.h> 42#include <linux/proc_fs.h>
43#include <linux/seq_file.h> 43#include <linux/seq_file.h>
44#include <net/tcp.h> 44#include <net/tcp_states.h>
45#include <net/ip.h> 45#include <net/ip.h>
46#include <net/arp.h> 46#include <net/arp.h>
47 47
@@ -556,12 +556,7 @@ static struct sock *rose_make_new(struct sock *osk)
556 sk->sk_sndbuf = osk->sk_sndbuf; 556 sk->sk_sndbuf = osk->sk_sndbuf;
557 sk->sk_state = TCP_ESTABLISHED; 557 sk->sk_state = TCP_ESTABLISHED;
558 sk->sk_sleep = osk->sk_sleep; 558 sk->sk_sleep = osk->sk_sleep;
559 559 sock_copy_flags(sk, osk);
560 if (sock_flag(osk, SOCK_ZAPPED))
561 sock_set_flag(sk, SOCK_ZAPPED);
562
563 if (sock_flag(osk, SOCK_DBG))
564 sock_set_flag(sk, SOCK_DBG);
565 560
566 init_timer(&rose->timer); 561 init_timer(&rose->timer);
567 init_timer(&rose->idletimer); 562 init_timer(&rose->idletimer);
@@ -631,7 +626,8 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
631 struct rose_sock *rose = rose_sk(sk); 626 struct rose_sock *rose = rose_sk(sk);
632 struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr; 627 struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr;
633 struct net_device *dev; 628 struct net_device *dev;
634 ax25_address *user, *source; 629 ax25_address *source;
630 ax25_uid_assoc *user;
635 int n; 631 int n;
636 632
637 if (!sock_flag(sk, SOCK_ZAPPED)) 633 if (!sock_flag(sk, SOCK_ZAPPED))
@@ -656,14 +652,17 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
656 652
657 source = &addr->srose_call; 653 source = &addr->srose_call;
658 654
659 if ((user = ax25_findbyuid(current->euid)) == NULL) { 655 user = ax25_findbyuid(current->euid);
656 if (user) {
657 rose->source_call = user->call;
658 ax25_uid_put(user);
659 } else {
660 if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) 660 if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE))
661 return -EACCES; 661 return -EACCES;
662 user = source; 662 rose->source_call = *source;
663 } 663 }
664 664
665 rose->source_addr = addr->srose_addr; 665 rose->source_addr = addr->srose_addr;
666 rose->source_call = *user;
667 rose->device = dev; 666 rose->device = dev;
668 rose->source_ndigis = addr->srose_ndigis; 667 rose->source_ndigis = addr->srose_ndigis;
669 668
@@ -690,8 +689,8 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
690 struct rose_sock *rose = rose_sk(sk); 689 struct rose_sock *rose = rose_sk(sk);
691 struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr; 690 struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr;
692 unsigned char cause, diagnostic; 691 unsigned char cause, diagnostic;
693 ax25_address *user;
694 struct net_device *dev; 692 struct net_device *dev;
693 ax25_uid_assoc *user;
695 int n; 694 int n;
696 695
697 if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) { 696 if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
@@ -741,12 +740,14 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
741 if ((dev = rose_dev_first()) == NULL) 740 if ((dev = rose_dev_first()) == NULL)
742 return -ENETUNREACH; 741 return -ENETUNREACH;
743 742
744 if ((user = ax25_findbyuid(current->euid)) == NULL) 743 user = ax25_findbyuid(current->euid);
744 if (!user)
745 return -EINVAL; 745 return -EINVAL;
746 746
747 memcpy(&rose->source_addr, dev->dev_addr, ROSE_ADDR_LEN); 747 memcpy(&rose->source_addr, dev->dev_addr, ROSE_ADDR_LEN);
748 rose->source_call = *user; 748 rose->source_call = user->call;
749 rose->device = dev; 749 rose->device = dev;
750 ax25_uid_put(user);
750 751
751 rose_insert_socket(sk); /* Finish the bind */ 752 rose_insert_socket(sk); /* Finish the bind */
752 } 753 }
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c
index ef475a1bb1ba..8348d33f1efe 100644
--- a/net/rose/rose_in.c
+++ b/net/rose/rose_in.c
@@ -26,8 +26,7 @@
26#include <linux/netdevice.h> 26#include <linux/netdevice.h>
27#include <linux/skbuff.h> 27#include <linux/skbuff.h>
28#include <net/sock.h> 28#include <net/sock.h>
29#include <net/ip.h> /* For ip_rcv */ 29#include <net/tcp_states.h>
30#include <net/tcp.h>
31#include <asm/system.h> 30#include <asm/system.h>
32#include <linux/fcntl.h> 31#include <linux/fcntl.h>
33#include <linux/mm.h> 32#include <linux/mm.h>
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index ff73ebb912b8..4510cd7613ec 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -24,7 +24,7 @@
24#include <linux/if_arp.h> 24#include <linux/if_arp.h>
25#include <linux/skbuff.h> 25#include <linux/skbuff.h>
26#include <net/sock.h> 26#include <net/sock.h>
27#include <net/tcp.h> 27#include <net/tcp_states.h>
28#include <asm/system.h> 28#include <asm/system.h>
29#include <asm/uaccess.h> 29#include <asm/uaccess.h>
30#include <linux/fcntl.h> 30#include <linux/fcntl.h>
@@ -994,8 +994,10 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
994 * 1. The frame isn't for us, 994 * 1. The frame isn't for us,
995 * 2. It isn't "owned" by any existing route. 995 * 2. It isn't "owned" by any existing route.
996 */ 996 */
997 if (frametype != ROSE_CALL_REQUEST) /* XXX */ 997 if (frametype != ROSE_CALL_REQUEST) { /* XXX */
998 return 0; 998 res = 0;
999 goto out;
1000 }
999 1001
1000 len = (((skb->data[3] >> 4) & 0x0F) + 1) / 2; 1002 len = (((skb->data[3] >> 4) & 0x0F) + 1) / 2;
1001 len += (((skb->data[3] >> 0) & 0x0F) + 1) / 2; 1003 len += (((skb->data[3] >> 0) & 0x0F) + 1) / 2;
diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c
index 7db7e1cedc3a..a29a3a960fd6 100644
--- a/net/rose/rose_subr.c
+++ b/net/rose/rose_subr.c
@@ -21,7 +21,7 @@
21#include <linux/netdevice.h> 21#include <linux/netdevice.h>
22#include <linux/skbuff.h> 22#include <linux/skbuff.h>
23#include <net/sock.h> 23#include <net/sock.h>
24#include <net/tcp.h> 24#include <net/tcp_states.h>
25#include <asm/system.h> 25#include <asm/system.h>
26#include <linux/fcntl.h> 26#include <linux/fcntl.h>
27#include <linux/mm.h> 27#include <linux/mm.h>
@@ -74,7 +74,7 @@ void rose_requeue_frames(struct sock *sk)
74 if (skb_prev == NULL) 74 if (skb_prev == NULL)
75 skb_queue_head(&sk->sk_write_queue, skb); 75 skb_queue_head(&sk->sk_write_queue, skb);
76 else 76 else
77 skb_append(skb_prev, skb); 77 skb_append(skb_prev, skb, &sk->sk_write_queue);
78 skb_prev = skb; 78 skb_prev = skb;
79 } 79 }
80} 80}
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
index 84dd4403f792..50ae0371dab8 100644
--- a/net/rose/rose_timer.c
+++ b/net/rose/rose_timer.c
@@ -22,7 +22,7 @@
22#include <linux/netdevice.h> 22#include <linux/netdevice.h>
23#include <linux/skbuff.h> 23#include <linux/skbuff.h>
24#include <net/sock.h> 24#include <net/sock.h>
25#include <net/tcp.h> 25#include <net/tcp_states.h>
26#include <asm/system.h> 26#include <asm/system.h>
27#include <linux/fcntl.h> 27#include <linux/fcntl.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c
index 9bce7794130a..122c086ee2db 100644
--- a/net/rxrpc/transport.c
+++ b/net/rxrpc/transport.c
@@ -330,7 +330,7 @@ static int rxrpc_incoming_msg(struct rxrpc_transport *trans,
330 330
331 msg->trans = trans; 331 msg->trans = trans;
332 msg->state = RXRPC_MSG_RECEIVED; 332 msg->state = RXRPC_MSG_RECEIVED;
333 msg->stamp = pkt->stamp; 333 skb_get_timestamp(pkt, &msg->stamp);
334 if (msg->stamp.tv_sec == 0) { 334 if (msg->stamp.tv_sec == 0) {
335 do_gettimeofday(&msg->stamp); 335 do_gettimeofday(&msg->stamp);
336 if (pkt->sk) 336 if (pkt->sk)
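
skb->stamp, a struct timeval, becomes skb->tstamp, stored as offsets from a global base (skb_tv_base); code that needs a timeval goes through skb_get_timestamp(), and code that must stamp an skb itself uses __net_timestamp(), as the af_packet and rxrpc hunks do. The read-side pattern used above:

    struct timeval tv;

    skb_get_timestamp(skb, &tv);    /* expands skb->tstamp to a timeval */
    if (tv.tv_sec == 0)
            do_gettimeofday(&tv);   /* the skb was never stamped */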
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 7bac249258e3..45d3bc0812c8 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -1,6 +1,43 @@
1# 1#
2# Traffic control configuration. 2# Traffic control configuration.
3# 3#
4
5menuconfig NET_SCHED
6 bool "QoS and/or fair queueing"
7 ---help---
8 When the kernel has several packets to send out over a network
9 device, it has to decide which ones to send first, which ones to
10 delay, and which ones to drop. This is the job of the packet
11 scheduler, and several different algorithms for how to do this
12 "fairly" have been proposed.
13
14 If you say N here, you will get the standard packet scheduler, which
15 is a FIFO (first come, first served). If you say Y here, you will be
16 able to choose from among several alternative algorithms which can
17 then be attached to different network devices. This is useful for
18 example if some of your network devices are real time devices that
19 need a certain minimum data flow rate, or if you need to limit the
20 maximum data flow rate for traffic which matches specified criteria.
21 This code is considered to be experimental.
22
23 To administer these schedulers, you'll need the user-level utilities
24 from the package iproute2+tc at <ftp://ftp.tux.org/pub/net/ip-routing/>.
25 That package also contains some documentation; for more, check out
26 <http://snafu.freedom.org/linux2.2/iproute-notes.html>.
27
28 This Quality of Service (QoS) support will enable you to use
29 Differentiated Services (diffserv) and Resource Reservation Protocol
30 (RSVP) on your Linux router if you also say Y to "QoS support",
31 "Packet classifier API" and to some classifiers below. Documentation
32 and software is at <http://diffserv.sourceforge.net/>.
33
34 If you say Y here and to "/proc file system" below, you will be able
35 to read status information about packet schedulers from the file
36 /proc/net/psched.
37
38 The available schedulers are listed in the following questions; you
39 can say Y to as many as you like. If unsure, say N now.
40
4choice 41choice
5 prompt "Packet scheduler clock source" 42 prompt "Packet scheduler clock source"
6 depends on NET_SCHED 43 depends on NET_SCHED
@@ -454,6 +491,7 @@ config NET_EMATCH_TEXT
454 depends on NET_EMATCH 491 depends on NET_EMATCH
455 select TEXTSEARCH 492 select TEXTSEARCH
456 select TEXTSEARCH_KMP 493 select TEXTSEARCH_KMP
494 select TEXTSEARCH_BM
457 select TEXTSEARCH_FSM 495 select TEXTSEARCH_FSM
458 ---help--- 496 ---help---
459 Say Y here if you want to be able to classify packets based on 497 Say Y here if you want to be able to classify packets based on

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 249c61936ea0..8aebe8f6d271 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -165,7 +165,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act,
165 while ((a = act) != NULL) { 165 while ((a = act) != NULL) {
166repeat: 166repeat:
167 if (a->ops && a->ops->act) { 167 if (a->ops && a->ops->act) {
168 ret = a->ops->act(&skb, a); 168 ret = a->ops->act(&skb, a, res);
169 if (TC_MUNGED & skb->tc_verd) { 169 if (TC_MUNGED & skb->tc_verd) {
170 /* copied already, allow trampling */ 170 /* copied already, allow trampling */
171 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); 171 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
@@ -179,11 +179,6 @@ repeat:
179 act = a->next; 179 act = a->next;
180 } 180 }
181exec_done: 181exec_done:
182 if (skb->tc_classid > 0) {
183 res->classid = skb->tc_classid;
184 res->class = 0;
185 skb->tc_classid = 0;
186 }
187 return ret; 182 return ret;
188} 183}
189 184
@@ -598,7 +593,7 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
598 nlh->nlmsg_flags |= NLM_F_ROOT; 593 nlh->nlmsg_flags |= NLM_F_ROOT;
599 module_put(a->ops->owner); 594 module_put(a->ops->owner);
600 kfree(a); 595 kfree(a);
601 err = rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 596 err = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
602 if (err > 0) 597 if (err > 0)
603 return 0; 598 return 0;
604 599
@@ -661,7 +656,7 @@ tca_action_gd(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int event)
661 656
662 /* now do the delete */ 657 /* now do the delete */
663 tcf_action_destroy(head, 0); 658 tcf_action_destroy(head, 0);
664 ret = rtnetlink_send(skb, pid, RTMGRP_TC, 659 ret = rtnetlink_send(skb, pid, RTNLGRP_TC,
665 n->nlmsg_flags&NLM_F_ECHO); 660 n->nlmsg_flags&NLM_F_ECHO);
666 if (ret > 0) 661 if (ret > 0)
667 return 0; 662 return 0;
@@ -703,9 +698,9 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
703 x->rta_len = skb->tail - (u8*)x; 698 x->rta_len = skb->tail - (u8*)x;
704 699
705 nlh->nlmsg_len = skb->tail - b; 700 nlh->nlmsg_len = skb->tail - b;
706 NETLINK_CB(skb).dst_groups = RTMGRP_TC; 701 NETLINK_CB(skb).dst_group = RTNLGRP_TC;
707 702
708 err = rtnetlink_send(skb, pid, RTMGRP_TC, flags&NLM_F_ECHO); 703 err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
709 if (err > 0) 704 if (err > 0)
710 err = 0; 705 err = 0;
711 return err; 706 return err;
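
tcf_action_exec() stops smuggling the classification result through skb->tc_classid; instead the tcf_result is threaded straight into every action via the extra res argument to the ops->act() method (the per-action diffs below update gact, ipt, mirred, pedit, police, and simple accordingly). A hypothetical action that sets a class under the new signature:

    static int my_act(struct sk_buff **pskb, struct tc_action *a,
                      struct tcf_result *res)
    {
            if (res) {
                    res->classid = TC_H_MAKE(1, 1); /* illustrative classid */
                    res->class = 0;
            }
            return TC_ACT_OK;
    }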
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 3b5714ef4d1a..b4d89fbb3782 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -367,7 +367,7 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
367 return -EINVAL; 367 return -EINVAL;
368 } 368 }
369 369
370 return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 370 return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
371} 371}
372 372
373struct tcf_dump_args 373struct tcf_dump_args
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 53d98f8d3d80..00eae5f9a01a 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -27,17 +27,17 @@
27 * lvalue rvalue 27 * lvalue rvalue
28 * +-----------+ +-----------+ 28 * +-----------+ +-----------+
29 * | type: INT | | type: INT | 29 * | type: INT | | type: INT |
30 * def | id: INDEV | | id: VALUE | 30 * def | id: DEV | | id: VALUE |
31 * | data: | | data: 3 | 31 * | data: | | data: 3 |
32 * +-----------+ +-----------+ 32 * +-----------+ +-----------+
33 * | | 33 * | |
34 * ---> meta_ops[INT][INDEV](...) | 34 * ---> meta_ops[INT][DEV](...) |
35 * | | 35 * | |
36 * ----------- | 36 * ----------- |
37 * V V 37 * V V
38 * +-----------+ +-----------+ 38 * +-----------+ +-----------+
39 * | type: INT | | type: INT | 39 * | type: INT | | type: INT |
40 * obj | id: INDEV | | id: VALUE | 40 * obj | id: DEV | | id: VALUE |
41 * | data: 2 |<--data got filled out | data: 3 | 41 * | data: 2 |<--data got filled out | data: 3 |
42 * +-----------+ +-----------+ 42 * +-----------+ +-----------+
43 * | | 43 * | |
@@ -170,26 +170,6 @@ META_COLLECTOR(var_dev)
170 *err = var_dev(skb->dev, dst); 170 *err = var_dev(skb->dev, dst);
171} 171}
172 172
173META_COLLECTOR(int_indev)
174{
175 *err = int_dev(skb->input_dev, dst);
176}
177
178META_COLLECTOR(var_indev)
179{
180 *err = var_dev(skb->input_dev, dst);
181}
182
183META_COLLECTOR(int_realdev)
184{
185 *err = int_dev(skb->real_dev, dst);
186}
187
188META_COLLECTOR(var_realdev)
189{
190 *err = var_dev(skb->real_dev, dst);
191}
192
193/************************************************************************** 173/**************************************************************************
194 * skb attributes 174 * skb attributes
195 **************************************************************************/ 175 **************************************************************************/
@@ -229,12 +209,14 @@ META_COLLECTOR(int_maclen)
229 * Netfilter 209 * Netfilter
230 **************************************************************************/ 210 **************************************************************************/
231 211
232#ifdef CONFIG_NETFILTER
233META_COLLECTOR(int_nfmark) 212META_COLLECTOR(int_nfmark)
234{ 213{
214#ifdef CONFIG_NETFILTER
235 dst->value = skb->nfmark; 215 dst->value = skb->nfmark;
236} 216#else
217 dst->value = 0;
237#endif 218#endif
219}
238 220
239/************************************************************************** 221/**************************************************************************
240 * Traffic Control 222 * Traffic Control
@@ -245,31 +227,21 @@ META_COLLECTOR(int_tcindex)
245 dst->value = skb->tc_index; 227 dst->value = skb->tc_index;
246} 228}
247 229
248#ifdef CONFIG_NET_CLS_ACT
249META_COLLECTOR(int_tcverd)
250{
251 dst->value = skb->tc_verd;
252}
253
254META_COLLECTOR(int_tcclassid)
255{
256 dst->value = skb->tc_classid;
257}
258#endif
259
260/************************************************************************** 230/**************************************************************************
261 * Routing 231 * Routing
262 **************************************************************************/ 232 **************************************************************************/
263 233
264#ifdef CONFIG_NET_CLS_ROUTE
265META_COLLECTOR(int_rtclassid) 234META_COLLECTOR(int_rtclassid)
266{ 235{
267 if (unlikely(skb->dst == NULL)) 236 if (unlikely(skb->dst == NULL))
268 *err = -1; 237 *err = -1;
269 else 238 else
239#ifdef CONFIG_NET_CLS_ROUTE
270 dst->value = skb->dst->tclassid; 240 dst->value = skb->dst->tclassid;
271} 241#else
242 dst->value = 0;
272#endif 243#endif
244}
273 245
274META_COLLECTOR(int_rtiif) 246META_COLLECTOR(int_rtiif)
275{ 247{
@@ -505,8 +477,6 @@ struct meta_ops
505static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { 477static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
506 [TCF_META_TYPE_VAR] = { 478 [TCF_META_TYPE_VAR] = {
507 [META_ID(DEV)] = META_FUNC(var_dev), 479 [META_ID(DEV)] = META_FUNC(var_dev),
508 [META_ID(INDEV)] = META_FUNC(var_indev),
509 [META_ID(REALDEV)] = META_FUNC(var_realdev),
510 [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if), 480 [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if),
511 }, 481 },
512 [TCF_META_TYPE_INT] = { 482 [TCF_META_TYPE_INT] = {
@@ -515,25 +485,15 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
515 [META_ID(LOADAVG_1)] = META_FUNC(int_loadavg_1), 485 [META_ID(LOADAVG_1)] = META_FUNC(int_loadavg_1),
516 [META_ID(LOADAVG_2)] = META_FUNC(int_loadavg_2), 486 [META_ID(LOADAVG_2)] = META_FUNC(int_loadavg_2),
517 [META_ID(DEV)] = META_FUNC(int_dev), 487 [META_ID(DEV)] = META_FUNC(int_dev),
518 [META_ID(INDEV)] = META_FUNC(int_indev),
519 [META_ID(REALDEV)] = META_FUNC(int_realdev),
520 [META_ID(PRIORITY)] = META_FUNC(int_priority), 488 [META_ID(PRIORITY)] = META_FUNC(int_priority),
521 [META_ID(PROTOCOL)] = META_FUNC(int_protocol), 489 [META_ID(PROTOCOL)] = META_FUNC(int_protocol),
522 [META_ID(PKTTYPE)] = META_FUNC(int_pkttype), 490 [META_ID(PKTTYPE)] = META_FUNC(int_pkttype),
523 [META_ID(PKTLEN)] = META_FUNC(int_pktlen), 491 [META_ID(PKTLEN)] = META_FUNC(int_pktlen),
524 [META_ID(DATALEN)] = META_FUNC(int_datalen), 492 [META_ID(DATALEN)] = META_FUNC(int_datalen),
525 [META_ID(MACLEN)] = META_FUNC(int_maclen), 493 [META_ID(MACLEN)] = META_FUNC(int_maclen),
526#ifdef CONFIG_NETFILTER
527 [META_ID(NFMARK)] = META_FUNC(int_nfmark), 494 [META_ID(NFMARK)] = META_FUNC(int_nfmark),
528#endif
529 [META_ID(TCINDEX)] = META_FUNC(int_tcindex), 495 [META_ID(TCINDEX)] = META_FUNC(int_tcindex),
530#ifdef CONFIG_NET_CLS_ACT
531 [META_ID(TCVERDICT)] = META_FUNC(int_tcverd),
532 [META_ID(TCCLASSID)] = META_FUNC(int_tcclassid),
533#endif
534#ifdef CONFIG_NET_CLS_ROUTE
535 [META_ID(RTCLASSID)] = META_FUNC(int_rtclassid), 496 [META_ID(RTCLASSID)] = META_FUNC(int_rtclassid),
536#endif
537 [META_ID(RTIIF)] = META_FUNC(int_rtiif), 497 [META_ID(RTIIF)] = META_FUNC(int_rtiif),
538 [META_ID(SK_FAMILY)] = META_FUNC(int_sk_family), 498 [META_ID(SK_FAMILY)] = META_FUNC(int_sk_family),
539 [META_ID(SK_STATE)] = META_FUNC(int_sk_state), 499 [META_ID(SK_STATE)] = META_FUNC(int_sk_state),
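
The collectors and the ops table above trade #ifdef pairs around whole functions (and their table slots) for a single #ifdef inside the function body: the collector is always compiled in, the table layout no longer varies with the kernel config, and a disabled feature simply yields 0. The resulting idiom, using the nfmark collector exactly as it reads after this patch:

    META_COLLECTOR(int_nfmark)
    {
    #ifdef CONFIG_NETFILTER
            dst->value = skb->nfmark;   /* real mark when netfilter is built in */
    #else
            dst->value = 0;             /* stable default otherwise */
    #endif
    }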
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index 873840d8d072..77beabc91fa3 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -55,9 +55,6 @@ static int em_text_change(struct tcf_proto *tp, void *data, int len,
55 struct ts_config *ts_conf; 55 struct ts_config *ts_conf;
56 int flags = 0; 56 int flags = 0;
57 57
58 printk("Configuring text: %s from %d:%d to %d:%d len %d\n", conf->algo, conf->from_offset,
59 conf->from_layer, conf->to_offset, conf->to_layer, conf->pattern_len);
60
61 if (len < sizeof(*conf) || len < (sizeof(*conf) + conf->pattern_len)) 58 if (len < sizeof(*conf) || len < (sizeof(*conf) + conf->pattern_len))
62 return -EINVAL; 59 return -EINVAL;
63 60
diff --git a/net/sched/gact.c b/net/sched/gact.c
index a811c89fef7f..d1c6d542912a 100644
--- a/net/sched/gact.c
+++ b/net/sched/gact.c
@@ -135,7 +135,7 @@ tcf_gact_cleanup(struct tc_action *a, int bind)
135} 135}
136 136
137static int 137static int
138tcf_gact(struct sk_buff **pskb, struct tc_action *a) 138tcf_gact(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
139{ 139{
140 struct tcf_gact *p = PRIV(a, gact); 140 struct tcf_gact *p = PRIV(a, gact);
141 struct sk_buff *skb = *pskb; 141 struct sk_buff *skb = *pskb;
diff --git a/net/sched/ipt.c b/net/sched/ipt.c
index b114d994d523..f50136eed211 100644
--- a/net/sched/ipt.c
+++ b/net/sched/ipt.c
@@ -201,7 +201,7 @@ tcf_ipt_cleanup(struct tc_action *a, int bind)
201} 201}
202 202
203static int 203static int
204tcf_ipt(struct sk_buff **pskb, struct tc_action *a) 204tcf_ipt(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
205{ 205{
206 int ret = 0, result = 0; 206 int ret = 0, result = 0;
207 struct tcf_ipt *p = PRIV(a, ipt); 207 struct tcf_ipt *p = PRIV(a, ipt);
diff --git a/net/sched/mirred.c b/net/sched/mirred.c
index f309ce336803..20d06916dc0b 100644
--- a/net/sched/mirred.c
+++ b/net/sched/mirred.c
@@ -158,7 +158,7 @@ tcf_mirred_cleanup(struct tc_action *a, int bind)
158} 158}
159 159
160static int 160static int
161tcf_mirred(struct sk_buff **pskb, struct tc_action *a) 161tcf_mirred(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
162{ 162{
163 struct tcf_mirred *p = PRIV(a, mirred); 163 struct tcf_mirred *p = PRIV(a, mirred);
164 struct net_device *dev; 164 struct net_device *dev;
diff --git a/net/sched/pedit.c b/net/sched/pedit.c
index 678be6a645fb..767d24f4610e 100644
--- a/net/sched/pedit.c
+++ b/net/sched/pedit.c
@@ -130,7 +130,7 @@ tcf_pedit_cleanup(struct tc_action *a, int bind)
130} 130}
131 131
132static int 132static int
133tcf_pedit(struct sk_buff **pskb, struct tc_action *a) 133tcf_pedit(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
134{ 134{
135 struct tcf_pedit *p = PRIV(a, pedit); 135 struct tcf_pedit *p = PRIV(a, pedit);
136 struct sk_buff *skb = *pskb; 136 struct sk_buff *skb = *pskb;
diff --git a/net/sched/police.c b/net/sched/police.c
index c03545faf523..eb39fb2f39b6 100644
--- a/net/sched/police.c
+++ b/net/sched/police.c
@@ -284,7 +284,8 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind)
284 return 0; 284 return 0;
285} 285}
286 286
287static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a) 287static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a,
288 struct tcf_result *res)
288{ 289{
289 psched_time_t now; 290 psched_time_t now;
290 struct sk_buff *skb = *pskb; 291 struct sk_buff *skb = *pskb;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b9a069af4a02..737681cb9a92 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -816,7 +816,7 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
816 } 816 }
817 817
818 if (skb->len) 818 if (skb->len)
819 return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 819 return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
820 820
821err_out: 821err_out:
822 kfree_skb(skb); 822 kfree_skb(skb);
@@ -1040,7 +1040,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1040 return -EINVAL; 1040 return -EINVAL;
1041 } 1041 }
1042 1042
1043 return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 1043 return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1044} 1044}
1045 1045
1046struct qdisc_dump_args 1046struct qdisc_dump_args
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 73e218e646ac..99ceb91f0150 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -238,6 +238,20 @@ static void dev_watchdog_down(struct net_device *dev)
238 spin_unlock_bh(&dev->xmit_lock); 238 spin_unlock_bh(&dev->xmit_lock);
239} 239}
240 240
241void netif_carrier_on(struct net_device *dev)
242{
243 if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state))
244 linkwatch_fire_event(dev);
245 if (netif_running(dev))
246 __netdev_watchdog_up(dev);
247}
248
249void netif_carrier_off(struct net_device *dev)
250{
251 if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
252 linkwatch_fire_event(dev);
253}
254
241/* "NOOP" scheduler: the best scheduler, recommended for all interfaces 255/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
242 under all circumstances. It is difficult to invent anything faster or 256 under all circumstances. It is difficult to invent anything faster or
243 cheaper. 257 cheaper.
@@ -331,11 +345,10 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
331 int prio; 345 int prio;
332 struct sk_buff_head *list = qdisc_priv(qdisc); 346 struct sk_buff_head *list = qdisc_priv(qdisc);
333 347
334 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++, list++) { 348 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
335 struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list); 349 if (!skb_queue_empty(list + prio)) {
336 if (skb) {
337 qdisc->q.qlen--; 350 qdisc->q.qlen--;
338 return skb; 351 return __qdisc_dequeue_head(qdisc, list + prio);
339 } 352 }
340 } 353 }
341 354
@@ -439,6 +452,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
439 if (!ops->init || ops->init(sch, NULL) == 0) 452 if (!ops->init || ops->init(sch, NULL) == 0)
440 return sch; 453 return sch;
441 454
455 qdisc_destroy(sch);
442errout: 456errout:
443 return NULL; 457 return NULL;
444} 458}
@@ -600,6 +614,8 @@ void dev_shutdown(struct net_device *dev)
600} 614}
601 615
602EXPORT_SYMBOL(__netdev_watchdog_up); 616EXPORT_SYMBOL(__netdev_watchdog_up);
617EXPORT_SYMBOL(netif_carrier_on);
618EXPORT_SYMBOL(netif_carrier_off);
603EXPORT_SYMBOL(noop_qdisc); 619EXPORT_SYMBOL(noop_qdisc);
604EXPORT_SYMBOL(noop_qdisc_ops); 620EXPORT_SYMBOL(noop_qdisc_ops);
605EXPORT_SYMBOL(qdisc_create_dflt); 621EXPORT_SYMBOL(qdisc_create_dflt);
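
netif_carrier_on() and netif_carrier_off() move out of line into sch_generic.c; the test_and_{clear,set}_bit() on __LINK_STATE_NOCARRIER means a linkwatch event fires only on a real state change, and carrier-on additionally rearms the transmit watchdog on a running device. A driver's link-change handler would call them like this (the PHY status query is hypothetical):

    static void my_link_change(struct net_device *dev)
    {
            if (my_phy_link_up(dev))        /* hypothetical PHY status read */
                    netif_carrier_on(dev);  /* idempotent: event on change only */
            else
                    netif_carrier_off(dev);
    }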
diff --git a/net/sched/simple.c b/net/sched/simple.c
index 3ab4c675ab5d..8a6ae4f491e8 100644
--- a/net/sched/simple.c
+++ b/net/sched/simple.c
@@ -44,7 +44,7 @@ static DEFINE_RWLOCK(simp_lock);
44#include <net/pkt_act.h> 44#include <net/pkt_act.h>
45#include <net/act_generic.h> 45#include <net/act_generic.h>
46 46
47static int tcf_simp(struct sk_buff **pskb, struct tc_action *a) 47static int tcf_simp(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
48{ 48{
49 struct sk_buff *skb = *pskb; 49 struct sk_buff *skb = *pskb;
50 struct tcf_defact *p = PRIV(a, defact); 50 struct tcf_defact *p = PRIV(a, defact);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 4b47dd6f2485..5b24ae0650d3 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -71,7 +71,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
71 const struct sctp_endpoint *ep, 71 const struct sctp_endpoint *ep,
72 const struct sock *sk, 72 const struct sock *sk,
73 sctp_scope_t scope, 73 sctp_scope_t scope,
74 int gfp) 74 unsigned int __nocast gfp)
75{ 75{
76 struct sctp_sock *sp; 76 struct sctp_sock *sp;
77 int i; 77 int i;
@@ -272,7 +272,8 @@ fail_init:
272/* Allocate and initialize a new association */ 272/* Allocate and initialize a new association */
273struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, 273struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep,
274 const struct sock *sk, 274 const struct sock *sk,
275 sctp_scope_t scope, int gfp) 275 sctp_scope_t scope,
276 unsigned int __nocast gfp)
276{ 277{
277 struct sctp_association *asoc; 278 struct sctp_association *asoc;
278 279
@@ -478,7 +479,7 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
478/* Add a transport address to an association. */ 479/* Add a transport address to an association. */
479struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, 480struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
480 const union sctp_addr *addr, 481 const union sctp_addr *addr,
481 const int gfp, 482 const unsigned int __nocast gfp,
482 const int peer_state) 483 const int peer_state)
483{ 484{
484 struct sctp_transport *peer; 485 struct sctp_transport *peer;
@@ -1229,7 +1230,8 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned len)
1229/* Build the bind address list for the association based on info from the 1230/* Build the bind address list for the association based on info from the
1230 * local endpoint and the remote peer. 1231 * local endpoint and the remote peer.
1231 */ 1232 */
1232int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, int gfp) 1233int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
1234 unsigned int __nocast gfp)
1233{ 1235{
1234 sctp_scope_t scope; 1236 sctp_scope_t scope;
1235 int flags; 1237 int flags;
@@ -1251,7 +1253,8 @@ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, int gfp)
1251 1253
1252/* Build the association's bind address list from the cookie. */ 1254/* Build the association's bind address list from the cookie. */
1253int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc, 1255int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc,
1254 struct sctp_cookie *cookie, int gfp) 1256 struct sctp_cookie *cookie,
1257 unsigned int __nocast gfp)
1255{ 1258{
1256 int var_size2 = ntohs(cookie->peer_init->chunk_hdr.length); 1259 int var_size2 = ntohs(cookie->peer_init->chunk_hdr.length);
1257 int var_size3 = cookie->raw_addr_list_len; 1260 int var_size3 = cookie->raw_addr_list_len;
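
Throughout SCTP the gfp parameters change from plain int to unsigned int __nocast, a sparse annotation (a no-op for gcc) that makes the checker warn when some other integer type is implicitly converted to the parameter, catching callers that pass a bare 0 or swap arguments. The pattern, applied to a hypothetical helper:

    #include <linux/compiler.h>     /* __nocast; empty unless sparse runs */
    #include <linux/slab.h>

    static void *my_alloc(size_t len, unsigned int __nocast gfp)
    {
            return kmalloc(len, gfp);   /* callers must pass real GFP_* flags */
    }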
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index f90eadfb60a2..f71549710f2e 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -53,7 +53,8 @@
53 53
54/* Forward declarations for internal helpers. */ 54/* Forward declarations for internal helpers. */
55static int sctp_copy_one_addr(struct sctp_bind_addr *, union sctp_addr *, 55static int sctp_copy_one_addr(struct sctp_bind_addr *, union sctp_addr *,
56 sctp_scope_t scope, int gfp, int flags); 56 sctp_scope_t scope, unsigned int __nocast gfp,
57 int flags);
57static void sctp_bind_addr_clean(struct sctp_bind_addr *); 58static void sctp_bind_addr_clean(struct sctp_bind_addr *);
58 59
59/* First Level Abstractions. */ 60/* First Level Abstractions. */
@@ -63,7 +64,8 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *);
63 */ 64 */
64int sctp_bind_addr_copy(struct sctp_bind_addr *dest, 65int sctp_bind_addr_copy(struct sctp_bind_addr *dest,
65 const struct sctp_bind_addr *src, 66 const struct sctp_bind_addr *src,
66 sctp_scope_t scope, int gfp, int flags) 67 sctp_scope_t scope, unsigned int __nocast gfp,
68 int flags)
67{ 69{
68 struct sctp_sockaddr_entry *addr; 70 struct sctp_sockaddr_entry *addr;
69 struct list_head *pos; 71 struct list_head *pos;
@@ -144,7 +146,7 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp)
144 146
145/* Add an address to the bind address list in the SCTP_bind_addr structure. */ 147/* Add an address to the bind address list in the SCTP_bind_addr structure. */
146int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, 148int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
147 int gfp) 149 unsigned int __nocast gfp)
148{ 150{
149 struct sctp_sockaddr_entry *addr; 151 struct sctp_sockaddr_entry *addr;
150 152
@@ -197,7 +199,8 @@ int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr)
197 * The second argument is the return value for the length. 199 * The second argument is the return value for the length.
198 */ 200 */
199union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp, 201union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp,
200 int *addrs_len, int gfp) 202 int *addrs_len,
203 unsigned int __nocast gfp)
201{ 204{
202 union sctp_params addrparms; 205 union sctp_params addrparms;
203 union sctp_params retval; 206 union sctp_params retval;
@@ -249,7 +252,7 @@ end_raw:
249 * address parameters). 252 * address parameters).
250 */ 253 */
251int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list, 254int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
252 int addrs_len, __u16 port, int gfp) 255 int addrs_len, __u16 port, unsigned int __nocast gfp)
253{ 256{
254 union sctp_addr_param *rawaddr; 257 union sctp_addr_param *rawaddr;
255 struct sctp_paramhdr *param; 258 struct sctp_paramhdr *param;
@@ -347,7 +350,8 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
347/* Copy out addresses from the global local address list. */ 350/* Copy out addresses from the global local address list. */
348static int sctp_copy_one_addr(struct sctp_bind_addr *dest, 351static int sctp_copy_one_addr(struct sctp_bind_addr *dest,
349 union sctp_addr *addr, 352 union sctp_addr *addr,
350 sctp_scope_t scope, int gfp, int flags) 353 sctp_scope_t scope, unsigned int __nocast gfp,
354 int flags)
351{ 355{
352 int error = 0; 356 int error = 0;
353 357
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 0c2ab7885058..61da2937e641 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -62,7 +62,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg)
62} 62}
63 63
64/* Allocate and initialize datamsg. */ 64/* Allocate and initialize datamsg. */
65SCTP_STATIC struct sctp_datamsg *sctp_datamsg_new(int gfp) 65SCTP_STATIC struct sctp_datamsg *sctp_datamsg_new(unsigned int __nocast gfp)
66{ 66{
67 struct sctp_datamsg *msg; 67 struct sctp_datamsg *msg;
68 msg = kmalloc(sizeof(struct sctp_datamsg), gfp); 68 msg = kmalloc(sizeof(struct sctp_datamsg), gfp);
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index c44bf4165c6e..e22ccd655965 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -67,7 +67,8 @@ static void sctp_endpoint_bh_rcv(struct sctp_endpoint *ep);
67 * Initialize the base fields of the endpoint structure. 67 * Initialize the base fields of the endpoint structure.
68 */ 68 */
69static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, 69static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
70 struct sock *sk, int gfp) 70 struct sock *sk,
71 unsigned int __nocast gfp)
71{ 72{
72 struct sctp_sock *sp = sctp_sk(sk); 73 struct sctp_sock *sp = sctp_sk(sk);
73 memset(ep, 0, sizeof(struct sctp_endpoint)); 74 memset(ep, 0, sizeof(struct sctp_endpoint));
@@ -137,7 +138,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
137/* Create a sctp_endpoint with all that boring stuff initialized. 138/* Create a sctp_endpoint with all that boring stuff initialized.
138 * Returns NULL if there isn't enough memory. 139 * Returns NULL if there isn't enough memory.
139 */ 140 */
140struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, int gfp) 141struct sctp_endpoint *sctp_endpoint_new(struct sock *sk,
142 unsigned int __nocast gfp)
141{ 143{
142 struct sctp_endpoint *ep; 144 struct sctp_endpoint *ep;
143 145
@@ -191,8 +193,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
191 sctp_unhash_endpoint(ep); 193 sctp_unhash_endpoint(ep);
192 194
193 /* Free up the HMAC transform. */ 195 /* Free up the HMAC transform. */
194 if (sctp_sk(ep->base.sk)->hmac) 196 sctp_crypto_free_tfm(sctp_sk(ep->base.sk)->hmac);
195 sctp_crypto_free_tfm(sctp_sk(ep->base.sk)->hmac);
196 197
197 /* Cleanup. */ 198 /* Cleanup. */
198 sctp_inq_free(&ep->base.inqueue); 199 sctp_inq_free(&ep->base.inqueue);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 5e085e041a6e..28f32243397f 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -236,8 +236,8 @@ int sctp_rcv(struct sk_buff *skb)
236 } 236 }
237 237
238 /* SCTP seems to always need a timestamp right now (FIXME) */ 238 /* SCTP seems to always need a timestamp right now (FIXME) */
239 if (skb->stamp.tv_sec == 0) { 239 if (skb->tstamp.off_sec == 0) {
240 do_gettimeofday(&skb->stamp); 240 __net_timestamp(skb);
241 sock_enable_timestamp(sk); 241 sock_enable_timestamp(sk);
242 } 242 }
243 243
@@ -351,7 +351,6 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
351 * 351 *
352 */ 352 */
353void sctp_icmp_proto_unreachable(struct sock *sk, 353void sctp_icmp_proto_unreachable(struct sock *sk,
354 struct sctp_endpoint *ep,
355 struct sctp_association *asoc, 354 struct sctp_association *asoc,
356 struct sctp_transport *t) 355 struct sctp_transport *t)
357{ 356{
@@ -367,7 +366,6 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
367/* Common lookup code for icmp/icmpv6 error handler. */ 366/* Common lookup code for icmp/icmpv6 error handler. */
368struct sock *sctp_err_lookup(int family, struct sk_buff *skb, 367struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
369 struct sctphdr *sctphdr, 368 struct sctphdr *sctphdr,
370 struct sctp_endpoint **epp,
371 struct sctp_association **app, 369 struct sctp_association **app,
372 struct sctp_transport **tpp) 370 struct sctp_transport **tpp)
373{ 371{
@@ -375,11 +373,10 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
375 union sctp_addr daddr; 373 union sctp_addr daddr;
376 struct sctp_af *af; 374 struct sctp_af *af;
377 struct sock *sk = NULL; 375 struct sock *sk = NULL;
378 struct sctp_endpoint *ep = NULL;
379 struct sctp_association *asoc = NULL; 376 struct sctp_association *asoc = NULL;
380 struct sctp_transport *transport = NULL; 377 struct sctp_transport *transport = NULL;
381 378
382 *app = NULL; *epp = NULL; *tpp = NULL; 379 *app = NULL; *tpp = NULL;
383 380
384 af = sctp_get_af_specific(family); 381 af = sctp_get_af_specific(family);
385 if (unlikely(!af)) { 382 if (unlikely(!af)) {
@@ -394,26 +391,15 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
394 * packet. 391 * packet.
395 */ 392 */
396 asoc = __sctp_lookup_association(&saddr, &daddr, &transport); 393 asoc = __sctp_lookup_association(&saddr, &daddr, &transport);
397 if (!asoc) { 394 if (!asoc)
398 /* If there is no matching association, see if it matches any 395 return NULL;
399 * endpoint. This may happen for an ICMP error generated in
400 * response to an INIT_ACK.
401 */
402 ep = __sctp_rcv_lookup_endpoint(&daddr);
403 if (!ep) {
404 return NULL;
405 }
406 }
407 396
408 if (asoc) { 397 sk = asoc->base.sk;
409 sk = asoc->base.sk;
410 398
411 if (ntohl(sctphdr->vtag) != asoc->c.peer_vtag) { 399 if (ntohl(sctphdr->vtag) != asoc->c.peer_vtag) {
412 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 400 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
413 goto out; 401 goto out;
414 } 402 }
415 } else
416 sk = ep->base.sk;
417 403
418 sctp_bh_lock_sock(sk); 404 sctp_bh_lock_sock(sk);
419 405
@@ -423,7 +409,6 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
423 if (sock_owned_by_user(sk)) 409 if (sock_owned_by_user(sk))
424 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); 410 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
425 411
426 *epp = ep;
427 *app = asoc; 412 *app = asoc;
428 *tpp = transport; 413 *tpp = transport;
429 return sk; 414 return sk;
@@ -432,21 +417,16 @@ out:
432 sock_put(sk); 417 sock_put(sk);
433 if (asoc) 418 if (asoc)
434 sctp_association_put(asoc); 419 sctp_association_put(asoc);
435 if (ep)
436 sctp_endpoint_put(ep);
437 return NULL; 420 return NULL;
438} 421}
439 422
440/* Common cleanup code for icmp/icmpv6 error handler. */ 423/* Common cleanup code for icmp/icmpv6 error handler. */
441void sctp_err_finish(struct sock *sk, struct sctp_endpoint *ep, 424void sctp_err_finish(struct sock *sk, struct sctp_association *asoc)
442 struct sctp_association *asoc)
443{ 425{
444 sctp_bh_unlock_sock(sk); 426 sctp_bh_unlock_sock(sk);
445 sock_put(sk); 427 sock_put(sk);
446 if (asoc) 428 if (asoc)
447 sctp_association_put(asoc); 429 sctp_association_put(asoc);
448 if (ep)
449 sctp_endpoint_put(ep);
450} 430}
451 431
452/* 432/*
@@ -471,7 +451,6 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
471 int type = skb->h.icmph->type; 451 int type = skb->h.icmph->type;
472 int code = skb->h.icmph->code; 452 int code = skb->h.icmph->code;
473 struct sock *sk; 453 struct sock *sk;
474 struct sctp_endpoint *ep;
475 struct sctp_association *asoc; 454 struct sctp_association *asoc;
476 struct sctp_transport *transport; 455 struct sctp_transport *transport;
477 struct inet_sock *inet; 456 struct inet_sock *inet;
@@ -488,7 +467,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
488 savesctp = skb->h.raw; 467 savesctp = skb->h.raw;
489 skb->nh.iph = iph; 468 skb->nh.iph = iph;
490 skb->h.raw = (char *)sh; 469 skb->h.raw = (char *)sh;
491 sk = sctp_err_lookup(AF_INET, skb, sh, &ep, &asoc, &transport); 470 sk = sctp_err_lookup(AF_INET, skb, sh, &asoc, &transport);
492 /* Put back, the original pointers. */ 471 /* Put back, the original pointers. */
493 skb->nh.raw = saveip; 472 skb->nh.raw = saveip;
494 skb->h.raw = savesctp; 473 skb->h.raw = savesctp;
@@ -515,7 +494,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
515 } 494 }
516 else { 495 else {
517 if (ICMP_PROT_UNREACH == code) { 496 if (ICMP_PROT_UNREACH == code) {
518 sctp_icmp_proto_unreachable(sk, ep, asoc, 497 sctp_icmp_proto_unreachable(sk, asoc,
519 transport); 498 transport);
520 goto out_unlock; 499 goto out_unlock;
521 } 500 }
@@ -544,7 +523,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
544 } 523 }
545 524
546out_unlock: 525out_unlock:
547 sctp_err_finish(sk, ep, asoc); 526 sctp_err_finish(sk, asoc);
548} 527}
549 528
550/* 529/*
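The input.c hunks above collapse the ICMP error path onto associations only: the INIT_ACK endpoint fallback is gone, so sctp_err_lookup() either pins an association (returning its bh-locked socket) or fails. A minimal sketch of the resulting caller contract, using the names from sctp_v4_err() in the hunks above:

	static void icmp_err_sketch(struct sk_buff *skb, struct sctphdr *sh)
	{
		struct sctp_association *asoc;
		struct sctp_transport *transport;
		struct sock *sk;

		sk = sctp_err_lookup(AF_INET, skb, sh, &asoc, &transport);
		if (!sk)
			return;		/* no matching association */

		/* ... handle the ICMP type/code with sk locked for bh ... */

		sctp_err_finish(sk, asoc);	/* unlock, drop sock+asoc refs */
	}
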
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index c7e42d125b9c..fa3be2b8fb5f 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -66,8 +66,8 @@
66#include <linux/seq_file.h> 66#include <linux/seq_file.h>
67 67
68#include <net/protocol.h> 68#include <net/protocol.h>
69#include <net/tcp.h>
70#include <net/ndisc.h> 69#include <net/ndisc.h>
70#include <net/ip.h>
71#include <net/ipv6.h> 71#include <net/ipv6.h>
72#include <net/transp_v6.h> 72#include <net/transp_v6.h>
73#include <net/addrconf.h> 73#include <net/addrconf.h>
@@ -91,7 +91,6 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
91 struct ipv6hdr *iph = (struct ipv6hdr *)skb->data; 91 struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
92 struct sctphdr *sh = (struct sctphdr *)(skb->data + offset); 92 struct sctphdr *sh = (struct sctphdr *)(skb->data + offset);
93 struct sock *sk; 93 struct sock *sk;
94 struct sctp_endpoint *ep;
95 struct sctp_association *asoc; 94 struct sctp_association *asoc;
96 struct sctp_transport *transport; 95 struct sctp_transport *transport;
97 struct ipv6_pinfo *np; 96 struct ipv6_pinfo *np;
@@ -105,7 +104,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
105 savesctp = skb->h.raw; 104 savesctp = skb->h.raw;
106 skb->nh.ipv6h = iph; 105 skb->nh.ipv6h = iph;
107 skb->h.raw = (char *)sh; 106 skb->h.raw = (char *)sh;
108 sk = sctp_err_lookup(AF_INET6, skb, sh, &ep, &asoc, &transport); 107 sk = sctp_err_lookup(AF_INET6, skb, sh, &asoc, &transport);
109 /* Put back the original pointers. */ 108 /* Put back the original pointers. */
110 skb->nh.raw = saveip; 109 skb->nh.raw = saveip;
111 skb->h.raw = savesctp; 110 skb->h.raw = savesctp;
@@ -124,7 +123,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
124 goto out_unlock; 123 goto out_unlock;
125 case ICMPV6_PARAMPROB: 124 case ICMPV6_PARAMPROB:
126 if (ICMPV6_UNK_NEXTHDR == code) { 125 if (ICMPV6_UNK_NEXTHDR == code) {
127 sctp_icmp_proto_unreachable(sk, ep, asoc, transport); 126 sctp_icmp_proto_unreachable(sk, asoc, transport);
128 goto out_unlock; 127 goto out_unlock;
129 } 128 }
130 break; 129 break;
@@ -142,7 +141,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
142 } 141 }
143 142
144out_unlock: 143out_unlock:
145 sctp_err_finish(sk, ep, asoc); 144 sctp_err_finish(sk, asoc);
146out: 145out:
147 if (likely(idev != NULL)) 146 if (likely(idev != NULL))
148 in6_dev_put(idev); 147 in6_dev_put(idev);
@@ -642,10 +641,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
642 else 641 else
643 newinet->pmtudisc = IP_PMTUDISC_WANT; 642 newinet->pmtudisc = IP_PMTUDISC_WANT;
644 643
645#ifdef INET_REFCNT_DEBUG 644 sk_refcnt_debug_inc(newsk);
646 atomic_inc(&inet6_sock_nr);
647 atomic_inc(&inet_sock_nr);
648#endif
649 645
650 if (newsk->sk_prot->init(newsk)) { 646 if (newsk->sk_prot->init(newsk)) {
651 sk_common_release(newsk); 647 sk_common_release(newsk);
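Both create_accept_sk hunks (this one and the IPv4 twin in protocol.c below) swap the hand-rolled INET_REFCNT_DEBUG counters for sk_refcnt_debug_inc(). A rough sketch of that helper as defined in include/net/sock.h of this era, from memory, so treat the details as an approximation:

	#ifdef SOCK_REFCNT_DEBUG
	static inline void sk_refcnt_debug_inc(struct sock *sk)
	{
		/* one per-protocol counter instead of global inet counters */
		atomic_inc(&sk->sk_prot->socks);
	}
	#else
	#define sk_refcnt_debug_inc(sk) do { } while (0)
	#endif
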
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index 0781e5d509fd..8ff588f0d76a 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -127,8 +127,12 @@ done:
127/* Initialize the objcount in the proc filesystem. */ 127/* Initialize the objcount in the proc filesystem. */
128void sctp_dbg_objcnt_init(void) 128void sctp_dbg_objcnt_init(void)
129{ 129{
130 create_proc_read_entry("sctp_dbg_objcnt", 0, proc_net_sctp, 130 struct proc_dir_entry *ent;
131 ent = create_proc_read_entry("sctp_dbg_objcnt", 0, proc_net_sctp,
131 sctp_dbg_objcnt_read, NULL); 132 sctp_dbg_objcnt_read, NULL);
133 if (!ent)
134 printk(KERN_WARNING
135 "sctp_dbg_objcnt: Unable to create /proc entry.\n");
132} 136}
133 137
134/* Cleanup the objcount entry in the proc filesystem. */ 138/* Cleanup the objcount entry in the proc filesystem. */
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 98d49ec9b74b..b74f7772b576 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -57,6 +57,7 @@ static struct snmp_mib sctp_snmp_list[] = {
57 SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS), 57 SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS),
58 SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS), 58 SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS),
59 SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS), 59 SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS),
60 SNMP_MIB_SENTINEL
60}; 61};
61 62
62/* Return the current value of a particular entry in the mib by adding its 63/* Return the current value of a particular entry in the mib by adding its
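Appending SNMP_MIB_SENTINEL lets the /proc dump walk sctp_snmp_list without a hard-coded length. A sketch of the pattern, assuming the usual include/net/snmp.h shape of the terminator (fold_field() stands in for this file's existing per-CPU summing helper):

	/* assumed: #define SNMP_MIB_SENTINEL { .name = NULL, .entry = 0 } */
	int i;

	for (i = 0; sctp_snmp_list[i].name != NULL; i++)
		seq_printf(seq, "%-32s\t%ld\n",
			   sctp_snmp_list[i].name,
			   fold_field((void **)sctp_statistics,
				      sctp_snmp_list[i].entry));
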
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e7f37faba7c0..e7025be77691 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -62,7 +62,7 @@
62/* Global data structures. */ 62/* Global data structures. */
63struct sctp_globals sctp_globals; 63struct sctp_globals sctp_globals;
64struct proc_dir_entry *proc_net_sctp; 64struct proc_dir_entry *proc_net_sctp;
65DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics); 65DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly;
66 66
67struct idr sctp_assocs_id; 67struct idr sctp_assocs_id;
68DEFINE_SPINLOCK(sctp_assocs_id_lock); 68DEFINE_SPINLOCK(sctp_assocs_id_lock);
@@ -78,8 +78,8 @@ static struct sctp_pf *sctp_pf_inet_specific;
78static struct sctp_af *sctp_af_v4_specific; 78static struct sctp_af *sctp_af_v4_specific;
79static struct sctp_af *sctp_af_v6_specific; 79static struct sctp_af *sctp_af_v6_specific;
80 80
81kmem_cache_t *sctp_chunk_cachep; 81kmem_cache_t *sctp_chunk_cachep __read_mostly;
82kmem_cache_t *sctp_bucket_cachep; 82kmem_cache_t *sctp_bucket_cachep __read_mostly;
83 83
84extern int sctp_snmp_proc_init(void); 84extern int sctp_snmp_proc_init(void);
85extern int sctp_snmp_proc_exit(void); 85extern int sctp_snmp_proc_exit(void);
@@ -219,7 +219,7 @@ static void sctp_free_local_addr_list(void)
219 219
220/* Copy the local addresses which are valid for 'scope' into 'bp'. */ 220/* Copy the local addresses which are valid for 'scope' into 'bp'. */
221int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, 221int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
222 int gfp, int copy_flags) 222 unsigned int __nocast gfp, int copy_flags)
223{ 223{
224 struct sctp_sockaddr_entry *addr; 224 struct sctp_sockaddr_entry *addr;
225 int error = 0; 225 int error = 0;
@@ -593,9 +593,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
593 newinet->mc_index = 0; 593 newinet->mc_index = 0;
594 newinet->mc_list = NULL; 594 newinet->mc_list = NULL;
595 595
596#ifdef INET_REFCNT_DEBUG 596 sk_refcnt_debug_inc(newsk);
597 atomic_inc(&inet_sock_nr);
598#endif
599 597
600 if (newsk->sk_prot->init(newsk)) { 598 if (newsk->sk_prot->init(newsk)) {
601 sk_common_release(newsk); 599 sk_common_release(newsk);
@@ -1244,6 +1242,10 @@ SCTP_STATIC __exit void sctp_exit(void)
1244module_init(sctp_init); 1242module_init(sctp_init);
1245module_exit(sctp_exit); 1243module_exit(sctp_exit);
1246 1244
1245/*
1246 * __stringify doesn't like enums, so use the IPPROTO_SCTP value (132) directly.
1247 */
1248MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-132");
1247MODULE_AUTHOR("Linux Kernel SCTP developers <lksctp-developers@lists.sourceforge.net>"); 1249MODULE_AUTHOR("Linux Kernel SCTP developers <lksctp-developers@lists.sourceforge.net>");
1248MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)"); 1250MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)");
1249MODULE_LICENSE("GPL"); 1251MODULE_LICENSE("GPL");
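The new MODULE_ALIAS is what makes socket(PF_INET, SOCK_SEQPACKET, IPPROTO_SCTP) auto-load sctp.ko. A sketch of the demand-load side; the exact request_module() format string is an assumption, but it has to expand to the literal alias string "net-pf-2-proto-132":

	/* in the PF_INET socket() path, when no protocol handler matched: */
	request_module("net-pf-%d-proto-%d", PF_INET, IPPROTO_SCTP);
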
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 773cd93fa3d0..3868a8d70cc0 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -78,7 +78,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
78static int sctp_process_param(struct sctp_association *asoc, 78static int sctp_process_param(struct sctp_association *asoc,
79 union sctp_params param, 79 union sctp_params param,
80 const union sctp_addr *peer_addr, 80 const union sctp_addr *peer_addr,
81 int gfp); 81 unsigned int __nocast gfp);
82 82
83/* What was the inbound interface for this chunk? */ 83/* What was the inbound interface for this chunk? */
84int sctp_chunk_iif(const struct sctp_chunk *chunk) 84int sctp_chunk_iif(const struct sctp_chunk *chunk)
@@ -174,7 +174,7 @@ void sctp_init_cause(struct sctp_chunk *chunk, __u16 cause_code,
174 */ 174 */
175struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, 175struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
176 const struct sctp_bind_addr *bp, 176 const struct sctp_bind_addr *bp,
177 int gfp, int vparam_len) 177 unsigned int __nocast gfp, int vparam_len)
178{ 178{
179 sctp_inithdr_t init; 179 sctp_inithdr_t init;
180 union sctp_params addrs; 180 union sctp_params addrs;
@@ -261,7 +261,7 @@ nodata:
261 261
262struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, 262struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
263 const struct sctp_chunk *chunk, 263 const struct sctp_chunk *chunk,
264 int gfp, int unkparam_len) 264 unsigned int __nocast gfp, int unkparam_len)
265{ 265{
266 sctp_inithdr_t initack; 266 sctp_inithdr_t initack;
267 struct sctp_chunk *retval; 267 struct sctp_chunk *retval;
@@ -1233,7 +1233,8 @@ void sctp_chunk_assign_tsn(struct sctp_chunk *chunk)
1233 1233
1234/* Create a CLOSED association to use with an incoming packet. */ 1234/* Create a CLOSED association to use with an incoming packet. */
1235struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep, 1235struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
1236 struct sctp_chunk *chunk, int gfp) 1236 struct sctp_chunk *chunk,
1237 unsigned int __nocast gfp)
1237{ 1238{
1238 struct sctp_association *asoc; 1239 struct sctp_association *asoc;
1239 struct sk_buff *skb; 1240 struct sk_buff *skb;
@@ -1348,7 +1349,7 @@ nodata:
1348struct sctp_association *sctp_unpack_cookie( 1349struct sctp_association *sctp_unpack_cookie(
1349 const struct sctp_endpoint *ep, 1350 const struct sctp_endpoint *ep,
1350 const struct sctp_association *asoc, 1351 const struct sctp_association *asoc,
1351 struct sctp_chunk *chunk, int gfp, 1352 struct sctp_chunk *chunk, unsigned int __nocast gfp,
1352 int *error, struct sctp_chunk **errp) 1353 int *error, struct sctp_chunk **errp)
1353{ 1354{
1354 struct sctp_association *retval = NULL; 1355 struct sctp_association *retval = NULL;
@@ -1361,6 +1362,7 @@ struct sctp_association *sctp_unpack_cookie(
1361 char *key; 1362 char *key;
1362 sctp_scope_t scope; 1363 sctp_scope_t scope;
1363 struct sk_buff *skb = chunk->skb; 1364 struct sk_buff *skb = chunk->skb;
1365 struct timeval tv;
1364 1366
1365 headersize = sizeof(sctp_chunkhdr_t) + SCTP_SECRET_SIZE; 1367 headersize = sizeof(sctp_chunkhdr_t) + SCTP_SECRET_SIZE;
1366 bodysize = ntohs(chunk->chunk_hdr->length) - headersize; 1368 bodysize = ntohs(chunk->chunk_hdr->length) - headersize;
@@ -1433,7 +1435,8 @@ no_hmac:
1433 * an association, there is no need to check cookie's expiration 1435 * an association, there is no need to check cookie's expiration
1434 * for init collision case of lost COOKIE ACK. 1436 * for init collision case of lost COOKIE ACK.
1435 */ 1437 */
1436 if (!asoc && tv_lt(bear_cookie->expiration, skb->stamp)) { 1438 skb_get_timestamp(skb, &tv);
1439 if (!asoc && tv_lt(bear_cookie->expiration, tv)) {
1437 __u16 len; 1440 __u16 len;
1438 /* 1441 /*
1439 * Section 3.3.10.3 Stale Cookie Error (3) 1442 * Section 3.3.10.3 Stale Cookie Error (3)
@@ -1446,10 +1449,9 @@ no_hmac:
1446 len = ntohs(chunk->chunk_hdr->length); 1449 len = ntohs(chunk->chunk_hdr->length);
1447 *errp = sctp_make_op_error_space(asoc, chunk, len); 1450 *errp = sctp_make_op_error_space(asoc, chunk, len);
1448 if (*errp) { 1451 if (*errp) {
1449 suseconds_t usecs = (skb->stamp.tv_sec - 1452 suseconds_t usecs = (tv.tv_sec -
1450 bear_cookie->expiration.tv_sec) * 1000000L + 1453 bear_cookie->expiration.tv_sec) * 1000000L +
1451 skb->stamp.tv_usec - 1454 tv.tv_usec - bear_cookie->expiration.tv_usec;
1452 bear_cookie->expiration.tv_usec;
1453 1455
1454 usecs = htonl(usecs); 1456 usecs = htonl(usecs);
1455 sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE, 1457 sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE,
@@ -1812,7 +1814,7 @@ int sctp_verify_init(const struct sctp_association *asoc,
1812 */ 1814 */
1813int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, 1815int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
1814 const union sctp_addr *peer_addr, 1816 const union sctp_addr *peer_addr,
1815 sctp_init_chunk_t *peer_init, int gfp) 1817 sctp_init_chunk_t *peer_init, unsigned int __nocast gfp)
1816{ 1818{
1817 union sctp_params param; 1819 union sctp_params param;
1818 struct sctp_transport *transport; 1820 struct sctp_transport *transport;
@@ -1983,7 +1985,7 @@ nomem:
1983static int sctp_process_param(struct sctp_association *asoc, 1985static int sctp_process_param(struct sctp_association *asoc,
1984 union sctp_params param, 1986 union sctp_params param,
1985 const union sctp_addr *peer_addr, 1987 const union sctp_addr *peer_addr,
1986 int gfp) 1988 unsigned int __nocast gfp)
1987{ 1989{
1988 union sctp_addr addr; 1990 union sctp_addr addr;
1989 int i; 1991 int i;
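The stale-cookie hunk stops poking skb->stamp directly and goes through skb_get_timestamp() instead. A sketch of the accessor, assuming the off_sec/off_usec layout that the svcsock.c hunk further down also uses; any base-offset conversion in the real helper is elided here:

	static inline void skb_get_timestamp(const struct sk_buff *skb,
					     struct timeval *stamp)
	{
		stamp->tv_sec  = skb->tstamp.off_sec;
		stamp->tv_usec = skb->tstamp.off_usec;
	}
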
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 778639db125a..39c970b5b198 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -63,7 +63,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
63 void *event_arg, 63 void *event_arg,
64 sctp_disposition_t status, 64 sctp_disposition_t status,
65 sctp_cmd_seq_t *commands, 65 sctp_cmd_seq_t *commands,
66 int gfp); 66 unsigned int __nocast gfp);
67static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, 67static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
68 sctp_state_t state, 68 sctp_state_t state,
69 struct sctp_endpoint *ep, 69 struct sctp_endpoint *ep,
@@ -71,7 +71,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
71 void *event_arg, 71 void *event_arg,
72 sctp_disposition_t status, 72 sctp_disposition_t status,
73 sctp_cmd_seq_t *commands, 73 sctp_cmd_seq_t *commands,
74 int gfp); 74 unsigned int __nocast gfp);
75 75
76/******************************************************************** 76/********************************************************************
77 * Helper functions 77 * Helper functions
@@ -497,7 +497,8 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
497static int sctp_cmd_process_init(sctp_cmd_seq_t *commands, 497static int sctp_cmd_process_init(sctp_cmd_seq_t *commands,
498 struct sctp_association *asoc, 498 struct sctp_association *asoc,
499 struct sctp_chunk *chunk, 499 struct sctp_chunk *chunk,
500 sctp_init_chunk_t *peer_init, int gfp) 500 sctp_init_chunk_t *peer_init,
501 unsigned int __nocast gfp)
501{ 502{
502 int error; 503 int error;
503 504
@@ -852,7 +853,7 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype,
852 struct sctp_endpoint *ep, 853 struct sctp_endpoint *ep,
853 struct sctp_association *asoc, 854 struct sctp_association *asoc,
854 void *event_arg, 855 void *event_arg,
855 int gfp) 856 unsigned int __nocast gfp)
856{ 857{
857 sctp_cmd_seq_t commands; 858 sctp_cmd_seq_t commands;
858 const sctp_sm_table_entry_t *state_fn; 859 const sctp_sm_table_entry_t *state_fn;
@@ -897,7 +898,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
897 void *event_arg, 898 void *event_arg,
898 sctp_disposition_t status, 899 sctp_disposition_t status,
899 sctp_cmd_seq_t *commands, 900 sctp_cmd_seq_t *commands,
900 int gfp) 901 unsigned int __nocast gfp)
901{ 902{
902 int error; 903 int error;
903 904
@@ -985,7 +986,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
985 void *event_arg, 986 void *event_arg,
986 sctp_disposition_t status, 987 sctp_disposition_t status,
987 sctp_cmd_seq_t *commands, 988 sctp_cmd_seq_t *commands,
988 int gfp) 989 unsigned int __nocast gfp)
989{ 990{
990 int error = 0; 991 int error = 0;
991 int force; 992 int force;
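Every `int gfp` to `unsigned int __nocast gfp` change in this series is a sparse annotation with no effect on generated code. A sketch of the definition, assuming the linux/compiler.h convention of the time:

	#ifdef __CHECKER__
	# define __nocast __attribute__((nocast))	/* sparse only */
	#else
	# define __nocast				/* no-op for gcc */
	#endif

	/* sparse now warns when a plain int reaches a __nocast gfp
	 * parameter -- the stepping stone to the dedicated gfp_t type. */
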
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 091a66f06a35..91ec8c936913 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4194,8 +4194,7 @@ out:
4194 sctp_release_sock(sk); 4194 sctp_release_sock(sk);
4195 return err; 4195 return err;
4196cleanup: 4196cleanup:
4197 if (tfm) 4197 sctp_crypto_free_tfm(tfm);
4198 sctp_crypto_free_tfm(tfm);
4199 goto out; 4198 goto out;
4200} 4199}
4201 4200
@@ -4892,7 +4891,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
4892 sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) { 4891 sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) {
4893 event = sctp_skb2event(skb); 4892 event = sctp_skb2event(skb);
4894 if (event->asoc == assoc) { 4893 if (event->asoc == assoc) {
4895 __skb_unlink(skb, skb->list); 4894 __skb_unlink(skb, &oldsk->sk_receive_queue);
4896 __skb_queue_tail(&newsk->sk_receive_queue, skb); 4895 __skb_queue_tail(&newsk->sk_receive_queue, skb);
4897 } 4896 }
4898 } 4897 }
@@ -4921,7 +4920,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
4921 sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) { 4920 sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) {
4922 event = sctp_skb2event(skb); 4921 event = sctp_skb2event(skb);
4923 if (event->asoc == assoc) { 4922 if (event->asoc == assoc) {
4924 __skb_unlink(skb, skb->list); 4923 __skb_unlink(skb, &oldsp->pd_lobby);
4925 __skb_queue_tail(queue, skb); 4924 __skb_queue_tail(queue, skb);
4926 } 4925 }
4927 } 4926 }
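The two sctp_sock_migrate() hunks are part of the tree-wide removal of skb->list: __skb_unlink() now takes the owning queue explicitly rather than trusting a back-pointer inside the skb. A sketch of the helper after that change (shape per include/linux/skbuff.h, from memory):

	static inline void __skb_unlink(struct sk_buff *skb,
					struct sk_buff_head *list)
	{
		struct sk_buff *next, *prev;

		list->qlen--;
		next = skb->next;
		prev = skb->prev;
		skb->next = skb->prev = NULL;	/* detached */
		next->prev = prev;
		prev->next = next;
	}
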
diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c
index e627d2b451b6..25037daf3fa0 100644
--- a/net/sctp/ssnmap.c
+++ b/net/sctp/ssnmap.c
@@ -57,7 +57,8 @@ static inline size_t sctp_ssnmap_size(__u16 in, __u16 out)
57/* Create a new sctp_ssnmap. 57/* Create a new sctp_ssnmap.
58 * Allocate room to store at least 'len' contiguous TSNs. 58 * Allocate room to store at least 'len' contiguous TSNs.
59 */ 59 */
60struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, int gfp) 60struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out,
61 unsigned int __nocast gfp)
61{ 62{
62 struct sctp_ssnmap *retval; 63 struct sctp_ssnmap *retval;
63 int size; 64 int size;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index dc4893474f18..75b28dd634fe 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -42,6 +42,7 @@
42 */ 42 */
43 43
44#include <net/sctp/structs.h> 44#include <net/sctp/structs.h>
45#include <net/sctp/sctp.h>
45#include <linux/sysctl.h> 46#include <linux/sysctl.h>
46 47
47static ctl_handler sctp_sysctl_jiffies_ms; 48static ctl_handler sctp_sysctl_jiffies_ms;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index a63b69179607..d2f04ebe5081 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -57,7 +57,7 @@
57/* Initialize a new transport from provided memory. */ 57/* Initialize a new transport from provided memory. */
58static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, 58static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
59 const union sctp_addr *addr, 59 const union sctp_addr *addr,
60 int gfp) 60 unsigned int __nocast gfp)
61{ 61{
62 /* Copy in the address. */ 62 /* Copy in the address. */
63 peer->ipaddr = *addr; 63 peer->ipaddr = *addr;
@@ -121,7 +121,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
121} 121}
122 122
123/* Allocate and initialize a new transport. */ 123/* Allocate and initialize a new transport. */
124struct sctp_transport *sctp_transport_new(const union sctp_addr *addr, int gfp) 124struct sctp_transport *sctp_transport_new(const union sctp_addr *addr,
125 unsigned int __nocast gfp)
125{ 126{
126 struct sctp_transport *transport; 127 struct sctp_transport *transport;
127 128
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 17d0ff534735..0abd5101107c 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -74,7 +74,7 @@ SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, int msg_flags)
74 74
75/* Create a new sctp_ulpevent. */ 75/* Create a new sctp_ulpevent. */
76SCTP_STATIC struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, 76SCTP_STATIC struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags,
77 int gfp) 77 unsigned int __nocast gfp)
78{ 78{
79 struct sctp_ulpevent *event; 79 struct sctp_ulpevent *event;
80 struct sk_buff *skb; 80 struct sk_buff *skb;
@@ -136,7 +136,7 @@ static inline void sctp_ulpevent_release_owner(struct sctp_ulpevent *event)
136struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( 136struct sctp_ulpevent *sctp_ulpevent_make_assoc_change(
137 const struct sctp_association *asoc, 137 const struct sctp_association *asoc,
138 __u16 flags, __u16 state, __u16 error, __u16 outbound, 138 __u16 flags, __u16 state, __u16 error, __u16 outbound,
139 __u16 inbound, int gfp) 139 __u16 inbound, unsigned int __nocast gfp)
140{ 140{
141 struct sctp_ulpevent *event; 141 struct sctp_ulpevent *event;
142 struct sctp_assoc_change *sac; 142 struct sctp_assoc_change *sac;
@@ -237,7 +237,7 @@ fail:
237struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( 237struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change(
238 const struct sctp_association *asoc, 238 const struct sctp_association *asoc,
239 const struct sockaddr_storage *aaddr, 239 const struct sockaddr_storage *aaddr,
240 int flags, int state, int error, int gfp) 240 int flags, int state, int error, unsigned int __nocast gfp)
241{ 241{
242 struct sctp_ulpevent *event; 242 struct sctp_ulpevent *event;
243 struct sctp_paddr_change *spc; 243 struct sctp_paddr_change *spc;
@@ -350,7 +350,7 @@ fail:
350 */ 350 */
351struct sctp_ulpevent *sctp_ulpevent_make_remote_error( 351struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
352 const struct sctp_association *asoc, struct sctp_chunk *chunk, 352 const struct sctp_association *asoc, struct sctp_chunk *chunk,
353 __u16 flags, int gfp) 353 __u16 flags, unsigned int __nocast gfp)
354{ 354{
355 struct sctp_ulpevent *event; 355 struct sctp_ulpevent *event;
356 struct sctp_remote_error *sre; 356 struct sctp_remote_error *sre;
@@ -448,7 +448,7 @@ fail:
448 */ 448 */
449struct sctp_ulpevent *sctp_ulpevent_make_send_failed( 449struct sctp_ulpevent *sctp_ulpevent_make_send_failed(
450 const struct sctp_association *asoc, struct sctp_chunk *chunk, 450 const struct sctp_association *asoc, struct sctp_chunk *chunk,
451 __u16 flags, __u32 error, int gfp) 451 __u16 flags, __u32 error, unsigned int __nocast gfp)
452{ 452{
453 struct sctp_ulpevent *event; 453 struct sctp_ulpevent *event;
454 struct sctp_send_failed *ssf; 454 struct sctp_send_failed *ssf;
@@ -557,7 +557,7 @@ fail:
557 */ 557 */
558struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event( 558struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event(
559 const struct sctp_association *asoc, 559 const struct sctp_association *asoc,
560 __u16 flags, int gfp) 560 __u16 flags, unsigned int __nocast gfp)
561{ 561{
562 struct sctp_ulpevent *event; 562 struct sctp_ulpevent *event;
563 struct sctp_shutdown_event *sse; 563 struct sctp_shutdown_event *sse;
@@ -620,7 +620,7 @@ fail:
620 * 5.3.1.6 SCTP_ADAPTION_INDICATION 620 * 5.3.1.6 SCTP_ADAPTION_INDICATION
621 */ 621 */
622struct sctp_ulpevent *sctp_ulpevent_make_adaption_indication( 622struct sctp_ulpevent *sctp_ulpevent_make_adaption_indication(
623 const struct sctp_association *asoc, int gfp) 623 const struct sctp_association *asoc, unsigned int __nocast gfp)
624{ 624{
625 struct sctp_ulpevent *event; 625 struct sctp_ulpevent *event;
626 struct sctp_adaption_event *sai; 626 struct sctp_adaption_event *sai;
@@ -657,7 +657,7 @@ fail:
657 */ 657 */
658struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, 658struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
659 struct sctp_chunk *chunk, 659 struct sctp_chunk *chunk,
660 int gfp) 660 unsigned int __nocast gfp)
661{ 661{
662 struct sctp_ulpevent *event = NULL; 662 struct sctp_ulpevent *event = NULL;
663 struct sk_buff *skb; 663 struct sk_buff *skb;
@@ -718,7 +718,8 @@ fail:
718 * various events. 718 * various events.
719 */ 719 */
720struct sctp_ulpevent *sctp_ulpevent_make_pdapi( 720struct sctp_ulpevent *sctp_ulpevent_make_pdapi(
721 const struct sctp_association *asoc, __u32 indication, int gfp) 721 const struct sctp_association *asoc, __u32 indication,
722 unsigned int __nocast gfp)
722{ 723{
723 struct sctp_ulpevent *event; 724 struct sctp_ulpevent *event;
724 struct sctp_pdapi_event *pd; 725 struct sctp_pdapi_event *pd;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index d5dd2cf7ac4a..ec2c857eae7f 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -50,9 +50,9 @@
50 50
51/* Forward declarations for internal helpers. */ 51/* Forward declarations for internal helpers. */
52static struct sctp_ulpevent * sctp_ulpq_reasm(struct sctp_ulpq *ulpq, 52static struct sctp_ulpevent * sctp_ulpq_reasm(struct sctp_ulpq *ulpq,
53 struct sctp_ulpevent *); 53 struct sctp_ulpevent *);
54static struct sctp_ulpevent * sctp_ulpq_order(struct sctp_ulpq *, 54static struct sctp_ulpevent * sctp_ulpq_order(struct sctp_ulpq *,
55 struct sctp_ulpevent *); 55 struct sctp_ulpevent *);
56 56
57/* 1st Level Abstractions */ 57/* 1st Level Abstractions */
58 58
@@ -100,7 +100,7 @@ void sctp_ulpq_free(struct sctp_ulpq *ulpq)
100 100
101/* Process an incoming DATA chunk. */ 101/* Process an incoming DATA chunk. */
102int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, 102int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
103 int gfp) 103 unsigned int __nocast gfp)
104{ 104{
105 struct sk_buff_head temp; 105 struct sk_buff_head temp;
106 sctp_data_chunk_t *hdr; 106 sctp_data_chunk_t *hdr;
@@ -125,7 +125,9 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
125 event = sctp_ulpq_order(ulpq, event); 125 event = sctp_ulpq_order(ulpq, event);
126 } 126 }
127 127
128 /* Send event to the ULP. */ 128 /* Send event to the ULP. 'event' is the sctp_ulpevent for
129 * the very first SKB on the 'temp' list.
130 */
129 if (event) 131 if (event)
130 sctp_ulpq_tail_event(ulpq, event); 132 sctp_ulpq_tail_event(ulpq, event);
131 133
@@ -158,14 +160,18 @@ static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq)
158 return sctp_clear_pd(ulpq->asoc->base.sk); 160 return sctp_clear_pd(ulpq->asoc->base.sk);
159} 161}
160 162
161 163/* If the SKB of 'event' is on a list, it is the first such member
162 164 * of that list.
165 */
163int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) 166int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
164{ 167{
165 struct sock *sk = ulpq->asoc->base.sk; 168 struct sock *sk = ulpq->asoc->base.sk;
166 struct sk_buff_head *queue; 169 struct sk_buff_head *queue, *skb_list;
170 struct sk_buff *skb = sctp_event2skb(event);
167 int clear_pd = 0; 171 int clear_pd = 0;
168 172
173 skb_list = (struct sk_buff_head *) skb->prev;
174
169 /* If the socket is just going to throw this away, do not 175 /* If the socket is just going to throw this away, do not
170 * even try to deliver it. 176 * even try to deliver it.
171 */ 177 */
@@ -197,10 +203,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
197 /* If we are harvesting multiple skbs they will be 203 /* If we are harvesting multiple skbs they will be
198 * collected on a list. 204 * collected on a list.
199 */ 205 */
200 if (sctp_event2skb(event)->list) 206 if (skb_list)
201 sctp_skb_list_tail(sctp_event2skb(event)->list, queue); 207 sctp_skb_list_tail(skb_list, queue);
202 else 208 else
203 __skb_queue_tail(queue, sctp_event2skb(event)); 209 __skb_queue_tail(queue, skb);
204 210
205 /* Did we just complete partial delivery and need to get 211 /* Did we just complete partial delivery and need to get
206 * rolling again? Move pending data to the receive 212 * rolling again? Move pending data to the receive
@@ -214,10 +220,11 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
214 return 1; 220 return 1;
215 221
216out_free: 222out_free:
217 if (sctp_event2skb(event)->list) 223 if (skb_list)
218 sctp_queue_purge_ulpevents(sctp_event2skb(event)->list); 224 sctp_queue_purge_ulpevents(skb_list);
219 else 225 else
220 sctp_ulpevent_free(event); 226 sctp_ulpevent_free(event);
227
221 return 0; 228 return 0;
222} 229}
223 230
@@ -269,7 +276,7 @@ static inline void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq,
269 * payload was fragmented on the way and ip had to reassemble them. 276 * payload was fragmented on the way and ip had to reassemble them.
270 * We add the rest of skb's to the first skb's fraglist. 277 * We add the rest of skb's to the first skb's fraglist.
271 */ 278 */
272static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag, struct sk_buff *l_frag) 279static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag)
273{ 280{
274 struct sk_buff *pos; 281 struct sk_buff *pos;
275 struct sctp_ulpevent *event; 282 struct sctp_ulpevent *event;
@@ -294,7 +301,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag,
294 skb_shinfo(f_frag)->frag_list = pos; 301 skb_shinfo(f_frag)->frag_list = pos;
295 302
296 /* Remove the first fragment from the reassembly queue. */ 303 /* Remove the first fragment from the reassembly queue. */
297 __skb_unlink(f_frag, f_frag->list); 304 __skb_unlink(f_frag, queue);
298 while (pos) { 305 while (pos) {
299 306
300 pnext = pos->next; 307 pnext = pos->next;
@@ -304,7 +311,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag,
304 f_frag->data_len += pos->len; 311 f_frag->data_len += pos->len;
305 312
306 /* Remove the fragment from the reassembly queue. */ 313 /* Remove the fragment from the reassembly queue. */
307 __skb_unlink(pos, pos->list); 314 __skb_unlink(pos, queue);
308 315
309 /* Break if we have reached the last fragment. */ 316 /* Break if we have reached the last fragment. */
310 if (pos == l_frag) 317 if (pos == l_frag)
@@ -375,7 +382,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
375done: 382done:
376 return retval; 383 return retval;
377found: 384found:
378 retval = sctp_make_reassembled_event(first_frag, pos); 385 retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, pos);
379 if (retval) 386 if (retval)
380 retval->msg_flags |= MSG_EOR; 387 retval->msg_flags |= MSG_EOR;
381 goto done; 388 goto done;
@@ -435,7 +442,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq
435 * further. 442 * further.
436 */ 443 */
437done: 444done:
438 retval = sctp_make_reassembled_event(first_frag, last_frag); 445 retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag);
439 if (retval && is_last) 446 if (retval && is_last)
440 retval->msg_flags |= MSG_EOR; 447 retval->msg_flags |= MSG_EOR;
441 448
@@ -527,7 +534,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *u
527 * further. 534 * further.
528 */ 535 */
529done: 536done:
530 retval = sctp_make_reassembled_event(first_frag, last_frag); 537 retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag);
531 return retval; 538 return retval;
532} 539}
533 540
@@ -537,6 +544,7 @@ done:
537static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, 544static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq,
538 struct sctp_ulpevent *event) 545 struct sctp_ulpevent *event)
539{ 546{
547 struct sk_buff_head *event_list;
540 struct sk_buff *pos, *tmp; 548 struct sk_buff *pos, *tmp;
541 struct sctp_ulpevent *cevent; 549 struct sctp_ulpevent *cevent;
542 struct sctp_stream *in; 550 struct sctp_stream *in;
@@ -547,6 +555,8 @@ static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq,
547 ssn = event->ssn; 555 ssn = event->ssn;
548 in = &ulpq->asoc->ssnmap->in; 556 in = &ulpq->asoc->ssnmap->in;
549 557
558 event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev;
559
550 /* We are holding the chunks by stream, by SSN. */ 560 /* We are holding the chunks by stream, by SSN. */
551 sctp_skb_for_each(pos, &ulpq->lobby, tmp) { 561 sctp_skb_for_each(pos, &ulpq->lobby, tmp) {
552 cevent = (struct sctp_ulpevent *) pos->cb; 562 cevent = (struct sctp_ulpevent *) pos->cb;
@@ -567,10 +577,10 @@ static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq,
567 /* Found it, so mark in the ssnmap. */ 577 /* Found it, so mark in the ssnmap. */
568 sctp_ssn_next(in, sid); 578 sctp_ssn_next(in, sid);
569 579
570 __skb_unlink(pos, pos->list); 580 __skb_unlink(pos, &ulpq->lobby);
571 581
572 /* Attach all gathered skbs to the event. */ 582 /* Attach all gathered skbs to the event. */
573 __skb_queue_tail(sctp_event2skb(event)->list, pos); 583 __skb_queue_tail(event_list, pos);
574 } 584 }
575} 585}
576 586
@@ -626,7 +636,7 @@ static inline void sctp_ulpq_store_ordered(struct sctp_ulpq *ulpq,
626} 636}
627 637
628static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, 638static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq,
629 struct sctp_ulpevent *event) 639 struct sctp_ulpevent *event)
630{ 640{
631 __u16 sid, ssn; 641 __u16 sid, ssn;
632 struct sctp_stream *in; 642 struct sctp_stream *in;
@@ -667,7 +677,7 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq)
667{ 677{
668 struct sk_buff *pos, *tmp; 678 struct sk_buff *pos, *tmp;
669 struct sctp_ulpevent *cevent; 679 struct sctp_ulpevent *cevent;
670 struct sctp_ulpevent *event = NULL; 680 struct sctp_ulpevent *event;
671 struct sctp_stream *in; 681 struct sctp_stream *in;
672 struct sk_buff_head temp; 682 struct sk_buff_head temp;
673 __u16 csid, cssn; 683 __u16 csid, cssn;
@@ -675,6 +685,8 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq)
675 in = &ulpq->asoc->ssnmap->in; 685 in = &ulpq->asoc->ssnmap->in;
676 686
677 /* We are holding the chunks by stream, by SSN. */ 687 /* We are holding the chunks by stream, by SSN. */
688 skb_queue_head_init(&temp);
689 event = NULL;
678 sctp_skb_for_each(pos, &ulpq->lobby, tmp) { 690 sctp_skb_for_each(pos, &ulpq->lobby, tmp) {
679 cevent = (struct sctp_ulpevent *) pos->cb; 691 cevent = (struct sctp_ulpevent *) pos->cb;
680 csid = cevent->stream; 692 csid = cevent->stream;
@@ -686,19 +698,20 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq)
686 /* Found it, so mark in the ssnmap. */ 698 /* Found it, so mark in the ssnmap. */
687 sctp_ssn_next(in, csid); 699 sctp_ssn_next(in, csid);
688 700
689 __skb_unlink(pos, pos->list); 701 __skb_unlink(pos, &ulpq->lobby);
690 if (!event) { 702 if (!event) {
691 /* Create a temporary list to collect chunks on. */ 703 /* Create a temporary list to collect chunks on. */
692 event = sctp_skb2event(pos); 704 event = sctp_skb2event(pos);
693 skb_queue_head_init(&temp);
694 __skb_queue_tail(&temp, sctp_event2skb(event)); 705 __skb_queue_tail(&temp, sctp_event2skb(event));
695 } else { 706 } else {
696 /* Attach all gathered skbs to the event. */ 707 /* Attach all gathered skbs to the event. */
697 __skb_queue_tail(sctp_event2skb(event)->list, pos); 708 __skb_queue_tail(&temp, pos);
698 } 709 }
699 } 710 }
700 711
701 /* Send event to the ULP. */ 712 /* Send event to the ULP. 'event' is the sctp_ulpevent for
713 * the very first SKB on the 'temp' list.
714 */
702 if (event) 715 if (event)
703 sctp_ulpq_tail_event(ulpq, event); 716 sctp_ulpq_tail_event(ulpq, event);
704} 717}
@@ -778,7 +791,8 @@ static __u16 sctp_ulpq_renege_frags(struct sctp_ulpq *ulpq, __u16 needed)
778 791
779/* Partial deliver the first message as there is pressure on rwnd. */ 792/* Partial deliver the first message as there is pressure on rwnd. */
780void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, 793void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
781 struct sctp_chunk *chunk, int gfp) 794 struct sctp_chunk *chunk,
795 unsigned int __nocast gfp)
782{ 796{
783 struct sctp_ulpevent *event; 797 struct sctp_ulpevent *event;
784 struct sctp_association *asoc; 798 struct sctp_association *asoc;
@@ -802,7 +816,7 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
802 816
803/* Renege some packets to make room for an incoming chunk. */ 817/* Renege some packets to make room for an incoming chunk. */
804void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, 818void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
805 int gfp) 819 unsigned int __nocast gfp)
806{ 820{
807 struct sctp_association *asoc; 821 struct sctp_association *asoc;
808 __u16 needed, freed; 822 __u16 needed, freed;
@@ -841,7 +855,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
841/* Notify the application if an association is aborted and in 855/* Notify the application if an association is aborted and in
842 * partial delivery mode. Send up any pending received messages. 856 * partial delivery mode. Send up any pending received messages.
843 */ 857 */
844void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, int gfp) 858void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, unsigned int __nocast gfp)
845{ 859{
846 struct sctp_ulpevent *ev = NULL; 860 struct sctp_ulpevent *ev = NULL;
847 struct sock *sk; 861 struct sock *sk;
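With skb->list gone, ulpqueue.c recovers the owning queue from skb->prev. That works because an sk_buff queue is circular and struct sk_buff_head begins with the same next/prev pair as struct sk_buff, so for the first element of a queue the prev pointer aliases the head itself. A sketch of the invariant the casts above rely on:

	struct sk_buff_head temp;
	struct sk_buff *skb = sctp_event2skb(event);
	struct sk_buff_head *owner;

	skb_queue_head_init(&temp);
	__skb_queue_tail(&temp, skb);

	/* circular list: head <-> skb <-> head, so for the first
	 * element skb->prev points back at the head */
	owner = (struct sk_buff_head *)skb->prev;	/* owner == &temp */
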
diff --git a/net/socket.c b/net/socket.c
index 6f2a17881972..94fe638b4d72 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -70,6 +70,8 @@
70#include <linux/seq_file.h> 70#include <linux/seq_file.h>
71#include <linux/wanrouter.h> 71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h> 72#include <linux/if_bridge.h>
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
73#include <linux/init.h> 75#include <linux/init.h>
74#include <linux/poll.h> 76#include <linux/poll.h>
75#include <linux/cache.h> 77#include <linux/cache.h>
@@ -272,7 +274,7 @@ int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ule
272 274
273#define SOCKFS_MAGIC 0x534F434B 275#define SOCKFS_MAGIC 0x534F434B
274 276
275static kmem_cache_t * sock_inode_cachep; 277static kmem_cache_t * sock_inode_cachep __read_mostly;
276 278
277static struct inode *sock_alloc_inode(struct super_block *sb) 279static struct inode *sock_alloc_inode(struct super_block *sb)
278{ 280{
@@ -331,7 +333,7 @@ static struct super_block *sockfs_get_sb(struct file_system_type *fs_type,
331 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC); 333 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC);
332} 334}
333 335
334static struct vfsmount *sock_mnt; 336static struct vfsmount *sock_mnt __read_mostly;
335 337
336static struct file_system_type sock_fs_type = { 338static struct file_system_type sock_fs_type = {
337 .name = "sockfs", 339 .name = "sockfs",
@@ -404,6 +406,7 @@ int sock_map_fd(struct socket *sock)
404 file->f_mode = FMODE_READ | FMODE_WRITE; 406 file->f_mode = FMODE_READ | FMODE_WRITE;
405 file->f_flags = O_RDWR; 407 file->f_flags = O_RDWR;
406 file->f_pos = 0; 408 file->f_pos = 0;
409 file->private_data = sock;
407 fd_install(fd, file); 410 fd_install(fd, file);
408 } 411 }
409 412
@@ -436,6 +439,9 @@ struct socket *sockfd_lookup(int fd, int *err)
436 return NULL; 439 return NULL;
437 } 440 }
438 441
442 if (file->f_op == &socket_file_ops)
443 return file->private_data; /* set in sock_map_fd */
444
439 inode = file->f_dentry->d_inode; 445 inode = file->f_dentry->d_inode;
440 if (!S_ISSOCK(inode->i_mode)) { 446 if (!S_ISSOCK(inode->i_mode)) {
441 *err = -ENOTSOCK; 447 *err = -ENOTSOCK;
@@ -720,8 +726,8 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
720 return __sock_sendmsg(iocb, sock, &x->async_msg, size); 726 return __sock_sendmsg(iocb, sock, &x->async_msg, size);
721} 727}
722 728
723ssize_t sock_sendpage(struct file *file, struct page *page, 729static ssize_t sock_sendpage(struct file *file, struct page *page,
724 int offset, size_t size, loff_t *ppos, int more) 730 int offset, size_t size, loff_t *ppos, int more)
725{ 731{
726 struct socket *sock; 732 struct socket *sock;
727 int flags; 733 int flags;
@@ -944,7 +950,7 @@ static int sock_mmap(struct file * file, struct vm_area_struct * vma)
944 return sock->ops->mmap(file, sock, vma); 950 return sock->ops->mmap(file, sock, vma);
945} 951}
946 952
947int sock_close(struct inode *inode, struct file *filp) 953static int sock_close(struct inode *inode, struct file *filp)
948{ 954{
949 /* 955 /*
950 * It was possible the inode is NULL we were 956 * It was possible the inode is NULL we were
@@ -2023,9 +2029,6 @@ int sock_unregister(int family)
2023 return 0; 2029 return 0;
2024} 2030}
2025 2031
2026
2027extern void sk_init(void);
2028
2029void __init sock_init(void) 2032void __init sock_init(void)
2030{ 2033{
2031 /* 2034 /*
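The __read_mostly tags here (sock_inode_cachep, sock_mnt) and in the sunrpc files below move rarely written globals into their own section so they stop sharing cache lines with write-hot data. A sketch of the i386-era definition; the section name is from memory:

	#define __read_mostly \
		__attribute__((__section__(".data.read_mostly")))

The sockfd_lookup() change is a separate fast path: since sock_map_fd() now stores the socket in file->private_data, a matching file->f_op proves the file is a socket and the dentry/inode checks can be skipped entirely.
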
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 24c21f2a33a7..ee6ae74cd1b2 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -160,7 +160,7 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
160 " unsupported checksum %d", cksumtype); 160 " unsupported checksum %d", cksumtype);
161 goto out; 161 goto out;
162 } 162 }
163 if (!(tfm = crypto_alloc_tfm(cksumname, 0))) 163 if (!(tfm = crypto_alloc_tfm(cksumname, CRYPTO_TFM_REQ_MAY_SLEEP)))
164 goto out; 164 goto out;
165 cksum->len = crypto_tfm_alg_digestsize(tfm); 165 cksum->len = crypto_tfm_alg_digestsize(tfm);
166 if ((cksum->data = kmalloc(cksum->len, GFP_KERNEL)) == NULL) 166 if ((cksum->data = kmalloc(cksum->len, GFP_KERNEL)) == NULL)
@@ -185,9 +185,7 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
185 sg->page = body->pages[i]; 185 sg->page = body->pages[i];
186 sg->offset = offset; 186 sg->offset = offset;
187 sg->length = thislen; 187 sg->length = thislen;
188 kmap(sg->page); /* XXX kmap_atomic? */
189 crypto_digest_update(tfm, sg, 1); 188 crypto_digest_update(tfm, sg, 1);
190 kunmap(sg->page);
191 len -= thislen; 189 len -= thislen;
192 i++; 190 i++;
193 offset = 0; 191 offset = 0;
@@ -201,8 +199,7 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
201 crypto_digest_final(tfm, cksum->data); 199 crypto_digest_final(tfm, cksum->data);
202 code = 0; 200 code = 0;
203out: 201out:
204 if (tfm) 202 crypto_free_tfm(tfm);
205 crypto_free_tfm(tfm);
206 return code; 203 return code;
207} 204}
208 205
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index cf726510df8e..606a8a82cafb 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -185,12 +185,9 @@ static void
185gss_delete_sec_context_kerberos(void *internal_ctx) { 185gss_delete_sec_context_kerberos(void *internal_ctx) {
186 struct krb5_ctx *kctx = internal_ctx; 186 struct krb5_ctx *kctx = internal_ctx;
187 187
188 if (kctx->seq) 188 crypto_free_tfm(kctx->seq);
189 crypto_free_tfm(kctx->seq); 189 crypto_free_tfm(kctx->enc);
190 if (kctx->enc) 190 kfree(kctx->mech_used.data);
191 crypto_free_tfm(kctx->enc);
192 if (kctx->mech_used.data)
193 kfree(kctx->mech_used.data);
194 kfree(kctx); 191 kfree(kctx);
195} 192}
196 193
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index dad05994c3eb..6c97d61baa9b 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -214,14 +214,10 @@ static void
214gss_delete_sec_context_spkm3(void *internal_ctx) { 214gss_delete_sec_context_spkm3(void *internal_ctx) {
215 struct spkm3_ctx *sctx = internal_ctx; 215 struct spkm3_ctx *sctx = internal_ctx;
216 216
217 if(sctx->derived_integ_key) 217 crypto_free_tfm(sctx->derived_integ_key);
218 crypto_free_tfm(sctx->derived_integ_key); 218 crypto_free_tfm(sctx->derived_conf_key);
219 if(sctx->derived_conf_key) 219 kfree(sctx->share_key.data);
220 crypto_free_tfm(sctx->derived_conf_key); 220 kfree(sctx->mech_used.data);
221 if(sctx->share_key.data)
222 kfree(sctx->share_key.data);
223 if(sctx->mech_used.data)
224 kfree(sctx->mech_used.data);
225 kfree(sctx); 221 kfree(sctx);
226} 222}
227 223
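The guards dropped in the three files above (and in sctp/socket.c earlier) assume kfree()-style NULL tolerance from the free routines. A sketch of the check this depends on; the real guard sits inside crypto_free_tfm() in crypto/api.c, shape assumed:

	void crypto_free_tfm(struct crypto_tfm *tfm)
	{
		if (unlikely(tfm == NULL))
			return;		/* accept NULL, like kfree() */
		/* ... tear down algorithm state and free the tfm ... */
	}
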
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 554f224c0445..ded6c63f11ec 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Userland/kernel interface for rpcauth_gss. 4 * Userland/kernel interface for rpcauth_gss.
5 * Code shamelessly plagiarized from fs/nfsd/nfsctl.c 5 * Code shamelessly plagiarized from fs/nfsd/nfsctl.c
6 * and fs/driverfs/inode.c 6 * and fs/sysfs/inode.c
7 * 7 *
8 * Copyright (c) 2002, Trond Myklebust <trond.myklebust@fys.uio.no> 8 * Copyright (c) 2002, Trond Myklebust <trond.myklebust@fys.uio.no>
9 * 9 *
@@ -28,13 +28,13 @@
28#include <linux/workqueue.h> 28#include <linux/workqueue.h>
29#include <linux/sunrpc/rpc_pipe_fs.h> 29#include <linux/sunrpc/rpc_pipe_fs.h>
30 30
31static struct vfsmount *rpc_mount; 31static struct vfsmount *rpc_mount __read_mostly;
32static int rpc_mount_count; 32static int rpc_mount_count;
33 33
34static struct file_system_type rpc_pipe_fs_type; 34static struct file_system_type rpc_pipe_fs_type;
35 35
36 36
37static kmem_cache_t *rpc_inode_cachep; 37static kmem_cache_t *rpc_inode_cachep __read_mostly;
38 38
39#define RPC_UPCALL_TIMEOUT (30*HZ) 39#define RPC_UPCALL_TIMEOUT (30*HZ)
40 40
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 2d9eb7fbd521..f3104035e35d 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -34,10 +34,10 @@ static int rpc_task_id;
34#define RPC_BUFFER_MAXSIZE (2048) 34#define RPC_BUFFER_MAXSIZE (2048)
35#define RPC_BUFFER_POOLSIZE (8) 35#define RPC_BUFFER_POOLSIZE (8)
36#define RPC_TASK_POOLSIZE (8) 36#define RPC_TASK_POOLSIZE (8)
37static kmem_cache_t *rpc_task_slabp; 37static kmem_cache_t *rpc_task_slabp __read_mostly;
38static kmem_cache_t *rpc_buffer_slabp; 38static kmem_cache_t *rpc_buffer_slabp __read_mostly;
39static mempool_t *rpc_task_mempool; 39static mempool_t *rpc_task_mempool __read_mostly;
40static mempool_t *rpc_buffer_mempool; 40static mempool_t *rpc_buffer_mempool __read_mostly;
41 41
42static void __rpc_default_timer(struct rpc_task *task); 42static void __rpc_default_timer(struct rpc_task *task);
43static void rpciod_killall(void); 43static void rpciod_killall(void);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 56db8f13e6cb..05fe2e735538 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -34,7 +34,7 @@
34#include <net/sock.h> 34#include <net/sock.h>
35#include <net/checksum.h> 35#include <net/checksum.h>
36#include <net/ip.h> 36#include <net/ip.h>
37#include <net/tcp.h> 37#include <net/tcp_states.h>
38#include <asm/uaccess.h> 38#include <asm/uaccess.h>
39#include <asm/ioctls.h> 39#include <asm/ioctls.h>
40 40
@@ -584,13 +584,16 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
584 /* possibly an icmp error */ 584 /* possibly an icmp error */
585 dprintk("svc: recvfrom returned error %d\n", -err); 585 dprintk("svc: recvfrom returned error %d\n", -err);
586 } 586 }
587 if (skb->stamp.tv_sec == 0) { 587 if (skb->tstamp.off_sec == 0) {
588 skb->stamp.tv_sec = xtime.tv_sec; 588 struct timeval tv;
589 skb->stamp.tv_usec = xtime.tv_nsec * 1000; 589
590 tv.tv_sec = xtime.tv_sec;
591 tv.tv_usec = xtime.tv_nsec * 1000;
592 skb_set_timestamp(skb, &tv);
590 /* Don't enable netstamp, sunrpc doesn't 593 /* Don't enable netstamp, sunrpc doesn't
591 need that much accuracy */ 594 need that much accuracy */
592 } 595 }
593 svsk->sk_sk->sk_stamp = skb->stamp; 596 skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp);
594 set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ 597 set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
595 598
596 /* 599 /*
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 8a4d9c106af1..fde16f40a581 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -993,6 +993,7 @@ xdr_xcode_array2(struct xdr_buf *buf, unsigned int base,
993 return -EINVAL; 993 return -EINVAL;
994 } else { 994 } else {
995 if (xdr_decode_word(buf, base, &desc->array_len) != 0 || 995 if (xdr_decode_word(buf, base, &desc->array_len) != 0 ||
996 desc->array_len > desc->array_maxlen ||
996 (unsigned long) base + 4 + desc->array_len * 997 (unsigned long) base + 4 + desc->array_len *
997 desc->elem_size > buf->len) 998 desc->elem_size > buf->len)
998 return -EINVAL; 999 return -EINVAL;
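The added array_maxlen test is not redundant with the existing length check: it also defuses an integer overflow. Worked example with hypothetical hostile input: desc->elem_size = 4 and desc->array_len = 0x40000000 make the 32-bit product array_len * elem_size wrap to 0, so "base + 4 + 0 > buf->len" never triggers even though the claimed array is 4 GiB. Capping array_len at the decoder-supplied array_maxlen first keeps the product far below the wrap point.
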
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 3f6e31069c54..c5241fcbb966 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -17,17 +17,15 @@
17#include <linux/sysctl.h> 17#include <linux/sysctl.h>
18 18
19#ifdef CONFIG_INET 19#ifdef CONFIG_INET
20extern struct ctl_table ipv4_table[]; 20#include <net/ip.h>
21#endif 21#endif
22 22
23extern struct ctl_table core_table[];
24
25#ifdef CONFIG_NET 23#ifdef CONFIG_NET
26extern struct ctl_table ether_table[]; 24#include <linux/if_ether.h>
27#endif 25#endif
28 26
29#ifdef CONFIG_TR 27#ifdef CONFIG_TR
30extern struct ctl_table tr_table[]; 28#include <linux/if_tr.h>
31#endif 29#endif
32 30
33struct ctl_table net_table[] = { 31struct ctl_table net_table[] = {
diff --git a/net/unix/Kconfig b/net/unix/Kconfig
new file mode 100644
index 000000000000..5a69733bcdad
--- /dev/null
+++ b/net/unix/Kconfig
@@ -0,0 +1,21 @@
1#
2# Unix Domain Sockets
3#
4
5config UNIX
6 tristate "Unix domain sockets"
7 ---help---
8 If you say Y here, you will include support for Unix domain sockets;
9 sockets are the standard Unix mechanism for establishing and
10 accessing network connections. Many commonly used programs such as
11 the X Window system and syslog use these sockets even if your
12 machine is not connected to any network. Unless you are working on
13 an embedded system or something similar, you therefore definitely
14 want to say Y here.
15
16 To compile this driver as a module, choose M here: the module will be
17 called unix. Note that several important services won't work
18 correctly if you say M here and then neglect to load the module.
19
20 Say Y unless you know what you are doing.
21
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index d403e34088ad..41feca3bef86 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -105,7 +105,7 @@
105#include <linux/skbuff.h> 105#include <linux/skbuff.h>
106#include <linux/netdevice.h> 106#include <linux/netdevice.h>
107#include <net/sock.h> 107#include <net/sock.h>
108#include <linux/tcp.h> 108#include <net/tcp_states.h>
109#include <net/af_unix.h> 109#include <net/af_unix.h>
110#include <linux/proc_fs.h> 110#include <linux/proc_fs.h>
111#include <linux/seq_file.h> 111#include <linux/seq_file.h>
@@ -2026,14 +2026,6 @@ static struct net_proto_family unix_family_ops = {
2026 .owner = THIS_MODULE, 2026 .owner = THIS_MODULE,
2027}; 2027};
2028 2028
2029#ifdef CONFIG_SYSCTL
2030extern void unix_sysctl_register(void);
2031extern void unix_sysctl_unregister(void);
2032#else
2033static inline void unix_sysctl_register(void) {}
2034static inline void unix_sysctl_unregister(void) {}
2035#endif
2036
2037static int __init af_unix_init(void) 2029static int __init af_unix_init(void)
2038{ 2030{
2039 int rc = -1; 2031 int rc = -1;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 4bd95c8f5934..6ffc64e1712d 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -76,11 +76,11 @@
76#include <linux/netdevice.h> 76#include <linux/netdevice.h>
77#include <linux/file.h> 77#include <linux/file.h>
78#include <linux/proc_fs.h> 78#include <linux/proc_fs.h>
79#include <linux/tcp.h>
80 79
81#include <net/sock.h> 80#include <net/sock.h>
82#include <net/af_unix.h> 81#include <net/af_unix.h>
83#include <net/scm.h> 82#include <net/scm.h>
83#include <net/tcp_states.h>
84 84
85/* Internal data structures and random procedures: */ 85/* Internal data structures and random procedures: */
86 86
@@ -286,16 +286,16 @@ void unix_gc(void)
286 skb = skb_peek(&s->sk_receive_queue); 286 skb = skb_peek(&s->sk_receive_queue);
287 while (skb && 287 while (skb &&
288 skb != (struct sk_buff *)&s->sk_receive_queue) { 288 skb != (struct sk_buff *)&s->sk_receive_queue) {
289 nextsk=skb->next; 289 nextsk = skb->next;
290 /* 290 /*
291 * Do we have file descriptors ? 291 * Do we have file descriptors ?
292 */ 292 */
293 if(UNIXCB(skb).fp) 293 if (UNIXCB(skb).fp) {
294 { 294 __skb_unlink(skb,
295 __skb_unlink(skb, skb->list); 295 &s->sk_receive_queue);
296 __skb_queue_tail(&hitlist,skb); 296 __skb_queue_tail(&hitlist, skb);
297 } 297 }
298 skb=nextsk; 298 skb = nextsk;
299 } 299 }
300 spin_unlock(&s->sk_receive_queue.lock); 300 spin_unlock(&s->sk_receive_queue.lock);
301 } 301 }
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index c974dac4580a..690ffa5d5bfb 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -12,7 +12,7 @@
12#include <linux/mm.h> 12#include <linux/mm.h>
13#include <linux/sysctl.h> 13#include <linux/sysctl.h>
14 14
15extern int sysctl_unix_max_dgram_qlen; 15#include <net/af_unix.h>
16 16
17static ctl_table unix_table[] = { 17static ctl_table unix_table[] = {
18 { 18 {
diff --git a/net/wanrouter/Kconfig b/net/wanrouter/Kconfig
new file mode 100644
index 000000000000..1debe1cb054e
--- /dev/null
+++ b/net/wanrouter/Kconfig
@@ -0,0 +1,29 @@
1#
2# Configuration for WAN router
3#
4
5config WAN_ROUTER
6 tristate "WAN router"
7 depends on EXPERIMENTAL
8 ---help---
9 Wide Area Networks (WANs), such as X.25, frame relay and leased
10 lines, are used to interconnect Local Area Networks (LANs) over vast
11 distances with data transfer rates significantly higher than those
12 achievable with commonly used asynchronous modem connections.
13 Usually, a quite expensive external device called a `WAN router' is
14 needed to connect to a WAN.
15
16 As an alternative, WAN routing can be built into the Linux kernel.
17 With relatively inexpensive WAN interface cards available on the
18 market, a perfectly usable router can be built for less than half
19 the price of an external router. If you have one of those cards and
20 wish to use your Linux box as a WAN router, say Y here and also to
21 the WAN driver for your card, below. You will then need the
22 wan-tools package which is available from <ftp://ftp.sangoma.com/>.
23 Read <file:Documentation/networking/wan-router.txt> for more
24 information.
25
26 To compile WAN routing support as a module, choose M here: the
27 module will be called wanrouter.
28
29 If unsure, say N.
diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c
index d93b19faaab7..596cb96e5f47 100644
--- a/net/wanrouter/af_wanpipe.c
+++ b/net/wanrouter/af_wanpipe.c
@@ -57,7 +57,7 @@
57#include <linux/wanpipe.h> 57#include <linux/wanpipe.h>
58#include <linux/if_wanpipe.h> 58#include <linux/if_wanpipe.h>
59#include <linux/pkt_sched.h> 59#include <linux/pkt_sched.h>
60#include <linux/tcp.h> 60#include <net/tcp_states.h>
61#include <linux/if_wanpipe_common.h> 61#include <linux/if_wanpipe_common.h>
62#include <linux/sdla_x25.h> 62#include <linux/sdla_x25.h>
63 63
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index d6844ac226f5..13b650ad22e2 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -358,10 +358,10 @@ int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev,
358 */ 358 */
359 359
360 360
361unsigned short wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) 361__be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev)
362{ 362{
363 int cnt = skb->data[0] ? 0 : 1; /* there may be a pad present */ 363 int cnt = skb->data[0] ? 0 : 1; /* there may be a pad present */
364 unsigned short ethertype; 364 __be16 ethertype;
365 365
366 switch (skb->data[cnt]) { 366 switch (skb->data[cnt]) {
367 case NLPID_IP: /* IP datagram */ 367 case NLPID_IP: /* IP datagram */
@@ -379,7 +379,7 @@ unsigned short wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev)
379 skb->data[cnt+3], dev->name); 379 skb->data[cnt+3], dev->name);
380 return 0; 380 return 0;
381 } 381 }
382 ethertype = *((unsigned short*)&skb->data[cnt+4]); 382 ethertype = *((__be16*)&skb->data[cnt+4]);
383 cnt += 6; 383 cnt += 6;
384 break; 384 break;
385 385
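Switching wanrouter_type_trans() from unsigned short to __be16 is another sparse change: __be16 is a "bitwise" type that makes byte-order mistakes checkable. A sketch, assuming the linux/types.h definitions of the time:

	#ifdef __CHECKER__
	#define __bitwise __attribute__((bitwise))
	#else
	#define __bitwise
	#endif
	typedef __u16 __bitwise __be16;

	/* sparse now flags a missing byte-order conversion, e.g. */
	if (wanrouter_type_trans(skb, dev) == htons(ETH_P_IP))
		/* ... correct: compare big-endian to big-endian ... */;
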
diff --git a/net/x25/Kconfig b/net/x25/Kconfig
new file mode 100644
index 000000000000..e6759c9660bb
--- /dev/null
+++ b/net/x25/Kconfig
@@ -0,0 +1,36 @@
+#
+# CCITT X.25 Packet Layer
+#
+
+config X25
+	tristate "CCITT X.25 Packet Layer (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	---help---
+	  X.25 is a set of standardized network protocols, similar in scope to
+	  frame relay; the one physical line from your box to the X.25 network
+	  entry point can carry several logical point-to-point connections
+	  (called "virtual circuits") to other computers connected to the X.25
+	  network. Governments, banks, and other organizations tend to use it
+	  to connect to each other or to form Wide Area Networks (WANs). Many
+	  countries have public X.25 networks. X.25 consists of two
+	  protocols: the higher level Packet Layer Protocol (PLP) (say Y here
+	  if you want that) and the lower level data link layer protocol LAPB
+	  (say Y to "LAPB Data Link Driver" below if you want that).
+
+	  You can read more about X.25 at <http://www.sangoma.com/x25.htm> and
+	  <http://www.cisco.com/univercd/cc/td/doc/product/software/ios11/cbook/cx25.htm>.
+	  Information about X.25 for Linux is contained in the files
+	  <file:Documentation/networking/x25.txt> and
+	  <file:Documentation/networking/x25-iface.txt>.
+
+	  One connects to an X.25 network either with a dedicated network card
+	  using the X.21 protocol (not yet supported by Linux) or one can do
+	  X.25 over a standard telephone line using an ordinary modem (say Y
+	  to "X.25 async driver" below) or over Ethernet using an ordinary
+	  Ethernet card and the LAPB over Ethernet (say Y to "LAPB Data Link
+	  Driver" and "LAPB over Ethernet driver" below).
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called x25. If unsure, say N.
+
+
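
As a worked example of the layering the help text above describes, X.25 over Ethernet would be selected with a .config fragment along these lines; the LAPB symbol names are taken from the "LAPB Data Link Driver" and "LAPB over Ethernet driver" options it references:

CONFIG_X25=m
CONFIG_LAPB=m
CONFIG_LAPBETHER=m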
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 04bec047fa9a..020d73cc8414 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -47,7 +47,7 @@
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
 #include <asm/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/termios.h>	/* For TIOCINQ/OUTQ */
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index 36fc3bf6d882..adfe7b8df355 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -81,7 +81,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
 }
 
 int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev,
-			   struct packet_type *ptype)
+			   struct packet_type *ptype, struct net_device *orig_dev)
 {
 	struct sk_buff *nskb;
 	struct x25_neigh *nb;
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index b0197c70a9fc..26146874b839 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -28,7 +28,7 @@
 #include <linux/string.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
 #include <net/x25.h>
 
 static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index 7fd872ad0c20..8be9b8fbc24d 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -27,7 +27,7 @@
 #include <linux/string.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
 #include <net/x25.h>
 
 /*
@@ -80,7 +80,7 @@ void x25_requeue_frames(struct sock *sk)
 		if (!skb_prev)
 			skb_queue_head(&sk->sk_write_queue, skb);
 		else
-			skb_append(skb_prev, skb);
+			skb_append(skb_prev, skb, &sk->sk_write_queue);
 		skb_prev = skb;
 	}
 }
diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c
index d6a21a3ad80e..0a92e1da3922 100644
--- a/net/x25/x25_timer.c
+++ b/net/x25/x25_timer.c
@@ -23,7 +23,7 @@
 #include <linux/jiffies.h>
 #include <linux/timer.h>
 #include <net/sock.h>
-#include <net/tcp.h>
+#include <net/tcp_states.h>
 #include <net/x25.h>
 
 static void x25_heartbeat_expiry(unsigned long);
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 58ca6a972c48..0c1c04322baf 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -1,6 +1,10 @@
 #
 # XFRM configuration
 #
+config XFRM
+	bool
+	depends on NET
+
 config XFRM_USER
 	tristate "IPsec user configuration interface"
 	depends on INET && XFRM
@@ -10,3 +14,14 @@ config XFRM_USER
 
 	  If unsure, say Y.
 
+config NET_KEY
+	tristate "PF_KEY sockets"
+	select XFRM
+	---help---
+	  PF_KEYv2 socket family, compatible to KAME ones.
+	  They are required if you are going to use IPsec tools ported
+	  from KAME.
+
+	  Say Y unless you know what you are doing.
+
+
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index c58a6f05a0b6..2407a7072327 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -12,7 +12,7 @@
 #include <net/ip.h>
 #include <net/xfrm.h>
 
-static kmem_cache_t *secpath_cachep;
+static kmem_cache_t *secpath_cachep __read_mostly;
 
 void __secpath_destroy(struct sec_path *sp)
 {
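
Note: the __read_mostly annotation added here and in xfrm_policy.c below moves rarely-written globals into a dedicated linker section so they do not share cache lines with write-hot data. A rough sketch of the mechanism, assuming the GCC section attribute the kernel's per-arch definition is built on (the example variable is hypothetical):

/* Sketch of what __read_mostly expands to (see the kernel's per-arch
 * cache.h for the real definition): a section attribute groups
 * annotated objects together, away from frequently-written data. */
#define __read_mostly __attribute__((__section__(".data.read_mostly")))

static int lookup_table_size __read_mostly = 1024;	/* written once at init */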
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d65ed8684fc1..83c8135e1764 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -37,7 +37,7 @@ EXPORT_SYMBOL(xfrm_policy_list);
 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
 
-static kmem_cache_t *xfrm_dst_cache;
+static kmem_cache_t *xfrm_dst_cache __read_mostly;
 
 static struct work_struct xfrm_policy_gc_work;
 static struct list_head xfrm_policy_gc_list =
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index ecade4893a13..c35336a0f71b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1125,9 +1125,8 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c)
 	if (build_expire(skb, x, c->data.hard) < 0)
 		BUG();
 
-	NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
-
-	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
+	NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE;
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
 }
 
 static int xfrm_notify_sa_flush(struct km_event *c)
@@ -1152,7 +1151,8 @@ static int xfrm_notify_sa_flush(struct km_event *c)
 
 	nlh->nlmsg_len = skb->tail - b;
 
-	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC);
+	NETLINK_CB(skb).dst_group = XFRMNLGRP_SA;
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
 
 nlmsg_failure:
 	kfree_skb(skb);
@@ -1226,7 +1226,8 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)
 
 	nlh->nlmsg_len = skb->tail - b;
 
-	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC);
+	NETLINK_CB(skb).dst_group = XFRMNLGRP_SA;
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC);
 
 nlmsg_failure:
 rtattr_failure:
@@ -1304,9 +1305,8 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
 	if (build_acquire(skb, x, xt, xp, dir) < 0)
 		BUG();
 
-	NETLINK_CB(skb).dst_groups = XFRMGRP_ACQUIRE;
-
-	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_ACQUIRE, GFP_ATOMIC);
+	NETLINK_CB(skb).dst_group = XFRMNLGRP_ACQUIRE;
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC);
 }
 
 /* User gives us xfrm_user_policy_info followed by an array of 0
@@ -1350,6 +1350,9 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
 	if (nr > XFRM_MAX_DEPTH)
 		return NULL;
 
+	if (p->dir > XFRM_POLICY_OUT)
+		return NULL;
+
 	xp = xfrm_policy_alloc(GFP_KERNEL);
 	if (xp == NULL) {
 		*dir = -ENOBUFS;
@@ -1402,9 +1405,8 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve
 	if (build_polexpire(skb, xp, dir, c->data.hard) < 0)
 		BUG();
 
-	NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
-
-	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
+	NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE;
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC);
 }
 
 static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
@@ -1452,7 +1454,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *
 
 	nlh->nlmsg_len = skb->tail - b;
 
-	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC);
+	NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY;
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
 
 nlmsg_failure:
 rtattr_failure:
@@ -1477,7 +1480,8 @@ static int xfrm_notify_policy_flush(struct km_event *c)
 
 	nlh->nlmsg_len = skb->tail - b;
 
-	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC);
+	NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY;
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
 
 nlmsg_failure:
 	kfree_skb(skb);
@@ -1516,7 +1520,8 @@ static int __init xfrm_user_init(void)
 {
 	printk(KERN_INFO "Initializing IPsec netlink socket\n");
 
-	xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv);
+	xfrm_nl = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX,
+	                                xfrm_netlink_rcv, THIS_MODULE);
 	if (xfrm_nl == NULL)
 		return -ENOMEM;
 
@@ -1534,3 +1539,4 @@ static void __exit xfrm_user_exit(void)
 module_init(xfrm_user_init);
 module_exit(xfrm_user_exit);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM);
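
Note: the XFRMGRP_* to XFRMNLGRP_* conversion throughout this file is part of the move from a 32-bit multicast-group bitmask to numbered netlink groups, which is also why netlink_kernel_create() now takes XFRMNLGRP_MAX and THIS_MODULE. Userspace joins groups one at a time via NETLINK_ADD_MEMBERSHIP instead of OR-ing bits into nl_groups. A hedged sketch of a listener for SA expire events, for illustration only:

/* Sketch: subscribe to XFRM expire notifications using numbered
 * netlink groups, as required once groups are no longer limited to
 * a 32-bit mask.  Most error handling trimmed for brevity. */
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/xfrm.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct sockaddr_nl snl;
	int grp = XFRMNLGRP_EXPIRE;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);

	if (fd < 0)
		return 1;

	memset(&snl, 0, sizeof(snl));
	snl.nl_family = AF_NETLINK;
	if (bind(fd, (struct sockaddr *)&snl, sizeof(snl)) < 0)
		return 1;

	/* One setsockopt() call per group, instead of OR-ing XFRMGRP_*
	 * bits into snl.nl_groups before bind(). */
	if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
		       &grp, sizeof(grp)) < 0)
		return 1;

	printf("listening for XFRM expire events on fd %d\n", fd);
	/* A recvmsg() loop would follow here. */
	return 0;
}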