author	Jonathan Herman <hermanjl@cs.unc.edu>	2013-01-22 10:38:37 -0500
committer	Jonathan Herman <hermanjl@cs.unc.edu>	2013-01-22 10:38:37 -0500
commit	fcc9d2e5a6c89d22b8b773a64fb4ad21ac318446 (patch)
tree	a57612d1888735a2ec7972891b68c1ac5ec8faea /net
parent	8dea78da5cee153b8af9c07a2745f6c55057fe12 (diff)
Added missing tegra files. (HEAD, master)
Diffstat (limited to 'net')
-rw-r--r--net/802/tr.c677
-rw-r--r--net/activity_stats.c115
-rw-r--r--net/batman-adv/aggregation.c293
-rw-r--r--net/batman-adv/aggregation.h46
-rw-r--r--net/batman-adv/bat_debugfs.c359
-rw-r--r--net/batman-adv/bat_debugfs.h33
-rw-r--r--net/batman-adv/bat_sysfs.c674
-rw-r--r--net/batman-adv/bat_sysfs.h44
-rw-r--r--net/core/kmap_skb.h19
-rw-r--r--net/dsa/mv88e6060.c288
-rw-r--r--net/dsa/mv88e6123_61_65.c447
-rw-r--r--net/dsa/mv88e6131.c443
-rw-r--r--net/dsa/mv88e6xxx.c522
-rw-r--r--net/dsa/mv88e6xxx.h95
-rw-r--r--net/econet/Kconfig36
-rw-r--r--net/econet/Makefile7
-rw-r--r--net/econet/af_econet.c1170
-rw-r--r--net/ethernet/pe2.c37
-rw-r--r--net/ipv4/netfilter/ip_queue.c637
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c516
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c98
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c110
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c127
-rw-r--r--net/ipv4/netfilter/nf_nat_amanda.c85
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c779
-rw-r--r--net/ipv4/netfilter/nf_nat_ftp.c137
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c451
-rw-r--r--net/ipv4/netfilter/nf_nat_irc.c99
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_common.c125
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_dccp.c108
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_sctp.c97
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_tcp.c92
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udp.c83
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udplite.c99
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_unknown.c53
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c214
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c561
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c326
-rw-r--r--net/ipv4/netfilter/nf_nat_tftp.c51
-rw-r--r--net/ipv4/sysfs_net_ipv4.c88
-rw-r--r--net/ipv6/netfilter/ip6_queue.c638
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c527
-rw-r--r--net/mac80211/driver-trace.c9
-rw-r--r--net/mac80211/driver-trace.h1492
-rw-r--r--net/mac80211/work.c1302
-rw-r--r--net/netfilter/nfnetlink_queue.c1028
-rw-r--r--net/netfilter/xt_NOTRACK.c53
-rw-r--r--net/netfilter/xt_qtaguid.c2785
-rw-r--r--net/netfilter/xt_qtaguid_internal.h330
-rw-r--r--net/netfilter/xt_qtaguid_print.c556
-rw-r--r--net/netfilter/xt_qtaguid_print.h120
-rw-r--r--net/netfilter/xt_quota2.c381
-rw-r--r--net/rds/xlist.h80
-rw-r--r--net/tipc/log.h67
54 files changed, 19609 insertions, 0 deletions
diff --git a/net/802/tr.c b/net/802/tr.c
new file mode 100644
index 00000000000..5e20cf8a074
--- /dev/null
+++ b/net/802/tr.c
@@ -0,0 +1,677 @@
1/*
2 * NET3: Token ring device handling subroutines
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Fixes: 3 Feb 97 Paul Norton <pnorton@cts.com> Minor routing fixes.
10 * Added rif table to /proc/net/tr_rif and rif timeout to
11 * /proc/sys/net/token-ring/rif_timeout.
12 * 22 Jun 98 Paul Norton <p.norton@computer.org> Rearranged
13 * tr_header and tr_type_trans to handle passing IPX SNAP and
14 * 802.2 through the correct layers. Eliminated tr_reformat.
15 *
16 */
17
18#include <asm/uaccess.h>
19#include <asm/system.h>
20#include <linux/module.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/jiffies.h>
24#include <linux/string.h>
25#include <linux/mm.h>
26#include <linux/socket.h>
27#include <linux/in.h>
28#include <linux/inet.h>
29#include <linux/netdevice.h>
30#include <linux/trdevice.h>
31#include <linux/skbuff.h>
32#include <linux/errno.h>
33#include <linux/timer.h>
34#include <linux/net.h>
35#include <linux/proc_fs.h>
36#include <linux/seq_file.h>
37#include <linux/init.h>
38#include <linux/sysctl.h>
39#include <linux/slab.h>
40#include <net/arp.h>
41#include <net/net_namespace.h>
42
43static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev);
44static void rif_check_expire(unsigned long dummy);
45
46#define TR_SR_DEBUG 0
47
48/*
49 * Each RIF entry we learn is kept this way
50 */
51
52struct rif_cache {
53 unsigned char addr[TR_ALEN];
54 int iface;
55 __be16 rcf;
56 __be16 rseg[8];
57 struct rif_cache *next;
58 unsigned long last_used;
59 unsigned char local_ring;
60};
61
62#define RIF_TABLE_SIZE 32
63
64/*
65 * We hash the RIF cache 32 ways. We do after all have to look it
66 * up a lot.
67 */
68
69static struct rif_cache *rif_table[RIF_TABLE_SIZE];
70
71static DEFINE_SPINLOCK(rif_lock);
72
73
74/*
75 * Garbage disposal timer.
76 */
77
78static struct timer_list rif_timer;
79
80static int sysctl_tr_rif_timeout = 60*10*HZ;
81
82static inline unsigned long rif_hash(const unsigned char *addr)
83{
84 unsigned long x;
85
86 x = addr[0];
87 x = (x << 2) ^ addr[1];
88 x = (x << 2) ^ addr[2];
89 x = (x << 2) ^ addr[3];
90 x = (x << 2) ^ addr[4];
91 x = (x << 2) ^ addr[5];
92
93 x ^= x >> 8;
94
95 return x & (RIF_TABLE_SIZE - 1);
96}
97
98/*
99 * Put the headers on a token ring packet. Token ring source routing
100 * makes this a little more exciting than on ethernet.
101 */
102
103static int tr_header(struct sk_buff *skb, struct net_device *dev,
104 unsigned short type,
105 const void *daddr, const void *saddr, unsigned len)
106{
107 struct trh_hdr *trh;
108 int hdr_len;
109
110 /*
111 * Add the 802.2 SNAP header if IP as the IPv4/IPv6 code calls
112 * dev->hard_header directly.
113 */
114 if (type == ETH_P_IP || type == ETH_P_IPV6 || type == ETH_P_ARP)
115 {
116 struct trllc *trllc;
117
118 hdr_len = sizeof(struct trh_hdr) + sizeof(struct trllc);
119 trh = (struct trh_hdr *)skb_push(skb, hdr_len);
120 trllc = (struct trllc *)(trh+1);
121 trllc->dsap = trllc->ssap = EXTENDED_SAP;
122 trllc->llc = UI_CMD;
123 trllc->protid[0] = trllc->protid[1] = trllc->protid[2] = 0x00;
124 trllc->ethertype = htons(type);
125 }
126 else
127 {
128 hdr_len = sizeof(struct trh_hdr);
129 trh = (struct trh_hdr *)skb_push(skb, hdr_len);
130 }
131
132 trh->ac=AC;
133 trh->fc=LLC_FRAME;
134
135 if(saddr)
136 memcpy(trh->saddr,saddr,dev->addr_len);
137 else
138 memcpy(trh->saddr,dev->dev_addr,dev->addr_len);
139
140 /*
141 * Build the destination and then source route the frame
142 */
143
144 if(daddr)
145 {
146 memcpy(trh->daddr,daddr,dev->addr_len);
147 tr_source_route(skb, trh, dev);
148 return hdr_len;
149 }
150
151 return -hdr_len;
152}
153
154/*
155 * A neighbour discovery of some species (eg arp) has completed. We
156 * can now send the packet.
157 */
158
159static int tr_rebuild_header(struct sk_buff *skb)
160{
161 struct trh_hdr *trh=(struct trh_hdr *)skb->data;
162 struct trllc *trllc=(struct trllc *)(skb->data+sizeof(struct trh_hdr));
163 struct net_device *dev = skb->dev;
164
165 /*
166 * FIXME: We don't yet support IPv6 over token rings
167 */
168
169 if(trllc->ethertype != htons(ETH_P_IP)) {
170 printk("tr_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(trllc->ethertype));
171 return 0;
172 }
173
174#ifdef CONFIG_INET
175 if(arp_find(trh->daddr, skb)) {
176 return 1;
177 }
178 else
179#endif
180 {
181 tr_source_route(skb,trh,dev);
182 return 0;
183 }
184}
185
186/*
187 * Some of this is a bit hackish. We intercept RIF information
188 * used for source routing. We also grab IP directly and don't feed
189 * it via SNAP.
190 */
191
192__be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev)
193{
194
195 struct trh_hdr *trh;
196 struct trllc *trllc;
197 unsigned riflen=0;
198
199 skb->dev = dev;
200 skb_reset_mac_header(skb);
201 trh = tr_hdr(skb);
202
203 if(trh->saddr[0] & TR_RII)
204 riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8;
205
206 trllc = (struct trllc *)(skb->data+sizeof(struct trh_hdr)-TR_MAXRIFLEN+riflen);
207
208 skb_pull(skb,sizeof(struct trh_hdr)-TR_MAXRIFLEN+riflen);
209
210 if(*trh->daddr & 0x80)
211 {
212 if(!memcmp(trh->daddr,dev->broadcast,TR_ALEN))
213 skb->pkt_type=PACKET_BROADCAST;
214 else
215 skb->pkt_type=PACKET_MULTICAST;
216 }
217 else if ( (trh->daddr[0] & 0x01) && (trh->daddr[1] & 0x00) && (trh->daddr[2] & 0x5E))
218 {
219 skb->pkt_type=PACKET_MULTICAST;
220 }
221 else if(dev->flags & IFF_PROMISC)
222 {
223 if(memcmp(trh->daddr, dev->dev_addr, TR_ALEN))
224 skb->pkt_type=PACKET_OTHERHOST;
225 }
226
227 if ((skb->pkt_type != PACKET_BROADCAST) &&
228 (skb->pkt_type != PACKET_MULTICAST))
229 tr_add_rif_info(trh,dev) ;
230
231 /*
232 * Strip the SNAP header from ARP packets since we don't
233 * pass them through to the 802.2/SNAP layers.
234 */
235
236 if (trllc->dsap == EXTENDED_SAP &&
237 (trllc->ethertype == htons(ETH_P_IP) ||
238 trllc->ethertype == htons(ETH_P_IPV6) ||
239 trllc->ethertype == htons(ETH_P_ARP)))
240 {
241 skb_pull(skb, sizeof(struct trllc));
242 return trllc->ethertype;
243 }
244
245 return htons(ETH_P_TR_802_2);
246}
247
248/*
249 * We try to do source routing...
250 */
251
252void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,
253 struct net_device *dev)
254{
255 int slack;
256 unsigned int hash;
257 struct rif_cache *entry;
258 unsigned char *olddata;
259 unsigned long flags;
260 static const unsigned char mcast_func_addr[]
261 = {0xC0,0x00,0x00,0x04,0x00,0x00};
262
263 spin_lock_irqsave(&rif_lock, flags);
264
265 /*
266 * Broadcasts are single route as stated in RFC 1042
267 */
268 if( (!memcmp(&(trh->daddr[0]),&(dev->broadcast[0]),TR_ALEN)) ||
269 (!memcmp(&(trh->daddr[0]),&(mcast_func_addr[0]), TR_ALEN)) )
270 {
271 trh->rcf=htons((((sizeof(trh->rcf)) << 8) & TR_RCF_LEN_MASK)
272 | TR_RCF_FRAME2K | TR_RCF_LIMITED_BROADCAST);
273 trh->saddr[0]|=TR_RII;
274 }
275 else
276 {
277 hash = rif_hash(trh->daddr);
278 /*
279 * Walk the hash table and look for an entry
280 */
281 for(entry=rif_table[hash];entry && memcmp(&(entry->addr[0]),&(trh->daddr[0]),TR_ALEN);entry=entry->next);
282
283 /*
284 * If we found an entry we can route the frame.
285 */
286 if(entry)
287 {
288#if TR_SR_DEBUG
289printk("source routing for %pM\n", trh->daddr);
290#endif
291 if(!entry->local_ring && (ntohs(entry->rcf) & TR_RCF_LEN_MASK) >> 8)
292 {
293 trh->rcf=entry->rcf;
294 memcpy(&trh->rseg[0],&entry->rseg[0],8*sizeof(unsigned short));
295 trh->rcf^=htons(TR_RCF_DIR_BIT);
296 trh->rcf&=htons(0x1fff); /* Issam Chehab <ichehab@madge1.demon.co.uk> */
297
298 trh->saddr[0]|=TR_RII;
299#if TR_SR_DEBUG
300 printk("entry found with rcf %04x\n", entry->rcf);
301 }
302 else
303 {
304 printk("entry found but without rcf length, local=%02x\n", entry->local_ring);
305#endif
306 }
307 entry->last_used=jiffies;
308 }
309 else
310 {
311 /*
312 * Without the information we simply have to shout
313 * on the wire. The replies should rapidly clean this
314 * situation up.
315 */
316 trh->rcf=htons((((sizeof(trh->rcf)) << 8) & TR_RCF_LEN_MASK)
317 | TR_RCF_FRAME2K | TR_RCF_LIMITED_BROADCAST);
318 trh->saddr[0]|=TR_RII;
319#if TR_SR_DEBUG
320 printk("no entry in rif table found - broadcasting frame\n");
321#endif
322 }
323 }
324
325 /* Compress the RIF here so we don't have to do it in the driver(s) */
326 if (!(trh->saddr[0] & 0x80))
327 slack = 18;
328 else
329 slack = 18 - ((ntohs(trh->rcf) & TR_RCF_LEN_MASK)>>8);
330 olddata = skb->data;
331 spin_unlock_irqrestore(&rif_lock, flags);
332
333 skb_pull(skb, slack);
334 memmove(skb->data, olddata, sizeof(struct trh_hdr) - slack);
335}
336
337/*
338 * We have learned some new RIF information for our source
339 * routing.
340 */
341
342static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev)
343{
344 unsigned int hash, rii_p = 0;
345 unsigned long flags;
346 struct rif_cache *entry;
347 unsigned char saddr0;
348
349 spin_lock_irqsave(&rif_lock, flags);
350 saddr0 = trh->saddr[0];
351
352 /*
353 * Firstly see if the entry exists
354 */
355
356 if(trh->saddr[0] & TR_RII)
357 {
358 trh->saddr[0]&=0x7f;
359 if (((ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8) > 2)
360 {
361 rii_p = 1;
362 }
363 }
364
365 hash = rif_hash(trh->saddr);
366 for(entry=rif_table[hash];entry && memcmp(&(entry->addr[0]),&(trh->saddr[0]),TR_ALEN);entry=entry->next);
367
368 if(entry==NULL)
369 {
370#if TR_SR_DEBUG
371 printk("adding rif_entry: addr:%pM rcf:%04X\n",
372 trh->saddr, ntohs(trh->rcf));
373#endif
374 /*
375 * Allocate our new entry. A failure to allocate loses
376 * us the information. This is harmless.
377 *
378 * FIXME: We ought to keep some kind of cache size
379 * limiting and adjust the timers to suit.
380 */
381 entry=kmalloc(sizeof(struct rif_cache),GFP_ATOMIC);
382
383 if(!entry)
384 {
385 printk(KERN_DEBUG "tr.c: Couldn't malloc rif cache entry !\n");
386 spin_unlock_irqrestore(&rif_lock, flags);
387 return;
388 }
389
390 memcpy(&(entry->addr[0]),&(trh->saddr[0]),TR_ALEN);
391 entry->iface = dev->ifindex;
392 entry->next=rif_table[hash];
393 entry->last_used=jiffies;
394 rif_table[hash]=entry;
395
396 if (rii_p)
397 {
398 entry->rcf = trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK);
399 memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short));
400 entry->local_ring = 0;
401 }
402 else
403 {
404 entry->local_ring = 1;
405 }
406 }
407 else /* Y. Tahara added */
408 {
409 /*
410 * Update existing entries
411 */
412 if (!entry->local_ring)
413 if (entry->rcf != (trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK)) &&
414 !(trh->rcf & htons(TR_RCF_BROADCAST_MASK)))
415 {
416#if TR_SR_DEBUG
417printk("updating rif_entry: addr:%pM rcf:%04X\n",
418 trh->saddr, ntohs(trh->rcf));
419#endif
420 entry->rcf = trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK);
421 memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short));
422 }
423 entry->last_used=jiffies;
424 }
425 trh->saddr[0]=saddr0; /* put the routing indicator back for tcpdump */
426 spin_unlock_irqrestore(&rif_lock, flags);
427}
428
429/*
430 * Scan the cache with a timer and see what we need to throw out.
431 */
432
433static void rif_check_expire(unsigned long dummy)
434{
435 int i;
436 unsigned long flags, next_interval = jiffies + sysctl_tr_rif_timeout/2;
437
438 spin_lock_irqsave(&rif_lock, flags);
439
440 for(i =0; i < RIF_TABLE_SIZE; i++) {
441 struct rif_cache *entry, **pentry;
442
443 pentry = rif_table+i;
444 while((entry=*pentry) != NULL) {
445 unsigned long expires
446 = entry->last_used + sysctl_tr_rif_timeout;
447
448 if (time_before_eq(expires, jiffies)) {
449 *pentry = entry->next;
450 kfree(entry);
451 } else {
452 pentry = &entry->next;
453
454 if (time_before(expires, next_interval))
455 next_interval = expires;
456 }
457 }
458 }
459
460 spin_unlock_irqrestore(&rif_lock, flags);
461
462 mod_timer(&rif_timer, next_interval);
463
464}
465
466/*
467 * Generate the /proc/net information for the token ring RIF
468 * routing.
469 */
470
471#ifdef CONFIG_PROC_FS
472
473static struct rif_cache *rif_get_idx(loff_t pos)
474{
475 int i;
476 struct rif_cache *entry;
477 loff_t off = 0;
478
479 for(i = 0; i < RIF_TABLE_SIZE; i++)
480 for(entry = rif_table[i]; entry; entry = entry->next) {
481 if (off == pos)
482 return entry;
483 ++off;
484 }
485
486 return NULL;
487}
488
489static void *rif_seq_start(struct seq_file *seq, loff_t *pos)
490 __acquires(&rif_lock)
491{
492 spin_lock_irq(&rif_lock);
493
494 return *pos ? rif_get_idx(*pos - 1) : SEQ_START_TOKEN;
495}
496
497static void *rif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
498{
499 int i;
500 struct rif_cache *ent = v;
501
502 ++*pos;
503
504 if (v == SEQ_START_TOKEN) {
505 i = -1;
506 goto scan;
507 }
508
509 if (ent->next)
510 return ent->next;
511
512 i = rif_hash(ent->addr);
513 scan:
514 while (++i < RIF_TABLE_SIZE) {
515 if ((ent = rif_table[i]) != NULL)
516 return ent;
517 }
518 return NULL;
519}
520
521static void rif_seq_stop(struct seq_file *seq, void *v)
522 __releases(&rif_lock)
523{
524 spin_unlock_irq(&rif_lock);
525}
526
527static int rif_seq_show(struct seq_file *seq, void *v)
528{
529 int j, rcf_len, segment, brdgnmb;
530 struct rif_cache *entry = v;
531
532 if (v == SEQ_START_TOKEN)
533 seq_puts(seq,
534 "if TR address TTL rcf routing segments\n");
535 else {
536 struct net_device *dev = dev_get_by_index(&init_net, entry->iface);
537 long ttl = (long) (entry->last_used + sysctl_tr_rif_timeout)
538 - (long) jiffies;
539
540 seq_printf(seq, "%s %pM %7li ",
541 dev?dev->name:"?",
542 entry->addr,
543 ttl/HZ);
544
545 if (entry->local_ring)
546 seq_puts(seq, "local\n");
547 else {
548
549 seq_printf(seq, "%04X", ntohs(entry->rcf));
550 rcf_len = ((ntohs(entry->rcf) & TR_RCF_LEN_MASK)>>8)-2;
551 if (rcf_len)
552 rcf_len >>= 1;
553 for(j = 1; j < rcf_len; j++) {
554 if(j==1) {
555 segment=ntohs(entry->rseg[j-1])>>4;
556 seq_printf(seq," %03X",segment);
557 }
558
559 segment=ntohs(entry->rseg[j])>>4;
560 brdgnmb=ntohs(entry->rseg[j-1])&0x00f;
561 seq_printf(seq,"-%01X-%03X",brdgnmb,segment);
562 }
563 seq_putc(seq, '\n');
564 }
565
566 if (dev)
567 dev_put(dev);
568 }
569 return 0;
570}
571
572
573static const struct seq_operations rif_seq_ops = {
574 .start = rif_seq_start,
575 .next = rif_seq_next,
576 .stop = rif_seq_stop,
577 .show = rif_seq_show,
578};
579
580static int rif_seq_open(struct inode *inode, struct file *file)
581{
582 return seq_open(file, &rif_seq_ops);
583}
584
585static const struct file_operations rif_seq_fops = {
586 .owner = THIS_MODULE,
587 .open = rif_seq_open,
588 .read = seq_read,
589 .llseek = seq_lseek,
590 .release = seq_release,
591};
592
593#endif
594
595static const struct header_ops tr_header_ops = {
596 .create = tr_header,
597 .rebuild= tr_rebuild_header,
598};
599
600static void tr_setup(struct net_device *dev)
601{
602 /*
603 * Configure and register
604 */
605
606 dev->header_ops = &tr_header_ops;
607
608 dev->type = ARPHRD_IEEE802_TR;
609 dev->hard_header_len = TR_HLEN;
610 dev->mtu = 2000;
611 dev->addr_len = TR_ALEN;
612 dev->tx_queue_len = 100; /* Long queues on tr */
613
614 memset(dev->broadcast,0xFF, TR_ALEN);
615
616 /* New-style flags. */
617 dev->flags = IFF_BROADCAST | IFF_MULTICAST ;
618}
619
620/**
621 * alloc_trdev - Register token ring device
622 * @sizeof_priv: Size of additional driver-private structure to be allocated
623 * for this token ring device
624 *
625 * Fill in the fields of the device structure with token ring-generic values.
626 *
627 * Constructs a new net device, complete with a private data area of
628 * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for
629 * this private data area.
630 */
631struct net_device *alloc_trdev(int sizeof_priv)
632{
633 return alloc_netdev(sizeof_priv, "tr%d", tr_setup);
634}
635
636#ifdef CONFIG_SYSCTL
637static struct ctl_table tr_table[] = {
638 {
639 .procname = "rif_timeout",
640 .data = &sysctl_tr_rif_timeout,
641 .maxlen = sizeof(int),
642 .mode = 0644,
643 .proc_handler = proc_dointvec
644 },
645 { },
646};
647
648static __initdata struct ctl_path tr_path[] = {
649 { .procname = "net", },
650 { .procname = "token-ring", },
651 { }
652};
653#endif
654
655/*
656 * Called during bootup. We don't actually have to initialise
657 * too much for this.
658 */
659
660static int __init rif_init(void)
661{
662 rif_timer.expires = jiffies + sysctl_tr_rif_timeout;
663 setup_timer(&rif_timer, rif_check_expire, 0);
664 add_timer(&rif_timer);
665#ifdef CONFIG_SYSCTL
666 register_sysctl_paths(tr_path, tr_table);
667#endif
668 proc_net_fops_create(&init_net, "tr_rif", S_IRUGO, &rif_seq_fops);
669 return 0;
670}
671
672module_init(rif_init);
673
674EXPORT_SYMBOL(tr_type_trans);
675EXPORT_SYMBOL(alloc_trdev);
676
677MODULE_LICENSE("GPL");
diff --git a/net/activity_stats.c b/net/activity_stats.c
new file mode 100644
index 00000000000..8a3e9347006
--- /dev/null
+++ b/net/activity_stats.c
@@ -0,0 +1,115 @@
1/* net/activity_stats.c
2 *
3 * Copyright (C) 2010 Google, Inc.
4 *
5 * This software is licensed under the terms of the GNU General Public
6 * License version 2, as published by the Free Software Foundation, and
7 * may be copied, distributed, and modified under those terms.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * Author: Mike Chan (mike@android.com)
15 */
16
17#include <linux/proc_fs.h>
18#include <linux/suspend.h>
19#include <net/net_namespace.h>
20
21/*
22 * Track transmission rates in buckets (power of 2).
23 * 1,2,4,8...512 seconds.
24 *
25 * Buckets represent the count of network transmissions at least
26 * N seconds apart, where N is 1 << bucket index.
27 */
28#define BUCKET_MAX 10
29
30/* Track network activity frequency */
31static unsigned long activity_stats[BUCKET_MAX];
32static ktime_t last_transmit;
33static ktime_t suspend_time;
34static DEFINE_SPINLOCK(activity_lock);
35
36void activity_stats_update(void)
37{
38 int i;
39 unsigned long flags;
40 ktime_t now;
41 s64 delta;
42
43 spin_lock_irqsave(&activity_lock, flags);
44 now = ktime_get();
45 delta = ktime_to_ns(ktime_sub(now, last_transmit));
46
47 for (i = BUCKET_MAX - 1; i >= 0; i--) {
48 /*
49 * Check if the time delta between network activity is within the
50 * minimum bucket range.
51 */
52 if (delta < (1000000000ULL << i))
53 continue;
54
55 activity_stats[i]++;
56 last_transmit = now;
57 break;
58 }
59 spin_unlock_irqrestore(&activity_lock, flags);
60}
61
62static int activity_stats_read_proc(char *page, char **start, off_t off,
63 int count, int *eof, void *data)
64{
65 int i;
66 int len;
67 char *p = page;
68
69 /* Only print if offset is 0, or we have enough buffer space */
70 if (off || count < (30 * BUCKET_MAX + 22))
71 return -ENOMEM;
72
73 len = snprintf(p, count, "Min Bucket(sec) Count\n");
74 count -= len;
75 p += len;
76
77 for (i = 0; i < BUCKET_MAX; i++) {
78 len = snprintf(p, count, "%15d %lu\n", 1 << i, activity_stats[i]);
79 count -= len;
80 p += len;
81 }
82 *eof = 1;
83
84 return p - page;
85}
86
87static int activity_stats_notifier(struct notifier_block *nb,
88 unsigned long event, void *dummy)
89{
90 switch (event) {
91 case PM_SUSPEND_PREPARE:
92 suspend_time = ktime_get_real();
93 break;
94
95 case PM_POST_SUSPEND:
96 suspend_time = ktime_sub(ktime_get_real(), suspend_time);
97 last_transmit = ktime_sub(last_transmit, suspend_time);
98 }
99
100 return 0;
101}
102
103static struct notifier_block activity_stats_notifier_block = {
104 .notifier_call = activity_stats_notifier,
105};
106
107static int __init activity_stats_init(void)
108{
109 create_proc_read_entry("activity", S_IRUGO,
110 init_net.proc_net_stat, activity_stats_read_proc, NULL);
111 return register_pm_notifier(&activity_stats_notifier_block);
112}
113
114subsys_initcall(activity_stats_init);
115
diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c
new file mode 100644
index 00000000000..69467fe71ff
--- /dev/null
+++ b/net/batman-adv/aggregation.c
@@ -0,0 +1,293 @@
1/*
2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 *
4 * Marek Lindner, Simon Wunderlich
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA
19 *
20 */
21
22#include "main.h"
23#include "translation-table.h"
24#include "aggregation.h"
25#include "send.h"
26#include "routing.h"
27#include "hard-interface.h"
28
29/* return true if new_packet can be aggregated with forw_packet */
30static bool can_aggregate_with(const struct batman_packet *new_batman_packet,
31 struct bat_priv *bat_priv,
32 int packet_len,
33 unsigned long send_time,
34 bool directlink,
35 const struct hard_iface *if_incoming,
36 const struct forw_packet *forw_packet)
37{
38 struct batman_packet *batman_packet =
39 (struct batman_packet *)forw_packet->skb->data;
40 int aggregated_bytes = forw_packet->packet_len + packet_len;
41 struct hard_iface *primary_if = NULL;
42 bool res = false;
43
44 /**
45 * we can aggregate the current packet to this aggregated packet
46 * if:
47 *
48 * - the send time is within our MAX_AGGREGATION_MS time
49 * - the resulting packet wont be bigger than
50 * MAX_AGGREGATION_BYTES
51 */
52
53 if (time_before(send_time, forw_packet->send_time) &&
54 time_after_eq(send_time + msecs_to_jiffies(MAX_AGGREGATION_MS),
55 forw_packet->send_time) &&
56 (aggregated_bytes <= MAX_AGGREGATION_BYTES)) {
57
58 /**
59 * check aggregation compatibility
60 * -> direct link packets are broadcasted on
61 * their interface only
62 * -> aggregate packet if the current packet is
63 * a "global" packet as well as the base
64 * packet
65 */
66
67 primary_if = primary_if_get_selected(bat_priv);
68 if (!primary_if)
69 goto out;
70
71 /* packets without direct link flag and high TTL
72 * are flooded through the net */
73 if ((!directlink) &&
74 (!(batman_packet->flags & DIRECTLINK)) &&
75 (batman_packet->ttl != 1) &&
76
77 /* own packets originating non-primary
78 * interfaces leave only that interface */
79 ((!forw_packet->own) ||
80 (forw_packet->if_incoming == primary_if))) {
81 res = true;
82 goto out;
83 }
84
85 /* if the incoming packet is sent via this one
86 * interface only - we still can aggregate */
87 if ((directlink) &&
88 (new_batman_packet->ttl == 1) &&
89 (forw_packet->if_incoming == if_incoming) &&
90
91 /* packets from direct neighbors or
92 * own secondary interface packets
93 * (= secondary interface packets in general) */
94 (batman_packet->flags & DIRECTLINK ||
95 (forw_packet->own &&
96 forw_packet->if_incoming != primary_if))) {
97 res = true;
98 goto out;
99 }
100 }
101
102out:
103 if (primary_if)
104 hardif_free_ref(primary_if);
105 return res;
106}
107
108/* create a new aggregated packet and add this packet to it */
109static void new_aggregated_packet(const unsigned char *packet_buff,
110 int packet_len, unsigned long send_time,
111 bool direct_link,
112 struct hard_iface *if_incoming,
113 int own_packet)
114{
115 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
116 struct forw_packet *forw_packet_aggr;
117 unsigned char *skb_buff;
118
119 if (!atomic_inc_not_zero(&if_incoming->refcount))
120 return;
121
122 /* own packet should always be scheduled */
123 if (!own_packet) {
124 if (!atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
125 bat_dbg(DBG_BATMAN, bat_priv,
126 "batman packet queue full\n");
127 goto out;
128 }
129 }
130
131 forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC);
132 if (!forw_packet_aggr) {
133 if (!own_packet)
134 atomic_inc(&bat_priv->batman_queue_left);
135 goto out;
136 }
137
138 if ((atomic_read(&bat_priv->aggregated_ogms)) &&
139 (packet_len < MAX_AGGREGATION_BYTES))
140 forw_packet_aggr->skb = dev_alloc_skb(MAX_AGGREGATION_BYTES +
141 sizeof(struct ethhdr));
142 else
143 forw_packet_aggr->skb = dev_alloc_skb(packet_len +
144 sizeof(struct ethhdr));
145
146 if (!forw_packet_aggr->skb) {
147 if (!own_packet)
148 atomic_inc(&bat_priv->batman_queue_left);
149 kfree(forw_packet_aggr);
150 goto out;
151 }
152 skb_reserve(forw_packet_aggr->skb, sizeof(struct ethhdr));
153
154 INIT_HLIST_NODE(&forw_packet_aggr->list);
155
156 skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
157 forw_packet_aggr->packet_len = packet_len;
158 memcpy(skb_buff, packet_buff, packet_len);
159
160 forw_packet_aggr->own = own_packet;
161 forw_packet_aggr->if_incoming = if_incoming;
162 forw_packet_aggr->num_packets = 0;
163 forw_packet_aggr->direct_link_flags = NO_FLAGS;
164 forw_packet_aggr->send_time = send_time;
165
166 /* save packet direct link flag status */
167 if (direct_link)
168 forw_packet_aggr->direct_link_flags |= 1;
169
170 /* add new packet to packet list */
171 spin_lock_bh(&bat_priv->forw_bat_list_lock);
172 hlist_add_head(&forw_packet_aggr->list, &bat_priv->forw_bat_list);
173 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
174
175 /* start timer for this packet */
176 INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work,
177 send_outstanding_bat_packet);
178 queue_delayed_work(bat_event_workqueue,
179 &forw_packet_aggr->delayed_work,
180 send_time - jiffies);
181
182 return;
183out:
184 hardif_free_ref(if_incoming);
185}
186
187/* aggregate a new packet into the existing aggregation */
188static void aggregate(struct forw_packet *forw_packet_aggr,
189 const unsigned char *packet_buff, int packet_len,
190 bool direct_link)
191{
192 unsigned char *skb_buff;
193
194 skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
195 memcpy(skb_buff, packet_buff, packet_len);
196 forw_packet_aggr->packet_len += packet_len;
197 forw_packet_aggr->num_packets++;
198
199 /* save packet direct link flag status */
200 if (direct_link)
201 forw_packet_aggr->direct_link_flags |=
202 (1 << forw_packet_aggr->num_packets);
203}
204
205void add_bat_packet_to_list(struct bat_priv *bat_priv,
206 unsigned char *packet_buff, int packet_len,
207 struct hard_iface *if_incoming, int own_packet,
208 unsigned long send_time)
209{
210 /**
211 * _aggr -> pointer to the packet we want to aggregate with
212 * _pos -> pointer to the position in the queue
213 */
214 struct forw_packet *forw_packet_aggr = NULL, *forw_packet_pos = NULL;
215 struct hlist_node *tmp_node;
216 struct batman_packet *batman_packet =
217 (struct batman_packet *)packet_buff;
218 bool direct_link = batman_packet->flags & DIRECTLINK ? 1 : 0;
219
220 /* find position for the packet in the forward queue */
221 spin_lock_bh(&bat_priv->forw_bat_list_lock);
222 /* own packets are not to be aggregated */
223 if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) {
224 hlist_for_each_entry(forw_packet_pos, tmp_node,
225 &bat_priv->forw_bat_list, list) {
226 if (can_aggregate_with(batman_packet,
227 bat_priv,
228 packet_len,
229 send_time,
230 direct_link,
231 if_incoming,
232 forw_packet_pos)) {
233 forw_packet_aggr = forw_packet_pos;
234 break;
235 }
236 }
237 }
238
239 /* nothing to aggregate with - either aggregation disabled or no
240 * suitable aggregation packet found */
241 if (!forw_packet_aggr) {
242 /* the following section can run without the lock */
243 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
244
245 /**
246 * if we could not aggregate this packet with one of the others
247 * we hold it back for a while, so that it might be aggregated
248 * later on
249 */
250 if ((!own_packet) &&
251 (atomic_read(&bat_priv->aggregated_ogms)))
252 send_time += msecs_to_jiffies(MAX_AGGREGATION_MS);
253
254 new_aggregated_packet(packet_buff, packet_len,
255 send_time, direct_link,
256 if_incoming, own_packet);
257 } else {
258 aggregate(forw_packet_aggr,
259 packet_buff, packet_len,
260 direct_link);
261 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
262 }
263}
264
265/* unpack the aggregated packets and process them one by one */
266void receive_aggr_bat_packet(const struct ethhdr *ethhdr,
267 unsigned char *packet_buff, int packet_len,
268 struct hard_iface *if_incoming)
269{
270 struct batman_packet *batman_packet;
271 int buff_pos = 0;
272 unsigned char *tt_buff;
273
274 batman_packet = (struct batman_packet *)packet_buff;
275
276 do {
277 /* network to host order for our 32bit seqno and the
278 orig_interval */
279 batman_packet->seqno = ntohl(batman_packet->seqno);
280 batman_packet->tt_crc = ntohs(batman_packet->tt_crc);
281
282 tt_buff = packet_buff + buff_pos + BAT_PACKET_LEN;
283
284 receive_bat_packet(ethhdr, batman_packet, tt_buff, if_incoming);
285
286 buff_pos += BAT_PACKET_LEN +
287 tt_len(batman_packet->tt_num_changes);
288
289 batman_packet = (struct batman_packet *)
290 (packet_buff + buff_pos);
291 } while (aggregated_packet(buff_pos, packet_len,
292 batman_packet->tt_num_changes));
293}
diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h
new file mode 100644
index 00000000000..216337bb841
--- /dev/null
+++ b/net/batman-adv/aggregation.h
@@ -0,0 +1,46 @@
1/*
2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 *
4 * Marek Lindner, Simon Wunderlich
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA
19 *
20 */
21
22#ifndef _NET_BATMAN_ADV_AGGREGATION_H_
23#define _NET_BATMAN_ADV_AGGREGATION_H_
24
25#include "main.h"
26
27/* is there another aggregated packet here? */
28static inline int aggregated_packet(int buff_pos, int packet_len,
29 int tt_num_changes)
30{
31 int next_buff_pos = buff_pos + BAT_PACKET_LEN + (tt_num_changes *
32 sizeof(struct tt_change));
33
34 return (next_buff_pos <= packet_len) &&
35 (next_buff_pos <= MAX_AGGREGATION_BYTES);
36}
37
38void add_bat_packet_to_list(struct bat_priv *bat_priv,
39 unsigned char *packet_buff, int packet_len,
40 struct hard_iface *if_incoming, int own_packet,
41 unsigned long send_time);
42void receive_aggr_bat_packet(const struct ethhdr *ethhdr,
43 unsigned char *packet_buff, int packet_len,
44 struct hard_iface *if_incoming);
45
46#endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */
diff --git a/net/batman-adv/bat_debugfs.c b/net/batman-adv/bat_debugfs.c
new file mode 100644
index 00000000000..d0af9bf69e4
--- /dev/null
+++ b/net/batman-adv/bat_debugfs.c
@@ -0,0 +1,359 @@
1/*
2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 *
4 * Marek Lindner
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA
19 *
20 */
21
22#include "main.h"
23
24#include <linux/debugfs.h>
25
26#include "bat_debugfs.h"
27#include "translation-table.h"
28#include "originator.h"
29#include "hard-interface.h"
30#include "gateway_common.h"
31#include "gateway_client.h"
32#include "soft-interface.h"
33#include "vis.h"
34#include "icmp_socket.h"
35
36static struct dentry *bat_debugfs;
37
38#ifdef CONFIG_BATMAN_ADV_DEBUG
39#define LOG_BUFF_MASK (log_buff_len-1)
40#define LOG_BUFF(idx) (debug_log->log_buff[(idx) & LOG_BUFF_MASK])
41
42static int log_buff_len = LOG_BUF_LEN;
43
44static void emit_log_char(struct debug_log *debug_log, char c)
45{
46 LOG_BUFF(debug_log->log_end) = c;
47 debug_log->log_end++;
48
49 if (debug_log->log_end - debug_log->log_start > log_buff_len)
50 debug_log->log_start = debug_log->log_end - log_buff_len;
51}
52
53__printf(2, 3)
54static int fdebug_log(struct debug_log *debug_log, const char *fmt, ...)
55{
56 va_list args;
57 static char debug_log_buf[256];
58 char *p;
59
60 if (!debug_log)
61 return 0;
62
63 spin_lock_bh(&debug_log->lock);
64 va_start(args, fmt);
65 vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args);
66 va_end(args);
67
68 for (p = debug_log_buf; *p != 0; p++)
69 emit_log_char(debug_log, *p);
70
71 spin_unlock_bh(&debug_log->lock);
72
73 wake_up(&debug_log->queue_wait);
74
75 return 0;
76}
77
78int debug_log(struct bat_priv *bat_priv, const char *fmt, ...)
79{
80 va_list args;
81 char tmp_log_buf[256];
82
83 va_start(args, fmt);
84 vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args);
85 fdebug_log(bat_priv->debug_log, "[%10lu] %s",
86 (jiffies / HZ), tmp_log_buf);
87 va_end(args);
88
89 return 0;
90}
91
92static int log_open(struct inode *inode, struct file *file)
93{
94 nonseekable_open(inode, file);
95 file->private_data = inode->i_private;
96 inc_module_count();
97 return 0;
98}
99
100static int log_release(struct inode *inode, struct file *file)
101{
102 dec_module_count();
103 return 0;
104}
105
106static ssize_t log_read(struct file *file, char __user *buf,
107 size_t count, loff_t *ppos)
108{
109 struct bat_priv *bat_priv = file->private_data;
110 struct debug_log *debug_log = bat_priv->debug_log;
111 int error, i = 0;
112 char c;
113
114 if ((file->f_flags & O_NONBLOCK) &&
115 !(debug_log->log_end - debug_log->log_start))
116 return -EAGAIN;
117
118 if (!buf)
119 return -EINVAL;
120
121 if (count == 0)
122 return 0;
123
124 if (!access_ok(VERIFY_WRITE, buf, count))
125 return -EFAULT;
126
127 error = wait_event_interruptible(debug_log->queue_wait,
128 (debug_log->log_start - debug_log->log_end));
129
130 if (error)
131 return error;
132
133 spin_lock_bh(&debug_log->lock);
134
135 while ((!error) && (i < count) &&
136 (debug_log->log_start != debug_log->log_end)) {
137 c = LOG_BUFF(debug_log->log_start);
138
139 debug_log->log_start++;
140
141 spin_unlock_bh(&debug_log->lock);
142
143 error = __put_user(c, buf);
144
145 spin_lock_bh(&debug_log->lock);
146
147 buf++;
148 i++;
149
150 }
151
152 spin_unlock_bh(&debug_log->lock);
153
154 if (!error)
155 return i;
156
157 return error;
158}
159
160static unsigned int log_poll(struct file *file, poll_table *wait)
161{
162 struct bat_priv *bat_priv = file->private_data;
163 struct debug_log *debug_log = bat_priv->debug_log;
164
165 poll_wait(file, &debug_log->queue_wait, wait);
166
167 if (debug_log->log_end - debug_log->log_start)
168 return POLLIN | POLLRDNORM;
169
170 return 0;
171}
172
173static const struct file_operations log_fops = {
174 .open = log_open,
175 .release = log_release,
176 .read = log_read,
177 .poll = log_poll,
178 .llseek = no_llseek,
179};
180
181static int debug_log_setup(struct bat_priv *bat_priv)
182{
183 struct dentry *d;
184
185 if (!bat_priv->debug_dir)
186 goto err;
187
188 bat_priv->debug_log = kzalloc(sizeof(*bat_priv->debug_log), GFP_ATOMIC);
189 if (!bat_priv->debug_log)
190 goto err;
191
192 spin_lock_init(&bat_priv->debug_log->lock);
193 init_waitqueue_head(&bat_priv->debug_log->queue_wait);
194
195 d = debugfs_create_file("log", S_IFREG | S_IRUSR,
196 bat_priv->debug_dir, bat_priv, &log_fops);
197 if (!d)
198 goto err;
199
200 return 0;
201
202err:
203 return 1;
204}
205
206static void debug_log_cleanup(struct bat_priv *bat_priv)
207{
208 kfree(bat_priv->debug_log);
209 bat_priv->debug_log = NULL;
210}
211#else /* CONFIG_BATMAN_ADV_DEBUG */
212static int debug_log_setup(struct bat_priv *bat_priv)
213{
214 bat_priv->debug_log = NULL;
215 return 0;
216}
217
218static void debug_log_cleanup(struct bat_priv *bat_priv)
219{
220 return;
221}
222#endif
223
224static int originators_open(struct inode *inode, struct file *file)
225{
226 struct net_device *net_dev = (struct net_device *)inode->i_private;
227 return single_open(file, orig_seq_print_text, net_dev);
228}
229
230static int gateways_open(struct inode *inode, struct file *file)
231{
232 struct net_device *net_dev = (struct net_device *)inode->i_private;
233 return single_open(file, gw_client_seq_print_text, net_dev);
234}
235
236static int softif_neigh_open(struct inode *inode, struct file *file)
237{
238 struct net_device *net_dev = (struct net_device *)inode->i_private;
239 return single_open(file, softif_neigh_seq_print_text, net_dev);
240}
241
242static int transtable_global_open(struct inode *inode, struct file *file)
243{
244 struct net_device *net_dev = (struct net_device *)inode->i_private;
245 return single_open(file, tt_global_seq_print_text, net_dev);
246}
247
248static int transtable_local_open(struct inode *inode, struct file *file)
249{
250 struct net_device *net_dev = (struct net_device *)inode->i_private;
251 return single_open(file, tt_local_seq_print_text, net_dev);
252}
253
254static int vis_data_open(struct inode *inode, struct file *file)
255{
256 struct net_device *net_dev = (struct net_device *)inode->i_private;
257 return single_open(file, vis_seq_print_text, net_dev);
258}
259
260struct bat_debuginfo {
261 struct attribute attr;
262 const struct file_operations fops;
263};
264
265#define BAT_DEBUGINFO(_name, _mode, _open) \
266struct bat_debuginfo bat_debuginfo_##_name = { \
267 .attr = { .name = __stringify(_name), \
268 .mode = _mode, }, \
269 .fops = { .owner = THIS_MODULE, \
270 .open = _open, \
271 .read = seq_read, \
272 .llseek = seq_lseek, \
273 .release = single_release, \
274 } \
275};
276
277static BAT_DEBUGINFO(originators, S_IRUGO, originators_open);
278static BAT_DEBUGINFO(gateways, S_IRUGO, gateways_open);
279static BAT_DEBUGINFO(softif_neigh, S_IRUGO, softif_neigh_open);
280static BAT_DEBUGINFO(transtable_global, S_IRUGO, transtable_global_open);
281static BAT_DEBUGINFO(transtable_local, S_IRUGO, transtable_local_open);
282static BAT_DEBUGINFO(vis_data, S_IRUGO, vis_data_open);
283
284static struct bat_debuginfo *mesh_debuginfos[] = {
285 &bat_debuginfo_originators,
286 &bat_debuginfo_gateways,
287 &bat_debuginfo_softif_neigh,
288 &bat_debuginfo_transtable_global,
289 &bat_debuginfo_transtable_local,
290 &bat_debuginfo_vis_data,
291 NULL,
292};
293
294void debugfs_init(void)
295{
296 bat_debugfs = debugfs_create_dir(DEBUGFS_BAT_SUBDIR, NULL);
297 if (bat_debugfs == ERR_PTR(-ENODEV))
298 bat_debugfs = NULL;
299}
300
301void debugfs_destroy(void)
302{
303 if (bat_debugfs) {
304 debugfs_remove_recursive(bat_debugfs);
305 bat_debugfs = NULL;
306 }
307}
308
309int debugfs_add_meshif(struct net_device *dev)
310{
311 struct bat_priv *bat_priv = netdev_priv(dev);
312 struct bat_debuginfo **bat_debug;
313 struct dentry *file;
314
315 if (!bat_debugfs)
316 goto out;
317
318 bat_priv->debug_dir = debugfs_create_dir(dev->name, bat_debugfs);
319 if (!bat_priv->debug_dir)
320 goto out;
321
322 bat_socket_setup(bat_priv);
323 debug_log_setup(bat_priv);
324
325 for (bat_debug = mesh_debuginfos; *bat_debug; ++bat_debug) {
326 file = debugfs_create_file(((*bat_debug)->attr).name,
327 S_IFREG | ((*bat_debug)->attr).mode,
328 bat_priv->debug_dir,
329 dev, &(*bat_debug)->fops);
330 if (!file) {
331 bat_err(dev, "Can't add debugfs file: %s/%s\n",
332 dev->name, ((*bat_debug)->attr).name);
333 goto rem_attr;
334 }
335 }
336
337 return 0;
338rem_attr:
339 debugfs_remove_recursive(bat_priv->debug_dir);
340 bat_priv->debug_dir = NULL;
341out:
342#ifdef CONFIG_DEBUG_FS
343 return -ENOMEM;
344#else
345 return 0;
346#endif /* CONFIG_DEBUG_FS */
347}
348
349void debugfs_del_meshif(struct net_device *dev)
350{
351 struct bat_priv *bat_priv = netdev_priv(dev);
352
353 debug_log_cleanup(bat_priv);
354
355 if (bat_debugfs) {
356 debugfs_remove_recursive(bat_priv->debug_dir);
357 bat_priv->debug_dir = NULL;
358 }
359}
diff --git a/net/batman-adv/bat_debugfs.h b/net/batman-adv/bat_debugfs.h
new file mode 100644
index 00000000000..bc9cda3f01e
--- /dev/null
+++ b/net/batman-adv/bat_debugfs.h
@@ -0,0 +1,33 @@
1/*
2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 *
4 * Marek Lindner
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA
19 *
20 */
21
22
23#ifndef _NET_BATMAN_ADV_DEBUGFS_H_
24#define _NET_BATMAN_ADV_DEBUGFS_H_
25
26#define DEBUGFS_BAT_SUBDIR "batman_adv"
27
28void debugfs_init(void);
29void debugfs_destroy(void);
30int debugfs_add_meshif(struct net_device *dev);
31void debugfs_del_meshif(struct net_device *dev);
32
33#endif /* _NET_BATMAN_ADV_DEBUGFS_H_ */
diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c
new file mode 100644
index 00000000000..cd15deba60a
--- /dev/null
+++ b/net/batman-adv/bat_sysfs.c
@@ -0,0 +1,674 @@
1/*
2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 *
4 * Marek Lindner
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA
19 *
20 */
21
22#include "main.h"
23#include "bat_sysfs.h"
24#include "translation-table.h"
25#include "originator.h"
26#include "hard-interface.h"
27#include "gateway_common.h"
28#include "gateway_client.h"
29#include "vis.h"
30
31static struct net_device *kobj_to_netdev(struct kobject *obj)
32{
33 struct device *dev = container_of(obj->parent, struct device, kobj);
34 return to_net_dev(dev);
35}
36
37static struct bat_priv *kobj_to_batpriv(struct kobject *obj)
38{
39 struct net_device *net_dev = kobj_to_netdev(obj);
40 return netdev_priv(net_dev);
41}
42
43#define UEV_TYPE_VAR "BATTYPE="
44#define UEV_ACTION_VAR "BATACTION="
45#define UEV_DATA_VAR "BATDATA="
46
47static char *uev_action_str[] = {
48 "add",
49 "del",
50 "change"
51};
52
53static char *uev_type_str[] = {
54 "gw"
55};
56
57/* Use this, if you have customized show and store functions */
58#define BAT_ATTR(_name, _mode, _show, _store) \
59struct bat_attribute bat_attr_##_name = { \
60 .attr = {.name = __stringify(_name), \
61 .mode = _mode }, \
62 .show = _show, \
63 .store = _store, \
64};
65
66#define BAT_ATTR_STORE_BOOL(_name, _post_func) \
67ssize_t store_##_name(struct kobject *kobj, struct attribute *attr, \
68 char *buff, size_t count) \
69{ \
70 struct net_device *net_dev = kobj_to_netdev(kobj); \
71 struct bat_priv *bat_priv = netdev_priv(net_dev); \
72 return __store_bool_attr(buff, count, _post_func, attr, \
73 &bat_priv->_name, net_dev); \
74}
75
76#define BAT_ATTR_SHOW_BOOL(_name) \
77ssize_t show_##_name(struct kobject *kobj, struct attribute *attr, \
78 char *buff) \
79{ \
80 struct bat_priv *bat_priv = kobj_to_batpriv(kobj); \
81 return sprintf(buff, "%s\n", \
82 atomic_read(&bat_priv->_name) == 0 ? \
83 "disabled" : "enabled"); \
84} \
85
86/* Use this, if you are going to turn a [name] in bat_priv on or off */
87#define BAT_ATTR_BOOL(_name, _mode, _post_func) \
88 static BAT_ATTR_STORE_BOOL(_name, _post_func) \
89 static BAT_ATTR_SHOW_BOOL(_name) \
90 static BAT_ATTR(_name, _mode, show_##_name, store_##_name)
91
92
93#define BAT_ATTR_STORE_UINT(_name, _min, _max, _post_func) \
94ssize_t store_##_name(struct kobject *kobj, struct attribute *attr, \
95 char *buff, size_t count) \
96{ \
97 struct net_device *net_dev = kobj_to_netdev(kobj); \
98 struct bat_priv *bat_priv = netdev_priv(net_dev); \
99 return __store_uint_attr(buff, count, _min, _max, _post_func, \
100 attr, &bat_priv->_name, net_dev); \
101}
102
103#define BAT_ATTR_SHOW_UINT(_name) \
104ssize_t show_##_name(struct kobject *kobj, struct attribute *attr, \
105 char *buff) \
106{ \
107 struct bat_priv *bat_priv = kobj_to_batpriv(kobj); \
108 return sprintf(buff, "%i\n", atomic_read(&bat_priv->_name)); \
109} \
110
111/* Use this, if you are going to set [name] in bat_priv to unsigned integer
112 * values only */
113#define BAT_ATTR_UINT(_name, _mode, _min, _max, _post_func) \
114 static BAT_ATTR_STORE_UINT(_name, _min, _max, _post_func) \
115 static BAT_ATTR_SHOW_UINT(_name) \
116 static BAT_ATTR(_name, _mode, show_##_name, store_##_name)
117
118
119static int store_bool_attr(char *buff, size_t count,
120 struct net_device *net_dev,
121 const char *attr_name, atomic_t *attr)
122{
123 int enabled = -1;
124
125 if (buff[count - 1] == '\n')
126 buff[count - 1] = '\0';
127
128 if ((strncmp(buff, "1", 2) == 0) ||
129 (strncmp(buff, "enable", 7) == 0) ||
130 (strncmp(buff, "enabled", 8) == 0))
131 enabled = 1;
132
133 if ((strncmp(buff, "0", 2) == 0) ||
134 (strncmp(buff, "disable", 8) == 0) ||
135 (strncmp(buff, "disabled", 9) == 0))
136 enabled = 0;
137
138 if (enabled < 0) {
139 bat_info(net_dev,
140 "%s: Invalid parameter received: %s\n",
141 attr_name, buff);
142 return -EINVAL;
143 }
144
145 if (atomic_read(attr) == enabled)
146 return count;
147
148 bat_info(net_dev, "%s: Changing from: %s to: %s\n", attr_name,
149 atomic_read(attr) == 1 ? "enabled" : "disabled",
150 enabled == 1 ? "enabled" : "disabled");
151
152 atomic_set(attr, (unsigned)enabled);
153 return count;
154}
155
156static inline ssize_t __store_bool_attr(char *buff, size_t count,
157 void (*post_func)(struct net_device *),
158 struct attribute *attr,
159 atomic_t *attr_store, struct net_device *net_dev)
160{
161 int ret;
162
163 ret = store_bool_attr(buff, count, net_dev, attr->name, attr_store);
164 if (post_func && ret)
165 post_func(net_dev);
166
167 return ret;
168}
169
170static int store_uint_attr(const char *buff, size_t count,
171 struct net_device *net_dev, const char *attr_name,
172 unsigned int min, unsigned int max, atomic_t *attr)
173{
174 unsigned long uint_val;
175 int ret;
176
177 ret = strict_strtoul(buff, 10, &uint_val);
178 if (ret) {
179 bat_info(net_dev,
180 "%s: Invalid parameter received: %s\n",
181 attr_name, buff);
182 return -EINVAL;
183 }
184
185 if (uint_val < min) {
186 bat_info(net_dev, "%s: Value is too small: %lu min: %u\n",
187 attr_name, uint_val, min);
188 return -EINVAL;
189 }
190
191 if (uint_val > max) {
192 bat_info(net_dev, "%s: Value is too big: %lu max: %u\n",
193 attr_name, uint_val, max);
194 return -EINVAL;
195 }
196
197 if (atomic_read(attr) == uint_val)
198 return count;
199
200 bat_info(net_dev, "%s: Changing from: %i to: %lu\n",
201 attr_name, atomic_read(attr), uint_val);
202
203 atomic_set(attr, uint_val);
204 return count;
205}
206
207static inline ssize_t __store_uint_attr(const char *buff, size_t count,
208 int min, int max,
209 void (*post_func)(struct net_device *),
210 const struct attribute *attr,
211 atomic_t *attr_store, struct net_device *net_dev)
212{
213 int ret;
214
215 ret = store_uint_attr(buff, count, net_dev, attr->name,
216 min, max, attr_store);
217 if (post_func && ret)
218 post_func(net_dev);
219
220 return ret;
221}
222
223static ssize_t show_vis_mode(struct kobject *kobj, struct attribute *attr,
224 char *buff)
225{
226 struct bat_priv *bat_priv = kobj_to_batpriv(kobj);
227 int vis_mode = atomic_read(&bat_priv->vis_mode);
228
229 return sprintf(buff, "%s\n",
230 vis_mode == VIS_TYPE_CLIENT_UPDATE ?
231 "client" : "server");
232}
233
234static ssize_t store_vis_mode(struct kobject *kobj, struct attribute *attr,
235 char *buff, size_t count)
236{
237 struct net_device *net_dev = kobj_to_netdev(kobj);
238 struct bat_priv *bat_priv = netdev_priv(net_dev);
239 unsigned long val;
240 int ret, vis_mode_tmp = -1;
241
242 ret = strict_strtoul(buff, 10, &val);
243
244 if (((count == 2) && (!ret) && (val == VIS_TYPE_CLIENT_UPDATE)) ||
245 (strncmp(buff, "client", 6) == 0) ||
246 (strncmp(buff, "off", 3) == 0))
247 vis_mode_tmp = VIS_TYPE_CLIENT_UPDATE;
248
249 if (((count == 2) && (!ret) && (val == VIS_TYPE_SERVER_SYNC)) ||
250 (strncmp(buff, "server", 6) == 0))
251 vis_mode_tmp = VIS_TYPE_SERVER_SYNC;
252
253 if (vis_mode_tmp < 0) {
254 if (buff[count - 1] == '\n')
255 buff[count - 1] = '\0';
256
257 bat_info(net_dev,
258 "Invalid parameter for 'vis mode' setting received: "
259 "%s\n", buff);
260 return -EINVAL;
261 }
262
263 if (atomic_read(&bat_priv->vis_mode) == vis_mode_tmp)
264 return count;
265
266 bat_info(net_dev, "Changing vis mode from: %s to: %s\n",
267 atomic_read(&bat_priv->vis_mode) == VIS_TYPE_CLIENT_UPDATE ?
268 "client" : "server", vis_mode_tmp == VIS_TYPE_CLIENT_UPDATE ?
269 "client" : "server");
270
271 atomic_set(&bat_priv->vis_mode, (unsigned)vis_mode_tmp);
272 return count;
273}
274
275static void post_gw_deselect(struct net_device *net_dev)
276{
277 struct bat_priv *bat_priv = netdev_priv(net_dev);
278 gw_deselect(bat_priv);
279}
280
281static ssize_t show_gw_mode(struct kobject *kobj, struct attribute *attr,
282 char *buff)
283{
284 struct bat_priv *bat_priv = kobj_to_batpriv(kobj);
285 int bytes_written;
286
287 switch (atomic_read(&bat_priv->gw_mode)) {
288 case GW_MODE_CLIENT:
289 bytes_written = sprintf(buff, "%s\n", GW_MODE_CLIENT_NAME);
290 break;
291 case GW_MODE_SERVER:
292 bytes_written = sprintf(buff, "%s\n", GW_MODE_SERVER_NAME);
293 break;
294 default:
295 bytes_written = sprintf(buff, "%s\n", GW_MODE_OFF_NAME);
296 break;
297 }
298
299 return bytes_written;
300}
301
302static ssize_t store_gw_mode(struct kobject *kobj, struct attribute *attr,
303 char *buff, size_t count)
304{
305 struct net_device *net_dev = kobj_to_netdev(kobj);
306 struct bat_priv *bat_priv = netdev_priv(net_dev);
307 char *curr_gw_mode_str;
308 int gw_mode_tmp = -1;
309
310 if (buff[count - 1] == '\n')
311 buff[count - 1] = '\0';
312
313 if (strncmp(buff, GW_MODE_OFF_NAME, strlen(GW_MODE_OFF_NAME)) == 0)
314 gw_mode_tmp = GW_MODE_OFF;
315
316 if (strncmp(buff, GW_MODE_CLIENT_NAME,
317 strlen(GW_MODE_CLIENT_NAME)) == 0)
318 gw_mode_tmp = GW_MODE_CLIENT;
319
320 if (strncmp(buff, GW_MODE_SERVER_NAME,
321 strlen(GW_MODE_SERVER_NAME)) == 0)
322 gw_mode_tmp = GW_MODE_SERVER;
323
324 if (gw_mode_tmp < 0) {
325 bat_info(net_dev,
326 "Invalid parameter for 'gw mode' setting received: "
327 "%s\n", buff);
328 return -EINVAL;
329 }
330
331 if (atomic_read(&bat_priv->gw_mode) == gw_mode_tmp)
332 return count;
333
334 switch (atomic_read(&bat_priv->gw_mode)) {
335 case GW_MODE_CLIENT:
336 curr_gw_mode_str = GW_MODE_CLIENT_NAME;
337 break;
338 case GW_MODE_SERVER:
339 curr_gw_mode_str = GW_MODE_SERVER_NAME;
340 break;
341 default:
342 curr_gw_mode_str = GW_MODE_OFF_NAME;
343 break;
344 }
345
346 bat_info(net_dev, "Changing gw mode from: %s to: %s\n",
347 curr_gw_mode_str, buff);
348
349 gw_deselect(bat_priv);
350 atomic_set(&bat_priv->gw_mode, (unsigned)gw_mode_tmp);
351 return count;
352}
353
354static ssize_t show_gw_bwidth(struct kobject *kobj, struct attribute *attr,
355 char *buff)
356{
357 struct bat_priv *bat_priv = kobj_to_batpriv(kobj);
358 int down, up;
359 int gw_bandwidth = atomic_read(&bat_priv->gw_bandwidth);
360
361 gw_bandwidth_to_kbit(gw_bandwidth, &down, &up);
362 return sprintf(buff, "%i%s/%i%s\n",
363 (down > 2048 ? down / 1024 : down),
364 (down > 2048 ? "MBit" : "KBit"),
365 (up > 2048 ? up / 1024 : up),
366 (up > 2048 ? "MBit" : "KBit"));
367}
368
369static ssize_t store_gw_bwidth(struct kobject *kobj, struct attribute *attr,
370 char *buff, size_t count)
371{
372 struct net_device *net_dev = kobj_to_netdev(kobj);
373
374 if (buff[count - 1] == '\n')
375 buff[count - 1] = '\0';
376
377 return gw_bandwidth_set(net_dev, buff, count);
378}
379
380BAT_ATTR_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL);
381BAT_ATTR_BOOL(bonding, S_IRUGO | S_IWUSR, NULL);
382BAT_ATTR_BOOL(fragmentation, S_IRUGO | S_IWUSR, update_min_mtu);
383static BAT_ATTR(vis_mode, S_IRUGO | S_IWUSR, show_vis_mode, store_vis_mode);
384static BAT_ATTR(gw_mode, S_IRUGO | S_IWUSR, show_gw_mode, store_gw_mode);
385BAT_ATTR_UINT(orig_interval, S_IRUGO | S_IWUSR, 2 * JITTER, INT_MAX, NULL);
386BAT_ATTR_UINT(hop_penalty, S_IRUGO | S_IWUSR, 0, TQ_MAX_VALUE, NULL);
387BAT_ATTR_UINT(gw_sel_class, S_IRUGO | S_IWUSR, 1, TQ_MAX_VALUE,
388 post_gw_deselect);
389static BAT_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, show_gw_bwidth,
390 store_gw_bwidth);
391#ifdef CONFIG_BATMAN_ADV_DEBUG
392BAT_ATTR_UINT(log_level, S_IRUGO | S_IWUSR, 0, 7, NULL);
393#endif
394
395static struct bat_attribute *mesh_attrs[] = {
396 &bat_attr_aggregated_ogms,
397 &bat_attr_bonding,
398 &bat_attr_fragmentation,
399 &bat_attr_vis_mode,
400 &bat_attr_gw_mode,
401 &bat_attr_orig_interval,
402 &bat_attr_hop_penalty,
403 &bat_attr_gw_sel_class,
404 &bat_attr_gw_bandwidth,
405#ifdef CONFIG_BATMAN_ADV_DEBUG
406 &bat_attr_log_level,
407#endif
408 NULL,
409};
410
411int sysfs_add_meshif(struct net_device *dev)
412{
413 struct kobject *batif_kobject = &dev->dev.kobj;
414 struct bat_priv *bat_priv = netdev_priv(dev);
415 struct bat_attribute **bat_attr;
416 int err;
417
418 bat_priv->mesh_obj = kobject_create_and_add(SYSFS_IF_MESH_SUBDIR,
419 batif_kobject);
420 if (!bat_priv->mesh_obj) {
421 bat_err(dev, "Can't add sysfs directory: %s/%s\n", dev->name,
422 SYSFS_IF_MESH_SUBDIR);
423 goto out;
424 }
425
426 for (bat_attr = mesh_attrs; *bat_attr; ++bat_attr) {
427 err = sysfs_create_file(bat_priv->mesh_obj,
428 &((*bat_attr)->attr));
429 if (err) {
430 bat_err(dev, "Can't add sysfs file: %s/%s/%s\n",
431 dev->name, SYSFS_IF_MESH_SUBDIR,
432 ((*bat_attr)->attr).name);
433 goto rem_attr;
434 }
435 }
436
437 return 0;
438
439rem_attr:
440 for (bat_attr = mesh_attrs; *bat_attr; ++bat_attr)
441 sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
442
443 kobject_put(bat_priv->mesh_obj);
444 bat_priv->mesh_obj = NULL;
445out:
446 return -ENOMEM;
447}
448
449void sysfs_del_meshif(struct net_device *dev)
450{
451 struct bat_priv *bat_priv = netdev_priv(dev);
452 struct bat_attribute **bat_attr;
453
454 for (bat_attr = mesh_attrs; *bat_attr; ++bat_attr)
455 sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
456
457 kobject_put(bat_priv->mesh_obj);
458 bat_priv->mesh_obj = NULL;
459}
460
461static ssize_t show_mesh_iface(struct kobject *kobj, struct attribute *attr,
462 char *buff)
463{
464 struct net_device *net_dev = kobj_to_netdev(kobj);
465 struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
466 ssize_t length;
467
468 if (!hard_iface)
469 return 0;
470
471 length = sprintf(buff, "%s\n", hard_iface->if_status == IF_NOT_IN_USE ?
472 "none" : hard_iface->soft_iface->name);
473
474 hardif_free_ref(hard_iface);
475
476 return length;
477}
478
479static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr,
480 char *buff, size_t count)
481{
482 struct net_device *net_dev = kobj_to_netdev(kobj);
483 struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
484 int status_tmp = -1;
485 int ret = count;
486
487 if (!hard_iface)
488 return count;
489
490 if (buff[count - 1] == '\n')
491 buff[count - 1] = '\0';
492
493 if (strlen(buff) >= IFNAMSIZ) {
494 pr_err("Invalid parameter for 'mesh_iface' setting received: "
495 "interface name too long '%s'\n", buff);
496 hardif_free_ref(hard_iface);
497 return -EINVAL;
498 }
499
500 if (strncmp(buff, "none", 4) == 0)
501 status_tmp = IF_NOT_IN_USE;
502 else
503 status_tmp = IF_I_WANT_YOU;
504
505 if (hard_iface->if_status == status_tmp)
506 goto out;
507
508 if ((hard_iface->soft_iface) &&
509 (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0))
510 goto out;
511
512 if (!rtnl_trylock()) {
513 ret = -ERESTARTSYS;
514 goto out;
515 }
516
517 if (status_tmp == IF_NOT_IN_USE) {
518 hardif_disable_interface(hard_iface);
519 goto unlock;
520 }
521
522	/* if the interface is already in use */
523 if (hard_iface->if_status != IF_NOT_IN_USE)
524 hardif_disable_interface(hard_iface);
525
526 ret = hardif_enable_interface(hard_iface, buff);
527
528unlock:
529 rtnl_unlock();
530out:
531 hardif_free_ref(hard_iface);
532 return ret;
533}
534
535static ssize_t show_iface_status(struct kobject *kobj, struct attribute *attr,
536 char *buff)
537{
538 struct net_device *net_dev = kobj_to_netdev(kobj);
539 struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
540 ssize_t length;
541
542 if (!hard_iface)
543 return 0;
544
545 switch (hard_iface->if_status) {
546 case IF_TO_BE_REMOVED:
547 length = sprintf(buff, "disabling\n");
548 break;
549 case IF_INACTIVE:
550 length = sprintf(buff, "inactive\n");
551 break;
552 case IF_ACTIVE:
553 length = sprintf(buff, "active\n");
554 break;
555 case IF_TO_BE_ACTIVATED:
556 length = sprintf(buff, "enabling\n");
557 break;
558 case IF_NOT_IN_USE:
559 default:
560 length = sprintf(buff, "not in use\n");
561 break;
562 }
563
564 hardif_free_ref(hard_iface);
565
566 return length;
567}
568
569static BAT_ATTR(mesh_iface, S_IRUGO | S_IWUSR,
570 show_mesh_iface, store_mesh_iface);
571static BAT_ATTR(iface_status, S_IRUGO, show_iface_status, NULL);
572
573static struct bat_attribute *batman_attrs[] = {
574 &bat_attr_mesh_iface,
575 &bat_attr_iface_status,
576 NULL,
577};
578
579int sysfs_add_hardif(struct kobject **hardif_obj, struct net_device *dev)
580{
581 struct kobject *hardif_kobject = &dev->dev.kobj;
582 struct bat_attribute **bat_attr;
583 int err;
584
585 *hardif_obj = kobject_create_and_add(SYSFS_IF_BAT_SUBDIR,
586 hardif_kobject);
587
588 if (!*hardif_obj) {
589 bat_err(dev, "Can't add sysfs directory: %s/%s\n", dev->name,
590 SYSFS_IF_BAT_SUBDIR);
591 goto out;
592 }
593
594 for (bat_attr = batman_attrs; *bat_attr; ++bat_attr) {
595 err = sysfs_create_file(*hardif_obj, &((*bat_attr)->attr));
596 if (err) {
597 bat_err(dev, "Can't add sysfs file: %s/%s/%s\n",
598 dev->name, SYSFS_IF_BAT_SUBDIR,
599 ((*bat_attr)->attr).name);
600 goto rem_attr;
601 }
602 }
603
604 return 0;
605
606rem_attr:
607 for (bat_attr = batman_attrs; *bat_attr; ++bat_attr)
608 sysfs_remove_file(*hardif_obj, &((*bat_attr)->attr));
609out:
610 return -ENOMEM;
611}
612
613void sysfs_del_hardif(struct kobject **hardif_obj)
614{
615 kobject_put(*hardif_obj);
616 *hardif_obj = NULL;
617}
618
619int throw_uevent(struct bat_priv *bat_priv, enum uev_type type,
620 enum uev_action action, const char *data)
621{
622 int ret = -1;
623 struct hard_iface *primary_if = NULL;
624 struct kobject *bat_kobj;
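	/*
	 * Note: kobject_uevent_env() expects a NULL-terminated array of
	 * "KEY=value" strings, so four slots leave room for at most three
	 * environment variables plus the terminating NULL.
	 */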
625 char *uevent_env[4] = { NULL, NULL, NULL, NULL };
626
627 primary_if = primary_if_get_selected(bat_priv);
628 if (!primary_if)
629 goto out;
630
631 bat_kobj = &primary_if->soft_iface->dev.kobj;
632
633 uevent_env[0] = kmalloc(strlen(UEV_TYPE_VAR) +
634 strlen(uev_type_str[type]) + 1,
635 GFP_ATOMIC);
636 if (!uevent_env[0])
637 goto out;
638
639 sprintf(uevent_env[0], "%s%s", UEV_TYPE_VAR, uev_type_str[type]);
640
641 uevent_env[1] = kmalloc(strlen(UEV_ACTION_VAR) +
642 strlen(uev_action_str[action]) + 1,
643 GFP_ATOMIC);
644 if (!uevent_env[1])
645 goto out;
646
647 sprintf(uevent_env[1], "%s%s", UEV_ACTION_VAR, uev_action_str[action]);
648
649 /* If the event is DEL, ignore the data field */
650 if (action != UEV_DEL) {
651 uevent_env[2] = kmalloc(strlen(UEV_DATA_VAR) +
652 strlen(data) + 1, GFP_ATOMIC);
653 if (!uevent_env[2])
654 goto out;
655
656 sprintf(uevent_env[2], "%s%s", UEV_DATA_VAR, data);
657 }
658
659 ret = kobject_uevent_env(bat_kobj, KOBJ_CHANGE, uevent_env);
660out:
661 kfree(uevent_env[0]);
662 kfree(uevent_env[1]);
663 kfree(uevent_env[2]);
664
665 if (primary_if)
666 hardif_free_ref(primary_if);
667
668 if (ret)
669		bat_dbg(DBG_BATMAN, bat_priv, "Unable to send "
670 "uevent for (%s,%s,%s) event (err: %d)\n",
671 uev_type_str[type], uev_action_str[action],
672 (action == UEV_DEL ? "NULL" : data), ret);
673 return ret;
674}
diff --git a/net/batman-adv/bat_sysfs.h b/net/batman-adv/bat_sysfs.h
new file mode 100644
index 00000000000..a3f75a723c5
--- /dev/null
+++ b/net/batman-adv/bat_sysfs.h
@@ -0,0 +1,44 @@
1/*
2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 *
4 * Marek Lindner
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA
19 *
20 */
21
22
23#ifndef _NET_BATMAN_ADV_SYSFS_H_
24#define _NET_BATMAN_ADV_SYSFS_H_
25
26#define SYSFS_IF_MESH_SUBDIR "mesh"
27#define SYSFS_IF_BAT_SUBDIR "batman_adv"
28
29struct bat_attribute {
30 struct attribute attr;
31 ssize_t (*show)(struct kobject *kobj, struct attribute *attr,
32 char *buf);
33 ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
34 char *buf, size_t count);
35};
36
37int sysfs_add_meshif(struct net_device *dev);
38void sysfs_del_meshif(struct net_device *dev);
39int sysfs_add_hardif(struct kobject **hardif_obj, struct net_device *dev);
40void sysfs_del_hardif(struct kobject **hardif_obj);
41int throw_uevent(struct bat_priv *bat_priv, enum uev_type type,
42 enum uev_action action, const char *data);
43
44#endif /* _NET_BATMAN_ADV_SYSFS_H_ */
diff --git a/net/core/kmap_skb.h b/net/core/kmap_skb.h
new file mode 100644
index 00000000000..283c2b993fb
--- /dev/null
+++ b/net/core/kmap_skb.h
@@ -0,0 +1,19 @@
1#include <linux/highmem.h>
2
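/*
 * Editorial note: KM_SKB_DATA_SOFTIRQ is a per-CPU atomic kmap slot that is
 * also used from softirq context, so on HIGHMEM kernels bottom halves are
 * disabled around the mapping to keep a softirq on the same CPU from reusing
 * the slot while it is held; the BUG_ON() guards against hardirq callers.
 */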
3static inline void *kmap_skb_frag(const skb_frag_t *frag)
4{
5#ifdef CONFIG_HIGHMEM
6 BUG_ON(in_irq());
7
8 local_bh_disable();
9#endif
10 return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
11}
12
13static inline void kunmap_skb_frag(void *vaddr)
14{
15 kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
16#ifdef CONFIG_HIGHMEM
17 local_bh_enable();
18#endif
19}
diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c
new file mode 100644
index 00000000000..8f4ff5a2c81
--- /dev/null
+++ b/net/dsa/mv88e6060.c
@@ -0,0 +1,288 @@
1/*
2 * net/dsa/mv88e6060.c - Driver for Marvell 88e6060 switch chips
3 * Copyright (c) 2008-2009 Marvell Semiconductor
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 */
10
11#include <linux/list.h>
12#include <linux/netdevice.h>
13#include <linux/phy.h>
14#include "dsa_priv.h"
15
16#define REG_PORT(p) (8 + (p))
17#define REG_GLOBAL 0x0f
18
19static int reg_read(struct dsa_switch *ds, int addr, int reg)
20{
21 return mdiobus_read(ds->master_mii_bus, ds->pd->sw_addr + addr, reg);
22}
23
24#define REG_READ(addr, reg) \
25 ({ \
26 int __ret; \
27 \
28 __ret = reg_read(ds, addr, reg); \
29 if (__ret < 0) \
30 return __ret; \
31 __ret; \
32 })
33
34
35static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
36{
37 return mdiobus_write(ds->master_mii_bus, ds->pd->sw_addr + addr,
38 reg, val);
39}
40
41#define REG_WRITE(addr, reg, val) \
42 ({ \
43 int __ret; \
44 \
45 __ret = reg_write(ds, addr, reg, val); \
46 if (__ret < 0) \
47 return __ret; \
48 })
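/*
 * Note: REG_READ()/REG_WRITE() are GCC statement expressions containing a
 * bare "return", so an MDIO error makes the *calling* function return the
 * error code; every caller below therefore returns int.  REG_READ()
 * additionally evaluates to the register value that was read.
 */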
49
50static char *mv88e6060_probe(struct mii_bus *bus, int sw_addr)
51{
52 int ret;
53
54 ret = mdiobus_read(bus, sw_addr + REG_PORT(0), 0x03);
55 if (ret >= 0) {
56 ret &= 0xfff0;
57 if (ret == 0x0600)
58 return "Marvell 88E6060";
59 }
60
61 return NULL;
62}
63
64static int mv88e6060_switch_reset(struct dsa_switch *ds)
65{
66 int i;
67 int ret;
68
69 /*
70 * Set all ports to the disabled state.
71 */
72 for (i = 0; i < 6; i++) {
73 ret = REG_READ(REG_PORT(i), 0x04);
74 REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc);
75 }
76
77 /*
78 * Wait for transmit queues to drain.
79 */
80 msleep(2);
81
82 /*
83 * Reset the switch.
84 */
85 REG_WRITE(REG_GLOBAL, 0x0a, 0xa130);
86
87 /*
88 * Wait up to one second for reset to complete.
89 */
90 for (i = 0; i < 1000; i++) {
91 ret = REG_READ(REG_GLOBAL, 0x00);
92 if ((ret & 0x8000) == 0x0000)
93 break;
94
95 msleep(1);
96 }
97 if (i == 1000)
98 return -ETIMEDOUT;
99
100 return 0;
101}
102
103static int mv88e6060_setup_global(struct dsa_switch *ds)
104{
105 /*
106 * Disable discarding of frames with excessive collisions,
107 * set the maximum frame size to 1536 bytes, and mask all
108 * interrupt sources.
109 */
110 REG_WRITE(REG_GLOBAL, 0x04, 0x0800);
111
112 /*
113 * Enable automatic address learning, set the address
114 * database size to 1024 entries, and set the default aging
115 * time to 5 minutes.
116 */
117 REG_WRITE(REG_GLOBAL, 0x0a, 0x2130);
118
119 return 0;
120}
121
122static int mv88e6060_setup_port(struct dsa_switch *ds, int p)
123{
124 int addr = REG_PORT(p);
125
126 /*
127 * Do not force flow control, disable Ingress and Egress
128 * Header tagging, disable VLAN tunneling, and set the port
129 * state to Forwarding. Additionally, if this is the CPU
130 * port, enable Ingress and Egress Trailer tagging mode.
131 */
132 REG_WRITE(addr, 0x04, dsa_is_cpu_port(ds, p) ? 0x4103 : 0x0003);
133
134 /*
135 * Port based VLAN map: give each port its own address
136 * database, allow the CPU port to talk to each of the 'real'
137 * ports, and allow each of the 'real' ports to only talk to
138 * the CPU port.
139 */
140 REG_WRITE(addr, 0x06,
141 ((p & 0xf) << 12) |
142 (dsa_is_cpu_port(ds, p) ?
143 ds->phys_port_mask :
144 (1 << ds->dst->cpu_port)));
145
146 /*
147 * Port Association Vector: when learning source addresses
148 * of packets, add the address to the address database using
149 * a port bitmap that has only the bit for this port set and
150 * the other bits clear.
151 */
152 REG_WRITE(addr, 0x0b, 1 << p);
153
154 return 0;
155}
156
157static int mv88e6060_setup(struct dsa_switch *ds)
158{
159 int i;
160 int ret;
161
162 ret = mv88e6060_switch_reset(ds);
163 if (ret < 0)
164 return ret;
165
166 /* @@@ initialise atu */
167
168 ret = mv88e6060_setup_global(ds);
169 if (ret < 0)
170 return ret;
171
172 for (i = 0; i < 6; i++) {
173 ret = mv88e6060_setup_port(ds, i);
174 if (ret < 0)
175 return ret;
176 }
177
178 return 0;
179}
180
181static int mv88e6060_set_addr(struct dsa_switch *ds, u8 *addr)
182{
183 REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]);
184 REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]);
185 REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]);
186
187 return 0;
188}
189
190static int mv88e6060_port_to_phy_addr(int port)
191{
192 if (port >= 0 && port <= 5)
193 return port;
194 return -1;
195}
196
197static int mv88e6060_phy_read(struct dsa_switch *ds, int port, int regnum)
198{
199 int addr;
200
201 addr = mv88e6060_port_to_phy_addr(port);
202 if (addr == -1)
203 return 0xffff;
204
205 return reg_read(ds, addr, regnum);
206}
207
208static int
209mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val)
210{
211 int addr;
212
213 addr = mv88e6060_port_to_phy_addr(port);
214 if (addr == -1)
215 return 0xffff;
216
217 return reg_write(ds, addr, regnum, val);
218}
219
220static void mv88e6060_poll_link(struct dsa_switch *ds)
221{
222 int i;
223
224 for (i = 0; i < DSA_MAX_PORTS; i++) {
225 struct net_device *dev;
226 int uninitialized_var(port_status);
227 int link;
228 int speed;
229 int duplex;
230 int fc;
231
232 dev = ds->ports[i];
233 if (dev == NULL)
234 continue;
235
236 link = 0;
237 if (dev->flags & IFF_UP) {
238 port_status = reg_read(ds, REG_PORT(i), 0x00);
239 if (port_status < 0)
240 continue;
241
242 link = !!(port_status & 0x1000);
243 }
244
245 if (!link) {
246 if (netif_carrier_ok(dev)) {
247 printk(KERN_INFO "%s: link down\n", dev->name);
248 netif_carrier_off(dev);
249 }
250 continue;
251 }
252
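		/*
		 * Decode the remaining port status bits (per the reads
		 * above): 0x0100 selects 100 vs 10 Mb/s, 0x0200 full vs
		 * half duplex, and both 0xc000 bits set are treated as
		 * flow control being enabled.
		 */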
253 speed = (port_status & 0x0100) ? 100 : 10;
254 duplex = (port_status & 0x0200) ? 1 : 0;
255 fc = ((port_status & 0xc000) == 0xc000) ? 1 : 0;
256
257 if (!netif_carrier_ok(dev)) {
258 printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, "
259 "flow control %sabled\n", dev->name,
260 speed, duplex ? "full" : "half",
261 fc ? "en" : "dis");
262 netif_carrier_on(dev);
263 }
264 }
265}
266
267static struct dsa_switch_driver mv88e6060_switch_driver = {
268 .tag_protocol = htons(ETH_P_TRAILER),
269 .probe = mv88e6060_probe,
270 .setup = mv88e6060_setup,
271 .set_addr = mv88e6060_set_addr,
272 .phy_read = mv88e6060_phy_read,
273 .phy_write = mv88e6060_phy_write,
274 .poll_link = mv88e6060_poll_link,
275};
276
277static int __init mv88e6060_init(void)
278{
279 register_switch_driver(&mv88e6060_switch_driver);
280 return 0;
281}
282module_init(mv88e6060_init);
283
284static void __exit mv88e6060_cleanup(void)
285{
286 unregister_switch_driver(&mv88e6060_switch_driver);
287}
288module_exit(mv88e6060_cleanup);
diff --git a/net/dsa/mv88e6123_61_65.c b/net/dsa/mv88e6123_61_65.c
new file mode 100644
index 00000000000..52faaa21a4d
--- /dev/null
+++ b/net/dsa/mv88e6123_61_65.c
@@ -0,0 +1,447 @@
1/*
2 * net/dsa/mv88e6123_61_65.c - Marvell 88e6123/6161/6165 switch chip support
3 * Copyright (c) 2008-2009 Marvell Semiconductor
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 */
10
11#include <linux/list.h>
12#include <linux/netdevice.h>
13#include <linux/phy.h>
14#include "dsa_priv.h"
15#include "mv88e6xxx.h"
16
17static char *mv88e6123_61_65_probe(struct mii_bus *bus, int sw_addr)
18{
19 int ret;
20
21 ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03);
22 if (ret >= 0) {
23 ret &= 0xfff0;
24 if (ret == 0x1210)
25 return "Marvell 88E6123";
26 if (ret == 0x1610)
27 return "Marvell 88E6161";
28 if (ret == 0x1650)
29 return "Marvell 88E6165";
30 }
31
32 return NULL;
33}
34
35static int mv88e6123_61_65_switch_reset(struct dsa_switch *ds)
36{
37 int i;
38 int ret;
39
40 /*
41 * Set all ports to the disabled state.
42 */
43 for (i = 0; i < 8; i++) {
44 ret = REG_READ(REG_PORT(i), 0x04);
45 REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc);
46 }
47
48 /*
49 * Wait for transmit queues to drain.
50 */
51 msleep(2);
52
53 /*
54 * Reset the switch.
55 */
56 REG_WRITE(REG_GLOBAL, 0x04, 0xc400);
57
58 /*
59 * Wait up to one second for reset to complete.
60 */
61 for (i = 0; i < 1000; i++) {
62 ret = REG_READ(REG_GLOBAL, 0x00);
63 if ((ret & 0xc800) == 0xc800)
64 break;
65
66 msleep(1);
67 }
68 if (i == 1000)
69 return -ETIMEDOUT;
70
71 return 0;
72}
73
74static int mv88e6123_61_65_setup_global(struct dsa_switch *ds)
75{
76 int ret;
77 int i;
78
79 /*
80 * Disable the PHY polling unit (since there won't be any
81 * external PHYs to poll), don't discard packets with
82 * excessive collisions, and mask all interrupt sources.
83 */
84 REG_WRITE(REG_GLOBAL, 0x04, 0x0000);
85
86 /*
87 * Set the default address aging time to 5 minutes, and
88 * enable address learn messages to be sent to all message
89 * ports.
90 */
91 REG_WRITE(REG_GLOBAL, 0x0a, 0x0148);
92
93 /*
94 * Configure the priority mapping registers.
95 */
96 ret = mv88e6xxx_config_prio(ds);
97 if (ret < 0)
98 return ret;
99
100 /*
101 * Configure the upstream port, and configure the upstream
102 * port as the port to which ingress and egress monitor frames
103 * are to be sent.
104 */
105 REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1110));
106
107 /*
108 * Disable remote management for now, and set the switch's
109 * DSA device number.
110 */
111 REG_WRITE(REG_GLOBAL, 0x1c, ds->index & 0x1f);
112
113 /*
114 * Send all frames with destination addresses matching
115 * 01:80:c2:00:00:2x to the CPU port.
116 */
117 REG_WRITE(REG_GLOBAL2, 0x02, 0xffff);
118
119 /*
120 * Send all frames with destination addresses matching
121 * 01:80:c2:00:00:0x to the CPU port.
122 */
123 REG_WRITE(REG_GLOBAL2, 0x03, 0xffff);
124
125 /*
126 * Disable the loopback filter, disable flow control
127 * messages, disable flood broadcast override, disable
128 * removing of provider tags, disable ATU age violation
129 * interrupts, disable tag flow control, force flow
130 * control priority to the highest, and send all special
131 * multicast frames to the CPU at the highest priority.
132 */
133 REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff);
134
135 /*
136 * Program the DSA routing table.
137 */
138 for (i = 0; i < 32; i++) {
139 int nexthop;
140
141 nexthop = 0x1f;
142 if (i != ds->index && i < ds->dst->pd->nr_chips)
143 nexthop = ds->pd->rtable[i] & 0x1f;
144
145 REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop);
146 }
147
148 /*
149 * Clear all trunk masks.
150 */
151 for (i = 0; i < 8; i++)
152 REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff);
153
154 /*
155 * Clear all trunk mappings.
156 */
157 for (i = 0; i < 16; i++)
158 REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11));
159
160 /*
161 * Disable ingress rate limiting by resetting all ingress
162 * rate limit registers to their initial state.
163 */
164 for (i = 0; i < 6; i++)
165 REG_WRITE(REG_GLOBAL2, 0x09, 0x9000 | (i << 8));
166
167 /*
168 * Initialise cross-chip port VLAN table to reset defaults.
169 */
170 REG_WRITE(REG_GLOBAL2, 0x0b, 0x9000);
171
172 /*
173 * Clear the priority override table.
174 */
175 for (i = 0; i < 16; i++)
176 REG_WRITE(REG_GLOBAL2, 0x0f, 0x8000 | (i << 8));
177
178 /* @@@ initialise AVB (22/23) watchdog (27) sdet (29) registers */
179
180 return 0;
181}
182
183static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p)
184{
185 int addr = REG_PORT(p);
186 u16 val;
187
188 /*
189 * MAC Forcing register: don't force link, speed, duplex
190 * or flow control state to any particular values on physical
191 * ports, but force the CPU port and all DSA ports to 1000 Mb/s
192 * full duplex.
193 */
194 if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p))
195 REG_WRITE(addr, 0x01, 0x003e);
196 else
197 REG_WRITE(addr, 0x01, 0x0003);
198
199 /*
200 * Do not limit the period of time that this port can be
201 * paused for by the remote end or the period of time that
202 * this port can pause the remote end.
203 */
204 REG_WRITE(addr, 0x02, 0x0000);
205
206 /*
207 * Port Control: disable Drop-on-Unlock, disable Drop-on-Lock,
208 * disable Header mode, enable IGMP/MLD snooping, disable VLAN
209 * tunneling, determine priority by looking at 802.1p and IP
210 * priority fields (IP prio has precedence), and set STP state
211 * to Forwarding.
212 *
213 * If this is the CPU link, use DSA or EDSA tagging depending
214 * on which tagging mode was configured.
215 *
216 * If this is a link to another switch, use DSA tagging mode.
217 *
218 * If this is the upstream port for this switch, enable
219 * forwarding of unknown unicasts and multicasts.
220 */
221 val = 0x0433;
222 if (dsa_is_cpu_port(ds, p)) {
223 if (ds->dst->tag_protocol == htons(ETH_P_EDSA))
224 val |= 0x3300;
225 else
226 val |= 0x0100;
227 }
228 if (ds->dsa_port_mask & (1 << p))
229 val |= 0x0100;
230 if (p == dsa_upstream_port(ds))
231 val |= 0x000c;
232 REG_WRITE(addr, 0x04, val);
233
234 /*
235 * Port Control 1: disable trunking. Also, if this is the
236 * CPU port, enable learn messages to be sent to this port.
237 */
238 REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000);
239
240 /*
241 * Port based VLAN map: give each port its own address
242 * database, allow the CPU port to talk to each of the 'real'
243 * ports, and allow each of the 'real' ports to only talk to
244 * the upstream port.
245 */
246 val = (p & 0xf) << 12;
247 if (dsa_is_cpu_port(ds, p))
248 val |= ds->phys_port_mask;
249 else
250 val |= 1 << dsa_upstream_port(ds);
251 REG_WRITE(addr, 0x06, val);
252
253 /*
254 * Default VLAN ID and priority: don't set a default VLAN
255 * ID, and set the default packet priority to zero.
256 */
257 REG_WRITE(addr, 0x07, 0x0000);
258
259 /*
260 * Port Control 2: don't force a good FCS, set the maximum
261 * frame size to 10240 bytes, don't let the switch add or
262 * strip 802.1q tags, don't discard tagged or untagged frames
263 * on this port, do a destination address lookup on all
264 * received packets as usual, disable ARP mirroring and don't
265 * send a copy of all transmitted/received frames on this port
266 * to the CPU.
267 */
268 REG_WRITE(addr, 0x08, 0x2080);
269
270 /*
271 * Egress rate control: disable egress rate control.
272 */
273 REG_WRITE(addr, 0x09, 0x0001);
274
275 /*
276 * Egress rate control 2: disable egress rate control.
277 */
278 REG_WRITE(addr, 0x0a, 0x0000);
279
280 /*
281 * Port Association Vector: when learning source addresses
282 * of packets, add the address to the address database using
283 * a port bitmap that has only the bit for this port set and
284 * the other bits clear.
285 */
286 REG_WRITE(addr, 0x0b, 1 << p);
287
288 /*
289 * Port ATU control: disable limiting the number of address
290 * database entries that this port is allowed to use.
291 */
292 REG_WRITE(addr, 0x0c, 0x0000);
293
294 /*
295	 * Priority Override: disable DA, SA and VTU priority override.
296 */
297 REG_WRITE(addr, 0x0d, 0x0000);
298
299 /*
300 * Port Ethertype: use the Ethertype DSA Ethertype value.
301 */
302 REG_WRITE(addr, 0x0f, ETH_P_EDSA);
303
304 /*
305 * Tag Remap: use an identity 802.1p prio -> switch prio
306 * mapping.
307 */
308 REG_WRITE(addr, 0x18, 0x3210);
309
310 /*
311 * Tag Remap 2: use an identity 802.1p prio -> switch prio
312 * mapping.
313 */
314 REG_WRITE(addr, 0x19, 0x7654);
315
316 return 0;
317}
318
319static int mv88e6123_61_65_setup(struct dsa_switch *ds)
320{
321 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
322 int i;
323 int ret;
324
325 mutex_init(&ps->smi_mutex);
326 mutex_init(&ps->stats_mutex);
327
328 ret = mv88e6123_61_65_switch_reset(ds);
329 if (ret < 0)
330 return ret;
331
332 /* @@@ initialise vtu and atu */
333
334 ret = mv88e6123_61_65_setup_global(ds);
335 if (ret < 0)
336 return ret;
337
338 for (i = 0; i < 6; i++) {
339 ret = mv88e6123_61_65_setup_port(ds, i);
340 if (ret < 0)
341 return ret;
342 }
343
344 return 0;
345}
346
347static int mv88e6123_61_65_port_to_phy_addr(int port)
348{
349 if (port >= 0 && port <= 4)
350 return port;
351 return -1;
352}
353
354static int
355mv88e6123_61_65_phy_read(struct dsa_switch *ds, int port, int regnum)
356{
357 int addr = mv88e6123_61_65_port_to_phy_addr(port);
358 return mv88e6xxx_phy_read(ds, addr, regnum);
359}
360
361static int
362mv88e6123_61_65_phy_write(struct dsa_switch *ds,
363 int port, int regnum, u16 val)
364{
365 int addr = mv88e6123_61_65_port_to_phy_addr(port);
366 return mv88e6xxx_phy_write(ds, addr, regnum, val);
367}
368
369static struct mv88e6xxx_hw_stat mv88e6123_61_65_hw_stats[] = {
370 { "in_good_octets", 8, 0x00, },
371 { "in_bad_octets", 4, 0x02, },
372 { "in_unicast", 4, 0x04, },
373 { "in_broadcasts", 4, 0x06, },
374 { "in_multicasts", 4, 0x07, },
375 { "in_pause", 4, 0x16, },
376 { "in_undersize", 4, 0x18, },
377 { "in_fragments", 4, 0x19, },
378 { "in_oversize", 4, 0x1a, },
379 { "in_jabber", 4, 0x1b, },
380 { "in_rx_error", 4, 0x1c, },
381 { "in_fcs_error", 4, 0x1d, },
382 { "out_octets", 8, 0x0e, },
383 { "out_unicast", 4, 0x10, },
384 { "out_broadcasts", 4, 0x13, },
385 { "out_multicasts", 4, 0x12, },
386 { "out_pause", 4, 0x15, },
387 { "excessive", 4, 0x11, },
388 { "collisions", 4, 0x1e, },
389 { "deferred", 4, 0x05, },
390 { "single", 4, 0x14, },
391 { "multiple", 4, 0x17, },
392 { "out_fcs_error", 4, 0x03, },
393 { "late", 4, 0x1f, },
394 { "hist_64bytes", 4, 0x08, },
395 { "hist_65_127bytes", 4, 0x09, },
396 { "hist_128_255bytes", 4, 0x0a, },
397 { "hist_256_511bytes", 4, 0x0b, },
398 { "hist_512_1023bytes", 4, 0x0c, },
399 { "hist_1024_max_bytes", 4, 0x0d, },
400};
401
402static void
403mv88e6123_61_65_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
404{
405 mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats),
406 mv88e6123_61_65_hw_stats, port, data);
407}
408
409static void
410mv88e6123_61_65_get_ethtool_stats(struct dsa_switch *ds,
411 int port, uint64_t *data)
412{
413 mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats),
414 mv88e6123_61_65_hw_stats, port, data);
415}
416
417static int mv88e6123_61_65_get_sset_count(struct dsa_switch *ds)
418{
419 return ARRAY_SIZE(mv88e6123_61_65_hw_stats);
420}
421
422static struct dsa_switch_driver mv88e6123_61_65_switch_driver = {
423 .tag_protocol = cpu_to_be16(ETH_P_EDSA),
424 .priv_size = sizeof(struct mv88e6xxx_priv_state),
425 .probe = mv88e6123_61_65_probe,
426 .setup = mv88e6123_61_65_setup,
427 .set_addr = mv88e6xxx_set_addr_indirect,
428 .phy_read = mv88e6123_61_65_phy_read,
429 .phy_write = mv88e6123_61_65_phy_write,
430 .poll_link = mv88e6xxx_poll_link,
431 .get_strings = mv88e6123_61_65_get_strings,
432 .get_ethtool_stats = mv88e6123_61_65_get_ethtool_stats,
433 .get_sset_count = mv88e6123_61_65_get_sset_count,
434};
435
436static int __init mv88e6123_61_65_init(void)
437{
438 register_switch_driver(&mv88e6123_61_65_switch_driver);
439 return 0;
440}
441module_init(mv88e6123_61_65_init);
442
443static void __exit mv88e6123_61_65_cleanup(void)
444{
445 unregister_switch_driver(&mv88e6123_61_65_switch_driver);
446}
447module_exit(mv88e6123_61_65_cleanup);
diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c
new file mode 100644
index 00000000000..9bd1061fa4e
--- /dev/null
+++ b/net/dsa/mv88e6131.c
@@ -0,0 +1,443 @@
1/*
2 * net/dsa/mv88e6131.c - Marvell 88e6095/6095f/6131 switch chip support
3 * Copyright (c) 2008-2009 Marvell Semiconductor
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 */
10
11#include <linux/list.h>
12#include <linux/netdevice.h>
13#include <linux/phy.h>
14#include "dsa_priv.h"
15#include "mv88e6xxx.h"
16
17/*
18 * Switch product IDs
19 */
20#define ID_6085 0x04a0
21#define ID_6095 0x0950
22#define ID_6131 0x1060
23
24static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr)
25{
26 int ret;
27
28 ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03);
29 if (ret >= 0) {
30 ret &= 0xfff0;
31 if (ret == ID_6085)
32 return "Marvell 88E6085";
33 if (ret == ID_6095)
34 return "Marvell 88E6095/88E6095F";
35 if (ret == ID_6131)
36 return "Marvell 88E6131";
37 }
38
39 return NULL;
40}
41
42static int mv88e6131_switch_reset(struct dsa_switch *ds)
43{
44 int i;
45 int ret;
46
47 /*
48 * Set all ports to the disabled state.
49 */
50 for (i = 0; i < 11; i++) {
51 ret = REG_READ(REG_PORT(i), 0x04);
52 REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc);
53 }
54
55 /*
56 * Wait for transmit queues to drain.
57 */
58 msleep(2);
59
60 /*
61 * Reset the switch.
62 */
63 REG_WRITE(REG_GLOBAL, 0x04, 0xc400);
64
65 /*
66 * Wait up to one second for reset to complete.
67 */
68 for (i = 0; i < 1000; i++) {
69 ret = REG_READ(REG_GLOBAL, 0x00);
70 if ((ret & 0xc800) == 0xc800)
71 break;
72
73 msleep(1);
74 }
75 if (i == 1000)
76 return -ETIMEDOUT;
77
78 return 0;
79}
80
81static int mv88e6131_setup_global(struct dsa_switch *ds)
82{
83 int ret;
84 int i;
85
86 /*
87 * Enable the PHY polling unit, don't discard packets with
88 * excessive collisions, use a weighted fair queueing scheme
89 * to arbitrate between packet queues, set the maximum frame
90 * size to 1632, and mask all interrupt sources.
91 */
92 REG_WRITE(REG_GLOBAL, 0x04, 0x4400);
93
94 /*
95 * Set the default address aging time to 5 minutes, and
96 * enable address learn messages to be sent to all message
97 * ports.
98 */
99 REG_WRITE(REG_GLOBAL, 0x0a, 0x0148);
100
101 /*
102 * Configure the priority mapping registers.
103 */
104 ret = mv88e6xxx_config_prio(ds);
105 if (ret < 0)
106 return ret;
107
108 /*
109 * Set the VLAN ethertype to 0x8100.
110 */
111 REG_WRITE(REG_GLOBAL, 0x19, 0x8100);
112
113 /*
114 * Disable ARP mirroring, and configure the upstream port as
115 * the port to which ingress and egress monitor frames are to
116 * be sent.
117 */
118 REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1100) | 0x00f0);
119
120 /*
121 * Disable cascade port functionality unless this device
122 * is used in a cascade configuration, and set the switch's
123 * DSA device number.
124 */
125 if (ds->dst->pd->nr_chips > 1)
126 REG_WRITE(REG_GLOBAL, 0x1c, 0xf000 | (ds->index & 0x1f));
127 else
128 REG_WRITE(REG_GLOBAL, 0x1c, 0xe000 | (ds->index & 0x1f));
129
130 /*
131 * Send all frames with destination addresses matching
132 * 01:80:c2:00:00:0x to the CPU port.
133 */
134 REG_WRITE(REG_GLOBAL2, 0x03, 0xffff);
135
136 /*
137 * Ignore removed tag data on doubly tagged packets, disable
138 * flow control messages, force flow control priority to the
139 * highest, and send all special multicast frames to the CPU
140 * port at the highest priority.
141 */
142 REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff);
143
144 /*
145 * Program the DSA routing table.
146 */
147 for (i = 0; i < 32; i++) {
148 int nexthop;
149
150 nexthop = 0x1f;
151 if (i != ds->index && i < ds->dst->pd->nr_chips)
152 nexthop = ds->pd->rtable[i] & 0x1f;
153
154 REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop);
155 }
156
157 /*
158 * Clear all trunk masks.
159 */
160 for (i = 0; i < 8; i++)
161 REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0x7ff);
162
163 /*
164 * Clear all trunk mappings.
165 */
166 for (i = 0; i < 16; i++)
167 REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11));
168
169 /*
170 * Force the priority of IGMP/MLD snoop frames and ARP frames
171 * to the highest setting.
172 */
173 REG_WRITE(REG_GLOBAL2, 0x0f, 0x00ff);
174
175 return 0;
176}
177
178static int mv88e6131_setup_port(struct dsa_switch *ds, int p)
179{
180 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
181 int addr = REG_PORT(p);
182 u16 val;
183
184 /*
185 * MAC Forcing register: don't force link, speed, duplex
186 * or flow control state to any particular values on physical
187 * ports, but force the CPU port and all DSA ports to 1000 Mb/s
188 * (100 Mb/s on 6085) full duplex.
189 */
190 if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p))
191 if (ps->id == ID_6085)
192 REG_WRITE(addr, 0x01, 0x003d); /* 100 Mb/s */
193 else
194 REG_WRITE(addr, 0x01, 0x003e); /* 1000 Mb/s */
195 else
196 REG_WRITE(addr, 0x01, 0x0003);
197
198 /*
199 * Port Control: disable Core Tag, disable Drop-on-Lock,
200 * transmit frames unmodified, disable Header mode,
201 * enable IGMP/MLD snoop, disable DoubleTag, disable VLAN
202 * tunneling, determine priority by looking at 802.1p and
203 * IP priority fields (IP prio has precedence), and set STP
204 * state to Forwarding.
205 *
206 * If this is the upstream port for this switch, enable
207 * forwarding of unknown unicasts, and enable DSA tagging
208 * mode.
209 *
210 * If this is the link to another switch, use DSA tagging
211 * mode, but do not enable forwarding of unknown unicasts.
212 */
213 val = 0x0433;
214 if (p == dsa_upstream_port(ds)) {
215 val |= 0x0104;
216 /*
217		 * On the 6085, unknown multicast forwarding is controlled
218		 * here rather than in the Port Control 2 register.
219 */
220 if (ps->id == ID_6085)
221 val |= 0x0008;
222 }
223 if (ds->dsa_port_mask & (1 << p))
224 val |= 0x0100;
225 REG_WRITE(addr, 0x04, val);
226
227 /*
228 * Port Control 1: disable trunking. Also, if this is the
229 * CPU port, enable learn messages to be sent to this port.
230 */
231 REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000);
232
233 /*
234 * Port based VLAN map: give each port its own address
235 * database, allow the CPU port to talk to each of the 'real'
236 * ports, and allow each of the 'real' ports to only talk to
237 * the upstream port.
238 */
239 val = (p & 0xf) << 12;
240 if (dsa_is_cpu_port(ds, p))
241 val |= ds->phys_port_mask;
242 else
243 val |= 1 << dsa_upstream_port(ds);
244 REG_WRITE(addr, 0x06, val);
245
246 /*
247 * Default VLAN ID and priority: don't set a default VLAN
248 * ID, and set the default packet priority to zero.
249 */
250 REG_WRITE(addr, 0x07, 0x0000);
251
252 /*
253 * Port Control 2: don't force a good FCS, don't use
254 * VLAN-based, source address-based or destination
255 * address-based priority overrides, don't let the switch
256 * add or strip 802.1q tags, don't discard tagged or
257 * untagged frames on this port, do a destination address
258 * lookup on received packets as usual, don't send a copy
259 * of all transmitted/received frames on this port to the
260 * CPU, and configure the upstream port number.
261 *
262 * If this is the upstream port for this switch, enable
263 * forwarding of unknown multicast addresses.
264 */
265 if (ps->id == ID_6085)
266 /*
267		 * On the 6085, bits 3:0 are reserved, bit 6 controls ARP
268		 * mirroring, and unknown multicast forwarding is handled
269		 * in the Port Control register.
270 */
271 REG_WRITE(addr, 0x08, 0x0080);
272 else {
273 val = 0x0080 | dsa_upstream_port(ds);
274 if (p == dsa_upstream_port(ds))
275 val |= 0x0040;
276 REG_WRITE(addr, 0x08, val);
277 }
278
279 /*
280 * Rate Control: disable ingress rate limiting.
281 */
282 REG_WRITE(addr, 0x09, 0x0000);
283
284 /*
285 * Rate Control 2: disable egress rate limiting.
286 */
287 REG_WRITE(addr, 0x0a, 0x0000);
288
289 /*
290 * Port Association Vector: when learning source addresses
291 * of packets, add the address to the address database using
292 * a port bitmap that has only the bit for this port set and
293 * the other bits clear.
294 */
295 REG_WRITE(addr, 0x0b, 1 << p);
296
297 /*
298 * Tag Remap: use an identity 802.1p prio -> switch prio
299 * mapping.
300 */
301 REG_WRITE(addr, 0x18, 0x3210);
302
303 /*
304 * Tag Remap 2: use an identity 802.1p prio -> switch prio
305 * mapping.
306 */
307 REG_WRITE(addr, 0x19, 0x7654);
308
309 return 0;
310}
311
312static int mv88e6131_setup(struct dsa_switch *ds)
313{
314 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
315 int i;
316 int ret;
317
318 mutex_init(&ps->smi_mutex);
319 mv88e6xxx_ppu_state_init(ds);
320 mutex_init(&ps->stats_mutex);
321
322 ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0;
323
324 ret = mv88e6131_switch_reset(ds);
325 if (ret < 0)
326 return ret;
327
328 /* @@@ initialise vtu and atu */
329
330 ret = mv88e6131_setup_global(ds);
331 if (ret < 0)
332 return ret;
333
334 for (i = 0; i < 11; i++) {
335 ret = mv88e6131_setup_port(ds, i);
336 if (ret < 0)
337 return ret;
338 }
339
340 return 0;
341}
342
343static int mv88e6131_port_to_phy_addr(int port)
344{
345 if (port >= 0 && port <= 11)
346 return port;
347 return -1;
348}
349
350static int
351mv88e6131_phy_read(struct dsa_switch *ds, int port, int regnum)
352{
353 int addr = mv88e6131_port_to_phy_addr(port);
354 return mv88e6xxx_phy_read_ppu(ds, addr, regnum);
355}
356
357static int
358mv88e6131_phy_write(struct dsa_switch *ds,
359 int port, int regnum, u16 val)
360{
361 int addr = mv88e6131_port_to_phy_addr(port);
362 return mv88e6xxx_phy_write_ppu(ds, addr, regnum, val);
363}
364
365static struct mv88e6xxx_hw_stat mv88e6131_hw_stats[] = {
366 { "in_good_octets", 8, 0x00, },
367 { "in_bad_octets", 4, 0x02, },
368 { "in_unicast", 4, 0x04, },
369 { "in_broadcasts", 4, 0x06, },
370 { "in_multicasts", 4, 0x07, },
371 { "in_pause", 4, 0x16, },
372 { "in_undersize", 4, 0x18, },
373 { "in_fragments", 4, 0x19, },
374 { "in_oversize", 4, 0x1a, },
375 { "in_jabber", 4, 0x1b, },
376 { "in_rx_error", 4, 0x1c, },
377 { "in_fcs_error", 4, 0x1d, },
378 { "out_octets", 8, 0x0e, },
379 { "out_unicast", 4, 0x10, },
380 { "out_broadcasts", 4, 0x13, },
381 { "out_multicasts", 4, 0x12, },
382 { "out_pause", 4, 0x15, },
383 { "excessive", 4, 0x11, },
384 { "collisions", 4, 0x1e, },
385 { "deferred", 4, 0x05, },
386 { "single", 4, 0x14, },
387 { "multiple", 4, 0x17, },
388 { "out_fcs_error", 4, 0x03, },
389 { "late", 4, 0x1f, },
390 { "hist_64bytes", 4, 0x08, },
391 { "hist_65_127bytes", 4, 0x09, },
392 { "hist_128_255bytes", 4, 0x0a, },
393 { "hist_256_511bytes", 4, 0x0b, },
394 { "hist_512_1023bytes", 4, 0x0c, },
395 { "hist_1024_max_bytes", 4, 0x0d, },
396};
397
398static void
399mv88e6131_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
400{
401 mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6131_hw_stats),
402 mv88e6131_hw_stats, port, data);
403}
404
405static void
406mv88e6131_get_ethtool_stats(struct dsa_switch *ds,
407 int port, uint64_t *data)
408{
409 mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6131_hw_stats),
410 mv88e6131_hw_stats, port, data);
411}
412
413static int mv88e6131_get_sset_count(struct dsa_switch *ds)
414{
415 return ARRAY_SIZE(mv88e6131_hw_stats);
416}
417
418static struct dsa_switch_driver mv88e6131_switch_driver = {
419 .tag_protocol = cpu_to_be16(ETH_P_DSA),
420 .priv_size = sizeof(struct mv88e6xxx_priv_state),
421 .probe = mv88e6131_probe,
422 .setup = mv88e6131_setup,
423 .set_addr = mv88e6xxx_set_addr_direct,
424 .phy_read = mv88e6131_phy_read,
425 .phy_write = mv88e6131_phy_write,
426 .poll_link = mv88e6xxx_poll_link,
427 .get_strings = mv88e6131_get_strings,
428 .get_ethtool_stats = mv88e6131_get_ethtool_stats,
429 .get_sset_count = mv88e6131_get_sset_count,
430};
431
432static int __init mv88e6131_init(void)
433{
434 register_switch_driver(&mv88e6131_switch_driver);
435 return 0;
436}
437module_init(mv88e6131_init);
438
439static void __exit mv88e6131_cleanup(void)
440{
441 unregister_switch_driver(&mv88e6131_switch_driver);
442}
443module_exit(mv88e6131_cleanup);
diff --git a/net/dsa/mv88e6xxx.c b/net/dsa/mv88e6xxx.c
new file mode 100644
index 00000000000..efe661a9def
--- /dev/null
+++ b/net/dsa/mv88e6xxx.c
@@ -0,0 +1,522 @@
1/*
2 * net/dsa/mv88e6xxx.c - Marvell 88e6xxx switch chip support
3 * Copyright (c) 2008 Marvell Semiconductor
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 */
10
11#include <linux/list.h>
12#include <linux/netdevice.h>
13#include <linux/phy.h>
14#include "dsa_priv.h"
15#include "mv88e6xxx.h"
16
17/*
18 * If the switch's ADDR[4:0] strap pins are strapped to zero, it will
19 * use all 32 SMI bus addresses on its SMI bus, and all switch registers
20 * will be directly accessible on some {device address,register address}
21 * pair. If the ADDR[4:0] pins are not strapped to zero, the switch
22 * will only respond to SMI transactions to that specific address, and
23 * an indirect addressing mechanism needs to be used to access its
24 * registers.
25 */
26static int mv88e6xxx_reg_wait_ready(struct mii_bus *bus, int sw_addr)
27{
28 int ret;
29 int i;
30
31 for (i = 0; i < 16; i++) {
32 ret = mdiobus_read(bus, sw_addr, 0);
33 if (ret < 0)
34 return ret;
35
36 if ((ret & 0x8000) == 0)
37 return 0;
38 }
39
40 return -ETIMEDOUT;
41}
42
43int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg)
44{
45 int ret;
46
47 if (sw_addr == 0)
48 return mdiobus_read(bus, addr, reg);
49
50 /*
51 * Wait for the bus to become free.
52 */
53 ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
54 if (ret < 0)
55 return ret;
56
57 /*
58 * Transmit the read command.
59 */
60 ret = mdiobus_write(bus, sw_addr, 0, 0x9800 | (addr << 5) | reg);
61 if (ret < 0)
62 return ret;
63
64 /*
65 * Wait for the read command to complete.
66 */
67 ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
68 if (ret < 0)
69 return ret;
70
71 /*
72 * Read the data.
73 */
74 ret = mdiobus_read(bus, sw_addr, 1);
75 if (ret < 0)
76 return ret;
77
78 return ret & 0xffff;
79}
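/*
 * Command word layout, as inferred from the constants above: bit 15 is the
 * busy/start flag, 0x9800 encodes an indirect read (0x9400 an indirect write
 * below), bits 9:5 carry the target device address, bits 4:0 the register
 * number, and the data itself moves through SMI data register 1.
 */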
80
81int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg)
82{
83 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
84 int ret;
85
86 mutex_lock(&ps->smi_mutex);
87 ret = __mv88e6xxx_reg_read(ds->master_mii_bus,
88 ds->pd->sw_addr, addr, reg);
89 mutex_unlock(&ps->smi_mutex);
90
91 return ret;
92}
93
94int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr,
95 int reg, u16 val)
96{
97 int ret;
98
99 if (sw_addr == 0)
100 return mdiobus_write(bus, addr, reg, val);
101
102 /*
103 * Wait for the bus to become free.
104 */
105 ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
106 if (ret < 0)
107 return ret;
108
109 /*
110 * Transmit the data to write.
111 */
112 ret = mdiobus_write(bus, sw_addr, 1, val);
113 if (ret < 0)
114 return ret;
115
116 /*
117 * Transmit the write command.
118 */
119 ret = mdiobus_write(bus, sw_addr, 0, 0x9400 | (addr << 5) | reg);
120 if (ret < 0)
121 return ret;
122
123 /*
124 * Wait for the write command to complete.
125 */
126 ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
127 if (ret < 0)
128 return ret;
129
130 return 0;
131}
132
133int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
134{
135 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
136 int ret;
137
138 mutex_lock(&ps->smi_mutex);
139 ret = __mv88e6xxx_reg_write(ds->master_mii_bus,
140 ds->pd->sw_addr, addr, reg, val);
141 mutex_unlock(&ps->smi_mutex);
142
143 return ret;
144}
145
146int mv88e6xxx_config_prio(struct dsa_switch *ds)
147{
148 /*
149 * Configure the IP ToS mapping registers.
150 */
151 REG_WRITE(REG_GLOBAL, 0x10, 0x0000);
152 REG_WRITE(REG_GLOBAL, 0x11, 0x0000);
153 REG_WRITE(REG_GLOBAL, 0x12, 0x5555);
154 REG_WRITE(REG_GLOBAL, 0x13, 0x5555);
155 REG_WRITE(REG_GLOBAL, 0x14, 0xaaaa);
156 REG_WRITE(REG_GLOBAL, 0x15, 0xaaaa);
157 REG_WRITE(REG_GLOBAL, 0x16, 0xffff);
158 REG_WRITE(REG_GLOBAL, 0x17, 0xffff);
159
160 /*
161 * Configure the IEEE 802.1p priority mapping register.
162 */
163 REG_WRITE(REG_GLOBAL, 0x18, 0xfa41);
164
165 return 0;
166}
167
168int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr)
169{
170 REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]);
171 REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]);
172 REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]);
173
174 return 0;
175}
176
177int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr)
178{
179 int i;
180 int ret;
181
182 for (i = 0; i < 6; i++) {
183 int j;
184
185 /*
186 * Write the MAC address byte.
187 */
188 REG_WRITE(REG_GLOBAL2, 0x0d, 0x8000 | (i << 8) | addr[i]);
189
190 /*
191 * Wait for the write to complete.
192 */
193 for (j = 0; j < 16; j++) {
194 ret = REG_READ(REG_GLOBAL2, 0x0d);
195 if ((ret & 0x8000) == 0)
196 break;
197 }
198 if (j == 16)
199 return -ETIMEDOUT;
200 }
201
202 return 0;
203}
204
205int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum)
206{
207 if (addr >= 0)
208 return mv88e6xxx_reg_read(ds, addr, regnum);
209 return 0xffff;
210}
211
212int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val)
213{
214 if (addr >= 0)
215 return mv88e6xxx_reg_write(ds, addr, regnum, val);
216 return 0;
217}
218
219#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU
220static int mv88e6xxx_ppu_disable(struct dsa_switch *ds)
221{
222 int ret;
223 int i;
224
225 ret = REG_READ(REG_GLOBAL, 0x04);
226 REG_WRITE(REG_GLOBAL, 0x04, ret & ~0x4000);
227
228 for (i = 0; i < 1000; i++) {
229 ret = REG_READ(REG_GLOBAL, 0x00);
230 msleep(1);
231 if ((ret & 0xc000) != 0xc000)
232 return 0;
233 }
234
235 return -ETIMEDOUT;
236}
237
238static int mv88e6xxx_ppu_enable(struct dsa_switch *ds)
239{
240 int ret;
241 int i;
242
243 ret = REG_READ(REG_GLOBAL, 0x04);
244 REG_WRITE(REG_GLOBAL, 0x04, ret | 0x4000);
245
246 for (i = 0; i < 1000; i++) {
247 ret = REG_READ(REG_GLOBAL, 0x00);
248 msleep(1);
249 if ((ret & 0xc000) == 0xc000)
250 return 0;
251 }
252
253 return -ETIMEDOUT;
254}
255
256static void mv88e6xxx_ppu_reenable_work(struct work_struct *ugly)
257{
258 struct mv88e6xxx_priv_state *ps;
259
260 ps = container_of(ugly, struct mv88e6xxx_priv_state, ppu_work);
261 if (mutex_trylock(&ps->ppu_mutex)) {
262 struct dsa_switch *ds = ((struct dsa_switch *)ps) - 1;
263
264 if (mv88e6xxx_ppu_enable(ds) == 0)
265 ps->ppu_disabled = 0;
266 mutex_unlock(&ps->ppu_mutex);
267 }
268}
269
270static void mv88e6xxx_ppu_reenable_timer(unsigned long _ps)
271{
272 struct mv88e6xxx_priv_state *ps = (void *)_ps;
273
274 schedule_work(&ps->ppu_work);
275}
276
277static int mv88e6xxx_ppu_access_get(struct dsa_switch *ds)
278{
279 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
280 int ret;
281
282 mutex_lock(&ps->ppu_mutex);
283
284 /*
285 * If the PHY polling unit is enabled, disable it so that
286 * we can access the PHY registers. If it was already
287 * disabled, cancel the timer that is going to re-enable
288 * it.
289 */
290 if (!ps->ppu_disabled) {
291 ret = mv88e6xxx_ppu_disable(ds);
292 if (ret < 0) {
293 mutex_unlock(&ps->ppu_mutex);
294 return ret;
295 }
296 ps->ppu_disabled = 1;
297 } else {
298 del_timer(&ps->ppu_timer);
299 ret = 0;
300 }
301
302 return ret;
303}
304
305static void mv88e6xxx_ppu_access_put(struct dsa_switch *ds)
306{
307 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
308
309 /*
310 * Schedule a timer to re-enable the PHY polling unit.
311 */
312 mod_timer(&ps->ppu_timer, jiffies + msecs_to_jiffies(10));
313 mutex_unlock(&ps->ppu_mutex);
314}
315
316void mv88e6xxx_ppu_state_init(struct dsa_switch *ds)
317{
318 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
319
320 mutex_init(&ps->ppu_mutex);
321 INIT_WORK(&ps->ppu_work, mv88e6xxx_ppu_reenable_work);
322 init_timer(&ps->ppu_timer);
323 ps->ppu_timer.data = (unsigned long)ps;
324 ps->ppu_timer.function = mv88e6xxx_ppu_reenable_timer;
325}
326
327int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum)
328{
329 int ret;
330
331 ret = mv88e6xxx_ppu_access_get(ds);
332 if (ret >= 0) {
333 ret = mv88e6xxx_reg_read(ds, addr, regnum);
334 mv88e6xxx_ppu_access_put(ds);
335 }
336
337 return ret;
338}
339
340int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr,
341 int regnum, u16 val)
342{
343 int ret;
344
345 ret = mv88e6xxx_ppu_access_get(ds);
346 if (ret >= 0) {
347 ret = mv88e6xxx_reg_write(ds, addr, regnum, val);
348 mv88e6xxx_ppu_access_put(ds);
349 }
350
351 return ret;
352}
353#endif
354
355void mv88e6xxx_poll_link(struct dsa_switch *ds)
356{
357 int i;
358
359 for (i = 0; i < DSA_MAX_PORTS; i++) {
360 struct net_device *dev;
361 int uninitialized_var(port_status);
362 int link;
363 int speed;
364 int duplex;
365 int fc;
366
367 dev = ds->ports[i];
368 if (dev == NULL)
369 continue;
370
371 link = 0;
372 if (dev->flags & IFF_UP) {
373 port_status = mv88e6xxx_reg_read(ds, REG_PORT(i), 0x00);
374 if (port_status < 0)
375 continue;
376
377 link = !!(port_status & 0x0800);
378 }
379
380 if (!link) {
381 if (netif_carrier_ok(dev)) {
382 printk(KERN_INFO "%s: link down\n", dev->name);
383 netif_carrier_off(dev);
384 }
385 continue;
386 }
387
388 switch (port_status & 0x0300) {
389 case 0x0000:
390 speed = 10;
391 break;
392 case 0x0100:
393 speed = 100;
394 break;
395 case 0x0200:
396 speed = 1000;
397 break;
398 default:
399 speed = -1;
400 break;
401 }
402 duplex = (port_status & 0x0400) ? 1 : 0;
403 fc = (port_status & 0x8000) ? 1 : 0;
404
405 if (!netif_carrier_ok(dev)) {
406 printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, "
407 "flow control %sabled\n", dev->name,
408 speed, duplex ? "full" : "half",
409 fc ? "en" : "dis");
410 netif_carrier_on(dev);
411 }
412 }
413}
414
415static int mv88e6xxx_stats_wait(struct dsa_switch *ds)
416{
417 int ret;
418 int i;
419
420 for (i = 0; i < 10; i++) {
421 ret = REG_READ(REG_GLOBAL, 0x1d);
422 if ((ret & 0x8000) == 0)
423 return 0;
424 }
425
426 return -ETIMEDOUT;
427}
428
429static int mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port)
430{
431 int ret;
432
433 /*
434 * Snapshot the hardware statistics counters for this port.
435 */
436 REG_WRITE(REG_GLOBAL, 0x1d, 0xdc00 | port);
437
438 /*
439 * Wait for the snapshotting to complete.
440 */
441 ret = mv88e6xxx_stats_wait(ds);
442 if (ret < 0)
443 return ret;
444
445 return 0;
446}
447
448static void mv88e6xxx_stats_read(struct dsa_switch *ds, int stat, u32 *val)
449{
450 u32 _val;
451 int ret;
452
453 *val = 0;
454
455 ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x1d, 0xcc00 | stat);
456 if (ret < 0)
457 return;
458
459 ret = mv88e6xxx_stats_wait(ds);
460 if (ret < 0)
461 return;
462
463 ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1e);
464 if (ret < 0)
465 return;
466
467 _val = ret << 16;
468
469 ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1f);
470 if (ret < 0)
471 return;
472
473 *val = _val | ret;
474}
475
476void mv88e6xxx_get_strings(struct dsa_switch *ds,
477 int nr_stats, struct mv88e6xxx_hw_stat *stats,
478 int port, uint8_t *data)
479{
480 int i;
481
482 for (i = 0; i < nr_stats; i++) {
483 memcpy(data + i * ETH_GSTRING_LEN,
484 stats[i].string, ETH_GSTRING_LEN);
485 }
486}
487
488void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds,
489 int nr_stats, struct mv88e6xxx_hw_stat *stats,
490 int port, uint64_t *data)
491{
492 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
493 int ret;
494 int i;
495
496 mutex_lock(&ps->stats_mutex);
497
498 ret = mv88e6xxx_stats_snapshot(ds, port);
499 if (ret < 0) {
500 mutex_unlock(&ps->stats_mutex);
501 return;
502 }
503
504 /*
505 * Read each of the counters.
506 */
507 for (i = 0; i < nr_stats; i++) {
508 struct mv88e6xxx_hw_stat *s = stats + i;
509 u32 low;
510 u32 high;
511
512 mv88e6xxx_stats_read(ds, s->reg, &low);
513 if (s->sizeof_stat == 8)
514 mv88e6xxx_stats_read(ds, s->reg + 1, &high);
515 else
516 high = 0;
517
518 data[i] = (((u64)high) << 32) | low;
519 }
520
521 mutex_unlock(&ps->stats_mutex);
522}
diff --git a/net/dsa/mv88e6xxx.h b/net/dsa/mv88e6xxx.h
new file mode 100644
index 00000000000..61156ca26a0
--- /dev/null
+++ b/net/dsa/mv88e6xxx.h
@@ -0,0 +1,95 @@
1/*
2 * net/dsa/mv88e6xxx.h - Marvell 88e6xxx switch chip support
3 * Copyright (c) 2008 Marvell Semiconductor
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 */
10
11#ifndef __MV88E6XXX_H
12#define __MV88E6XXX_H
13
14#define REG_PORT(p) (0x10 + (p))
15#define REG_GLOBAL 0x1b
16#define REG_GLOBAL2 0x1c
17
18struct mv88e6xxx_priv_state {
19 /*
20 * When using multi-chip addressing, this mutex protects
21 * access to the indirect access registers. (In single-chip
22 * mode, this mutex is effectively useless.)
23 */
24 struct mutex smi_mutex;
25
26#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU
27 /*
28 * Handles automatic disabling and re-enabling of the PHY
29 * polling unit.
30 */
31 struct mutex ppu_mutex;
32 int ppu_disabled;
33 struct work_struct ppu_work;
34 struct timer_list ppu_timer;
35#endif
36
37 /*
38 * This mutex serialises access to the statistics unit.
39 * Hold this mutex over snapshot + dump sequences.
40 */
41 struct mutex stats_mutex;
42
43 int id; /* switch product id */
44};
45
46struct mv88e6xxx_hw_stat {
47 char string[ETH_GSTRING_LEN];
48 int sizeof_stat;
49 int reg;
50};
51
52int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg);
53int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg);
54int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr,
55 int reg, u16 val);
56int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val);
57int mv88e6xxx_config_prio(struct dsa_switch *ds);
58int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr);
59int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr);
60int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum);
61int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val);
62void mv88e6xxx_ppu_state_init(struct dsa_switch *ds);
63int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum);
64int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr,
65 int regnum, u16 val);
66void mv88e6xxx_poll_link(struct dsa_switch *ds);
67void mv88e6xxx_get_strings(struct dsa_switch *ds,
68 int nr_stats, struct mv88e6xxx_hw_stat *stats,
69 int port, uint8_t *data);
70void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds,
71 int nr_stats, struct mv88e6xxx_hw_stat *stats,
72 int port, uint64_t *data);
73
74#define REG_READ(addr, reg) \
75 ({ \
76 int __ret; \
77 \
78 __ret = mv88e6xxx_reg_read(ds, addr, reg); \
79 if (__ret < 0) \
80 return __ret; \
81 __ret; \
82 })
83
84#define REG_WRITE(addr, reg, val) \
85 ({ \
86 int __ret; \
87 \
88 __ret = mv88e6xxx_reg_write(ds, addr, reg, val); \
89 if (__ret < 0) \
90 return __ret; \
91 })
92
93
94
95#endif
diff --git a/net/econet/Kconfig b/net/econet/Kconfig
new file mode 100644
index 00000000000..39a2d2975e0
--- /dev/null
+++ b/net/econet/Kconfig
@@ -0,0 +1,36 @@
1#
2# Acorn Econet/AUN protocols
3#
4
5config ECONET
6 tristate "Acorn Econet/AUN protocols (EXPERIMENTAL)"
7 depends on EXPERIMENTAL && INET
8 ---help---
9 Econet is a fairly old and slow networking protocol mainly used by
10 Acorn computers to access file and print servers. It uses native
11 Econet network cards. AUN is an implementation of the higher level
12 parts of Econet that runs over ordinary Ethernet connections, on
13 top of the UDP packet protocol, which in turn runs on top of the
14 Internet protocol IP.
15
16 If you say Y here, you can choose with the next two options whether
17 to send Econet/AUN traffic over a UDP Ethernet connection or over
18 a native Econet network card.
19
20 To compile this driver as a module, choose M here: the module
21 will be called econet.
22
23config ECONET_AUNUDP
24 bool "AUN over UDP"
25 depends on ECONET
26 help
27 Say Y here if you want to send Econet/AUN traffic over a UDP
28 connection (UDP is a packet based protocol that runs on top of the
29 Internet protocol IP) using an ordinary Ethernet network card.
30
31config ECONET_NATIVE
32 bool "Native Econet"
33 depends on ECONET
34 help
35 Say Y here if you have a native Econet network card installed in
36 your computer.
diff --git a/net/econet/Makefile b/net/econet/Makefile
new file mode 100644
index 00000000000..05fae8be2fe
--- /dev/null
+++ b/net/econet/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for Econet support code.
3#
4
5obj-$(CONFIG_ECONET) += econet.o
6
7econet-y := af_econet.o
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
new file mode 100644
index 00000000000..1c1f26c5d67
--- /dev/null
+++ b/net/econet/af_econet.c
@@ -0,0 +1,1170 @@
1/*
2 * An implementation of the Acorn Econet and AUN protocols.
3 * Philip Blundell <philb@gnu.org>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 *
10 */
11
12#define pr_fmt(fmt) fmt
13
14#include <linux/module.h>
15
16#include <linux/types.h>
17#include <linux/kernel.h>
18#include <linux/string.h>
19#include <linux/mm.h>
20#include <linux/socket.h>
21#include <linux/sockios.h>
22#include <linux/in.h>
23#include <linux/errno.h>
24#include <linux/interrupt.h>
25#include <linux/if_ether.h>
26#include <linux/netdevice.h>
27#include <linux/inetdevice.h>
28#include <linux/route.h>
29#include <linux/inet.h>
30#include <linux/etherdevice.h>
31#include <linux/if_arp.h>
32#include <linux/wireless.h>
33#include <linux/skbuff.h>
34#include <linux/udp.h>
35#include <linux/slab.h>
36#include <linux/vmalloc.h>
37#include <net/sock.h>
38#include <net/inet_common.h>
39#include <linux/stat.h>
40#include <linux/init.h>
41#include <linux/if_ec.h>
42#include <net/udp.h>
43#include <net/ip.h>
44#include <linux/spinlock.h>
45#include <linux/rcupdate.h>
46#include <linux/bitops.h>
47#include <linux/mutex.h>
48
49#include <linux/uaccess.h>
50#include <asm/system.h>
51
52static const struct proto_ops econet_ops;
53static struct hlist_head econet_sklist;
54static DEFINE_SPINLOCK(econet_lock);
55static DEFINE_MUTEX(econet_mutex);
56
57/* Since there are only 256 possible network numbers (or fewer, depending
58   on how you count), it makes sense to use a simple lookup table. */
59static struct net_device *net2dev_map[256];
60
61#define EC_PORT_IP 0xd2
62
63#ifdef CONFIG_ECONET_AUNUDP
64static DEFINE_SPINLOCK(aun_queue_lock);
65static struct socket *udpsock;
66#define AUN_PORT 0x8000
67
68struct aunhdr {
69 unsigned char code; /* AUN magic protocol byte */
70 unsigned char port;
71 unsigned char cb;
72 unsigned char pad;
73 unsigned long handle;
74};
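/*
 * AUN 'code' values as used on the wire by this file (see aun_send_response(),
 * aun_incoming() and aun_data_available() below): 2 = data frame,
 * 3 = positive acknowledgement, 4 = negative acknowledgement ("not listening").
 */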
75
76static unsigned long aun_seq;
77
78/* Queue of packets waiting to be transmitted. */
79static struct sk_buff_head aun_queue;
80static struct timer_list ab_cleanup_timer;
81
82#endif /* CONFIG_ECONET_AUNUDP */
83
84/* Per-packet information */
85struct ec_cb {
86 struct sockaddr_ec sec;
87 unsigned long cookie; /* Supplied by user. */
88#ifdef CONFIG_ECONET_AUNUDP
89 int done;
90 unsigned long seq; /* Sequencing */
91 unsigned long timeout; /* Timeout */
92 unsigned long start; /* jiffies */
93#endif
94#ifdef CONFIG_ECONET_NATIVE
95 void (*sent)(struct sk_buff *, int result);
96#endif
97};
98
99static void econet_remove_socket(struct hlist_head *list, struct sock *sk)
100{
101 spin_lock_bh(&econet_lock);
102 sk_del_node_init(sk);
103 spin_unlock_bh(&econet_lock);
104}
105
106static void econet_insert_socket(struct hlist_head *list, struct sock *sk)
107{
108 spin_lock_bh(&econet_lock);
109 sk_add_node(sk, list);
110 spin_unlock_bh(&econet_lock);
111}
112
113/*
114 * Pull a packet from our receive queue and hand it to the user.
115 * If necessary we block.
116 */
117
118static int econet_recvmsg(struct kiocb *iocb, struct socket *sock,
119 struct msghdr *msg, size_t len, int flags)
120{
121 struct sock *sk = sock->sk;
122 struct sk_buff *skb;
123 size_t copied;
124 int err;
125
126 msg->msg_namelen = sizeof(struct sockaddr_ec);
127
128 mutex_lock(&econet_mutex);
129
130 /*
131 * Call the generic datagram receiver. This handles all sorts
132 * of horrible races and re-entrancy so we can forget about it
133 * in the protocol layers.
134 *
135	 * Now it will return ENETDOWN, if the device has just gone down,
136 * but then it will block.
137 */
138
139 skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
140
141 /*
142	 * An error occurred, so return it. Because skb_recv_datagram()
143	 * handles the blocking for us, we don't need to see or worry about
144	 * blocking retries.
145 */
146
147 if (skb == NULL)
148 goto out;
149
150 /*
151	 * You lose any data beyond the buffer you gave. If this worries a
152	 * user program, it can ask the device for its MTU anyway.
153 */
154
155 copied = skb->len;
156 if (copied > len) {
157 copied = len;
158 msg->msg_flags |= MSG_TRUNC;
159 }
160
161 /* We can't use skb_copy_datagram here */
162 err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
163 if (err)
164 goto out_free;
165 sk->sk_stamp = skb->tstamp;
166
167 if (msg->msg_name)
168 memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
169
170 /*
171 * Free or return the buffer as appropriate. Again this
172 * hides all the races and re-entrancy issues from us.
173 */
174 err = copied;
175
176out_free:
177 skb_free_datagram(sk, skb);
178out:
179 mutex_unlock(&econet_mutex);
180 return err;
181}
182
183/*
184 * Bind an Econet socket.
185 */
186
187static int econet_bind(struct socket *sock, struct sockaddr *uaddr,
188 int addr_len)
189{
190 struct sockaddr_ec *sec = (struct sockaddr_ec *)uaddr;
191 struct sock *sk;
192 struct econet_sock *eo;
193
194 /*
195 * Check legality
196 */
197
198 if (addr_len < sizeof(struct sockaddr_ec) ||
199 sec->sec_family != AF_ECONET)
200 return -EINVAL;
201
202 mutex_lock(&econet_mutex);
203
204 sk = sock->sk;
205 eo = ec_sk(sk);
206
207 eo->cb = sec->cb;
208 eo->port = sec->port;
209 eo->station = sec->addr.station;
210 eo->net = sec->addr.net;
211
212 mutex_unlock(&econet_mutex);
213
214 return 0;
215}
216
217#if defined(CONFIG_ECONET_AUNUDP) || defined(CONFIG_ECONET_NATIVE)
218/*
219 * Queue a transmit result for the user to be told about.
220 */
221
222static void tx_result(struct sock *sk, unsigned long cookie, int result)
223{
224 struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC);
225 struct ec_cb *eb;
226 struct sockaddr_ec *sec;
227
228 if (skb == NULL) {
229 pr_debug("econet: memory squeeze, transmit result dropped\n");
230 return;
231 }
232
233 eb = (struct ec_cb *)&skb->cb;
234 sec = (struct sockaddr_ec *)&eb->sec;
235 memset(sec, 0, sizeof(struct sockaddr_ec));
236 sec->cookie = cookie;
237 sec->type = ECTYPE_TRANSMIT_STATUS | result;
238 sec->sec_family = AF_ECONET;
239
240 if (sock_queue_rcv_skb(sk, skb) < 0)
241 kfree_skb(skb);
242}
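/*
 * Note: the result reaches userspace as a zero-length datagram whose address
 * (copied out of skb->cb by econet_recvmsg()) carries
 * ECTYPE_TRANSMIT_STATUS | result in sockaddr_ec.type, together with the
 * cookie the sender originally supplied.
 */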
243#endif
244
245#ifdef CONFIG_ECONET_NATIVE
246/*
247 * Called by the Econet hardware driver when a packet transmit
248 * has completed. Tell the user.
249 */
250
251static void ec_tx_done(struct sk_buff *skb, int result)
252{
253 struct ec_cb *eb = (struct ec_cb *)&skb->cb;
254 tx_result(skb->sk, eb->cookie, result);
255}
256#endif
257
258/*
259 * Send a packet. We have to work out which device it's going out on
260 * and hence whether to use real Econet or the UDP emulation.
261 */
262
263static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
264 struct msghdr *msg, size_t len)
265{
266 struct sockaddr_ec *saddr = (struct sockaddr_ec *)msg->msg_name;
267 struct net_device *dev;
268 struct ec_addr addr;
269 int err;
270 unsigned char port, cb;
271#if defined(CONFIG_ECONET_AUNUDP) || defined(CONFIG_ECONET_NATIVE)
272 struct sock *sk = sock->sk;
273 struct sk_buff *skb;
274 struct ec_cb *eb;
275#endif
276#ifdef CONFIG_ECONET_AUNUDP
277 struct msghdr udpmsg;
278 struct iovec iov[2];
279 struct aunhdr ah;
280 struct sockaddr_in udpdest;
281 __kernel_size_t size;
282 mm_segment_t oldfs;
283 char *userbuf;
284#endif
285
286 /*
287 * Check the flags.
288 */
289
290 if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
291 return -EINVAL;
292
293 /*
294 * Get and verify the address.
295 */
296
297 mutex_lock(&econet_mutex);
298
299 if (saddr == NULL || msg->msg_namelen < sizeof(struct sockaddr_ec)) {
300 mutex_unlock(&econet_mutex);
301 return -EINVAL;
302 }
303 addr.station = saddr->addr.station;
304 addr.net = saddr->addr.net;
305 port = saddr->port;
306 cb = saddr->cb;
307
308 /* Look for a device with the right network number. */
309 dev = net2dev_map[addr.net];
310
311 /* If not directly reachable, use some default */
312 if (dev == NULL) {
313 dev = net2dev_map[0];
314 /* No interfaces at all? */
315 if (dev == NULL) {
316 mutex_unlock(&econet_mutex);
317 return -ENETDOWN;
318 }
319 }
320
321 if (dev->type == ARPHRD_ECONET) {
322 /* Real hardware Econet. We're not worthy etc. */
323#ifdef CONFIG_ECONET_NATIVE
324 unsigned short proto = 0;
325 int res;
326
327 if (len + 15 > dev->mtu) {
328 mutex_unlock(&econet_mutex);
329 return -EMSGSIZE;
330 }
331
332 dev_hold(dev);
333
334 skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
335 msg->msg_flags & MSG_DONTWAIT, &err);
336 if (skb == NULL)
337 goto out_unlock;
338
339 skb_reserve(skb, LL_RESERVED_SPACE(dev));
340 skb_reset_network_header(skb);
341
342 eb = (struct ec_cb *)&skb->cb;
343
344 eb->cookie = saddr->cookie;
345 eb->sec = *saddr;
346 eb->sent = ec_tx_done;
347
348 err = -EINVAL;
349 res = dev_hard_header(skb, dev, ntohs(proto), &addr, NULL, len);
350 if (res < 0)
351 goto out_free;
352 if (res > 0) {
353 struct ec_framehdr *fh;
354 /* Poke in our control byte and
355 port number. Hack, hack. */
356 fh = (struct ec_framehdr *)skb->data;
357 fh->cb = cb;
358 fh->port = port;
359 if (sock->type != SOCK_DGRAM) {
360 skb_reset_tail_pointer(skb);
361 skb->len = 0;
362 }
363 }
364
365 /* Copy the data. Returns -EFAULT on error */
366 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
367 skb->protocol = proto;
368 skb->dev = dev;
369 skb->priority = sk->sk_priority;
370 if (err)
371 goto out_free;
372
373 err = -ENETDOWN;
374 if (!(dev->flags & IFF_UP))
375 goto out_free;
376
377 /*
378 * Now send it
379 */
380
381 dev_queue_xmit(skb);
382 dev_put(dev);
383 mutex_unlock(&econet_mutex);
384 return len;
385
386out_free:
387 kfree_skb(skb);
388out_unlock:
389 if (dev)
390 dev_put(dev);
391#else
392 err = -EPROTOTYPE;
393#endif
394 mutex_unlock(&econet_mutex);
395
396 return err;
397 }
398
399#ifdef CONFIG_ECONET_AUNUDP
400 /* AUN virtual Econet. */
401
402 if (udpsock == NULL) {
403 mutex_unlock(&econet_mutex);
404 return -ENETDOWN; /* No socket - can't send */
405 }
406
407 if (len > 32768) {
408 err = -E2BIG;
409 goto error;
410 }
411
412 /* Make up a UDP datagram and hand it off to some higher intellect. */
413
414 memset(&udpdest, 0, sizeof(udpdest));
415 udpdest.sin_family = AF_INET;
416 udpdest.sin_port = htons(AUN_PORT);
417
418	/* At the moment we use the stupid Acorn scheme where Econet address
419	   y.x maps to IP a.b.c.x. This should be replaced with something
420	   more flexible and more aware of subnet masks. */
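	/*
	 * Worked example of the mapping implemented just below: with a local
	 * interface address of, say, 192.168.0.1 and a destination station of
	 * 42, the AUN datagram is sent to UDP address 192.168.0.42 (the /24
	 * network part ORed with the station number). Addresses are
	 * illustrative only.
	 */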
421 {
422 struct in_device *idev;
423 unsigned long network = 0;
424
425 rcu_read_lock();
426 idev = __in_dev_get_rcu(dev);
427 if (idev) {
428 if (idev->ifa_list)
429 network = ntohl(idev->ifa_list->ifa_address) &
430 0xffffff00; /* !!! */
431 }
432 rcu_read_unlock();
433 udpdest.sin_addr.s_addr = htonl(network | addr.station);
434 }
435
436 memset(&ah, 0, sizeof(ah));
437 ah.port = port;
438 ah.cb = cb & 0x7f;
439 ah.code = 2; /* magic */
440
441 /* tack our header on the front of the iovec */
442 size = sizeof(struct aunhdr);
443 iov[0].iov_base = (void *)&ah;
444 iov[0].iov_len = size;
445
446 userbuf = vmalloc(len);
447 if (userbuf == NULL) {
448 err = -ENOMEM;
449 goto error;
450 }
451
452 iov[1].iov_base = userbuf;
453 iov[1].iov_len = len;
454 err = memcpy_fromiovec(userbuf, msg->msg_iov, len);
455 if (err)
456 goto error_free_buf;
457
458 /* Get a skbuff (no data, just holds our cb information) */
459 skb = sock_alloc_send_skb(sk, 0, msg->msg_flags & MSG_DONTWAIT, &err);
460 if (skb == NULL)
461 goto error_free_buf;
462
463 eb = (struct ec_cb *)&skb->cb;
464
465 eb->cookie = saddr->cookie;
466 eb->timeout = 5 * HZ;
467 eb->start = jiffies;
468 ah.handle = aun_seq;
469 eb->seq = (aun_seq++);
470 eb->sec = *saddr;
471
472 skb_queue_tail(&aun_queue, skb);
473
474 udpmsg.msg_name = (void *)&udpdest;
475 udpmsg.msg_namelen = sizeof(udpdest);
476 udpmsg.msg_iov = &iov[0];
477 udpmsg.msg_iovlen = 2;
478 udpmsg.msg_control = NULL;
479 udpmsg.msg_controllen = 0;
480 udpmsg.msg_flags = 0;
481
482 oldfs = get_fs();
483 set_fs(KERNEL_DS); /* More privs :-) */
484 err = sock_sendmsg(udpsock, &udpmsg, size);
485 set_fs(oldfs);
486
487error_free_buf:
488 vfree(userbuf);
489error:
490#else
491 err = -EPROTOTYPE;
492#endif
493 mutex_unlock(&econet_mutex);
494
495 return err;
496}
497
498/*
499 * Look up the address of a socket.
500 */
501
502static int econet_getname(struct socket *sock, struct sockaddr *uaddr,
503 int *uaddr_len, int peer)
504{
505 struct sock *sk;
506 struct econet_sock *eo;
507 struct sockaddr_ec *sec = (struct sockaddr_ec *)uaddr;
508
509 if (peer)
510 return -EOPNOTSUPP;
511
512 memset(sec, 0, sizeof(*sec));
513 mutex_lock(&econet_mutex);
514
515 sk = sock->sk;
516 eo = ec_sk(sk);
517
518 sec->sec_family = AF_ECONET;
519 sec->port = eo->port;
520 sec->addr.station = eo->station;
521 sec->addr.net = eo->net;
522
523 mutex_unlock(&econet_mutex);
524
525 *uaddr_len = sizeof(*sec);
526 return 0;
527}
528
529static void econet_destroy_timer(unsigned long data)
530{
531 struct sock *sk = (struct sock *)data;
532
533 if (!sk_has_allocations(sk)) {
534 sk_free(sk);
535 return;
536 }
537
538 sk->sk_timer.expires = jiffies + 10 * HZ;
539 add_timer(&sk->sk_timer);
540 pr_debug("econet: socket destroy delayed\n");
541}
542
543/*
544 * Close an econet socket.
545 */
546
547static int econet_release(struct socket *sock)
548{
549 struct sock *sk;
550
551 mutex_lock(&econet_mutex);
552
553 sk = sock->sk;
554 if (!sk)
555 goto out_unlock;
556
557 econet_remove_socket(&econet_sklist, sk);
558
559 /*
560 * Now the socket is dead. No more input will appear.
561 */
562
563 sk->sk_state_change(sk); /* It is useless. Just for sanity. */
564
565 sock_orphan(sk);
566
567 /* Purge queues */
568
569 skb_queue_purge(&sk->sk_receive_queue);
570
571 if (sk_has_allocations(sk)) {
572 sk->sk_timer.data = (unsigned long)sk;
573 sk->sk_timer.expires = jiffies + HZ;
574 sk->sk_timer.function = econet_destroy_timer;
575 add_timer(&sk->sk_timer);
576
577 goto out_unlock;
578 }
579
580 sk_free(sk);
581
582out_unlock:
583 mutex_unlock(&econet_mutex);
584 return 0;
585}
586
587static struct proto econet_proto = {
588 .name = "ECONET",
589 .owner = THIS_MODULE,
590 .obj_size = sizeof(struct econet_sock),
591};
592
593/*
594 * Create an Econet socket
595 */
596
597static int econet_create(struct net *net, struct socket *sock, int protocol,
598 int kern)
599{
600 struct sock *sk;
601 struct econet_sock *eo;
602 int err;
603
604 if (!net_eq(net, &init_net))
605 return -EAFNOSUPPORT;
606
607 /* Econet only provides datagram services. */
608 if (sock->type != SOCK_DGRAM)
609 return -ESOCKTNOSUPPORT;
610
611 sock->state = SS_UNCONNECTED;
612
613 err = -ENOBUFS;
614 sk = sk_alloc(net, PF_ECONET, GFP_KERNEL, &econet_proto);
615 if (sk == NULL)
616 goto out;
617
618 sk->sk_reuse = 1;
619 sock->ops = &econet_ops;
620 sock_init_data(sock, sk);
621
622 eo = ec_sk(sk);
623 sock_reset_flag(sk, SOCK_ZAPPED);
624 sk->sk_family = PF_ECONET;
625 eo->num = protocol;
626
627 econet_insert_socket(&econet_sklist, sk);
628 return 0;
629out:
630 return err;
631}
632
633/*
634 * Handle Econet specific ioctls
635 */
636
637static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg)
638{
639 struct ifreq ifr;
640 struct ec_device *edev;
641 struct net_device *dev;
642 struct sockaddr_ec *sec;
643 int err;
644
645 /*
646 * Fetch the caller's info block into kernel space
647 */
648
649 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
650 return -EFAULT;
651
652 dev = dev_get_by_name(&init_net, ifr.ifr_name);
653 if (dev == NULL)
654 return -ENODEV;
655
656 sec = (struct sockaddr_ec *)&ifr.ifr_addr;
657
658 mutex_lock(&econet_mutex);
659
660 err = 0;
661 switch (cmd) {
662 case SIOCSIFADDR:
663 if (!capable(CAP_NET_ADMIN)) {
664 err = -EPERM;
665 break;
666 }
667
668 edev = dev->ec_ptr;
669 if (edev == NULL) {
670 /* Magic up a new one. */
671 edev = kzalloc(sizeof(struct ec_device), GFP_KERNEL);
672 if (edev == NULL) {
673 err = -ENOMEM;
674 break;
675 }
676 dev->ec_ptr = edev;
677 } else
678 net2dev_map[edev->net] = NULL;
679 edev->station = sec->addr.station;
680 edev->net = sec->addr.net;
681 net2dev_map[sec->addr.net] = dev;
682 if (!net2dev_map[0])
683 net2dev_map[0] = dev;
684 break;
685
686 case SIOCGIFADDR:
687 edev = dev->ec_ptr;
688 if (edev == NULL) {
689 err = -ENODEV;
690 break;
691 }
692 memset(sec, 0, sizeof(struct sockaddr_ec));
693 sec->addr.station = edev->station;
694 sec->addr.net = edev->net;
695 sec->sec_family = AF_ECONET;
696 dev_put(dev);
697 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
698 err = -EFAULT;
699 break;
700
701 default:
702 err = -EINVAL;
703 break;
704 }
705
706 mutex_unlock(&econet_mutex);
707
708 dev_put(dev);
709
710 return err;
711}
712
713/*
714 * Handle generic ioctls
715 */
716
717static int econet_ioctl(struct socket *sock, unsigned int cmd,
718 unsigned long arg)
719{
720 struct sock *sk = sock->sk;
721 void __user *argp = (void __user *)arg;
722
723 switch (cmd) {
724 case SIOCGSTAMP:
725 return sock_get_timestamp(sk, argp);
726
727 case SIOCGSTAMPNS:
728 return sock_get_timestampns(sk, argp);
729
730 case SIOCSIFADDR:
731 case SIOCGIFADDR:
732 return ec_dev_ioctl(sock, cmd, argp);
733
734 }
735
736 return -ENOIOCTLCMD;
737}
738
739static const struct net_proto_family econet_family_ops = {
740 .family = PF_ECONET,
741 .create = econet_create,
742 .owner = THIS_MODULE,
743};
744
745static const struct proto_ops econet_ops = {
746 .family = PF_ECONET,
747 .owner = THIS_MODULE,
748 .release = econet_release,
749 .bind = econet_bind,
750 .connect = sock_no_connect,
751 .socketpair = sock_no_socketpair,
752 .accept = sock_no_accept,
753 .getname = econet_getname,
754 .poll = datagram_poll,
755 .ioctl = econet_ioctl,
756 .listen = sock_no_listen,
757 .shutdown = sock_no_shutdown,
758 .setsockopt = sock_no_setsockopt,
759 .getsockopt = sock_no_getsockopt,
760 .sendmsg = econet_sendmsg,
761 .recvmsg = econet_recvmsg,
762 .mmap = sock_no_mmap,
763 .sendpage = sock_no_sendpage,
764};
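/*
 * Illustrative userspace sketch (not part of this file): how the socket
 * interface registered above is typically driven. Only the sockaddr_ec
 * fields read by econet_bind() are filled in; AF_ECONET and struct
 * sockaddr_ec come from the kernel headers (linux/if_ec.h), and the port
 * number below is made up.
 */
#if 0	/* example only */
static int econet_example_listener(void)
{
	struct sockaddr_ec sec;
	int fd = socket(AF_ECONET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;
	memset(&sec, 0, sizeof(sec));
	sec.sec_family = AF_ECONET;
	sec.port = 0x99;	/* listen on this (made-up) Econet port */
	sec.addr.station = 0;	/* 0 acts as a wildcard in ec_listening_socket() */
	sec.addr.net = 0;
	if (bind(fd, (struct sockaddr *)&sec, sizeof(sec)) < 0)
		return -1;
	/* datagrams and transmit status reports then arrive via recvmsg() */
	return fd;
}
#endif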
765
766#if defined(CONFIG_ECONET_AUNUDP) || defined(CONFIG_ECONET_NATIVE)
767/*
768 * Find the listening socket, if any, for the given data.
769 */
770
771static struct sock *ec_listening_socket(unsigned char port, unsigned char
772 station, unsigned char net)
773{
774 struct sock *sk;
775 struct hlist_node *node;
776
777 spin_lock(&econet_lock);
778 sk_for_each(sk, node, &econet_sklist) {
779 struct econet_sock *opt = ec_sk(sk);
780 if ((opt->port == port || opt->port == 0) &&
781 (opt->station == station || opt->station == 0) &&
782 (opt->net == net || opt->net == 0)) {
783 sock_hold(sk);
784 goto found;
785 }
786 }
787 sk = NULL;
788found:
789 spin_unlock(&econet_lock);
790 return sk;
791}
792
793/*
794 * Queue a received packet for a socket.
795 */
796
797static int ec_queue_packet(struct sock *sk, struct sk_buff *skb,
798 unsigned char stn, unsigned char net,
799 unsigned char cb, unsigned char port)
800{
801 struct ec_cb *eb = (struct ec_cb *)&skb->cb;
802 struct sockaddr_ec *sec = (struct sockaddr_ec *)&eb->sec;
803
804 memset(sec, 0, sizeof(struct sockaddr_ec));
805 sec->sec_family = AF_ECONET;
806 sec->type = ECTYPE_PACKET_RECEIVED;
807 sec->port = port;
808 sec->cb = cb;
809 sec->addr.net = net;
810 sec->addr.station = stn;
811
812 return sock_queue_rcv_skb(sk, skb);
813}
814#endif
815
816#ifdef CONFIG_ECONET_AUNUDP
817/*
818 * Send an AUN protocol response.
819 */
820
821static void aun_send_response(__u32 addr, unsigned long seq, int code, int cb)
822{
823 struct sockaddr_in sin = {
824 .sin_family = AF_INET,
825 .sin_port = htons(AUN_PORT),
826 .sin_addr = {.s_addr = addr}
827 };
828 struct aunhdr ah = {.code = code, .cb = cb, .handle = seq};
829 struct kvec iov = {.iov_base = (void *)&ah, .iov_len = sizeof(ah)};
830 struct msghdr udpmsg;
831
832 udpmsg.msg_name = (void *)&sin;
833 udpmsg.msg_namelen = sizeof(sin);
834 udpmsg.msg_control = NULL;
835 udpmsg.msg_controllen = 0;
836 udpmsg.msg_flags = 0;
837
838 kernel_sendmsg(udpsock, &udpmsg, &iov, 1, sizeof(ah));
839}
840
841
842/*
843 * Handle incoming AUN packets. Work out if anybody wants them,
844 * and send positive or negative acknowledgements as appropriate.
845 */
846
847static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
848{
849 struct iphdr *ip = ip_hdr(skb);
850 unsigned char stn = ntohl(ip->saddr) & 0xff;
851 struct dst_entry *dst = skb_dst(skb);
852 struct ec_device *edev = NULL;
853 struct sock *sk = NULL;
854 struct sk_buff *newskb;
855
856 if (dst)
857 edev = dst->dev->ec_ptr;
858
859 if (!edev)
860 goto bad;
861
862 sk = ec_listening_socket(ah->port, stn, edev->net);
863 if (sk == NULL)
864 goto bad; /* Nobody wants it */
865
866 newskb = alloc_skb((len - sizeof(struct aunhdr) + 15) & ~15,
867 GFP_ATOMIC);
868 if (newskb == NULL) {
869 pr_debug("AUN: memory squeeze, dropping packet\n");
870 /* Send nack and hope sender tries again */
871 goto bad;
872 }
873
874 memcpy(skb_put(newskb, len - sizeof(struct aunhdr)), (void *)(ah + 1),
875 len - sizeof(struct aunhdr));
876
877 if (ec_queue_packet(sk, newskb, stn, edev->net, ah->cb, ah->port)) {
878 /* Socket is bankrupt. */
879 kfree_skb(newskb);
880 goto bad;
881 }
882
883 aun_send_response(ip->saddr, ah->handle, 3, 0);
884 sock_put(sk);
885 return;
886
887bad:
888 aun_send_response(ip->saddr, ah->handle, 4, 0);
889 if (sk)
890 sock_put(sk);
891}
892
893/*
894 * Handle incoming AUN transmit acknowledgements. If the sequence
895 * number matches something in our backlog then kill it and tell
896 * the user. If the remote took too long to reply then we may have
897 * dropped the packet already.
898 */
899
900static void aun_tx_ack(unsigned long seq, int result)
901{
902 struct sk_buff *skb;
903 unsigned long flags;
904 struct ec_cb *eb;
905
906 spin_lock_irqsave(&aun_queue_lock, flags);
907 skb_queue_walk(&aun_queue, skb) {
908 eb = (struct ec_cb *)&skb->cb;
909 if (eb->seq == seq)
910 goto foundit;
911 }
912 spin_unlock_irqrestore(&aun_queue_lock, flags);
913 pr_debug("AUN: unknown sequence %ld\n", seq);
914 return;
915
916foundit:
917 tx_result(skb->sk, eb->cookie, result);
918 skb_unlink(skb, &aun_queue);
919 spin_unlock_irqrestore(&aun_queue_lock, flags);
920 kfree_skb(skb);
921}
922
923/*
924 * Deal with received AUN frames - sort out what type of thing it is
925 * and hand it to the right function.
926 */
927
928static void aun_data_available(struct sock *sk, int slen)
929{
930 int err;
931 struct sk_buff *skb;
932 unsigned char *data;
933 struct aunhdr *ah;
934 size_t len;
935
936 while ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) {
937 if (err == -EAGAIN) {
938 pr_err("AUN: no data available?!\n");
939 return;
940 }
941 pr_debug("AUN: recvfrom() error %d\n", -err);
942 }
943
944 data = skb_transport_header(skb) + sizeof(struct udphdr);
945 ah = (struct aunhdr *)data;
946 len = skb->len - sizeof(struct udphdr);
947
948 switch (ah->code) {
949 case 2:
950 aun_incoming(skb, ah, len);
951 break;
952 case 3:
953 aun_tx_ack(ah->handle, ECTYPE_TRANSMIT_OK);
954 break;
955 case 4:
956 aun_tx_ack(ah->handle, ECTYPE_TRANSMIT_NOT_LISTENING);
957 break;
958 default:
959 pr_debug("AUN: unknown packet type: %d\n", data[0]);
960 }
961
962 skb_free_datagram(sk, skb);
963}
964
965/*
966 * Called by the timer to manage the AUN transmit queue. If a packet
967 * was sent to a dead or nonexistent host then we will never get an
968 * acknowledgement back. After a few seconds we need to spot this and
969 * drop the packet.
970 */
971
972static void ab_cleanup(unsigned long h)
973{
974 struct sk_buff *skb, *n;
975 unsigned long flags;
976
977 spin_lock_irqsave(&aun_queue_lock, flags);
978 skb_queue_walk_safe(&aun_queue, skb, n) {
979 struct ec_cb *eb = (struct ec_cb *)&skb->cb;
980 if ((jiffies - eb->start) > eb->timeout) {
981 tx_result(skb->sk, eb->cookie,
982 ECTYPE_TRANSMIT_NOT_PRESENT);
983 skb_unlink(skb, &aun_queue);
984 kfree_skb(skb);
985 }
986 }
987 spin_unlock_irqrestore(&aun_queue_lock, flags);
988
989 mod_timer(&ab_cleanup_timer, jiffies + (HZ * 2));
990}
991
992static int __init aun_udp_initialise(void)
993{
994 int error;
995 struct sockaddr_in sin;
996
997 skb_queue_head_init(&aun_queue);
998 setup_timer(&ab_cleanup_timer, ab_cleanup, 0);
999 ab_cleanup_timer.expires = jiffies + (HZ * 2);
1000 add_timer(&ab_cleanup_timer);
1001
1002 memset(&sin, 0, sizeof(sin));
1003 sin.sin_port = htons(AUN_PORT);
1004
1005 /* We can count ourselves lucky Acorn machines are too dim to
1006 speak IPv6. :-) */
1007 error = sock_create_kern(PF_INET, SOCK_DGRAM, 0, &udpsock);
1008 if (error < 0) {
1009 pr_err("AUN: socket error %d\n", -error);
1010 return error;
1011 }
1012
1013 udpsock->sk->sk_reuse = 1;
1014 udpsock->sk->sk_allocation = GFP_ATOMIC; /* we're going to call it
1015 from interrupts */
1016
1017 error = udpsock->ops->bind(udpsock, (struct sockaddr *)&sin,
1018 sizeof(sin));
1019 if (error < 0) {
1020 pr_err("AUN: bind error %d\n", -error);
1021 goto release;
1022 }
1023
1024 udpsock->sk->sk_data_ready = aun_data_available;
1025
1026 return 0;
1027
1028release:
1029 sock_release(udpsock);
1030 udpsock = NULL;
1031 return error;
1032}
1033#endif
1034
1035#ifdef CONFIG_ECONET_NATIVE
1036
1037/*
1038 * Receive an Econet frame from a device.
1039 */
1040
1041static int econet_rcv(struct sk_buff *skb, struct net_device *dev,
1042 struct packet_type *pt, struct net_device *orig_dev)
1043{
1044 struct ec_framehdr *hdr;
1045 struct sock *sk = NULL;
1046 struct ec_device *edev = dev->ec_ptr;
1047
1048 if (!net_eq(dev_net(dev), &init_net))
1049 goto drop;
1050
1051 if (skb->pkt_type == PACKET_OTHERHOST)
1052 goto drop;
1053
1054 if (!edev)
1055 goto drop;
1056
1057 skb = skb_share_check(skb, GFP_ATOMIC);
1058 if (skb == NULL)
1059 return NET_RX_DROP;
1060
1061 if (!pskb_may_pull(skb, sizeof(struct ec_framehdr)))
1062 goto drop;
1063
1064 hdr = (struct ec_framehdr *)skb->data;
1065
1066 /* First check for encapsulated IP */
1067 if (hdr->port == EC_PORT_IP) {
1068 skb->protocol = htons(ETH_P_IP);
1069 skb_pull(skb, sizeof(struct ec_framehdr));
1070 netif_rx(skb);
1071 return NET_RX_SUCCESS;
1072 }
1073
1074 sk = ec_listening_socket(hdr->port, hdr->src_stn, hdr->src_net);
1075 if (!sk)
1076 goto drop;
1077
1078 if (ec_queue_packet(sk, skb, edev->net, hdr->src_stn, hdr->cb,
1079 hdr->port))
1080 goto drop;
1081 sock_put(sk);
1082 return NET_RX_SUCCESS;
1083
1084drop:
1085 if (sk)
1086 sock_put(sk);
1087 kfree_skb(skb);
1088 return NET_RX_DROP;
1089}
1090
1091static struct packet_type econet_packet_type __read_mostly = {
1092 .type = cpu_to_be16(ETH_P_ECONET),
1093 .func = econet_rcv,
1094};
1095
1096static void econet_hw_initialise(void)
1097{
1098 dev_add_pack(&econet_packet_type);
1099}
1100
1101#endif
1102
1103static int econet_notifier(struct notifier_block *this, unsigned long msg,
1104 void *data)
1105{
1106 struct net_device *dev = data;
1107 struct ec_device *edev;
1108
1109 if (!net_eq(dev_net(dev), &init_net))
1110 return NOTIFY_DONE;
1111
1112 switch (msg) {
1113 case NETDEV_UNREGISTER:
1114 /* A device has gone down - kill any data we hold for it. */
1115 edev = dev->ec_ptr;
1116 if (edev) {
1117 if (net2dev_map[0] == dev)
1118 net2dev_map[0] = NULL;
1119 net2dev_map[edev->net] = NULL;
1120 kfree(edev);
1121 dev->ec_ptr = NULL;
1122 }
1123 break;
1124 }
1125
1126 return NOTIFY_DONE;
1127}
1128
1129static struct notifier_block econet_netdev_notifier = {
1130 .notifier_call = econet_notifier,
1131};
1132
1133static void __exit econet_proto_exit(void)
1134{
1135#ifdef CONFIG_ECONET_AUNUDP
1136 del_timer(&ab_cleanup_timer);
1137 if (udpsock)
1138 sock_release(udpsock);
1139#endif
1140 unregister_netdevice_notifier(&econet_netdev_notifier);
1141#ifdef CONFIG_ECONET_NATIVE
1142 dev_remove_pack(&econet_packet_type);
1143#endif
1144 sock_unregister(econet_family_ops.family);
1145 proto_unregister(&econet_proto);
1146}
1147
1148static int __init econet_proto_init(void)
1149{
1150 int err = proto_register(&econet_proto, 0);
1151
1152 if (err != 0)
1153 goto out;
1154 sock_register(&econet_family_ops);
1155#ifdef CONFIG_ECONET_AUNUDP
1156 aun_udp_initialise();
1157#endif
1158#ifdef CONFIG_ECONET_NATIVE
1159 econet_hw_initialise();
1160#endif
1161 register_netdevice_notifier(&econet_netdev_notifier);
1162out:
1163 return err;
1164}
1165
1166module_init(econet_proto_init);
1167module_exit(econet_proto_exit);
1168
1169MODULE_LICENSE("GPL");
1170MODULE_ALIAS_NETPROTO(PF_ECONET);
diff --git a/net/ethernet/pe2.c b/net/ethernet/pe2.c
new file mode 100644
index 00000000000..85d574addbc
--- /dev/null
+++ b/net/ethernet/pe2.c
@@ -0,0 +1,37 @@
1#include <linux/in.h>
2#include <linux/mm.h>
3#include <linux/module.h>
4#include <linux/netdevice.h>
5#include <linux/skbuff.h>
6#include <linux/slab.h>
7
8#include <net/datalink.h>
9
10static int pEII_request(struct datalink_proto *dl,
11 struct sk_buff *skb, unsigned char *dest_node)
12{
13 struct net_device *dev = skb->dev;
14
15 skb->protocol = htons(ETH_P_IPX);
16 dev_hard_header(skb, dev, ETH_P_IPX, dest_node, NULL, skb->len);
17 return dev_queue_xmit(skb);
18}
19
20struct datalink_proto *make_EII_client(void)
21{
22 struct datalink_proto *proto = kmalloc(sizeof(*proto), GFP_ATOMIC);
23
24 if (proto) {
25 proto->header_length = 0;
26 proto->request = pEII_request;
27 }
28
29 return proto;
30}
31EXPORT_SYMBOL(make_EII_client);
32
33void destroy_EII_client(struct datalink_proto *dl)
34{
35 kfree(dl);
36}
37EXPORT_SYMBOL(destroy_EII_client);
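/*
 * Usage note: a protocol wanting plain Ethernet II framing (in practice the
 * IPX code, given the hard-wired ETH_P_IPX above) calls make_EII_client()
 * once, transmits through proto->request(proto, skb, dest_node), and frees
 * the handle with destroy_EII_client() on teardown.
 */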
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
new file mode 100644
index 00000000000..e59aabd0eae
--- /dev/null
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -0,0 +1,637 @@
1/*
2 * This is a module which is used for queueing IPv4 packets and
3 * communicating with userspace via netlink.
4 *
5 * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
6 * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/init.h>
15#include <linux/ip.h>
16#include <linux/notifier.h>
17#include <linux/netdevice.h>
18#include <linux/netfilter.h>
19#include <linux/netfilter_ipv4/ip_queue.h>
20#include <linux/netfilter_ipv4/ip_tables.h>
21#include <linux/netlink.h>
22#include <linux/spinlock.h>
23#include <linux/sysctl.h>
24#include <linux/proc_fs.h>
25#include <linux/seq_file.h>
26#include <linux/security.h>
27#include <linux/net.h>
28#include <linux/mutex.h>
29#include <linux/slab.h>
30#include <net/net_namespace.h>
31#include <net/sock.h>
32#include <net/route.h>
33#include <net/netfilter/nf_queue.h>
34#include <net/ip.h>
35
36#define IPQ_QMAX_DEFAULT 1024
37#define IPQ_PROC_FS_NAME "ip_queue"
38#define NET_IPQ_QMAX 2088
39#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
40
41typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
42
43static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
44static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
45static DEFINE_SPINLOCK(queue_lock);
46static int peer_pid __read_mostly;
47static unsigned int copy_range __read_mostly;
48static unsigned int queue_total;
49static unsigned int queue_dropped = 0;
50static unsigned int queue_user_dropped = 0;
51static struct sock *ipqnl __read_mostly;
52static LIST_HEAD(queue_list);
53static DEFINE_MUTEX(ipqnl_mutex);
54
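/*
 * Overview of the userspace protocol implemented below: a single peer
 * (peer_pid) opens a NETLINK_FIREWALL socket and sends IPQM_MODE to select
 * none/meta/packet copying (__ipq_set_mode()). Queued packets are delivered
 * to it as IPQM_PACKET messages whose packet_id is the kernel's
 * nf_queue_entry pointer (ipq_build_packet_message()); the peer replies with
 * IPQM_VERDICT, optionally carrying a mangled payload, and ipq_set_verdict()
 * reinjects the packet via nf_reinject().
 */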
55static inline void
56__ipq_enqueue_entry(struct nf_queue_entry *entry)
57{
58 list_add_tail(&entry->list, &queue_list);
59 queue_total++;
60}
61
62static inline int
63__ipq_set_mode(unsigned char mode, unsigned int range)
64{
65 int status = 0;
66
67 switch(mode) {
68 case IPQ_COPY_NONE:
69 case IPQ_COPY_META:
70 copy_mode = mode;
71 copy_range = 0;
72 break;
73
74 case IPQ_COPY_PACKET:
75 if (range > 0xFFFF)
76 range = 0xFFFF;
77 copy_range = range;
78 copy_mode = mode;
79 break;
80
81 default:
82 status = -EINVAL;
83
84 }
85 return status;
86}
87
88static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
89
90static inline void
91__ipq_reset(void)
92{
93 peer_pid = 0;
94 net_disable_timestamp();
95 __ipq_set_mode(IPQ_COPY_NONE, 0);
96 __ipq_flush(NULL, 0);
97}
98
99static struct nf_queue_entry *
100ipq_find_dequeue_entry(unsigned long id)
101{
102 struct nf_queue_entry *entry = NULL, *i;
103
104 spin_lock_bh(&queue_lock);
105
106 list_for_each_entry(i, &queue_list, list) {
107 if ((unsigned long)i == id) {
108 entry = i;
109 break;
110 }
111 }
112
113 if (entry) {
114 list_del(&entry->list);
115 queue_total--;
116 }
117
118 spin_unlock_bh(&queue_lock);
119 return entry;
120}
121
122static void
123__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
124{
125 struct nf_queue_entry *entry, *next;
126
127 list_for_each_entry_safe(entry, next, &queue_list, list) {
128 if (!cmpfn || cmpfn(entry, data)) {
129 list_del(&entry->list);
130 queue_total--;
131 nf_reinject(entry, NF_DROP);
132 }
133 }
134}
135
136static void
137ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
138{
139 spin_lock_bh(&queue_lock);
140 __ipq_flush(cmpfn, data);
141 spin_unlock_bh(&queue_lock);
142}
143
144static struct sk_buff *
145ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
146{
147 sk_buff_data_t old_tail;
148 size_t size = 0;
149 size_t data_len = 0;
150 struct sk_buff *skb;
151 struct ipq_packet_msg *pmsg;
152 struct nlmsghdr *nlh;
153 struct timeval tv;
154
155 switch (ACCESS_ONCE(copy_mode)) {
156 case IPQ_COPY_META:
157 case IPQ_COPY_NONE:
158 size = NLMSG_SPACE(sizeof(*pmsg));
159 break;
160
161 case IPQ_COPY_PACKET:
162 if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
163 (*errp = skb_checksum_help(entry->skb)))
164 return NULL;
165
166 data_len = ACCESS_ONCE(copy_range);
167 if (data_len == 0 || data_len > entry->skb->len)
168 data_len = entry->skb->len;
169
170 size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
171 break;
172
173 default:
174 *errp = -EINVAL;
175 return NULL;
176 }
177
178 skb = alloc_skb(size, GFP_ATOMIC);
179 if (!skb)
180 goto nlmsg_failure;
181
182 old_tail = skb->tail;
183 nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
184 pmsg = NLMSG_DATA(nlh);
185 memset(pmsg, 0, sizeof(*pmsg));
186
187 pmsg->packet_id = (unsigned long )entry;
188 pmsg->data_len = data_len;
189 tv = ktime_to_timeval(entry->skb->tstamp);
190 pmsg->timestamp_sec = tv.tv_sec;
191 pmsg->timestamp_usec = tv.tv_usec;
192 pmsg->mark = entry->skb->mark;
193 pmsg->hook = entry->hook;
194 pmsg->hw_protocol = entry->skb->protocol;
195
196 if (entry->indev)
197 strcpy(pmsg->indev_name, entry->indev->name);
198 else
199 pmsg->indev_name[0] = '\0';
200
201 if (entry->outdev)
202 strcpy(pmsg->outdev_name, entry->outdev->name);
203 else
204 pmsg->outdev_name[0] = '\0';
205
206 if (entry->indev && entry->skb->dev &&
207 entry->skb->mac_header != entry->skb->network_header) {
208 pmsg->hw_type = entry->skb->dev->type;
209 pmsg->hw_addrlen = dev_parse_header(entry->skb,
210 pmsg->hw_addr);
211 }
212
213 if (data_len)
214 if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
215 BUG();
216
217 nlh->nlmsg_len = skb->tail - old_tail;
218 return skb;
219
220nlmsg_failure:
221 kfree_skb(skb);
222 *errp = -EINVAL;
223 printk(KERN_ERR "ip_queue: error creating packet message\n");
224 return NULL;
225}
226
227static int
228ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
229{
230 int status = -EINVAL;
231 struct sk_buff *nskb;
232
233 if (copy_mode == IPQ_COPY_NONE)
234 return -EAGAIN;
235
236 nskb = ipq_build_packet_message(entry, &status);
237 if (nskb == NULL)
238 return status;
239
240 spin_lock_bh(&queue_lock);
241
242 if (!peer_pid)
243 goto err_out_free_nskb;
244
245 if (queue_total >= queue_maxlen) {
246 queue_dropped++;
247 status = -ENOSPC;
248 if (net_ratelimit())
249 printk (KERN_WARNING "ip_queue: full at %d entries, "
250				"dropping packet(s). Dropped: %d\n", queue_total,
251 queue_dropped);
252 goto err_out_free_nskb;
253 }
254
255 /* netlink_unicast will either free the nskb or attach it to a socket */
256 status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
257 if (status < 0) {
258 queue_user_dropped++;
259 goto err_out_unlock;
260 }
261
262 __ipq_enqueue_entry(entry);
263
264 spin_unlock_bh(&queue_lock);
265 return status;
266
267err_out_free_nskb:
268 kfree_skb(nskb);
269
270err_out_unlock:
271 spin_unlock_bh(&queue_lock);
272 return status;
273}
274
275static int
276ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
277{
278 int diff;
279 struct iphdr *user_iph = (struct iphdr *)v->payload;
280 struct sk_buff *nskb;
281
282 if (v->data_len < sizeof(*user_iph))
283 return 0;
284 diff = v->data_len - e->skb->len;
285 if (diff < 0) {
286 if (pskb_trim(e->skb, v->data_len))
287 return -ENOMEM;
288 } else if (diff > 0) {
289 if (v->data_len > 0xFFFF)
290 return -EINVAL;
291 if (diff > skb_tailroom(e->skb)) {
292 nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
293 diff, GFP_ATOMIC);
294 if (!nskb) {
295 printk(KERN_WARNING "ip_queue: error "
296 "in mangle, dropping packet\n");
297 return -ENOMEM;
298 }
299 kfree_skb(e->skb);
300 e->skb = nskb;
301 }
302 skb_put(e->skb, diff);
303 }
304 if (!skb_make_writable(e->skb, v->data_len))
305 return -ENOMEM;
306 skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
307 e->skb->ip_summed = CHECKSUM_NONE;
308
309 return 0;
310}
311
312static int
313ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
314{
315 struct nf_queue_entry *entry;
316
317 if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
318 return -EINVAL;
319
320 entry = ipq_find_dequeue_entry(vmsg->id);
321 if (entry == NULL)
322 return -ENOENT;
323 else {
324 int verdict = vmsg->value;
325
326 if (vmsg->data_len && vmsg->data_len == len)
327 if (ipq_mangle_ipv4(vmsg, entry) < 0)
328 verdict = NF_DROP;
329
330 nf_reinject(entry, verdict);
331 return 0;
332 }
333}
334
335static int
336ipq_set_mode(unsigned char mode, unsigned int range)
337{
338 int status;
339
340 spin_lock_bh(&queue_lock);
341 status = __ipq_set_mode(mode, range);
342 spin_unlock_bh(&queue_lock);
343 return status;
344}
345
346static int
347ipq_receive_peer(struct ipq_peer_msg *pmsg,
348 unsigned char type, unsigned int len)
349{
350 int status = 0;
351
352 if (len < sizeof(*pmsg))
353 return -EINVAL;
354
355 switch (type) {
356 case IPQM_MODE:
357 status = ipq_set_mode(pmsg->msg.mode.value,
358 pmsg->msg.mode.range);
359 break;
360
361 case IPQM_VERDICT:
362 status = ipq_set_verdict(&pmsg->msg.verdict,
363 len - sizeof(*pmsg));
364 break;
365 default:
366 status = -EINVAL;
367 }
368 return status;
369}
370
371static int
372dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
373{
374 if (entry->indev)
375 if (entry->indev->ifindex == ifindex)
376 return 1;
377 if (entry->outdev)
378 if (entry->outdev->ifindex == ifindex)
379 return 1;
380#ifdef CONFIG_BRIDGE_NETFILTER
381 if (entry->skb->nf_bridge) {
382 if (entry->skb->nf_bridge->physindev &&
383 entry->skb->nf_bridge->physindev->ifindex == ifindex)
384 return 1;
385 if (entry->skb->nf_bridge->physoutdev &&
386 entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
387 return 1;
388 }
389#endif
390 return 0;
391}
392
393static void
394ipq_dev_drop(int ifindex)
395{
396 ipq_flush(dev_cmp, ifindex);
397}
398
399#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
400
401static inline void
402__ipq_rcv_skb(struct sk_buff *skb)
403{
404 int status, type, pid, flags;
405 unsigned int nlmsglen, skblen;
406 struct nlmsghdr *nlh;
407
408 skblen = skb->len;
409 if (skblen < sizeof(*nlh))
410 return;
411
412 nlh = nlmsg_hdr(skb);
413 nlmsglen = nlh->nlmsg_len;
414 if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
415 return;
416
417 pid = nlh->nlmsg_pid;
418 flags = nlh->nlmsg_flags;
419
420 if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
421 RCV_SKB_FAIL(-EINVAL);
422
423 if (flags & MSG_TRUNC)
424 RCV_SKB_FAIL(-ECOMM);
425
426 type = nlh->nlmsg_type;
427 if (type < NLMSG_NOOP || type >= IPQM_MAX)
428 RCV_SKB_FAIL(-EINVAL);
429
430 if (type <= IPQM_BASE)
431 return;
432
433 if (security_netlink_recv(skb, CAP_NET_ADMIN))
434 RCV_SKB_FAIL(-EPERM);
435
436 spin_lock_bh(&queue_lock);
437
438 if (peer_pid) {
439 if (peer_pid != pid) {
440 spin_unlock_bh(&queue_lock);
441 RCV_SKB_FAIL(-EBUSY);
442 }
443 } else {
444 net_enable_timestamp();
445 peer_pid = pid;
446 }
447
448 spin_unlock_bh(&queue_lock);
449
450 status = ipq_receive_peer(NLMSG_DATA(nlh), type,
451 nlmsglen - NLMSG_LENGTH(0));
452 if (status < 0)
453 RCV_SKB_FAIL(status);
454
455 if (flags & NLM_F_ACK)
456 netlink_ack(skb, nlh, 0);
457}
458
459static void
460ipq_rcv_skb(struct sk_buff *skb)
461{
462 mutex_lock(&ipqnl_mutex);
463 __ipq_rcv_skb(skb);
464 mutex_unlock(&ipqnl_mutex);
465}
466
467static int
468ipq_rcv_dev_event(struct notifier_block *this,
469 unsigned long event, void *ptr)
470{
471 struct net_device *dev = ptr;
472
473 if (!net_eq(dev_net(dev), &init_net))
474 return NOTIFY_DONE;
475
476 /* Drop any packets associated with the downed device */
477 if (event == NETDEV_DOWN)
478 ipq_dev_drop(dev->ifindex);
479 return NOTIFY_DONE;
480}
481
482static struct notifier_block ipq_dev_notifier = {
483 .notifier_call = ipq_rcv_dev_event,
484};
485
486static int
487ipq_rcv_nl_event(struct notifier_block *this,
488 unsigned long event, void *ptr)
489{
490 struct netlink_notify *n = ptr;
491
492 if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) {
493 spin_lock_bh(&queue_lock);
494 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
495 __ipq_reset();
496 spin_unlock_bh(&queue_lock);
497 }
498 return NOTIFY_DONE;
499}
500
501static struct notifier_block ipq_nl_notifier = {
502 .notifier_call = ipq_rcv_nl_event,
503};
504
505#ifdef CONFIG_SYSCTL
506static struct ctl_table_header *ipq_sysctl_header;
507
508static ctl_table ipq_table[] = {
509 {
510 .procname = NET_IPQ_QMAX_NAME,
511 .data = &queue_maxlen,
512 .maxlen = sizeof(queue_maxlen),
513 .mode = 0644,
514 .proc_handler = proc_dointvec
515 },
516 { }
517};
518#endif
519
520#ifdef CONFIG_PROC_FS
521static int ip_queue_show(struct seq_file *m, void *v)
522{
523 spin_lock_bh(&queue_lock);
524
525 seq_printf(m,
526 "Peer PID : %d\n"
527 "Copy mode : %hu\n"
528 "Copy range : %u\n"
529 "Queue length : %u\n"
530 "Queue max. length : %u\n"
531 "Queue dropped : %u\n"
532 "Netlink dropped : %u\n",
533 peer_pid,
534 copy_mode,
535 copy_range,
536 queue_total,
537 queue_maxlen,
538 queue_dropped,
539 queue_user_dropped);
540
541 spin_unlock_bh(&queue_lock);
542 return 0;
543}
544
545static int ip_queue_open(struct inode *inode, struct file *file)
546{
547 return single_open(file, ip_queue_show, NULL);
548}
549
550static const struct file_operations ip_queue_proc_fops = {
551 .open = ip_queue_open,
552 .read = seq_read,
553 .llseek = seq_lseek,
554 .release = single_release,
555 .owner = THIS_MODULE,
556};
557#endif
558
559static const struct nf_queue_handler nfqh = {
560 .name = "ip_queue",
561 .outfn = &ipq_enqueue_packet,
562};
563
564static int __init ip_queue_init(void)
565{
566 int status = -ENOMEM;
567 struct proc_dir_entry *proc __maybe_unused;
568
569 netlink_register_notifier(&ipq_nl_notifier);
570 ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
571 ipq_rcv_skb, NULL, THIS_MODULE);
572 if (ipqnl == NULL) {
573 printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
574 goto cleanup_netlink_notifier;
575 }
576
577#ifdef CONFIG_PROC_FS
578 proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
579 &ip_queue_proc_fops);
580 if (!proc) {
581 printk(KERN_ERR "ip_queue: failed to create proc entry\n");
582 goto cleanup_ipqnl;
583 }
584#endif
585 register_netdevice_notifier(&ipq_dev_notifier);
586#ifdef CONFIG_SYSCTL
587 ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table);
588#endif
589 status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh);
590 if (status < 0) {
591 printk(KERN_ERR "ip_queue: failed to register queue handler\n");
592 goto cleanup_sysctl;
593 }
594 return status;
595
596cleanup_sysctl:
597#ifdef CONFIG_SYSCTL
598 unregister_sysctl_table(ipq_sysctl_header);
599#endif
600 unregister_netdevice_notifier(&ipq_dev_notifier);
601 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
602cleanup_ipqnl: __maybe_unused
603 netlink_kernel_release(ipqnl);
604 mutex_lock(&ipqnl_mutex);
605 mutex_unlock(&ipqnl_mutex);
606
607cleanup_netlink_notifier:
608 netlink_unregister_notifier(&ipq_nl_notifier);
609 return status;
610}
611
612static void __exit ip_queue_fini(void)
613{
614 nf_unregister_queue_handlers(&nfqh);
615
616 ipq_flush(NULL, 0);
617
618#ifdef CONFIG_SYSCTL
619 unregister_sysctl_table(ipq_sysctl_header);
620#endif
621 unregister_netdevice_notifier(&ipq_dev_notifier);
622 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
623
624 netlink_kernel_release(ipqnl);
625 mutex_lock(&ipqnl_mutex);
626 mutex_unlock(&ipqnl_mutex);
627
628 netlink_unregister_notifier(&ipq_nl_notifier);
629}
630
631MODULE_DESCRIPTION("IPv4 packet queue handler");
632MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
633MODULE_LICENSE("GPL");
634MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL);
635
636module_init(ip_queue_init);
637module_exit(ip_queue_fini);
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
new file mode 100644
index 00000000000..d76d6c9ed94
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -0,0 +1,516 @@
1/*
2 * This is a module which is used for logging packets.
3 */
4
5/* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/module.h>
14#include <linux/spinlock.h>
15#include <linux/skbuff.h>
16#include <linux/if_arp.h>
17#include <linux/ip.h>
18#include <net/icmp.h>
19#include <net/udp.h>
20#include <net/tcp.h>
21#include <net/route.h>
22
23#include <linux/netfilter.h>
24#include <linux/netfilter/x_tables.h>
25#include <linux/netfilter_ipv4/ipt_LOG.h>
26#include <net/netfilter/nf_log.h>
27#include <net/netfilter/xt_log.h>
28
29MODULE_LICENSE("GPL");
30MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
31MODULE_DESCRIPTION("Xtables: IPv4 packet logging to syslog");
32
33/* One level of recursion won't kill us */
34static void dump_packet(struct sbuff *m,
35 const struct nf_loginfo *info,
36 const struct sk_buff *skb,
37 unsigned int iphoff)
38{
39 struct iphdr _iph;
40 const struct iphdr *ih;
41 unsigned int logflags;
42
43 if (info->type == NF_LOG_TYPE_LOG)
44 logflags = info->u.log.logflags;
45 else
46 logflags = NF_LOG_MASK;
47
48 ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
49 if (ih == NULL) {
50 sb_add(m, "TRUNCATED");
51 return;
52 }
53
54 /* Important fields:
55 * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
56 /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
57 sb_add(m, "SRC=%pI4 DST=%pI4 ",
58 &ih->saddr, &ih->daddr);
59
60 /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
61 sb_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
62 ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
63 ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
64
65 /* Max length: 6 "CE DF MF " */
66 if (ntohs(ih->frag_off) & IP_CE)
67 sb_add(m, "CE ");
68 if (ntohs(ih->frag_off) & IP_DF)
69 sb_add(m, "DF ");
70 if (ntohs(ih->frag_off) & IP_MF)
71 sb_add(m, "MF ");
72
73 /* Max length: 11 "FRAG:65535 " */
74 if (ntohs(ih->frag_off) & IP_OFFSET)
75 sb_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
76
77 if ((logflags & IPT_LOG_IPOPT) &&
78 ih->ihl * 4 > sizeof(struct iphdr)) {
79 const unsigned char *op;
80 unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
81 unsigned int i, optsize;
82
83 optsize = ih->ihl * 4 - sizeof(struct iphdr);
84 op = skb_header_pointer(skb, iphoff+sizeof(_iph),
85 optsize, _opt);
86 if (op == NULL) {
87 sb_add(m, "TRUNCATED");
88 return;
89 }
90
91 /* Max length: 127 "OPT (" 15*4*2chars ") " */
92 sb_add(m, "OPT (");
93 for (i = 0; i < optsize; i++)
94 sb_add(m, "%02X", op[i]);
95 sb_add(m, ") ");
96 }
97
98 switch (ih->protocol) {
99 case IPPROTO_TCP: {
100 struct tcphdr _tcph;
101 const struct tcphdr *th;
102
103 /* Max length: 10 "PROTO=TCP " */
104 sb_add(m, "PROTO=TCP ");
105
106 if (ntohs(ih->frag_off) & IP_OFFSET)
107 break;
108
109 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
110 th = skb_header_pointer(skb, iphoff + ih->ihl * 4,
111 sizeof(_tcph), &_tcph);
112 if (th == NULL) {
113 sb_add(m, "INCOMPLETE [%u bytes] ",
114 skb->len - iphoff - ih->ihl*4);
115 break;
116 }
117
118 /* Max length: 20 "SPT=65535 DPT=65535 " */
119 sb_add(m, "SPT=%u DPT=%u ",
120 ntohs(th->source), ntohs(th->dest));
121 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
122 if (logflags & IPT_LOG_TCPSEQ)
123 sb_add(m, "SEQ=%u ACK=%u ",
124 ntohl(th->seq), ntohl(th->ack_seq));
125 /* Max length: 13 "WINDOW=65535 " */
126 sb_add(m, "WINDOW=%u ", ntohs(th->window));
127 /* Max length: 9 "RES=0x3F " */
128 sb_add(m, "RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
129 /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
130 if (th->cwr)
131 sb_add(m, "CWR ");
132 if (th->ece)
133 sb_add(m, "ECE ");
134 if (th->urg)
135 sb_add(m, "URG ");
136 if (th->ack)
137 sb_add(m, "ACK ");
138 if (th->psh)
139 sb_add(m, "PSH ");
140 if (th->rst)
141 sb_add(m, "RST ");
142 if (th->syn)
143 sb_add(m, "SYN ");
144 if (th->fin)
145 sb_add(m, "FIN ");
146 /* Max length: 11 "URGP=65535 " */
147 sb_add(m, "URGP=%u ", ntohs(th->urg_ptr));
148
149 if ((logflags & IPT_LOG_TCPOPT) &&
150 th->doff * 4 > sizeof(struct tcphdr)) {
151 unsigned char _opt[4 * 15 - sizeof(struct tcphdr)];
152 const unsigned char *op;
153 unsigned int i, optsize;
154
155 optsize = th->doff * 4 - sizeof(struct tcphdr);
156 op = skb_header_pointer(skb,
157 iphoff+ih->ihl*4+sizeof(_tcph),
158 optsize, _opt);
159 if (op == NULL) {
160 sb_add(m, "TRUNCATED");
161 return;
162 }
163
164 /* Max length: 127 "OPT (" 15*4*2chars ") " */
165 sb_add(m, "OPT (");
166 for (i = 0; i < optsize; i++)
167 sb_add(m, "%02X", op[i]);
168 sb_add(m, ") ");
169 }
170 break;
171 }
172 case IPPROTO_UDP:
173 case IPPROTO_UDPLITE: {
174 struct udphdr _udph;
175 const struct udphdr *uh;
176
177 if (ih->protocol == IPPROTO_UDP)
178 /* Max length: 10 "PROTO=UDP " */
179 sb_add(m, "PROTO=UDP " );
180 else /* Max length: 14 "PROTO=UDPLITE " */
181 sb_add(m, "PROTO=UDPLITE ");
182
183 if (ntohs(ih->frag_off) & IP_OFFSET)
184 break;
185
186 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
187 uh = skb_header_pointer(skb, iphoff+ih->ihl*4,
188 sizeof(_udph), &_udph);
189 if (uh == NULL) {
190 sb_add(m, "INCOMPLETE [%u bytes] ",
191 skb->len - iphoff - ih->ihl*4);
192 break;
193 }
194
195 /* Max length: 20 "SPT=65535 DPT=65535 " */
196 sb_add(m, "SPT=%u DPT=%u LEN=%u ",
197 ntohs(uh->source), ntohs(uh->dest),
198 ntohs(uh->len));
199 break;
200 }
201 case IPPROTO_ICMP: {
202 struct icmphdr _icmph;
203 const struct icmphdr *ich;
204 static const size_t required_len[NR_ICMP_TYPES+1]
205 = { [ICMP_ECHOREPLY] = 4,
206 [ICMP_DEST_UNREACH]
207 = 8 + sizeof(struct iphdr),
208 [ICMP_SOURCE_QUENCH]
209 = 8 + sizeof(struct iphdr),
210 [ICMP_REDIRECT]
211 = 8 + sizeof(struct iphdr),
212 [ICMP_ECHO] = 4,
213 [ICMP_TIME_EXCEEDED]
214 = 8 + sizeof(struct iphdr),
215 [ICMP_PARAMETERPROB]
216 = 8 + sizeof(struct iphdr),
217 [ICMP_TIMESTAMP] = 20,
218 [ICMP_TIMESTAMPREPLY] = 20,
219 [ICMP_ADDRESS] = 12,
220 [ICMP_ADDRESSREPLY] = 12 };
221
222 /* Max length: 11 "PROTO=ICMP " */
223 sb_add(m, "PROTO=ICMP ");
224
225 if (ntohs(ih->frag_off) & IP_OFFSET)
226 break;
227
228 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
229 ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
230 sizeof(_icmph), &_icmph);
231 if (ich == NULL) {
232 sb_add(m, "INCOMPLETE [%u bytes] ",
233 skb->len - iphoff - ih->ihl*4);
234 break;
235 }
236
237 /* Max length: 18 "TYPE=255 CODE=255 " */
238 sb_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code);
239
240 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
241 if (ich->type <= NR_ICMP_TYPES &&
242 required_len[ich->type] &&
243 skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
244 sb_add(m, "INCOMPLETE [%u bytes] ",
245 skb->len - iphoff - ih->ihl*4);
246 break;
247 }
248
249 switch (ich->type) {
250 case ICMP_ECHOREPLY:
251 case ICMP_ECHO:
252 /* Max length: 19 "ID=65535 SEQ=65535 " */
253 sb_add(m, "ID=%u SEQ=%u ",
254 ntohs(ich->un.echo.id),
255 ntohs(ich->un.echo.sequence));
256 break;
257
258 case ICMP_PARAMETERPROB:
259 /* Max length: 14 "PARAMETER=255 " */
260 sb_add(m, "PARAMETER=%u ",
261 ntohl(ich->un.gateway) >> 24);
262 break;
263 case ICMP_REDIRECT:
264 /* Max length: 24 "GATEWAY=255.255.255.255 " */
265 sb_add(m, "GATEWAY=%pI4 ", &ich->un.gateway);
266 /* Fall through */
267 case ICMP_DEST_UNREACH:
268 case ICMP_SOURCE_QUENCH:
269 case ICMP_TIME_EXCEEDED:
270 /* Max length: 3+maxlen */
271 if (!iphoff) { /* Only recurse once. */
272 sb_add(m, "[");
273 dump_packet(m, info, skb,
274 iphoff + ih->ihl*4+sizeof(_icmph));
275 sb_add(m, "] ");
276 }
277
278 /* Max length: 10 "MTU=65535 " */
279 if (ich->type == ICMP_DEST_UNREACH &&
280 ich->code == ICMP_FRAG_NEEDED)
281 sb_add(m, "MTU=%u ", ntohs(ich->un.frag.mtu));
282 }
283 break;
284 }
285 /* Max Length */
286 case IPPROTO_AH: {
287 struct ip_auth_hdr _ahdr;
288 const struct ip_auth_hdr *ah;
289
290 if (ntohs(ih->frag_off) & IP_OFFSET)
291 break;
292
293 /* Max length: 9 "PROTO=AH " */
294 sb_add(m, "PROTO=AH ");
295
296 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
297 ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
298 sizeof(_ahdr), &_ahdr);
299 if (ah == NULL) {
300 sb_add(m, "INCOMPLETE [%u bytes] ",
301 skb->len - iphoff - ih->ihl*4);
302 break;
303 }
304
305 /* Length: 15 "SPI=0xF1234567 " */
306 sb_add(m, "SPI=0x%x ", ntohl(ah->spi));
307 break;
308 }
309 case IPPROTO_ESP: {
310 struct ip_esp_hdr _esph;
311 const struct ip_esp_hdr *eh;
312
313 /* Max length: 10 "PROTO=ESP " */
314 sb_add(m, "PROTO=ESP ");
315
316 if (ntohs(ih->frag_off) & IP_OFFSET)
317 break;
318
319 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
320 eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
321 sizeof(_esph), &_esph);
322 if (eh == NULL) {
323 sb_add(m, "INCOMPLETE [%u bytes] ",
324 skb->len - iphoff - ih->ihl*4);
325 break;
326 }
327
328 /* Length: 15 "SPI=0xF1234567 " */
329 sb_add(m, "SPI=0x%x ", ntohl(eh->spi));
330 break;
331 }
332 /* Max length: 10 "PROTO 255 " */
333 default:
334 sb_add(m, "PROTO=%u ", ih->protocol);
335 }
336
337 /* Max length: 15 "UID=4294967295 " */
338 if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
339 read_lock_bh(&skb->sk->sk_callback_lock);
340 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
341 sb_add(m, "UID=%u GID=%u ",
342 skb->sk->sk_socket->file->f_cred->fsuid,
343 skb->sk->sk_socket->file->f_cred->fsgid);
344 read_unlock_bh(&skb->sk->sk_callback_lock);
345 }
346
347 /* Max length: 16 "MARK=0xFFFFFFFF " */
348 if (!iphoff && skb->mark)
349 sb_add(m, "MARK=0x%x ", skb->mark);
350
351 /* Proto Max log string length */
352 /* IP: 40+46+6+11+127 = 230 */
353 /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
354 /* UDP: 10+max(25,20) = 35 */
355 /* UDPLITE: 14+max(25,20) = 39 */
356 /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
357 /* ESP: 10+max(25)+15 = 50 */
358 /* AH: 9+max(25)+15 = 49 */
359 /* unknown: 10 */
360
361 /* (ICMP allows recursion one level deep) */
362 /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */
363 /* maxlen = 230+ 91 + 230 + 252 = 803 */
364}
365
366static void dump_mac_header(struct sbuff *m,
367 const struct nf_loginfo *info,
368 const struct sk_buff *skb)
369{
370 struct net_device *dev = skb->dev;
371 unsigned int logflags = 0;
372
373 if (info->type == NF_LOG_TYPE_LOG)
374 logflags = info->u.log.logflags;
375
376 if (!(logflags & IPT_LOG_MACDECODE))
377 goto fallback;
378
379 switch (dev->type) {
380 case ARPHRD_ETHER:
381 sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
382 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
383 ntohs(eth_hdr(skb)->h_proto));
384 return;
385 default:
386 break;
387 }
388
389fallback:
390 sb_add(m, "MAC=");
391 if (dev->hard_header_len &&
392 skb->mac_header != skb->network_header) {
393 const unsigned char *p = skb_mac_header(skb);
394 unsigned int i;
395
396 sb_add(m, "%02x", *p++);
397 for (i = 1; i < dev->hard_header_len; i++, p++)
398 sb_add(m, ":%02x", *p);
399 }
400 sb_add(m, " ");
401}
402
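/* Example of the two output forms above (addresses are illustrative): with
 * IPT_LOG_MACDECODE set on an Ethernet device the log entry contains
 * "MACSRC=00:11:22:33:44:55 MACDST=66:77:88:99:aa:bb MACPROTO=0800 ",
 * while the fallback path dumps the raw link-layer header bytes, e.g.
 * "MAC=66:77:88:99:aa:bb:00:11:22:33:44:55:08:00 " (destination first,
 * then source, then the protocol field). */
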
403static struct nf_loginfo default_loginfo = {
404 .type = NF_LOG_TYPE_LOG,
405 .u = {
406 .log = {
407 .level = 5,
408 .logflags = NF_LOG_MASK,
409 },
410 },
411};
412
413static void
414ipt_log_packet(u_int8_t pf,
415 unsigned int hooknum,
416 const struct sk_buff *skb,
417 const struct net_device *in,
418 const struct net_device *out,
419 const struct nf_loginfo *loginfo,
420 const char *prefix)
421{
422 struct sbuff *m = sb_open();
423
424 if (!loginfo)
425 loginfo = &default_loginfo;
426
427 sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
428 prefix,
429 in ? in->name : "",
430 out ? out->name : "");
431#ifdef CONFIG_BRIDGE_NETFILTER
432 if (skb->nf_bridge) {
433 const struct net_device *physindev;
434 const struct net_device *physoutdev;
435
436 physindev = skb->nf_bridge->physindev;
437 if (physindev && in != physindev)
438 sb_add(m, "PHYSIN=%s ", physindev->name);
439 physoutdev = skb->nf_bridge->physoutdev;
440 if (physoutdev && out != physoutdev)
441 sb_add(m, "PHYSOUT=%s ", physoutdev->name);
442 }
443#endif
444
445 if (in != NULL)
446 dump_mac_header(m, loginfo, skb);
447
448 dump_packet(m, loginfo, skb, 0);
449
450 sb_close(m);
451}
452
453static unsigned int
454log_tg(struct sk_buff *skb, const struct xt_action_param *par)
455{
456 const struct ipt_log_info *loginfo = par->targinfo;
457 struct nf_loginfo li;
458
459 li.type = NF_LOG_TYPE_LOG;
460 li.u.log.level = loginfo->level;
461 li.u.log.logflags = loginfo->logflags;
462
463 ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, par->out, &li,
464 loginfo->prefix);
465 return XT_CONTINUE;
466}
467
468static int log_tg_check(const struct xt_tgchk_param *par)
469{
470 const struct ipt_log_info *loginfo = par->targinfo;
471
472 if (loginfo->level >= 8) {
473 pr_debug("level %u >= 8\n", loginfo->level);
474 return -EINVAL;
475 }
476 if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
477 pr_debug("prefix is not null-terminated\n");
478 return -EINVAL;
479 }
480 return 0;
481}
482
483static struct xt_target log_tg_reg __read_mostly = {
484 .name = "LOG",
485 .family = NFPROTO_IPV4,
486 .target = log_tg,
487 .targetsize = sizeof(struct ipt_log_info),
488 .checkentry = log_tg_check,
489 .me = THIS_MODULE,
490};
491
492static struct nf_logger ipt_log_logger __read_mostly = {
493 .name = "ipt_LOG",
494 .logfn = &ipt_log_packet,
495 .me = THIS_MODULE,
496};
497
498static int __init log_tg_init(void)
499{
500 int ret;
501
502 ret = xt_register_target(&log_tg_reg);
503 if (ret < 0)
504 return ret;
505 nf_log_register(NFPROTO_IPV4, &ipt_log_logger);
506 return 0;
507}
508
509static void __exit log_tg_exit(void)
510{
511 nf_log_unregister(&ipt_log_logger);
512 xt_unregister_target(&log_tg_reg);
513}
514
515module_init(log_tg_init);
516module_exit(log_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
new file mode 100644
index 00000000000..6cdb298f103
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -0,0 +1,98 @@
1/* NETMAP - static NAT mapping of IP network addresses (1:1).
2 * The mapping can be applied to source (POSTROUTING),
3 * destination (PREROUTING), or both (with separate rules).
4 */
5
6/* (C) 2000-2001 Svenning Soerensen <svenning@post5.tele.dk>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/ip.h>
14#include <linux/module.h>
15#include <linux/netdevice.h>
16#include <linux/netfilter.h>
17#include <linux/netfilter_ipv4.h>
18#include <linux/netfilter/x_tables.h>
19#include <net/netfilter/nf_nat_rule.h>
20
21MODULE_LICENSE("GPL");
22MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
23MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets");
24
25static int netmap_tg_check(const struct xt_tgchk_param *par)
26{
27 const struct nf_nat_multi_range_compat *mr = par->targinfo;
28
29 if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
30 pr_debug("bad MAP_IPS.\n");
31 return -EINVAL;
32 }
33 if (mr->rangesize != 1) {
34 pr_debug("bad rangesize %u.\n", mr->rangesize);
35 return -EINVAL;
36 }
37 return 0;
38}
39
40static unsigned int
41netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
42{
43 struct nf_conn *ct;
44 enum ip_conntrack_info ctinfo;
45 __be32 new_ip, netmask;
46 const struct nf_nat_multi_range_compat *mr = par->targinfo;
47 struct nf_nat_range newrange;
48
49 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
50 par->hooknum == NF_INET_POST_ROUTING ||
51 par->hooknum == NF_INET_LOCAL_OUT ||
52 par->hooknum == NF_INET_LOCAL_IN);
53 ct = nf_ct_get(skb, &ctinfo);
54
55 netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
56
57 if (par->hooknum == NF_INET_PRE_ROUTING ||
58 par->hooknum == NF_INET_LOCAL_OUT)
59 new_ip = ip_hdr(skb)->daddr & ~netmask;
60 else
61 new_ip = ip_hdr(skb)->saddr & ~netmask;
62 new_ip |= mr->range[0].min_ip & netmask;
63
64 newrange = ((struct nf_nat_range)
65 { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
66 new_ip, new_ip,
67 mr->range[0].min, mr->range[0].max });
68
69 /* Hand modified range to generic setup. */
70 return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum));
71}
72
73static struct xt_target netmap_tg_reg __read_mostly = {
74 .name = "NETMAP",
75 .family = NFPROTO_IPV4,
76 .target = netmap_tg,
77 .targetsize = sizeof(struct nf_nat_multi_range_compat),
78 .table = "nat",
79 .hooks = (1 << NF_INET_PRE_ROUTING) |
80 (1 << NF_INET_POST_ROUTING) |
81 (1 << NF_INET_LOCAL_OUT) |
82 (1 << NF_INET_LOCAL_IN),
83 .checkentry = netmap_tg_check,
84 .me = THIS_MODULE
85};
86
87static int __init netmap_tg_init(void)
88{
89 return xt_register_target(&netmap_tg_reg);
90}
91
92static void __exit netmap_tg_exit(void)
93{
94 xt_unregister_target(&netmap_tg_reg);
95}
96
97module_init(netmap_tg_init);
98module_exit(netmap_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
new file mode 100644
index 00000000000..18a0656505a
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -0,0 +1,110 @@
1/* Redirect. Simple mapping which alters dst to a local IP address. */
2/* (C) 1999-2001 Paul `Rusty' Russell
3 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10#include <linux/types.h>
11#include <linux/ip.h>
12#include <linux/timer.h>
13#include <linux/module.h>
14#include <linux/netfilter.h>
15#include <linux/netdevice.h>
16#include <linux/if.h>
17#include <linux/inetdevice.h>
18#include <net/protocol.h>
19#include <net/checksum.h>
20#include <linux/netfilter_ipv4.h>
21#include <linux/netfilter/x_tables.h>
22#include <net/netfilter/nf_nat_rule.h>
23
24MODULE_LICENSE("GPL");
25MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
26MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
27
28/* FIXME: Take multiple ranges --RR */
29static int redirect_tg_check(const struct xt_tgchk_param *par)
30{
31 const struct nf_nat_multi_range_compat *mr = par->targinfo;
32
33 if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
34 pr_debug("bad MAP_IPS.\n");
35 return -EINVAL;
36 }
37 if (mr->rangesize != 1) {
38 pr_debug("bad rangesize %u.\n", mr->rangesize);
39 return -EINVAL;
40 }
41 return 0;
42}
43
44static unsigned int
45redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
46{
47 struct nf_conn *ct;
48 enum ip_conntrack_info ctinfo;
49 __be32 newdst;
50 const struct nf_nat_multi_range_compat *mr = par->targinfo;
51 struct nf_nat_range newrange;
52
53 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
54 par->hooknum == NF_INET_LOCAL_OUT);
55
56 ct = nf_ct_get(skb, &ctinfo);
57 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
58
59 /* Local packets: make them go to loopback */
60 if (par->hooknum == NF_INET_LOCAL_OUT)
61 newdst = htonl(0x7F000001);
62 else {
63 struct in_device *indev;
64 struct in_ifaddr *ifa;
65
66 newdst = 0;
67
68 rcu_read_lock();
69 indev = __in_dev_get_rcu(skb->dev);
70 if (indev && (ifa = indev->ifa_list))
71 newdst = ifa->ifa_local;
72 rcu_read_unlock();
73
74 if (!newdst)
75 return NF_DROP;
76 }
77
78 /* Transfer from original range. */
79 newrange = ((struct nf_nat_range)
80 { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
81 newdst, newdst,
82 mr->range[0].min, mr->range[0].max });
83
84 /* Hand modified range to generic setup. */
85 return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_DST);
86}
87
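/* For illustration: used from PREROUTING, the target rewrites the destination
 * to the primary address of the interface the packet arrived on (e.g. a packet
 * reaching eth0 configured as 192.0.2.1 is redirected to 192.0.2.1); used from
 * LOCAL_OUT it always rewrites to 127.0.0.1. If the incoming interface has no
 * address configured, the packet is dropped. Addresses are example values only. */
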
88static struct xt_target redirect_tg_reg __read_mostly = {
89 .name = "REDIRECT",
90 .family = NFPROTO_IPV4,
91 .target = redirect_tg,
92 .targetsize = sizeof(struct nf_nat_multi_range_compat),
93 .table = "nat",
94 .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
95 .checkentry = redirect_tg_check,
96 .me = THIS_MODULE,
97};
98
99static int __init redirect_tg_init(void)
100{
101 return xt_register_target(&redirect_tg_reg);
102}
103
104static void __exit redirect_tg_exit(void)
105{
106 xt_unregister_target(&redirect_tg_reg);
107}
108
109module_init(redirect_tg_init);
110module_exit(redirect_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
new file mode 100644
index 00000000000..2b57e52c746
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -0,0 +1,127 @@
1/* IP tables module for matching the value of the IPv4 and TCP ECN bits
2 *
3 * (C) 2002 by Harald Welte <laforge@gnumonks.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10#include <linux/in.h>
11#include <linux/ip.h>
12#include <net/ip.h>
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/tcp.h>
16
17#include <linux/netfilter/x_tables.h>
18#include <linux/netfilter_ipv4/ip_tables.h>
19#include <linux/netfilter_ipv4/ipt_ecn.h>
20
21MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
22MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match for IPv4");
23MODULE_LICENSE("GPL");
24
25static inline bool match_ip(const struct sk_buff *skb,
26 const struct ipt_ecn_info *einfo)
27{
28 return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^
29 !!(einfo->invert & IPT_ECN_OP_MATCH_IP);
30}
31
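/* For reference: IPT_ECN_IP_MASK selects the two low-order bits of the
 * TOS byte, i.e. the ECN field of RFC 3168, whose codepoints are
 * 0 = Not-ECT, 1 = ECT(1), 2 = ECT(0) and 3 = CE. As a worked example,
 * a TOS byte of 0x2a carries DSCP 0x0a and an ECN field of 2, so it
 * matches a rule asking for ECT(0). */
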
32static inline bool match_tcp(const struct sk_buff *skb,
33 const struct ipt_ecn_info *einfo,
34 bool *hotdrop)
35{
36 struct tcphdr _tcph;
37 const struct tcphdr *th;
38
39 /* In practice, TCP match does this, so can't fail. But let's
40 * be good citizens.
41 */
42 th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
43 if (th == NULL) {
44 *hotdrop = false;
45 return false;
46 }
47
48 if (einfo->operation & IPT_ECN_OP_MATCH_ECE) {
49 if (einfo->invert & IPT_ECN_OP_MATCH_ECE) {
50 if (th->ece == 1)
51 return false;
52 } else {
53 if (th->ece == 0)
54 return false;
55 }
56 }
57
58 if (einfo->operation & IPT_ECN_OP_MATCH_CWR) {
59 if (einfo->invert & IPT_ECN_OP_MATCH_CWR) {
60 if (th->cwr == 1)
61 return false;
62 } else {
63 if (th->cwr == 0)
64 return false;
65 }
66 }
67
68 return true;
69}
70
71static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
72{
73 const struct ipt_ecn_info *info = par->matchinfo;
74
75 if (info->operation & IPT_ECN_OP_MATCH_IP)
76 if (!match_ip(skb, info))
77 return false;
78
79 if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
80 if (!match_tcp(skb, info, &par->hotdrop))
81 return false;
82 }
83
84 return true;
85}
86
87static int ecn_mt_check(const struct xt_mtchk_param *par)
88{
89 const struct ipt_ecn_info *info = par->matchinfo;
90 const struct ipt_ip *ip = par->entryinfo;
91
92 if (info->operation & IPT_ECN_OP_MATCH_MASK)
93 return -EINVAL;
94
95 if (info->invert & IPT_ECN_OP_MATCH_MASK)
96 return -EINVAL;
97
98 if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) &&
99 (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
100 pr_info("cannot match TCP bits in rule for non-tcp packets\n");
101 return -EINVAL;
102 }
103
104 return 0;
105}
106
107static struct xt_match ecn_mt_reg __read_mostly = {
108 .name = "ecn",
109 .family = NFPROTO_IPV4,
110 .match = ecn_mt,
111 .matchsize = sizeof(struct ipt_ecn_info),
112 .checkentry = ecn_mt_check,
113 .me = THIS_MODULE,
114};
115
116static int __init ecn_mt_init(void)
117{
118 return xt_register_match(&ecn_mt_reg);
119}
120
121static void __exit ecn_mt_exit(void)
122{
123 xt_unregister_match(&ecn_mt_reg);
124}
125
126module_init(ecn_mt_init);
127module_exit(ecn_mt_exit);
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
new file mode 100644
index 00000000000..703f366fd23
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -0,0 +1,85 @@
1/* Amanda extension for TCP NAT alteration.
2 * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
3 * based on a copy of HW's ip_nat_irc.c as well as other modules
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 */
10
11#include <linux/kernel.h>
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/udp.h>
15
16#include <net/netfilter/nf_nat_helper.h>
17#include <net/netfilter/nf_nat_rule.h>
18#include <net/netfilter/nf_conntrack_helper.h>
19#include <net/netfilter/nf_conntrack_expect.h>
20#include <linux/netfilter/nf_conntrack_amanda.h>
21
22MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
23MODULE_DESCRIPTION("Amanda NAT helper");
24MODULE_LICENSE("GPL");
25MODULE_ALIAS("ip_nat_amanda");
26
27static unsigned int help(struct sk_buff *skb,
28 enum ip_conntrack_info ctinfo,
29 unsigned int matchoff,
30 unsigned int matchlen,
31 struct nf_conntrack_expect *exp)
32{
33 char buffer[sizeof("65535")];
34 u_int16_t port;
35 unsigned int ret;
36
37 /* Connection comes from client. */
38 exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
39 exp->dir = IP_CT_DIR_ORIGINAL;
40
41 /* When you see the packet, we need to NAT it the same as
42 * this one (ie. same IP: it will be TCP and master is UDP). */
43 exp->expectfn = nf_nat_follow_master;
44
45 /* Try to get same port: if not, try to change it. */
46 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
47 int res;
48
49 exp->tuple.dst.u.tcp.port = htons(port);
50 res = nf_ct_expect_related(exp);
51 if (res == 0)
52 break;
53 else if (res != -EBUSY) {
54 port = 0;
55 break;
56 }
57 }
58
59 if (port == 0)
60 return NF_DROP;
61
62 sprintf(buffer, "%u", port);
63 ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo,
64 matchoff, matchlen,
65 buffer, strlen(buffer));
66 if (ret != NF_ACCEPT)
67 nf_ct_unexpect_related(exp);
68 return ret;
69}
70
71static void __exit nf_nat_amanda_fini(void)
72{
73 rcu_assign_pointer(nf_nat_amanda_hook, NULL);
74 synchronize_rcu();
75}
76
77static int __init nf_nat_amanda_init(void)
78{
79 BUG_ON(nf_nat_amanda_hook != NULL);
80 rcu_assign_pointer(nf_nat_amanda_hook, help);
81 return 0;
82}
83
84module_init(nf_nat_amanda_init);
85module_exit(nf_nat_amanda_fini);
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
new file mode 100644
index 00000000000..3346de5d94d
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -0,0 +1,779 @@
1/* NAT for netfilter; shared with compatibility layer. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/types.h>
13#include <linux/timer.h>
14#include <linux/skbuff.h>
15#include <linux/gfp.h>
16#include <net/checksum.h>
17#include <net/icmp.h>
18#include <net/ip.h>
19#include <net/tcp.h> /* For tcp_prot in getorigdst */
20#include <linux/icmp.h>
21#include <linux/udp.h>
22#include <linux/jhash.h>
23
24#include <linux/netfilter_ipv4.h>
25#include <net/netfilter/nf_conntrack.h>
26#include <net/netfilter/nf_conntrack_core.h>
27#include <net/netfilter/nf_nat.h>
28#include <net/netfilter/nf_nat_protocol.h>
29#include <net/netfilter/nf_nat_core.h>
30#include <net/netfilter/nf_nat_helper.h>
31#include <net/netfilter/nf_conntrack_helper.h>
32#include <net/netfilter/nf_conntrack_l3proto.h>
33#include <net/netfilter/nf_conntrack_l4proto.h>
34#include <net/netfilter/nf_conntrack_zones.h>
35
36static DEFINE_SPINLOCK(nf_nat_lock);
37
38static struct nf_conntrack_l3proto *l3proto __read_mostly;
39
40#define MAX_IP_NAT_PROTO 256
41static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO]
42 __read_mostly;
43
44static inline const struct nf_nat_protocol *
45__nf_nat_proto_find(u_int8_t protonum)
46{
47 return rcu_dereference(nf_nat_protos[protonum]);
48}
49
50/* We keep an extra hash for each conntrack, for fast searching. */
51static inline unsigned int
52hash_by_src(const struct net *net, u16 zone,
53 const struct nf_conntrack_tuple *tuple)
54{
55 unsigned int hash;
56
57 /* Original src, to ensure we map it consistently if poss. */
58 hash = jhash_3words((__force u32)tuple->src.u3.ip,
59 (__force u32)tuple->src.u.all ^ zone,
60 tuple->dst.protonum, 0);
61 return ((u64)hash * net->ipv4.nat_htable_size) >> 32;
62}
63
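/* Note on the final scaling step: multiplying the 32-bit hash by the
 * table size and keeping the upper 32 bits of the 64-bit product maps
 * the hash roughly uniformly onto [0, nat_htable_size) without a
 * modulo. For example, with 4096 buckets a hash of 0x80000000 selects
 * bucket ((u64)0x80000000 * 4096) >> 32 == 2048. */
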
64/* Is this tuple already taken? (not by us) */
65int
66nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
67 const struct nf_conn *ignored_conntrack)
68{
69 /* Conntrack doesn't keep track of outgoing tuples; only
70 incoming ones. NAT means they don't have a fixed mapping,
71 so we invert the tuple and look for the incoming reply.
72
73 We could keep a separate hash if this proves too slow. */
74 struct nf_conntrack_tuple reply;
75
76 nf_ct_invert_tuplepr(&reply, tuple);
77 return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
78}
79EXPORT_SYMBOL(nf_nat_used_tuple);
80
81/* If we source map this tuple so reply looks like reply_tuple, will
82 * that meet the constraints of range. */
83static int
84in_range(const struct nf_conntrack_tuple *tuple,
85 const struct nf_nat_range *range)
86{
87 const struct nf_nat_protocol *proto;
88 int ret = 0;
89
90 /* If we are supposed to map IPs, then we must be in the
91 range specified, otherwise let this drag us onto a new src IP. */
92 if (range->flags & IP_NAT_RANGE_MAP_IPS) {
93 if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) ||
94 ntohl(tuple->src.u3.ip) > ntohl(range->max_ip))
95 return 0;
96 }
97
98 rcu_read_lock();
99 proto = __nf_nat_proto_find(tuple->dst.protonum);
100 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
101 proto->in_range(tuple, IP_NAT_MANIP_SRC,
102 &range->min, &range->max))
103 ret = 1;
104 rcu_read_unlock();
105
106 return ret;
107}
108
109static inline int
110same_src(const struct nf_conn *ct,
111 const struct nf_conntrack_tuple *tuple)
112{
113 const struct nf_conntrack_tuple *t;
114
115 t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
116 return (t->dst.protonum == tuple->dst.protonum &&
117 t->src.u3.ip == tuple->src.u3.ip &&
118 t->src.u.all == tuple->src.u.all);
119}
120
121/* Only called for SRC manip */
122static int
123find_appropriate_src(struct net *net, u16 zone,
124 const struct nf_conntrack_tuple *tuple,
125 struct nf_conntrack_tuple *result,
126 const struct nf_nat_range *range)
127{
128 unsigned int h = hash_by_src(net, zone, tuple);
129 const struct nf_conn_nat *nat;
130 const struct nf_conn *ct;
131 const struct hlist_node *n;
132
133 rcu_read_lock();
134 hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) {
135 ct = nat->ct;
136 if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) {
137 /* Copy source part from reply tuple. */
138 nf_ct_invert_tuplepr(result,
139 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
140 result->dst = tuple->dst;
141
142 if (in_range(result, range)) {
143 rcu_read_unlock();
144 return 1;
145 }
146 }
147 }
148 rcu_read_unlock();
149 return 0;
150}
151
152/* For [FUTURE] fragmentation handling, we want the least-used
153 src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
154 if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
155 1-65535, we don't do pro-rata allocation based on ports; we choose
156 the ip with the lowest src-ip/dst-ip/proto usage.
157*/
158static void
159find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
160 const struct nf_nat_range *range,
161 const struct nf_conn *ct,
162 enum nf_nat_manip_type maniptype)
163{
164 __be32 *var_ipp;
165 /* Host order */
166 u_int32_t minip, maxip, j;
167
168 /* No IP mapping? Do nothing. */
169 if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
170 return;
171
172 if (maniptype == IP_NAT_MANIP_SRC)
173 var_ipp = &tuple->src.u3.ip;
174 else
175 var_ipp = &tuple->dst.u3.ip;
176
177 /* Fast path: only one choice. */
178 if (range->min_ip == range->max_ip) {
179 *var_ipp = range->min_ip;
180 return;
181 }
182
183 /* Hashing source and destination IPs gives a fairly even
184 * spread in practice (if there are a small number of IPs
185 * involved, there usually aren't that many connections
186 * anyway). The consistency means that servers see the same
187 * client coming from the same IP (some Internet Banking sites
188 * like this), even across reboots. */
189 minip = ntohl(range->min_ip);
190 maxip = ntohl(range->max_ip);
191 j = jhash_2words((__force u32)tuple->src.u3.ip,
192 range->flags & IP_NAT_RANGE_PERSISTENT ?
193 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0);
194 j = ((u64)j * (maxip - minip + 1)) >> 32;
195 *var_ipp = htonl(minip + j);
196}
197
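/* Worked example of the selection above (addresses and hash value are
 * illustrative): with a range of 10.0.0.1 - 10.0.0.254 (254 addresses)
 * and IP_NAT_RANGE_PERSISTENT set, j depends only on the client's
 * source address, so a hash of 0x40000000 always yields offset
 * ((u64)0x40000000 * 254) >> 32 == 63 and hence 10.0.0.64 for that
 * client. Without the PERSISTENT flag the destination address (xored
 * with the zone) is mixed in too, so the same client may be mapped to
 * different pool addresses when talking to different servers. */
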
198/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
199 * we change the source to map into the range. For NF_INET_PRE_ROUTING
200 * and NF_INET_LOCAL_OUT, we change the destination to map into the
201 * range. It might not be possible to get a unique tuple, but we try.
202 * At worst (or if we race), we will end up with a final duplicate in
203 * __ip_conntrack_confirm and drop the packet. */
204static void
205get_unique_tuple(struct nf_conntrack_tuple *tuple,
206 const struct nf_conntrack_tuple *orig_tuple,
207 const struct nf_nat_range *range,
208 struct nf_conn *ct,
209 enum nf_nat_manip_type maniptype)
210{
211 struct net *net = nf_ct_net(ct);
212 const struct nf_nat_protocol *proto;
213 u16 zone = nf_ct_zone(ct);
214
215 /* 1) If this srcip/proto/src-proto-part is currently mapped,
216 and that same mapping gives a unique tuple within the given
217 range, use that.
218
219 This is only required for source (ie. NAT/masq) mappings.
220 So far, we don't do local source mappings, so multiple
221 manips are not an issue. */
222 if (maniptype == IP_NAT_MANIP_SRC &&
223 !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
224 /* try the original tuple first */
225 if (in_range(orig_tuple, range)) {
226 if (!nf_nat_used_tuple(orig_tuple, ct)) {
227 *tuple = *orig_tuple;
228 return;
229 }
230 } else if (find_appropriate_src(net, zone, orig_tuple, tuple,
231 range)) {
232 pr_debug("get_unique_tuple: Found current src map\n");
233 if (!nf_nat_used_tuple(tuple, ct))
234 return;
235 }
236 }
237
238 /* 2) Select the least-used IP/proto combination in the given
239 range. */
240 *tuple = *orig_tuple;
241 find_best_ips_proto(zone, tuple, range, ct, maniptype);
242
243 /* 3) The per-protocol part of the manip is made to map into
244 the range to make a unique tuple. */
245
246 rcu_read_lock();
247 proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
248
249 /* Only bother mapping if it's not already in range and unique */
250 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
251 if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) {
252 if (proto->in_range(tuple, maniptype, &range->min,
253 &range->max) &&
254 (range->min.all == range->max.all ||
255 !nf_nat_used_tuple(tuple, ct)))
256 goto out;
257 } else if (!nf_nat_used_tuple(tuple, ct)) {
258 goto out;
259 }
260 }
261
262 /* Last chance: get protocol to try to obtain unique tuple. */
263 proto->unique_tuple(tuple, range, maniptype, ct);
264out:
265 rcu_read_unlock();
266}
267
268unsigned int
269nf_nat_setup_info(struct nf_conn *ct,
270 const struct nf_nat_range *range,
271 enum nf_nat_manip_type maniptype)
272{
273 struct net *net = nf_ct_net(ct);
274 struct nf_conntrack_tuple curr_tuple, new_tuple;
275 struct nf_conn_nat *nat;
276
277 /* nat helper or nfctnetlink also setup binding */
278 nat = nfct_nat(ct);
279 if (!nat) {
280 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
281 if (nat == NULL) {
282 pr_debug("failed to add NAT extension\n");
283 return NF_ACCEPT;
284 }
285 }
286
287 NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC ||
288 maniptype == IP_NAT_MANIP_DST);
289 BUG_ON(nf_nat_initialized(ct, maniptype));
290
291 /* What we've got will look like inverse of reply. Normally
292 this is what is in the conntrack, except for prior
293 manipulations (future optimization: if num_manips == 0,
294 orig_tp =
295 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
296 nf_ct_invert_tuplepr(&curr_tuple,
297 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
298
299 get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
300
301 if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
302 struct nf_conntrack_tuple reply;
303
304 /* Alter conntrack table so will recognize replies. */
305 nf_ct_invert_tuplepr(&reply, &new_tuple);
306 nf_conntrack_alter_reply(ct, &reply);
307
308 /* Non-atomic: we own this at the moment. */
309 if (maniptype == IP_NAT_MANIP_SRC)
310 ct->status |= IPS_SRC_NAT;
311 else
312 ct->status |= IPS_DST_NAT;
313 }
314
315 if (maniptype == IP_NAT_MANIP_SRC) {
316 unsigned int srchash;
317
318 srchash = hash_by_src(net, nf_ct_zone(ct),
319 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
320 spin_lock_bh(&nf_nat_lock);
321 /* nf_conntrack_alter_reply might re-allocate the extension area */
322 nat = nfct_nat(ct);
323 nat->ct = ct;
324 hlist_add_head_rcu(&nat->bysource,
325 &net->ipv4.nat_bysource[srchash]);
326 spin_unlock_bh(&nf_nat_lock);
327 }
328
329 /* It's done. */
330 if (maniptype == IP_NAT_MANIP_DST)
331 ct->status |= IPS_DST_NAT_DONE;
332 else
333 ct->status |= IPS_SRC_NAT_DONE;
334
335 return NF_ACCEPT;
336}
337EXPORT_SYMBOL(nf_nat_setup_info);
338
339/* Returns true if succeeded. */
340static bool
341manip_pkt(u_int16_t proto,
342 struct sk_buff *skb,
343 unsigned int iphdroff,
344 const struct nf_conntrack_tuple *target,
345 enum nf_nat_manip_type maniptype)
346{
347 struct iphdr *iph;
348 const struct nf_nat_protocol *p;
349
350 if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
351 return false;
352
353 iph = (void *)skb->data + iphdroff;
354
355 /* Manipulate the protocol part. */
356
357 /* rcu_read_lock()ed by nf_hook_slow */
358 p = __nf_nat_proto_find(proto);
359 if (!p->manip_pkt(skb, iphdroff, target, maniptype))
360 return false;
361
362 iph = (void *)skb->data + iphdroff;
363
364 if (maniptype == IP_NAT_MANIP_SRC) {
365 csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
366 iph->saddr = target->src.u3.ip;
367 } else {
368 csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
369 iph->daddr = target->dst.u3.ip;
370 }
371 return true;
372}
373
374/* Do packet manipulations according to nf_nat_setup_info. */
375unsigned int nf_nat_packet(struct nf_conn *ct,
376 enum ip_conntrack_info ctinfo,
377 unsigned int hooknum,
378 struct sk_buff *skb)
379{
380 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
381 unsigned long statusbit;
382 enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
383
384 if (mtype == IP_NAT_MANIP_SRC)
385 statusbit = IPS_SRC_NAT;
386 else
387 statusbit = IPS_DST_NAT;
388
389 /* Invert if this is reply dir. */
390 if (dir == IP_CT_DIR_REPLY)
391 statusbit ^= IPS_NAT_MASK;
392
393 /* Non-atomic: these bits don't change. */
394 if (ct->status & statusbit) {
395 struct nf_conntrack_tuple target;
396
397 /* We are aiming to look like inverse of other direction. */
398 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
399
400 if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype))
401 return NF_DROP;
402 }
403 return NF_ACCEPT;
404}
405EXPORT_SYMBOL_GPL(nf_nat_packet);
406
407/* Dir is direction ICMP is coming from (opposite to packet it contains) */
408int nf_nat_icmp_reply_translation(struct nf_conn *ct,
409 enum ip_conntrack_info ctinfo,
410 unsigned int hooknum,
411 struct sk_buff *skb)
412{
413 struct {
414 struct icmphdr icmp;
415 struct iphdr ip;
416 } *inside;
417 const struct nf_conntrack_l4proto *l4proto;
418 struct nf_conntrack_tuple inner, target;
419 int hdrlen = ip_hdrlen(skb);
420 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
421 unsigned long statusbit;
422 enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
423
424 if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
425 return 0;
426
427 inside = (void *)skb->data + hdrlen;
428
429 /* We're actually going to mangle it beyond trivial checksum
430 adjustment, so make sure the current checksum is correct. */
431 if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
432 return 0;
433
434 /* Must be RELATED */
435 NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED ||
436 skb->nfctinfo == IP_CT_RELATED_REPLY);
437
438 /* Redirects on non-null nats must be dropped, else they'll
439 start talking to each other without our translation, and be
440 confused... --RR */
441 if (inside->icmp.type == ICMP_REDIRECT) {
442 /* If NAT isn't finished, assume it and drop. */
443 if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
444 return 0;
445
446 if (ct->status & IPS_NAT_MASK)
447 return 0;
448 }
449
450 if (manip == IP_NAT_MANIP_SRC)
451 statusbit = IPS_SRC_NAT;
452 else
453 statusbit = IPS_DST_NAT;
454
455 /* Invert if this is reply dir. */
456 if (dir == IP_CT_DIR_REPLY)
457 statusbit ^= IPS_NAT_MASK;
458
459 if (!(ct->status & statusbit))
460 return 1;
461
462 pr_debug("icmp_reply_translation: translating error %p manip %u "
463 "dir %s\n", skb, manip,
464 dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
465
466 /* rcu_read_lock()ed by nf_hook_slow */
467 l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
468
469 if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr),
470 (hdrlen +
471 sizeof(struct icmphdr) + inside->ip.ihl * 4),
472 (u_int16_t)AF_INET, inside->ip.protocol,
473 &inner, l3proto, l4proto))
474 return 0;
475
476 /* Change inner back to look like incoming packet. We do the
477 opposite manip on this hook to normal, because it might not
478 pass all hooks (locally-generated ICMP). Consider incoming
479 packet: PREROUTING (DST manip), routing produces ICMP, goes
480 through POSTROUTING (which must correct the DST manip). */
481 if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp),
482 &ct->tuplehash[!dir].tuple, !manip))
483 return 0;
484
485 if (skb->ip_summed != CHECKSUM_PARTIAL) {
486 /* Reload "inside" here, since manip_pkt may have moved skb data. */
487 inside = (void *)skb->data + hdrlen;
488 inside->icmp.checksum = 0;
489 inside->icmp.checksum =
490 csum_fold(skb_checksum(skb, hdrlen,
491 skb->len - hdrlen, 0));
492 }
493
494 /* Change outer to look like the reply to an incoming packet
495 * (proto 0 means don't invert per-proto part). */
496 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
497 if (!manip_pkt(0, skb, 0, &target, manip))
498 return 0;
499
500 return 1;
501}
502EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
503
504/* Protocol registration. */
505int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
506{
507 int ret = 0;
508
509 spin_lock_bh(&nf_nat_lock);
510 if (rcu_dereference_protected(
511 nf_nat_protos[proto->protonum],
512 lockdep_is_held(&nf_nat_lock)
513 ) != &nf_nat_unknown_protocol) {
514 ret = -EBUSY;
515 goto out;
516 }
517 rcu_assign_pointer(nf_nat_protos[proto->protonum], proto);
518 out:
519 spin_unlock_bh(&nf_nat_lock);
520 return ret;
521}
522EXPORT_SYMBOL(nf_nat_protocol_register);
523
524/* No one stores the protocol anywhere; simply delete it. */
525void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
526{
527 spin_lock_bh(&nf_nat_lock);
528 rcu_assign_pointer(nf_nat_protos[proto->protonum],
529 &nf_nat_unknown_protocol);
530 spin_unlock_bh(&nf_nat_lock);
531 synchronize_rcu();
532}
533EXPORT_SYMBOL(nf_nat_protocol_unregister);
534
535/* No one is using the conntrack by the time this is called. */
536static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
537{
538 struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
539
540 if (nat == NULL || nat->ct == NULL)
541 return;
542
543 NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE);
544
545 spin_lock_bh(&nf_nat_lock);
546 hlist_del_rcu(&nat->bysource);
547 spin_unlock_bh(&nf_nat_lock);
548}
549
550static void nf_nat_move_storage(void *new, void *old)
551{
552 struct nf_conn_nat *new_nat = new;
553 struct nf_conn_nat *old_nat = old;
554 struct nf_conn *ct = old_nat->ct;
555
556 if (!ct || !(ct->status & IPS_SRC_NAT_DONE))
557 return;
558
559 spin_lock_bh(&nf_nat_lock);
560 hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
561 spin_unlock_bh(&nf_nat_lock);
562}
563
564static struct nf_ct_ext_type nat_extend __read_mostly = {
565 .len = sizeof(struct nf_conn_nat),
566 .align = __alignof__(struct nf_conn_nat),
567 .destroy = nf_nat_cleanup_conntrack,
568 .move = nf_nat_move_storage,
569 .id = NF_CT_EXT_NAT,
570 .flags = NF_CT_EXT_F_PREALLOC,
571};
572
573#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
574
575#include <linux/netfilter/nfnetlink.h>
576#include <linux/netfilter/nfnetlink_conntrack.h>
577
578static const struct nf_nat_protocol *
579nf_nat_proto_find_get(u_int8_t protonum)
580{
581 const struct nf_nat_protocol *p;
582
583 rcu_read_lock();
584 p = __nf_nat_proto_find(protonum);
585 if (!try_module_get(p->me))
586 p = &nf_nat_unknown_protocol;
587 rcu_read_unlock();
588
589 return p;
590}
591
592static void
593nf_nat_proto_put(const struct nf_nat_protocol *p)
594{
595 module_put(p->me);
596}
597
598static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
599 [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 },
600 [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
601};
602
603static int nfnetlink_parse_nat_proto(struct nlattr *attr,
604 const struct nf_conn *ct,
605 struct nf_nat_range *range)
606{
607 struct nlattr *tb[CTA_PROTONAT_MAX+1];
608 const struct nf_nat_protocol *npt;
609 int err;
610
611 err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
612 if (err < 0)
613 return err;
614
615 npt = nf_nat_proto_find_get(nf_ct_protonum(ct));
616 if (npt->nlattr_to_range)
617 err = npt->nlattr_to_range(tb, range);
618 nf_nat_proto_put(npt);
619 return err;
620}
621
622static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
623 [CTA_NAT_MINIP] = { .type = NLA_U32 },
624 [CTA_NAT_MAXIP] = { .type = NLA_U32 },
625};
626
627static int
628nfnetlink_parse_nat(const struct nlattr *nat,
629 const struct nf_conn *ct, struct nf_nat_range *range)
630{
631 struct nlattr *tb[CTA_NAT_MAX+1];
632 int err;
633
634 memset(range, 0, sizeof(*range));
635
636 err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy);
637 if (err < 0)
638 return err;
639
640 if (tb[CTA_NAT_MINIP])
641 range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
642
643 if (!tb[CTA_NAT_MAXIP])
644 range->max_ip = range->min_ip;
645 else
646 range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
647
648 if (range->min_ip)
649 range->flags |= IP_NAT_RANGE_MAP_IPS;
650
651 if (!tb[CTA_NAT_PROTO])
652 return 0;
653
654 err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
655 if (err < 0)
656 return err;
657
658 return 0;
659}
660
661static int
662nfnetlink_parse_nat_setup(struct nf_conn *ct,
663 enum nf_nat_manip_type manip,
664 const struct nlattr *attr)
665{
666 struct nf_nat_range range;
667
668 if (nfnetlink_parse_nat(attr, ct, &range) < 0)
669 return -EINVAL;
670 if (nf_nat_initialized(ct, manip))
671 return -EEXIST;
672
673 return nf_nat_setup_info(ct, &range, manip);
674}
675#else
676static int
677nfnetlink_parse_nat_setup(struct nf_conn *ct,
678 enum nf_nat_manip_type manip,
679 const struct nlattr *attr)
680{
681 return -EOPNOTSUPP;
682}
683#endif
684
685static int __net_init nf_nat_net_init(struct net *net)
686{
687 /* Leave them the same for the moment. */
688 net->ipv4.nat_htable_size = net->ct.htable_size;
689 net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);
690 if (!net->ipv4.nat_bysource)
691 return -ENOMEM;
692 return 0;
693}
694
695/* Clear NAT section of all conntracks, in case we're loaded again. */
696static int clean_nat(struct nf_conn *i, void *data)
697{
698 struct nf_conn_nat *nat = nfct_nat(i);
699
700 if (!nat)
701 return 0;
702 memset(nat, 0, sizeof(*nat));
703 i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
704 return 0;
705}
706
707static void __net_exit nf_nat_net_exit(struct net *net)
708{
709 nf_ct_iterate_cleanup(net, &clean_nat, NULL);
710 synchronize_rcu();
711 nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);
712}
713
714static struct pernet_operations nf_nat_net_ops = {
715 .init = nf_nat_net_init,
716 .exit = nf_nat_net_exit,
717};
718
719static int __init nf_nat_init(void)
720{
721 size_t i;
722 int ret;
723
724 need_ipv4_conntrack();
725
726 ret = nf_ct_extend_register(&nat_extend);
727 if (ret < 0) {
728 printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
729 return ret;
730 }
731
732 ret = register_pernet_subsys(&nf_nat_net_ops);
733 if (ret < 0)
734 goto cleanup_extend;
735
736 /* Sew in builtin protocols. */
737 spin_lock_bh(&nf_nat_lock);
738 for (i = 0; i < MAX_IP_NAT_PROTO; i++)
739 rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol);
740 rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
741 rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
742 rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
743 spin_unlock_bh(&nf_nat_lock);
744
745 /* Initialize fake conntrack so that NAT will skip it */
746 nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
747
748 l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
749
750 BUG_ON(nf_nat_seq_adjust_hook != NULL);
751 rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
752 BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
753 rcu_assign_pointer(nfnetlink_parse_nat_setup_hook,
754 nfnetlink_parse_nat_setup);
755 BUG_ON(nf_ct_nat_offset != NULL);
756 rcu_assign_pointer(nf_ct_nat_offset, nf_nat_get_offset);
757 return 0;
758
759 cleanup_extend:
760 nf_ct_extend_unregister(&nat_extend);
761 return ret;
762}
763
764static void __exit nf_nat_cleanup(void)
765{
766 unregister_pernet_subsys(&nf_nat_net_ops);
767 nf_ct_l3proto_put(l3proto);
768 nf_ct_extend_unregister(&nat_extend);
769 rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL);
770 rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL);
771 rcu_assign_pointer(nf_ct_nat_offset, NULL);
772 synchronize_net();
773}
774
775MODULE_LICENSE("GPL");
776MODULE_ALIAS("nf-nat-ipv4");
777
778module_init(nf_nat_init);
779module_exit(nf_nat_cleanup);
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
new file mode 100644
index 00000000000..dc73abb3fe2
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -0,0 +1,137 @@
1/* FTP extension for TCP NAT alteration. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/moduleparam.h>
13#include <linux/ip.h>
14#include <linux/tcp.h>
15#include <linux/netfilter_ipv4.h>
16#include <net/netfilter/nf_nat.h>
17#include <net/netfilter/nf_nat_helper.h>
18#include <net/netfilter/nf_nat_rule.h>
19#include <net/netfilter/nf_conntrack_helper.h>
20#include <net/netfilter/nf_conntrack_expect.h>
21#include <linux/netfilter/nf_conntrack_ftp.h>
22
23MODULE_LICENSE("GPL");
24MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
25MODULE_DESCRIPTION("ftp NAT helper");
26MODULE_ALIAS("ip_nat_ftp");
27
28/* FIXME: Time out? --RR */
29
30static int nf_nat_ftp_fmt_cmd(enum nf_ct_ftp_type type,
31 char *buffer, size_t buflen,
32 __be32 addr, u16 port)
33{
34 switch (type) {
35 case NF_CT_FTP_PORT:
36 case NF_CT_FTP_PASV:
37 return snprintf(buffer, buflen, "%u,%u,%u,%u,%u,%u",
38 ((unsigned char *)&addr)[0],
39 ((unsigned char *)&addr)[1],
40 ((unsigned char *)&addr)[2],
41 ((unsigned char *)&addr)[3],
42 port >> 8,
43 port & 0xFF);
44 case NF_CT_FTP_EPRT:
45 return snprintf(buffer, buflen, "|1|%pI4|%u|", &addr, port);
46 case NF_CT_FTP_EPSV:
47 return snprintf(buffer, buflen, "|||%u|", port);
48 }
49
50 return 0;
51}
52
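/* Example of the strings built above, assuming a NATed address of
 * 192.0.2.3 and a data port of 5001 (values chosen for illustration):
 *
 *	PORT/PASV:  "192,0,2,3,19,137"        (19 * 256 + 137 == 5001)
 *	EPRT:       "|1|192.0.2.3|5001|"
 *	EPSV:       "|||5001|"
 *
 * The longest possible result, "|1|255.255.255.255|65535|", is what
 * sizes the buffer in nf_nat_ftp() below. */
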
53/* So, this packet has hit the connection tracking matching code.
54 Mangle it, and change the expectation to match the new version. */
55static unsigned int nf_nat_ftp(struct sk_buff *skb,
56 enum ip_conntrack_info ctinfo,
57 enum nf_ct_ftp_type type,
58 unsigned int matchoff,
59 unsigned int matchlen,
60 struct nf_conntrack_expect *exp)
61{
62 __be32 newip;
63 u_int16_t port;
64 int dir = CTINFO2DIR(ctinfo);
65 struct nf_conn *ct = exp->master;
66 char buffer[sizeof("|1|255.255.255.255|65535|")];
67 unsigned int buflen;
68
69 pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
70
71 /* Connection will come from wherever this packet goes, hence !dir */
72 newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
73 exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
74 exp->dir = !dir;
75
76 /* When you see the packet, we need to NAT it the same as
77 * this one. */
78 exp->expectfn = nf_nat_follow_master;
79
80 /* Try to get same port: if not, try to change it. */
81 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
82 int ret;
83
84 exp->tuple.dst.u.tcp.port = htons(port);
85 ret = nf_ct_expect_related(exp);
86 if (ret == 0)
87 break;
88 else if (ret != -EBUSY) {
89 port = 0;
90 break;
91 }
92 }
93
94 if (port == 0)
95 return NF_DROP;
96
97 buflen = nf_nat_ftp_fmt_cmd(type, buffer, sizeof(buffer), newip, port);
98 if (!buflen)
99 goto out;
100
101 pr_debug("calling nf_nat_mangle_tcp_packet\n");
102
103 if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
104 matchlen, buffer, buflen))
105 goto out;
106
107 return NF_ACCEPT;
108
109out:
110 nf_ct_unexpect_related(exp);
111 return NF_DROP;
112}
113
114static void __exit nf_nat_ftp_fini(void)
115{
116 rcu_assign_pointer(nf_nat_ftp_hook, NULL);
117 synchronize_rcu();
118}
119
120static int __init nf_nat_ftp_init(void)
121{
122 BUG_ON(nf_nat_ftp_hook != NULL);
123 rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp);
124 return 0;
125}
126
127/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
128static int warn_set(const char *val, struct kernel_param *kp)
129{
130 printk(KERN_INFO KBUILD_MODNAME
131 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
132 return 0;
133}
134module_param_call(ports, warn_set, NULL, NULL, 0);
135
136module_init(nf_nat_ftp_init);
137module_exit(nf_nat_ftp_fini);
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
new file mode 100644
index 00000000000..ebc5f8894f9
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -0,0 +1,451 @@
1/* ip_nat_helper.c - generic support functions for NAT helpers
2 *
3 * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
4 * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#include <linux/module.h>
11#include <linux/gfp.h>
12#include <linux/kmod.h>
13#include <linux/types.h>
14#include <linux/timer.h>
15#include <linux/skbuff.h>
16#include <linux/tcp.h>
17#include <linux/udp.h>
18#include <net/checksum.h>
19#include <net/tcp.h>
20#include <net/route.h>
21
22#include <linux/netfilter_ipv4.h>
23#include <net/netfilter/nf_conntrack.h>
24#include <net/netfilter/nf_conntrack_helper.h>
25#include <net/netfilter/nf_conntrack_ecache.h>
26#include <net/netfilter/nf_conntrack_expect.h>
27#include <net/netfilter/nf_nat.h>
28#include <net/netfilter/nf_nat_protocol.h>
29#include <net/netfilter/nf_nat_core.h>
30#include <net/netfilter/nf_nat_helper.h>
31
32#define DUMP_OFFSET(x) \
33 pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
34 x->offset_before, x->offset_after, x->correction_pos);
35
36static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
37
38/* Setup TCP sequence correction given this change at this sequence */
39static inline void
40adjust_tcp_sequence(u32 seq,
41 int sizediff,
42 struct nf_conn *ct,
43 enum ip_conntrack_info ctinfo)
44{
45 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
46 struct nf_conn_nat *nat = nfct_nat(ct);
47 struct nf_nat_seq *this_way = &nat->seq[dir];
48
49 pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n",
50 seq, sizediff);
51
52 pr_debug("adjust_tcp_sequence: Seq_offset before: ");
53 DUMP_OFFSET(this_way);
54
55 spin_lock_bh(&nf_nat_seqofs_lock);
56
57 /* SYN adjust. If it's uninitialized, or this is after last
58 * correction, record it: we don't handle more than one
59 * adjustment in the window, but do deal with the common case of a
60 * retransmit */
61 if (this_way->offset_before == this_way->offset_after ||
62 before(this_way->correction_pos, seq)) {
63 this_way->correction_pos = seq;
64 this_way->offset_before = this_way->offset_after;
65 this_way->offset_after += sizediff;
66 }
67 spin_unlock_bh(&nf_nat_seqofs_lock);
68
69 pr_debug("adjust_tcp_sequence: Seq_offset after: ");
70 DUMP_OFFSET(this_way);
71}
72
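/* Bookkeeping example (sizes are illustrative): if a helper rewrite
 * grows a packet by 3 bytes at TCP sequence S, the code above records
 * correction_pos = S, keeps the previous cumulative offset in
 * offset_before and bumps offset_after by 3. nf_nat_seq_adjust() below
 * then adds offset_after to the sequence numbers of segments past S,
 * applies only offset_before to retransmissions from before S, and
 * subtracts the peer's offsets from incoming ACKs and SACK blocks so
 * both endpoints keep seeing a consistent byte stream. */
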
73/* Get the offset value, for conntrack */
74s16 nf_nat_get_offset(const struct nf_conn *ct,
75 enum ip_conntrack_dir dir,
76 u32 seq)
77{
78 struct nf_conn_nat *nat = nfct_nat(ct);
79 struct nf_nat_seq *this_way;
80 s16 offset;
81
82 if (!nat)
83 return 0;
84
85 this_way = &nat->seq[dir];
86 spin_lock_bh(&nf_nat_seqofs_lock);
87 offset = after(seq, this_way->correction_pos)
88 ? this_way->offset_after : this_way->offset_before;
89 spin_unlock_bh(&nf_nat_seqofs_lock);
90
91 return offset;
92}
93EXPORT_SYMBOL_GPL(nf_nat_get_offset);
94
95/* Frobs data inside this packet, which is linear. */
96static void mangle_contents(struct sk_buff *skb,
97 unsigned int dataoff,
98 unsigned int match_offset,
99 unsigned int match_len,
100 const char *rep_buffer,
101 unsigned int rep_len)
102{
103 unsigned char *data;
104
105 BUG_ON(skb_is_nonlinear(skb));
106 data = skb_network_header(skb) + dataoff;
107
108 /* move post-replacement */
109 memmove(data + match_offset + rep_len,
110 data + match_offset + match_len,
111 skb->tail - (skb->network_header + dataoff +
112 match_offset + match_len));
113
114 /* insert data from buffer */
115 memcpy(data + match_offset, rep_buffer, rep_len);
116
117 /* update skb info */
118 if (rep_len > match_len) {
119 pr_debug("nf_nat_mangle_packet: Extending packet by "
120 "%u from %u bytes\n", rep_len - match_len, skb->len);
121 skb_put(skb, rep_len - match_len);
122 } else {
123 pr_debug("nf_nat_mangle_packet: Shrinking packet from "
124 "%u from %u bytes\n", match_len - rep_len, skb->len);
125 __skb_trim(skb, skb->len + rep_len - match_len);
126 }
127
128 /* fix IP hdr checksum information */
129 ip_hdr(skb)->tot_len = htons(skb->len);
130 ip_send_check(ip_hdr(skb));
131}
132
133/* Unusual, but possible case. */
134static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
135{
136 if (skb->len + extra > 65535)
137 return 0;
138
139 if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC))
140 return 0;
141
142 return 1;
143}
144
145void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
146 __be32 seq, s16 off)
147{
148 if (!off)
149 return;
150 set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
151 adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo);
152 nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
153}
154EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
155
156static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data,
157 int datalen, __sum16 *check, int oldlen)
158{
159 struct rtable *rt = skb_rtable(skb);
160
161 if (skb->ip_summed != CHECKSUM_PARTIAL) {
162 if (!(rt->rt_flags & RTCF_LOCAL) &&
163 (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) {
164 skb->ip_summed = CHECKSUM_PARTIAL;
165 skb->csum_start = skb_headroom(skb) +
166 skb_network_offset(skb) +
167 iph->ihl * 4;
168 skb->csum_offset = (void *)check - data;
169 *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
170 datalen, iph->protocol, 0);
171 } else {
172 *check = 0;
173 *check = csum_tcpudp_magic(iph->saddr, iph->daddr,
174 datalen, iph->protocol,
175 csum_partial(data, datalen,
176 0));
177 if (iph->protocol == IPPROTO_UDP && !*check)
178 *check = CSUM_MANGLED_0;
179 }
180 } else
181 inet_proto_csum_replace2(check, skb,
182 htons(oldlen), htons(datalen), 1);
183}
184
185/* Generic function for mangling variable-length address changes inside
186 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
187 * command in FTP).
188 *
189 * Takes care of all the nasty sequence number changes, checksumming,
190 * skb enlargement, ...
191 *
192 */
193int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
194 struct nf_conn *ct,
195 enum ip_conntrack_info ctinfo,
196 unsigned int match_offset,
197 unsigned int match_len,
198 const char *rep_buffer,
199 unsigned int rep_len, bool adjust)
200{
201 struct iphdr *iph;
202 struct tcphdr *tcph;
203 int oldlen, datalen;
204
205 if (!skb_make_writable(skb, skb->len))
206 return 0;
207
208 if (rep_len > match_len &&
209 rep_len - match_len > skb_tailroom(skb) &&
210 !enlarge_skb(skb, rep_len - match_len))
211 return 0;
212
213 SKB_LINEAR_ASSERT(skb);
214
215 iph = ip_hdr(skb);
216 tcph = (void *)iph + iph->ihl*4;
217
218 oldlen = skb->len - iph->ihl*4;
219 mangle_contents(skb, iph->ihl*4 + tcph->doff*4,
220 match_offset, match_len, rep_buffer, rep_len);
221
222 datalen = skb->len - iph->ihl*4;
223 nf_nat_csum(skb, iph, tcph, datalen, &tcph->check, oldlen);
224
225 if (adjust && rep_len != match_len)
226 nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq,
227 (int)rep_len - (int)match_len);
228
229 return 1;
230}
231EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet);
232
233/* Generic function for mangling variable-length address changes inside
234 * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
235 * command in the Amanda protocol)
236 *
237 * Takes care of all the nasty sequence number changes, checksumming,
238 * skb enlargement, ...
239 *
240 * XXX - This function could be merged with nf_nat_mangle_tcp_packet which
241 * should be fairly easy to do.
242 */
243int
244nf_nat_mangle_udp_packet(struct sk_buff *skb,
245 struct nf_conn *ct,
246 enum ip_conntrack_info ctinfo,
247 unsigned int match_offset,
248 unsigned int match_len,
249 const char *rep_buffer,
250 unsigned int rep_len)
251{
252 struct iphdr *iph;
253 struct udphdr *udph;
254 int datalen, oldlen;
255
256 /* UDP helpers might accidentally mangle the wrong packet */
257 iph = ip_hdr(skb);
258 if (skb->len < iph->ihl*4 + sizeof(*udph) +
259 match_offset + match_len)
260 return 0;
261
262 if (!skb_make_writable(skb, skb->len))
263 return 0;
264
265 if (rep_len > match_len &&
266 rep_len - match_len > skb_tailroom(skb) &&
267 !enlarge_skb(skb, rep_len - match_len))
268 return 0;
269
270 iph = ip_hdr(skb);
271 udph = (void *)iph + iph->ihl*4;
272
273 oldlen = skb->len - iph->ihl*4;
274 mangle_contents(skb, iph->ihl*4 + sizeof(*udph),
275 match_offset, match_len, rep_buffer, rep_len);
276
277 /* update the length of the UDP packet */
278 datalen = skb->len - iph->ihl*4;
279 udph->len = htons(datalen);
280
281 /* fix udp checksum if udp checksum was previously calculated */
282 if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
283 return 1;
284
285 nf_nat_csum(skb, iph, udph, datalen, &udph->check, oldlen);
286
287 return 1;
288}
289EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
290
291/* Adjust one found SACK option including checksum correction */
292static void
293sack_adjust(struct sk_buff *skb,
294 struct tcphdr *tcph,
295 unsigned int sackoff,
296 unsigned int sackend,
297 struct nf_nat_seq *natseq)
298{
299 while (sackoff < sackend) {
300 struct tcp_sack_block_wire *sack;
301 __be32 new_start_seq, new_end_seq;
302
303 sack = (void *)skb->data + sackoff;
304 if (after(ntohl(sack->start_seq) - natseq->offset_before,
305 natseq->correction_pos))
306 new_start_seq = htonl(ntohl(sack->start_seq)
307 - natseq->offset_after);
308 else
309 new_start_seq = htonl(ntohl(sack->start_seq)
310 - natseq->offset_before);
311
312 if (after(ntohl(sack->end_seq) - natseq->offset_before,
313 natseq->correction_pos))
314 new_end_seq = htonl(ntohl(sack->end_seq)
315 - natseq->offset_after);
316 else
317 new_end_seq = htonl(ntohl(sack->end_seq)
318 - natseq->offset_before);
319
320 pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
321 ntohl(sack->start_seq), new_start_seq,
322 ntohl(sack->end_seq), new_end_seq);
323
324 inet_proto_csum_replace4(&tcph->check, skb,
325 sack->start_seq, new_start_seq, 0);
326 inet_proto_csum_replace4(&tcph->check, skb,
327 sack->end_seq, new_end_seq, 0);
328 sack->start_seq = new_start_seq;
329 sack->end_seq = new_end_seq;
330 sackoff += sizeof(*sack);
331 }
332}
333
334/* TCP SACK sequence number adjustment */
335static inline unsigned int
336nf_nat_sack_adjust(struct sk_buff *skb,
337 struct tcphdr *tcph,
338 struct nf_conn *ct,
339 enum ip_conntrack_info ctinfo)
340{
341 unsigned int dir, optoff, optend;
342 struct nf_conn_nat *nat = nfct_nat(ct);
343
344 optoff = ip_hdrlen(skb) + sizeof(struct tcphdr);
345 optend = ip_hdrlen(skb) + tcph->doff * 4;
346
347 if (!skb_make_writable(skb, optend))
348 return 0;
349
350 dir = CTINFO2DIR(ctinfo);
351
352 while (optoff < optend) {
353 /* Usually: option, length. */
354 unsigned char *op = skb->data + optoff;
355
356 switch (op[0]) {
357 case TCPOPT_EOL:
358 return 1;
359 case TCPOPT_NOP:
360 optoff++;
361 continue;
362 default:
363 /* no partial options */
364 if (optoff + 1 == optend ||
365 optoff + op[1] > optend ||
366 op[1] < 2)
367 return 0;
368 if (op[0] == TCPOPT_SACK &&
369 op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
370 ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
371 sack_adjust(skb, tcph, optoff+2,
372 optoff+op[1], &nat->seq[!dir]);
373 optoff += op[1];
374 }
375 }
376 return 1;
377}
378
379/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
380int
381nf_nat_seq_adjust(struct sk_buff *skb,
382 struct nf_conn *ct,
383 enum ip_conntrack_info ctinfo)
384{
385 struct tcphdr *tcph;
386 int dir;
387 __be32 newseq, newack;
388 s16 seqoff, ackoff;
389 struct nf_conn_nat *nat = nfct_nat(ct);
390 struct nf_nat_seq *this_way, *other_way;
391
392 dir = CTINFO2DIR(ctinfo);
393
394 this_way = &nat->seq[dir];
395 other_way = &nat->seq[!dir];
396
397 if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
398 return 0;
399
400 tcph = (void *)skb->data + ip_hdrlen(skb);
401 if (after(ntohl(tcph->seq), this_way->correction_pos))
402 seqoff = this_way->offset_after;
403 else
404 seqoff = this_way->offset_before;
405
406 if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
407 other_way->correction_pos))
408 ackoff = other_way->offset_after;
409 else
410 ackoff = other_way->offset_before;
411
412 newseq = htonl(ntohl(tcph->seq) + seqoff);
413 newack = htonl(ntohl(tcph->ack_seq) - ackoff);
414
415 inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
416 inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
417
418 pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
419 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
420 ntohl(newack));
421
422 tcph->seq = newseq;
423 tcph->ack_seq = newack;
424
425 return nf_nat_sack_adjust(skb, tcph, ct, ctinfo);
426}
427
428/* Setup NAT on this expected conntrack so it follows master. */
429/* If we fail to get a free NAT slot, we'll get dropped on confirm */
430void nf_nat_follow_master(struct nf_conn *ct,
431 struct nf_conntrack_expect *exp)
432{
433 struct nf_nat_range range;
434
435 /* This must be a fresh one. */
436 BUG_ON(ct->status & IPS_NAT_DONE_MASK);
437
438 /* Change src to where master sends to */
439 range.flags = IP_NAT_RANGE_MAP_IPS;
440 range.min_ip = range.max_ip
441 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
442 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
443
444 /* For DST manip, map port here to where it's expected. */
445 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
446 range.min = range.max = exp->saved_proto;
447 range.min_ip = range.max_ip
448 = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
449 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
450}
451EXPORT_SYMBOL(nf_nat_follow_master);
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
new file mode 100644
index 00000000000..535e1a80235
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -0,0 +1,99 @@
1/* IRC extension for TCP NAT alteration.
2 *
3 * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
4 * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
5 * based on a copy of RR's ip_nat_ftp.c
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/module.h>
14#include <linux/moduleparam.h>
15#include <linux/tcp.h>
16#include <linux/kernel.h>
17
18#include <net/netfilter/nf_nat.h>
19#include <net/netfilter/nf_nat_helper.h>
20#include <net/netfilter/nf_nat_rule.h>
21#include <net/netfilter/nf_conntrack_helper.h>
22#include <net/netfilter/nf_conntrack_expect.h>
23#include <linux/netfilter/nf_conntrack_irc.h>
24
25MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
26MODULE_DESCRIPTION("IRC (DCC) NAT helper");
27MODULE_LICENSE("GPL");
28MODULE_ALIAS("ip_nat_irc");
29
30static unsigned int help(struct sk_buff *skb,
31 enum ip_conntrack_info ctinfo,
32 unsigned int matchoff,
33 unsigned int matchlen,
34 struct nf_conntrack_expect *exp)
35{
36 char buffer[sizeof("4294967295 65535")];
37 u_int32_t ip;
38 u_int16_t port;
39 unsigned int ret;
40
41 /* Reply comes from server. */
42 exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
43 exp->dir = IP_CT_DIR_REPLY;
44 exp->expectfn = nf_nat_follow_master;
45
46 /* Try to get same port: if not, try to change it. */
47 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
48 int ret;
49
50 exp->tuple.dst.u.tcp.port = htons(port);
51 ret = nf_ct_expect_related(exp);
52 if (ret == 0)
53 break;
54 else if (ret != -EBUSY) {
55 port = 0;
56 break;
57 }
58 }
59
60 if (port == 0)
61 return NF_DROP;
62
63 ip = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip);
64 sprintf(buffer, "%u %u", ip, port);
65 pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n",
66 buffer, &ip, port);
67
68 ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo,
69 matchoff, matchlen, buffer,
70 strlen(buffer));
71 if (ret != NF_ACCEPT)
72 nf_ct_unexpect_related(exp);
73 return ret;
74}
75
76static void __exit nf_nat_irc_fini(void)
77{
78 rcu_assign_pointer(nf_nat_irc_hook, NULL);
79 synchronize_rcu();
80}
81
82static int __init nf_nat_irc_init(void)
83{
84 BUG_ON(nf_nat_irc_hook != NULL);
85 rcu_assign_pointer(nf_nat_irc_hook, help);
86 return 0;
87}
88
89/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
90static int warn_set(const char *val, struct kernel_param *kp)
91{
92 printk(KERN_INFO KBUILD_MODNAME
93 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
94 return 0;
95}
96module_param_call(ports, warn_set, NULL, NULL, 0);
97
98module_init(nf_nat_irc_init);
99module_exit(nf_nat_irc_fini);
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
new file mode 100644
index 00000000000..f52d41ea069
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -0,0 +1,125 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2008 Patrick McHardy <kaber@trash.net>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/types.h>
11#include <linux/random.h>
12#include <linux/ip.h>
13
14#include <linux/netfilter.h>
15#include <net/secure_seq.h>
16#include <net/netfilter/nf_nat.h>
17#include <net/netfilter/nf_nat_core.h>
18#include <net/netfilter/nf_nat_rule.h>
19#include <net/netfilter/nf_nat_protocol.h>
20
21bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
22 enum nf_nat_manip_type maniptype,
23 const union nf_conntrack_man_proto *min,
24 const union nf_conntrack_man_proto *max)
25{
26 __be16 port;
27
28 if (maniptype == IP_NAT_MANIP_SRC)
29 port = tuple->src.u.all;
30 else
31 port = tuple->dst.u.all;
32
33 return ntohs(port) >= ntohs(min->all) &&
34 ntohs(port) <= ntohs(max->all);
35}
36EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);
37
38void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
39 const struct nf_nat_range *range,
40 enum nf_nat_manip_type maniptype,
41 const struct nf_conn *ct,
42 u_int16_t *rover)
43{
44 unsigned int range_size, min, i;
45 __be16 *portptr;
46 u_int16_t off;
47
48 if (maniptype == IP_NAT_MANIP_SRC)
49 portptr = &tuple->src.u.all;
50 else
51 portptr = &tuple->dst.u.all;
52
53 /* If no range specified... */
54 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
55 /* If it's dst rewrite, can't change port */
56 if (maniptype == IP_NAT_MANIP_DST)
57 return;
58
59 if (ntohs(*portptr) < 1024) {
60 /* Loose convention: >> 512 is credential passing */
61 if (ntohs(*portptr) < 512) {
62 min = 1;
63 range_size = 511 - min + 1;
64 } else {
65 min = 600;
66 range_size = 1023 - min + 1;
67 }
68 } else {
69 min = 1024;
70 range_size = 65535 - 1024 + 1;
71 }
72 } else {
73 min = ntohs(range->min.all);
74 range_size = ntohs(range->max.all) - min + 1;
75 }
76
77 if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
78 off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip,
79 maniptype == IP_NAT_MANIP_SRC
80 ? tuple->dst.u.all
81 : tuple->src.u.all);
82 else
83 off = *rover;
84
85 for (i = 0; ; ++off) {
86 *portptr = htons(min + off % range_size);
87 if (++i != range_size && nf_nat_used_tuple(tuple, ct))
88 continue;
89 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
90 *rover = off;
91 return;
92 }
93 return;
94}
95EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);
96
97#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
98int nf_nat_proto_range_to_nlattr(struct sk_buff *skb,
99 const struct nf_nat_range *range)
100{
101 NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.all);
102 NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.all);
103 return 0;
104
105nla_put_failure:
106 return -1;
107}
108EXPORT_SYMBOL_GPL(nf_nat_proto_range_to_nlattr);
109
110int nf_nat_proto_nlattr_to_range(struct nlattr *tb[],
111 struct nf_nat_range *range)
112{
113 if (tb[CTA_PROTONAT_PORT_MIN]) {
114 range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
115 range->max.all = range->min.tcp.port;
116 range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
117 }
118 if (tb[CTA_PROTONAT_PORT_MAX]) {
119 range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
120 range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
121 }
122 return 0;
123}
124EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range);
125#endif
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c
new file mode 100644
index 00000000000..570faf2667b
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c
@@ -0,0 +1,108 @@
1/*
2 * DCCP NAT protocol helper
3 *
4 * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 */
11
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/skbuff.h>
16#include <linux/ip.h>
17#include <linux/dccp.h>
18
19#include <net/netfilter/nf_conntrack.h>
20#include <net/netfilter/nf_nat.h>
21#include <net/netfilter/nf_nat_protocol.h>
22
23static u_int16_t dccp_port_rover;
24
25static void
26dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
27 const struct nf_nat_range *range,
28 enum nf_nat_manip_type maniptype,
29 const struct nf_conn *ct)
30{
31 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
32 &dccp_port_rover);
33}
34
35static bool
36dccp_manip_pkt(struct sk_buff *skb,
37 unsigned int iphdroff,
38 const struct nf_conntrack_tuple *tuple,
39 enum nf_nat_manip_type maniptype)
40{
41 const struct iphdr *iph = (const void *)(skb->data + iphdroff);
42 struct dccp_hdr *hdr;
43 unsigned int hdroff = iphdroff + iph->ihl * 4;
44 __be32 oldip, newip;
45 __be16 *portptr, oldport, newport;
46 int hdrsize = 8; /* DCCP connection tracking guarantees this much */
47
48 if (skb->len >= hdroff + sizeof(struct dccp_hdr))
49 hdrsize = sizeof(struct dccp_hdr);
50
51 if (!skb_make_writable(skb, hdroff + hdrsize))
52 return false;
53
54 iph = (struct iphdr *)(skb->data + iphdroff);
55 hdr = (struct dccp_hdr *)(skb->data + hdroff);
56
57 if (maniptype == IP_NAT_MANIP_SRC) {
58 oldip = iph->saddr;
59 newip = tuple->src.u3.ip;
60 newport = tuple->src.u.dccp.port;
61 portptr = &hdr->dccph_sport;
62 } else {
63 oldip = iph->daddr;
64 newip = tuple->dst.u3.ip;
65 newport = tuple->dst.u.dccp.port;
66 portptr = &hdr->dccph_dport;
67 }
68
69 oldport = *portptr;
70 *portptr = newport;
71
72 if (hdrsize < sizeof(*hdr))
73 return true;
74
75 inet_proto_csum_replace4(&hdr->dccph_checksum, skb, oldip, newip, 1);
76 inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
77 0);
78 return true;
79}
80
81static const struct nf_nat_protocol nf_nat_protocol_dccp = {
82 .protonum = IPPROTO_DCCP,
83 .me = THIS_MODULE,
84 .manip_pkt = dccp_manip_pkt,
85 .in_range = nf_nat_proto_in_range,
86 .unique_tuple = dccp_unique_tuple,
87#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
88 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
89 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
90#endif
91};
92
93static int __init nf_nat_proto_dccp_init(void)
94{
95 return nf_nat_protocol_register(&nf_nat_protocol_dccp);
96}
97
98static void __exit nf_nat_proto_dccp_fini(void)
99{
100 nf_nat_protocol_unregister(&nf_nat_protocol_dccp);
101}
102
103module_init(nf_nat_proto_dccp_init);
104module_exit(nf_nat_proto_dccp_fini);
105
106MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
107MODULE_DESCRIPTION("DCCP NAT protocol helper");
108MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
new file mode 100644
index 00000000000..756331d4266
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -0,0 +1,97 @@
1/*
2 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/init.h>
11#include <linux/ip.h>
12#include <linux/sctp.h>
13#include <net/sctp/checksum.h>
14
15#include <net/netfilter/nf_nat_protocol.h>
16
17static u_int16_t nf_sctp_port_rover;
18
19static void
20sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
21 const struct nf_nat_range *range,
22 enum nf_nat_manip_type maniptype,
23 const struct nf_conn *ct)
24{
25 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
26 &nf_sctp_port_rover);
27}
28
29static bool
30sctp_manip_pkt(struct sk_buff *skb,
31 unsigned int iphdroff,
32 const struct nf_conntrack_tuple *tuple,
33 enum nf_nat_manip_type maniptype)
34{
35 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
36 struct sk_buff *frag;
37 sctp_sctphdr_t *hdr;
38 unsigned int hdroff = iphdroff + iph->ihl*4;
39 __be32 oldip, newip;
40 __be32 crc32;
41
42 if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
43 return false;
44
45 iph = (struct iphdr *)(skb->data + iphdroff);
46 hdr = (struct sctphdr *)(skb->data + hdroff);
47
48 if (maniptype == IP_NAT_MANIP_SRC) {
49 /* Get rid of src ip and src pt */
50 oldip = iph->saddr;
51 newip = tuple->src.u3.ip;
52 hdr->source = tuple->src.u.sctp.port;
53 } else {
54 /* Get rid of dst ip and dst pt */
55 oldip = iph->daddr;
56 newip = tuple->dst.u3.ip;
57 hdr->dest = tuple->dst.u.sctp.port;
58 }
59
60 crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff);
61 skb_walk_frags(skb, frag)
62 crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag),
63 crc32);
64 crc32 = sctp_end_cksum(crc32);
65 hdr->checksum = crc32;
66
67 return true;
68}
69
70static const struct nf_nat_protocol nf_nat_protocol_sctp = {
71 .protonum = IPPROTO_SCTP,
72 .me = THIS_MODULE,
73 .manip_pkt = sctp_manip_pkt,
74 .in_range = nf_nat_proto_in_range,
75 .unique_tuple = sctp_unique_tuple,
76#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
77 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
78 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
79#endif
80};
81
82static int __init nf_nat_proto_sctp_init(void)
83{
84 return nf_nat_protocol_register(&nf_nat_protocol_sctp);
85}
86
87static void __exit nf_nat_proto_sctp_exit(void)
88{
89 nf_nat_protocol_unregister(&nf_nat_protocol_sctp);
90}
91
92module_init(nf_nat_proto_sctp_init);
93module_exit(nf_nat_proto_sctp_exit);
94
95MODULE_LICENSE("GPL");
96MODULE_DESCRIPTION("SCTP NAT protocol helper");
97MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
new file mode 100644
index 00000000000..aa460a595d5
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -0,0 +1,92 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/init.h>
11#include <linux/ip.h>
12#include <linux/tcp.h>
13
14#include <linux/netfilter.h>
15#include <linux/netfilter/nfnetlink_conntrack.h>
16#include <net/netfilter/nf_nat.h>
17#include <net/netfilter/nf_nat_rule.h>
18#include <net/netfilter/nf_nat_protocol.h>
19#include <net/netfilter/nf_nat_core.h>
20
21static u_int16_t tcp_port_rover;
22
23static void
24tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
25 const struct nf_nat_range *range,
26 enum nf_nat_manip_type maniptype,
27 const struct nf_conn *ct)
28{
29 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover);
30}
31
32static bool
33tcp_manip_pkt(struct sk_buff *skb,
34 unsigned int iphdroff,
35 const struct nf_conntrack_tuple *tuple,
36 enum nf_nat_manip_type maniptype)
37{
38 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
39 struct tcphdr *hdr;
40 unsigned int hdroff = iphdroff + iph->ihl*4;
41 __be32 oldip, newip;
42 __be16 *portptr, newport, oldport;
43 int hdrsize = 8; /* TCP connection tracking guarantees this much */
44
45 /* this could be a inner header returned in icmp packet; in such
46 cases we cannot update the checksum field since it is outside of
47 the 8 bytes of transport layer headers we are guaranteed */
48 if (skb->len >= hdroff + sizeof(struct tcphdr))
49 hdrsize = sizeof(struct tcphdr);
50
51 if (!skb_make_writable(skb, hdroff + hdrsize))
52 return false;
53
54 iph = (struct iphdr *)(skb->data + iphdroff);
55 hdr = (struct tcphdr *)(skb->data + hdroff);
56
57 if (maniptype == IP_NAT_MANIP_SRC) {
58 /* Get rid of src ip and src pt */
59 oldip = iph->saddr;
60 newip = tuple->src.u3.ip;
61 newport = tuple->src.u.tcp.port;
62 portptr = &hdr->source;
63 } else {
64 /* Get rid of dst ip and dst pt */
65 oldip = iph->daddr;
66 newip = tuple->dst.u3.ip;
67 newport = tuple->dst.u.tcp.port;
68 portptr = &hdr->dest;
69 }
70
71 oldport = *portptr;
72 *portptr = newport;
73
74 if (hdrsize < sizeof(*hdr))
75 return true;
76
77 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
78 inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
79 return true;
80}
81
82const struct nf_nat_protocol nf_nat_protocol_tcp = {
83 .protonum = IPPROTO_TCP,
84 .me = THIS_MODULE,
85 .manip_pkt = tcp_manip_pkt,
86 .in_range = nf_nat_proto_in_range,
87 .unique_tuple = tcp_unique_tuple,
88#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
89 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
90 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
91#endif
92};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
new file mode 100644
index 00000000000..dfe65c7e292
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -0,0 +1,83 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/init.h>
11#include <linux/ip.h>
12#include <linux/udp.h>
13
14#include <linux/netfilter.h>
15#include <net/netfilter/nf_nat.h>
16#include <net/netfilter/nf_nat_core.h>
17#include <net/netfilter/nf_nat_rule.h>
18#include <net/netfilter/nf_nat_protocol.h>
19
20static u_int16_t udp_port_rover;
21
22static void
23udp_unique_tuple(struct nf_conntrack_tuple *tuple,
24 const struct nf_nat_range *range,
25 enum nf_nat_manip_type maniptype,
26 const struct nf_conn *ct)
27{
28 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover);
29}
30
31static bool
32udp_manip_pkt(struct sk_buff *skb,
33 unsigned int iphdroff,
34 const struct nf_conntrack_tuple *tuple,
35 enum nf_nat_manip_type maniptype)
36{
37 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
38 struct udphdr *hdr;
39 unsigned int hdroff = iphdroff + iph->ihl*4;
40 __be32 oldip, newip;
41 __be16 *portptr, newport;
42
43 if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
44 return false;
45
46 iph = (struct iphdr *)(skb->data + iphdroff);
47 hdr = (struct udphdr *)(skb->data + hdroff);
48
49 if (maniptype == IP_NAT_MANIP_SRC) {
50 /* Get rid of src ip and src pt */
51 oldip = iph->saddr;
52 newip = tuple->src.u3.ip;
53 newport = tuple->src.u.udp.port;
54 portptr = &hdr->source;
55 } else {
56 /* Get rid of dst ip and dst pt */
57 oldip = iph->daddr;
58 newip = tuple->dst.u3.ip;
59 newport = tuple->dst.u.udp.port;
60 portptr = &hdr->dest;
61 }
62 if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
63 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
64 inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
65 0);
66 if (!hdr->check)
67 hdr->check = CSUM_MANGLED_0;
68 }
69 *portptr = newport;
70 return true;
71}
72
73const struct nf_nat_protocol nf_nat_protocol_udp = {
74 .protonum = IPPROTO_UDP,
75 .me = THIS_MODULE,
76 .manip_pkt = udp_manip_pkt,
77 .in_range = nf_nat_proto_in_range,
78 .unique_tuple = udp_unique_tuple,
79#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
80 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
81 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
82#endif
83};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c
new file mode 100644
index 00000000000..3cc8c8af39e
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c
@@ -0,0 +1,99 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2008 Patrick McHardy <kaber@trash.net>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/types.h>
11#include <linux/init.h>
12#include <linux/ip.h>
13#include <linux/udp.h>
14
15#include <linux/netfilter.h>
16#include <net/netfilter/nf_nat.h>
17#include <net/netfilter/nf_nat_protocol.h>
18
19static u_int16_t udplite_port_rover;
20
21static void
22udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
23 const struct nf_nat_range *range,
24 enum nf_nat_manip_type maniptype,
25 const struct nf_conn *ct)
26{
27 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
28 &udplite_port_rover);
29}
30
31static bool
32udplite_manip_pkt(struct sk_buff *skb,
33 unsigned int iphdroff,
34 const struct nf_conntrack_tuple *tuple,
35 enum nf_nat_manip_type maniptype)
36{
37 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
38 struct udphdr *hdr;
39 unsigned int hdroff = iphdroff + iph->ihl*4;
40 __be32 oldip, newip;
41 __be16 *portptr, newport;
42
43 if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
44 return false;
45
46 iph = (struct iphdr *)(skb->data + iphdroff);
47 hdr = (struct udphdr *)(skb->data + hdroff);
48
49 if (maniptype == IP_NAT_MANIP_SRC) {
50 /* Get rid of src ip and src pt */
51 oldip = iph->saddr;
52 newip = tuple->src.u3.ip;
53 newport = tuple->src.u.udp.port;
54 portptr = &hdr->source;
55 } else {
56 /* Get rid of dst ip and dst pt */
57 oldip = iph->daddr;
58 newip = tuple->dst.u3.ip;
59 newport = tuple->dst.u.udp.port;
60 portptr = &hdr->dest;
61 }
62
63 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
64 inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0);
65 if (!hdr->check)
66 hdr->check = CSUM_MANGLED_0;
67
68 *portptr = newport;
69 return true;
70}
71
72static const struct nf_nat_protocol nf_nat_protocol_udplite = {
73 .protonum = IPPROTO_UDPLITE,
74 .me = THIS_MODULE,
75 .manip_pkt = udplite_manip_pkt,
76 .in_range = nf_nat_proto_in_range,
77 .unique_tuple = udplite_unique_tuple,
78#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
79 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
80 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
81#endif
82};
83
84static int __init nf_nat_proto_udplite_init(void)
85{
86 return nf_nat_protocol_register(&nf_nat_protocol_udplite);
87}
88
89static void __exit nf_nat_proto_udplite_fini(void)
90{
91 nf_nat_protocol_unregister(&nf_nat_protocol_udplite);
92}
93
94module_init(nf_nat_proto_udplite_init);
95module_exit(nf_nat_proto_udplite_fini);
96
97MODULE_LICENSE("GPL");
98MODULE_DESCRIPTION("UDP-Lite NAT protocol helper");
99MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
new file mode 100644
index 00000000000..a50f2bc1c73
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -0,0 +1,53 @@
1/* The "unknown" protocol. This is what is used for protocols we
2 * don't understand. It's returned by ip_ct_find_proto().
3 */
4
5/* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/types.h>
14#include <linux/init.h>
15
16#include <linux/netfilter.h>
17#include <net/netfilter/nf_nat.h>
18#include <net/netfilter/nf_nat_rule.h>
19#include <net/netfilter/nf_nat_protocol.h>
20
21static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
22 enum nf_nat_manip_type manip_type,
23 const union nf_conntrack_man_proto *min,
24 const union nf_conntrack_man_proto *max)
25{
26 return true;
27}
28
29static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
30 const struct nf_nat_range *range,
31 enum nf_nat_manip_type maniptype,
32 const struct nf_conn *ct)
33{
34 /* Sorry: we can't help you; if it's not unique, we can't frob
35 anything. */
36 return;
37}
38
39static bool
40unknown_manip_pkt(struct sk_buff *skb,
41 unsigned int iphdroff,
42 const struct nf_conntrack_tuple *tuple,
43 enum nf_nat_manip_type maniptype)
44{
45 return true;
46}
47
48const struct nf_nat_protocol nf_nat_unknown_protocol = {
49 /* .me isn't set: getting a ref to this cannot fail. */
50 .manip_pkt = unknown_manip_pkt,
51 .in_range = unknown_in_range,
52 .unique_tuple = unknown_unique_tuple,
53};
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
new file mode 100644
index 00000000000..733c9abc1cb
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -0,0 +1,214 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9/* Everything about the rules for NAT. */
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11#include <linux/types.h>
12#include <linux/ip.h>
13#include <linux/netfilter.h>
14#include <linux/netfilter_ipv4.h>
15#include <linux/module.h>
16#include <linux/kmod.h>
17#include <linux/skbuff.h>
18#include <linux/proc_fs.h>
19#include <linux/slab.h>
20#include <net/checksum.h>
21#include <net/route.h>
22#include <linux/bitops.h>
23
24#include <linux/netfilter_ipv4/ip_tables.h>
25#include <net/netfilter/nf_nat.h>
26#include <net/netfilter/nf_nat_core.h>
27#include <net/netfilter/nf_nat_rule.h>
28
29#define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
30 (1 << NF_INET_POST_ROUTING) | \
31 (1 << NF_INET_LOCAL_OUT) | \
32 (1 << NF_INET_LOCAL_IN))
33
34static const struct xt_table nat_table = {
35 .name = "nat",
36 .valid_hooks = NAT_VALID_HOOKS,
37 .me = THIS_MODULE,
38 .af = NFPROTO_IPV4,
39};
40
41/* Source NAT */
42static unsigned int
43ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
44{
45 struct nf_conn *ct;
46 enum ip_conntrack_info ctinfo;
47 const struct nf_nat_multi_range_compat *mr = par->targinfo;
48
49 NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING ||
50 par->hooknum == NF_INET_LOCAL_IN);
51
52 ct = nf_ct_get(skb, &ctinfo);
53
54 /* Connection must be valid and new. */
55 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
56 ctinfo == IP_CT_RELATED_REPLY));
57 NF_CT_ASSERT(par->out != NULL);
58
59 return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC);
60}
61
62static unsigned int
63ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par)
64{
65 struct nf_conn *ct;
66 enum ip_conntrack_info ctinfo;
67 const struct nf_nat_multi_range_compat *mr = par->targinfo;
68
69 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
70 par->hooknum == NF_INET_LOCAL_OUT);
71
72 ct = nf_ct_get(skb, &ctinfo);
73
74 /* Connection must be valid and new. */
75 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
76
77 return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
78}
79
80static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
81{
82 const struct nf_nat_multi_range_compat *mr = par->targinfo;
83
84 /* Must be a valid range */
85 if (mr->rangesize != 1) {
86 pr_info("SNAT: multiple ranges no longer supported\n");
87 return -EINVAL;
88 }
89 return 0;
90}
91
92static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
93{
94 const struct nf_nat_multi_range_compat *mr = par->targinfo;
95
96 /* Must be a valid range */
97 if (mr->rangesize != 1) {
98 pr_info("DNAT: multiple ranges no longer supported\n");
99 return -EINVAL;
100 }
101 return 0;
102}
103
104static unsigned int
105alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
106{
107 /* Force range to this IP; let proto decide mapping for
108 per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
109 */
110 struct nf_nat_range range;
111
112 range.flags = 0;
113 pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
114 HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ?
115 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
116 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
117
118 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
119}
120
121int nf_nat_rule_find(struct sk_buff *skb,
122 unsigned int hooknum,
123 const struct net_device *in,
124 const struct net_device *out,
125 struct nf_conn *ct)
126{
127 struct net *net = nf_ct_net(ct);
128 int ret;
129
130 ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
131
132 if (ret == NF_ACCEPT) {
133 if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
134 /* NUL mapping */
135 ret = alloc_null_binding(ct, hooknum);
136 }
137 return ret;
138}
139
140static struct xt_target ipt_snat_reg __read_mostly = {
141 .name = "SNAT",
142 .target = ipt_snat_target,
143 .targetsize = sizeof(struct nf_nat_multi_range_compat),
144 .table = "nat",
145 .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN),
146 .checkentry = ipt_snat_checkentry,
147 .family = AF_INET,
148};
149
150static struct xt_target ipt_dnat_reg __read_mostly = {
151 .name = "DNAT",
152 .target = ipt_dnat_target,
153 .targetsize = sizeof(struct nf_nat_multi_range_compat),
154 .table = "nat",
155 .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
156 .checkentry = ipt_dnat_checkentry,
157 .family = AF_INET,
158};
159
160static int __net_init nf_nat_rule_net_init(struct net *net)
161{
162 struct ipt_replace *repl;
163
164 repl = ipt_alloc_initial_table(&nat_table);
165 if (repl == NULL)
166 return -ENOMEM;
167 net->ipv4.nat_table = ipt_register_table(net, &nat_table, repl);
168 kfree(repl);
169 if (IS_ERR(net->ipv4.nat_table))
170 return PTR_ERR(net->ipv4.nat_table);
171 return 0;
172}
173
174static void __net_exit nf_nat_rule_net_exit(struct net *net)
175{
176 ipt_unregister_table(net, net->ipv4.nat_table);
177}
178
179static struct pernet_operations nf_nat_rule_net_ops = {
180 .init = nf_nat_rule_net_init,
181 .exit = nf_nat_rule_net_exit,
182};
183
184int __init nf_nat_rule_init(void)
185{
186 int ret;
187
188 ret = register_pernet_subsys(&nf_nat_rule_net_ops);
189 if (ret != 0)
190 goto out;
191 ret = xt_register_target(&ipt_snat_reg);
192 if (ret != 0)
193 goto unregister_table;
194
195 ret = xt_register_target(&ipt_dnat_reg);
196 if (ret != 0)
197 goto unregister_snat;
198
199 return ret;
200
201 unregister_snat:
202 xt_unregister_target(&ipt_snat_reg);
203 unregister_table:
204 unregister_pernet_subsys(&nf_nat_rule_net_ops);
205 out:
206 return ret;
207}
208
209void nf_nat_rule_cleanup(void)
210{
211 xt_unregister_target(&ipt_dnat_reg);
212 xt_unregister_target(&ipt_snat_reg);
213 unregister_pernet_subsys(&nf_nat_rule_net_ops);
214}
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
new file mode 100644
index 00000000000..e40cf7816fd
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -0,0 +1,561 @@
1/* SIP extension for NAT alteration.
2 *
3 * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
4 * based on RR's ip_nat_ftp.c and other modules.
5 * (C) 2007 United Security Providers
6 * (C) 2007, 2008 Patrick McHardy <kaber@trash.net>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/ip.h>
16#include <net/ip.h>
17#include <linux/udp.h>
18#include <linux/tcp.h>
19
20#include <net/netfilter/nf_nat.h>
21#include <net/netfilter/nf_nat_helper.h>
22#include <net/netfilter/nf_nat_rule.h>
23#include <net/netfilter/nf_conntrack_helper.h>
24#include <net/netfilter/nf_conntrack_expect.h>
25#include <linux/netfilter/nf_conntrack_sip.h>
26
27MODULE_LICENSE("GPL");
28MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
29MODULE_DESCRIPTION("SIP NAT helper");
30MODULE_ALIAS("ip_nat_sip");
31
32
33static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff,
34 const char **dptr, unsigned int *datalen,
35 unsigned int matchoff, unsigned int matchlen,
36 const char *buffer, unsigned int buflen)
37{
38 enum ip_conntrack_info ctinfo;
39 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
40 struct tcphdr *th;
41 unsigned int baseoff;
42
43 if (nf_ct_protonum(ct) == IPPROTO_TCP) {
44 th = (struct tcphdr *)(skb->data + ip_hdrlen(skb));
45 baseoff = ip_hdrlen(skb) + th->doff * 4;
46 matchoff += dataoff - baseoff;
47
48 if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
49 matchoff, matchlen,
50 buffer, buflen, false))
51 return 0;
52 } else {
53 baseoff = ip_hdrlen(skb) + sizeof(struct udphdr);
54 matchoff += dataoff - baseoff;
55
56 if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
57 matchoff, matchlen,
58 buffer, buflen))
59 return 0;
60 }
61
62 /* Reload data pointer and adjust datalen value */
63 *dptr = skb->data + dataoff;
64 *datalen += buflen - matchlen;
65 return 1;
66}
67
68static int map_addr(struct sk_buff *skb, unsigned int dataoff,
69 const char **dptr, unsigned int *datalen,
70 unsigned int matchoff, unsigned int matchlen,
71 union nf_inet_addr *addr, __be16 port)
72{
73 enum ip_conntrack_info ctinfo;
74 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
75 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
76 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
77 unsigned int buflen;
78 __be32 newaddr;
79 __be16 newport;
80
81 if (ct->tuplehash[dir].tuple.src.u3.ip == addr->ip &&
82 ct->tuplehash[dir].tuple.src.u.udp.port == port) {
83 newaddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
84 newport = ct->tuplehash[!dir].tuple.dst.u.udp.port;
85 } else if (ct->tuplehash[dir].tuple.dst.u3.ip == addr->ip &&
86 ct->tuplehash[dir].tuple.dst.u.udp.port == port) {
87 newaddr = ct->tuplehash[!dir].tuple.src.u3.ip;
88 newport = ct->tuplehash[!dir].tuple.src.u.udp.port;
89 } else
90 return 1;
91
92 if (newaddr == addr->ip && newport == port)
93 return 1;
94
95 buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport));
96
97 return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
98 buffer, buflen);
99}
100
101static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff,
102 const char **dptr, unsigned int *datalen,
103 enum sip_header_types type)
104{
105 enum ip_conntrack_info ctinfo;
106 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
107 unsigned int matchlen, matchoff;
108 union nf_inet_addr addr;
109 __be16 port;
110
111 if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL,
112 &matchoff, &matchlen, &addr, &port) <= 0)
113 return 1;
114 return map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
115 &addr, port);
116}
117
118static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
119 const char **dptr, unsigned int *datalen)
120{
121 enum ip_conntrack_info ctinfo;
122 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
123 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
124 unsigned int coff, matchoff, matchlen;
125 enum sip_header_types hdr;
126 union nf_inet_addr addr;
127 __be16 port;
128 int request, in_header;
129
130 /* Basic rules: requests and responses. */
131 if (strnicmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) {
132 if (ct_sip_parse_request(ct, *dptr, *datalen,
133 &matchoff, &matchlen,
134 &addr, &port) > 0 &&
135 !map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
136 &addr, port))
137 return NF_DROP;
138 request = 1;
139 } else
140 request = 0;
141
142 if (nf_ct_protonum(ct) == IPPROTO_TCP)
143 hdr = SIP_HDR_VIA_TCP;
144 else
145 hdr = SIP_HDR_VIA_UDP;
146
147 /* Translate topmost Via header and parameters */
148 if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
149 hdr, NULL, &matchoff, &matchlen,
150 &addr, &port) > 0) {
151 unsigned int matchend, poff, plen, buflen, n;
152 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
153
154 /* We're only interested in headers related to this
155 * connection */
156 if (request) {
157 if (addr.ip != ct->tuplehash[dir].tuple.src.u3.ip ||
158 port != ct->tuplehash[dir].tuple.src.u.udp.port)
159 goto next;
160 } else {
161 if (addr.ip != ct->tuplehash[dir].tuple.dst.u3.ip ||
162 port != ct->tuplehash[dir].tuple.dst.u.udp.port)
163 goto next;
164 }
165
166 if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
167 &addr, port))
168 return NF_DROP;
169
170 matchend = matchoff + matchlen;
171
172 /* The maddr= parameter (RFC 3261) specifies where to send
173 * the reply. */
174 if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
175 "maddr=", &poff, &plen,
176 &addr) > 0 &&
177 addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
178 addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
179 buflen = sprintf(buffer, "%pI4",
180 &ct->tuplehash[!dir].tuple.dst.u3.ip);
181 if (!mangle_packet(skb, dataoff, dptr, datalen,
182 poff, plen, buffer, buflen))
183 return NF_DROP;
184 }
185
186 /* The received= parameter (RFC 3261) contains the address
187 * from which the server received the request. */
188 if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
189 "received=", &poff, &plen,
190 &addr) > 0 &&
191 addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
192 addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
193 buflen = sprintf(buffer, "%pI4",
194 &ct->tuplehash[!dir].tuple.src.u3.ip);
195 if (!mangle_packet(skb, dataoff, dptr, datalen,
196 poff, plen, buffer, buflen))
197 return NF_DROP;
198 }
199
200 /* The rport= parameter (RFC 3581) contains the port number
201 * from which the server received the request. */
202 if (ct_sip_parse_numerical_param(ct, *dptr, matchend, *datalen,
203 "rport=", &poff, &plen,
204 &n) > 0 &&
205 htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port &&
206 htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) {
207 __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port;
208 buflen = sprintf(buffer, "%u", ntohs(p));
209 if (!mangle_packet(skb, dataoff, dptr, datalen,
210 poff, plen, buffer, buflen))
211 return NF_DROP;
212 }
213 }
214
215next:
216 /* Translate Contact headers */
217 coff = 0;
218 in_header = 0;
219 while (ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen,
220 SIP_HDR_CONTACT, &in_header,
221 &matchoff, &matchlen,
222 &addr, &port) > 0) {
223 if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
224 &addr, port))
225 return NF_DROP;
226 }
227
228 if (!map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_FROM) ||
229 !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO))
230 return NF_DROP;
231
232 return NF_ACCEPT;
233}
234
235static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off)
236{
237 enum ip_conntrack_info ctinfo;
238 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
239 const struct tcphdr *th;
240
241 if (nf_ct_protonum(ct) != IPPROTO_TCP || off == 0)
242 return;
243
244 th = (struct tcphdr *)(skb->data + ip_hdrlen(skb));
245 nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
246}
247
248/* Handles expected signalling connections and media streams */
249static void ip_nat_sip_expected(struct nf_conn *ct,
250 struct nf_conntrack_expect *exp)
251{
252 struct nf_nat_range range;
253
254 /* This must be a fresh one. */
255 BUG_ON(ct->status & IPS_NAT_DONE_MASK);
256
257 /* For DST manip, map port here to where it's expected. */
258 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
259 range.min = range.max = exp->saved_proto;
260 range.min_ip = range.max_ip = exp->saved_ip;
261 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
262
263 /* Change src to where master sends to, but only if the connection
264 * actually came from the same source. */
265 if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip ==
266 ct->master->tuplehash[exp->dir].tuple.src.u3.ip) {
267 range.flags = IP_NAT_RANGE_MAP_IPS;
268 range.min_ip = range.max_ip
269 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
270 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
271 }
272}
273
274static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff,
275 const char **dptr, unsigned int *datalen,
276 struct nf_conntrack_expect *exp,
277 unsigned int matchoff,
278 unsigned int matchlen)
279{
280 enum ip_conntrack_info ctinfo;
281 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
282 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
283 __be32 newip;
284 u_int16_t port;
285 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
286 unsigned buflen;
287
288 /* Connection will come from reply */
289 if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip)
290 newip = exp->tuple.dst.u3.ip;
291 else
292 newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
293
294 /* If the signalling port matches the connection's source port in the
295 * original direction, try to use the destination port in the opposite
296 * direction. */
297 if (exp->tuple.dst.u.udp.port ==
298 ct->tuplehash[dir].tuple.src.u.udp.port)
299 port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port);
300 else
301 port = ntohs(exp->tuple.dst.u.udp.port);
302
303 exp->saved_ip = exp->tuple.dst.u3.ip;
304 exp->tuple.dst.u3.ip = newip;
305 exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
306 exp->dir = !dir;
307 exp->expectfn = ip_nat_sip_expected;
308
309 for (; port != 0; port++) {
310 int ret;
311
312 exp->tuple.dst.u.udp.port = htons(port);
313 ret = nf_ct_expect_related(exp);
314 if (ret == 0)
315 break;
316 else if (ret != -EBUSY) {
317 port = 0;
318 break;
319 }
320 }
321
322 if (port == 0)
323 return NF_DROP;
324
325 if (exp->tuple.dst.u3.ip != exp->saved_ip ||
326 exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) {
327 buflen = sprintf(buffer, "%pI4:%u", &newip, port);
328 if (!mangle_packet(skb, dataoff, dptr, datalen,
329 matchoff, matchlen, buffer, buflen))
330 goto err;
331 }
332 return NF_ACCEPT;
333
334err:
335 nf_ct_unexpect_related(exp);
336 return NF_DROP;
337}
338
339static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff,
340 const char **dptr, unsigned int *datalen)
341{
342 enum ip_conntrack_info ctinfo;
343 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
344 unsigned int matchoff, matchlen;
345 char buffer[sizeof("65536")];
346 int buflen, c_len;
347
348 /* Get actual SDP length */
349 if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
350 SDP_HDR_VERSION, SDP_HDR_UNSPEC,
351 &matchoff, &matchlen) <= 0)
352 return 0;
353 c_len = *datalen - matchoff + strlen("v=");
354
355 /* Now, update SDP length */
356 if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CONTENT_LENGTH,
357 &matchoff, &matchlen) <= 0)
358 return 0;
359
360 buflen = sprintf(buffer, "%u", c_len);
361 return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
362 buffer, buflen);
363}
364
365static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff,
366 const char **dptr, unsigned int *datalen,
367 unsigned int sdpoff,
368 enum sdp_header_types type,
369 enum sdp_header_types term,
370 char *buffer, int buflen)
371{
372 enum ip_conntrack_info ctinfo;
373 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
374 unsigned int matchlen, matchoff;
375
376 if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term,
377 &matchoff, &matchlen) <= 0)
378 return -ENOENT;
379 return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
380 buffer, buflen) ? 0 : -EINVAL;
381}
382
383static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int dataoff,
384 const char **dptr, unsigned int *datalen,
385 unsigned int sdpoff,
386 enum sdp_header_types type,
387 enum sdp_header_types term,
388 const union nf_inet_addr *addr)
389{
390 char buffer[sizeof("nnn.nnn.nnn.nnn")];
391 unsigned int buflen;
392
393 buflen = sprintf(buffer, "%pI4", &addr->ip);
394 if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, type, term,
395 buffer, buflen))
396 return 0;
397
398 return mangle_content_len(skb, dataoff, dptr, datalen);
399}
400
401static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff,
402 const char **dptr, unsigned int *datalen,
403 unsigned int matchoff,
404 unsigned int matchlen,
405 u_int16_t port)
406{
407 char buffer[sizeof("nnnnn")];
408 unsigned int buflen;
409
410 buflen = sprintf(buffer, "%u", port);
411 if (!mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
412 buffer, buflen))
413 return 0;
414
415 return mangle_content_len(skb, dataoff, dptr, datalen);
416}
417
418static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff,
419 const char **dptr, unsigned int *datalen,
420 unsigned int sdpoff,
421 const union nf_inet_addr *addr)
422{
423 char buffer[sizeof("nnn.nnn.nnn.nnn")];
424 unsigned int buflen;
425
426 /* Mangle session description owner and contact addresses */
427 buflen = sprintf(buffer, "%pI4", &addr->ip);
428 if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff,
429 SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA,
430 buffer, buflen))
431 return 0;
432
433 switch (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff,
434 SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA,
435 buffer, buflen)) {
436 case 0:
437 /*
438 * RFC 2327:
439 *
440 * Session description
441 *
442 * c=* (connection information - not required if included in all media)
443 */
444 case -ENOENT:
445 break;
446 default:
447 return 0;
448 }
449
450 return mangle_content_len(skb, dataoff, dptr, datalen);
451}
452
453/* So, this packet has hit the connection tracking matching code.
454 Mangle it, and change the expectation to match the new version. */
455static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff,
456 const char **dptr, unsigned int *datalen,
457 struct nf_conntrack_expect *rtp_exp,
458 struct nf_conntrack_expect *rtcp_exp,
459 unsigned int mediaoff,
460 unsigned int medialen,
461 union nf_inet_addr *rtp_addr)
462{
463 enum ip_conntrack_info ctinfo;
464 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
465 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
466 u_int16_t port;
467
468 /* Connection will come from reply */
469 if (ct->tuplehash[dir].tuple.src.u3.ip ==
470 ct->tuplehash[!dir].tuple.dst.u3.ip)
471 rtp_addr->ip = rtp_exp->tuple.dst.u3.ip;
472 else
473 rtp_addr->ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
474
475 rtp_exp->saved_ip = rtp_exp->tuple.dst.u3.ip;
476 rtp_exp->tuple.dst.u3.ip = rtp_addr->ip;
477 rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
478 rtp_exp->dir = !dir;
479 rtp_exp->expectfn = ip_nat_sip_expected;
480
481 rtcp_exp->saved_ip = rtcp_exp->tuple.dst.u3.ip;
482 rtcp_exp->tuple.dst.u3.ip = rtp_addr->ip;
483 rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
484 rtcp_exp->dir = !dir;
485 rtcp_exp->expectfn = ip_nat_sip_expected;
486
487 /* Try to get same pair of ports: if not, try to change them. */
488 for (port = ntohs(rtp_exp->tuple.dst.u.udp.port);
489 port != 0; port += 2) {
490 int ret;
491
492 rtp_exp->tuple.dst.u.udp.port = htons(port);
493 ret = nf_ct_expect_related(rtp_exp);
494 if (ret == -EBUSY)
495 continue;
496 else if (ret < 0) {
497 port = 0;
498 break;
499 }
500 rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
501 ret = nf_ct_expect_related(rtcp_exp);
502 if (ret == 0)
503 break;
504 else if (ret != -EBUSY) {
505 nf_ct_unexpect_related(rtp_exp);
506 port = 0;
507 break;
508 }
509 }
510
511 if (port == 0)
512 goto err1;
513
514 /* Update media port. */
515 if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port &&
516 !ip_nat_sdp_port(skb, dataoff, dptr, datalen,
517 mediaoff, medialen, port))
518 goto err2;
519
520 return NF_ACCEPT;
521
522err2:
523 nf_ct_unexpect_related(rtp_exp);
524 nf_ct_unexpect_related(rtcp_exp);
525err1:
526 return NF_DROP;
527}
528
529static void __exit nf_nat_sip_fini(void)
530{
531 rcu_assign_pointer(nf_nat_sip_hook, NULL);
532 rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, NULL);
533 rcu_assign_pointer(nf_nat_sip_expect_hook, NULL);
534 rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL);
535 rcu_assign_pointer(nf_nat_sdp_port_hook, NULL);
536 rcu_assign_pointer(nf_nat_sdp_session_hook, NULL);
537 rcu_assign_pointer(nf_nat_sdp_media_hook, NULL);
538 synchronize_rcu();
539}
540
541static int __init nf_nat_sip_init(void)
542{
543 BUG_ON(nf_nat_sip_hook != NULL);
544 BUG_ON(nf_nat_sip_seq_adjust_hook != NULL);
545 BUG_ON(nf_nat_sip_expect_hook != NULL);
546 BUG_ON(nf_nat_sdp_addr_hook != NULL);
547 BUG_ON(nf_nat_sdp_port_hook != NULL);
548 BUG_ON(nf_nat_sdp_session_hook != NULL);
549 BUG_ON(nf_nat_sdp_media_hook != NULL);
550 rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip);
551 rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust);
552 rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect);
553 rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr);
554 rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port);
555 rcu_assign_pointer(nf_nat_sdp_session_hook, ip_nat_sdp_session);
556 rcu_assign_pointer(nf_nat_sdp_media_hook, ip_nat_sdp_media);
557 return 0;
558}
559
560module_init(nf_nat_sip_init);
561module_exit(nf_nat_sip_fini);
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
new file mode 100644
index 00000000000..a6e606e8482
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -0,0 +1,326 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/types.h>
9#include <linux/icmp.h>
10#include <linux/gfp.h>
11#include <linux/ip.h>
12#include <linux/netfilter.h>
13#include <linux/netfilter_ipv4.h>
14#include <linux/module.h>
15#include <linux/skbuff.h>
16#include <linux/proc_fs.h>
17#include <net/ip.h>
18#include <net/checksum.h>
19#include <linux/spinlock.h>
20
21#include <net/netfilter/nf_conntrack.h>
22#include <net/netfilter/nf_conntrack_core.h>
23#include <net/netfilter/nf_conntrack_extend.h>
24#include <net/netfilter/nf_nat.h>
25#include <net/netfilter/nf_nat_rule.h>
26#include <net/netfilter/nf_nat_protocol.h>
27#include <net/netfilter/nf_nat_core.h>
28#include <net/netfilter/nf_nat_helper.h>
29#include <linux/netfilter_ipv4/ip_tables.h>
30
31#ifdef CONFIG_XFRM
32static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
33{
34 struct flowi4 *fl4 = &fl->u.ip4;
35 const struct nf_conn *ct;
36 const struct nf_conntrack_tuple *t;
37 enum ip_conntrack_info ctinfo;
38 enum ip_conntrack_dir dir;
39 unsigned long statusbit;
40
41 ct = nf_ct_get(skb, &ctinfo);
42 if (ct == NULL)
43 return;
44 dir = CTINFO2DIR(ctinfo);
45 t = &ct->tuplehash[dir].tuple;
46
47 if (dir == IP_CT_DIR_ORIGINAL)
48 statusbit = IPS_DST_NAT;
49 else
50 statusbit = IPS_SRC_NAT;
51
52 if (ct->status & statusbit) {
53 fl4->daddr = t->dst.u3.ip;
54 if (t->dst.protonum == IPPROTO_TCP ||
55 t->dst.protonum == IPPROTO_UDP ||
56 t->dst.protonum == IPPROTO_UDPLITE ||
57 t->dst.protonum == IPPROTO_DCCP ||
58 t->dst.protonum == IPPROTO_SCTP)
59 fl4->fl4_dport = t->dst.u.tcp.port;
60 }
61
62 statusbit ^= IPS_NAT_MASK;
63
64 if (ct->status & statusbit) {
65 fl4->saddr = t->src.u3.ip;
66 if (t->dst.protonum == IPPROTO_TCP ||
67 t->dst.protonum == IPPROTO_UDP ||
68 t->dst.protonum == IPPROTO_UDPLITE ||
69 t->dst.protonum == IPPROTO_DCCP ||
70 t->dst.protonum == IPPROTO_SCTP)
71 fl4->fl4_sport = t->src.u.tcp.port;
72 }
73}
74#endif
75
76static unsigned int
77nf_nat_fn(unsigned int hooknum,
78 struct sk_buff *skb,
79 const struct net_device *in,
80 const struct net_device *out,
81 int (*okfn)(struct sk_buff *))
82{
83 struct nf_conn *ct;
84 enum ip_conntrack_info ctinfo;
85 struct nf_conn_nat *nat;
86 /* maniptype == SRC for postrouting. */
87 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
88
89 /* We never see fragments: conntrack defrags on pre-routing
90 and local-out, and nf_nat_out protects post-routing. */
91 NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
92
93 ct = nf_ct_get(skb, &ctinfo);
94 /* Can't track? It's not due to stress, or conntrack would
95 have dropped it. Hence it's the user's responsibility to
96 packet filter it out, or implement conntrack/NAT for that
97 protocol. 8) --RR */
98 if (!ct)
99 return NF_ACCEPT;
100
101 /* Don't try to NAT if this packet is not conntracked */
102 if (nf_ct_is_untracked(ct))
103 return NF_ACCEPT;
104
105 nat = nfct_nat(ct);
106 if (!nat) {
107 /* NAT module was loaded late. */
108 if (nf_ct_is_confirmed(ct))
109 return NF_ACCEPT;
110 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
111 if (nat == NULL) {
112 pr_debug("failed to add NAT extension\n");
113 return NF_ACCEPT;
114 }
115 }
116
117 switch (ctinfo) {
118 case IP_CT_RELATED:
119 case IP_CT_RELATED_REPLY:
120 if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
121 if (!nf_nat_icmp_reply_translation(ct, ctinfo,
122 hooknum, skb))
123 return NF_DROP;
124 else
125 return NF_ACCEPT;
126 }
127 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
128 case IP_CT_NEW:
129
130 /* Seen it before? This can happen for loopback, retrans,
131 or local packets.. */
132 if (!nf_nat_initialized(ct, maniptype)) {
133 unsigned int ret;
134
135 ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
136 if (ret != NF_ACCEPT)
137 return ret;
138 } else
139 pr_debug("Already setup manip %s for ct %p\n",
140 maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
141 ct);
142 break;
143
144 default:
145 /* ESTABLISHED */
146 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
147 ctinfo == IP_CT_ESTABLISHED_REPLY);
148 }
149
150 return nf_nat_packet(ct, ctinfo, hooknum, skb);
151}
152
153static unsigned int
154nf_nat_in(unsigned int hooknum,
155 struct sk_buff *skb,
156 const struct net_device *in,
157 const struct net_device *out,
158 int (*okfn)(struct sk_buff *))
159{
160 unsigned int ret;
161 __be32 daddr = ip_hdr(skb)->daddr;
162
163 ret = nf_nat_fn(hooknum, skb, in, out, okfn);
164 if (ret != NF_DROP && ret != NF_STOLEN &&
165 daddr != ip_hdr(skb)->daddr)
166 skb_dst_drop(skb);
167
168 return ret;
169}
170
171static unsigned int
172nf_nat_out(unsigned int hooknum,
173 struct sk_buff *skb,
174 const struct net_device *in,
175 const struct net_device *out,
176 int (*okfn)(struct sk_buff *))
177{
178#ifdef CONFIG_XFRM
179 const struct nf_conn *ct;
180 enum ip_conntrack_info ctinfo;
181#endif
182 unsigned int ret;
183
184 /* root is playing with raw sockets. */
185 if (skb->len < sizeof(struct iphdr) ||
186 ip_hdrlen(skb) < sizeof(struct iphdr))
187 return NF_ACCEPT;
188
189 ret = nf_nat_fn(hooknum, skb, in, out, okfn);
190#ifdef CONFIG_XFRM
191 if (ret != NF_DROP && ret != NF_STOLEN &&
192 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
193 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
194
195 if ((ct->tuplehash[dir].tuple.src.u3.ip !=
196 ct->tuplehash[!dir].tuple.dst.u3.ip) ||
197 (ct->tuplehash[dir].tuple.src.u.all !=
198 ct->tuplehash[!dir].tuple.dst.u.all)
199 )
200 return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP;
201 }
202#endif
203 return ret;
204}
205
206static unsigned int
207nf_nat_local_fn(unsigned int hooknum,
208 struct sk_buff *skb,
209 const struct net_device *in,
210 const struct net_device *out,
211 int (*okfn)(struct sk_buff *))
212{
213 const struct nf_conn *ct;
214 enum ip_conntrack_info ctinfo;
215 unsigned int ret;
216
217 /* root is playing with raw sockets. */
218 if (skb->len < sizeof(struct iphdr) ||
219 ip_hdrlen(skb) < sizeof(struct iphdr))
220 return NF_ACCEPT;
221
222 ret = nf_nat_fn(hooknum, skb, in, out, okfn);
223 if (ret != NF_DROP && ret != NF_STOLEN &&
224 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
225 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
226
227 if (ct->tuplehash[dir].tuple.dst.u3.ip !=
228 ct->tuplehash[!dir].tuple.src.u3.ip) {
229 if (ip_route_me_harder(skb, RTN_UNSPEC))
230 ret = NF_DROP;
231 }
232#ifdef CONFIG_XFRM
233 else if (ct->tuplehash[dir].tuple.dst.u.all !=
234 ct->tuplehash[!dir].tuple.src.u.all)
235 if (ip_xfrm_me_harder(skb))
236 ret = NF_DROP;
237#endif
238 }
239 return ret;
240}
241
242/* We must be after connection tracking and before packet filtering. */
243
244static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
245 /* Before packet filtering, change destination */
246 {
247 .hook = nf_nat_in,
248 .owner = THIS_MODULE,
249 .pf = NFPROTO_IPV4,
250 .hooknum = NF_INET_PRE_ROUTING,
251 .priority = NF_IP_PRI_NAT_DST,
252 },
253 /* After packet filtering, change source */
254 {
255 .hook = nf_nat_out,
256 .owner = THIS_MODULE,
257 .pf = NFPROTO_IPV4,
258 .hooknum = NF_INET_POST_ROUTING,
259 .priority = NF_IP_PRI_NAT_SRC,
260 },
261 /* Before packet filtering, change destination */
262 {
263 .hook = nf_nat_local_fn,
264 .owner = THIS_MODULE,
265 .pf = NFPROTO_IPV4,
266 .hooknum = NF_INET_LOCAL_OUT,
267 .priority = NF_IP_PRI_NAT_DST,
268 },
269 /* After packet filtering, change source */
270 {
271 .hook = nf_nat_fn,
272 .owner = THIS_MODULE,
273 .pf = NFPROTO_IPV4,
274 .hooknum = NF_INET_LOCAL_IN,
275 .priority = NF_IP_PRI_NAT_SRC,
276 },
277};
278
279static int __init nf_nat_standalone_init(void)
280{
281 int ret = 0;
282
283 need_ipv4_conntrack();
284
285#ifdef CONFIG_XFRM
286 BUG_ON(ip_nat_decode_session != NULL);
287 rcu_assign_pointer(ip_nat_decode_session, nat_decode_session);
288#endif
289 ret = nf_nat_rule_init();
290 if (ret < 0) {
291 pr_err("nf_nat_init: can't setup rules.\n");
292 goto cleanup_decode_session;
293 }
294 ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
295 if (ret < 0) {
296 pr_err("nf_nat_init: can't register hooks.\n");
297 goto cleanup_rule_init;
298 }
299 return ret;
300
301 cleanup_rule_init:
302 nf_nat_rule_cleanup();
303 cleanup_decode_session:
304#ifdef CONFIG_XFRM
305 rcu_assign_pointer(ip_nat_decode_session, NULL);
306 synchronize_net();
307#endif
308 return ret;
309}
310
311static void __exit nf_nat_standalone_fini(void)
312{
313 nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
314 nf_nat_rule_cleanup();
315#ifdef CONFIG_XFRM
316 rcu_assign_pointer(ip_nat_decode_session, NULL);
317 synchronize_net();
318#endif
319 /* Conntrack caches are unregistered in nf_conntrack_cleanup */
320}
321
322module_init(nf_nat_standalone_init);
323module_exit(nf_nat_standalone_fini);
324
325MODULE_LICENSE("GPL");
326MODULE_ALIAS("ip_nat");
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
new file mode 100644
index 00000000000..7274a43c7a1
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -0,0 +1,51 @@
1/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8#include <linux/module.h>
9#include <linux/udp.h>
10
11#include <net/netfilter/nf_nat_helper.h>
12#include <net/netfilter/nf_nat_rule.h>
13#include <net/netfilter/nf_conntrack_helper.h>
14#include <net/netfilter/nf_conntrack_expect.h>
15#include <linux/netfilter/nf_conntrack_tftp.h>
16
17MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
18MODULE_DESCRIPTION("TFTP NAT helper");
19MODULE_LICENSE("GPL");
20MODULE_ALIAS("ip_nat_tftp");
21
22static unsigned int help(struct sk_buff *skb,
23 enum ip_conntrack_info ctinfo,
24 struct nf_conntrack_expect *exp)
25{
26 const struct nf_conn *ct = exp->master;
27
28 exp->saved_proto.udp.port
29 = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
30 exp->dir = IP_CT_DIR_REPLY;
31 exp->expectfn = nf_nat_follow_master;
32 if (nf_ct_expect_related(exp) != 0)
33 return NF_DROP;
34 return NF_ACCEPT;
35}
36
37static void __exit nf_nat_tftp_fini(void)
38{
39 rcu_assign_pointer(nf_nat_tftp_hook, NULL);
40 synchronize_rcu();
41}
42
43static int __init nf_nat_tftp_init(void)
44{
45 BUG_ON(nf_nat_tftp_hook != NULL);
46 rcu_assign_pointer(nf_nat_tftp_hook, help);
47 return 0;
48}
49
50module_init(nf_nat_tftp_init);
51module_exit(nf_nat_tftp_fini);
diff --git a/net/ipv4/sysfs_net_ipv4.c b/net/ipv4/sysfs_net_ipv4.c
new file mode 100644
index 00000000000..0cbbf10026a
--- /dev/null
+++ b/net/ipv4/sysfs_net_ipv4.c
@@ -0,0 +1,88 @@
1/*
2 * net/ipv4/sysfs_net_ipv4.c
3 *
4 * sysfs-based networking knobs (so we can, unlike with sysctl, control perms)
5 *
6 * Copyright (C) 2008 Google, Inc.
7 *
8 * Robert Love <rlove@google.com>
9 *
10 * This software is licensed under the terms of the GNU General Public
11 * License version 2, as published by the Free Software Foundation, and
12 * may be copied, distributed, and modified under those terms.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 */
19
20#include <linux/kobject.h>
21#include <linux/string.h>
22#include <linux/sysfs.h>
23#include <linux/init.h>
24#include <net/tcp.h>
25
26#define CREATE_IPV4_FILE(_name, _var) \
27static ssize_t _name##_show(struct kobject *kobj, \
28 struct kobj_attribute *attr, char *buf) \
29{ \
30 return sprintf(buf, "%d\n", _var); \
31} \
32static ssize_t _name##_store(struct kobject *kobj, \
33 struct kobj_attribute *attr, \
34 const char *buf, size_t count) \
35{ \
36 int val, ret; \
37 ret = sscanf(buf, "%d", &val); \
38 if (ret != 1) \
39 return -EINVAL; \
40 if (val < 0) \
41 return -EINVAL; \
42 _var = val; \
43 return count; \
44} \
45static struct kobj_attribute _name##_attr = \
46 __ATTR(_name, 0644, _name##_show, _name##_store)
47
48CREATE_IPV4_FILE(tcp_wmem_min, sysctl_tcp_wmem[0]);
49CREATE_IPV4_FILE(tcp_wmem_def, sysctl_tcp_wmem[1]);
50CREATE_IPV4_FILE(tcp_wmem_max, sysctl_tcp_wmem[2]);
51
52CREATE_IPV4_FILE(tcp_rmem_min, sysctl_tcp_rmem[0]);
53CREATE_IPV4_FILE(tcp_rmem_def, sysctl_tcp_rmem[1]);
54CREATE_IPV4_FILE(tcp_rmem_max, sysctl_tcp_rmem[2]);
55
56static struct attribute *ipv4_attrs[] = {
57 &tcp_wmem_min_attr.attr,
58 &tcp_wmem_def_attr.attr,
59 &tcp_wmem_max_attr.attr,
60 &tcp_rmem_min_attr.attr,
61 &tcp_rmem_def_attr.attr,
62 &tcp_rmem_max_attr.attr,
63 NULL
64};
65
66static struct attribute_group ipv4_attr_group = {
67 .attrs = ipv4_attrs,
68};
69
70static __init int sysfs_ipv4_init(void)
71{
72 struct kobject *ipv4_kobject;
73 int ret;
74
75 ipv4_kobject = kobject_create_and_add("ipv4", kernel_kobj);
76 if (!ipv4_kobject)
77 return -ENOMEM;
78
79 ret = sysfs_create_group(ipv4_kobject, &ipv4_attr_group);
80 if (ret) {
81 kobject_put(ipv4_kobject);
82 return ret;
83 }
84
85 return 0;
86}
87
88subsys_initcall(sysfs_ipv4_init);
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
new file mode 100644
index 00000000000..e63c3972a73
--- /dev/null
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -0,0 +1,638 @@
1/*
2 * This is a module which is used for queueing IPv6 packets and
3 * communicating with userspace via netlink.
4 *
5 * (C) 2001 Fernando Anton, this code is GPL.
6 * IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
7 * Universidad Carlos III de Madrid - Leganes (Madrid) - Spain
8 * Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain
9 * email: fanton@it.uc3m.es
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15#include <linux/module.h>
16#include <linux/skbuff.h>
17#include <linux/init.h>
18#include <linux/ipv6.h>
19#include <linux/notifier.h>
20#include <linux/netdevice.h>
21#include <linux/netfilter.h>
22#include <linux/netlink.h>
23#include <linux/spinlock.h>
24#include <linux/sysctl.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/mutex.h>
28#include <linux/slab.h>
29#include <net/net_namespace.h>
30#include <net/sock.h>
31#include <net/ipv6.h>
32#include <net/ip6_route.h>
33#include <net/netfilter/nf_queue.h>
34#include <linux/netfilter_ipv4/ip_queue.h>
35#include <linux/netfilter_ipv4/ip_tables.h>
36#include <linux/netfilter_ipv6/ip6_tables.h>
37
38#define IPQ_QMAX_DEFAULT 1024
39#define IPQ_PROC_FS_NAME "ip6_queue"
40#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
41
42typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
43
44static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
45static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
46static DEFINE_SPINLOCK(queue_lock);
47static int peer_pid __read_mostly;
48static unsigned int copy_range __read_mostly;
49static unsigned int queue_total;
50static unsigned int queue_dropped = 0;
51static unsigned int queue_user_dropped = 0;
52static struct sock *ipqnl __read_mostly;
53static LIST_HEAD(queue_list);
54static DEFINE_MUTEX(ipqnl_mutex);
55
56static inline void
57__ipq_enqueue_entry(struct nf_queue_entry *entry)
58{
59 list_add_tail(&entry->list, &queue_list);
60 queue_total++;
61}
62
63static inline int
64__ipq_set_mode(unsigned char mode, unsigned int range)
65{
66 int status = 0;
67
68 switch(mode) {
69 case IPQ_COPY_NONE:
70 case IPQ_COPY_META:
71 copy_mode = mode;
72 copy_range = 0;
73 break;
74
75 case IPQ_COPY_PACKET:
76 if (range > 0xFFFF)
77 range = 0xFFFF;
78 copy_range = range;
79 copy_mode = mode;
80 break;
81
82 default:
83 status = -EINVAL;
84
85 }
86 return status;
87}
88
89static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
90
91static inline void
92__ipq_reset(void)
93{
94 peer_pid = 0;
95 net_disable_timestamp();
96 __ipq_set_mode(IPQ_COPY_NONE, 0);
97 __ipq_flush(NULL, 0);
98}
99
100static struct nf_queue_entry *
101ipq_find_dequeue_entry(unsigned long id)
102{
103 struct nf_queue_entry *entry = NULL, *i;
104
105 spin_lock_bh(&queue_lock);
106
107 list_for_each_entry(i, &queue_list, list) {
108 if ((unsigned long)i == id) {
109 entry = i;
110 break;
111 }
112 }
113
114 if (entry) {
115 list_del(&entry->list);
116 queue_total--;
117 }
118
119 spin_unlock_bh(&queue_lock);
120 return entry;
121}
122
123static void
124__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
125{
126 struct nf_queue_entry *entry, *next;
127
128 list_for_each_entry_safe(entry, next, &queue_list, list) {
129 if (!cmpfn || cmpfn(entry, data)) {
130 list_del(&entry->list);
131 queue_total--;
132 nf_reinject(entry, NF_DROP);
133 }
134 }
135}
136
137static void
138ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
139{
140 spin_lock_bh(&queue_lock);
141 __ipq_flush(cmpfn, data);
142 spin_unlock_bh(&queue_lock);
143}
144
145static struct sk_buff *
146ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
147{
148 sk_buff_data_t old_tail;
149 size_t size = 0;
150 size_t data_len = 0;
151 struct sk_buff *skb;
152 struct ipq_packet_msg *pmsg;
153 struct nlmsghdr *nlh;
154 struct timeval tv;
155
156 switch (ACCESS_ONCE(copy_mode)) {
157 case IPQ_COPY_META:
158 case IPQ_COPY_NONE:
159 size = NLMSG_SPACE(sizeof(*pmsg));
160 break;
161
162 case IPQ_COPY_PACKET:
163 if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
164 (*errp = skb_checksum_help(entry->skb)))
165 return NULL;
166
167 data_len = ACCESS_ONCE(copy_range);
168 if (data_len == 0 || data_len > entry->skb->len)
169 data_len = entry->skb->len;
170
171 size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
172 break;
173
174 default:
175 *errp = -EINVAL;
176 return NULL;
177 }
178
179 skb = alloc_skb(size, GFP_ATOMIC);
180 if (!skb)
181 goto nlmsg_failure;
182
183 old_tail = skb->tail;
184 nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
185 pmsg = NLMSG_DATA(nlh);
186 memset(pmsg, 0, sizeof(*pmsg));
187
188 pmsg->packet_id = (unsigned long )entry;
189 pmsg->data_len = data_len;
190 tv = ktime_to_timeval(entry->skb->tstamp);
191 pmsg->timestamp_sec = tv.tv_sec;
192 pmsg->timestamp_usec = tv.tv_usec;
193 pmsg->mark = entry->skb->mark;
194 pmsg->hook = entry->hook;
195 pmsg->hw_protocol = entry->skb->protocol;
196
197 if (entry->indev)
198 strcpy(pmsg->indev_name, entry->indev->name);
199 else
200 pmsg->indev_name[0] = '\0';
201
202 if (entry->outdev)
203 strcpy(pmsg->outdev_name, entry->outdev->name);
204 else
205 pmsg->outdev_name[0] = '\0';
206
207 if (entry->indev && entry->skb->dev &&
208 entry->skb->mac_header != entry->skb->network_header) {
209 pmsg->hw_type = entry->skb->dev->type;
210 pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr);
211 }
212
213 if (data_len)
214 if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
215 BUG();
216
217 nlh->nlmsg_len = skb->tail - old_tail;
218 return skb;
219
220nlmsg_failure:
221 kfree_skb(skb);
222 *errp = -EINVAL;
223 printk(KERN_ERR "ip6_queue: error creating packet message\n");
224 return NULL;
225}
226
227static int
228ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
229{
230 int status = -EINVAL;
231 struct sk_buff *nskb;
232
233 if (copy_mode == IPQ_COPY_NONE)
234 return -EAGAIN;
235
236 nskb = ipq_build_packet_message(entry, &status);
237 if (nskb == NULL)
238 return status;
239
240 spin_lock_bh(&queue_lock);
241
242 if (!peer_pid)
243 goto err_out_free_nskb;
244
245 if (queue_total >= queue_maxlen) {
246 queue_dropped++;
247 status = -ENOSPC;
248 if (net_ratelimit())
249 printk (KERN_WARNING "ip6_queue: fill at %d entries, "
250 "dropping packet(s). Dropped: %d\n", queue_total,
251 queue_dropped);
252 goto err_out_free_nskb;
253 }
254
255 /* netlink_unicast will either free the nskb or attach it to a socket */
256 status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
257 if (status < 0) {
258 queue_user_dropped++;
259 goto err_out_unlock;
260 }
261
262 __ipq_enqueue_entry(entry);
263
264 spin_unlock_bh(&queue_lock);
265 return status;
266
267err_out_free_nskb:
268 kfree_skb(nskb);
269
270err_out_unlock:
271 spin_unlock_bh(&queue_lock);
272 return status;
273}
274
275static int
276ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
277{
278 int diff;
279 struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
280 struct sk_buff *nskb;
281
282 if (v->data_len < sizeof(*user_iph))
283 return 0;
284 diff = v->data_len - e->skb->len;
285 if (diff < 0) {
286 if (pskb_trim(e->skb, v->data_len))
287 return -ENOMEM;
288 } else if (diff > 0) {
289 if (v->data_len > 0xFFFF)
290 return -EINVAL;
291 if (diff > skb_tailroom(e->skb)) {
292 nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
293 diff, GFP_ATOMIC);
294 if (!nskb) {
295 printk(KERN_WARNING "ip6_queue: OOM "
296 "in mangle, dropping packet\n");
297 return -ENOMEM;
298 }
299 kfree_skb(e->skb);
300 e->skb = nskb;
301 }
302 skb_put(e->skb, diff);
303 }
304 if (!skb_make_writable(e->skb, v->data_len))
305 return -ENOMEM;
306 skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
307 e->skb->ip_summed = CHECKSUM_NONE;
308
309 return 0;
310}
311
312static int
313ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
314{
315 struct nf_queue_entry *entry;
316
317 if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
318 return -EINVAL;
319
320 entry = ipq_find_dequeue_entry(vmsg->id);
321 if (entry == NULL)
322 return -ENOENT;
323 else {
324 int verdict = vmsg->value;
325
326 if (vmsg->data_len && vmsg->data_len == len)
327 if (ipq_mangle_ipv6(vmsg, entry) < 0)
328 verdict = NF_DROP;
329
330 nf_reinject(entry, verdict);
331 return 0;
332 }
333}
334
335static int
336ipq_set_mode(unsigned char mode, unsigned int range)
337{
338 int status;
339
340 spin_lock_bh(&queue_lock);
341 status = __ipq_set_mode(mode, range);
342 spin_unlock_bh(&queue_lock);
343 return status;
344}
345
346static int
347ipq_receive_peer(struct ipq_peer_msg *pmsg,
348 unsigned char type, unsigned int len)
349{
350 int status = 0;
351
352 if (len < sizeof(*pmsg))
353 return -EINVAL;
354
355 switch (type) {
356 case IPQM_MODE:
357 status = ipq_set_mode(pmsg->msg.mode.value,
358 pmsg->msg.mode.range);
359 break;
360
361 case IPQM_VERDICT:
362 status = ipq_set_verdict(&pmsg->msg.verdict,
363 len - sizeof(*pmsg));
364 break;
365 default:
366 status = -EINVAL;
367 }
368 return status;
369}
370
371static int
372dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
373{
374 if (entry->indev)
375 if (entry->indev->ifindex == ifindex)
376 return 1;
377
378 if (entry->outdev)
379 if (entry->outdev->ifindex == ifindex)
380 return 1;
381#ifdef CONFIG_BRIDGE_NETFILTER
382 if (entry->skb->nf_bridge) {
383 if (entry->skb->nf_bridge->physindev &&
384 entry->skb->nf_bridge->physindev->ifindex == ifindex)
385 return 1;
386 if (entry->skb->nf_bridge->physoutdev &&
387 entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
388 return 1;
389 }
390#endif
391 return 0;
392}
393
394static void
395ipq_dev_drop(int ifindex)
396{
397 ipq_flush(dev_cmp, ifindex);
398}
399
400#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
401
402static inline void
403__ipq_rcv_skb(struct sk_buff *skb)
404{
405 int status, type, pid, flags;
406 unsigned int nlmsglen, skblen;
407 struct nlmsghdr *nlh;
408
409 skblen = skb->len;
410 if (skblen < sizeof(*nlh))
411 return;
412
413 nlh = nlmsg_hdr(skb);
414 nlmsglen = nlh->nlmsg_len;
415 if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
416 return;
417
418 pid = nlh->nlmsg_pid;
419 flags = nlh->nlmsg_flags;
420
421 if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
422 RCV_SKB_FAIL(-EINVAL);
423
424 if (flags & MSG_TRUNC)
425 RCV_SKB_FAIL(-ECOMM);
426
427 type = nlh->nlmsg_type;
428 if (type < NLMSG_NOOP || type >= IPQM_MAX)
429 RCV_SKB_FAIL(-EINVAL);
430
431 if (type <= IPQM_BASE)
432 return;
433
434 if (security_netlink_recv(skb, CAP_NET_ADMIN))
435 RCV_SKB_FAIL(-EPERM);
436
437 spin_lock_bh(&queue_lock);
438
439 if (peer_pid) {
440 if (peer_pid != pid) {
441 spin_unlock_bh(&queue_lock);
442 RCV_SKB_FAIL(-EBUSY);
443 }
444 } else {
445 net_enable_timestamp();
446 peer_pid = pid;
447 }
448
449 spin_unlock_bh(&queue_lock);
450
451 status = ipq_receive_peer(NLMSG_DATA(nlh), type,
452 nlmsglen - NLMSG_LENGTH(0));
453 if (status < 0)
454 RCV_SKB_FAIL(status);
455
456 if (flags & NLM_F_ACK)
457 netlink_ack(skb, nlh, 0);
458}
459
460static void
461ipq_rcv_skb(struct sk_buff *skb)
462{
463 mutex_lock(&ipqnl_mutex);
464 __ipq_rcv_skb(skb);
465 mutex_unlock(&ipqnl_mutex);
466}
467
468static int
469ipq_rcv_dev_event(struct notifier_block *this,
470 unsigned long event, void *ptr)
471{
472 struct net_device *dev = ptr;
473
474 if (!net_eq(dev_net(dev), &init_net))
475 return NOTIFY_DONE;
476
477 /* Drop any packets associated with the downed device */
478 if (event == NETDEV_DOWN)
479 ipq_dev_drop(dev->ifindex);
480 return NOTIFY_DONE;
481}
482
483static struct notifier_block ipq_dev_notifier = {
484 .notifier_call = ipq_rcv_dev_event,
485};
486
487static int
488ipq_rcv_nl_event(struct notifier_block *this,
489 unsigned long event, void *ptr)
490{
491 struct netlink_notify *n = ptr;
492
493 if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) {
494 spin_lock_bh(&queue_lock);
495 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
496 __ipq_reset();
497 spin_unlock_bh(&queue_lock);
498 }
499 return NOTIFY_DONE;
500}
501
502static struct notifier_block ipq_nl_notifier = {
503 .notifier_call = ipq_rcv_nl_event,
504};
505
506#ifdef CONFIG_SYSCTL
507static struct ctl_table_header *ipq_sysctl_header;
508
509static ctl_table ipq_table[] = {
510 {
511 .procname = NET_IPQ_QMAX_NAME,
512 .data = &queue_maxlen,
513 .maxlen = sizeof(queue_maxlen),
514 .mode = 0644,
515 .proc_handler = proc_dointvec
516 },
517 { }
518};
519#endif
520
521#ifdef CONFIG_PROC_FS
522static int ip6_queue_show(struct seq_file *m, void *v)
523{
524 spin_lock_bh(&queue_lock);
525
526 seq_printf(m,
527 "Peer PID : %d\n"
528 "Copy mode : %hu\n"
529 "Copy range : %u\n"
530 "Queue length : %u\n"
531 "Queue max. length : %u\n"
532 "Queue dropped : %u\n"
533 "Netfilter dropped : %u\n",
534 peer_pid,
535 copy_mode,
536 copy_range,
537 queue_total,
538 queue_maxlen,
539 queue_dropped,
540 queue_user_dropped);
541
542 spin_unlock_bh(&queue_lock);
543 return 0;
544}
545
546static int ip6_queue_open(struct inode *inode, struct file *file)
547{
548 return single_open(file, ip6_queue_show, NULL);
549}
550
551static const struct file_operations ip6_queue_proc_fops = {
552 .open = ip6_queue_open,
553 .read = seq_read,
554 .llseek = seq_lseek,
555 .release = single_release,
556 .owner = THIS_MODULE,
557};
558#endif
559
560static const struct nf_queue_handler nfqh = {
561 .name = "ip6_queue",
562 .outfn = &ipq_enqueue_packet,
563};
564
565static int __init ip6_queue_init(void)
566{
567 int status = -ENOMEM;
568 struct proc_dir_entry *proc __maybe_unused;
569
570 netlink_register_notifier(&ipq_nl_notifier);
571 ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0,
572 ipq_rcv_skb, NULL, THIS_MODULE);
573 if (ipqnl == NULL) {
574 printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
575 goto cleanup_netlink_notifier;
576 }
577
578#ifdef CONFIG_PROC_FS
579 proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
580 &ip6_queue_proc_fops);
581 if (!proc) {
582 printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
583 goto cleanup_ipqnl;
584 }
585#endif
586 register_netdevice_notifier(&ipq_dev_notifier);
587#ifdef CONFIG_SYSCTL
588 ipq_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, ipq_table);
589#endif
590 status = nf_register_queue_handler(NFPROTO_IPV6, &nfqh);
591 if (status < 0) {
592 printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
593 goto cleanup_sysctl;
594 }
595 return status;
596
597cleanup_sysctl:
598#ifdef CONFIG_SYSCTL
599 unregister_sysctl_table(ipq_sysctl_header);
600#endif
601 unregister_netdevice_notifier(&ipq_dev_notifier);
602 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
603
604cleanup_ipqnl: __maybe_unused
605 netlink_kernel_release(ipqnl);
606 mutex_lock(&ipqnl_mutex);
607 mutex_unlock(&ipqnl_mutex);
608
609cleanup_netlink_notifier:
610 netlink_unregister_notifier(&ipq_nl_notifier);
611 return status;
612}
613
614static void __exit ip6_queue_fini(void)
615{
616 nf_unregister_queue_handlers(&nfqh);
617
618 ipq_flush(NULL, 0);
619
620#ifdef CONFIG_SYSCTL
621 unregister_sysctl_table(ipq_sysctl_header);
622#endif
623 unregister_netdevice_notifier(&ipq_dev_notifier);
624 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
625
626 netlink_kernel_release(ipqnl);
627 mutex_lock(&ipqnl_mutex);
628 mutex_unlock(&ipqnl_mutex);
629
630 netlink_unregister_notifier(&ipq_nl_notifier);
631}
632
633MODULE_DESCRIPTION("IPv6 packet queue handler");
634MODULE_LICENSE("GPL");
635MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_IP6_FW);
636
637module_init(ip6_queue_init);
638module_exit(ip6_queue_fini);
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
new file mode 100644
index 00000000000..e6af8d72f26
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -0,0 +1,527 @@
1/*
2 * This is a module which is used for logging packets.
3 */
4
5/* (C) 2001 Jan Rekorajski <baggins@pld.org.pl>
6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/if_arp.h>
16#include <linux/ip.h>
17#include <linux/spinlock.h>
18#include <linux/icmpv6.h>
19#include <net/udp.h>
20#include <net/tcp.h>
21#include <net/ipv6.h>
22#include <linux/netfilter.h>
23#include <linux/netfilter/x_tables.h>
24#include <linux/netfilter_ipv6/ip6_tables.h>
25#include <net/netfilter/nf_log.h>
26#include <net/netfilter/xt_log.h>
27
28MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
29MODULE_DESCRIPTION("Xtables: IPv6 packet logging to syslog");
30MODULE_LICENSE("GPL");
31
32struct in_device;
33#include <net/route.h>
34#include <linux/netfilter_ipv6/ip6t_LOG.h>
35
36/* One level of recursion won't kill us */
37static void dump_packet(struct sbuff *m,
38 const struct nf_loginfo *info,
39 const struct sk_buff *skb, unsigned int ip6hoff,
40 int recurse)
41{
42 u_int8_t currenthdr;
43 int fragment;
44 struct ipv6hdr _ip6h;
45 const struct ipv6hdr *ih;
46 unsigned int ptr;
47 unsigned int hdrlen = 0;
48 unsigned int logflags;
49
50 if (info->type == NF_LOG_TYPE_LOG)
51 logflags = info->u.log.logflags;
52 else
53 logflags = NF_LOG_MASK;
54
55 ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
56 if (ih == NULL) {
57 sb_add(m, "TRUNCATED");
58 return;
59 }
60
61 /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
62 sb_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr);
63
64 /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
65 sb_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
66 ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
67 (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20,
68 ih->hop_limit,
69 (ntohl(*(__be32 *)ih) & 0x000fffff));
70
71 fragment = 0;
72 ptr = ip6hoff + sizeof(struct ipv6hdr);
73 currenthdr = ih->nexthdr;
74 while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) {
75 struct ipv6_opt_hdr _hdr;
76 const struct ipv6_opt_hdr *hp;
77
78 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
79 if (hp == NULL) {
80 sb_add(m, "TRUNCATED");
81 return;
82 }
83
84 /* Max length: 48 "OPT (...) " */
85 if (logflags & IP6T_LOG_IPOPT)
86 sb_add(m, "OPT ( ");
87
88 switch (currenthdr) {
89 case IPPROTO_FRAGMENT: {
90 struct frag_hdr _fhdr;
91 const struct frag_hdr *fh;
92
93 sb_add(m, "FRAG:");
94 fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
95 &_fhdr);
96 if (fh == NULL) {
97 sb_add(m, "TRUNCATED ");
98 return;
99 }
100
101 /* Max length: 6 "65535 " */
102 sb_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8);
103
104 /* Max length: 11 "INCOMPLETE " */
105 if (fh->frag_off & htons(0x0001))
106 sb_add(m, "INCOMPLETE ");
107
108 sb_add(m, "ID:%08x ", ntohl(fh->identification));
109
110 if (ntohs(fh->frag_off) & 0xFFF8)
111 fragment = 1;
112
113 hdrlen = 8;
114
115 break;
116 }
117 case IPPROTO_DSTOPTS:
118 case IPPROTO_ROUTING:
119 case IPPROTO_HOPOPTS:
120 if (fragment) {
121 if (logflags & IP6T_LOG_IPOPT)
122 sb_add(m, ")");
123 return;
124 }
125 hdrlen = ipv6_optlen(hp);
126 break;
127 /* Max Length */
128 case IPPROTO_AH:
129 if (logflags & IP6T_LOG_IPOPT) {
130 struct ip_auth_hdr _ahdr;
131 const struct ip_auth_hdr *ah;
132
133 /* Max length: 3 "AH " */
134 sb_add(m, "AH ");
135
136 if (fragment) {
137 sb_add(m, ")");
138 return;
139 }
140
141 ah = skb_header_pointer(skb, ptr, sizeof(_ahdr),
142 &_ahdr);
143 if (ah == NULL) {
144 /*
145 * Max length: 26 "INCOMPLETE [65535
146 * bytes] )"
147 */
148 sb_add(m, "INCOMPLETE [%u bytes] )",
149 skb->len - ptr);
150 return;
151 }
152
153 /* Length: 15 "SPI=0xF1234567 */
154 sb_add(m, "SPI=0x%x ", ntohl(ah->spi));
155
156 }
157
158 hdrlen = (hp->hdrlen+2)<<2;
159 break;
160 case IPPROTO_ESP:
161 if (logflags & IP6T_LOG_IPOPT) {
162 struct ip_esp_hdr _esph;
163 const struct ip_esp_hdr *eh;
164
165 /* Max length: 4 "ESP " */
166 sb_add(m, "ESP ");
167
168 if (fragment) {
169 sb_add(m, ")");
170 return;
171 }
172
173 /*
174 * Max length: 26 "INCOMPLETE [65535 bytes] )"
175 */
176 eh = skb_header_pointer(skb, ptr, sizeof(_esph),
177 &_esph);
178 if (eh == NULL) {
179 sb_add(m, "INCOMPLETE [%u bytes] )",
180 skb->len - ptr);
181 return;
182 }
183
184 /* Length: 16 "SPI=0xF1234567 )" */
185 sb_add(m, "SPI=0x%x )", ntohl(eh->spi) );
186
187 }
188 return;
189 default:
190 /* Max length: 20 "Unknown Ext Hdr 255" */
191 sb_add(m, "Unknown Ext Hdr %u", currenthdr);
192 return;
193 }
194 if (logflags & IP6T_LOG_IPOPT)
195 sb_add(m, ") ");
196
197 currenthdr = hp->nexthdr;
198 ptr += hdrlen;
199 }
200
201 switch (currenthdr) {
202 case IPPROTO_TCP: {
203 struct tcphdr _tcph;
204 const struct tcphdr *th;
205
206 /* Max length: 10 "PROTO=TCP " */
207 sb_add(m, "PROTO=TCP ");
208
209 if (fragment)
210 break;
211
212 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
213 th = skb_header_pointer(skb, ptr, sizeof(_tcph), &_tcph);
214 if (th == NULL) {
215 sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
216 return;
217 }
218
219 /* Max length: 20 "SPT=65535 DPT=65535 " */
220 sb_add(m, "SPT=%u DPT=%u ",
221 ntohs(th->source), ntohs(th->dest));
222 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
223 if (logflags & IP6T_LOG_TCPSEQ)
224 sb_add(m, "SEQ=%u ACK=%u ",
225 ntohl(th->seq), ntohl(th->ack_seq));
226 /* Max length: 13 "WINDOW=65535 " */
227 sb_add(m, "WINDOW=%u ", ntohs(th->window));
228 /* Max length: 9 "RES=0x3C " */
229 sb_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
230 /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
231 if (th->cwr)
232 sb_add(m, "CWR ");
233 if (th->ece)
234 sb_add(m, "ECE ");
235 if (th->urg)
236 sb_add(m, "URG ");
237 if (th->ack)
238 sb_add(m, "ACK ");
239 if (th->psh)
240 sb_add(m, "PSH ");
241 if (th->rst)
242 sb_add(m, "RST ");
243 if (th->syn)
244 sb_add(m, "SYN ");
245 if (th->fin)
246 sb_add(m, "FIN ");
247 /* Max length: 11 "URGP=65535 " */
248 sb_add(m, "URGP=%u ", ntohs(th->urg_ptr));
249
250 if ((logflags & IP6T_LOG_TCPOPT) &&
251 th->doff * 4 > sizeof(struct tcphdr)) {
252 u_int8_t _opt[60 - sizeof(struct tcphdr)];
253 const u_int8_t *op;
254 unsigned int i;
255 unsigned int optsize = th->doff * 4
256 - sizeof(struct tcphdr);
257
258 op = skb_header_pointer(skb,
259 ptr + sizeof(struct tcphdr),
260 optsize, _opt);
261 if (op == NULL) {
262 sb_add(m, "OPT (TRUNCATED)");
263 return;
264 }
265
266 /* Max length: 127 "OPT (" 15*4*2chars ") " */
267 sb_add(m, "OPT (");
268			for (i = 0; i < optsize; i++)
269 sb_add(m, "%02X", op[i]);
270 sb_add(m, ") ");
271 }
272 break;
273 }
274 case IPPROTO_UDP:
275 case IPPROTO_UDPLITE: {
276 struct udphdr _udph;
277 const struct udphdr *uh;
278
279 if (currenthdr == IPPROTO_UDP)
280 /* Max length: 10 "PROTO=UDP " */
281 sb_add(m, "PROTO=UDP " );
282 else /* Max length: 14 "PROTO=UDPLITE " */
283 sb_add(m, "PROTO=UDPLITE ");
284
285 if (fragment)
286 break;
287
288 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
289 uh = skb_header_pointer(skb, ptr, sizeof(_udph), &_udph);
290 if (uh == NULL) {
291 sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
292 return;
293 }
294
295 /* Max length: 20 "SPT=65535 DPT=65535 " */
296 sb_add(m, "SPT=%u DPT=%u LEN=%u ",
297 ntohs(uh->source), ntohs(uh->dest),
298 ntohs(uh->len));
299 break;
300 }
301 case IPPROTO_ICMPV6: {
302 struct icmp6hdr _icmp6h;
303 const struct icmp6hdr *ic;
304
305 /* Max length: 13 "PROTO=ICMPv6 " */
306 sb_add(m, "PROTO=ICMPv6 ");
307
308 if (fragment)
309 break;
310
311 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
312 ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
313 if (ic == NULL) {
314 sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
315 return;
316 }
317
318 /* Max length: 18 "TYPE=255 CODE=255 " */
319 sb_add(m, "TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code);
320
321 switch (ic->icmp6_type) {
322 case ICMPV6_ECHO_REQUEST:
323 case ICMPV6_ECHO_REPLY:
324 /* Max length: 19 "ID=65535 SEQ=65535 " */
325 sb_add(m, "ID=%u SEQ=%u ",
326 ntohs(ic->icmp6_identifier),
327 ntohs(ic->icmp6_sequence));
328 break;
329 case ICMPV6_MGM_QUERY:
330 case ICMPV6_MGM_REPORT:
331 case ICMPV6_MGM_REDUCTION:
332 break;
333
334 case ICMPV6_PARAMPROB:
335 /* Max length: 17 "POINTER=ffffffff " */
336 sb_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer));
337 /* Fall through */
338 case ICMPV6_DEST_UNREACH:
339 case ICMPV6_PKT_TOOBIG:
340 case ICMPV6_TIME_EXCEED:
341 /* Max length: 3+maxlen */
342 if (recurse) {
343 sb_add(m, "[");
344 dump_packet(m, info, skb,
345 ptr + sizeof(_icmp6h), 0);
346 sb_add(m, "] ");
347 }
348
349 /* Max length: 10 "MTU=65535 " */
350 if (ic->icmp6_type == ICMPV6_PKT_TOOBIG)
351 sb_add(m, "MTU=%u ", ntohl(ic->icmp6_mtu));
352 }
353 break;
354 }
355 /* Max length: 10 "PROTO=255 " */
356 default:
357 sb_add(m, "PROTO=%u ", currenthdr);
358 }
359
360 /* Max length: 15 "UID=4294967295 " */
361 if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) {
362 read_lock_bh(&skb->sk->sk_callback_lock);
363 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
364 sb_add(m, "UID=%u GID=%u ",
365 skb->sk->sk_socket->file->f_cred->fsuid,
366 skb->sk->sk_socket->file->f_cred->fsgid);
367 read_unlock_bh(&skb->sk->sk_callback_lock);
368 }
369
370 /* Max length: 16 "MARK=0xFFFFFFFF " */
371 if (!recurse && skb->mark)
372 sb_add(m, "MARK=0x%x ", skb->mark);
373}
374
375static void dump_mac_header(struct sbuff *m,
376 const struct nf_loginfo *info,
377 const struct sk_buff *skb)
378{
379 struct net_device *dev = skb->dev;
380 unsigned int logflags = 0;
381
382 if (info->type == NF_LOG_TYPE_LOG)
383 logflags = info->u.log.logflags;
384
385 if (!(logflags & IP6T_LOG_MACDECODE))
386 goto fallback;
387
388 switch (dev->type) {
389 case ARPHRD_ETHER:
390 sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
391 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
392 ntohs(eth_hdr(skb)->h_proto));
393 return;
394 default:
395 break;
396 }
397
398fallback:
399 sb_add(m, "MAC=");
400 if (dev->hard_header_len &&
401 skb->mac_header != skb->network_header) {
402 const unsigned char *p = skb_mac_header(skb);
403 unsigned int len = dev->hard_header_len;
404 unsigned int i;
405
406 if (dev->type == ARPHRD_SIT &&
407 (p -= ETH_HLEN) < skb->head)
408 p = NULL;
409
410 if (p != NULL) {
411 sb_add(m, "%02x", *p++);
412 for (i = 1; i < len; i++)
413 sb_add(m, ":%02x", *p++);
414 }
415 sb_add(m, " ");
416
417 if (dev->type == ARPHRD_SIT) {
418 const struct iphdr *iph =
419 (struct iphdr *)skb_mac_header(skb);
420 sb_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr);
421 }
422 } else
423 sb_add(m, " ");
424}
425
426static struct nf_loginfo default_loginfo = {
427 .type = NF_LOG_TYPE_LOG,
428 .u = {
429 .log = {
430 .level = 5,
431 .logflags = NF_LOG_MASK,
432 },
433 },
434};
435
436static void
437ip6t_log_packet(u_int8_t pf,
438 unsigned int hooknum,
439 const struct sk_buff *skb,
440 const struct net_device *in,
441 const struct net_device *out,
442 const struct nf_loginfo *loginfo,
443 const char *prefix)
444{
445 struct sbuff *m = sb_open();
446
447 if (!loginfo)
448 loginfo = &default_loginfo;
449
450 sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
451 prefix,
452 in ? in->name : "",
453 out ? out->name : "");
454
455 if (in != NULL)
456 dump_mac_header(m, loginfo, skb);
457
458 dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);
459
460 sb_close(m);
461}
462
463static unsigned int
464log_tg6(struct sk_buff *skb, const struct xt_action_param *par)
465{
466 const struct ip6t_log_info *loginfo = par->targinfo;
467 struct nf_loginfo li;
468
469 li.type = NF_LOG_TYPE_LOG;
470 li.u.log.level = loginfo->level;
471 li.u.log.logflags = loginfo->logflags;
472
473 ip6t_log_packet(NFPROTO_IPV6, par->hooknum, skb, par->in, par->out,
474 &li, loginfo->prefix);
475 return XT_CONTINUE;
476}
477
478
479static int log_tg6_check(const struct xt_tgchk_param *par)
480{
481 const struct ip6t_log_info *loginfo = par->targinfo;
482
483 if (loginfo->level >= 8) {
484 pr_debug("level %u >= 8\n", loginfo->level);
485 return -EINVAL;
486 }
487 if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
488 pr_debug("prefix not null-terminated\n");
489 return -EINVAL;
490 }
491 return 0;
492}
493
494static struct xt_target log_tg6_reg __read_mostly = {
495 .name = "LOG",
496 .family = NFPROTO_IPV6,
497 .target = log_tg6,
498 .targetsize = sizeof(struct ip6t_log_info),
499 .checkentry = log_tg6_check,
500 .me = THIS_MODULE,
501};
502
503static struct nf_logger ip6t_logger __read_mostly = {
504 .name = "ip6t_LOG",
505 .logfn = &ip6t_log_packet,
506 .me = THIS_MODULE,
507};
508
509static int __init log_tg6_init(void)
510{
511 int ret;
512
513 ret = xt_register_target(&log_tg6_reg);
514 if (ret < 0)
515 return ret;
516 nf_log_register(NFPROTO_IPV6, &ip6t_logger);
517 return 0;
518}
519
520static void __exit log_tg6_exit(void)
521{
522 nf_log_unregister(&ip6t_logger);
523 xt_unregister_target(&log_tg6_reg);
524}
525
526module_init(log_tg6_init);
527module_exit(log_tg6_exit);
diff --git a/net/mac80211/driver-trace.c b/net/mac80211/driver-trace.c
new file mode 100644
index 00000000000..8ed8711b1a6
--- /dev/null
+++ b/net/mac80211/driver-trace.c
@@ -0,0 +1,9 @@
1/* bug in tracepoint.h, it should include this */
2#include <linux/module.h>
3
4/* sparse isn't too happy with all macros... */
5#ifndef __CHECKER__
6#include "driver-ops.h"
7#define CREATE_TRACE_POINTS
8#include "driver-trace.h"
9#endif
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
new file mode 100644
index 00000000000..f47b00dc7af
--- /dev/null
+++ b/net/mac80211/driver-trace.h
@@ -0,0 +1,1492 @@
1#if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
2#define __MAC80211_DRIVER_TRACE
3
4#include <linux/tracepoint.h>
5#include <net/mac80211.h>
6#include "ieee80211_i.h"
7
8#if !defined(CONFIG_MAC80211_DRIVER_API_TRACER) || defined(__CHECKER__)
9#undef TRACE_EVENT
10#define TRACE_EVENT(name, proto, ...) \
11static inline void trace_ ## name(proto) {}
12#undef DECLARE_EVENT_CLASS
13#define DECLARE_EVENT_CLASS(...)
14#undef DEFINE_EVENT
15#define DEFINE_EVENT(evt_class, name, proto, ...) \
16static inline void trace_ ## name(proto) {}
17#endif
18
19#undef TRACE_SYSTEM
20#define TRACE_SYSTEM mac80211
21
22#define MAXNAME 32
23#define LOCAL_ENTRY __array(char, wiphy_name, 32)
24#define LOCAL_ASSIGN strlcpy(__entry->wiphy_name, wiphy_name(local->hw.wiphy), MAXNAME)
25#define LOCAL_PR_FMT "%s"
26#define LOCAL_PR_ARG __entry->wiphy_name
27
28#define STA_ENTRY __array(char, sta_addr, ETH_ALEN)
29#define STA_ASSIGN (sta ? memcpy(__entry->sta_addr, sta->addr, ETH_ALEN) : memset(__entry->sta_addr, 0, ETH_ALEN))
30#define STA_PR_FMT " sta:%pM"
31#define STA_PR_ARG __entry->sta_addr
32
33#define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \
34 __field(bool, p2p) \
35 __string(vif_name, sdata->dev ? sdata->dev->name : "<nodev>")
36#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \
37 __entry->p2p = sdata->vif.p2p; \
38 __assign_str(vif_name, sdata->dev ? sdata->dev->name : "<nodev>")
39#define VIF_PR_FMT " vif:%s(%d%s)"
40#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : ""
41
42/*
43 * Tracing for driver callbacks.
44 */
45
46DECLARE_EVENT_CLASS(local_only_evt,
47 TP_PROTO(struct ieee80211_local *local),
48 TP_ARGS(local),
49 TP_STRUCT__entry(
50 LOCAL_ENTRY
51 ),
52 TP_fast_assign(
53 LOCAL_ASSIGN;
54 ),
55 TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG)
56);
57
58DECLARE_EVENT_CLASS(local_sdata_addr_evt,
59 TP_PROTO(struct ieee80211_local *local,
60 struct ieee80211_sub_if_data *sdata),
61 TP_ARGS(local, sdata),
62
63 TP_STRUCT__entry(
64 LOCAL_ENTRY
65 VIF_ENTRY
66 __array(char, addr, 6)
67 ),
68
69 TP_fast_assign(
70 LOCAL_ASSIGN;
71 VIF_ASSIGN;
72 memcpy(__entry->addr, sdata->vif.addr, 6);
73 ),
74
75 TP_printk(
76 LOCAL_PR_FMT VIF_PR_FMT " addr:%pM",
77 LOCAL_PR_ARG, VIF_PR_ARG, __entry->addr
78 )
79);
80
81DECLARE_EVENT_CLASS(local_u32_evt,
82 TP_PROTO(struct ieee80211_local *local, u32 value),
83 TP_ARGS(local, value),
84
85 TP_STRUCT__entry(
86 LOCAL_ENTRY
87 __field(u32, value)
88 ),
89
90 TP_fast_assign(
91 LOCAL_ASSIGN;
92 __entry->value = value;
93 ),
94
95 TP_printk(
96 LOCAL_PR_FMT " value:%d",
97 LOCAL_PR_ARG, __entry->value
98 )
99);
100
101DECLARE_EVENT_CLASS(local_sdata_evt,
102 TP_PROTO(struct ieee80211_local *local,
103 struct ieee80211_sub_if_data *sdata),
104 TP_ARGS(local, sdata),
105
106 TP_STRUCT__entry(
107 LOCAL_ENTRY
108 VIF_ENTRY
109 ),
110
111 TP_fast_assign(
112 LOCAL_ASSIGN;
113 VIF_ASSIGN;
114 ),
115
116 TP_printk(
117 LOCAL_PR_FMT VIF_PR_FMT,
118 LOCAL_PR_ARG, VIF_PR_ARG
119 )
120);
121
122DEFINE_EVENT(local_only_evt, drv_return_void,
123 TP_PROTO(struct ieee80211_local *local),
124 TP_ARGS(local)
125);
126
127TRACE_EVENT(drv_return_int,
128 TP_PROTO(struct ieee80211_local *local, int ret),
129 TP_ARGS(local, ret),
130 TP_STRUCT__entry(
131 LOCAL_ENTRY
132 __field(int, ret)
133 ),
134 TP_fast_assign(
135 LOCAL_ASSIGN;
136 __entry->ret = ret;
137 ),
138 TP_printk(LOCAL_PR_FMT " - %d", LOCAL_PR_ARG, __entry->ret)
139);
140
141TRACE_EVENT(drv_return_bool,
142 TP_PROTO(struct ieee80211_local *local, bool ret),
143 TP_ARGS(local, ret),
144 TP_STRUCT__entry(
145 LOCAL_ENTRY
146 __field(bool, ret)
147 ),
148 TP_fast_assign(
149 LOCAL_ASSIGN;
150 __entry->ret = ret;
151 ),
152 TP_printk(LOCAL_PR_FMT " - %s", LOCAL_PR_ARG, (__entry->ret) ?
153 "true" : "false")
154);
155
156TRACE_EVENT(drv_return_u64,
157 TP_PROTO(struct ieee80211_local *local, u64 ret),
158 TP_ARGS(local, ret),
159 TP_STRUCT__entry(
160 LOCAL_ENTRY
161 __field(u64, ret)
162 ),
163 TP_fast_assign(
164 LOCAL_ASSIGN;
165 __entry->ret = ret;
166 ),
167 TP_printk(LOCAL_PR_FMT " - %llu", LOCAL_PR_ARG, __entry->ret)
168);
169
170DEFINE_EVENT(local_only_evt, drv_start,
171 TP_PROTO(struct ieee80211_local *local),
172 TP_ARGS(local)
173);
174
175DEFINE_EVENT(local_only_evt, drv_suspend,
176 TP_PROTO(struct ieee80211_local *local),
177 TP_ARGS(local)
178);
179
180DEFINE_EVENT(local_only_evt, drv_resume,
181 TP_PROTO(struct ieee80211_local *local),
182 TP_ARGS(local)
183);
184
185DEFINE_EVENT(local_only_evt, drv_stop,
186 TP_PROTO(struct ieee80211_local *local),
187 TP_ARGS(local)
188);
189
190DEFINE_EVENT(local_sdata_addr_evt, drv_add_interface,
191 TP_PROTO(struct ieee80211_local *local,
192 struct ieee80211_sub_if_data *sdata),
193 TP_ARGS(local, sdata)
194);
195
196TRACE_EVENT(drv_change_interface,
197 TP_PROTO(struct ieee80211_local *local,
198 struct ieee80211_sub_if_data *sdata,
199 enum nl80211_iftype type, bool p2p),
200
201 TP_ARGS(local, sdata, type, p2p),
202
203 TP_STRUCT__entry(
204 LOCAL_ENTRY
205 VIF_ENTRY
206 __field(u32, new_type)
207 __field(bool, new_p2p)
208 ),
209
210 TP_fast_assign(
211 LOCAL_ASSIGN;
212 VIF_ASSIGN;
213 __entry->new_type = type;
214 __entry->new_p2p = p2p;
215 ),
216
217 TP_printk(
218 LOCAL_PR_FMT VIF_PR_FMT " new type:%d%s",
219 LOCAL_PR_ARG, VIF_PR_ARG, __entry->new_type,
220 __entry->new_p2p ? "/p2p" : ""
221 )
222);
223
224DEFINE_EVENT(local_sdata_addr_evt, drv_remove_interface,
225 TP_PROTO(struct ieee80211_local *local,
226 struct ieee80211_sub_if_data *sdata),
227 TP_ARGS(local, sdata)
228);
229
230TRACE_EVENT(drv_config,
231 TP_PROTO(struct ieee80211_local *local,
232 u32 changed),
233
234 TP_ARGS(local, changed),
235
236 TP_STRUCT__entry(
237 LOCAL_ENTRY
238 __field(u32, changed)
239 __field(u32, flags)
240 __field(int, power_level)
241 __field(int, dynamic_ps_timeout)
242 __field(int, max_sleep_period)
243 __field(u16, listen_interval)
244 __field(u8, long_frame_max_tx_count)
245 __field(u8, short_frame_max_tx_count)
246 __field(int, center_freq)
247 __field(int, channel_type)
248 __field(int, smps)
249 ),
250
251 TP_fast_assign(
252 LOCAL_ASSIGN;
253 __entry->changed = changed;
254 __entry->flags = local->hw.conf.flags;
255 __entry->power_level = local->hw.conf.power_level;
256 __entry->dynamic_ps_timeout = local->hw.conf.dynamic_ps_timeout;
257 __entry->max_sleep_period = local->hw.conf.max_sleep_period;
258 __entry->listen_interval = local->hw.conf.listen_interval;
259 __entry->long_frame_max_tx_count = local->hw.conf.long_frame_max_tx_count;
260 __entry->short_frame_max_tx_count = local->hw.conf.short_frame_max_tx_count;
261 __entry->center_freq = local->hw.conf.channel->center_freq;
262 __entry->channel_type = local->hw.conf.channel_type;
263 __entry->smps = local->hw.conf.smps_mode;
264 ),
265
266 TP_printk(
267 LOCAL_PR_FMT " ch:%#x freq:%d",
268 LOCAL_PR_ARG, __entry->changed, __entry->center_freq
269 )
270);
271
272TRACE_EVENT(drv_bss_info_changed,
273 TP_PROTO(struct ieee80211_local *local,
274 struct ieee80211_sub_if_data *sdata,
275 struct ieee80211_bss_conf *info,
276 u32 changed),
277
278 TP_ARGS(local, sdata, info, changed),
279
280 TP_STRUCT__entry(
281 LOCAL_ENTRY
282 VIF_ENTRY
283 __field(bool, assoc)
284 __field(u16, aid)
285 __field(bool, cts)
286 __field(bool, shortpre)
287 __field(bool, shortslot)
288 __field(u8, dtimper)
289 __field(u16, bcnint)
290 __field(u16, assoc_cap)
291 __field(u64, timestamp)
292 __field(u32, basic_rates)
293 __field(u32, changed)
294 __field(bool, enable_beacon)
295 __field(u16, ht_operation_mode)
296 ),
297
298 TP_fast_assign(
299 LOCAL_ASSIGN;
300 VIF_ASSIGN;
301 __entry->changed = changed;
302 __entry->aid = info->aid;
303 __entry->assoc = info->assoc;
304 __entry->shortpre = info->use_short_preamble;
305 __entry->cts = info->use_cts_prot;
306 __entry->shortslot = info->use_short_slot;
307 __entry->dtimper = info->dtim_period;
308 __entry->bcnint = info->beacon_int;
309 __entry->assoc_cap = info->assoc_capability;
310 __entry->timestamp = info->timestamp;
311 __entry->basic_rates = info->basic_rates;
312 __entry->enable_beacon = info->enable_beacon;
313 __entry->ht_operation_mode = info->ht_operation_mode;
314 ),
315
316 TP_printk(
317 LOCAL_PR_FMT VIF_PR_FMT " changed:%#x",
318 LOCAL_PR_ARG, VIF_PR_ARG, __entry->changed
319 )
320);
321
322DECLARE_EVENT_CLASS(tx_sync_evt,
323 TP_PROTO(struct ieee80211_local *local,
324 struct ieee80211_sub_if_data *sdata,
325 const u8 *bssid,
326 enum ieee80211_tx_sync_type type),
327 TP_ARGS(local, sdata, bssid, type),
328
329 TP_STRUCT__entry(
330 LOCAL_ENTRY
331 VIF_ENTRY
332 __array(char, bssid, ETH_ALEN)
333 __field(u32, sync_type)
334 ),
335
336 TP_fast_assign(
337 LOCAL_ASSIGN;
338 VIF_ASSIGN;
339 memcpy(__entry->bssid, bssid, ETH_ALEN);
340 __entry->sync_type = type;
341 ),
342
343 TP_printk(
344 LOCAL_PR_FMT VIF_PR_FMT " bssid:%pM type:%d",
345 LOCAL_PR_ARG, VIF_PR_ARG, __entry->bssid, __entry->sync_type
346 )
347);
348
349DEFINE_EVENT(tx_sync_evt, drv_tx_sync,
350 TP_PROTO(struct ieee80211_local *local,
351 struct ieee80211_sub_if_data *sdata,
352 const u8 *bssid,
353 enum ieee80211_tx_sync_type type),
354 TP_ARGS(local, sdata, bssid, type)
355);
356
357DEFINE_EVENT(tx_sync_evt, drv_finish_tx_sync,
358 TP_PROTO(struct ieee80211_local *local,
359 struct ieee80211_sub_if_data *sdata,
360 const u8 *bssid,
361 enum ieee80211_tx_sync_type type),
362 TP_ARGS(local, sdata, bssid, type)
363);
364
365TRACE_EVENT(drv_prepare_multicast,
366 TP_PROTO(struct ieee80211_local *local, int mc_count),
367
368 TP_ARGS(local, mc_count),
369
370 TP_STRUCT__entry(
371 LOCAL_ENTRY
372 __field(int, mc_count)
373 ),
374
375 TP_fast_assign(
376 LOCAL_ASSIGN;
377 __entry->mc_count = mc_count;
378 ),
379
380 TP_printk(
381 LOCAL_PR_FMT " prepare mc (%d)",
382 LOCAL_PR_ARG, __entry->mc_count
383 )
384);
385
386TRACE_EVENT(drv_configure_filter,
387 TP_PROTO(struct ieee80211_local *local,
388 unsigned int changed_flags,
389 unsigned int *total_flags,
390 u64 multicast),
391
392 TP_ARGS(local, changed_flags, total_flags, multicast),
393
394 TP_STRUCT__entry(
395 LOCAL_ENTRY
396 __field(unsigned int, changed)
397 __field(unsigned int, total)
398 __field(u64, multicast)
399 ),
400
401 TP_fast_assign(
402 LOCAL_ASSIGN;
403 __entry->changed = changed_flags;
404 __entry->total = *total_flags;
405 __entry->multicast = multicast;
406 ),
407
408 TP_printk(
409 LOCAL_PR_FMT " changed:%#x total:%#x",
410 LOCAL_PR_ARG, __entry->changed, __entry->total
411 )
412);
413
414TRACE_EVENT(drv_set_tim,
415 TP_PROTO(struct ieee80211_local *local,
416 struct ieee80211_sta *sta, bool set),
417
418 TP_ARGS(local, sta, set),
419
420 TP_STRUCT__entry(
421 LOCAL_ENTRY
422 STA_ENTRY
423 __field(bool, set)
424 ),
425
426 TP_fast_assign(
427 LOCAL_ASSIGN;
428 STA_ASSIGN;
429 __entry->set = set;
430 ),
431
432 TP_printk(
433 LOCAL_PR_FMT STA_PR_FMT " set:%d",
434		LOCAL_PR_ARG, STA_PR_ARG, __entry->set
435 )
436);
437
438TRACE_EVENT(drv_set_key,
439 TP_PROTO(struct ieee80211_local *local,
440 enum set_key_cmd cmd, struct ieee80211_sub_if_data *sdata,
441 struct ieee80211_sta *sta,
442 struct ieee80211_key_conf *key),
443
444 TP_ARGS(local, cmd, sdata, sta, key),
445
446 TP_STRUCT__entry(
447 LOCAL_ENTRY
448 VIF_ENTRY
449 STA_ENTRY
450 __field(u32, cipher)
451 __field(u8, hw_key_idx)
452 __field(u8, flags)
453 __field(s8, keyidx)
454 ),
455
456 TP_fast_assign(
457 LOCAL_ASSIGN;
458 VIF_ASSIGN;
459 STA_ASSIGN;
460 __entry->cipher = key->cipher;
461 __entry->flags = key->flags;
462 __entry->keyidx = key->keyidx;
463 __entry->hw_key_idx = key->hw_key_idx;
464 ),
465
466 TP_printk(
467 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT,
468 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG
469 )
470);
471
472TRACE_EVENT(drv_update_tkip_key,
473 TP_PROTO(struct ieee80211_local *local,
474 struct ieee80211_sub_if_data *sdata,
475 struct ieee80211_key_conf *conf,
476 struct ieee80211_sta *sta, u32 iv32),
477
478 TP_ARGS(local, sdata, conf, sta, iv32),
479
480 TP_STRUCT__entry(
481 LOCAL_ENTRY
482 VIF_ENTRY
483 STA_ENTRY
484 __field(u32, iv32)
485 ),
486
487 TP_fast_assign(
488 LOCAL_ASSIGN;
489 VIF_ASSIGN;
490 STA_ASSIGN;
491 __entry->iv32 = iv32;
492 ),
493
494 TP_printk(
495 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " iv32:%#x",
496		LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->iv32
497 )
498);
499
500DEFINE_EVENT(local_sdata_evt, drv_hw_scan,
501 TP_PROTO(struct ieee80211_local *local,
502 struct ieee80211_sub_if_data *sdata),
503 TP_ARGS(local, sdata)
504);
505
506DEFINE_EVENT(local_sdata_evt, drv_cancel_hw_scan,
507 TP_PROTO(struct ieee80211_local *local,
508 struct ieee80211_sub_if_data *sdata),
509 TP_ARGS(local, sdata)
510);
511
512DEFINE_EVENT(local_sdata_evt, drv_sched_scan_start,
513 TP_PROTO(struct ieee80211_local *local,
514 struct ieee80211_sub_if_data *sdata),
515 TP_ARGS(local, sdata)
516);
517
518DEFINE_EVENT(local_sdata_evt, drv_sched_scan_stop,
519 TP_PROTO(struct ieee80211_local *local,
520 struct ieee80211_sub_if_data *sdata),
521 TP_ARGS(local, sdata)
522);
523
524DEFINE_EVENT(local_only_evt, drv_sw_scan_start,
525 TP_PROTO(struct ieee80211_local *local),
526 TP_ARGS(local)
527);
528
529DEFINE_EVENT(local_only_evt, drv_sw_scan_complete,
530 TP_PROTO(struct ieee80211_local *local),
531 TP_ARGS(local)
532);
533
534TRACE_EVENT(drv_get_stats,
535 TP_PROTO(struct ieee80211_local *local,
536 struct ieee80211_low_level_stats *stats,
537 int ret),
538
539 TP_ARGS(local, stats, ret),
540
541 TP_STRUCT__entry(
542 LOCAL_ENTRY
543 __field(int, ret)
544 __field(unsigned int, ackfail)
545 __field(unsigned int, rtsfail)
546 __field(unsigned int, fcserr)
547 __field(unsigned int, rtssucc)
548 ),
549
550 TP_fast_assign(
551 LOCAL_ASSIGN;
552 __entry->ret = ret;
553 __entry->ackfail = stats->dot11ACKFailureCount;
554 __entry->rtsfail = stats->dot11RTSFailureCount;
555 __entry->fcserr = stats->dot11FCSErrorCount;
556 __entry->rtssucc = stats->dot11RTSSuccessCount;
557 ),
558
559 TP_printk(
560 LOCAL_PR_FMT " ret:%d",
561 LOCAL_PR_ARG, __entry->ret
562 )
563);
564
565TRACE_EVENT(drv_get_tkip_seq,
566 TP_PROTO(struct ieee80211_local *local,
567 u8 hw_key_idx, u32 *iv32, u16 *iv16),
568
569 TP_ARGS(local, hw_key_idx, iv32, iv16),
570
571 TP_STRUCT__entry(
572 LOCAL_ENTRY
573 __field(u8, hw_key_idx)
574 __field(u32, iv32)
575 __field(u16, iv16)
576 ),
577
578 TP_fast_assign(
579 LOCAL_ASSIGN;
580 __entry->hw_key_idx = hw_key_idx;
581 __entry->iv32 = *iv32;
582 __entry->iv16 = *iv16;
583 ),
584
585 TP_printk(
586 LOCAL_PR_FMT, LOCAL_PR_ARG
587 )
588);
589
590DEFINE_EVENT(local_u32_evt, drv_set_frag_threshold,
591 TP_PROTO(struct ieee80211_local *local, u32 value),
592 TP_ARGS(local, value)
593);
594
595DEFINE_EVENT(local_u32_evt, drv_set_rts_threshold,
596 TP_PROTO(struct ieee80211_local *local, u32 value),
597 TP_ARGS(local, value)
598);
599
600TRACE_EVENT(drv_set_coverage_class,
601 TP_PROTO(struct ieee80211_local *local, u8 value),
602
603 TP_ARGS(local, value),
604
605 TP_STRUCT__entry(
606 LOCAL_ENTRY
607 __field(u8, value)
608 ),
609
610 TP_fast_assign(
611 LOCAL_ASSIGN;
612 __entry->value = value;
613 ),
614
615 TP_printk(
616 LOCAL_PR_FMT " value:%d",
617 LOCAL_PR_ARG, __entry->value
618 )
619);
620
621TRACE_EVENT(drv_sta_notify,
622 TP_PROTO(struct ieee80211_local *local,
623 struct ieee80211_sub_if_data *sdata,
624 enum sta_notify_cmd cmd,
625 struct ieee80211_sta *sta),
626
627 TP_ARGS(local, sdata, cmd, sta),
628
629 TP_STRUCT__entry(
630 LOCAL_ENTRY
631 VIF_ENTRY
632 STA_ENTRY
633 __field(u32, cmd)
634 ),
635
636 TP_fast_assign(
637 LOCAL_ASSIGN;
638 VIF_ASSIGN;
639 STA_ASSIGN;
640 __entry->cmd = cmd;
641 ),
642
643 TP_printk(
644 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " cmd:%d",
645 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->cmd
646 )
647);
648
649TRACE_EVENT(drv_sta_add,
650 TP_PROTO(struct ieee80211_local *local,
651 struct ieee80211_sub_if_data *sdata,
652 struct ieee80211_sta *sta),
653
654 TP_ARGS(local, sdata, sta),
655
656 TP_STRUCT__entry(
657 LOCAL_ENTRY
658 VIF_ENTRY
659 STA_ENTRY
660 ),
661
662 TP_fast_assign(
663 LOCAL_ASSIGN;
664 VIF_ASSIGN;
665 STA_ASSIGN;
666 ),
667
668 TP_printk(
669 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT,
670 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG
671 )
672);
673
674TRACE_EVENT(drv_sta_remove,
675 TP_PROTO(struct ieee80211_local *local,
676 struct ieee80211_sub_if_data *sdata,
677 struct ieee80211_sta *sta),
678
679 TP_ARGS(local, sdata, sta),
680
681 TP_STRUCT__entry(
682 LOCAL_ENTRY
683 VIF_ENTRY
684 STA_ENTRY
685 ),
686
687 TP_fast_assign(
688 LOCAL_ASSIGN;
689 VIF_ASSIGN;
690 STA_ASSIGN;
691 ),
692
693 TP_printk(
694 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT,
695 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG
696 )
697);
698
699TRACE_EVENT(drv_conf_tx,
700 TP_PROTO(struct ieee80211_local *local, u16 queue,
701 const struct ieee80211_tx_queue_params *params),
702
703 TP_ARGS(local, queue, params),
704
705 TP_STRUCT__entry(
706 LOCAL_ENTRY
707 __field(u16, queue)
708 __field(u16, txop)
709 __field(u16, cw_min)
710 __field(u16, cw_max)
711 __field(u8, aifs)
712 ),
713
714 TP_fast_assign(
715 LOCAL_ASSIGN;
716 __entry->queue = queue;
717 __entry->txop = params->txop;
718 __entry->cw_max = params->cw_max;
719 __entry->cw_min = params->cw_min;
720 __entry->aifs = params->aifs;
721 ),
722
723 TP_printk(
724 LOCAL_PR_FMT " queue:%d",
725 LOCAL_PR_ARG, __entry->queue
726 )
727);
728
729DEFINE_EVENT(local_only_evt, drv_get_tsf,
730 TP_PROTO(struct ieee80211_local *local),
731 TP_ARGS(local)
732);
733
734TRACE_EVENT(drv_set_tsf,
735 TP_PROTO(struct ieee80211_local *local, u64 tsf),
736
737 TP_ARGS(local, tsf),
738
739 TP_STRUCT__entry(
740 LOCAL_ENTRY
741 __field(u64, tsf)
742 ),
743
744 TP_fast_assign(
745 LOCAL_ASSIGN;
746 __entry->tsf = tsf;
747 ),
748
749 TP_printk(
750 LOCAL_PR_FMT " tsf:%llu",
751 LOCAL_PR_ARG, (unsigned long long)__entry->tsf
752 )
753);
754
755DEFINE_EVENT(local_only_evt, drv_reset_tsf,
756 TP_PROTO(struct ieee80211_local *local),
757 TP_ARGS(local)
758);
759
760DEFINE_EVENT(local_only_evt, drv_tx_last_beacon,
761 TP_PROTO(struct ieee80211_local *local),
762 TP_ARGS(local)
763);
764
765TRACE_EVENT(drv_ampdu_action,
766 TP_PROTO(struct ieee80211_local *local,
767 struct ieee80211_sub_if_data *sdata,
768 enum ieee80211_ampdu_mlme_action action,
769 struct ieee80211_sta *sta, u16 tid,
770 u16 *ssn, u8 buf_size),
771
772 TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size),
773
774 TP_STRUCT__entry(
775 LOCAL_ENTRY
776 STA_ENTRY
777 __field(u32, action)
778 __field(u16, tid)
779 __field(u16, ssn)
780 __field(u8, buf_size)
781 VIF_ENTRY
782 ),
783
784 TP_fast_assign(
785 LOCAL_ASSIGN;
786 VIF_ASSIGN;
787 STA_ASSIGN;
788 __entry->action = action;
789 __entry->tid = tid;
790 __entry->ssn = ssn ? *ssn : 0;
791 __entry->buf_size = buf_size;
792 ),
793
794 TP_printk(
795 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d",
796 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action,
797 __entry->tid, __entry->buf_size
798 )
799);
800
801TRACE_EVENT(drv_get_survey,
802 TP_PROTO(struct ieee80211_local *local, int idx,
803 struct survey_info *survey),
804
805 TP_ARGS(local, idx, survey),
806
807 TP_STRUCT__entry(
808 LOCAL_ENTRY
809 __field(int, idx)
810 ),
811
812 TP_fast_assign(
813 LOCAL_ASSIGN;
814 __entry->idx = idx;
815 ),
816
817 TP_printk(
818 LOCAL_PR_FMT " idx:%d",
819 LOCAL_PR_ARG, __entry->idx
820 )
821);
822
823TRACE_EVENT(drv_flush,
824 TP_PROTO(struct ieee80211_local *local, bool drop),
825
826 TP_ARGS(local, drop),
827
828 TP_STRUCT__entry(
829 LOCAL_ENTRY
830 __field(bool, drop)
831 ),
832
833 TP_fast_assign(
834 LOCAL_ASSIGN;
835 __entry->drop = drop;
836 ),
837
838 TP_printk(
839 LOCAL_PR_FMT " drop:%d",
840 LOCAL_PR_ARG, __entry->drop
841 )
842);
843
844TRACE_EVENT(drv_channel_switch,
845 TP_PROTO(struct ieee80211_local *local,
846 struct ieee80211_channel_switch *ch_switch),
847
848 TP_ARGS(local, ch_switch),
849
850 TP_STRUCT__entry(
851 LOCAL_ENTRY
852 __field(u64, timestamp)
853 __field(bool, block_tx)
854 __field(u16, freq)
855 __field(u8, count)
856 ),
857
858 TP_fast_assign(
859 LOCAL_ASSIGN;
860 __entry->timestamp = ch_switch->timestamp;
861 __entry->block_tx = ch_switch->block_tx;
862 __entry->freq = ch_switch->channel->center_freq;
863 __entry->count = ch_switch->count;
864 ),
865
866 TP_printk(
867 LOCAL_PR_FMT " new freq:%u count:%d",
868 LOCAL_PR_ARG, __entry->freq, __entry->count
869 )
870);
871
872TRACE_EVENT(drv_set_antenna,
873 TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret),
874
875 TP_ARGS(local, tx_ant, rx_ant, ret),
876
877 TP_STRUCT__entry(
878 LOCAL_ENTRY
879 __field(u32, tx_ant)
880 __field(u32, rx_ant)
881 __field(int, ret)
882 ),
883
884 TP_fast_assign(
885 LOCAL_ASSIGN;
886 __entry->tx_ant = tx_ant;
887 __entry->rx_ant = rx_ant;
888 __entry->ret = ret;
889 ),
890
891 TP_printk(
892 LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d",
893 LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret
894 )
895);
896
897TRACE_EVENT(drv_get_antenna,
898 TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret),
899
900 TP_ARGS(local, tx_ant, rx_ant, ret),
901
902 TP_STRUCT__entry(
903 LOCAL_ENTRY
904 __field(u32, tx_ant)
905 __field(u32, rx_ant)
906 __field(int, ret)
907 ),
908
909 TP_fast_assign(
910 LOCAL_ASSIGN;
911 __entry->tx_ant = tx_ant;
912 __entry->rx_ant = rx_ant;
913 __entry->ret = ret;
914 ),
915
916 TP_printk(
917 LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d",
918 LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret
919 )
920);
921
922TRACE_EVENT(drv_remain_on_channel,
923 TP_PROTO(struct ieee80211_local *local, struct ieee80211_channel *chan,
924 enum nl80211_channel_type chantype, unsigned int duration),
925
926 TP_ARGS(local, chan, chantype, duration),
927
928 TP_STRUCT__entry(
929 LOCAL_ENTRY
930 __field(int, center_freq)
931 __field(int, channel_type)
932 __field(unsigned int, duration)
933 ),
934
935 TP_fast_assign(
936 LOCAL_ASSIGN;
937 __entry->center_freq = chan->center_freq;
938 __entry->channel_type = chantype;
939 __entry->duration = duration;
940 ),
941
942 TP_printk(
943 LOCAL_PR_FMT " freq:%dMHz duration:%dms",
944 LOCAL_PR_ARG, __entry->center_freq, __entry->duration
945 )
946);
947
948DEFINE_EVENT(local_only_evt, drv_cancel_remain_on_channel,
949 TP_PROTO(struct ieee80211_local *local),
950 TP_ARGS(local)
951);
952
953TRACE_EVENT(drv_offchannel_tx,
954 TP_PROTO(struct ieee80211_local *local, struct sk_buff *skb,
955 struct ieee80211_channel *chan,
956 enum nl80211_channel_type channel_type,
957 unsigned int wait),
958
959 TP_ARGS(local, skb, chan, channel_type, wait),
960
961 TP_STRUCT__entry(
962 LOCAL_ENTRY
963 __field(int, center_freq)
964 __field(int, channel_type)
965 __field(unsigned int, wait)
966 ),
967
968 TP_fast_assign(
969 LOCAL_ASSIGN;
970 __entry->center_freq = chan->center_freq;
971 __entry->channel_type = channel_type;
972 __entry->wait = wait;
973 ),
974
975 TP_printk(
976 LOCAL_PR_FMT " freq:%dMHz, wait:%dms",
977 LOCAL_PR_ARG, __entry->center_freq, __entry->wait
978 )
979);
980
981TRACE_EVENT(drv_set_ringparam,
982 TP_PROTO(struct ieee80211_local *local, u32 tx, u32 rx),
983
984 TP_ARGS(local, tx, rx),
985
986 TP_STRUCT__entry(
987 LOCAL_ENTRY
988 __field(u32, tx)
989 __field(u32, rx)
990 ),
991
992 TP_fast_assign(
993 LOCAL_ASSIGN;
994 __entry->tx = tx;
995 __entry->rx = rx;
996 ),
997
998 TP_printk(
 999		LOCAL_PR_FMT " tx:%d rx:%d",
1000 LOCAL_PR_ARG, __entry->tx, __entry->rx
1001 )
1002);
1003
1004TRACE_EVENT(drv_get_ringparam,
1005 TP_PROTO(struct ieee80211_local *local, u32 *tx, u32 *tx_max,
1006 u32 *rx, u32 *rx_max),
1007
1008 TP_ARGS(local, tx, tx_max, rx, rx_max),
1009
1010 TP_STRUCT__entry(
1011 LOCAL_ENTRY
1012 __field(u32, tx)
1013 __field(u32, tx_max)
1014 __field(u32, rx)
1015 __field(u32, rx_max)
1016 ),
1017
1018 TP_fast_assign(
1019 LOCAL_ASSIGN;
1020 __entry->tx = *tx;
1021 __entry->tx_max = *tx_max;
1022 __entry->rx = *rx;
1023 __entry->rx_max = *rx_max;
1024 ),
1025
1026 TP_printk(
1027		LOCAL_PR_FMT " tx:%d tx_max:%d rx:%d rx_max:%d",
1028 LOCAL_PR_ARG,
1029 __entry->tx, __entry->tx_max, __entry->rx, __entry->rx_max
1030 )
1031);
1032
1033DEFINE_EVENT(local_only_evt, drv_tx_frames_pending,
1034 TP_PROTO(struct ieee80211_local *local),
1035 TP_ARGS(local)
1036);
1037
1038DEFINE_EVENT(local_only_evt, drv_offchannel_tx_cancel_wait,
1039 TP_PROTO(struct ieee80211_local *local),
1040 TP_ARGS(local)
1041);
1042
1043TRACE_EVENT(drv_set_bitrate_mask,
1044 TP_PROTO(struct ieee80211_local *local,
1045 struct ieee80211_sub_if_data *sdata,
1046 const struct cfg80211_bitrate_mask *mask),
1047
1048 TP_ARGS(local, sdata, mask),
1049
1050 TP_STRUCT__entry(
1051 LOCAL_ENTRY
1052 VIF_ENTRY
1053 __field(u32, legacy_2g)
1054 __field(u32, legacy_5g)
1055 ),
1056
1057 TP_fast_assign(
1058 LOCAL_ASSIGN;
1059 VIF_ASSIGN;
1060 __entry->legacy_2g = mask->control[IEEE80211_BAND_2GHZ].legacy;
1061 __entry->legacy_5g = mask->control[IEEE80211_BAND_5GHZ].legacy;
1062 ),
1063
1064 TP_printk(
1065 LOCAL_PR_FMT VIF_PR_FMT " 2G Mask:0x%x 5G Mask:0x%x",
1066 LOCAL_PR_ARG, VIF_PR_ARG, __entry->legacy_2g, __entry->legacy_5g
1067 )
1068);
1069
1070TRACE_EVENT(drv_set_rekey_data,
1071 TP_PROTO(struct ieee80211_local *local,
1072 struct ieee80211_sub_if_data *sdata,
1073 struct cfg80211_gtk_rekey_data *data),
1074
1075 TP_ARGS(local, sdata, data),
1076
1077 TP_STRUCT__entry(
1078 LOCAL_ENTRY
1079 VIF_ENTRY
1080 __array(u8, kek, NL80211_KEK_LEN)
1081 __array(u8, kck, NL80211_KCK_LEN)
1082 __array(u8, replay_ctr, NL80211_REPLAY_CTR_LEN)
1083 ),
1084
1085 TP_fast_assign(
1086 LOCAL_ASSIGN;
1087 VIF_ASSIGN;
1088 memcpy(__entry->kek, data->kek, NL80211_KEK_LEN);
1089 memcpy(__entry->kck, data->kck, NL80211_KCK_LEN);
1090 memcpy(__entry->replay_ctr, data->replay_ctr,
1091 NL80211_REPLAY_CTR_LEN);
1092 ),
1093
1094 TP_printk(LOCAL_PR_FMT VIF_PR_FMT,
1095 LOCAL_PR_ARG, VIF_PR_ARG)
1096);
1097
1098TRACE_EVENT(drv_rssi_callback,
1099 TP_PROTO(struct ieee80211_local *local,
1100 enum ieee80211_rssi_event rssi_event),
1101
1102 TP_ARGS(local, rssi_event),
1103
1104 TP_STRUCT__entry(
1105 LOCAL_ENTRY
1106 __field(u32, rssi_event)
1107 ),
1108
1109 TP_fast_assign(
1110 LOCAL_ASSIGN;
1111 __entry->rssi_event = rssi_event;
1112 ),
1113
1114 TP_printk(
1115 LOCAL_PR_FMT " rssi_event:%d",
1116 LOCAL_PR_ARG, __entry->rssi_event
1117 )
1118);
1119
1120/*
1121 * Tracing for API calls that drivers call.
1122 */
1123
1124TRACE_EVENT(api_start_tx_ba_session,
1125 TP_PROTO(struct ieee80211_sta *sta, u16 tid),
1126
1127 TP_ARGS(sta, tid),
1128
1129 TP_STRUCT__entry(
1130 STA_ENTRY
1131 __field(u16, tid)
1132 ),
1133
1134 TP_fast_assign(
1135 STA_ASSIGN;
1136 __entry->tid = tid;
1137 ),
1138
1139 TP_printk(
1140 STA_PR_FMT " tid:%d",
1141 STA_PR_ARG, __entry->tid
1142 )
1143);
1144
1145TRACE_EVENT(api_start_tx_ba_cb,
1146 TP_PROTO(struct ieee80211_sub_if_data *sdata, const u8 *ra, u16 tid),
1147
1148 TP_ARGS(sdata, ra, tid),
1149
1150 TP_STRUCT__entry(
1151 VIF_ENTRY
1152 __array(u8, ra, ETH_ALEN)
1153 __field(u16, tid)
1154 ),
1155
1156 TP_fast_assign(
1157 VIF_ASSIGN;
1158 memcpy(__entry->ra, ra, ETH_ALEN);
1159 __entry->tid = tid;
1160 ),
1161
1162 TP_printk(
1163 VIF_PR_FMT " ra:%pM tid:%d",
1164 VIF_PR_ARG, __entry->ra, __entry->tid
1165 )
1166);
1167
1168TRACE_EVENT(api_stop_tx_ba_session,
1169 TP_PROTO(struct ieee80211_sta *sta, u16 tid),
1170
1171 TP_ARGS(sta, tid),
1172
1173 TP_STRUCT__entry(
1174 STA_ENTRY
1175 __field(u16, tid)
1176 ),
1177
1178 TP_fast_assign(
1179 STA_ASSIGN;
1180 __entry->tid = tid;
1181 ),
1182
1183 TP_printk(
1184 STA_PR_FMT " tid:%d",
1185 STA_PR_ARG, __entry->tid
1186 )
1187);
1188
1189TRACE_EVENT(api_stop_tx_ba_cb,
1190 TP_PROTO(struct ieee80211_sub_if_data *sdata, const u8 *ra, u16 tid),
1191
1192 TP_ARGS(sdata, ra, tid),
1193
1194 TP_STRUCT__entry(
1195 VIF_ENTRY
1196 __array(u8, ra, ETH_ALEN)
1197 __field(u16, tid)
1198 ),
1199
1200 TP_fast_assign(
1201 VIF_ASSIGN;
1202 memcpy(__entry->ra, ra, ETH_ALEN);
1203 __entry->tid = tid;
1204 ),
1205
1206 TP_printk(
1207 VIF_PR_FMT " ra:%pM tid:%d",
1208 VIF_PR_ARG, __entry->ra, __entry->tid
1209 )
1210);
1211
1212DEFINE_EVENT(local_only_evt, api_restart_hw,
1213 TP_PROTO(struct ieee80211_local *local),
1214 TP_ARGS(local)
1215);
1216
1217TRACE_EVENT(api_beacon_loss,
1218 TP_PROTO(struct ieee80211_sub_if_data *sdata),
1219
1220 TP_ARGS(sdata),
1221
1222 TP_STRUCT__entry(
1223 VIF_ENTRY
1224 ),
1225
1226 TP_fast_assign(
1227 VIF_ASSIGN;
1228 ),
1229
1230 TP_printk(
1231 VIF_PR_FMT,
1232 VIF_PR_ARG
1233 )
1234);
1235
1236TRACE_EVENT(api_connection_loss,
1237 TP_PROTO(struct ieee80211_sub_if_data *sdata),
1238
1239 TP_ARGS(sdata),
1240
1241 TP_STRUCT__entry(
1242 VIF_ENTRY
1243 ),
1244
1245 TP_fast_assign(
1246 VIF_ASSIGN;
1247 ),
1248
1249 TP_printk(
1250 VIF_PR_FMT,
1251 VIF_PR_ARG
1252 )
1253);
1254
1255TRACE_EVENT(api_cqm_rssi_notify,
1256 TP_PROTO(struct ieee80211_sub_if_data *sdata,
1257 enum nl80211_cqm_rssi_threshold_event rssi_event),
1258
1259 TP_ARGS(sdata, rssi_event),
1260
1261 TP_STRUCT__entry(
1262 VIF_ENTRY
1263 __field(u32, rssi_event)
1264 ),
1265
1266 TP_fast_assign(
1267 VIF_ASSIGN;
1268 __entry->rssi_event = rssi_event;
1269 ),
1270
1271 TP_printk(
1272 VIF_PR_FMT " event:%d",
1273 VIF_PR_ARG, __entry->rssi_event
1274 )
1275);
1276
1277TRACE_EVENT(api_scan_completed,
1278 TP_PROTO(struct ieee80211_local *local, bool aborted),
1279
1280 TP_ARGS(local, aborted),
1281
1282 TP_STRUCT__entry(
1283 LOCAL_ENTRY
1284 __field(bool, aborted)
1285 ),
1286
1287 TP_fast_assign(
1288 LOCAL_ASSIGN;
1289 __entry->aborted = aborted;
1290 ),
1291
1292 TP_printk(
1293 LOCAL_PR_FMT " aborted:%d",
1294 LOCAL_PR_ARG, __entry->aborted
1295 )
1296);
1297
1298TRACE_EVENT(api_sched_scan_results,
1299 TP_PROTO(struct ieee80211_local *local),
1300
1301 TP_ARGS(local),
1302
1303 TP_STRUCT__entry(
1304 LOCAL_ENTRY
1305 ),
1306
1307 TP_fast_assign(
1308 LOCAL_ASSIGN;
1309 ),
1310
1311 TP_printk(
1312 LOCAL_PR_FMT, LOCAL_PR_ARG
1313 )
1314);
1315
1316TRACE_EVENT(api_sched_scan_stopped,
1317 TP_PROTO(struct ieee80211_local *local),
1318
1319 TP_ARGS(local),
1320
1321 TP_STRUCT__entry(
1322 LOCAL_ENTRY
1323 ),
1324
1325 TP_fast_assign(
1326 LOCAL_ASSIGN;
1327 ),
1328
1329 TP_printk(
1330 LOCAL_PR_FMT, LOCAL_PR_ARG
1331 )
1332);
1333
1334TRACE_EVENT(api_sta_block_awake,
1335 TP_PROTO(struct ieee80211_local *local,
1336 struct ieee80211_sta *sta, bool block),
1337
1338 TP_ARGS(local, sta, block),
1339
1340 TP_STRUCT__entry(
1341 LOCAL_ENTRY
1342 STA_ENTRY
1343 __field(bool, block)
1344 ),
1345
1346 TP_fast_assign(
1347 LOCAL_ASSIGN;
1348 STA_ASSIGN;
1349 __entry->block = block;
1350 ),
1351
1352 TP_printk(
1353 LOCAL_PR_FMT STA_PR_FMT " block:%d",
1354 LOCAL_PR_ARG, STA_PR_FMT, __entry->block
1355 )
1356);
1357
1358TRACE_EVENT(api_chswitch_done,
1359 TP_PROTO(struct ieee80211_sub_if_data *sdata, bool success),
1360
1361 TP_ARGS(sdata, success),
1362
1363 TP_STRUCT__entry(
1364 VIF_ENTRY
1365 __field(bool, success)
1366 ),
1367
1368 TP_fast_assign(
1369 VIF_ASSIGN;
1370 __entry->success = success;
1371 ),
1372
1373 TP_printk(
1374 VIF_PR_FMT " success=%d",
1375 VIF_PR_ARG, __entry->success
1376 )
1377);
1378
1379DEFINE_EVENT(local_only_evt, api_ready_on_channel,
1380 TP_PROTO(struct ieee80211_local *local),
1381 TP_ARGS(local)
1382);
1383
1384DEFINE_EVENT(local_only_evt, api_remain_on_channel_expired,
1385 TP_PROTO(struct ieee80211_local *local),
1386 TP_ARGS(local)
1387);
1388
1389TRACE_EVENT(api_gtk_rekey_notify,
1390 TP_PROTO(struct ieee80211_sub_if_data *sdata,
1391 const u8 *bssid, const u8 *replay_ctr),
1392
1393 TP_ARGS(sdata, bssid, replay_ctr),
1394
1395 TP_STRUCT__entry(
1396 VIF_ENTRY
1397 __array(u8, bssid, ETH_ALEN)
1398 __array(u8, replay_ctr, NL80211_REPLAY_CTR_LEN)
1399 ),
1400
1401 TP_fast_assign(
1402 VIF_ASSIGN;
1403 memcpy(__entry->bssid, bssid, ETH_ALEN);
1404 memcpy(__entry->replay_ctr, replay_ctr, NL80211_REPLAY_CTR_LEN);
1405 ),
1406
1407 TP_printk(VIF_PR_FMT, VIF_PR_ARG)
1408);
1409
1410TRACE_EVENT(api_enable_rssi_reports,
1411 TP_PROTO(struct ieee80211_sub_if_data *sdata,
1412 int rssi_min_thold, int rssi_max_thold),
1413
1414 TP_ARGS(sdata, rssi_min_thold, rssi_max_thold),
1415
1416 TP_STRUCT__entry(
1417 VIF_ENTRY
1418 __field(int, rssi_min_thold)
1419 __field(int, rssi_max_thold)
1420 ),
1421
1422 TP_fast_assign(
1423 VIF_ASSIGN;
1424 __entry->rssi_min_thold = rssi_min_thold;
1425 __entry->rssi_max_thold = rssi_max_thold;
1426 ),
1427
1428 TP_printk(
1429		VIF_PR_FMT " rssi_min_thold=%d, rssi_max_thold=%d",
1430 VIF_PR_ARG, __entry->rssi_min_thold, __entry->rssi_max_thold
1431 )
1432);
1433
1434/*
1435 * Tracing for internal functions
1436 * (which may also be called in response to driver calls)
1437 */
1438
1439TRACE_EVENT(wake_queue,
1440 TP_PROTO(struct ieee80211_local *local, u16 queue,
1441 enum queue_stop_reason reason),
1442
1443 TP_ARGS(local, queue, reason),
1444
1445 TP_STRUCT__entry(
1446 LOCAL_ENTRY
1447 __field(u16, queue)
1448 __field(u32, reason)
1449 ),
1450
1451 TP_fast_assign(
1452 LOCAL_ASSIGN;
1453 __entry->queue = queue;
1454 __entry->reason = reason;
1455 ),
1456
1457 TP_printk(
1458 LOCAL_PR_FMT " queue:%d, reason:%d",
1459 LOCAL_PR_ARG, __entry->queue, __entry->reason
1460 )
1461);
1462
1463TRACE_EVENT(stop_queue,
1464 TP_PROTO(struct ieee80211_local *local, u16 queue,
1465 enum queue_stop_reason reason),
1466
1467 TP_ARGS(local, queue, reason),
1468
1469 TP_STRUCT__entry(
1470 LOCAL_ENTRY
1471 __field(u16, queue)
1472 __field(u32, reason)
1473 ),
1474
1475 TP_fast_assign(
1476 LOCAL_ASSIGN;
1477 __entry->queue = queue;
1478 __entry->reason = reason;
1479 ),
1480
1481 TP_printk(
1482 LOCAL_PR_FMT " queue:%d, reason:%d",
1483 LOCAL_PR_ARG, __entry->queue, __entry->reason
1484 )
1485);
1486#endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
1487
1488#undef TRACE_INCLUDE_PATH
1489#define TRACE_INCLUDE_PATH .
1490#undef TRACE_INCLUDE_FILE
1491#define TRACE_INCLUDE_FILE driver-trace
1492#include <trace/define_trace.h>
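(A minimal illustration, not taken from the patch: each TRACE_EVENT(name, ...)
defined above generates a trace_name() inline that the rest of mac80211 calls.
The driver-ops wrappers are assumed to look roughly like the sketch below; the
exact wrapper bodies live in driver-ops.h and may differ.)

	static inline void drv_set_tsf(struct ieee80211_local *local, u64 tsf)
	{
		trace_drv_set_tsf(local, tsf);		/* record the call for ftrace */
		if (local->ops->set_tsf)
			local->ops->set_tsf(&local->hw, tsf);
	}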
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
new file mode 100644
index 00000000000..7737f204d3f
--- /dev/null
+++ b/net/mac80211/work.c
@@ -0,0 +1,1302 @@
1/*
2 * mac80211 work implementation
3 *
4 * Copyright 2003-2008, Jouni Malinen <j@w1.fi>
5 * Copyright 2004, Instant802 Networks, Inc.
6 * Copyright 2005, Devicescape Software, Inc.
7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
9 * Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15
16#include <linux/delay.h>
17#include <linux/if_ether.h>
18#include <linux/skbuff.h>
19#include <linux/if_arp.h>
20#include <linux/etherdevice.h>
21#include <linux/crc32.h>
22#include <linux/slab.h>
23#include <net/mac80211.h>
24#include <asm/unaligned.h>
25
26#include "ieee80211_i.h"
27#include "rate.h"
28#include "driver-ops.h"
29
30#define IEEE80211_AUTH_TIMEOUT (HZ / 5)
31#define IEEE80211_AUTH_MAX_TRIES 3
32#define IEEE80211_ASSOC_TIMEOUT (HZ / 5)
33#define IEEE80211_ASSOC_MAX_TRIES 3
34
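/*
 * Result of handling a work item or a queued frame: WORK_ACT_MISMATCH means
 * the frame does not belong to this work item, WORK_ACT_NONE leaves the item
 * pending, WORK_ACT_TIMEOUT removes the item and completes it with a NULL
 * frame, and WORK_ACT_DONE completes it with the frame that finished it.
 */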
35enum work_action {
36 WORK_ACT_MISMATCH,
37 WORK_ACT_NONE,
38 WORK_ACT_TIMEOUT,
39 WORK_ACT_DONE,
40};
41
42
43/* utils */
44static inline void ASSERT_WORK_MTX(struct ieee80211_local *local)
45{
46 lockdep_assert_held(&local->mtx);
47}
48
49/*
50 * We can have multiple work items (and connection probing)
51 * scheduling this timer, but we need to take care to only
52 * reschedule it when it should fire _earlier_ than it was
53 * asked for before, or if it's not pending right now. This
54 * function ensures that. Note that it then is required to
55 * run this function for all timeouts after the first one
56 * has happened -- the work that runs from this timer will
57 * do that.
58 */
59static void run_again(struct ieee80211_local *local,
60 unsigned long timeout)
61{
62 ASSERT_WORK_MTX(local);
63
64 if (!timer_pending(&local->work_timer) ||
65 time_before(timeout, local->work_timer.expires))
66 mod_timer(&local->work_timer, timeout);
67}
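/*
 * (The work loop below invokes run_again(local, wk->timeout) for every
 *  pending item, so the timer always ends up armed for the earliest
 *  outstanding timeout.)
 */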
68
69void free_work(struct ieee80211_work *wk)
70{
71 kfree_rcu(wk, rcu_head);
72}
73
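/*
 * Worked example for the rate matching below: a supported-rates IE octet of
 * 0x96 (basic-rate bit set) masks to 0x16 = 22 units of 500 kb/s, and
 * 22 * 5 = 110 in the 100 kb/s units used by sband->bitrates[].bitrate,
 * i.e. 11 Mb/s; if the hardware advertises a matching 11 Mb/s entry, that
 * entry's bit is set in *rates and counted.
 */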
74static int ieee80211_compatible_rates(const u8 *supp_rates, int supp_rates_len,
75 struct ieee80211_supported_band *sband,
76 u32 *rates)
77{
78 int i, j, count;
79 *rates = 0;
80 count = 0;
81 for (i = 0; i < supp_rates_len; i++) {
82 int rate = (supp_rates[i] & 0x7F) * 5;
83
84 for (j = 0; j < sband->n_bitrates; j++)
85 if (sband->bitrates[j].bitrate == rate) {
86 *rates |= BIT(j);
87 count++;
88 break;
89 }
90 }
91
92 return count;
93}
94
95/* frame sending functions */
96
97static void ieee80211_add_ht_ie(struct sk_buff *skb, const u8 *ht_info_ie,
98 struct ieee80211_supported_band *sband,
99 struct ieee80211_channel *channel,
100 enum ieee80211_smps_mode smps)
101{
102 struct ieee80211_ht_info *ht_info;
103 u8 *pos;
104 u32 flags = channel->flags;
105 u16 cap = sband->ht_cap.cap;
106 __le16 tmp;
107
108 if (!sband->ht_cap.ht_supported)
109 return;
110
111 if (!ht_info_ie)
112 return;
113
114 if (ht_info_ie[1] < sizeof(struct ieee80211_ht_info))
115 return;
116
117 ht_info = (struct ieee80211_ht_info *)(ht_info_ie + 2);
118
119 /* determine capability flags */
120
121 switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
122 case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
123 if (flags & IEEE80211_CHAN_NO_HT40PLUS) {
124 cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
125 cap &= ~IEEE80211_HT_CAP_SGI_40;
126 }
127 break;
128 case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
129 if (flags & IEEE80211_CHAN_NO_HT40MINUS) {
130 cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
131 cap &= ~IEEE80211_HT_CAP_SGI_40;
132 }
133 break;
134 }
135
136 /* set SM PS mode properly */
137 cap &= ~IEEE80211_HT_CAP_SM_PS;
138 switch (smps) {
139 case IEEE80211_SMPS_AUTOMATIC:
140 case IEEE80211_SMPS_NUM_MODES:
 141		WARN_ON(1);	/* fall through */
142 case IEEE80211_SMPS_OFF:
143 cap |= WLAN_HT_CAP_SM_PS_DISABLED <<
144 IEEE80211_HT_CAP_SM_PS_SHIFT;
145 break;
146 case IEEE80211_SMPS_STATIC:
147 cap |= WLAN_HT_CAP_SM_PS_STATIC <<
148 IEEE80211_HT_CAP_SM_PS_SHIFT;
149 break;
150 case IEEE80211_SMPS_DYNAMIC:
151 cap |= WLAN_HT_CAP_SM_PS_DYNAMIC <<
152 IEEE80211_HT_CAP_SM_PS_SHIFT;
153 break;
154 }
155
156 /* reserve and fill IE */
157
158 pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2);
159 *pos++ = WLAN_EID_HT_CAPABILITY;
160 *pos++ = sizeof(struct ieee80211_ht_cap);
161 memset(pos, 0, sizeof(struct ieee80211_ht_cap));
162
163 /* capability flags */
164 tmp = cpu_to_le16(cap);
165 memcpy(pos, &tmp, sizeof(u16));
166 pos += sizeof(u16);
167
168 /* AMPDU parameters */
169 *pos++ = sband->ht_cap.ampdu_factor |
170 (sband->ht_cap.ampdu_density <<
171 IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT);
172
173 /* MCS set */
174 memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs));
175 pos += sizeof(sband->ht_cap.mcs);
176
177 /* extended capabilities */
178 pos += sizeof(__le16);
179
180 /* BF capabilities */
181 pos += sizeof(__le32);
182
183 /* antenna selection */
184 pos += sizeof(u8);
185}
186
187static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
188 struct ieee80211_work *wk)
189{
190 struct ieee80211_local *local = sdata->local;
191 struct sk_buff *skb;
192 struct ieee80211_mgmt *mgmt;
193 u8 *pos, qos_info;
194 size_t offset = 0, noffset;
195 int i, count, rates_len, supp_rates_len;
196 u16 capab;
197 struct ieee80211_supported_band *sband;
198 u32 rates = 0;
199
200 sband = local->hw.wiphy->bands[wk->chan->band];
201
202 if (wk->assoc.supp_rates_len) {
203 /*
204 * Get all rates supported by the device and the AP as
205 * some APs don't like getting a superset of their rates
206 * in the association request (e.g. D-Link DAP 1353 in
207 * b-only mode)...
208 */
209 rates_len = ieee80211_compatible_rates(wk->assoc.supp_rates,
210 wk->assoc.supp_rates_len,
211 sband, &rates);
212 } else {
213 /*
 214		 * In case the AP did not provide any supported rates information
215 * before association, we send information element(s) with
216 * all rates that we support.
217 */
218 rates = ~0;
219 rates_len = sband->n_bitrates;
220 }
221
222 skb = alloc_skb(local->hw.extra_tx_headroom +
223 sizeof(*mgmt) + /* bit too much but doesn't matter */
224 2 + wk->assoc.ssid_len + /* SSID */
225 4 + rates_len + /* (extended) rates */
226 4 + /* power capability */
227 2 + 2 * sband->n_channels + /* supported channels */
228 2 + sizeof(struct ieee80211_ht_cap) + /* HT */
229 wk->ie_len + /* extra IEs */
230 9, /* WMM */
231 GFP_KERNEL);
232 if (!skb) {
233 printk(KERN_DEBUG "%s: failed to allocate buffer for assoc "
234 "frame\n", sdata->name);
235 return;
236 }
237 skb_reserve(skb, local->hw.extra_tx_headroom);
238
239 capab = WLAN_CAPABILITY_ESS;
240
241 if (sband->band == IEEE80211_BAND_2GHZ) {
242 if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE))
243 capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
244 if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE))
245 capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
246 }
247
248 if (wk->assoc.capability & WLAN_CAPABILITY_PRIVACY)
249 capab |= WLAN_CAPABILITY_PRIVACY;
250
251 if ((wk->assoc.capability & WLAN_CAPABILITY_SPECTRUM_MGMT) &&
252 (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT))
253 capab |= WLAN_CAPABILITY_SPECTRUM_MGMT;
254
255 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
256 memset(mgmt, 0, 24);
257 memcpy(mgmt->da, wk->filter_ta, ETH_ALEN);
258 memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
259 memcpy(mgmt->bssid, wk->filter_ta, ETH_ALEN);
260
261 if (!is_zero_ether_addr(wk->assoc.prev_bssid)) {
262 skb_put(skb, 10);
263 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
264 IEEE80211_STYPE_REASSOC_REQ);
265 mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab);
266 mgmt->u.reassoc_req.listen_interval =
267 cpu_to_le16(local->hw.conf.listen_interval);
268 memcpy(mgmt->u.reassoc_req.current_ap, wk->assoc.prev_bssid,
269 ETH_ALEN);
270 } else {
271 skb_put(skb, 4);
272 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
273 IEEE80211_STYPE_ASSOC_REQ);
274 mgmt->u.assoc_req.capab_info = cpu_to_le16(capab);
275 mgmt->u.assoc_req.listen_interval =
276 cpu_to_le16(local->hw.conf.listen_interval);
277 }
278
279 /* SSID */
280 pos = skb_put(skb, 2 + wk->assoc.ssid_len);
281 *pos++ = WLAN_EID_SSID;
282 *pos++ = wk->assoc.ssid_len;
283 memcpy(pos, wk->assoc.ssid, wk->assoc.ssid_len);
284
 285	/* add all rates marked above; the first eight go into the Supported
	 * Rates IE, the rest into the Extended Supported Rates IE below */
286 supp_rates_len = rates_len;
287 if (supp_rates_len > 8)
288 supp_rates_len = 8;
289
290 pos = skb_put(skb, supp_rates_len + 2);
291 *pos++ = WLAN_EID_SUPP_RATES;
292 *pos++ = supp_rates_len;
293
294 count = 0;
295 for (i = 0; i < sband->n_bitrates; i++) {
296 if (BIT(i) & rates) {
297 int rate = sband->bitrates[i].bitrate;
298 *pos++ = (u8) (rate / 5);
299 if (++count == 8)
300 break;
301 }
302 }
303
304 if (rates_len > count) {
305 pos = skb_put(skb, rates_len - count + 2);
306 *pos++ = WLAN_EID_EXT_SUPP_RATES;
307 *pos++ = rates_len - count;
308
309 for (i++; i < sband->n_bitrates; i++) {
310 if (BIT(i) & rates) {
311 int rate = sband->bitrates[i].bitrate;
312 *pos++ = (u8) (rate / 5);
313 }
314 }
315 }
316
317 if (capab & WLAN_CAPABILITY_SPECTRUM_MGMT) {
318 /* 1. power capabilities */
319 pos = skb_put(skb, 4);
320 *pos++ = WLAN_EID_PWR_CAPABILITY;
321 *pos++ = 2;
322 *pos++ = 0; /* min tx power */
323 *pos++ = wk->chan->max_power; /* max tx power */
324
325 /* 2. supported channels */
326 /* TODO: get this in reg domain format */
327 pos = skb_put(skb, 2 * sband->n_channels + 2);
328 *pos++ = WLAN_EID_SUPPORTED_CHANNELS;
329 *pos++ = 2 * sband->n_channels;
330 for (i = 0; i < sband->n_channels; i++) {
331 *pos++ = ieee80211_frequency_to_channel(
332 sband->channels[i].center_freq);
 333			*pos++ = 1; /* one channel in the subband */
334 }
335 }
336
337 /* if present, add any custom IEs that go before HT */
338 if (wk->ie_len && wk->ie) {
339 static const u8 before_ht[] = {
340 WLAN_EID_SSID,
341 WLAN_EID_SUPP_RATES,
342 WLAN_EID_EXT_SUPP_RATES,
343 WLAN_EID_PWR_CAPABILITY,
344 WLAN_EID_SUPPORTED_CHANNELS,
345 WLAN_EID_RSN,
346 WLAN_EID_QOS_CAPA,
347 WLAN_EID_RRM_ENABLED_CAPABILITIES,
348 WLAN_EID_MOBILITY_DOMAIN,
349 WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
350 };
351 noffset = ieee80211_ie_split(wk->ie, wk->ie_len,
352 before_ht, ARRAY_SIZE(before_ht),
353 offset);
354 pos = skb_put(skb, noffset - offset);
355 memcpy(pos, wk->ie + offset, noffset - offset);
356 offset = noffset;
357 }
358
359 if (wk->assoc.use_11n && wk->assoc.wmm_used &&
360 local->hw.queues >= 4)
361 ieee80211_add_ht_ie(skb, wk->assoc.ht_information_ie,
362 sband, wk->chan, wk->assoc.smps);
363
364 /* if present, add any custom non-vendor IEs that go after HT */
365 if (wk->ie_len && wk->ie) {
366 noffset = ieee80211_ie_split_vendor(wk->ie, wk->ie_len,
367 offset);
368 pos = skb_put(skb, noffset - offset);
369 memcpy(pos, wk->ie + offset, noffset - offset);
370 offset = noffset;
371 }
372
373 if (wk->assoc.wmm_used && local->hw.queues >= 4) {
374 if (wk->assoc.uapsd_used) {
375 qos_info = local->uapsd_queues;
376 qos_info |= (local->uapsd_max_sp_len <<
377 IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT);
378 } else {
379 qos_info = 0;
380 }
381
382 pos = skb_put(skb, 9);
383 *pos++ = WLAN_EID_VENDOR_SPECIFIC;
384 *pos++ = 7; /* len */
385 *pos++ = 0x00; /* Microsoft OUI 00:50:F2 */
386 *pos++ = 0x50;
387 *pos++ = 0xf2;
388 *pos++ = 2; /* WME */
389 *pos++ = 0; /* WME info */
390 *pos++ = 1; /* WME ver */
391 *pos++ = qos_info;
392 }
393
394 /* add any remaining custom (i.e. vendor specific here) IEs */
395 if (wk->ie_len && wk->ie) {
396 noffset = wk->ie_len;
397 pos = skb_put(skb, noffset - offset);
398 memcpy(pos, wk->ie + offset, noffset - offset);
399 }
400
401 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
402 ieee80211_tx_skb(sdata, skb);
403}
404
405static void ieee80211_remove_auth_bss(struct ieee80211_local *local,
406 struct ieee80211_work *wk)
407{
408 struct cfg80211_bss *cbss;
409 u16 capa_val = WLAN_CAPABILITY_ESS;
410
411 if (wk->probe_auth.privacy)
412 capa_val |= WLAN_CAPABILITY_PRIVACY;
413
414 cbss = cfg80211_get_bss(local->hw.wiphy, wk->chan, wk->filter_ta,
415 wk->probe_auth.ssid, wk->probe_auth.ssid_len,
416 WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_PRIVACY,
417 capa_val);
418 if (!cbss)
419 return;
420
421 cfg80211_unlink_bss(local->hw.wiphy, cbss);
422 cfg80211_put_bss(cbss);
423}
424
425static enum work_action __must_check
426ieee80211_direct_probe(struct ieee80211_work *wk)
427{
428 struct ieee80211_sub_if_data *sdata = wk->sdata;
429 struct ieee80211_local *local = sdata->local;
430
431 if (!wk->probe_auth.synced) {
432 int ret = drv_tx_sync(local, sdata, wk->filter_ta,
433 IEEE80211_TX_SYNC_AUTH);
434 if (ret)
435 return WORK_ACT_TIMEOUT;
436 }
437 wk->probe_auth.synced = true;
438
439 wk->probe_auth.tries++;
440 if (wk->probe_auth.tries > IEEE80211_AUTH_MAX_TRIES) {
441 printk(KERN_DEBUG "%s: direct probe to %pM timed out\n",
442 sdata->name, wk->filter_ta);
443
444 /*
 445		 * Most likely the AP is not in range, so remove the
446 * bss struct for that AP.
447 */
448 ieee80211_remove_auth_bss(local, wk);
449
450 return WORK_ACT_TIMEOUT;
451 }
452
453 printk(KERN_DEBUG "%s: direct probe to %pM (try %d/%i)\n",
454 sdata->name, wk->filter_ta, wk->probe_auth.tries,
455 IEEE80211_AUTH_MAX_TRIES);
456
457 /*
 458	 * The direct probe is sent to the broadcast address, as some APs
 459	 * will not answer a directed probe request while unassociated.
460 */
461 ieee80211_send_probe_req(sdata, NULL, wk->probe_auth.ssid,
462 wk->probe_auth.ssid_len, NULL, 0,
463 (u32) -1, true);
464
465 wk->timeout = jiffies + IEEE80211_AUTH_TIMEOUT;
466 run_again(local, wk->timeout);
467
468 return WORK_ACT_NONE;
469}
470
471
472static enum work_action __must_check
473ieee80211_authenticate(struct ieee80211_work *wk)
474{
475 struct ieee80211_sub_if_data *sdata = wk->sdata;
476 struct ieee80211_local *local = sdata->local;
477
478 if (!wk->probe_auth.synced) {
479 int ret = drv_tx_sync(local, sdata, wk->filter_ta,
480 IEEE80211_TX_SYNC_AUTH);
481 if (ret)
482 return WORK_ACT_TIMEOUT;
483 }
484 wk->probe_auth.synced = true;
485
486 wk->probe_auth.tries++;
487 if (wk->probe_auth.tries > IEEE80211_AUTH_MAX_TRIES) {
488 printk(KERN_DEBUG "%s: authentication with %pM"
489 " timed out\n", sdata->name, wk->filter_ta);
490
491 /*
 492		 * Most likely the AP is not in range, so remove the
493 * bss struct for that AP.
494 */
495 ieee80211_remove_auth_bss(local, wk);
496
497 return WORK_ACT_TIMEOUT;
498 }
499
500 printk(KERN_DEBUG "%s: authenticate with %pM (try %d)\n",
501 sdata->name, wk->filter_ta, wk->probe_auth.tries);
502
503 ieee80211_send_auth(sdata, 1, wk->probe_auth.algorithm, wk->ie,
504 wk->ie_len, wk->filter_ta, NULL, 0, 0);
505 wk->probe_auth.transaction = 2;
506
507 wk->timeout = jiffies + IEEE80211_AUTH_TIMEOUT;
508 run_again(local, wk->timeout);
509
510 return WORK_ACT_NONE;
511}
512
513static enum work_action __must_check
514ieee80211_associate(struct ieee80211_work *wk)
515{
516 struct ieee80211_sub_if_data *sdata = wk->sdata;
517 struct ieee80211_local *local = sdata->local;
518
519 if (!wk->assoc.synced) {
520 int ret = drv_tx_sync(local, sdata, wk->filter_ta,
521 IEEE80211_TX_SYNC_ASSOC);
522 if (ret)
523 return WORK_ACT_TIMEOUT;
524 }
525 wk->assoc.synced = true;
526
527 wk->assoc.tries++;
528 if (wk->assoc.tries > IEEE80211_ASSOC_MAX_TRIES) {
529 printk(KERN_DEBUG "%s: association with %pM"
530 " timed out\n",
531 sdata->name, wk->filter_ta);
532
533 /*
 534		 * Most likely the AP is not in range, so remove the
535 * bss struct for that AP.
536 */
537 if (wk->assoc.bss)
538 cfg80211_unlink_bss(local->hw.wiphy, wk->assoc.bss);
539
540 return WORK_ACT_TIMEOUT;
541 }
542
543 printk(KERN_DEBUG "%s: associate with %pM (try %d)\n",
544 sdata->name, wk->filter_ta, wk->assoc.tries);
545 ieee80211_send_assoc(sdata, wk);
546
547 wk->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT;
548 run_again(local, wk->timeout);
549
550 return WORK_ACT_NONE;
551}
552
553static enum work_action __must_check
554ieee80211_remain_on_channel_timeout(struct ieee80211_work *wk)
555{
556 /*
557 * First time we run, do nothing -- the generic code will
558 * have switched to the right channel etc.
559 */
560 if (!wk->started) {
561 wk->timeout = jiffies + msecs_to_jiffies(wk->remain.duration);
562
563 cfg80211_ready_on_channel(wk->sdata->dev, (unsigned long) wk,
564 wk->chan, wk->chan_type,
565 wk->remain.duration, GFP_KERNEL);
566
567 return WORK_ACT_NONE;
568 }
569
570 return WORK_ACT_TIMEOUT;
571}
572
573static enum work_action __must_check
574ieee80211_offchannel_tx(struct ieee80211_work *wk)
575{
576 if (!wk->started) {
577 wk->timeout = jiffies + msecs_to_jiffies(wk->offchan_tx.wait);
578
579 /*
580 * After this, offchan_tx.frame remains but now is no
581 * longer a valid pointer -- we still need it as the
582 * cookie for canceling this work/status matching.
583 */
584 ieee80211_tx_skb(wk->sdata, wk->offchan_tx.frame);
585
586 return WORK_ACT_NONE;
587 }
588
589 return WORK_ACT_TIMEOUT;
590}
591
592static enum work_action __must_check
593ieee80211_assoc_beacon_wait(struct ieee80211_work *wk)
594{
595 if (wk->started)
596 return WORK_ACT_TIMEOUT;
597
598 /*
599 * Wait up to one beacon interval ...
600 * should this be more if we miss one?
601 */
602 printk(KERN_DEBUG "%s: waiting for beacon from %pM\n",
603 wk->sdata->name, wk->filter_ta);
604 wk->timeout = TU_TO_EXP_TIME(wk->assoc.bss->beacon_interval);
605 return WORK_ACT_NONE;
606}
607
608static void ieee80211_auth_challenge(struct ieee80211_work *wk,
609 struct ieee80211_mgmt *mgmt,
610 size_t len)
611{
612 struct ieee80211_sub_if_data *sdata = wk->sdata;
613 u8 *pos;
614 struct ieee802_11_elems elems;
615
616 pos = mgmt->u.auth.variable;
617 ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
618 if (!elems.challenge)
619 return;
620 ieee80211_send_auth(sdata, 3, wk->probe_auth.algorithm,
621 elems.challenge - 2, elems.challenge_len + 2,
622 wk->filter_ta, wk->probe_auth.key,
623 wk->probe_auth.key_len, wk->probe_auth.key_idx);
624 wk->probe_auth.transaction = 4;
625}
626
627static enum work_action __must_check
628ieee80211_rx_mgmt_auth(struct ieee80211_work *wk,
629 struct ieee80211_mgmt *mgmt, size_t len)
630{
631 u16 auth_alg, auth_transaction, status_code;
632
633 if (wk->type != IEEE80211_WORK_AUTH)
634 return WORK_ACT_MISMATCH;
635
636 if (len < 24 + 6)
637 return WORK_ACT_NONE;
638
639 auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg);
640 auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction);
641 status_code = le16_to_cpu(mgmt->u.auth.status_code);
642
643 if (auth_alg != wk->probe_auth.algorithm ||
644 auth_transaction != wk->probe_auth.transaction)
645 return WORK_ACT_NONE;
646
647 if (status_code != WLAN_STATUS_SUCCESS) {
648 printk(KERN_DEBUG "%s: %pM denied authentication (status %d)\n",
649 wk->sdata->name, mgmt->sa, status_code);
650 return WORK_ACT_DONE;
651 }
652
653 switch (wk->probe_auth.algorithm) {
654 case WLAN_AUTH_OPEN:
655 case WLAN_AUTH_LEAP:
656 case WLAN_AUTH_FT:
657 break;
658 case WLAN_AUTH_SHARED_KEY:
659 if (wk->probe_auth.transaction != 4) {
660 ieee80211_auth_challenge(wk, mgmt, len);
661 /* need another frame */
662 return WORK_ACT_NONE;
663 }
664 break;
665 default:
666 WARN_ON(1);
667 return WORK_ACT_NONE;
668 }
669
670 printk(KERN_DEBUG "%s: authenticated\n", wk->sdata->name);
671 return WORK_ACT_DONE;
672}
673
674static enum work_action __must_check
675ieee80211_rx_mgmt_assoc_resp(struct ieee80211_work *wk,
676 struct ieee80211_mgmt *mgmt, size_t len,
677 bool reassoc)
678{
679 struct ieee80211_sub_if_data *sdata = wk->sdata;
680 struct ieee80211_local *local = sdata->local;
681 u16 capab_info, status_code, aid;
682 struct ieee802_11_elems elems;
683 u8 *pos;
684
685 if (wk->type != IEEE80211_WORK_ASSOC)
686 return WORK_ACT_MISMATCH;
687
688 /*
689 * AssocResp and ReassocResp have identical structure, so process both
690 * of them in this function.
691 */
692
693 if (len < 24 + 6)
694 return WORK_ACT_NONE;
695
696 capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
697 status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code);
698 aid = le16_to_cpu(mgmt->u.assoc_resp.aid);
699
700 printk(KERN_DEBUG "%s: RX %sssocResp from %pM (capab=0x%x "
701 "status=%d aid=%d)\n",
702 sdata->name, reassoc ? "Rea" : "A", mgmt->sa,
703 capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14))));
704
705 pos = mgmt->u.assoc_resp.variable;
706 ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
707
708 if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY &&
709 elems.timeout_int && elems.timeout_int_len == 5 &&
710 elems.timeout_int[0] == WLAN_TIMEOUT_ASSOC_COMEBACK) {
711 u32 tu, ms;
712 tu = get_unaligned_le32(elems.timeout_int + 1);
 713		ms = tu * 1024 / 1000; /* 1 TU = 1024 usec */
714 printk(KERN_DEBUG "%s: %pM rejected association temporarily; "
715 "comeback duration %u TU (%u ms)\n",
716 sdata->name, mgmt->sa, tu, ms);
717 wk->timeout = jiffies + msecs_to_jiffies(ms);
718 if (ms > IEEE80211_ASSOC_TIMEOUT)
719 run_again(local, wk->timeout);
720 return WORK_ACT_NONE;
721 }
722
723 if (status_code != WLAN_STATUS_SUCCESS)
724 printk(KERN_DEBUG "%s: %pM denied association (code=%d)\n",
725 sdata->name, mgmt->sa, status_code);
726 else
727 printk(KERN_DEBUG "%s: associated\n", sdata->name);
728
729 return WORK_ACT_DONE;
730}
731
732static enum work_action __must_check
733ieee80211_rx_mgmt_probe_resp(struct ieee80211_work *wk,
734 struct ieee80211_mgmt *mgmt, size_t len,
735 struct ieee80211_rx_status *rx_status)
736{
737 struct ieee80211_sub_if_data *sdata = wk->sdata;
738 struct ieee80211_local *local = sdata->local;
739 size_t baselen;
740
741 ASSERT_WORK_MTX(local);
742
743 if (wk->type != IEEE80211_WORK_DIRECT_PROBE)
744 return WORK_ACT_MISMATCH;
745
746 if (len < 24 + 12)
747 return WORK_ACT_NONE;
748
749 baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt;
750 if (baselen > len)
751 return WORK_ACT_NONE;
752
753 printk(KERN_DEBUG "%s: direct probe responded\n", sdata->name);
754 return WORK_ACT_DONE;
755}
756
757static enum work_action __must_check
758ieee80211_rx_mgmt_beacon(struct ieee80211_work *wk,
759 struct ieee80211_mgmt *mgmt, size_t len)
760{
761 struct ieee80211_sub_if_data *sdata = wk->sdata;
762 struct ieee80211_local *local = sdata->local;
763
764 ASSERT_WORK_MTX(local);
765
766 if (wk->type != IEEE80211_WORK_ASSOC_BEACON_WAIT)
767 return WORK_ACT_MISMATCH;
768
769 if (len < 24 + 12)
770 return WORK_ACT_NONE;
771
772 printk(KERN_DEBUG "%s: beacon received\n", sdata->name);
773 return WORK_ACT_DONE;
774}
775
776static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
777 struct sk_buff *skb)
778{
779 struct ieee80211_rx_status *rx_status;
780 struct ieee80211_mgmt *mgmt;
781 struct ieee80211_work *wk;
782 enum work_action rma = WORK_ACT_NONE;
783 u16 fc;
784
785 rx_status = (struct ieee80211_rx_status *) skb->cb;
786 mgmt = (struct ieee80211_mgmt *) skb->data;
787 fc = le16_to_cpu(mgmt->frame_control);
788
789 mutex_lock(&local->mtx);
790
791 list_for_each_entry(wk, &local->work_list, list) {
792 const u8 *bssid = NULL;
793
794 switch (wk->type) {
795 case IEEE80211_WORK_DIRECT_PROBE:
796 case IEEE80211_WORK_AUTH:
797 case IEEE80211_WORK_ASSOC:
798 case IEEE80211_WORK_ASSOC_BEACON_WAIT:
799 bssid = wk->filter_ta;
800 break;
801 default:
802 continue;
803 }
804
805 /*
806 * Before queuing, we already verified mgmt->sa,
807 * so this is needed just for matching.
808 */
809 if (compare_ether_addr(bssid, mgmt->bssid))
810 continue;
811
812 switch (fc & IEEE80211_FCTL_STYPE) {
813 case IEEE80211_STYPE_BEACON:
814 rma = ieee80211_rx_mgmt_beacon(wk, mgmt, skb->len);
815 break;
816 case IEEE80211_STYPE_PROBE_RESP:
817 rma = ieee80211_rx_mgmt_probe_resp(wk, mgmt, skb->len,
818 rx_status);
819 break;
820 case IEEE80211_STYPE_AUTH:
821 rma = ieee80211_rx_mgmt_auth(wk, mgmt, skb->len);
822 break;
823 case IEEE80211_STYPE_ASSOC_RESP:
824 rma = ieee80211_rx_mgmt_assoc_resp(wk, mgmt,
825 skb->len, false);
826 break;
827 case IEEE80211_STYPE_REASSOC_RESP:
828 rma = ieee80211_rx_mgmt_assoc_resp(wk, mgmt,
829 skb->len, true);
830 break;
831 default:
832 WARN_ON(1);
833 rma = WORK_ACT_NONE;
834 }
835
836 /*
837 * We've either received an unexpected frame, or we have
838 * multiple work items and need to match the frame to the
839 * right one.
840 */
841 if (rma == WORK_ACT_MISMATCH)
842 continue;
843
844 /*
845 * We've processed this frame for that work, so it can't
846 * belong to another work struct.
847 * NB: this is also required for correctness for 'rma'!
848 */
849 break;
850 }
851
852 switch (rma) {
853 case WORK_ACT_MISMATCH:
854 /* ignore this unmatched frame */
855 break;
856 case WORK_ACT_NONE:
857 break;
858 case WORK_ACT_DONE:
859 list_del_rcu(&wk->list);
860 break;
861 default:
862 WARN(1, "unexpected: %d", rma);
863 }
864
865 mutex_unlock(&local->mtx);
866
867 if (rma != WORK_ACT_DONE)
868 goto out;
869
870 switch (wk->done(wk, skb)) {
871 case WORK_DONE_DESTROY:
872 free_work(wk);
873 break;
874 case WORK_DONE_REQUEUE:
875 synchronize_rcu();
876 wk->started = false; /* restart */
877 mutex_lock(&local->mtx);
878 list_add_tail(&wk->list, &local->work_list);
879 mutex_unlock(&local->mtx);
880 }
881
882 out:
883 kfree_skb(skb);
884}
885
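/*
 * Channel-type compatibility for concurrent work items: NO_HT work runs on
 * any operating channel type, HT20 work needs an HT operating channel
 * (HT20 or either HT40 variant), and HT40+/HT40- work only coexists with an
 * identical operating type, since the secondary channel offset must match.
 * For example, HT20 work on an HT40+ operating channel coexists, while
 * HT40- work on HT40+ does not.
 */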
886static bool ieee80211_work_ct_coexists(enum nl80211_channel_type wk_ct,
887 enum nl80211_channel_type oper_ct)
888{
889 switch (wk_ct) {
890 case NL80211_CHAN_NO_HT:
891 return true;
892 case NL80211_CHAN_HT20:
893 if (oper_ct != NL80211_CHAN_NO_HT)
894 return true;
895 return false;
896 case NL80211_CHAN_HT40MINUS:
897 case NL80211_CHAN_HT40PLUS:
898 return (wk_ct == oper_ct);
899 }
900 WARN_ON(1); /* shouldn't get here */
901 return false;
902}
903
904static enum nl80211_channel_type
905ieee80211_calc_ct(enum nl80211_channel_type wk_ct,
906 enum nl80211_channel_type oper_ct)
907{
908 switch (wk_ct) {
909 case NL80211_CHAN_NO_HT:
910 return oper_ct;
911 case NL80211_CHAN_HT20:
912 if (oper_ct != NL80211_CHAN_NO_HT)
913 return oper_ct;
914 return wk_ct;
915 case NL80211_CHAN_HT40MINUS:
916 case NL80211_CHAN_HT40PLUS:
917 return wk_ct;
918 }
919 WARN_ON(1); /* shouldn't get here */
920 return wk_ct;
921}
922
923
924static void ieee80211_work_timer(unsigned long data)
925{
926 struct ieee80211_local *local = (void *) data;
927
928 if (local->quiescing)
929 return;
930
931 ieee80211_queue_work(&local->hw, &local->work_work);
932}
933
934static void ieee80211_work_work(struct work_struct *work)
935{
936 struct ieee80211_local *local =
937 container_of(work, struct ieee80211_local, work_work);
938 struct sk_buff *skb;
939 struct ieee80211_work *wk, *tmp;
940 LIST_HEAD(free_work);
941 enum work_action rma;
942 bool remain_off_channel = false;
943
944 if (local->scanning)
945 return;
946
947 /*
948 * ieee80211_queue_work() should have picked up most cases,
949 * here we'll pick the rest.
950 */
951 if (WARN(local->suspended, "work scheduled while going to suspend\n"))
952 return;
953
954 /* first process frames to avoid timing out while a frame is pending */
955 while ((skb = skb_dequeue(&local->work_skb_queue)))
956 ieee80211_work_rx_queued_mgmt(local, skb);
957
958 mutex_lock(&local->mtx);
959
960 ieee80211_recalc_idle(local);
961
962 list_for_each_entry_safe(wk, tmp, &local->work_list, list) {
963 bool started = wk->started;
964
965 /* mark work as started if it's on the current off-channel */
966 if (!started && local->tmp_channel &&
967 wk->chan == local->tmp_channel &&
968 wk->chan_type == local->tmp_channel_type) {
969 started = true;
970 wk->timeout = jiffies;
971 }
972
973 if (!started && !local->tmp_channel) {
974 bool on_oper_chan;
975 bool tmp_chan_changed = false;
976 bool on_oper_chan2;
977 enum nl80211_channel_type wk_ct;
978 on_oper_chan = ieee80211_cfg_on_oper_channel(local);
979
980 /* Work with existing channel type if possible. */
981 wk_ct = wk->chan_type;
982 if (wk->chan == local->hw.conf.channel)
983 wk_ct = ieee80211_calc_ct(wk->chan_type,
984 local->hw.conf.channel_type);
985
986 if (local->tmp_channel)
987 if ((local->tmp_channel != wk->chan) ||
988 (local->tmp_channel_type != wk_ct))
989 tmp_chan_changed = true;
990
991 local->tmp_channel = wk->chan;
992 local->tmp_channel_type = wk_ct;
993 /*
994 * Leave the station vifs in awake mode if they
995 * happen to be on the same channel as
996 * the requested channel.
997 */
998 on_oper_chan2 = ieee80211_cfg_on_oper_channel(local);
999 if (on_oper_chan != on_oper_chan2) {
1000 if (on_oper_chan2) {
1001 /* going off oper channel, PS too */
1002 ieee80211_offchannel_stop_vifs(local,
1003 true);
1004 ieee80211_hw_config(local, 0);
1005 } else {
1006 /* going on channel, but leave PS
1007 * off-channel. */
1008 ieee80211_hw_config(local, 0);
1009 ieee80211_offchannel_return(local,
1010 true,
1011 false);
1012 }
1013 } else if (tmp_chan_changed)
1014 /* Still off-channel, but on some other
1015 * channel, so update hardware.
1016 * PS should already be off-channel.
1017 */
1018 ieee80211_hw_config(local, 0);
1019
1020 started = true;
1021 wk->timeout = jiffies;
1022 }
1023
1024 /* don't try to work with items that aren't started */
1025 if (!started)
1026 continue;
1027
1028 if (time_is_after_jiffies(wk->timeout)) {
1029 /*
1030 * This work item isn't supposed to be worked on
1031 * right now, but take care to adjust the timer
1032 * properly.
1033 */
1034 run_again(local, wk->timeout);
1035 continue;
1036 }
1037
1038 switch (wk->type) {
1039 default:
1040 WARN_ON(1);
1041 /* nothing */
1042 rma = WORK_ACT_NONE;
1043 break;
1044 case IEEE80211_WORK_ABORT:
1045 rma = WORK_ACT_TIMEOUT;
1046 break;
1047 case IEEE80211_WORK_DIRECT_PROBE:
1048 rma = ieee80211_direct_probe(wk);
1049 break;
1050 case IEEE80211_WORK_AUTH:
1051 rma = ieee80211_authenticate(wk);
1052 break;
1053 case IEEE80211_WORK_ASSOC:
1054 rma = ieee80211_associate(wk);
1055 break;
1056 case IEEE80211_WORK_REMAIN_ON_CHANNEL:
1057 rma = ieee80211_remain_on_channel_timeout(wk);
1058 break;
1059 case IEEE80211_WORK_OFFCHANNEL_TX:
1060 rma = ieee80211_offchannel_tx(wk);
1061 break;
1062 case IEEE80211_WORK_ASSOC_BEACON_WAIT:
1063 rma = ieee80211_assoc_beacon_wait(wk);
1064 break;
1065 }
1066
1067 wk->started = started;
1068
1069 switch (rma) {
1070 case WORK_ACT_NONE:
1071 /* might have changed the timeout */
1072 run_again(local, wk->timeout);
1073 break;
1074 case WORK_ACT_TIMEOUT:
1075 list_del_rcu(&wk->list);
1076 synchronize_rcu();
1077 list_add(&wk->list, &free_work);
1078 break;
1079 default:
1080 WARN(1, "unexpected: %d", rma);
1081 }
1082 }
1083
1084 list_for_each_entry(wk, &local->work_list, list) {
1085 if (!wk->started)
1086 continue;
1087 if (wk->chan != local->tmp_channel)
1088 continue;
1089 if (!ieee80211_work_ct_coexists(wk->chan_type,
1090 local->tmp_channel_type))
1091 continue;
1092 remain_off_channel = true;
1093 }
1094
1095 if (!remain_off_channel && local->tmp_channel) {
1096 local->tmp_channel = NULL;
1097		/* If tmp_channel wasn't the operating channel, then
1098		 * we need to go back on-channel.
1099		 * NOTE: If we can ever be here while scanning,
1100 * or if the hw_config() channel config logic changes,
1101 * then we may need to do a more thorough check to see if
1102 * we still need to do a hardware config. Currently,
1103 * we cannot be here while scanning, however.
1104 */
1105 if (!ieee80211_cfg_on_oper_channel(local))
1106 ieee80211_hw_config(local, 0);
1107
1108 /* At the least, we need to disable offchannel_ps,
1109 * so just go ahead and run the entire offchannel
1110 * return logic here. We *could* skip enabling
1111 * beaconing if we were already on-oper-channel
1112 * as a future optimization.
1113 */
1114 ieee80211_offchannel_return(local, true, true);
1115
1116 /* give connection some time to breathe */
1117 run_again(local, jiffies + HZ/2);
1118 }
1119
1120 if (list_empty(&local->work_list) && local->scan_req &&
1121 !local->scanning)
1122 ieee80211_queue_delayed_work(&local->hw,
1123 &local->scan_work,
1124 round_jiffies_relative(0));
1125
1126 ieee80211_recalc_idle(local);
1127
1128 mutex_unlock(&local->mtx);
1129
1130 list_for_each_entry_safe(wk, tmp, &free_work, list) {
1131 wk->done(wk, NULL);
1132 list_del(&wk->list);
1133 kfree(wk);
1134 }
1135}
1136
1137void ieee80211_add_work(struct ieee80211_work *wk)
1138{
1139 struct ieee80211_local *local;
1140
1141 if (WARN_ON(!wk->chan))
1142 return;
1143
1144 if (WARN_ON(!wk->sdata))
1145 return;
1146
1147 if (WARN_ON(!wk->done))
1148 return;
1149
1150 if (WARN_ON(!ieee80211_sdata_running(wk->sdata)))
1151 return;
1152
1153 wk->started = false;
1154
1155 local = wk->sdata->local;
1156 mutex_lock(&local->mtx);
1157 list_add_tail(&wk->list, &local->work_list);
1158 mutex_unlock(&local->mtx);
1159
1160 ieee80211_queue_work(&local->hw, &local->work_work);
1161}
1162
1163void ieee80211_work_init(struct ieee80211_local *local)
1164{
1165 INIT_LIST_HEAD(&local->work_list);
1166 setup_timer(&local->work_timer, ieee80211_work_timer,
1167 (unsigned long)local);
1168 INIT_WORK(&local->work_work, ieee80211_work_work);
1169 skb_queue_head_init(&local->work_skb_queue);
1170}
1171
1172void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
1173{
1174 struct ieee80211_local *local = sdata->local;
1175 struct ieee80211_work *wk;
1176 bool cleanup = false;
1177
1178 mutex_lock(&local->mtx);
1179 list_for_each_entry(wk, &local->work_list, list) {
1180 if (wk->sdata != sdata)
1181 continue;
1182 cleanup = true;
1183 wk->type = IEEE80211_WORK_ABORT;
1184 wk->started = true;
1185 wk->timeout = jiffies;
1186 }
1187 mutex_unlock(&local->mtx);
1188
1189 /* run cleanups etc. */
1190 if (cleanup)
1191 ieee80211_work_work(&local->work_work);
1192
1193 mutex_lock(&local->mtx);
1194 list_for_each_entry(wk, &local->work_list, list) {
1195 if (wk->sdata != sdata)
1196 continue;
1197 WARN_ON(1);
1198 break;
1199 }
1200 mutex_unlock(&local->mtx);
1201}
1202
1203ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata,
1204 struct sk_buff *skb)
1205{
1206 struct ieee80211_local *local = sdata->local;
1207 struct ieee80211_mgmt *mgmt;
1208 struct ieee80211_work *wk;
1209 u16 fc;
1210
1211 if (skb->len < 24)
1212 return RX_DROP_MONITOR;
1213
1214 mgmt = (struct ieee80211_mgmt *) skb->data;
1215 fc = le16_to_cpu(mgmt->frame_control);
1216
1217 list_for_each_entry_rcu(wk, &local->work_list, list) {
1218 if (sdata != wk->sdata)
1219 continue;
1220 if (compare_ether_addr(wk->filter_ta, mgmt->sa))
1221 continue;
1222 if (compare_ether_addr(wk->filter_ta, mgmt->bssid))
1223 continue;
1224
1225 switch (fc & IEEE80211_FCTL_STYPE) {
1226 case IEEE80211_STYPE_AUTH:
1227 case IEEE80211_STYPE_PROBE_RESP:
1228 case IEEE80211_STYPE_ASSOC_RESP:
1229 case IEEE80211_STYPE_REASSOC_RESP:
1230 case IEEE80211_STYPE_BEACON:
1231 skb_queue_tail(&local->work_skb_queue, skb);
1232 ieee80211_queue_work(&local->hw, &local->work_work);
1233 return RX_QUEUED;
1234 }
1235 }
1236
1237 return RX_CONTINUE;
1238}
1239
1240static enum work_done_result ieee80211_remain_done(struct ieee80211_work *wk,
1241 struct sk_buff *skb)
1242{
1243 /*
1244 * We are done serving the remain-on-channel command.
1245 */
1246 cfg80211_remain_on_channel_expired(wk->sdata->dev, (unsigned long) wk,
1247 wk->chan, wk->chan_type,
1248 GFP_KERNEL);
1249
1250 return WORK_DONE_DESTROY;
1251}
1252
1253int ieee80211_wk_remain_on_channel(struct ieee80211_sub_if_data *sdata,
1254 struct ieee80211_channel *chan,
1255 enum nl80211_channel_type channel_type,
1256 unsigned int duration, u64 *cookie)
1257{
1258 struct ieee80211_work *wk;
1259
1260 wk = kzalloc(sizeof(*wk), GFP_KERNEL);
1261 if (!wk)
1262 return -ENOMEM;
1263
1264 wk->type = IEEE80211_WORK_REMAIN_ON_CHANNEL;
1265 wk->chan = chan;
1266 wk->chan_type = channel_type;
1267 wk->sdata = sdata;
1268 wk->done = ieee80211_remain_done;
1269
1270 wk->remain.duration = duration;
1271
1272 *cookie = (unsigned long) wk;
1273
1274 ieee80211_add_work(wk);
1275
1276 return 0;
1277}
1278
1279int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata,
1280 u64 cookie)
1281{
1282 struct ieee80211_local *local = sdata->local;
1283 struct ieee80211_work *wk, *tmp;
1284 bool found = false;
1285
1286 mutex_lock(&local->mtx);
1287 list_for_each_entry_safe(wk, tmp, &local->work_list, list) {
1288 if ((unsigned long) wk == cookie) {
1289 wk->timeout = jiffies;
1290 found = true;
1291 break;
1292 }
1293 }
1294 mutex_unlock(&local->mtx);
1295
1296 if (!found)
1297 return -ENOENT;
1298
1299 ieee80211_queue_work(&local->hw, &local->work_work);
1300
1301 return 0;
1302}
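A minimal usage sketch (assumed caller, not part of the patch) for the
remain-on-channel helpers above: the request hands back a cookie, which is
simply the work item's address, and cancellation forces that item's timeout
so the work loop tears it down. The wrapper function name and the 100 ms
duration are illustrative only.

	static int example_listen_then_cancel(struct ieee80211_sub_if_data *sdata,
					      struct ieee80211_channel *chan)
	{
		u64 cookie;
		int ret;

		/* go off-channel for roughly 100 ms */
		ret = ieee80211_wk_remain_on_channel(sdata, chan,
						     NL80211_CHAN_NO_HT,
						     100, &cookie);
		if (ret)
			return ret;

		/* ... later, abort the pending off-channel period ... */
		return ieee80211_wk_cancel_remain_on_channel(sdata, cookie);
	}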
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
new file mode 100644
index 00000000000..a80b0cb03f1
--- /dev/null
+++ b/net/netfilter/nfnetlink_queue.c
@@ -0,0 +1,1028 @@
1/*
2 * This is a module which is used for queueing packets and communicating with
3 * userspace via nfnetlink.
4 *
5 * (C) 2005 by Harald Welte <laforge@netfilter.org>
6 * (C) 2007 by Patrick McHardy <kaber@trash.net>
7 *
8 * Based on the old ipv4-only ip_queue.c:
9 * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
10 * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2 as
14 * published by the Free Software Foundation.
15 *
16 */
17#include <linux/module.h>
18#include <linux/skbuff.h>
19#include <linux/init.h>
20#include <linux/spinlock.h>
21#include <linux/slab.h>
22#include <linux/notifier.h>
23#include <linux/netdevice.h>
24#include <linux/netfilter.h>
25#include <linux/proc_fs.h>
26#include <linux/netfilter_ipv4.h>
27#include <linux/netfilter_ipv6.h>
28#include <linux/netfilter/nfnetlink.h>
29#include <linux/netfilter/nfnetlink_queue.h>
30#include <linux/list.h>
31#include <net/sock.h>
32#include <net/netfilter/nf_queue.h>
33
34#include <linux/atomic.h>
35
36#ifdef CONFIG_BRIDGE_NETFILTER
37#include "../bridge/br_private.h"
38#endif
39
40#define NFQNL_QMAX_DEFAULT 1024
41
42struct nfqnl_instance {
43 struct hlist_node hlist; /* global list of queues */
44 struct rcu_head rcu;
45
46 int peer_pid;
47 unsigned int queue_maxlen;
48 unsigned int copy_range;
49 unsigned int queue_dropped;
50 unsigned int queue_user_dropped;
51
52
53 u_int16_t queue_num; /* number of this queue */
54 u_int8_t copy_mode;
55/*
 56 * The following fields are dirtied for each queued packet;
 57 * keep them in the same cache line if possible.
58 */
59 spinlock_t lock;
60 unsigned int queue_total;
61 unsigned int id_sequence; /* 'sequence' of pkt ids */
62 struct list_head queue_list; /* packets in queue */
63};
64
65typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
66
67static DEFINE_SPINLOCK(instances_lock);
68
69#define INSTANCE_BUCKETS 16
70static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly;
71
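/*
 * Fold the 16-bit queue number into one of the INSTANCE_BUCKETS hash chains.
 * For example, queue_num 0x0102: (0x0102 >> 8) | 0x0102 = 0x0103 = 259, and
 * 259 % 16 selects bucket 3.
 */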
72static inline u_int8_t instance_hashfn(u_int16_t queue_num)
73{
74 return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS;
75}
76
77static struct nfqnl_instance *
78instance_lookup(u_int16_t queue_num)
79{
80 struct hlist_head *head;
81 struct hlist_node *pos;
82 struct nfqnl_instance *inst;
83
84 head = &instance_table[instance_hashfn(queue_num)];
85 hlist_for_each_entry_rcu(inst, pos, head, hlist) {
86 if (inst->queue_num == queue_num)
87 return inst;
88 }
89 return NULL;
90}
91
92static struct nfqnl_instance *
93instance_create(u_int16_t queue_num, int pid)
94{
95 struct nfqnl_instance *inst;
96 unsigned int h;
97 int err;
98
99 spin_lock(&instances_lock);
100 if (instance_lookup(queue_num)) {
101 err = -EEXIST;
102 goto out_unlock;
103 }
104
105 inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
106 if (!inst) {
107 err = -ENOMEM;
108 goto out_unlock;
109 }
110
111 inst->queue_num = queue_num;
112 inst->peer_pid = pid;
113 inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
114 inst->copy_range = 0xfffff;
115 inst->copy_mode = NFQNL_COPY_NONE;
116 spin_lock_init(&inst->lock);
117 INIT_LIST_HEAD(&inst->queue_list);
118
119 if (!try_module_get(THIS_MODULE)) {
120 err = -EAGAIN;
121 goto out_free;
122 }
123
124 h = instance_hashfn(queue_num);
125 hlist_add_head_rcu(&inst->hlist, &instance_table[h]);
126
127 spin_unlock(&instances_lock);
128
129 return inst;
130
131out_free:
132 kfree(inst);
133out_unlock:
134 spin_unlock(&instances_lock);
135 return ERR_PTR(err);
136}
137
138static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
139 unsigned long data);
140
141static void
142instance_destroy_rcu(struct rcu_head *head)
143{
144 struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
145 rcu);
146
147 nfqnl_flush(inst, NULL, 0);
148 kfree(inst);
149 module_put(THIS_MODULE);
150}
151
152static void
153__instance_destroy(struct nfqnl_instance *inst)
154{
155 hlist_del_rcu(&inst->hlist);
156 call_rcu(&inst->rcu, instance_destroy_rcu);
157}
158
159static void
160instance_destroy(struct nfqnl_instance *inst)
161{
162 spin_lock(&instances_lock);
163 __instance_destroy(inst);
164 spin_unlock(&instances_lock);
165}
166
167static inline void
168__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
169{
170 list_add_tail(&entry->list, &queue->queue_list);
171 queue->queue_total++;
172}
173
174static void
175__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
176{
177 list_del(&entry->list);
178 queue->queue_total--;
179}
180
181static struct nf_queue_entry *
182find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
183{
184 struct nf_queue_entry *entry = NULL, *i;
185
186 spin_lock_bh(&queue->lock);
187
188 list_for_each_entry(i, &queue->queue_list, list) {
189 if (i->id == id) {
190 entry = i;
191 break;
192 }
193 }
194
195 if (entry)
196 __dequeue_entry(queue, entry);
197
198 spin_unlock_bh(&queue->lock);
199
200 return entry;
201}
202
203static void
204nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
205{
206 struct nf_queue_entry *entry, *next;
207
208 spin_lock_bh(&queue->lock);
209 list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
210 if (!cmpfn || cmpfn(entry, data)) {
211 list_del(&entry->list);
212 queue->queue_total--;
213 nf_reinject(entry, NF_DROP);
214 }
215 }
216 spin_unlock_bh(&queue->lock);
217}
218
219static struct sk_buff *
220nfqnl_build_packet_message(struct nfqnl_instance *queue,
221 struct nf_queue_entry *entry,
222 __be32 **packet_id_ptr)
223{
224 sk_buff_data_t old_tail;
225 size_t size;
226 size_t data_len = 0;
227 struct sk_buff *skb;
228 struct nlattr *nla;
229 struct nfqnl_msg_packet_hdr *pmsg;
230 struct nlmsghdr *nlh;
231 struct nfgenmsg *nfmsg;
232 struct sk_buff *entskb = entry->skb;
233 struct net_device *indev;
234 struct net_device *outdev;
235
236 size = NLMSG_SPACE(sizeof(struct nfgenmsg))
237 + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
238 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
239 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
240#ifdef CONFIG_BRIDGE_NETFILTER
241 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
242 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
243#endif
244 + nla_total_size(sizeof(u_int32_t)) /* mark */
245 + nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
246 + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
247
248 outdev = entry->outdev;
249
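	/*
	 * copy_mode decides how much of the packet reaches userspace: NONE
	 * and META carry only metadata, while PACKET appends up to
	 * copy_range bytes of payload (after resolving a pending partial
	 * checksum on the skb).
	 */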
250 switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) {
251 case NFQNL_COPY_META:
252 case NFQNL_COPY_NONE:
253 break;
254
255 case NFQNL_COPY_PACKET:
256 if (entskb->ip_summed == CHECKSUM_PARTIAL &&
257 skb_checksum_help(entskb))
258 return NULL;
259
260 data_len = ACCESS_ONCE(queue->copy_range);
261 if (data_len == 0 || data_len > entskb->len)
262 data_len = entskb->len;
263
264 size += nla_total_size(data_len);
265 break;
266 }
267
268
269 skb = alloc_skb(size, GFP_ATOMIC);
270 if (!skb)
271 goto nlmsg_failure;
272
273 old_tail = skb->tail;
274 nlh = NLMSG_PUT(skb, 0, 0,
275 NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
276 sizeof(struct nfgenmsg));
277 nfmsg = NLMSG_DATA(nlh);
278 nfmsg->nfgen_family = entry->pf;
279 nfmsg->version = NFNETLINK_V0;
280 nfmsg->res_id = htons(queue->queue_num);
281
282 nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg));
283 pmsg = nla_data(nla);
284 pmsg->hw_protocol = entskb->protocol;
285 pmsg->hook = entry->hook;
286 *packet_id_ptr = &pmsg->packet_id;
287
288 indev = entry->indev;
289 if (indev) {
290#ifndef CONFIG_BRIDGE_NETFILTER
291 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex));
292#else
293 if (entry->pf == PF_BRIDGE) {
294 /* Case 1: indev is physical input device, we need to
295 * look for bridge group (when called from
296 * netfilter_bridge) */
297 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
298 htonl(indev->ifindex));
299 /* this is the bridge group "brX" */
300 /* rcu_read_lock()ed by __nf_queue */
301 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
302 htonl(br_port_get_rcu(indev)->br->dev->ifindex));
303 } else {
304 /* Case 2: indev is bridge group, we need to look for
305 * physical device (when called from ipv4) */
306 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
307 htonl(indev->ifindex));
308 if (entskb->nf_bridge && entskb->nf_bridge->physindev)
309 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
310 htonl(entskb->nf_bridge->physindev->ifindex));
311 }
312#endif
313 }
314
315 if (outdev) {
316#ifndef CONFIG_BRIDGE_NETFILTER
317 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex));
318#else
319 if (entry->pf == PF_BRIDGE) {
320 /* Case 1: outdev is physical output device, we need to
321 * look for bridge group (when called from
322 * netfilter_bridge) */
323 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
324 htonl(outdev->ifindex));
325 /* this is the bridge group "brX" */
326 /* rcu_read_lock()ed by __nf_queue */
327 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
328 htonl(br_port_get_rcu(outdev)->br->dev->ifindex));
329 } else {
330 /* Case 2: outdev is bridge group, we need to look for
331 * physical output device (when called from ipv4) */
332 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
333 htonl(outdev->ifindex));
334 if (entskb->nf_bridge && entskb->nf_bridge->physoutdev)
335 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
336 htonl(entskb->nf_bridge->physoutdev->ifindex));
337 }
338#endif
339 }
340
341 if (entskb->mark)
342 NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark));
343
344 if (indev && entskb->dev &&
345 entskb->mac_header != entskb->network_header) {
346 struct nfqnl_msg_packet_hw phw;
347 int len = dev_parse_header(entskb, phw.hw_addr);
348 if (len) {
349 phw.hw_addrlen = htons(len);
350 NLA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw);
351 }
352 }
353
354 if (entskb->tstamp.tv64) {
355 struct nfqnl_msg_packet_timestamp ts;
356 struct timeval tv = ktime_to_timeval(entskb->tstamp);
357 ts.sec = cpu_to_be64(tv.tv_sec);
358 ts.usec = cpu_to_be64(tv.tv_usec);
359
360 NLA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts);
361 }
362
363 if (data_len) {
364 struct nlattr *nla;
365 int sz = nla_attr_size(data_len);
366
367 if (skb_tailroom(skb) < nla_total_size(data_len)) {
368 printk(KERN_WARNING "nf_queue: no tailroom!\n");
369 goto nlmsg_failure;
370 }
371
372 nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len));
373 nla->nla_type = NFQA_PAYLOAD;
374 nla->nla_len = sz;
375
376 if (skb_copy_bits(entskb, 0, nla_data(nla), data_len))
377 BUG();
378 }
379
380 nlh->nlmsg_len = skb->tail - old_tail;
381 return skb;
382
383nlmsg_failure:
384nla_put_failure:
385 if (skb)
386 kfree_skb(skb);
387 if (net_ratelimit())
388 printk(KERN_ERR "nf_queue: error creating packet message\n");
389 return NULL;
390}
391
392static int
393nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
394{
395 struct sk_buff *nskb;
396 struct nfqnl_instance *queue;
397 int err = -ENOBUFS;
398 __be32 *packet_id_ptr;
399
400 /* rcu_read_lock()ed by nf_hook_slow() */
401 queue = instance_lookup(queuenum);
402 if (!queue) {
403 err = -ESRCH;
404 goto err_out;
405 }
406
407 if (queue->copy_mode == NFQNL_COPY_NONE) {
408 err = -EINVAL;
409 goto err_out;
410 }
411
412 nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr);
413 if (nskb == NULL) {
414 err = -ENOMEM;
415 goto err_out;
416 }
417 spin_lock_bh(&queue->lock);
418
419 if (!queue->peer_pid) {
420 err = -EINVAL;
421 goto err_out_free_nskb;
422 }
423 if (queue->queue_total >= queue->queue_maxlen) {
424 queue->queue_dropped++;
425 if (net_ratelimit())
426 printk(KERN_WARNING "nf_queue: full at %d entries, "
427 				"dropping packet(s).\n",
428 queue->queue_total);
429 goto err_out_free_nskb;
430 }
431 entry->id = ++queue->id_sequence;
432 *packet_id_ptr = htonl(entry->id);
433
434 /* nfnetlink_unicast will either free the nskb or add it to a socket */
435 err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT);
436 if (err < 0) {
437 queue->queue_user_dropped++;
438 goto err_out_unlock;
439 }
440
441 __enqueue_entry(queue, entry);
442
443 spin_unlock_bh(&queue->lock);
444 return 0;
445
446err_out_free_nskb:
447 kfree_skb(nskb);
448err_out_unlock:
449 spin_unlock_bh(&queue->lock);
450err_out:
451 return err;
452}
453
454static int
455nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e)
456{
457 struct sk_buff *nskb;
458 int diff;
459
460 diff = data_len - e->skb->len;
461 if (diff < 0) {
462 if (pskb_trim(e->skb, data_len))
463 return -ENOMEM;
464 } else if (diff > 0) {
465 if (data_len > 0xFFFF)
466 return -EINVAL;
467 if (diff > skb_tailroom(e->skb)) {
468 nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
469 diff, GFP_ATOMIC);
470 if (!nskb) {
471 printk(KERN_WARNING "nf_queue: OOM "
472 "in mangle, dropping packet\n");
473 return -ENOMEM;
474 }
475 kfree_skb(e->skb);
476 e->skb = nskb;
477 }
478 skb_put(e->skb, diff);
479 }
480 if (!skb_make_writable(e->skb, data_len))
481 return -ENOMEM;
482 skb_copy_to_linear_data(e->skb, data, data_len);
483 e->skb->ip_summed = CHECKSUM_NONE;
484 return 0;
485}
486
487static int
488nfqnl_set_mode(struct nfqnl_instance *queue,
489 unsigned char mode, unsigned int range)
490{
491 int status = 0;
492
493 spin_lock_bh(&queue->lock);
494 switch (mode) {
495 case NFQNL_COPY_NONE:
496 case NFQNL_COPY_META:
497 queue->copy_mode = mode;
498 queue->copy_range = 0;
499 break;
500
501 case NFQNL_COPY_PACKET:
502 queue->copy_mode = mode;
503 /* we're using struct nlattr which has 16bit nla_len */
504 if (range > 0xffff)
505 queue->copy_range = 0xffff;
506 else
507 queue->copy_range = range;
508 break;
509
510 default:
511 status = -EINVAL;
512
513 }
514 spin_unlock_bh(&queue->lock);
515
516 return status;
517}
518
519static int
520dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
521{
522 if (entry->indev)
523 if (entry->indev->ifindex == ifindex)
524 return 1;
525 if (entry->outdev)
526 if (entry->outdev->ifindex == ifindex)
527 return 1;
528#ifdef CONFIG_BRIDGE_NETFILTER
529 if (entry->skb->nf_bridge) {
530 if (entry->skb->nf_bridge->physindev &&
531 entry->skb->nf_bridge->physindev->ifindex == ifindex)
532 return 1;
533 if (entry->skb->nf_bridge->physoutdev &&
534 entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
535 return 1;
536 }
537#endif
538 return 0;
539}
540
541/* drop all packets with either indev or outdev == ifindex from all queue
542 * instances */
543static void
544nfqnl_dev_drop(int ifindex)
545{
546 int i;
547
548 rcu_read_lock();
549
550 for (i = 0; i < INSTANCE_BUCKETS; i++) {
551 struct hlist_node *tmp;
552 struct nfqnl_instance *inst;
553 struct hlist_head *head = &instance_table[i];
554
555 hlist_for_each_entry_rcu(inst, tmp, head, hlist)
556 nfqnl_flush(inst, dev_cmp, ifindex);
557 }
558
559 rcu_read_unlock();
560}
561
562#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
563
564static int
565nfqnl_rcv_dev_event(struct notifier_block *this,
566 unsigned long event, void *ptr)
567{
568 struct net_device *dev = ptr;
569
570 if (!net_eq(dev_net(dev), &init_net))
571 return NOTIFY_DONE;
572
573 /* Drop any packets associated with the downed device */
574 if (event == NETDEV_DOWN)
575 nfqnl_dev_drop(dev->ifindex);
576 return NOTIFY_DONE;
577}
578
579static struct notifier_block nfqnl_dev_notifier = {
580 .notifier_call = nfqnl_rcv_dev_event,
581};
582
583static int
584nfqnl_rcv_nl_event(struct notifier_block *this,
585 unsigned long event, void *ptr)
586{
587 struct netlink_notify *n = ptr;
588
589 if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
590 int i;
591
592 /* destroy all instances for this pid */
593 spin_lock(&instances_lock);
594 for (i = 0; i < INSTANCE_BUCKETS; i++) {
595 struct hlist_node *tmp, *t2;
596 struct nfqnl_instance *inst;
597 struct hlist_head *head = &instance_table[i];
598
599 hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
600 if ((n->net == &init_net) &&
601 (n->pid == inst->peer_pid))
602 __instance_destroy(inst);
603 }
604 }
605 spin_unlock(&instances_lock);
606 }
607 return NOTIFY_DONE;
608}
609
610static struct notifier_block nfqnl_rtnl_notifier = {
611 .notifier_call = nfqnl_rcv_nl_event,
612};
613
614static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
615 [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
616 [NFQA_MARK] = { .type = NLA_U32 },
617 [NFQA_PAYLOAD] = { .type = NLA_UNSPEC },
618};
619
620static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
621 [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
622 [NFQA_MARK] = { .type = NLA_U32 },
623};
624
625static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid)
626{
627 struct nfqnl_instance *queue;
628
629 queue = instance_lookup(queue_num);
630 if (!queue)
631 return ERR_PTR(-ENODEV);
632
633 if (queue->peer_pid != nlpid)
634 return ERR_PTR(-EPERM);
635
636 return queue;
637}
638
639static struct nfqnl_msg_verdict_hdr*
640verdicthdr_get(const struct nlattr * const nfqa[])
641{
642 struct nfqnl_msg_verdict_hdr *vhdr;
643 unsigned int verdict;
644
645 if (!nfqa[NFQA_VERDICT_HDR])
646 return NULL;
647
648 vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
649 verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK;
650 if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN)
651 return NULL;
652 return vhdr;
653}
654
655static int nfq_id_after(unsigned int id, unsigned int max)
656{
657 return (int)(id - max) > 0;
658}
659
660static int
661nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
662 const struct nlmsghdr *nlh,
663 const struct nlattr * const nfqa[])
664{
665 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
666 struct nf_queue_entry *entry, *tmp;
667 unsigned int verdict, maxid;
668 struct nfqnl_msg_verdict_hdr *vhdr;
669 struct nfqnl_instance *queue;
670 LIST_HEAD(batch_list);
671 u16 queue_num = ntohs(nfmsg->res_id);
672
673 queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid);
674 if (IS_ERR(queue))
675 return PTR_ERR(queue);
676
677 vhdr = verdicthdr_get(nfqa);
678 if (!vhdr)
679 return -EINVAL;
680
681 verdict = ntohl(vhdr->verdict);
682 maxid = ntohl(vhdr->id);
683
684 spin_lock_bh(&queue->lock);
685
686 list_for_each_entry_safe(entry, tmp, &queue->queue_list, list) {
687 if (nfq_id_after(entry->id, maxid))
688 break;
689 __dequeue_entry(queue, entry);
690 list_add_tail(&entry->list, &batch_list);
691 }
692
693 spin_unlock_bh(&queue->lock);
694
695 if (list_empty(&batch_list))
696 return -ENOENT;
697
698 list_for_each_entry_safe(entry, tmp, &batch_list, list) {
699 if (nfqa[NFQA_MARK])
700 entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
701 nf_reinject(entry, verdict);
702 }
703 return 0;
704}
705
706static int
707nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
708 const struct nlmsghdr *nlh,
709 const struct nlattr * const nfqa[])
710{
711 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
712 u_int16_t queue_num = ntohs(nfmsg->res_id);
713
714 struct nfqnl_msg_verdict_hdr *vhdr;
715 struct nfqnl_instance *queue;
716 unsigned int verdict;
717 struct nf_queue_entry *entry;
718
719 queue = instance_lookup(queue_num);
720 	if (!queue)
721 		queue = verdict_instance_lookup(queue_num,
722 						NETLINK_CB(skb).pid);
723 if (IS_ERR(queue))
724 return PTR_ERR(queue);
725
726 vhdr = verdicthdr_get(nfqa);
727 if (!vhdr)
728 return -EINVAL;
729
730 verdict = ntohl(vhdr->verdict);
731
732 entry = find_dequeue_entry(queue, ntohl(vhdr->id));
733 if (entry == NULL)
734 return -ENOENT;
735
736 if (nfqa[NFQA_PAYLOAD]) {
737 if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]),
738 nla_len(nfqa[NFQA_PAYLOAD]), entry) < 0)
739 verdict = NF_DROP;
740 }
741
742 if (nfqa[NFQA_MARK])
743 entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
744
745 nf_reinject(entry, verdict);
746 return 0;
747}
748
749static int
750nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
751 const struct nlmsghdr *nlh,
752 const struct nlattr * const nfqa[])
753{
754 return -ENOTSUPP;
755}
756
757static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
758 [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) },
759 [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) },
760};
761
762static const struct nf_queue_handler nfqh = {
763 .name = "nf_queue",
764 .outfn = &nfqnl_enqueue_packet,
765};
766
767static int
768nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
769 const struct nlmsghdr *nlh,
770 const struct nlattr * const nfqa[])
771{
772 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
773 u_int16_t queue_num = ntohs(nfmsg->res_id);
774 struct nfqnl_instance *queue;
775 struct nfqnl_msg_config_cmd *cmd = NULL;
776 int ret = 0;
777
778 if (nfqa[NFQA_CFG_CMD]) {
779 cmd = nla_data(nfqa[NFQA_CFG_CMD]);
780
781 /* Commands without queue context - might sleep */
782 switch (cmd->command) {
783 case NFQNL_CFG_CMD_PF_BIND:
784 return nf_register_queue_handler(ntohs(cmd->pf),
785 &nfqh);
786 case NFQNL_CFG_CMD_PF_UNBIND:
787 return nf_unregister_queue_handler(ntohs(cmd->pf),
788 &nfqh);
789 }
790 }
791
792 rcu_read_lock();
793 queue = instance_lookup(queue_num);
794 if (queue && queue->peer_pid != NETLINK_CB(skb).pid) {
795 ret = -EPERM;
796 goto err_out_unlock;
797 }
798
799 if (cmd != NULL) {
800 switch (cmd->command) {
801 case NFQNL_CFG_CMD_BIND:
802 if (queue) {
803 ret = -EBUSY;
804 goto err_out_unlock;
805 }
806 queue = instance_create(queue_num, NETLINK_CB(skb).pid);
807 if (IS_ERR(queue)) {
808 ret = PTR_ERR(queue);
809 goto err_out_unlock;
810 }
811 break;
812 case NFQNL_CFG_CMD_UNBIND:
813 if (!queue) {
814 ret = -ENODEV;
815 goto err_out_unlock;
816 }
817 instance_destroy(queue);
818 break;
819 case NFQNL_CFG_CMD_PF_BIND:
820 case NFQNL_CFG_CMD_PF_UNBIND:
821 break;
822 default:
823 ret = -ENOTSUPP;
824 break;
825 }
826 }
827
828 if (nfqa[NFQA_CFG_PARAMS]) {
829 struct nfqnl_msg_config_params *params;
830
831 if (!queue) {
832 ret = -ENODEV;
833 goto err_out_unlock;
834 }
835 params = nla_data(nfqa[NFQA_CFG_PARAMS]);
836 nfqnl_set_mode(queue, params->copy_mode,
837 ntohl(params->copy_range));
838 }
839
840 if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) {
841 __be32 *queue_maxlen;
842
843 if (!queue) {
844 ret = -ENODEV;
845 goto err_out_unlock;
846 }
847 queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
848 spin_lock_bh(&queue->lock);
849 queue->queue_maxlen = ntohl(*queue_maxlen);
850 spin_unlock_bh(&queue->lock);
851 }
852
853err_out_unlock:
854 rcu_read_unlock();
855 return ret;
856}
857
858static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
859 [NFQNL_MSG_PACKET] = { .call_rcu = nfqnl_recv_unsupp,
860 .attr_count = NFQA_MAX, },
861 [NFQNL_MSG_VERDICT] = { .call_rcu = nfqnl_recv_verdict,
862 .attr_count = NFQA_MAX,
863 .policy = nfqa_verdict_policy },
864 [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config,
865 .attr_count = NFQA_CFG_MAX,
866 .policy = nfqa_cfg_policy },
867 [NFQNL_MSG_VERDICT_BATCH]={ .call_rcu = nfqnl_recv_verdict_batch,
868 .attr_count = NFQA_MAX,
869 .policy = nfqa_verdict_batch_policy },
870};
871
872static const struct nfnetlink_subsystem nfqnl_subsys = {
873 .name = "nf_queue",
874 .subsys_id = NFNL_SUBSYS_QUEUE,
875 .cb_count = NFQNL_MSG_MAX,
876 .cb = nfqnl_cb,
877};
878
879#ifdef CONFIG_PROC_FS
880struct iter_state {
881 unsigned int bucket;
882};
883
884static struct hlist_node *get_first(struct seq_file *seq)
885{
886 struct iter_state *st = seq->private;
887
888 if (!st)
889 return NULL;
890
891 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
892 if (!hlist_empty(&instance_table[st->bucket]))
893 return instance_table[st->bucket].first;
894 }
895 return NULL;
896}
897
898static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
899{
900 struct iter_state *st = seq->private;
901
902 h = h->next;
903 while (!h) {
904 if (++st->bucket >= INSTANCE_BUCKETS)
905 return NULL;
906
907 h = instance_table[st->bucket].first;
908 }
909 return h;
910}
911
912static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
913{
914 struct hlist_node *head;
915 head = get_first(seq);
916
917 if (head)
918 while (pos && (head = get_next(seq, head)))
919 pos--;
920 return pos ? NULL : head;
921}
922
923static void *seq_start(struct seq_file *seq, loff_t *pos)
924 __acquires(instances_lock)
925{
926 spin_lock(&instances_lock);
927 return get_idx(seq, *pos);
928}
929
930static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
931{
932 (*pos)++;
933 return get_next(s, v);
934}
935
936static void seq_stop(struct seq_file *s, void *v)
937 __releases(instances_lock)
938{
939 spin_unlock(&instances_lock);
940}
941
942static int seq_show(struct seq_file *s, void *v)
943{
944 const struct nfqnl_instance *inst = v;
945
946 return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n",
947 inst->queue_num,
948 inst->peer_pid, inst->queue_total,
949 inst->copy_mode, inst->copy_range,
950 inst->queue_dropped, inst->queue_user_dropped,
951 inst->id_sequence, 1);
952}
953
954static const struct seq_operations nfqnl_seq_ops = {
955 .start = seq_start,
956 .next = seq_next,
957 .stop = seq_stop,
958 .show = seq_show,
959};
960
961static int nfqnl_open(struct inode *inode, struct file *file)
962{
963 return seq_open_private(file, &nfqnl_seq_ops,
964 sizeof(struct iter_state));
965}
966
967static const struct file_operations nfqnl_file_ops = {
968 .owner = THIS_MODULE,
969 .open = nfqnl_open,
970 .read = seq_read,
971 .llseek = seq_lseek,
972 .release = seq_release_private,
973};
974
975#endif /* PROC_FS */
976
977static int __init nfnetlink_queue_init(void)
978{
979 int i, status = -ENOMEM;
980
981 for (i = 0; i < INSTANCE_BUCKETS; i++)
982 INIT_HLIST_HEAD(&instance_table[i]);
983
984 netlink_register_notifier(&nfqnl_rtnl_notifier);
985 status = nfnetlink_subsys_register(&nfqnl_subsys);
986 if (status < 0) {
987 printk(KERN_ERR "nf_queue: failed to create netlink socket\n");
988 goto cleanup_netlink_notifier;
989 }
990
991#ifdef CONFIG_PROC_FS
992 if (!proc_create("nfnetlink_queue", 0440,
993 proc_net_netfilter, &nfqnl_file_ops))
994 goto cleanup_subsys;
995#endif
996
997 register_netdevice_notifier(&nfqnl_dev_notifier);
998 return status;
999
1000#ifdef CONFIG_PROC_FS
1001cleanup_subsys:
1002 nfnetlink_subsys_unregister(&nfqnl_subsys);
1003#endif
1004cleanup_netlink_notifier:
1005 netlink_unregister_notifier(&nfqnl_rtnl_notifier);
1006 return status;
1007}
1008
1009static void __exit nfnetlink_queue_fini(void)
1010{
1011 nf_unregister_queue_handlers(&nfqh);
1012 unregister_netdevice_notifier(&nfqnl_dev_notifier);
1013#ifdef CONFIG_PROC_FS
1014 remove_proc_entry("nfnetlink_queue", proc_net_netfilter);
1015#endif
1016 nfnetlink_subsys_unregister(&nfqnl_subsys);
1017 netlink_unregister_notifier(&nfqnl_rtnl_notifier);
1018
1019 rcu_barrier(); /* Wait for completion of call_rcu()'s */
1020}
1021
1022MODULE_DESCRIPTION("netfilter packet queue handler");
1023MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
1024MODULE_LICENSE("GPL");
1025MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE);
1026
1027module_init(nfnetlink_queue_init);
1028module_exit(nfnetlink_queue_fini);
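
Packets reach this queue handler via an NFQUEUE rule, e.g. iptables -A INPUT -j NFQUEUE --queue-num 0, and verdicts come back through the NFQNL_MSG_VERDICT path implemented by nfqnl_recv_verdict() above. A minimal userspace sketch of that round trip, assuming libnetfilter_queue is available; the queue number 0 and the blanket NF_ACCEPT verdict are illustrative choices, not part of this module:

#include <stdint.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/netfilter.h>		/* NF_ACCEPT */
#include <libnetfilter_queue/libnetfilter_queue.h>

/* Called once per queued packet; the returned verdict is what ends up
 * in nfqnl_recv_verdict() above. */
static int cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
	      struct nfq_data *nfa, void *data)
{
	struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr(nfa);
	uint32_t id = ph ? ntohl(ph->packet_id) : 0;

	return nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL);
}

int main(void)
{
	struct nfq_handle *h = nfq_open();
	struct nfq_q_handle *qh;
	char buf[4096];
	int fd, rv;

	if (!h)
		exit(1);
	/* Bind to AF_INET and create queue 0 in NFQNL_COPY_PACKET mode,
	 * exercising the NFQNL_CFG_CMD_BIND / NFQA_CFG_PARAMS config path. */
	nfq_unbind_pf(h, AF_INET);
	nfq_bind_pf(h, AF_INET);
	qh = nfq_create_queue(h, 0, &cb, NULL);
	if (!qh)
		exit(1);
	nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff);

	fd = nfq_fd(h);
	while ((rv = recv(fd, buf, sizeof(buf), 0)) >= 0)
		nfq_handle_packet(h, buf, rv);	/* dispatches to cb() */

	nfq_destroy_queue(qh);
	nfq_close(h);
	return 0;
}
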
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
new file mode 100644
index 00000000000..9d782181b6c
--- /dev/null
+++ b/net/netfilter/xt_NOTRACK.c
@@ -0,0 +1,53 @@
1/* This is a module which is used for setting up fake conntracks
2 * on packets so that they are not seen by the conntrack/NAT code.
3 */
4#include <linux/module.h>
5#include <linux/skbuff.h>
6
7#include <linux/netfilter/x_tables.h>
8#include <net/netfilter/nf_conntrack.h>
9
10MODULE_DESCRIPTION("Xtables: Disabling connection tracking for packets");
11MODULE_LICENSE("GPL");
12MODULE_ALIAS("ipt_NOTRACK");
13MODULE_ALIAS("ip6t_NOTRACK");
14
15static unsigned int
16notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
17{
18 /* Previously seen (loopback)? Ignore. */
19 if (skb->nfct != NULL)
20 return XT_CONTINUE;
21
22 /* Attach fake conntrack entry.
23 	   If there is a real ct entry corresponding to this packet,
24 	   it'll hang around till timing out. We don't deal with it
25 for performance reasons. JK */
26 skb->nfct = &nf_ct_untracked_get()->ct_general;
27 skb->nfctinfo = IP_CT_NEW;
28 nf_conntrack_get(skb->nfct);
29
30 return XT_CONTINUE;
31}
32
33static struct xt_target notrack_tg_reg __read_mostly = {
34 .name = "NOTRACK",
35 .revision = 0,
36 .family = NFPROTO_UNSPEC,
37 .target = notrack_tg,
38 .table = "raw",
39 .me = THIS_MODULE,
40};
41
42static int __init notrack_tg_init(void)
43{
44 return xt_register_target(&notrack_tg_reg);
45}
46
47static void __exit notrack_tg_exit(void)
48{
49 xt_unregister_target(&notrack_tg_reg);
50}
51
52module_init(notrack_tg_init);
53module_exit(notrack_tg_exit);
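
As the .table = "raw" restriction above implies, the NOTRACK target can only be used from the raw table, i.e. before connection tracking has seen the packet. A typical illustrative rule that exempts DNS queries from conntrack would be: iptables -t raw -A PREROUTING -p udp --dport 53 -j NOTRACK (the protocol, port and chain here are examples, not requirements of the target).
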
diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c
new file mode 100644
index 00000000000..08086d680c2
--- /dev/null
+++ b/net/netfilter/xt_qtaguid.c
@@ -0,0 +1,2785 @@
1/*
2 * Kernel iptables module to track stats for packets based on user tags.
3 *
4 * (C) 2011 Google, Inc
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11/*
12 * There are run-time debug flags enabled via the debug_mask module param, or
13 * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
14 */
15#define DEBUG
16
17#include <linux/file.h>
18#include <linux/inetdevice.h>
19#include <linux/module.h>
20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter/xt_qtaguid.h>
22#include <linux/skbuff.h>
23#include <linux/workqueue.h>
24#include <net/addrconf.h>
25#include <net/sock.h>
26#include <net/tcp.h>
27#include <net/udp.h>
28
29#include <linux/netfilter/xt_socket.h>
30#include "xt_qtaguid_internal.h"
31#include "xt_qtaguid_print.h"
32
33/*
34 * We only use the xt_socket funcs within a similar context to avoid unexpected
35 * return values.
36 */
37#define XT_SOCKET_SUPPORTED_HOOKS \
38 ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
39
40
41static const char *module_procdirname = "xt_qtaguid";
42static struct proc_dir_entry *xt_qtaguid_procdir;
43
44static unsigned int proc_iface_perms = S_IRUGO;
45module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
46
47static struct proc_dir_entry *xt_qtaguid_stats_file;
48static unsigned int proc_stats_perms = S_IRUGO;
49module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
50
51static struct proc_dir_entry *xt_qtaguid_ctrl_file;
52#ifdef CONFIG_ANDROID_PARANOID_NETWORK
53static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
54#else
55static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR;
56#endif
57module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
58
59#ifdef CONFIG_ANDROID_PARANOID_NETWORK
60#include <linux/android_aid.h>
61static gid_t proc_stats_readall_gid = AID_NET_BW_STATS;
62static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT;
63#else
64/* 0 means, don't limit anybody */
65static gid_t proc_stats_readall_gid;
66static gid_t proc_ctrl_write_gid;
67#endif
68module_param_named(stats_readall_gid, proc_stats_readall_gid, uint,
69 S_IRUGO | S_IWUSR);
70module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint,
71 S_IRUGO | S_IWUSR);
72
73/*
74 * Limit the number of active tags (via socket tags) for a given UID.
75 * Multiple processes could share the UID.
76 */
77static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
78module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
79
80/*
81 * After the kernel has initialized this module, it is still possible
82 * to make it passive.
83 * Setting passive to Y:
84 * - the iface stats handling will not act on notifications.
85 * - iptables matches will never match.
86 * - ctrl commands silently succeed.
87 * - stats are always empty.
88 * This is mostly useful when a bug is suspected.
89 */
90static bool module_passive;
91module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
92
93/*
94 * Control how qtaguid data is tracked per proc/uid.
95 * Setting tag_tracking_passive to Y:
96 * - don't create proc specific structs to track tags
97 * - don't check whether active tag stats exceed limits.
98 * - don't clean up socket tags on process exits.
99 * This is mostly useful when a bug is suspected.
100 */
101static bool qtu_proc_handling_passive;
102module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
103 S_IRUGO | S_IWUSR);
104
105#define QTU_DEV_NAME "xt_qtaguid"
106
107uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
108module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
109
110/*---------------------------------------------------------------------------*/
111static const char *iface_stat_procdirname = "iface_stat";
112static struct proc_dir_entry *iface_stat_procdir;
113static const char *iface_stat_all_procfilename = "iface_stat_all";
114static struct proc_dir_entry *iface_stat_all_procfile;
115
116/*
117 * Ordering of locks:
118 * outer locks:
119 * iface_stat_list_lock
120 * sock_tag_list_lock
121 * inner locks:
122 * uid_tag_data_tree_lock
123 * tag_counter_set_list_lock
124 * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
125 * is acquired.
126 *
127 * Call tree with all lock holders as of 2011-09-25:
128 *
129 * iface_stat_all_proc_read()
130 * iface_stat_list_lock
131 * (struct iface_stat)
132 *
133 * qtaguid_ctrl_proc_read()
134 * sock_tag_list_lock
135 * (sock_tag_tree)
136 * (struct proc_qtu_data->sock_tag_list)
137 * prdebug_full_state()
138 * sock_tag_list_lock
139 * (sock_tag_tree)
140 * uid_tag_data_tree_lock
141 * (uid_tag_data_tree)
142 * (proc_qtu_data_tree)
143 * iface_stat_list_lock
144 *
145 * qtaguid_stats_proc_read()
146 * iface_stat_list_lock
147 * struct iface_stat->tag_stat_list_lock
148 *
149 * qtudev_open()
150 * uid_tag_data_tree_lock
151 *
152 * qtudev_release()
153 * sock_tag_data_list_lock
154 * uid_tag_data_tree_lock
155 * prdebug_full_state()
156 * sock_tag_list_lock
157 * uid_tag_data_tree_lock
158 * iface_stat_list_lock
159 *
160 * iface_netdev_event_handler()
161 * iface_stat_create()
162 * iface_stat_list_lock
163 * iface_stat_update()
164 * iface_stat_list_lock
165 *
166 * iface_inetaddr_event_handler()
167 * iface_stat_create()
168 * iface_stat_list_lock
169 * iface_stat_update()
170 * iface_stat_list_lock
171 *
172 * iface_inet6addr_event_handler()
173 * iface_stat_create_ipv6()
174 * iface_stat_list_lock
175 * iface_stat_update()
176 * iface_stat_list_lock
177 *
178 * qtaguid_mt()
179 * account_for_uid()
180 * if_tag_stat_update()
181 * get_sock_stat()
182 * sock_tag_list_lock
183 * struct iface_stat->tag_stat_list_lock
184 * tag_stat_update()
185 * get_active_counter_set()
186 * tag_counter_set_list_lock
187 * tag_stat_update()
188 * get_active_counter_set()
189 * tag_counter_set_list_lock
190 *
191 *
192 * qtaguid_ctrl_parse()
193 * ctrl_cmd_delete()
194 * sock_tag_list_lock
195 * tag_counter_set_list_lock
196 * iface_stat_list_lock
197 * struct iface_stat->tag_stat_list_lock
198 * uid_tag_data_tree_lock
199 * ctrl_cmd_counter_set()
200 * tag_counter_set_list_lock
201 * ctrl_cmd_tag()
202 * sock_tag_list_lock
203 * (sock_tag_tree)
204 * get_tag_ref()
205 * uid_tag_data_tree_lock
206 * (uid_tag_data_tree)
207 * uid_tag_data_tree_lock
208 * (proc_qtu_data_tree)
209 * ctrl_cmd_untag()
210 * sock_tag_list_lock
211 * uid_tag_data_tree_lock
212 *
213 */
214static LIST_HEAD(iface_stat_list);
215static DEFINE_SPINLOCK(iface_stat_list_lock);
216
217static struct rb_root sock_tag_tree = RB_ROOT;
218static DEFINE_SPINLOCK(sock_tag_list_lock);
219
220static struct rb_root tag_counter_set_tree = RB_ROOT;
221static DEFINE_SPINLOCK(tag_counter_set_list_lock);
222
223static struct rb_root uid_tag_data_tree = RB_ROOT;
224static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
225
226static struct rb_root proc_qtu_data_tree = RB_ROOT;
227/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
228
229static struct qtaguid_event_counts qtu_events;
230/*----------------------------------------------*/
231static bool can_manipulate_uids(void)
232{
233 /* root pwnd */
234 return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid)
235 || in_egroup_p(proc_ctrl_write_gid);
236}
237
238static bool can_impersonate_uid(uid_t uid)
239{
240 return uid == current_fsuid() || can_manipulate_uids();
241}
242
243static bool can_read_other_uid_stats(uid_t uid)
244{
245 /* root pwnd */
246 return unlikely(!current_fsuid()) || uid == current_fsuid()
247 || unlikely(!proc_stats_readall_gid)
248 || in_egroup_p(proc_stats_readall_gid);
249}
250
251static inline void dc_add_byte_packets(struct data_counters *counters, int set,
252 enum ifs_tx_rx direction,
253 enum ifs_proto ifs_proto,
254 int bytes,
255 int packets)
256{
257 counters->bpc[set][direction][ifs_proto].bytes += bytes;
258 counters->bpc[set][direction][ifs_proto].packets += packets;
259}
260
261static inline uint64_t dc_sum_bytes(struct data_counters *counters,
262 int set,
263 enum ifs_tx_rx direction)
264{
265 return counters->bpc[set][direction][IFS_TCP].bytes
266 + counters->bpc[set][direction][IFS_UDP].bytes
267 + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
268}
269
270static inline uint64_t dc_sum_packets(struct data_counters *counters,
271 int set,
272 enum ifs_tx_rx direction)
273{
274 return counters->bpc[set][direction][IFS_TCP].packets
275 + counters->bpc[set][direction][IFS_UDP].packets
276 + counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
277}
278
279static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
280{
281 struct rb_node *node = root->rb_node;
282
283 while (node) {
284 struct tag_node *data = rb_entry(node, struct tag_node, node);
285 int result;
286 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
287 " node=%p data=%p\n", tag, node, data);
288 result = tag_compare(tag, data->tag);
289 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
290 " data.tag=0x%llx (uid=%u) res=%d\n",
291 tag, data->tag, get_uid_from_tag(data->tag), result);
292 if (result < 0)
293 node = node->rb_left;
294 else if (result > 0)
295 node = node->rb_right;
296 else
297 return data;
298 }
299 return NULL;
300}
301
302static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
303{
304 struct rb_node **new = &(root->rb_node), *parent = NULL;
305
306 /* Figure out where to put new node */
307 while (*new) {
308 struct tag_node *this = rb_entry(*new, struct tag_node,
309 node);
310 int result = tag_compare(data->tag, this->tag);
311 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
312 " (uid=%u)\n", __func__,
313 this->tag,
314 get_uid_from_tag(this->tag));
315 parent = *new;
316 if (result < 0)
317 new = &((*new)->rb_left);
318 else if (result > 0)
319 new = &((*new)->rb_right);
320 else
321 BUG();
322 }
323
324 /* Add new node and rebalance tree. */
325 rb_link_node(&data->node, parent, new);
326 rb_insert_color(&data->node, root);
327}
328
329static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
330{
331 tag_node_tree_insert(&data->tn, root);
332}
333
334static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
335{
336 struct tag_node *node = tag_node_tree_search(root, tag);
337 if (!node)
338 return NULL;
339 return rb_entry(&node->node, struct tag_stat, tn.node);
340}
341
342static void tag_counter_set_tree_insert(struct tag_counter_set *data,
343 struct rb_root *root)
344{
345 tag_node_tree_insert(&data->tn, root);
346}
347
348static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
349 tag_t tag)
350{
351 struct tag_node *node = tag_node_tree_search(root, tag);
352 if (!node)
353 return NULL;
354 return rb_entry(&node->node, struct tag_counter_set, tn.node);
355
356}
357
358static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
359{
360 tag_node_tree_insert(&data->tn, root);
361}
362
363static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
364{
365 struct tag_node *node = tag_node_tree_search(root, tag);
366 if (!node)
367 return NULL;
368 return rb_entry(&node->node, struct tag_ref, tn.node);
369}
370
371static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
372 const struct sock *sk)
373{
374 struct rb_node *node = root->rb_node;
375
376 while (node) {
377 struct sock_tag *data = rb_entry(node, struct sock_tag,
378 sock_node);
379 if (sk < data->sk)
380 node = node->rb_left;
381 else if (sk > data->sk)
382 node = node->rb_right;
383 else
384 return data;
385 }
386 return NULL;
387}
388
389static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
390{
391 struct rb_node **new = &(root->rb_node), *parent = NULL;
392
393 /* Figure out where to put new node */
394 while (*new) {
395 struct sock_tag *this = rb_entry(*new, struct sock_tag,
396 sock_node);
397 parent = *new;
398 if (data->sk < this->sk)
399 new = &((*new)->rb_left);
400 else if (data->sk > this->sk)
401 new = &((*new)->rb_right);
402 else
403 BUG();
404 }
405
406 /* Add new node and rebalance tree. */
407 rb_link_node(&data->sock_node, parent, new);
408 rb_insert_color(&data->sock_node, root);
409}
410
411static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
412{
413 struct rb_node *node;
414 struct sock_tag *st_entry;
415
416 node = rb_first(st_to_free_tree);
417 while (node) {
418 st_entry = rb_entry(node, struct sock_tag, sock_node);
419 node = rb_next(node);
420 CT_DEBUG("qtaguid: %s(): "
421 "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
422 st_entry->sk,
423 st_entry->tag,
424 get_uid_from_tag(st_entry->tag));
425 rb_erase(&st_entry->sock_node, st_to_free_tree);
426 sockfd_put(st_entry->socket);
427 kfree(st_entry);
428 }
429}
430
431static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
432 const pid_t pid)
433{
434 struct rb_node *node = root->rb_node;
435
436 while (node) {
437 struct proc_qtu_data *data = rb_entry(node,
438 struct proc_qtu_data,
439 node);
440 if (pid < data->pid)
441 node = node->rb_left;
442 else if (pid > data->pid)
443 node = node->rb_right;
444 else
445 return data;
446 }
447 return NULL;
448}
449
450static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
451 struct rb_root *root)
452{
453 struct rb_node **new = &(root->rb_node), *parent = NULL;
454
455 /* Figure out where to put new node */
456 while (*new) {
457 struct proc_qtu_data *this = rb_entry(*new,
458 struct proc_qtu_data,
459 node);
460 parent = *new;
461 if (data->pid < this->pid)
462 new = &((*new)->rb_left);
463 else if (data->pid > this->pid)
464 new = &((*new)->rb_right);
465 else
466 BUG();
467 }
468
469 /* Add new node and rebalance tree. */
470 rb_link_node(&data->node, parent, new);
471 rb_insert_color(&data->node, root);
472}
473
474static void uid_tag_data_tree_insert(struct uid_tag_data *data,
475 struct rb_root *root)
476{
477 struct rb_node **new = &(root->rb_node), *parent = NULL;
478
479 /* Figure out where to put new node */
480 while (*new) {
481 struct uid_tag_data *this = rb_entry(*new,
482 struct uid_tag_data,
483 node);
484 parent = *new;
485 if (data->uid < this->uid)
486 new = &((*new)->rb_left);
487 else if (data->uid > this->uid)
488 new = &((*new)->rb_right);
489 else
490 BUG();
491 }
492
493 /* Add new node and rebalance tree. */
494 rb_link_node(&data->node, parent, new);
495 rb_insert_color(&data->node, root);
496}
497
498static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
499 uid_t uid)
500{
501 struct rb_node *node = root->rb_node;
502
503 while (node) {
504 struct uid_tag_data *data = rb_entry(node,
505 struct uid_tag_data,
506 node);
507 if (uid < data->uid)
508 node = node->rb_left;
509 else if (uid > data->uid)
510 node = node->rb_right;
511 else
512 return data;
513 }
514 return NULL;
515}
516
517/*
518 * Allocates a new uid_tag_data struct if needed.
519 * Returns a pointer to the found or allocated uid_tag_data.
520 * Returns a PTR_ERR on failures, and lock is not held.
521 * If found_res is not NULL:
522 * sets *found_res to true if an existing entry was found (nothing allocated),
523 * sets *found_res to false if a new entry had to be allocated.
524 */
525struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
526{
527 struct uid_tag_data *utd_entry;
528
529 /* Look for top level uid_tag_data for the UID */
530 utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
531 DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
532
533 if (found_res)
534 *found_res = utd_entry;
535 if (utd_entry)
536 return utd_entry;
537
538 utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
539 if (!utd_entry) {
540 pr_err("qtaguid: get_uid_data(%u): "
541 "tag data alloc failed\n", uid);
542 return ERR_PTR(-ENOMEM);
543 }
544
545 utd_entry->uid = uid;
546 utd_entry->tag_ref_tree = RB_ROOT;
547 uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
548 DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
549 return utd_entry;
550}
551
552/* Never returns NULL. Either PTR_ERR or a valid ptr. */
553static struct tag_ref *new_tag_ref(tag_t new_tag,
554 struct uid_tag_data *utd_entry)
555{
556 struct tag_ref *tr_entry;
557 int res;
558
559 if (utd_entry->num_active_tags + 1 > max_sock_tags) {
560 pr_info("qtaguid: new_tag_ref(0x%llx): "
561 "tag ref alloc quota exceeded. max=%d\n",
562 new_tag, max_sock_tags);
563 res = -EMFILE;
564 goto err_res;
565
566 }
567
568 tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
569 if (!tr_entry) {
570 pr_err("qtaguid: new_tag_ref(0x%llx): "
571 "tag ref alloc failed\n",
572 new_tag);
573 res = -ENOMEM;
574 goto err_res;
575 }
576 tr_entry->tn.tag = new_tag;
577 /* tr_entry->num_sock_tags handled by caller */
578 utd_entry->num_active_tags++;
579 tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
580 DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
581 " inserted new tag ref %p\n",
582 new_tag, tr_entry);
583 return tr_entry;
584
585err_res:
586 return ERR_PTR(res);
587}
588
589static struct tag_ref *lookup_tag_ref(tag_t full_tag,
590 struct uid_tag_data **utd_res)
591{
592 struct uid_tag_data *utd_entry;
593 struct tag_ref *tr_entry;
594 bool found_utd;
595 uid_t uid = get_uid_from_tag(full_tag);
596
597 DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
598 full_tag, uid);
599
600 utd_entry = get_uid_data(uid, &found_utd);
601 if (IS_ERR_OR_NULL(utd_entry)) {
602 if (utd_res)
603 *utd_res = utd_entry;
604 return NULL;
605 }
606
607 tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
608 if (utd_res)
609 *utd_res = utd_entry;
610 DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
611 full_tag, utd_entry, tr_entry);
612 return tr_entry;
613}
614
615/* Never returns NULL. Either PTR_ERR or a valid ptr. */
616static struct tag_ref *get_tag_ref(tag_t full_tag,
617 struct uid_tag_data **utd_res)
618{
619 struct uid_tag_data *utd_entry;
620 struct tag_ref *tr_entry;
621
622 DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
623 full_tag);
624 spin_lock_bh(&uid_tag_data_tree_lock);
625 tr_entry = lookup_tag_ref(full_tag, &utd_entry);
626 BUG_ON(IS_ERR_OR_NULL(utd_entry));
627 if (!tr_entry)
628 tr_entry = new_tag_ref(full_tag, utd_entry);
629
630 spin_unlock_bh(&uid_tag_data_tree_lock);
631 if (utd_res)
632 *utd_res = utd_entry;
633 DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
634 full_tag, utd_entry, tr_entry);
635 return tr_entry;
636}
637
638/* Checks and maybe frees the UID Tag Data entry */
639static void put_utd_entry(struct uid_tag_data *utd_entry)
640{
641 /* Are we done with the UID tag data entry? */
642 if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
643 !utd_entry->num_pqd) {
644 DR_DEBUG("qtaguid: %s(): "
645 "erase utd_entry=%p uid=%u "
646 "by pid=%u tgid=%u uid=%u\n", __func__,
647 utd_entry, utd_entry->uid,
648 current->pid, current->tgid, current_fsuid());
649 BUG_ON(utd_entry->num_active_tags);
650 rb_erase(&utd_entry->node, &uid_tag_data_tree);
651 kfree(utd_entry);
652 } else {
653 DR_DEBUG("qtaguid: %s(): "
654 "utd_entry=%p still has %d tags %d proc_qtu_data\n",
655 __func__, utd_entry, utd_entry->num_active_tags,
656 utd_entry->num_pqd);
657 BUG_ON(!(utd_entry->num_active_tags ||
658 utd_entry->num_pqd));
659 }
660}
661
662/*
663 * If no sock_tags are using this tag_ref,
664 * decrements refcount of utd_entry, removes tr_entry
665 * from utd_entry->tag_ref_tree and frees.
666 */
667static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
668 struct uid_tag_data *utd_entry)
669{
670 DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
671 tr_entry, tr_entry->tn.tag,
672 get_uid_from_tag(tr_entry->tn.tag));
673 if (!tr_entry->num_sock_tags) {
674 BUG_ON(!utd_entry->num_active_tags);
675 utd_entry->num_active_tags--;
676 rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
677 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
678 kfree(tr_entry);
679 }
680}
681
682static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
683{
684 struct rb_node *node;
685 struct tag_ref *tr_entry;
686 tag_t acct_tag;
687
688 DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
689 full_tag, get_uid_from_tag(full_tag));
690 acct_tag = get_atag_from_tag(full_tag);
691 node = rb_first(&utd_entry->tag_ref_tree);
692 while (node) {
693 tr_entry = rb_entry(node, struct tag_ref, tn.node);
694 node = rb_next(node);
695 if (!acct_tag || tr_entry->tn.tag == full_tag)
696 free_tag_ref_from_utd_entry(tr_entry, utd_entry);
697 }
698}
699
700static int read_proc_u64(char *page, char **start, off_t off,
701 int count, int *eof, void *data)
702{
703 int len;
704 uint64_t value;
705 char *p = page;
706 uint64_t *iface_entry = data;
707
708 if (!data)
709 return 0;
710
711 value = *iface_entry;
712 p += sprintf(p, "%llu\n", value);
713 len = (p - page) - off;
714 *eof = (len <= count) ? 1 : 0;
715 *start = page + off;
716 return len;
717}
718
719static int read_proc_bool(char *page, char **start, off_t off,
720 int count, int *eof, void *data)
721{
722 int len;
723 bool value;
724 char *p = page;
725 bool *bool_entry = data;
726
727 if (!data)
728 return 0;
729
730 value = *bool_entry;
731 p += sprintf(p, "%u\n", value);
732 len = (p - page) - off;
733 *eof = (len <= count) ? 1 : 0;
734 *start = page + off;
735 return len;
736}
737
738static int get_active_counter_set(tag_t tag)
739{
740 int active_set = 0;
741 struct tag_counter_set *tcs;
742
743 MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
744 " (uid=%u)\n",
745 tag, get_uid_from_tag(tag));
746 /* For now we only handle UID tags for active sets */
747 tag = get_utag_from_tag(tag);
748 spin_lock_bh(&tag_counter_set_list_lock);
749 tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
750 if (tcs)
751 active_set = tcs->active_set;
752 spin_unlock_bh(&tag_counter_set_list_lock);
753 return active_set;
754}
755
756/*
757 * Find the entry for tracking the specified interface.
758 * Caller must hold iface_stat_list_lock
759 */
760static struct iface_stat *get_iface_entry(const char *ifname)
761{
762 struct iface_stat *iface_entry;
763
764 	/* Find the entry for tracking the specified interface */
765 if (ifname == NULL) {
766 pr_info("qtaguid: iface_stat: get() NULL device name\n");
767 return NULL;
768 }
769
770 /* Iterate over interfaces */
771 list_for_each_entry(iface_entry, &iface_stat_list, list) {
772 if (!strcmp(ifname, iface_entry->ifname))
773 goto done;
774 }
775 iface_entry = NULL;
776done:
777 return iface_entry;
778}
779
780static int iface_stat_all_proc_read(char *page, char **num_items_returned,
781 off_t items_to_skip, int char_count,
782 int *eof, void *data)
783{
784 char *outp = page;
785 int item_index = 0;
786 int len;
787 struct iface_stat *iface_entry;
788 struct rtnl_link_stats64 dev_stats, *stats;
789 struct rtnl_link_stats64 no_dev_stats = {0};
790
791 if (unlikely(module_passive)) {
792 *eof = 1;
793 return 0;
794 }
795
796 CT_DEBUG("qtaguid:proc iface_stat_all "
797 "page=%p *num_items_returned=%p off=%ld "
798 "char_count=%d *eof=%d\n", page, *num_items_returned,
799 items_to_skip, char_count, *eof);
800
801 if (*eof)
802 return 0;
803
804 /*
805 * This lock will prevent iface_stat_update() from changing active,
806 * and in turn prevent an interface from unregistering itself.
807 */
808 spin_lock_bh(&iface_stat_list_lock);
809 list_for_each_entry(iface_entry, &iface_stat_list, list) {
810 if (item_index++ < items_to_skip)
811 continue;
812
813 if (iface_entry->active) {
814 stats = dev_get_stats(iface_entry->net_dev,
815 &dev_stats);
816 } else {
817 stats = &no_dev_stats;
818 }
819 len = snprintf(outp, char_count,
820 "%s %d "
821 "%llu %llu %llu %llu "
822 "%llu %llu %llu %llu\n",
823 iface_entry->ifname,
824 iface_entry->active,
825 iface_entry->totals[IFS_RX].bytes,
826 iface_entry->totals[IFS_RX].packets,
827 iface_entry->totals[IFS_TX].bytes,
828 iface_entry->totals[IFS_TX].packets,
829 stats->rx_bytes, stats->rx_packets,
830 stats->tx_bytes, stats->tx_packets);
831 if (len >= char_count) {
832 spin_unlock_bh(&iface_stat_list_lock);
833 *outp = '\0';
834 return outp - page;
835 }
836 outp += len;
837 char_count -= len;
838 (*num_items_returned)++;
839 }
840 spin_unlock_bh(&iface_stat_list_lock);
841
842 *eof = 1;
843 return outp - page;
844}
845
846static void iface_create_proc_worker(struct work_struct *work)
847{
848 struct proc_dir_entry *proc_entry;
849 struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
850 iface_work);
851 struct iface_stat *new_iface = isw->iface_entry;
852
853 /* iface_entries are not deleted, so safe to manipulate. */
854 proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
855 if (IS_ERR_OR_NULL(proc_entry)) {
856 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
857 kfree(isw);
858 return;
859 }
860
861 new_iface->proc_ptr = proc_entry;
862
863 create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
864 read_proc_u64, &new_iface->totals[IFS_TX].bytes);
865 create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
866 read_proc_u64, &new_iface->totals[IFS_RX].bytes);
867 create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
868 read_proc_u64, &new_iface->totals[IFS_TX].packets);
869 create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
870 read_proc_u64, &new_iface->totals[IFS_RX].packets);
871 create_proc_read_entry("active", proc_iface_perms, proc_entry,
872 read_proc_bool, &new_iface->active);
873
874 IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
875 "entry=%p dev=%s\n", new_iface, new_iface->ifname);
876 kfree(isw);
877}
878
879/*
880 * Will set the entry's active state, and
881 * update the net_dev accordingly also.
882 */
883static void _iface_stat_set_active(struct iface_stat *entry,
884 struct net_device *net_dev,
885 bool activate)
886{
887 if (activate) {
888 entry->net_dev = net_dev;
889 entry->active = true;
890 IF_DEBUG("qtaguid: %s(%s): "
891 "enable tracking. rfcnt=%d\n", __func__,
892 entry->ifname,
893 percpu_read(*net_dev->pcpu_refcnt));
894 } else {
895 entry->active = false;
896 entry->net_dev = NULL;
897 IF_DEBUG("qtaguid: %s(%s): "
898 "disable tracking. rfcnt=%d\n", __func__,
899 entry->ifname,
900 percpu_read(*net_dev->pcpu_refcnt));
901
902 }
903}
904
905/* Caller must hold iface_stat_list_lock */
906static struct iface_stat *iface_alloc(struct net_device *net_dev)
907{
908 struct iface_stat *new_iface;
909 struct iface_stat_work *isw;
910
911 new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
912 if (new_iface == NULL) {
913 pr_err("qtaguid: iface_stat: create(%s): "
914 "iface_stat alloc failed\n", net_dev->name);
915 return NULL;
916 }
917 new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
918 if (new_iface->ifname == NULL) {
919 pr_err("qtaguid: iface_stat: create(%s): "
920 "ifname alloc failed\n", net_dev->name);
921 kfree(new_iface);
922 return NULL;
923 }
924 spin_lock_init(&new_iface->tag_stat_list_lock);
925 new_iface->tag_stat_tree = RB_ROOT;
926 _iface_stat_set_active(new_iface, net_dev, true);
927
928 /*
929 * ipv6 notifier chains are atomic :(
930 * No create_proc_read_entry() for you!
931 */
932 isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
933 if (!isw) {
934 pr_err("qtaguid: iface_stat: create(%s): "
935 "work alloc failed\n", new_iface->ifname);
936 _iface_stat_set_active(new_iface, net_dev, false);
937 kfree(new_iface->ifname);
938 kfree(new_iface);
939 return NULL;
940 }
941 isw->iface_entry = new_iface;
942 INIT_WORK(&isw->iface_work, iface_create_proc_worker);
943 schedule_work(&isw->iface_work);
944 list_add(&new_iface->list, &iface_stat_list);
945 return new_iface;
946}
947
948static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
949 struct iface_stat *iface)
950{
951 struct rtnl_link_stats64 dev_stats, *stats;
952 bool stats_rewound;
953
954 stats = dev_get_stats(net_dev, &dev_stats);
955 /* No empty packets */
956 stats_rewound =
957 (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
958 || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
959
960 IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
961 "bytes rx/tx=%llu/%llu "
962 "active=%d last_known=%d "
963 "stats_rewound=%d\n", __func__,
964 net_dev ? net_dev->name : "?",
965 iface, net_dev,
966 stats->rx_bytes, stats->tx_bytes,
967 iface->active, iface->last_known_valid, stats_rewound);
968
969 if (iface->active && iface->last_known_valid && stats_rewound) {
970 pr_warn_once("qtaguid: iface_stat: %s(%s): "
971 "iface reset its stats unexpectedly\n", __func__,
972 net_dev->name);
973
974 iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes;
975 iface->totals[IFS_TX].packets +=
976 iface->last_known[IFS_TX].packets;
977 iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes;
978 iface->totals[IFS_RX].packets +=
979 iface->last_known[IFS_RX].packets;
980 iface->last_known_valid = false;
981 IF_DEBUG("qtaguid: %s(%s): iface=%p "
982 "used last known bytes rx/tx=%llu/%llu\n", __func__,
983 iface->ifname, iface, iface->last_known[IFS_RX].bytes,
984 iface->last_known[IFS_TX].bytes);
985 }
986}
987
988/*
989 * Create a new entry for tracking the specified interface.
990 * Do nothing if the entry already exists.
991 * Called when an interface is configured with a valid IP address.
992 */
993static void iface_stat_create(struct net_device *net_dev,
994 struct in_ifaddr *ifa)
995{
996 struct in_device *in_dev = NULL;
997 const char *ifname;
998 struct iface_stat *entry;
999 __be32 ipaddr = 0;
1000 struct iface_stat *new_iface;
1001
1002 IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
1003 net_dev ? net_dev->name : "?",
1004 ifa, net_dev);
1005 if (!net_dev) {
1006 pr_err("qtaguid: iface_stat: create(): no net dev\n");
1007 return;
1008 }
1009
1010 ifname = net_dev->name;
1011 if (!ifa) {
1012 in_dev = in_dev_get(net_dev);
1013 if (!in_dev) {
1014 pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
1015 ifname);
1016 return;
1017 }
1018 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
1019 ifname, in_dev);
1020 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1021 IF_DEBUG("qtaguid: iface_stat: create(%s): "
1022 "ifa=%p ifa_label=%s\n",
1023 ifname, ifa,
1024 ifa->ifa_label ? ifa->ifa_label : "(null)");
1025 if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
1026 break;
1027 }
1028 }
1029
1030 if (!ifa) {
1031 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
1032 ifname);
1033 goto done_put;
1034 }
1035 ipaddr = ifa->ifa_local;
1036
1037 spin_lock_bh(&iface_stat_list_lock);
1038 entry = get_iface_entry(ifname);
1039 if (entry != NULL) {
1040 bool activate = !ipv4_is_loopback(ipaddr);
1041 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
1042 ifname, entry);
1043 iface_check_stats_reset_and_adjust(net_dev, entry);
1044 _iface_stat_set_active(entry, net_dev, activate);
1045 IF_DEBUG("qtaguid: %s(%s): "
1046 "tracking now %d on ip=%pI4\n", __func__,
1047 entry->ifname, activate, &ipaddr);
1048 goto done_unlock_put;
1049 } else if (ipv4_is_loopback(ipaddr)) {
1050 IF_DEBUG("qtaguid: iface_stat: create(%s): "
1051 "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr);
1052 goto done_unlock_put;
1053 }
1054
1055 new_iface = iface_alloc(net_dev);
1056 IF_DEBUG("qtaguid: iface_stat: create(%s): done "
1057 "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
1058done_unlock_put:
1059 spin_unlock_bh(&iface_stat_list_lock);
1060done_put:
1061 if (in_dev)
1062 in_dev_put(in_dev);
1063}
1064
1065static void iface_stat_create_ipv6(struct net_device *net_dev,
1066 struct inet6_ifaddr *ifa)
1067{
1068 struct in_device *in_dev;
1069 const char *ifname;
1070 struct iface_stat *entry;
1071 struct iface_stat *new_iface;
1072 int addr_type;
1073
1074 IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
1075 ifa, net_dev, net_dev ? net_dev->name : "");
1076 if (!net_dev) {
1077 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
1078 return;
1079 }
1080 ifname = net_dev->name;
1081
1082 in_dev = in_dev_get(net_dev);
1083 if (!in_dev) {
1084 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
1085 ifname);
1086 return;
1087 }
1088
1089 IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
1090 ifname, in_dev);
1091
1092 if (!ifa) {
1093 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
1094 ifname);
1095 goto done_put;
1096 }
1097 addr_type = ipv6_addr_type(&ifa->addr);
1098
1099 spin_lock_bh(&iface_stat_list_lock);
1100 entry = get_iface_entry(ifname);
1101 if (entry != NULL) {
1102 bool activate = !(addr_type & IPV6_ADDR_LOOPBACK);
1103 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1104 ifname, entry);
1105 iface_check_stats_reset_and_adjust(net_dev, entry);
1106 _iface_stat_set_active(entry, net_dev, activate);
1107 IF_DEBUG("qtaguid: %s(%s): "
1108 "tracking now %d on ip=%pI6c\n", __func__,
1109 entry->ifname, activate, &ifa->addr);
1110 goto done_unlock_put;
1111 } else if (addr_type & IPV6_ADDR_LOOPBACK) {
1112 IF_DEBUG("qtaguid: %s(%s): "
1113 "ignore loopback dev. ip=%pI6c\n", __func__,
1114 ifname, &ifa->addr);
1115 goto done_unlock_put;
1116 }
1117
1118 new_iface = iface_alloc(net_dev);
1119 IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
1120 "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
1121
1122done_unlock_put:
1123 spin_unlock_bh(&iface_stat_list_lock);
1124done_put:
1125 in_dev_put(in_dev);
1126}
1127
1128static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1129{
1130 MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1131 return sock_tag_tree_search(&sock_tag_tree, sk);
1132}
1133
1134static struct sock_tag *get_sock_stat(const struct sock *sk)
1135{
1136 struct sock_tag *sock_tag_entry;
1137 MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1138 if (!sk)
1139 return NULL;
1140 spin_lock_bh(&sock_tag_list_lock);
1141 sock_tag_entry = get_sock_stat_nl(sk);
1142 spin_unlock_bh(&sock_tag_list_lock);
1143 return sock_tag_entry;
1144}
1145
1146static void
1147data_counters_update(struct data_counters *dc, int set,
1148 enum ifs_tx_rx direction, int proto, int bytes)
1149{
1150 switch (proto) {
1151 case IPPROTO_TCP:
1152 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1153 break;
1154 case IPPROTO_UDP:
1155 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1156 break;
1157 case IPPROTO_IP:
1158 default:
1159 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1160 1);
1161 break;
1162 }
1163}
1164
1165/*
1166 * Update stats for the specified interface. Do nothing if the entry
1167 * does not exist (when a device was never configured with an IP address).
1168 * Called when a device is being unregistered.
1169 */
1170static void iface_stat_update(struct net_device *net_dev, bool stash_only)
1171{
1172 struct rtnl_link_stats64 dev_stats, *stats;
1173 struct iface_stat *entry;
1174
1175 stats = dev_get_stats(net_dev, &dev_stats);
1176 spin_lock_bh(&iface_stat_list_lock);
1177 entry = get_iface_entry(net_dev->name);
1178 if (entry == NULL) {
1179 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1180 net_dev->name);
1181 spin_unlock_bh(&iface_stat_list_lock);
1182 return;
1183 }
1184
1185 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1186 net_dev->name, entry);
1187 if (!entry->active) {
1188 IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
1189 net_dev->name);
1190 spin_unlock_bh(&iface_stat_list_lock);
1191 return;
1192 }
1193
1194 if (stash_only) {
1195 entry->last_known[IFS_TX].bytes = stats->tx_bytes;
1196 entry->last_known[IFS_TX].packets = stats->tx_packets;
1197 entry->last_known[IFS_RX].bytes = stats->rx_bytes;
1198 entry->last_known[IFS_RX].packets = stats->rx_packets;
1199 entry->last_known_valid = true;
1200 IF_DEBUG("qtaguid: %s(%s): "
1201 "dev stats stashed rx/tx=%llu/%llu\n", __func__,
1202 net_dev->name, stats->rx_bytes, stats->tx_bytes);
1203 spin_unlock_bh(&iface_stat_list_lock);
1204 return;
1205 }
1206 entry->totals[IFS_TX].bytes += stats->tx_bytes;
1207 entry->totals[IFS_TX].packets += stats->tx_packets;
1208 entry->totals[IFS_RX].bytes += stats->rx_bytes;
1209 entry->totals[IFS_RX].packets += stats->rx_packets;
1210 /* We don't need the last_known[] anymore */
1211 entry->last_known_valid = false;
1212 _iface_stat_set_active(entry, net_dev, false);
1213 IF_DEBUG("qtaguid: %s(%s): "
1214 "disable tracking. rx/tx=%llu/%llu\n", __func__,
1215 net_dev->name, stats->rx_bytes, stats->tx_bytes);
1216 spin_unlock_bh(&iface_stat_list_lock);
1217}
1218
1219static void tag_stat_update(struct tag_stat *tag_entry,
1220 enum ifs_tx_rx direction, int proto, int bytes)
1221{
1222 int active_set;
1223 active_set = get_active_counter_set(tag_entry->tn.tag);
1224 MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1225 "dir=%d proto=%d bytes=%d)\n",
1226 tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1227 active_set, direction, proto, bytes);
1228 data_counters_update(&tag_entry->counters, active_set, direction,
1229 proto, bytes);
1230 if (tag_entry->parent_counters)
1231 data_counters_update(tag_entry->parent_counters, active_set,
1232 direction, proto, bytes);
1233}
1234
1235/*
1236 * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1237 * the interface.
1238 * iface_entry->tag_stat_list_lock should be held.
1239 */
1240static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1241 tag_t tag)
1242{
1243 struct tag_stat *new_tag_stat_entry = NULL;
1244 IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1245 " (uid=%u)\n", __func__,
1246 iface_entry, tag, get_uid_from_tag(tag));
1247 new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1248 if (!new_tag_stat_entry) {
1249 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1250 goto done;
1251 }
1252 new_tag_stat_entry->tn.tag = tag;
1253 tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1254done:
1255 return new_tag_stat_entry;
1256}
1257
1258static void if_tag_stat_update(const char *ifname, uid_t uid,
1259 const struct sock *sk, enum ifs_tx_rx direction,
1260 int proto, int bytes)
1261{
1262 struct tag_stat *tag_stat_entry;
1263 tag_t tag, acct_tag;
1264 tag_t uid_tag;
1265 struct data_counters *uid_tag_counters;
1266 struct sock_tag *sock_tag_entry;
1267 struct iface_stat *iface_entry;
1268 struct tag_stat *new_tag_stat;
1269 MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
1270 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
1271 ifname, uid, sk, direction, proto, bytes);
1272
1273
1274 iface_entry = get_iface_entry(ifname);
1275 if (!iface_entry) {
1276 pr_err("qtaguid: iface_stat: stat_update() %s not found\n",
1277 ifname);
1278 return;
1279 }
1280 /* It is ok to process data when an iface_entry is inactive */
1281
1282 MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
1283 ifname, iface_entry);
1284
1285 /*
1286 * Look for a tagged sock.
1287	 * It will have an acct_tag and uid_tag.
1288 */
1289 sock_tag_entry = get_sock_stat(sk);
1290 if (sock_tag_entry) {
1291 tag = sock_tag_entry->tag;
1292 acct_tag = get_atag_from_tag(tag);
1293 uid_tag = get_utag_from_tag(tag);
1294 } else {
1295 acct_tag = make_atag_from_value(0);
1296 tag = combine_atag_with_uid(acct_tag, uid);
1297 uid_tag = make_tag_from_uid(uid);
1298 }
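	/*
	 * For example (values purely illustrative): an untagged socket owned
	 * by uid 10003 gets acct_tag = make_atag_from_value(0) = 0, so
	 * tag == uid_tag == make_tag_from_uid(10003), and its traffic is
	 * billed directly against the per-uid {0, uid_tag} entry.
	 */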
1299 MT_DEBUG("qtaguid: iface_stat: stat_update(): "
1300 " looking for tag=0x%llx (uid=%u) in ife=%p\n",
1301 tag, get_uid_from_tag(tag), iface_entry);
1302 /* Loop over tag list under this interface for {acct_tag,uid_tag} */
1303 spin_lock_bh(&iface_entry->tag_stat_list_lock);
1304
1305 tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1306 tag);
1307 if (tag_stat_entry) {
1308 /*
1309 * Updating the {acct_tag, uid_tag} entry handles both stats:
1310 * {0, uid_tag} will also get updated.
1311 */
1312 tag_stat_update(tag_stat_entry, direction, proto, bytes);
1313 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1314 return;
1315 }
1316
1317 /* Loop over tag list under this interface for {0,uid_tag} */
1318 tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1319 uid_tag);
1320 if (!tag_stat_entry) {
1321 /* Here: the base uid_tag did not exist */
1322 /*
1323 * No parent counters. So
1324		 * - No {0, uid_tag} stats and no {acct_tag, uid_tag} stats.
1325 */
1326 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
1327 uid_tag_counters = &new_tag_stat->counters;
1328 } else {
1329 uid_tag_counters = &tag_stat_entry->counters;
1330 }
1331
1332 if (acct_tag) {
1333 new_tag_stat = create_if_tag_stat(iface_entry, tag);
1334 new_tag_stat->parent_counters = uid_tag_counters;
1335 }
1336 tag_stat_update(new_tag_stat, direction, proto, bytes);
1337 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1338}
1339
1340static int iface_netdev_event_handler(struct notifier_block *nb,
1341 unsigned long event, void *ptr) {
1342 struct net_device *dev = ptr;
1343
1344 if (unlikely(module_passive))
1345 return NOTIFY_DONE;
1346
1347 IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1348 "ev=0x%lx/%s netdev=%p->name=%s\n",
1349 event, netdev_evt_str(event), dev, dev ? dev->name : "");
1350
1351 switch (event) {
1352 case NETDEV_UP:
1353 iface_stat_create(dev, NULL);
1354 atomic64_inc(&qtu_events.iface_events);
1355 break;
1356 case NETDEV_DOWN:
1357 case NETDEV_UNREGISTER:
1358 iface_stat_update(dev, event == NETDEV_DOWN);
1359 atomic64_inc(&qtu_events.iface_events);
1360 break;
1361 }
1362 return NOTIFY_DONE;
1363}
1364
1365static int iface_inet6addr_event_handler(struct notifier_block *nb,
1366 unsigned long event, void *ptr)
1367{
1368 struct inet6_ifaddr *ifa = ptr;
1369 struct net_device *dev;
1370
1371 if (unlikely(module_passive))
1372 return NOTIFY_DONE;
1373
1374 IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1375 "ev=0x%lx/%s ifa=%p\n",
1376 event, netdev_evt_str(event), ifa);
1377
1378 switch (event) {
1379 case NETDEV_UP:
1380 BUG_ON(!ifa || !ifa->idev);
1381 dev = (struct net_device *)ifa->idev->dev;
1382 iface_stat_create_ipv6(dev, ifa);
1383 atomic64_inc(&qtu_events.iface_events);
1384 break;
1385 case NETDEV_DOWN:
1386 case NETDEV_UNREGISTER:
1387 BUG_ON(!ifa || !ifa->idev);
1388 dev = (struct net_device *)ifa->idev->dev;
1389 iface_stat_update(dev, event == NETDEV_DOWN);
1390 atomic64_inc(&qtu_events.iface_events);
1391 break;
1392 }
1393 return NOTIFY_DONE;
1394}
1395
1396static int iface_inetaddr_event_handler(struct notifier_block *nb,
1397 unsigned long event, void *ptr)
1398{
1399 struct in_ifaddr *ifa = ptr;
1400 struct net_device *dev;
1401
1402 if (unlikely(module_passive))
1403 return NOTIFY_DONE;
1404
1405 IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1406 "ev=0x%lx/%s ifa=%p\n",
1407 event, netdev_evt_str(event), ifa);
1408
1409 switch (event) {
1410 case NETDEV_UP:
1411 BUG_ON(!ifa || !ifa->ifa_dev);
1412 dev = ifa->ifa_dev->dev;
1413 iface_stat_create(dev, ifa);
1414 atomic64_inc(&qtu_events.iface_events);
1415 break;
1416 case NETDEV_DOWN:
1417 case NETDEV_UNREGISTER:
1418 BUG_ON(!ifa || !ifa->ifa_dev);
1419 dev = ifa->ifa_dev->dev;
1420 iface_stat_update(dev, event == NETDEV_DOWN);
1421 atomic64_inc(&qtu_events.iface_events);
1422 break;
1423 }
1424 return NOTIFY_DONE;
1425}
1426
1427static struct notifier_block iface_netdev_notifier_blk = {
1428 .notifier_call = iface_netdev_event_handler,
1429};
1430
1431static struct notifier_block iface_inetaddr_notifier_blk = {
1432 .notifier_call = iface_inetaddr_event_handler,
1433};
1434
1435static struct notifier_block iface_inet6addr_notifier_blk = {
1436 .notifier_call = iface_inet6addr_event_handler,
1437};
1438
1439static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1440{
1441 int err;
1442
1443 iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1444 if (!iface_stat_procdir) {
1445 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1446 err = -1;
1447 goto err;
1448 }
1449
1450 iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename,
1451 proc_iface_perms,
1452 parent_procdir);
1453 if (!iface_stat_all_procfile) {
1454 pr_err("qtaguid: iface_stat: init "
1455 " failed to create stat_all proc entry\n");
1456 err = -1;
1457 goto err_zap_entry;
1458 }
1459 iface_stat_all_procfile->read_proc = iface_stat_all_proc_read;
1460
1461
1462 err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1463 if (err) {
1464 pr_err("qtaguid: iface_stat: init "
1465 "failed to register dev event handler\n");
1466 goto err_zap_all_stats_entry;
1467 }
1468 err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1469 if (err) {
1470 pr_err("qtaguid: iface_stat: init "
1471 "failed to register ipv4 dev event handler\n");
1472 goto err_unreg_nd;
1473 }
1474
1475 err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1476 if (err) {
1477 pr_err("qtaguid: iface_stat: init "
1478 "failed to register ipv6 dev event handler\n");
1479 goto err_unreg_ip4_addr;
1480 }
1481 return 0;
1482
1483err_unreg_ip4_addr:
1484 unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1485err_unreg_nd:
1486 unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1487err_zap_all_stats_entry:
1488 remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1489err_zap_entry:
1490 remove_proc_entry(iface_stat_procdirname, parent_procdir);
1491err:
1492 return err;
1493}
1494
1495static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1496 struct xt_action_param *par)
1497{
1498 struct sock *sk;
1499 unsigned int hook_mask = (1 << par->hooknum);
1500
1501 MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1502 par->hooknum, par->family);
1503
1504 /*
1505	 * Let's not abuse the xt_socket_get*_sk(), or else it will
1506 * return garbage SKs.
1507 */
1508 if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1509 return NULL;
1510
1511 switch (par->family) {
1512 case NFPROTO_IPV6:
1513 sk = xt_socket_get6_sk(skb, par);
1514 break;
1515 case NFPROTO_IPV4:
1516 sk = xt_socket_get4_sk(skb, par);
1517 break;
1518 default:
1519 return NULL;
1520 }
1521
1522 /*
1523	 * There seem to be issues with the file ptr for TCP_TIME_WAIT SKs.
1524 * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
1525 * Not fixed in 3.0-r3 :(
1526 */
1527 if (sk) {
1528 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1529 "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1530 if (sk->sk_state == TCP_TIME_WAIT) {
1531 xt_socket_put_sk(sk);
1532 sk = NULL;
1533 }
1534 }
1535 return sk;
1536}
1537
1538static void account_for_uid(const struct sk_buff *skb,
1539 const struct sock *alternate_sk, uid_t uid,
1540 struct xt_action_param *par)
1541{
1542 const struct net_device *el_dev;
1543
1544 if (!skb->dev) {
1545 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1546 el_dev = par->in ? : par->out;
1547 } else {
1548 const struct net_device *other_dev;
1549 el_dev = skb->dev;
1550 other_dev = par->in ? : par->out;
1551 if (el_dev != other_dev) {
1552 MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1553 "par->(in/out)=%p %s\n",
1554 par->hooknum, el_dev, el_dev->name, other_dev,
1555 other_dev->name);
1556 }
1557 }
1558
1559 if (unlikely(!el_dev)) {
1560 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1561 } else if (unlikely(!el_dev->name)) {
1562 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1563 } else {
1564 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n",
1565 par->hooknum,
1566 el_dev->name,
1567 el_dev->type);
1568
1569 if_tag_stat_update(el_dev->name, uid,
1570 skb->sk ? skb->sk : alternate_sk,
1571 par->in ? IFS_RX : IFS_TX,
1572 ip_hdr(skb)->protocol, skb->len);
1573 }
1574}
1575
1576static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
1577{
1578 const struct xt_qtaguid_match_info *info = par->matchinfo;
1579 const struct file *filp;
1580 bool got_sock = false;
1581 struct sock *sk;
1582 uid_t sock_uid;
1583 bool res;
1584
1585 if (unlikely(module_passive))
1586 return (info->match ^ info->invert) == 0;
1587
1588 MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1589 par->hooknum, skb, par->in, par->out, par->family);
1590
1591 atomic64_inc(&qtu_events.match_calls);
1592 if (skb == NULL) {
1593 res = (info->match ^ info->invert) == 0;
1594 goto ret_res;
1595 }
1596
1597 sk = skb->sk;
1598
1599 if (sk == NULL) {
1600 /*
1601 * A missing sk->sk_socket happens when packets are in-flight
1602 * and the matching socket is already closed and gone.
1603 */
1604 sk = qtaguid_find_sk(skb, par);
1605 /*
1606 * If we got the socket from the find_sk(), we will need to put
1607 * it back, as nf_tproxy_get_sock_v4() got it.
1608 */
1609 got_sock = sk;
1610 if (sk)
1611 atomic64_inc(&qtu_events.match_found_sk_in_ct);
1612 else
1613 atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
1614 } else {
1615 atomic64_inc(&qtu_events.match_found_sk);
1616 }
1617 MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n",
1618 par->hooknum, sk, got_sock, ip_hdr(skb)->protocol);
1619 if (sk != NULL) {
1620 MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1621 par->hooknum, sk, sk->sk_socket,
1622 sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
1623 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
1624 MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1625 par->hooknum, filp ? filp->f_cred->fsuid : -1);
1626 }
1627
1628 if (sk == NULL || sk->sk_socket == NULL) {
1629 /*
1630 * Here, the qtaguid_find_sk() using connection tracking
1631 * couldn't find the owner, so for now we just count them
1632 * against the system.
1633 */
1634 /*
1635 * TODO: unhack how to force just accounting.
1636 * For now we only do iface stats when the uid-owner is not
1637 * requested.
1638 */
1639 if (!(info->match & XT_QTAGUID_UID))
1640 account_for_uid(skb, sk, 0, par);
1641 MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
1642 par->hooknum,
1643 sk ? sk->sk_socket : NULL);
1644 res = (info->match ^ info->invert) == 0;
1645 atomic64_inc(&qtu_events.match_no_sk);
1646 goto put_sock_ret_res;
1647 } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
1648 res = false;
1649 goto put_sock_ret_res;
1650 }
1651 filp = sk->sk_socket->file;
1652 if (filp == NULL) {
1653 MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
1654 account_for_uid(skb, sk, 0, par);
1655 res = ((info->match ^ info->invert) &
1656 (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
1657 atomic64_inc(&qtu_events.match_no_sk_file);
1658 goto put_sock_ret_res;
1659 }
1660 sock_uid = filp->f_cred->fsuid;
1661 /*
1662 * TODO: unhack how to force just accounting.
1663 * For now we only do iface stats when the uid-owner is not requested
1664 */
1665 if (!(info->match & XT_QTAGUID_UID))
1666 account_for_uid(skb, sk, sock_uid, par);
1667
1668 /*
1669 * The following two tests fail the match when:
1670 * id not in range AND no inverted condition requested
1671 * or id in range AND inverted condition requested
1672 * Thus (!a && b) || (a && !b) == a ^ b
1673 */
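	/*
	 * For example (numbers purely illustrative): with uid_min=10000,
	 * uid_max=10999 and no inverted condition, fsuid=10003 gives
	 * in_range=1 ^ !invert=1 -> 0, so the test below does not fail the
	 * match, while fsuid=0 gives 0 ^ 1 -> 1 and the match fails.
	 * With the inverted condition requested the outcomes flip.
	 */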
1674 if (info->match & XT_QTAGUID_UID)
1675 if ((filp->f_cred->fsuid >= info->uid_min &&
1676 filp->f_cred->fsuid <= info->uid_max) ^
1677 !(info->invert & XT_QTAGUID_UID)) {
1678 MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1679 par->hooknum);
1680 res = false;
1681 goto put_sock_ret_res;
1682 }
1683 if (info->match & XT_QTAGUID_GID)
1684 if ((filp->f_cred->fsgid >= info->gid_min &&
1685 filp->f_cred->fsgid <= info->gid_max) ^
1686 !(info->invert & XT_QTAGUID_GID)) {
1687 MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1688 par->hooknum);
1689 res = false;
1690 goto put_sock_ret_res;
1691 }
1692
1693 MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
1694 res = true;
1695
1696put_sock_ret_res:
1697 if (got_sock)
1698 xt_socket_put_sk(sk);
1699ret_res:
1700 MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
1701 return res;
1702}
1703
1704#ifdef DDEBUG
1705/* This function is not in xt_qtaguid_print.c because of locks visibility */
1706static void prdebug_full_state(int indent_level, const char *fmt, ...)
1707{
1708 va_list args;
1709 char *fmt_buff;
1710 char *buff;
1711
1712 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
1713 return;
1714
1715 fmt_buff = kasprintf(GFP_ATOMIC,
1716 "qtaguid: %s(): %s {\n", __func__, fmt);
1717 BUG_ON(!fmt_buff);
1718 va_start(args, fmt);
1719 buff = kvasprintf(GFP_ATOMIC,
1720 fmt_buff, args);
1721 BUG_ON(!buff);
1722 pr_debug("%s", buff);
1723 kfree(fmt_buff);
1724 kfree(buff);
1725 va_end(args);
1726
1727 spin_lock_bh(&sock_tag_list_lock);
1728 prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
1729 spin_unlock_bh(&sock_tag_list_lock);
1730
1731 spin_lock_bh(&sock_tag_list_lock);
1732 spin_lock_bh(&uid_tag_data_tree_lock);
1733 prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
1734 prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
1735 spin_unlock_bh(&uid_tag_data_tree_lock);
1736 spin_unlock_bh(&sock_tag_list_lock);
1737
1738 spin_lock_bh(&iface_stat_list_lock);
1739 prdebug_iface_stat_list(indent_level, &iface_stat_list);
1740 spin_unlock_bh(&iface_stat_list_lock);
1741
1742 pr_debug("qtaguid: %s(): }\n", __func__);
1743}
1744#else
1745static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
1746#endif
1747
1748/*
1749 * Procfs reader to get all active socket tags using style "1)" as described in
1750 * fs/proc/generic.c
1751 */
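/*
 * Each socket tag is printed in the form (values purely illustrative):
 *
 *   sock=ffff880012345678 tag=0x100002713 (uid=10003) pid=1234 f_count=2
 *
 * followed by a single "events: ..." summary line.
 */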
1752static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
1753 off_t items_to_skip, int char_count, int *eof,
1754 void *data)
1755{
1756 char *outp = page;
1757 int len;
1758 uid_t uid;
1759 struct rb_node *node;
1760 struct sock_tag *sock_tag_entry;
1761 int item_index = 0;
1762 int indent_level = 0;
1763 long f_count;
1764
1765 if (unlikely(module_passive)) {
1766 *eof = 1;
1767 return 0;
1768 }
1769
1770 if (*eof)
1771 return 0;
1772
1773 CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
1774 page, items_to_skip, char_count, *eof);
1775
1776 spin_lock_bh(&sock_tag_list_lock);
1777 for (node = rb_first(&sock_tag_tree);
1778 node;
1779 node = rb_next(node)) {
1780 if (item_index++ < items_to_skip)
1781 continue;
1782 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1783 uid = get_uid_from_tag(sock_tag_entry->tag);
1784 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1785 "pid=%u\n",
1786 sock_tag_entry->sk,
1787 sock_tag_entry->tag,
1788 uid,
1789 sock_tag_entry->pid
1790 );
1791 f_count = atomic_long_read(
1792 &sock_tag_entry->socket->file->f_count);
1793 len = snprintf(outp, char_count,
1794 "sock=%p tag=0x%llx (uid=%u) pid=%u "
1795 "f_count=%lu\n",
1796 sock_tag_entry->sk,
1797 sock_tag_entry->tag, uid,
1798 sock_tag_entry->pid, f_count);
1799 if (len >= char_count) {
1800 spin_unlock_bh(&sock_tag_list_lock);
1801 *outp = '\0';
1802 return outp - page;
1803 }
1804 outp += len;
1805 char_count -= len;
1806 (*num_items_returned)++;
1807 }
1808 spin_unlock_bh(&sock_tag_list_lock);
1809
1810 if (item_index++ >= items_to_skip) {
1811 len = snprintf(outp, char_count,
1812 "events: sockets_tagged=%llu "
1813 "sockets_untagged=%llu "
1814 "counter_set_changes=%llu "
1815 "delete_cmds=%llu "
1816 "iface_events=%llu "
1817 "match_calls=%llu "
1818 "match_found_sk=%llu "
1819 "match_found_sk_in_ct=%llu "
1820 "match_found_no_sk_in_ct=%llu "
1821 "match_no_sk=%llu "
1822 "match_no_sk_file=%llu\n",
1823 atomic64_read(&qtu_events.sockets_tagged),
1824 atomic64_read(&qtu_events.sockets_untagged),
1825 atomic64_read(&qtu_events.counter_set_changes),
1826 atomic64_read(&qtu_events.delete_cmds),
1827 atomic64_read(&qtu_events.iface_events),
1828 atomic64_read(&qtu_events.match_calls),
1829 atomic64_read(&qtu_events.match_found_sk),
1830 atomic64_read(&qtu_events.match_found_sk_in_ct),
1831 atomic64_read(
1832 &qtu_events.match_found_no_sk_in_ct),
1833 atomic64_read(&qtu_events.match_no_sk),
1834 atomic64_read(&qtu_events.match_no_sk_file));
1835 if (len >= char_count) {
1836 *outp = '\0';
1837 return outp - page;
1838 }
1839 outp += len;
1840 char_count -= len;
1841 (*num_items_returned)++;
1842 }
1843
1844 /* Count the following as part of the last item_index */
1845 if (item_index > items_to_skip) {
1846 prdebug_full_state(indent_level, "proc ctrl");
1847 }
1848
1849 *eof = 1;
1850 return outp - page;
1851}
1852
1853/*
1854 * Delete socket tags, and stat tags associated with a given
1855 * accounting tag and uid.
1856 */
1857static int ctrl_cmd_delete(const char *input)
1858{
1859 char cmd;
1860 uid_t uid;
1861 uid_t entry_uid;
1862 tag_t acct_tag;
1863 tag_t tag;
1864 int res, argc;
1865 struct iface_stat *iface_entry;
1866 struct rb_node *node;
1867 struct sock_tag *st_entry;
1868 struct rb_root st_to_free_tree = RB_ROOT;
1869 struct tag_stat *ts_entry;
1870 struct tag_counter_set *tcs_entry;
1871 struct tag_ref *tr_entry;
1872 struct uid_tag_data *utd_entry;
1873
1874 argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
1875 CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
1876 "user_tag=0x%llx uid=%u\n", input, argc, cmd,
1877 acct_tag, uid);
1878 if (argc < 2) {
1879 res = -EINVAL;
1880 goto err;
1881 }
1882 if (!valid_atag(acct_tag)) {
1883 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
1884 res = -EINVAL;
1885 goto err;
1886 }
1887 if (argc < 3) {
1888 uid = current_fsuid();
1889 } else if (!can_impersonate_uid(uid)) {
1890 pr_info("qtaguid: ctrl_delete(%s): "
1891 "insufficient priv from pid=%u tgid=%u uid=%u\n",
1892 input, current->pid, current->tgid, current_fsuid());
1893 res = -EPERM;
1894 goto err;
1895 }
1896
1897 tag = combine_atag_with_uid(acct_tag, uid);
1898 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1899 "looking for tag=0x%llx (uid=%u)\n",
1900 input, tag, uid);
1901
1902 /* Delete socket tags */
1903 spin_lock_bh(&sock_tag_list_lock);
1904 node = rb_first(&sock_tag_tree);
1905 while (node) {
1906 st_entry = rb_entry(node, struct sock_tag, sock_node);
1907 entry_uid = get_uid_from_tag(st_entry->tag);
1908 node = rb_next(node);
1909 if (entry_uid != uid)
1910 continue;
1911
1912 CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
1913 input, st_entry->tag, entry_uid);
1914
1915 if (!acct_tag || st_entry->tag == tag) {
1916 rb_erase(&st_entry->sock_node, &sock_tag_tree);
1917 /* Can't sockfd_put() within spinlock, do it later. */
1918 sock_tag_tree_insert(st_entry, &st_to_free_tree);
1919 tr_entry = lookup_tag_ref(st_entry->tag, NULL);
1920 BUG_ON(tr_entry->num_sock_tags <= 0);
1921 tr_entry->num_sock_tags--;
1922 /*
1923 * TODO: remove if, and start failing.
1924 * This is a hack to work around the fact that in some
1925 * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
1926 * and are trying to work around apps
1927 * that didn't open the /dev/xt_qtaguid.
1928 */
1929 if (st_entry->list.next && st_entry->list.prev)
1930 list_del(&st_entry->list);
1931 }
1932 }
1933 spin_unlock_bh(&sock_tag_list_lock);
1934
1935 sock_tag_tree_erase(&st_to_free_tree);
1936
1937 /* Delete tag counter-sets */
1938 spin_lock_bh(&tag_counter_set_list_lock);
1939 /* Counter sets are only on the uid tag, not full tag */
1940 tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
1941 if (tcs_entry) {
1942 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1943 "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
1944 input,
1945 tcs_entry->tn.tag,
1946 get_uid_from_tag(tcs_entry->tn.tag),
1947 tcs_entry->active_set);
1948 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
1949 kfree(tcs_entry);
1950 }
1951 spin_unlock_bh(&tag_counter_set_list_lock);
1952
1953 /*
1954 * If acct_tag is 0, then all entries belonging to uid are
1955 * erased.
1956 */
1957 spin_lock_bh(&iface_stat_list_lock);
1958 list_for_each_entry(iface_entry, &iface_stat_list, list) {
1959 spin_lock_bh(&iface_entry->tag_stat_list_lock);
1960 node = rb_first(&iface_entry->tag_stat_tree);
1961 while (node) {
1962 ts_entry = rb_entry(node, struct tag_stat, tn.node);
1963 entry_uid = get_uid_from_tag(ts_entry->tn.tag);
1964 node = rb_next(node);
1965
1966 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1967 "ts tag=0x%llx (uid=%u)\n",
1968 input, ts_entry->tn.tag, entry_uid);
1969
1970 if (entry_uid != uid)
1971 continue;
1972 if (!acct_tag || ts_entry->tn.tag == tag) {
1973 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1974 "erase ts: %s 0x%llx %u\n",
1975 input, iface_entry->ifname,
1976 get_atag_from_tag(ts_entry->tn.tag),
1977 entry_uid);
1978 rb_erase(&ts_entry->tn.node,
1979 &iface_entry->tag_stat_tree);
1980 kfree(ts_entry);
1981 }
1982 }
1983 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1984 }
1985 spin_unlock_bh(&iface_stat_list_lock);
1986
1987 /* Cleanup the uid_tag_data */
1988 spin_lock_bh(&uid_tag_data_tree_lock);
1989 node = rb_first(&uid_tag_data_tree);
1990 while (node) {
1991 utd_entry = rb_entry(node, struct uid_tag_data, node);
1992 entry_uid = utd_entry->uid;
1993 node = rb_next(node);
1994
1995 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1996 "utd uid=%u\n",
1997 input, entry_uid);
1998
1999 if (entry_uid != uid)
2000 continue;
2001 /*
2002 * Go over the tag_refs, and those that don't have
2003 * sock_tags using them are freed.
2004 */
2005 put_tag_ref_tree(tag, utd_entry);
2006 put_utd_entry(utd_entry);
2007 }
2008 spin_unlock_bh(&uid_tag_data_tree_lock);
2009
2010 atomic64_inc(&qtu_events.delete_cmds);
2011 res = 0;
2012
2013err:
2014 return res;
2015}
2016
2017static int ctrl_cmd_counter_set(const char *input)
2018{
2019 char cmd;
2020 uid_t uid = 0;
2021 tag_t tag;
2022 int res, argc;
2023 struct tag_counter_set *tcs;
2024 int counter_set;
2025
2026 argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2027 CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2028 "set=%d uid=%u\n", input, argc, cmd,
2029 counter_set, uid);
2030 if (argc != 3) {
2031 res = -EINVAL;
2032 goto err;
2033 }
2034 if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2035 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2036 input);
2037 res = -EINVAL;
2038 goto err;
2039 }
2040 if (!can_manipulate_uids()) {
2041 pr_info("qtaguid: ctrl_counterset(%s): "
2042 "insufficient priv from pid=%u tgid=%u uid=%u\n",
2043 input, current->pid, current->tgid, current_fsuid());
2044 res = -EPERM;
2045 goto err;
2046 }
2047
2048 tag = make_tag_from_uid(uid);
2049 spin_lock_bh(&tag_counter_set_list_lock);
2050 tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2051 if (!tcs) {
2052 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2053 if (!tcs) {
2054 spin_unlock_bh(&tag_counter_set_list_lock);
2055 pr_err("qtaguid: ctrl_counterset(%s): "
2056 "failed to alloc counter set\n",
2057 input);
2058 res = -ENOMEM;
2059 goto err;
2060 }
2061 tcs->tn.tag = tag;
2062 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2063 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2064 "(uid=%u) set=%d\n",
2065 input, tag, get_uid_from_tag(tag), counter_set);
2066 }
2067 tcs->active_set = counter_set;
2068 spin_unlock_bh(&tag_counter_set_list_lock);
2069 atomic64_inc(&qtu_events.counter_set_changes);
2070 res = 0;
2071
2072err:
2073 return res;
2074}
2075
2076static int ctrl_cmd_tag(const char *input)
2077{
2078 char cmd;
2079 int sock_fd = 0;
2080 uid_t uid = 0;
2081 tag_t acct_tag = make_atag_from_value(0);
2082 tag_t full_tag;
2083 struct socket *el_socket;
2084 int res, argc;
2085 struct sock_tag *sock_tag_entry;
2086 struct tag_ref *tag_ref_entry;
2087 struct uid_tag_data *uid_tag_data_entry;
2088 struct proc_qtu_data *pqd_entry;
2089
2090 /* Unassigned args will get defaulted later. */
2091 argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
2092 CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2093 "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
2094 acct_tag, uid);
2095 if (argc < 2) {
2096 res = -EINVAL;
2097 goto err;
2098 }
2099 el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
2100 if (!el_socket) {
2101 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2102 " sock_fd=%d err=%d\n", input, sock_fd, res);
2103 goto err;
2104 }
2105 CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
2106 input, atomic_long_read(&el_socket->file->f_count),
2107 el_socket->sk);
2108 if (argc < 3) {
2109 acct_tag = make_atag_from_value(0);
2110 } else if (!valid_atag(acct_tag)) {
2111 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
2112 res = -EINVAL;
2113 goto err_put;
2114 }
2115 CT_DEBUG("qtaguid: ctrl_tag(%s): "
2116 "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2117 "in_group=%d in_egroup=%d\n",
2118 input, current->pid, current->tgid, current_uid(),
2119 current_euid(), current_fsuid(),
2120 in_group_p(proc_ctrl_write_gid),
2121 in_egroup_p(proc_ctrl_write_gid));
2122 if (argc < 4) {
2123 uid = current_fsuid();
2124 } else if (!can_impersonate_uid(uid)) {
2125 pr_info("qtaguid: ctrl_tag(%s): "
2126 "insufficient priv from pid=%u tgid=%u uid=%u\n",
2127 input, current->pid, current->tgid, current_fsuid());
2128 res = -EPERM;
2129 goto err_put;
2130 }
2131 full_tag = combine_atag_with_uid(acct_tag, uid);
2132
2133 spin_lock_bh(&sock_tag_list_lock);
2134 sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2135 tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
2136 if (IS_ERR(tag_ref_entry)) {
2137 res = PTR_ERR(tag_ref_entry);
2138 spin_unlock_bh(&sock_tag_list_lock);
2139 goto err_put;
2140 }
2141 tag_ref_entry->num_sock_tags++;
2142 if (sock_tag_entry) {
2143 struct tag_ref *prev_tag_ref_entry;
2144
2145 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2146 "st@%p ...->f_count=%ld\n",
2147 input, el_socket->sk, sock_tag_entry,
2148 atomic_long_read(&el_socket->file->f_count));
2149 /*
2150 * This is a re-tagging, so release the sock_fd that was
2151 * locked at the time of the 1st tagging.
2152 * There is still the ref from this call's sockfd_lookup() so
2153 * it can be done within the spinlock.
2154 */
2155 sockfd_put(sock_tag_entry->socket);
2156 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
2157 &uid_tag_data_entry);
2158 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
2159 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
2160 prev_tag_ref_entry->num_sock_tags--;
2161 sock_tag_entry->tag = full_tag;
2162 } else {
2163 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2164 input, el_socket->sk);
2165 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
2166 GFP_ATOMIC);
2167 if (!sock_tag_entry) {
2168 pr_err("qtaguid: ctrl_tag(%s): "
2169 "socket tag alloc failed\n",
2170 input);
2171 spin_unlock_bh(&sock_tag_list_lock);
2172 res = -ENOMEM;
2173 goto err_tag_unref_put;
2174 }
2175 sock_tag_entry->sk = el_socket->sk;
2176 sock_tag_entry->socket = el_socket;
2177 sock_tag_entry->pid = current->tgid;
2178 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
2179 uid);
2180 spin_lock_bh(&uid_tag_data_tree_lock);
2181 pqd_entry = proc_qtu_data_tree_search(
2182 &proc_qtu_data_tree, current->tgid);
2183 /*
2184 * TODO: remove if, and start failing.
2185 * At first, we want to catch user-space code that is not
2186 * opening the /dev/xt_qtaguid.
2187 */
2188 if (IS_ERR_OR_NULL(pqd_entry))
2189 pr_warn_once(
2190 "qtaguid: %s(): "
2191 "User space forgot to open /dev/xt_qtaguid? "
2192 "pid=%u tgid=%u uid=%u\n", __func__,
2193 current->pid, current->tgid,
2194 current_fsuid());
2195 else
2196 list_add(&sock_tag_entry->list,
2197 &pqd_entry->sock_tag_list);
2198 spin_unlock_bh(&uid_tag_data_tree_lock);
2199
2200 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
2201 atomic64_inc(&qtu_events.sockets_tagged);
2202 }
2203 spin_unlock_bh(&sock_tag_list_lock);
2204 /* We keep the ref to the socket (file) until it is untagged */
2205 CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
2206 input, sock_tag_entry,
2207 atomic_long_read(&el_socket->file->f_count));
2208 return 0;
2209
2210err_tag_unref_put:
2211 BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2212 tag_ref_entry->num_sock_tags--;
2213 free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
2214err_put:
2215 CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
2216 input, atomic_long_read(&el_socket->file->f_count) - 1);
2217 /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2218 sockfd_put(el_socket);
2219 return res;
2220
2221err:
2222 CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
2223 return res;
2224}
2225
2226static int ctrl_cmd_untag(const char *input)
2227{
2228 char cmd;
2229 int sock_fd = 0;
2230 struct socket *el_socket;
2231 int res, argc;
2232 struct sock_tag *sock_tag_entry;
2233 struct tag_ref *tag_ref_entry;
2234 struct uid_tag_data *utd_entry;
2235 struct proc_qtu_data *pqd_entry;
2236
2237 argc = sscanf(input, "%c %d", &cmd, &sock_fd);
2238 CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
2239 input, argc, cmd, sock_fd);
2240 if (argc < 2) {
2241 res = -EINVAL;
2242 goto err;
2243 }
2244 el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
2245 if (!el_socket) {
2246 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
2247 " sock_fd=%d err=%d\n", input, sock_fd, res);
2248 goto err;
2249 }
2250 CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
2251 input, atomic_long_read(&el_socket->file->f_count),
2252 el_socket->sk);
2253 spin_lock_bh(&sock_tag_list_lock);
2254 sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2255 if (!sock_tag_entry) {
2256 spin_unlock_bh(&sock_tag_list_lock);
2257 res = -EINVAL;
2258 goto err_put;
2259 }
2260 /*
2261 * The socket already belongs to the current process
2262 * so it can do whatever it wants to it.
2263 */
2264 rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
2265
2266 tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
2267 BUG_ON(!tag_ref_entry);
2268 BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2269 spin_lock_bh(&uid_tag_data_tree_lock);
2270 pqd_entry = proc_qtu_data_tree_search(
2271 &proc_qtu_data_tree, current->tgid);
2272 /*
2273 * TODO: remove if, and start failing.
2274 * At first, we want to catch user-space code that is not
2275 * opening the /dev/xt_qtaguid.
2276 */
2277 if (IS_ERR_OR_NULL(pqd_entry))
2278 pr_warn_once("qtaguid: %s(): "
2279 "User space forgot to open /dev/xt_qtaguid? "
2280 "pid=%u tgid=%u uid=%u\n", __func__,
2281 current->pid, current->tgid, current_fsuid());
2282 else
2283 list_del(&sock_tag_entry->list);
2284 spin_unlock_bh(&uid_tag_data_tree_lock);
2285 /*
2286 * We don't free tag_ref from the utd_entry here,
2287 * only during a cmd_delete().
2288 */
2289 tag_ref_entry->num_sock_tags--;
2290 spin_unlock_bh(&sock_tag_list_lock);
2291 /*
2292 * Release the sock_fd that was grabbed at tag time,
2293 * and once more for the sockfd_lookup() here.
2294 */
2295 sockfd_put(sock_tag_entry->socket);
2296 CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
2297 input, sock_tag_entry,
2298 atomic_long_read(&el_socket->file->f_count) - 1);
2299 sockfd_put(el_socket);
2300
2301 kfree(sock_tag_entry);
2302 atomic64_inc(&qtu_events.sockets_untagged);
2303
2304 return 0;
2305
2306err_put:
2307 CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
2308 input, atomic_long_read(&el_socket->file->f_count) - 1);
2309 /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2310 sockfd_put(el_socket);
2311 return res;
2312
2313err:
2314 CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
2315 return res;
2316}
2317
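/*
 * Summary of the command strings accepted through the ctrl file, as parsed
 * by the helpers above (arguments in [] default when omitted):
 *
 *   t <sock_fd> [<acct_tag> [<uid>]]   tag a socket
 *   u <sock_fd>                        untag a socket
 *   s <counter_set> <uid>              switch the active counter set
 *   d <acct_tag> [<uid>]               delete matching tags and stats
 */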
2318static int qtaguid_ctrl_parse(const char *input, int count)
2319{
2320 char cmd;
2321 int res;
2322
2323 cmd = input[0];
2324 /* Collect params for commands */
2325 switch (cmd) {
2326 case 'd':
2327 res = ctrl_cmd_delete(input);
2328 break;
2329
2330 case 's':
2331 res = ctrl_cmd_counter_set(input);
2332 break;
2333
2334 case 't':
2335 res = ctrl_cmd_tag(input);
2336 break;
2337
2338 case 'u':
2339 res = ctrl_cmd_untag(input);
2340 break;
2341
2342 default:
2343 res = -EINVAL;
2344 goto err;
2345 }
2346 if (!res)
2347 res = count;
2348err:
2349 CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
2350 return res;
2351}
2352
2353#define MAX_QTAGUID_CTRL_INPUT_LEN 255
2354static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2355 unsigned long count, void *data)
2356{
2357 char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2358
2359 if (unlikely(module_passive))
2360 return count;
2361
2362 if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2363 return -EINVAL;
2364
2365 if (copy_from_user(input_buf, buffer, count))
2366 return -EFAULT;
2367
2368 input_buf[count] = '\0';
2369 return qtaguid_ctrl_parse(input_buf, count);
2370}
2371
2372struct proc_print_info {
2373 char *outp;
2374 char **num_items_returned;
2375 struct iface_stat *iface_entry;
2376 struct tag_stat *ts_entry;
2377 int item_index;
2378 int items_to_skip;
2379 int char_count;
2380};
2381
2382static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)
2383{
2384 int len;
2385 struct data_counters *cnts;
2386
2387 if (!ppi->item_index) {
2388 if (ppi->item_index++ < ppi->items_to_skip)
2389 return 0;
2390 len = snprintf(ppi->outp, ppi->char_count,
2391 "idx iface acct_tag_hex uid_tag_int cnt_set "
2392 "rx_bytes rx_packets "
2393 "tx_bytes tx_packets "
2394 "rx_tcp_bytes rx_tcp_packets "
2395 "rx_udp_bytes rx_udp_packets "
2396 "rx_other_bytes rx_other_packets "
2397 "tx_tcp_bytes tx_tcp_packets "
2398 "tx_udp_bytes tx_udp_packets "
2399 "tx_other_bytes tx_other_packets\n");
2400 } else {
2401 tag_t tag = ppi->ts_entry->tn.tag;
2402 uid_t stat_uid = get_uid_from_tag(tag);
2403
2404 if (!can_read_other_uid_stats(stat_uid)) {
2405 CT_DEBUG("qtaguid: stats line: "
2406 "%s 0x%llx %u: insufficient priv "
2407 "from pid=%u tgid=%u uid=%u\n",
2408 ppi->iface_entry->ifname,
2409 get_atag_from_tag(tag), stat_uid,
2410 current->pid, current->tgid, current_fsuid());
2411 return 0;
2412 }
2413 if (ppi->item_index++ < ppi->items_to_skip)
2414 return 0;
2415 cnts = &ppi->ts_entry->counters;
2416 len = snprintf(
2417 ppi->outp, ppi->char_count,
2418 "%d %s 0x%llx %u %u "
2419 "%llu %llu "
2420 "%llu %llu "
2421 "%llu %llu "
2422 "%llu %llu "
2423 "%llu %llu "
2424 "%llu %llu "
2425 "%llu %llu "
2426 "%llu %llu\n",
2427 ppi->item_index,
2428 ppi->iface_entry->ifname,
2429 get_atag_from_tag(tag),
2430 stat_uid,
2431 cnt_set,
2432 dc_sum_bytes(cnts, cnt_set, IFS_RX),
2433 dc_sum_packets(cnts, cnt_set, IFS_RX),
2434 dc_sum_bytes(cnts, cnt_set, IFS_TX),
2435 dc_sum_packets(cnts, cnt_set, IFS_TX),
2436 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
2437 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
2438 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
2439 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
2440 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
2441 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
2442 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
2443 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
2444 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
2445 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
2446 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
2447 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
2448 }
2449 return len;
2450}
2451
2452static bool pp_sets(struct proc_print_info *ppi)
2453{
2454 int len;
2455 int counter_set;
2456 for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2457 counter_set++) {
2458 len = pp_stats_line(ppi, counter_set);
2459 if (len >= ppi->char_count) {
2460 *ppi->outp = '\0';
2461 return false;
2462 }
2463 if (len) {
2464 ppi->outp += len;
2465 ppi->char_count -= len;
2466 (*ppi->num_items_returned)++;
2467 }
2468 }
2469 return true;
2470}
2471
2472/*
2473 * Procfs reader to get all tag stats using style "1)" as described in
2474 * fs/proc/generic.c
2475 * Groups all protocols tx/rx bytes.
2476 */
2477static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
2478 off_t items_to_skip, int char_count, int *eof,
2479 void *data)
2480{
2481 struct proc_print_info ppi;
2482 int len;
2483
2484 ppi.outp = page;
2485 ppi.item_index = 0;
2486 ppi.char_count = char_count;
2487 ppi.num_items_returned = num_items_returned;
2488 ppi.items_to_skip = items_to_skip;
2489
2490 if (unlikely(module_passive)) {
2491 len = pp_stats_line(&ppi, 0);
2492 /* The header should always be shorter than the buffer. */
2493 BUG_ON(len >= ppi.char_count);
2494 (*num_items_returned)++;
2495 *eof = 1;
2496 return len;
2497 }
2498
2499 CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "
2500 "char_count=%d *eof=%d\n", page, *num_items_returned,
2501 items_to_skip, char_count, *eof);
2502
2503 if (*eof)
2504 return 0;
2505
2506 /* The idx is there to help debug when things go belly up. */
2507 len = pp_stats_line(&ppi, 0);
2508 /* Don't advance the outp unless the whole line was printed */
2509 if (len >= ppi.char_count) {
2510 *ppi.outp = '\0';
2511 return ppi.outp - page;
2512 }
2513 if (len) {
2514 ppi.outp += len;
2515 ppi.char_count -= len;
2516 (*num_items_returned)++;
2517 }
2518
2519 spin_lock_bh(&iface_stat_list_lock);
2520 list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {
2521 struct rb_node *node;
2522 spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock);
2523 for (node = rb_first(&ppi.iface_entry->tag_stat_tree);
2524 node;
2525 node = rb_next(node)) {
2526 ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node);
2527 if (!pp_sets(&ppi)) {
2528 spin_unlock_bh(
2529 &ppi.iface_entry->tag_stat_list_lock);
2530 spin_unlock_bh(&iface_stat_list_lock);
2531 return ppi.outp - page;
2532 }
2533 }
2534 spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);
2535 }
2536 spin_unlock_bh(&iface_stat_list_lock);
2537
2538 *eof = 1;
2539 return ppi.outp - page;
2540}
2541
2542/*------------------------------------------*/
2543static int qtudev_open(struct inode *inode, struct file *file)
2544{
2545 struct uid_tag_data *utd_entry;
2546 struct proc_qtu_data *pqd_entry;
2547 struct proc_qtu_data *new_pqd_entry;
2548 int res;
2549 bool utd_entry_found;
2550
2551 if (unlikely(qtu_proc_handling_passive))
2552 return 0;
2553
2554 DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2555 current->pid, current->tgid, current_fsuid());
2556
2557 spin_lock_bh(&uid_tag_data_tree_lock);
2558
2559 /* Look for existing uid data, or alloc one. */
2560 utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
2561 if (IS_ERR_OR_NULL(utd_entry)) {
2562 res = PTR_ERR(utd_entry);
2563 goto err;
2564 }
2565
2566 /* Look for existing PID based proc_data */
2567 pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
2568 current->tgid);
2569 if (pqd_entry) {
2570 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2571 "%s already opened\n",
2572 current->pid, current->tgid, current_fsuid(),
2573 QTU_DEV_NAME);
2574 res = -EBUSY;
2575 goto err_unlock_free_utd;
2576 }
2577
2578 new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2579 if (!new_pqd_entry) {
2580 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2581 "proc data alloc failed\n",
2582 current->pid, current->tgid, current_fsuid());
2583 res = -ENOMEM;
2584 goto err_unlock_free_utd;
2585 }
2586 new_pqd_entry->pid = current->tgid;
2587 INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2588 new_pqd_entry->parent_tag_data = utd_entry;
2589 utd_entry->num_pqd++;
2590
2591 proc_qtu_data_tree_insert(new_pqd_entry,
2592 &proc_qtu_data_tree);
2593
2594 spin_unlock_bh(&uid_tag_data_tree_lock);
2595 DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2596 current_fsuid(), new_pqd_entry);
2597 file->private_data = new_pqd_entry;
2598 return 0;
2599
2600err_unlock_free_utd:
2601 if (!utd_entry_found) {
2602 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2603 kfree(utd_entry);
2604 }
2605 spin_unlock_bh(&uid_tag_data_tree_lock);
2606err:
2607 return res;
2608}
2609
2610static int qtudev_release(struct inode *inode, struct file *file)
2611{
2612 struct proc_qtu_data *pqd_entry = file->private_data;
2613 struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data;
2614 struct sock_tag *st_entry;
2615 struct rb_root st_to_free_tree = RB_ROOT;
2616 struct list_head *entry, *next;
2617 struct tag_ref *tr;
2618
2619 if (unlikely(qtu_proc_handling_passive))
2620 return 0;
2621
2622 /*
2623 * Do not trust the current->pid, it might just be a kworker cleaning
2624 * up after a dead proc.
2625 */
2626 DR_DEBUG("qtaguid: qtudev_release(): "
2627 "pid=%u tgid=%u uid=%u "
2628 "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
2629 current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
2630 pqd_entry, pqd_entry->pid, utd_entry,
2631 utd_entry->num_active_tags);
2632
2633 spin_lock_bh(&sock_tag_list_lock);
2634 spin_lock_bh(&uid_tag_data_tree_lock);
2635
2636 list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
2637 st_entry = list_entry(entry, struct sock_tag, list);
2638 DR_DEBUG("qtaguid: %s(): "
2639 "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
2640 __func__,
2641 st_entry, st_entry->sk,
2642 current->pid, current->tgid,
2643 pqd_entry->parent_tag_data->uid);
2644
2645 utd_entry = uid_tag_data_tree_search(
2646 &uid_tag_data_tree,
2647 get_uid_from_tag(st_entry->tag));
2648 BUG_ON(IS_ERR_OR_NULL(utd_entry));
2649 DR_DEBUG("qtaguid: %s(): "
2650 "looking for tag=0x%llx in utd_entry=%p\n", __func__,
2651 st_entry->tag, utd_entry);
2652 tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
2653 st_entry->tag);
2654 BUG_ON(!tr);
2655 BUG_ON(tr->num_sock_tags <= 0);
2656 tr->num_sock_tags--;
2657 free_tag_ref_from_utd_entry(tr, utd_entry);
2658
2659 rb_erase(&st_entry->sock_node, &sock_tag_tree);
2660 list_del(&st_entry->list);
2661 /* Can't sockfd_put() within spinlock, do it later. */
2662 sock_tag_tree_insert(st_entry, &st_to_free_tree);
2663
2664 /*
2665 * Try to free the utd_entry if no other proc_qtu_data is
2666 * using it (num_pqd is 0) and it doesn't have active tags
2667 * (num_active_tags is 0).
2668 */
2669 put_utd_entry(utd_entry);
2670 }
2671
2672 rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
2673 BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
2674 pqd_entry->parent_tag_data->num_pqd--;
2675 put_utd_entry(pqd_entry->parent_tag_data);
2676 kfree(pqd_entry);
2677 file->private_data = NULL;
2678
2679 spin_unlock_bh(&uid_tag_data_tree_lock);
2680 spin_unlock_bh(&sock_tag_list_lock);
2681
2682
2683 sock_tag_tree_erase(&st_to_free_tree);
2684
2685 prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
2686 current->pid, current->tgid);
2687 return 0;
2688}
2689
2690/*------------------------------------------*/
2691static const struct file_operations qtudev_fops = {
2692 .owner = THIS_MODULE,
2693 .open = qtudev_open,
2694 .release = qtudev_release,
2695};
2696
2697static struct miscdevice qtu_device = {
2698 .minor = MISC_DYNAMIC_MINOR,
2699 .name = QTU_DEV_NAME,
2700 .fops = &qtudev_fops,
2701 /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
2702};
2703
2704/*------------------------------------------*/
2705static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2706{
2707 int ret;
2708 *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2709 if (!*res_procdir) {
2710 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2711 ret = -ENOMEM;
2712 goto no_dir;
2713 }
2714
2715 xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
2716 *res_procdir);
2717 if (!xt_qtaguid_ctrl_file) {
2718 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2719 " file\n");
2720 ret = -ENOMEM;
2721 goto no_ctrl_entry;
2722 }
2723 xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
2724 xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
2725
2726 xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
2727 *res_procdir);
2728 if (!xt_qtaguid_stats_file) {
2729 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2730 "file\n");
2731 ret = -ENOMEM;
2732 goto no_stats_entry;
2733 }
2734 xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
2735 /*
2736 * TODO: add support counter hacking
2737 * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2738 */
2739 return 0;
2740
2741no_stats_entry:
2742 remove_proc_entry("ctrl", *res_procdir);
2743no_ctrl_entry:
2744 remove_proc_entry("xt_qtaguid", NULL);
2745no_dir:
2746 return ret;
2747}
2748
2749static struct xt_match qtaguid_mt_reg __read_mostly = {
2750 /*
2751 * This module masquerades as the "owner" module so that iptables
2752 * tools can deal with it.
2753 */
2754 .name = "owner",
2755 .revision = 1,
2756 .family = NFPROTO_UNSPEC,
2757 .match = qtaguid_mt,
2758 .matchsize = sizeof(struct xt_qtaguid_match_info),
2759 .me = THIS_MODULE,
2760};
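/*
 * Since the match registers under the name "owner" (revision 1), rules are
 * added with the stock owner-match syntax, e.g. something like:
 *
 *   iptables -A OUTPUT -m owner --uid-owner 10003 -j DROP
 *
 * (illustrative only; the exact options available depend on the userspace
 * iptables build that understands this revision).
 */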
2761
2762static int __init qtaguid_mt_init(void)
2763{
2764 if (qtaguid_proc_register(&xt_qtaguid_procdir)
2765 || iface_stat_init(xt_qtaguid_procdir)
2766 || xt_register_match(&qtaguid_mt_reg)
2767 || misc_register(&qtu_device))
2768 return -1;
2769 return 0;
2770}
2771
2772/*
2773 * TODO: allow unloading of the module.
2774 * For now stats are permanent.
2775 * Kconfig forces 'y' or 'n' and never 'm'.
2776 */
2777
2778module_init(qtaguid_mt_init);
2779MODULE_AUTHOR("jpa <jpa@google.com>");
2780MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
2781MODULE_LICENSE("GPL");
2782MODULE_ALIAS("ipt_owner");
2783MODULE_ALIAS("ip6t_owner");
2784MODULE_ALIAS("ipt_qtaguid");
2785MODULE_ALIAS("ip6t_qtaguid");
diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h
new file mode 100644
index 00000000000..02479d6d317
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_internal.h
@@ -0,0 +1,330 @@
1/*
2 * Kernel iptables module to track stats for packets based on user tags.
3 *
4 * (C) 2011 Google, Inc
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef __XT_QTAGUID_INTERNAL_H__
11#define __XT_QTAGUID_INTERNAL_H__
12
13#include <linux/types.h>
14#include <linux/rbtree.h>
15#include <linux/spinlock_types.h>
16#include <linux/workqueue.h>
17
18/* Iface handling */
19#define IDEBUG_MASK (1<<0)
20/* Iptable Matching. Per packet. */
21#define MDEBUG_MASK (1<<1)
22/* Red-black tree handling. Per packet. */
23#define RDEBUG_MASK (1<<2)
24/* procfs ctrl/stats handling */
25#define CDEBUG_MASK (1<<3)
26/* dev and resource tracking */
27#define DDEBUG_MASK (1<<4)
28
 29/* E.g. (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */
30#define DEFAULT_DEBUG_MASK 0
31
32/*
33 * (Un)Define these *DEBUG to compile out/in the pr_debug calls.
34 * All undef: text size ~ 0x3030; all def: ~ 0x4404.
35 */
36#define IDEBUG
37#define MDEBUG
38#define RDEBUG
39#define CDEBUG
40#define DDEBUG
41
42#define MSK_DEBUG(mask, ...) do { \
43 if (unlikely(qtaguid_debug_mask & (mask))) \
44 pr_debug(__VA_ARGS__); \
45 } while (0)
46#ifdef IDEBUG
47#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__)
48#else
49#define IF_DEBUG(...) no_printk(__VA_ARGS__)
50#endif
51#ifdef MDEBUG
52#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__)
53#else
54#define MT_DEBUG(...) no_printk(__VA_ARGS__)
55#endif
56#ifdef RDEBUG
57#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__)
58#else
59#define RB_DEBUG(...) no_printk(__VA_ARGS__)
60#endif
61#ifdef CDEBUG
62#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__)
63#else
64#define CT_DEBUG(...) no_printk(__VA_ARGS__)
65#endif
66#ifdef DDEBUG
67#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__)
68#else
69#define DR_DEBUG(...) no_printk(__VA_ARGS__)
70#endif
71
72extern uint qtaguid_debug_mask;
73
74/*---------------------------------------------------------------------------*/
75/*
76 * Tags:
77 *
78 * They represent what the data usage counters will be tracked against.
79 * By default a tag is just based on the UID.
 80 * The UID is used as the base for policing, and cannot be ignored.
81 * So a tag will always at least represent a UID (uid_tag).
82 *
83 * A tag can be augmented with an "accounting tag" which is associated
84 * with a UID.
85 * User space can set the acct_tag portion of the tag which is then used
86 * with sockets: all data belonging to that socket will be counted against the
87 * tag. The policing is then based on the tag's uid_tag portion,
88 * and stats are collected for the acct_tag portion separately.
89 *
90 * There could be
91 * a: {acct_tag=1, uid_tag=10003}
92 * b: {acct_tag=2, uid_tag=10003}
93 * c: {acct_tag=3, uid_tag=10003}
94 * d: {acct_tag=0, uid_tag=10003}
95 * a, b, and c represent tags associated with specific sockets.
96 * d is for the totals for that uid, including all untagged traffic.
97 * Typically d is used with policing/quota rules.
98 *
99 * We want tag_t big enough to distinguish uid_t and acct_tag.
100 * It might become a struct if needed.
101 * Nothing should be using it as an int.
102 */
103typedef uint64_t tag_t; /* Only used via accessors */
104
105#define TAG_UID_MASK 0xFFFFFFFFULL
106#define TAG_ACCT_MASK (~0xFFFFFFFFULL)
107
108static inline int tag_compare(tag_t t1, tag_t t2)
109{
110 return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;
111}
112
113static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
114{
115 return acct_tag | uid;
116}
117static inline tag_t make_tag_from_uid(uid_t uid)
118{
119 return uid;
120}
121static inline uid_t get_uid_from_tag(tag_t tag)
122{
123 return tag & TAG_UID_MASK;
124}
125static inline tag_t get_utag_from_tag(tag_t tag)
126{
127 return tag & TAG_UID_MASK;
128}
129static inline tag_t get_atag_from_tag(tag_t tag)
130{
131 return tag & TAG_ACCT_MASK;
132}
133
134static inline bool valid_atag(tag_t tag)
135{
136 return !(tag & TAG_UID_MASK);
137}
138static inline tag_t make_atag_from_value(uint32_t value)
139{
140 return (uint64_t)value << 32;
141}
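/*
 * A worked example (values purely illustrative) for acct_tag value 1 and
 * uid 10003 (0x2713):
 *   make_atag_from_value(1)                 == 0x0000000100000000
 *   combine_atag_with_uid(atag, 10003)      == 0x0000000100002713
 *   get_uid_from_tag(tag)                   == 10003
 *   get_atag_from_tag(tag)                  == 0x0000000100000000
 *   valid_atag(get_atag_from_tag(tag))      == true (low 32 bits are zero)
 */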
142/*---------------------------------------------------------------------------*/
143
144/*
145 * Maximum number of socket tags that a UID is allowed to have active.
146 * Multiple processes belonging to the same UID contribute towards this limit.
147 * Special UIDs that can impersonate a UID also contribute (e.g. download
148 * manager, ...)
149 */
150#define DEFAULT_MAX_SOCK_TAGS 1024
151
152/*
153 * For now we only track 2 sets of counters.
154 * The default set is 0.
155 * Userspace can activate another set for a given uid being tracked.
156 */
157#define IFS_MAX_COUNTER_SETS 2
158
159enum ifs_tx_rx {
160 IFS_TX,
161 IFS_RX,
162 IFS_MAX_DIRECTIONS
163};
164
165/* For now, TCP, UDP, the rest */
166enum ifs_proto {
167 IFS_TCP,
168 IFS_UDP,
169 IFS_PROTO_OTHER,
170 IFS_MAX_PROTOS
171};
172
173struct byte_packet_counters {
174 uint64_t bytes;
175 uint64_t packets;
176};
177
178struct data_counters {
179 struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
180};
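/*
 * Indexed as bpc[counter_set][direction][protocol]; e.g. (illustrative)
 * bpc[0][IFS_RX][IFS_TCP].bytes is the TCP byte count received on the
 * default counter set 0.
 */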
181
182/* Generic X based nodes used as a base for rb_tree ops */
183struct tag_node {
184 struct rb_node node;
185 tag_t tag;
186};
187
188struct tag_stat {
189 struct tag_node tn;
190 struct data_counters counters;
191 /*
192 * If this tag is acct_tag based, we need to count against the
193 * matching parent uid_tag.
194 */
195 struct data_counters *parent_counters;
196};
197
198struct iface_stat {
199 struct list_head list; /* in iface_stat_list */
200 char *ifname;
201 bool active;
202 /* net_dev is only valid for active iface_stat */
203 struct net_device *net_dev;
204
205 struct byte_packet_counters totals[IFS_MAX_DIRECTIONS];
206 /*
207 * We keep the last_known, because some devices reset their counters
208 * just before NETDEV_UP, while some will reset just before
209 * NETDEV_REGISTER (which is more normal).
210 * So now, if the device didn't do a NETDEV_UNREGISTER and we see
211	 * its current dev stats smaller than what was previously known, we
212 * assume an UNREGISTER and just use the last_known.
213 */
214 struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS];
215 /* last_known is usable when last_known_valid is true */
216 bool last_known_valid;
217
218 struct proc_dir_entry *proc_ptr;
219
220 struct rb_root tag_stat_tree;
221 spinlock_t tag_stat_list_lock;
222};
223
224/* This is needed to create proc_dir_entries from atomic context. */
225struct iface_stat_work {
226 struct work_struct iface_work;
227 struct iface_stat *iface_entry;
228};
229
230/*
231 * Track tag that this socket is transferring data for, and not necessarily
232 * the uid that owns the socket.
233 * This is the tag against which tag_stat.counters will be billed.
234 * These structs need to be looked up by sock and pid.
235 */
236struct sock_tag {
237 struct rb_node sock_node;
238 struct sock *sk; /* Only used as a number, never dereferenced */
239 /* The socket is needed for sockfd_put() */
240 struct socket *socket;
241 /* Used to associate with a given pid */
242 struct list_head list; /* in proc_qtu_data.sock_tag_list */
243 pid_t pid;
244
245 tag_t tag;
246};
247
248struct qtaguid_event_counts {
249 /* Various successful events */
250 atomic64_t sockets_tagged;
251 atomic64_t sockets_untagged;
252 atomic64_t counter_set_changes;
253 atomic64_t delete_cmds;
254 atomic64_t iface_events; /* Number of NETDEV_* events handled */
255
256 atomic64_t match_calls; /* Number of times iptables called mt */
257 /*
258 * match_found_sk_*: numbers related to the netfilter matching
259 * function finding a sock for the sk_buff.
260 * Total skbs processed is sum(match_found*).
261 */
262 atomic64_t match_found_sk; /* An sk was already in the sk_buff. */
263 /* The connection tracker had or didn't have the sk. */
264 atomic64_t match_found_sk_in_ct;
265 atomic64_t match_found_no_sk_in_ct;
266 /*
267 * No sk could be found. No apparent owner. Could happen with
268 * unsolicited traffic.
269 */
270 atomic64_t match_no_sk;
271 /*
272 * The file ptr in the sk_socket wasn't there.
273 * This might happen for traffic while the socket is being closed.
274 */
275 atomic64_t match_no_sk_file;
276};
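
/*
 * Illustrative sketch only, not part of the original header: one reading of
 * the comment above, where the total number of sk_buffs processed is the
 * sum of the match_found_* counters. The function name is hypothetical.
 */
static inline uint64_t qtaguid_total_skbs_example(struct qtaguid_event_counts *ec)
{
	return atomic64_read(&ec->match_found_sk) +
	       atomic64_read(&ec->match_found_sk_in_ct) +
	       atomic64_read(&ec->match_found_no_sk_in_ct);
}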
277
278/* Track the set active_set for the given tag. */
279struct tag_counter_set {
280 struct tag_node tn;
281 int active_set;
282};
283
284/*----------------------------------------------*/
285/*
286 * The qtu uid data is used to track resources that are created directly or
287 * indirectly by processes (uid tracked).
288 * It is shared by the processes with the same uid.
289 * Some of the resources will be counted to prevent further rogue allocations,
290 * some will need freeing once the owner process (uid) exits.
291 */
292struct uid_tag_data {
293 struct rb_node node;
294 uid_t uid;
295
296 /*
297 * For the uid, how many accounting tags have been set.
298 */
299 int num_active_tags;
300 /* Track the number of proc_qtu_data that reference it */
301 int num_pqd;
302 struct rb_root tag_ref_tree;
303 /* No tag_node_tree_lock; use uid_tag_data_tree_lock */
304};
305
306struct tag_ref {
307 struct tag_node tn;
308
309 /*
310 * This tracks the number of active sockets that have a tag on them
311 * which matches this tag_ref.tn.tag.
312 * A tag ref can live on after the sockets are untagged.
313 * A tag ref can only be removed during a tag delete command.
314 */
315 int num_sock_tags;
316};
317
318struct proc_qtu_data {
319 struct rb_node node;
320 pid_t pid;
321
322 struct uid_tag_data *parent_tag_data;
323
324 /* Tracks the sock_tags that need freeing upon this proc's death */
325 struct list_head sock_tag_list;
326 /* No spinlock_t sock_tag_list_lock; use the global one. */
327};
328
329/*----------------------------------------------*/
330#endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */
diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c
new file mode 100644
index 00000000000..39176785c91
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_print.c
@@ -0,0 +1,556 @@
1/*
2 * Pretty printing Support for iptables xt_qtaguid module.
3 *
4 * (C) 2011 Google, Inc
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11/*
12 * Most of the functions in this file just waste time if DEBUG is not defined.
13 * The matching xt_qtaguid_print.h will static inline empty funcs if the needed
14 * debug flags are not defined.
15 * Those funcs that fail to allocate memory will panic as there is no need to
16 * hobble along just pretending to do the requested work.
17 */
18
19#define DEBUG
20
21#include <linux/fs.h>
22#include <linux/gfp.h>
23#include <linux/net.h>
24#include <linux/rbtree.h>
25#include <linux/slab.h>
26#include <linux/spinlock_types.h>
27
28
29#include "xt_qtaguid_internal.h"
30#include "xt_qtaguid_print.h"
31
32#ifdef DDEBUG
33
34static void _bug_on_err_or_null(void *ptr)
35{
36 if (IS_ERR_OR_NULL(ptr)) {
37 pr_err("qtaguid: kmalloc failed\n");
38 BUG();
39 }
40}
41
42char *pp_tag_t(tag_t *tag)
43{
44 char *res;
45
46 if (!tag)
47 res = kasprintf(GFP_ATOMIC, "tag_t@null{}");
48 else
49 res = kasprintf(GFP_ATOMIC,
50 "tag_t@%p{tag=0x%llx, uid=%u}",
51 tag, *tag, get_uid_from_tag(*tag));
52 _bug_on_err_or_null(res);
53 return res;
54}
55
56char *pp_data_counters(struct data_counters *dc, bool showValues)
57{
58 char *res;
59
60 if (!dc)
61 res = kasprintf(GFP_ATOMIC, "data_counters@null{}");
62 else if (showValues)
63 res = kasprintf(
64 GFP_ATOMIC, "data_counters@%p{"
65 "set0{"
66 "rx{"
67 "tcp{b=%llu, p=%llu}, "
68 "udp{b=%llu, p=%llu},"
69 "other{b=%llu, p=%llu}}, "
70 "tx{"
71 "tcp{b=%llu, p=%llu}, "
72 "udp{b=%llu, p=%llu},"
73 "other{b=%llu, p=%llu}}}, "
74 "set1{"
75 "rx{"
76 "tcp{b=%llu, p=%llu}, "
77 "udp{b=%llu, p=%llu},"
78 "other{b=%llu, p=%llu}}, "
79 "tx{"
80 "tcp{b=%llu, p=%llu}, "
81 "udp{b=%llu, p=%llu},"
82 "other{b=%llu, p=%llu}}}}",
83 dc,
84 dc->bpc[0][IFS_RX][IFS_TCP].bytes,
85 dc->bpc[0][IFS_RX][IFS_TCP].packets,
86 dc->bpc[0][IFS_RX][IFS_UDP].bytes,
87 dc->bpc[0][IFS_RX][IFS_UDP].packets,
88 dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].bytes,
89 dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].packets,
90 dc->bpc[0][IFS_TX][IFS_TCP].bytes,
91 dc->bpc[0][IFS_TX][IFS_TCP].packets,
92 dc->bpc[0][IFS_TX][IFS_UDP].bytes,
93 dc->bpc[0][IFS_TX][IFS_UDP].packets,
94 dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].bytes,
95 dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].packets,
96 dc->bpc[1][IFS_RX][IFS_TCP].bytes,
97 dc->bpc[1][IFS_RX][IFS_TCP].packets,
98 dc->bpc[1][IFS_RX][IFS_UDP].bytes,
99 dc->bpc[1][IFS_RX][IFS_UDP].packets,
100 dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].bytes,
101 dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].packets,
102 dc->bpc[1][IFS_TX][IFS_TCP].bytes,
103 dc->bpc[1][IFS_TX][IFS_TCP].packets,
104 dc->bpc[1][IFS_TX][IFS_UDP].bytes,
105 dc->bpc[1][IFS_TX][IFS_UDP].packets,
106 dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].bytes,
107 dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].packets);
108 else
109 res = kasprintf(GFP_ATOMIC, "data_counters@%p{...}", dc);
110 _bug_on_err_or_null(res);
111 return res;
112}
113
114char *pp_tag_node(struct tag_node *tn)
115{
116 char *tag_str;
117 char *res;
118
119 if (!tn) {
120 res = kasprintf(GFP_ATOMIC, "tag_node@null{}");
121 _bug_on_err_or_null(res);
122 return res;
123 }
124 tag_str = pp_tag_t(&tn->tag);
125 res = kasprintf(GFP_ATOMIC,
126 "tag_node@%p{tag=%s}",
127 tn, tag_str);
128 _bug_on_err_or_null(res);
129 kfree(tag_str);
130 return res;
131}
132
133char *pp_tag_ref(struct tag_ref *tr)
134{
135 char *tn_str;
136 char *res;
137
138 if (!tr) {
139 res = kasprintf(GFP_ATOMIC, "tag_ref@null{}");
140 _bug_on_err_or_null(res);
141 return res;
142 }
143 tn_str = pp_tag_node(&tr->tn);
144 res = kasprintf(GFP_ATOMIC,
145 "tag_ref@%p{%s, num_sock_tags=%d}",
146 tr, tn_str, tr->num_sock_tags);
147 _bug_on_err_or_null(res);
148 kfree(tn_str);
149 return res;
150}
151
152char *pp_tag_stat(struct tag_stat *ts)
153{
154 char *tn_str;
155 char *counters_str;
156 char *parent_counters_str;
157 char *res;
158
159 if (!ts) {
160 res = kasprintf(GFP_ATOMIC, "tag_stat@null{}");
161 _bug_on_err_or_null(res);
162 return res;
163 }
164 tn_str = pp_tag_node(&ts->tn);
165 counters_str = pp_data_counters(&ts->counters, true);
166 parent_counters_str = pp_data_counters(ts->parent_counters, false);
167 res = kasprintf(GFP_ATOMIC,
168 "tag_stat@%p{%s, counters=%s, parent_counters=%s}",
169 ts, tn_str, counters_str, parent_counters_str);
170 _bug_on_err_or_null(res);
171 kfree(tn_str);
172 kfree(counters_str);
173 kfree(parent_counters_str);
174 return res;
175}
176
177char *pp_iface_stat(struct iface_stat *is)
178{
179 char *res;
180 if (!is)
181 res = kasprintf(GFP_ATOMIC, "iface_stat@null{}");
182 else
183 res = kasprintf(GFP_ATOMIC, "iface_stat@%p{"
184 "list=list_head{...}, "
185 "ifname=%s, "
186 "total={rx={bytes=%llu, "
187 "packets=%llu}, "
188 "tx={bytes=%llu, "
189 "packets=%llu}}, "
190 "last_known_valid=%d, "
191 "last_known={rx={bytes=%llu, "
192 "packets=%llu}, "
193 "tx={bytes=%llu, "
194 "packets=%llu}}, "
195 "active=%d, "
196 "net_dev=%p, "
197 "proc_ptr=%p, "
198 "tag_stat_tree=rb_root{...}}",
199 is,
200 is->ifname,
201 is->totals[IFS_RX].bytes,
202 is->totals[IFS_RX].packets,
203 is->totals[IFS_TX].bytes,
204 is->totals[IFS_TX].packets,
205 is->last_known_valid,
206 is->last_known[IFS_RX].bytes,
207 is->last_known[IFS_RX].packets,
208 is->last_known[IFS_TX].bytes,
209 is->last_known[IFS_TX].packets,
210 is->active,
211 is->net_dev,
212 is->proc_ptr);
213 _bug_on_err_or_null(res);
214 return res;
215}
216
217char *pp_sock_tag(struct sock_tag *st)
218{
219 char *tag_str;
220 char *res;
221
222 if (!st) {
223 res = kasprintf(GFP_ATOMIC, "sock_tag@null{}");
224 _bug_on_err_or_null(res);
225 return res;
226 }
227 tag_str = pp_tag_t(&st->tag);
228 res = kasprintf(GFP_ATOMIC, "sock_tag@%p{"
229 "sock_node=rb_node{...}, "
230 "sk=%p socket=%p (f_count=%lu), list=list_head{...}, "
231 "pid=%u, tag=%s}",
232 st, st->sk, st->socket, atomic_long_read(
233 &st->socket->file->f_count),
234 st->pid, tag_str);
235 _bug_on_err_or_null(res);
236 kfree(tag_str);
237 return res;
238}
239
240char *pp_uid_tag_data(struct uid_tag_data *utd)
241{
242 char *res;
243
244 if (!utd)
245 res = kasprintf(GFP_ATOMIC, "uid_tag_data@null{}");
246 else
247 res = kasprintf(GFP_ATOMIC, "uid_tag_data@%p{"
248 "uid=%u, num_active_acct_tags=%d, "
249 "num_pqd=%d, "
250 "tag_node_tree=rb_root{...}, "
251 "proc_qtu_data_tree=rb_root{...}}",
252 utd, utd->uid,
253 utd->num_active_tags, utd->num_pqd);
254 _bug_on_err_or_null(res);
255 return res;
256}
257
258char *pp_proc_qtu_data(struct proc_qtu_data *pqd)
259{
260 char *parent_tag_data_str;
261 char *res;
262
263 if (!pqd) {
264 res = kasprintf(GFP_ATOMIC, "proc_qtu_data@null{}");
265 _bug_on_err_or_null(res);
266 return res;
267 }
268 parent_tag_data_str = pp_uid_tag_data(pqd->parent_tag_data);
269 res = kasprintf(GFP_ATOMIC, "proc_qtu_data@%p{"
270 "node=rb_node{...}, pid=%u, "
271 "parent_tag_data=%s, "
272 "sock_tag_list=list_head{...}}",
273 pqd, pqd->pid, parent_tag_data_str
274 );
275 _bug_on_err_or_null(res);
276 kfree(parent_tag_data_str);
277 return res;
278}
279
280/*------------------------------------------*/
281void prdebug_sock_tag_tree(int indent_level,
282 struct rb_root *sock_tag_tree)
283{
284 struct rb_node *node;
285 struct sock_tag *sock_tag_entry;
286 char *str;
287
288 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
289 return;
290
291 if (RB_EMPTY_ROOT(sock_tag_tree)) {
292 str = "sock_tag_tree=rb_root{}";
293 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
294 return;
295 }
296
297 str = "sock_tag_tree=rb_root{";
298 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
299 indent_level++;
300 for (node = rb_first(sock_tag_tree);
301 node;
302 node = rb_next(node)) {
303 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
304 str = pp_sock_tag(sock_tag_entry);
305 pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
306 kfree(str);
307 }
308 indent_level--;
309 str = "}";
310 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
311}
312
313void prdebug_sock_tag_list(int indent_level,
314 struct list_head *sock_tag_list)
315{
316 struct sock_tag *sock_tag_entry;
317 char *str;
318
319 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
320 return;
321
322 if (list_empty(sock_tag_list)) {
323 str = "sock_tag_list=list_head{}";
324 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
325 return;
326 }
327
328 str = "sock_tag_list=list_head{";
329 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
330 indent_level++;
331 list_for_each_entry(sock_tag_entry, sock_tag_list, list) {
332 str = pp_sock_tag(sock_tag_entry);
333 pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
334 kfree(str);
335 }
336 indent_level--;
337 str = "}";
338 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
339}
340
341void prdebug_proc_qtu_data_tree(int indent_level,
342 struct rb_root *proc_qtu_data_tree)
343{
344 char *str;
345 struct rb_node *node;
346 struct proc_qtu_data *proc_qtu_data_entry;
347
348 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
349 return;
350
351 if (RB_EMPTY_ROOT(proc_qtu_data_tree)) {
352 str = "proc_qtu_data_tree=rb_root{}";
353 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
354 return;
355 }
356
357 str = "proc_qtu_data_tree=rb_root{";
358 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
359 indent_level++;
360 for (node = rb_first(proc_qtu_data_tree);
361 node;
362 node = rb_next(node)) {
363 proc_qtu_data_entry = rb_entry(node,
364 struct proc_qtu_data,
365 node);
366 str = pp_proc_qtu_data(proc_qtu_data_entry);
367 pr_debug("%*d: %s,\n", indent_level*2, indent_level,
368 str);
369 kfree(str);
370 indent_level++;
371 prdebug_sock_tag_list(indent_level,
372 &proc_qtu_data_entry->sock_tag_list);
373 indent_level--;
374
375 }
376 indent_level--;
377 str = "}";
378 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
379}
380
381void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree)
382{
383 char *str;
384 struct rb_node *node;
385 struct tag_ref *tag_ref_entry;
386
387 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
388 return;
389
390 if (RB_EMPTY_ROOT(tag_ref_tree)) {
391 str = "tag_ref_tree{}";
392 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
393 return;
394 }
395
396 str = "tag_ref_tree{";
397 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
398 indent_level++;
399 for (node = rb_first(tag_ref_tree);
400 node;
401 node = rb_next(node)) {
402 tag_ref_entry = rb_entry(node,
403 struct tag_ref,
404 tn.node);
405 str = pp_tag_ref(tag_ref_entry);
406 pr_debug("%*d: %s,\n", indent_level*2, indent_level,
407 str);
408 kfree(str);
409 }
410 indent_level--;
411 str = "}";
412 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
413}
414
415void prdebug_uid_tag_data_tree(int indent_level,
416 struct rb_root *uid_tag_data_tree)
417{
418 char *str;
419 struct rb_node *node;
420 struct uid_tag_data *uid_tag_data_entry;
421
422 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
423 return;
424
425 if (RB_EMPTY_ROOT(uid_tag_data_tree)) {
426 str = "uid_tag_data_tree=rb_root{}";
427 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
428 return;
429 }
430
431 str = "uid_tag_data_tree=rb_root{";
432 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
433 indent_level++;
434 for (node = rb_first(uid_tag_data_tree);
435 node;
436 node = rb_next(node)) {
437 uid_tag_data_entry = rb_entry(node, struct uid_tag_data,
438 node);
439 str = pp_uid_tag_data(uid_tag_data_entry);
440 pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
441 kfree(str);
442 if (!RB_EMPTY_ROOT(&uid_tag_data_entry->tag_ref_tree)) {
443 indent_level++;
444 prdebug_tag_ref_tree(indent_level,
445 &uid_tag_data_entry->tag_ref_tree);
446 indent_level--;
447 }
448 }
449 indent_level--;
450 str = "}";
451 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
452}
453
454void prdebug_tag_stat_tree(int indent_level,
455 struct rb_root *tag_stat_tree)
456{
457 char *str;
458 struct rb_node *node;
459 struct tag_stat *ts_entry;
460
461 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
462 return;
463
464 if (RB_EMPTY_ROOT(tag_stat_tree)) {
465 str = "tag_stat_tree{}";
466 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
467 return;
468 }
469
470 str = "tag_stat_tree{";
471 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
472 indent_level++;
473 for (node = rb_first(tag_stat_tree);
474 node;
475 node = rb_next(node)) {
476 ts_entry = rb_entry(node, struct tag_stat, tn.node);
477 str = pp_tag_stat(ts_entry);
478 pr_debug("%*d: %s\n", indent_level*2, indent_level,
479 str);
480 kfree(str);
481 }
482 indent_level--;
483 str = "}";
484 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
485}
486
487void prdebug_iface_stat_list(int indent_level,
488 struct list_head *iface_stat_list)
489{
490 char *str;
491 struct iface_stat *iface_entry;
492
493 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
494 return;
495
496 if (list_empty(iface_stat_list)) {
497 str = "iface_stat_list=list_head{}";
498 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
499 return;
500 }
501
502 str = "iface_stat_list=list_head{";
503 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
504 indent_level++;
505 list_for_each_entry(iface_entry, iface_stat_list, list) {
506 str = pp_iface_stat(iface_entry);
507 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
508 kfree(str);
509
510 spin_lock_bh(&iface_entry->tag_stat_list_lock);
511 if (!RB_EMPTY_ROOT(&iface_entry->tag_stat_tree)) {
512 indent_level++;
513 prdebug_tag_stat_tree(indent_level,
514 &iface_entry->tag_stat_tree);
515 indent_level--;
516 }
517 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
518 }
519 indent_level--;
520 str = "}";
521 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
522}
523
524#endif /* ifdef DDEBUG */
525/*------------------------------------------*/
526static const char * const netdev_event_strings[] = {
527 "netdev_unknown",
528 "NETDEV_UP",
529 "NETDEV_DOWN",
530 "NETDEV_REBOOT",
531 "NETDEV_CHANGE",
532 "NETDEV_REGISTER",
533 "NETDEV_UNREGISTER",
534 "NETDEV_CHANGEMTU",
535 "NETDEV_CHANGEADDR",
536 "NETDEV_GOING_DOWN",
537 "NETDEV_CHANGENAME",
538 "NETDEV_FEAT_CHANGE",
539 "NETDEV_BONDING_FAILOVER",
540 "NETDEV_PRE_UP",
541 "NETDEV_PRE_TYPE_CHANGE",
542 "NETDEV_POST_TYPE_CHANGE",
543 "NETDEV_POST_INIT",
544 "NETDEV_UNREGISTER_BATCH",
545 "NETDEV_RELEASE",
546 "NETDEV_NOTIFY_PEERS",
547 "NETDEV_JOIN",
548};
549
550const char *netdev_evt_str(int netdev_event)
551{
552 if (netdev_event < 0
553 || netdev_event >= ARRAY_SIZE(netdev_event_strings))
554 return "bad event num";
555 return netdev_event_strings[netdev_event];
556}
diff --git a/net/netfilter/xt_qtaguid_print.h b/net/netfilter/xt_qtaguid_print.h
new file mode 100644
index 00000000000..b63871a0be5
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_print.h
@@ -0,0 +1,120 @@
1/*
2 * Pretty printing Support for iptables xt_qtaguid module.
3 *
4 * (C) 2011 Google, Inc
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef __XT_QTAGUID_PRINT_H__
11#define __XT_QTAGUID_PRINT_H__
12
13#include "xt_qtaguid_internal.h"
14
15#ifdef DDEBUG
16
17char *pp_tag_t(tag_t *tag);
18char *pp_data_counters(struct data_counters *dc, bool showValues);
19char *pp_tag_node(struct tag_node *tn);
20char *pp_tag_ref(struct tag_ref *tr);
21char *pp_tag_stat(struct tag_stat *ts);
22char *pp_iface_stat(struct iface_stat *is);
23char *pp_sock_tag(struct sock_tag *st);
24char *pp_uid_tag_data(struct uid_tag_data *qtd);
25char *pp_proc_qtu_data(struct proc_qtu_data *pqd);
26
27/*------------------------------------------*/
28void prdebug_sock_tag_list(int indent_level,
29 struct list_head *sock_tag_list);
30void prdebug_sock_tag_tree(int indent_level,
31 struct rb_root *sock_tag_tree);
32void prdebug_proc_qtu_data_tree(int indent_level,
33 struct rb_root *proc_qtu_data_tree);
34void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree);
35void prdebug_uid_tag_data_tree(int indent_level,
36 struct rb_root *uid_tag_data_tree);
37void prdebug_tag_stat_tree(int indent_level,
38 struct rb_root *tag_stat_tree);
39void prdebug_iface_stat_list(int indent_level,
40 struct list_head *iface_stat_list);
41
42#else
43
44/*------------------------------------------*/
45static inline char *pp_tag_t(tag_t *tag)
46{
47 return NULL;
48}
49static inline char *pp_data_counters(struct data_counters *dc, bool showValues)
50{
51 return NULL;
52}
53static inline char *pp_tag_node(struct tag_node *tn)
54{
55 return NULL;
56}
57static inline char *pp_tag_ref(struct tag_ref *tr)
58{
59 return NULL;
60}
61static inline char *pp_tag_stat(struct tag_stat *ts)
62{
63 return NULL;
64}
65static inline char *pp_iface_stat(struct iface_stat *is)
66{
67 return NULL;
68}
69static inline char *pp_sock_tag(struct sock_tag *st)
70{
71 return NULL;
72}
73static inline char *pp_uid_tag_data(struct uid_tag_data *qtd)
74{
75 return NULL;
76}
77static inline char *pp_proc_qtu_data(struct proc_qtu_data *pqd)
78{
79 return NULL;
80}
81
82/*------------------------------------------*/
83static inline
84void prdebug_sock_tag_list(int indent_level,
85 struct list_head *sock_tag_list)
86{
87}
88static inline
89void prdebug_sock_tag_tree(int indent_level,
90 struct rb_root *sock_tag_tree)
91{
92}
93static inline
94void prdebug_proc_qtu_data_tree(int indent_level,
95 struct rb_root *proc_qtu_data_tree)
96{
97}
98static inline
99void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree)
100{
101}
102static inline
103void prdebug_uid_tag_data_tree(int indent_level,
104 struct rb_root *uid_tag_data_tree)
105{
106}
107static inline
108void prdebug_tag_stat_tree(int indent_level,
109 struct rb_root *tag_stat_tree)
110{
111}
112static inline
113void prdebug_iface_stat_list(int indent_level,
114 struct list_head *iface_stat_list)
115{
116}
117#endif
118/*------------------------------------------*/
119const char *netdev_evt_str(int netdev_event);
120#endif /* ifndef __XT_QTAGUID_PRINT_H__ */
diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c
new file mode 100644
index 00000000000..3c72bea2dd6
--- /dev/null
+++ b/net/netfilter/xt_quota2.c
@@ -0,0 +1,381 @@
1/*
2 * xt_quota2 - enhanced xt_quota that can count upwards and in packets
3 * as a minimal accounting match.
4 * by Jan Engelhardt <jengelh@medozas.de>, 2008
5 *
6 * Originally based on xt_quota.c:
7 * netfilter module to enforce network quotas
8 * Sam Johnston <samj@samj.net>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License; either
12 * version 2 of the License, as published by the Free Software Foundation.
13 */
14#include <linux/list.h>
15#include <linux/proc_fs.h>
16#include <linux/skbuff.h>
17#include <linux/spinlock.h>
18#include <asm/atomic.h>
19
20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter/xt_quota2.h>
22#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
23#include <linux/netfilter_ipv4/ipt_ULOG.h>
24#endif
25
26/**
27 * @lock: lock to protect quota writers from each other
28 */
29struct xt_quota_counter {
30 u_int64_t quota;
31 spinlock_t lock;
32 struct list_head list;
33 atomic_t ref;
34 char name[sizeof(((struct xt_quota_mtinfo2 *)NULL)->name)];
35 struct proc_dir_entry *procfs_entry;
36};
37
38#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
39/* Harald's favorite number +1 :D From ipt_ULOG.C */
40static int qlog_nl_event = 112;
41module_param_named(event_num, qlog_nl_event, uint, S_IRUGO | S_IWUSR);
42MODULE_PARM_DESC(event_num,
43 "Event number for NETLINK_NFLOG message. 0 disables log."
44 "111 is what ipt_ULOG uses.");
45static struct sock *nflognl;
46#endif
47
48static LIST_HEAD(counter_list);
49static DEFINE_SPINLOCK(counter_list_lock);
50
51static struct proc_dir_entry *proc_xt_quota;
52static unsigned int quota_list_perms = S_IRUGO | S_IWUSR;
53static unsigned int quota_list_uid = 0;
54static unsigned int quota_list_gid = 0;
55module_param_named(perms, quota_list_perms, uint, S_IRUGO | S_IWUSR);
56module_param_named(uid, quota_list_uid, uint, S_IRUGO | S_IWUSR);
57module_param_named(gid, quota_list_gid, uint, S_IRUGO | S_IWUSR);
58
59
60#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
61static void quota2_log(unsigned int hooknum,
62 const struct sk_buff *skb,
63 const struct net_device *in,
64 const struct net_device *out,
65 const char *prefix)
66{
67 ulog_packet_msg_t *pm;
68 struct sk_buff *log_skb;
69 size_t size;
70 struct nlmsghdr *nlh;
71
72 if (!qlog_nl_event)
73 return;
74
75 size = NLMSG_SPACE(sizeof(*pm));
76 size = max(size, (size_t)NLMSG_GOODSIZE);
77 log_skb = alloc_skb(size, GFP_ATOMIC);
78 if (!log_skb) {
79 pr_err("xt_quota2: cannot alloc skb for logging\n");
80 return;
81 }
82
83 /* NLMSG_PUT() uses "goto nlmsg_failure" */
84 nlh = NLMSG_PUT(log_skb, /*pid*/0, /*seq*/0, qlog_nl_event,
85 sizeof(*pm));
86 pm = NLMSG_DATA(nlh);
87 if (skb->tstamp.tv64 == 0)
88 __net_timestamp((struct sk_buff *)skb);
89 pm->data_len = 0;
90 pm->hook = hooknum;
91 if (prefix != NULL)
92 strlcpy(pm->prefix, prefix, sizeof(pm->prefix));
93 else
94 *(pm->prefix) = '\0';
95 if (in)
96 strlcpy(pm->indev_name, in->name, sizeof(pm->indev_name));
97 else
98 pm->indev_name[0] = '\0';
99
100 if (out)
101 strlcpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
102 else
103 pm->outdev_name[0] = '\0';
104
105 NETLINK_CB(log_skb).dst_group = 1;
106 pr_debug("throwing 1 packets to netlink group 1\n");
107 netlink_broadcast(nflognl, log_skb, 0, 1, GFP_ATOMIC);
108
109nlmsg_failure: /* Used within NLMSG_PUT() */
110 pr_debug("xt_quota2: error during NLMSG_PUT\n");
111}
112#else
113static void quota2_log(unsigned int hooknum,
114 const struct sk_buff *skb,
115 const struct net_device *in,
116 const struct net_device *out,
117 const char *prefix)
118{
119}
120#endif /* if+else CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG */
121
122static int quota_proc_read(char *page, char **start, off_t offset,
123 int count, int *eof, void *data)
124{
125 struct xt_quota_counter *e = data;
126 int ret;
127
128 spin_lock_bh(&e->lock);
129 ret = snprintf(page, PAGE_SIZE, "%llu\n", e->quota);
130 spin_unlock_bh(&e->lock);
131 return ret;
132}
133
134static int quota_proc_write(struct file *file, const char __user *input,
135 unsigned long size, void *data)
136{
137 struct xt_quota_counter *e = data;
138 char buf[sizeof("18446744073709551616")];
139
140 if (size > sizeof(buf))
141 size = sizeof(buf);
142 if (copy_from_user(buf, input, size) != 0)
143 return -EFAULT;
144 buf[sizeof(buf)-1] = '\0';
145
146 spin_lock_bh(&e->lock);
147 e->quota = simple_strtoull(buf, NULL, 0);
148 spin_unlock_bh(&e->lock);
149 return size;
150}
151
152static struct xt_quota_counter *
153q2_new_counter(const struct xt_quota_mtinfo2 *q, bool anon)
154{
155 struct xt_quota_counter *e;
156 unsigned int size;
157
158 /* Do not need all the procfs things for anonymous counters. */
159 size = anon ? offsetof(typeof(*e), list) : sizeof(*e);
160 e = kmalloc(size, GFP_KERNEL);
161 if (e == NULL)
162 return NULL;
163
164 e->quota = q->quota;
165 spin_lock_init(&e->lock);
166 if (!anon) {
167 INIT_LIST_HEAD(&e->list);
168 atomic_set(&e->ref, 1);
169 strlcpy(e->name, q->name, sizeof(e->name));
170 }
171 return e;
172}
173
174/**
175 * q2_get_counter - get ref to counter or create new
176 * @name: name of counter
177 */
178static struct xt_quota_counter *
179q2_get_counter(const struct xt_quota_mtinfo2 *q)
180{
181 struct proc_dir_entry *p;
182 struct xt_quota_counter *e = NULL;
183 struct xt_quota_counter *new_e;
184
185 if (*q->name == '\0')
186 return q2_new_counter(q, true);
187
188 /* No need to hold a lock while getting a new counter */
189 new_e = q2_new_counter(q, false);
190 if (new_e == NULL)
191 goto out;
192
193 spin_lock_bh(&counter_list_lock);
194 list_for_each_entry(e, &counter_list, list)
195 if (strcmp(e->name, q->name) == 0) {
196 atomic_inc(&e->ref);
197 spin_unlock_bh(&counter_list_lock);
198 kfree(new_e);
199 pr_debug("xt_quota2: old counter name=%s", e->name);
200 return e;
201 }
202 e = new_e;
203 pr_debug("xt_quota2: new_counter name=%s", e->name);
204 list_add_tail(&e->list, &counter_list);
205 /* The entry having a refcount of 1 is not directly destructible.
206 * This func has not yet returned the new entry, thus iptables
207 * has no references for destroying this entry.
208 * For another rule to try to destroy it, this func would first need
209 * to be re-invoked and acquire a new ref for the same named quota.
210 * Nobody will access the e->procfs_entry either.
211 * So release the lock. */
212 spin_unlock_bh(&counter_list_lock);
213
214 /* create_proc_entry() is not spin_lock happy */
215 p = e->procfs_entry = create_proc_entry(e->name, quota_list_perms,
216 proc_xt_quota);
217
218 if (IS_ERR_OR_NULL(p)) {
219 spin_lock_bh(&counter_list_lock);
220 list_del(&e->list);
221 spin_unlock_bh(&counter_list_lock);
222 goto out;
223 }
224 p->data = e;
225 p->read_proc = quota_proc_read;
226 p->write_proc = quota_proc_write;
227 p->uid = quota_list_uid;
228 p->gid = quota_list_gid;
229 return e;
230
231 out:
232 kfree(e);
233 return NULL;
234}
235
236static int quota_mt2_check(const struct xt_mtchk_param *par)
237{
238 struct xt_quota_mtinfo2 *q = par->matchinfo;
239
240 pr_debug("xt_quota2: check() flags=0x%04x", q->flags);
241
242 if (q->flags & ~XT_QUOTA_MASK)
243 return -EINVAL;
244
245 q->name[sizeof(q->name)-1] = '\0';
246 if (*q->name == '.' || strchr(q->name, '/') != NULL) {
247 printk(KERN_ERR "xt_quota.3: illegal name\n");
248 return -EINVAL;
249 }
250
251 q->master = q2_get_counter(q);
252 if (q->master == NULL) {
253 printk(KERN_ERR "xt_quota.3: memory alloc failure\n");
254 return -ENOMEM;
255 }
256
257 return 0;
258}
259
260static void quota_mt2_destroy(const struct xt_mtdtor_param *par)
261{
262 struct xt_quota_mtinfo2 *q = par->matchinfo;
263 struct xt_quota_counter *e = q->master;
264
265 if (*q->name == '\0') {
266 kfree(e);
267 return;
268 }
269
270 spin_lock_bh(&counter_list_lock);
271 if (!atomic_dec_and_test(&e->ref)) {
272 spin_unlock_bh(&counter_list_lock);
273 return;
274 }
275
276 list_del(&e->list);
277 remove_proc_entry(e->name, proc_xt_quota);
278 spin_unlock_bh(&counter_list_lock);
279 kfree(e);
280}
281
282static bool
283quota_mt2(const struct sk_buff *skb, struct xt_action_param *par)
284{
285 struct xt_quota_mtinfo2 *q = (void *)par->matchinfo;
286 struct xt_quota_counter *e = q->master;
287 bool ret = q->flags & XT_QUOTA_INVERT;
288
289 spin_lock_bh(&e->lock);
290 if (q->flags & XT_QUOTA_GROW) {
291 /*
292 * While no_change is pointless in "grow" mode, we will
293 * implement it here simply to have a consistent behavior.
294 */
295 if (!(q->flags & XT_QUOTA_NO_CHANGE)) {
296 e->quota += (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
297 }
298 ret = true;
299 } else {
300 if (e->quota >= skb->len) {
301 if (!(q->flags & XT_QUOTA_NO_CHANGE))
302 e->quota -= (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
303 ret = !ret;
304 } else {
305 /* We are transitioning, log that fact. */
306 if (e->quota) {
307 quota2_log(par->hooknum,
308 skb,
309 par->in,
310 par->out,
311 q->name);
312 }
313 /* we do not allow even small packets from now on */
314 e->quota = 0;
315 }
316 }
317 spin_unlock_bh(&e->lock);
318 return ret;
319}
320
321static struct xt_match quota_mt2_reg[] __read_mostly = {
322 {
323 .name = "quota2",
324 .revision = 3,
325 .family = NFPROTO_IPV4,
326 .checkentry = quota_mt2_check,
327 .match = quota_mt2,
328 .destroy = quota_mt2_destroy,
329 .matchsize = sizeof(struct xt_quota_mtinfo2),
330 .me = THIS_MODULE,
331 },
332 {
333 .name = "quota2",
334 .revision = 3,
335 .family = NFPROTO_IPV6,
336 .checkentry = quota_mt2_check,
337 .match = quota_mt2,
338 .destroy = quota_mt2_destroy,
339 .matchsize = sizeof(struct xt_quota_mtinfo2),
340 .me = THIS_MODULE,
341 },
342};
343
344static int __init quota_mt2_init(void)
345{
346 int ret;
347 pr_debug("xt_quota2: init()");
348
349#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
350 nflognl = netlink_kernel_create(&init_net,
351 NETLINK_NFLOG, 1, NULL,
352 NULL, THIS_MODULE);
353 if (!nflognl)
354 return -ENOMEM;
355#endif
356
357 proc_xt_quota = proc_mkdir("xt_quota", init_net.proc_net);
358 if (proc_xt_quota == NULL)
359 return -EACCES;
360
361 ret = xt_register_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg));
362 if (ret < 0)
363 remove_proc_entry("xt_quota", init_net.proc_net);
364 pr_debug("xt_quota2: init() %d", ret);
365 return ret;
366}
367
368static void __exit quota_mt2_exit(void)
369{
370 xt_unregister_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg));
371 remove_proc_entry("xt_quota", init_net.proc_net);
372}
373
374module_init(quota_mt2_init);
375module_exit(quota_mt2_exit);
376MODULE_DESCRIPTION("Xtables: countdown quota match; up counter");
377MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
378MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
379MODULE_LICENSE("GPL");
380MODULE_ALIAS("ipt_quota2");
381MODULE_ALIAS("ip6t_quota2");
diff --git a/net/rds/xlist.h b/net/rds/xlist.h
new file mode 100644
index 00000000000..e6b5190dadd
--- /dev/null
+++ b/net/rds/xlist.h
@@ -0,0 +1,80 @@
1#ifndef _LINUX_XLIST_H
2#define _LINUX_XLIST_H
3
4#include <linux/stddef.h>
5#include <linux/poison.h>
6#include <linux/prefetch.h>
7#include <asm/system.h>
8
9struct xlist_head {
10 struct xlist_head *next;
11};
12
13static inline void INIT_XLIST_HEAD(struct xlist_head *list)
14{
15 list->next = NULL;
16}
17
18static inline int xlist_empty(struct xlist_head *head)
19{
20 return head->next == NULL;
21}
22
23static inline void xlist_add(struct xlist_head *new, struct xlist_head *tail,
24 struct xlist_head *head)
25{
26 struct xlist_head *cur;
27 struct xlist_head *check;
28
29 while (1) {
30 cur = head->next;
31 tail->next = cur;
32 check = cmpxchg(&head->next, cur, new);
33 if (check == cur)
34 break;
35 }
36}
37
38static inline struct xlist_head *xlist_del_head(struct xlist_head *head)
39{
40 struct xlist_head *cur;
41 struct xlist_head *check;
42 struct xlist_head *next;
43
44 while (1) {
45 cur = head->next;
46 if (!cur)
47 goto out;
48
49 next = cur->next;
50 check = cmpxchg(&head->next, cur, next);
51 if (check == cur)
52 goto out;
53 }
54out:
55 return cur;
56}
57
58static inline struct xlist_head *xlist_del_head_fast(struct xlist_head *head)
59{
60 struct xlist_head *cur;
61
62 cur = head->next;
63 if (!cur)
64 return NULL;
65
66 head->next = cur->next;
67 return cur;
68}
69
70static inline void xlist_splice(struct xlist_head *list,
71 struct xlist_head *head)
72{
73 struct xlist_head *cur;
74
75 WARN_ON(head->next);
76 cur = xchg(&list->next, NULL);
77 head->next = cur;
78}
79
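/*
 * Illustrative usage sketch, not part of this header: pushing a single node
 * and popping it back. For a single node, the same node is both the "new"
 * head and the "tail" of the chain handed to xlist_add(). The embedding
 * struct and function names are hypothetical; container_of() comes from
 * <linux/kernel.h>.
 */
struct xlist_example_item {
	struct xlist_head node;
	int value;
};

static inline void xlist_push_one_example(struct xlist_head *list,
					  struct xlist_example_item *item)
{
	INIT_XLIST_HEAD(&item->node);
	xlist_add(&item->node, &item->node, list);
}

static inline struct xlist_example_item *
xlist_pop_one_example(struct xlist_head *list)
{
	struct xlist_head *node = xlist_del_head(list);

	return node ? container_of(node, struct xlist_example_item, node)
		    : NULL;
}
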
80#endif
diff --git a/net/tipc/log.h b/net/tipc/log.h
new file mode 100644
index 00000000000..2248d96238e
--- /dev/null
+++ b/net/tipc/log.h
@@ -0,0 +1,67 @@
1/*
2 * net/tipc/log.h: Include file for TIPC print buffer routines
3 *
4 * Copyright (c) 1997-2006, Ericsson AB
5 * Copyright (c) 2005-2007, Wind River Systems
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the names of the copyright holders nor the names of its
17 * contributors may be used to endorse or promote products derived from
18 * this software without specific prior written permission.
19 *
20 * Alternatively, this software may be distributed under the terms of the
21 * GNU General Public License ("GPL") version 2 as published by the Free
22 * Software Foundation.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36
37#ifndef _TIPC_LOG_H
38#define _TIPC_LOG_H
39
40/**
41 * struct print_buf - TIPC print buffer structure
42 * @buf: pointer to character array containing print buffer contents
43 * @size: size of character array
44 * @crs: pointer to first unused space in character array (i.e. final NUL)
45 * @echo: echo output to system console if non-zero
46 */
47
48struct print_buf {
49 char *buf;
50 u32 size;
51 char *crs;
52 int echo;
53};
54
55#define TIPC_PB_MIN_SIZE 64 /* minimum size for a print buffer's array */
56#define TIPC_PB_MAX_STR 512 /* max printable string (with trailing NUL) */
57
58void tipc_printbuf_init(struct print_buf *pb, char *buf, u32 size);
59int tipc_printbuf_validate(struct print_buf *pb);
60
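/*
 * Illustrative sketch, not part of this header: wrapping a caller-owned
 * character array in a print_buf before handing it to the TIPC print
 * routines. Per the TIPC_PB_MIN_SIZE comment above, the array is assumed to
 * be at least that large. The function name is hypothetical.
 */
static inline void tipc_printbuf_example_setup(struct print_buf *pb,
					       char *area)
{
	tipc_printbuf_init(pb, area, TIPC_PB_MIN_SIZE);
}
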
61int tipc_log_resize(int log_size);
62
63struct sk_buff *tipc_log_resize_cmd(const void *req_tlv_area,
64 int req_tlv_space);
65struct sk_buff *tipc_log_dump(void);
66
67#endif