Diffstat (limited to 'net')
-rw-r--r--  net/802/garp.c | 4
-rw-r--r--  net/8021q/vlan.c | 8
-rw-r--r--  net/8021q/vlan_dev.c | 14
-rw-r--r--  net/Kconfig | 8
-rw-r--r--  net/Makefile | 2
-rw-r--r--  net/appletalk/ddp.c | 2
-rw-r--r--  net/atm/proc.c | 10
-rw-r--r--  net/bluetooth/bnep/netdev.c | 14
-rw-r--r--  net/bridge/br_device.c | 43
-rw-r--r--  net/bridge/br_if.c | 6
-rw-r--r--  net/bridge/br_input.c | 6
-rw-r--r--  net/bridge/br_multicast.c | 2
-rw-r--r--  net/bridge/br_notify.c | 4
-rw-r--r--  net/bridge/br_private.h | 8
-rw-r--r--  net/caif/Kconfig | 48
-rw-r--r--  net/caif/Makefile | 26
-rw-r--r--  net/caif/caif_config_util.c | 87
-rw-r--r--  net/caif/caif_dev.c | 413
-rw-r--r--  net/caif/caif_socket.c | 1391
-rw-r--r--  net/caif/cfcnfg.c | 530
-rw-r--r--  net/caif/cfctrl.c | 664
-rw-r--r--  net/caif/cfdbgl.c | 40
-rw-r--r--  net/caif/cfdgml.c | 108
-rw-r--r--  net/caif/cffrml.c | 151
-rw-r--r--  net/caif/cfmuxl.c | 246
-rw-r--r--  net/caif/cfpkt_skbuff.c | 571
-rw-r--r--  net/caif/cfrfml.c | 108
-rw-r--r--  net/caif/cfserl.c | 192
-rw-r--r--  net/caif/cfsrvl.c | 185
-rw-r--r--  net/caif/cfutill.c | 115
-rw-r--r--  net/caif/cfveil.c | 107
-rw-r--r--  net/caif/cfvidl.c | 65
-rw-r--r--  net/caif/chnl_net.c | 451
-rw-r--r--  net/core/Makefile | 2
-rw-r--r--  net/core/dev.c | 1006
-rw-r--r--  net/core/dev_addr_lists.c | 741
-rw-r--r--  net/core/dev_mcast.c | 232
-rw-r--r--  net/core/dst.c | 41
-rw-r--r--  net/core/ethtool.c | 148
-rw-r--r--  net/core/fib_rules.c | 10
-rw-r--r--  net/core/flow.c | 405
-rw-r--r--  net/core/net-sysfs.c | 230
-rw-r--r--  net/core/pktgen.c | 58
-rw-r--r--  net/core/rtnetlink.c | 44
-rw-r--r--  net/core/skbuff.c | 2
-rw-r--r--  net/dccp/ccids/ccid3.c | 2
-rw-r--r--  net/dccp/dccp.h | 2
-rw-r--r--  net/dccp/input.c | 2
-rw-r--r--  net/decnet/dn_dev.c | 12
-rw-r--r--  net/dsa/slave.c | 14
-rw-r--r--  net/ipv4/Kconfig | 8
-rw-r--r--  net/ipv4/af_inet.c | 4
-rw-r--r--  net/ipv4/devinet.c | 4
-rw-r--r--  net/ipv4/icmp.c | 5
-rw-r--r--  net/ipv4/igmp.c | 4
-rw-r--r--  net/ipv4/ip_sockglue.c | 4
-rw-r--r--  net/ipv4/ipconfig.c | 2
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c | 4
-rw-r--r--  net/ipv4/proc.c | 1
-rw-r--r--  net/ipv4/tcp_input.c | 2
-rw-r--r--  net/ipv4/tcp_minisocks.c | 1
-rw-r--r--  net/ipv4/xfrm4_policy.c | 22
-rw-r--r--  net/ipv6/addrconf.c | 795
-rw-r--r--  net/ipv6/icmp.c | 2
-rw-r--r--  net/ipv6/ip6_fib.c | 15
-rw-r--r--  net/ipv6/mcast.c | 4
-rw-r--r--  net/ipv6/netfilter/ip6t_hbh.c | 4
-rw-r--r--  net/ipv6/proc.c | 1
-rw-r--r--  net/ipv6/xfrm6_policy.c | 31
-rw-r--r--  net/irda/ircomm/ircomm_param.c | 2
-rw-r--r--  net/l2tp/Kconfig | 107
-rw-r--r--  net/l2tp/Makefile | 12
-rw-r--r--  net/l2tp/l2tp_core.c | 1692
-rw-r--r--  net/l2tp/l2tp_core.h | 304
-rw-r--r--  net/l2tp/l2tp_debugfs.c | 341
-rw-r--r--  net/l2tp/l2tp_eth.c | 361
-rw-r--r--  net/l2tp/l2tp_ip.c | 679
-rw-r--r--  net/l2tp/l2tp_netlink.c | 840
-rw-r--r--  net/l2tp/l2tp_ppp.c | 1837
-rw-r--r--  net/llc/llc_core.c | 6
-rw-r--r--  net/mac80211/Kconfig | 9
-rw-r--r--  net/mac80211/cfg.c | 27
-rw-r--r--  net/mac80211/debugfs_sta.c | 2
-rw-r--r--  net/mac80211/driver-ops.h | 8
-rw-r--r--  net/mac80211/ibss.c | 16
-rw-r--r--  net/mac80211/ieee80211_i.h | 8
-rw-r--r--  net/mac80211/iface.c | 124
-rw-r--r--  net/mac80211/main.c | 5
-rw-r--r--  net/mac80211/mlme.c | 79
-rw-r--r--  net/mac80211/rc80211_minstrel.c | 2
-rw-r--r--  net/mac80211/rc80211_minstrel.h | 11
-rw-r--r--  net/mac80211/rc80211_minstrel_debugfs.c | 41
-rw-r--r--  net/mac80211/rx.c | 9
-rw-r--r--  net/mac80211/scan.c | 71
-rw-r--r--  net/mac80211/sta_info.h | 2
-rw-r--r--  net/mac80211/status.c | 14
-rw-r--r--  net/mac80211/tx.c | 2
-rw-r--r--  net/netlink/af_netlink.c | 2
-rw-r--r--  net/netlink/genetlink.c | 6
-rw-r--r--  net/packet/af_packet.c | 8
-rw-r--r--  net/rds/af_rds.c | 9
-rw-r--r--  net/rds/cong.c | 2
-rw-r--r--  net/rds/ib_cm.c | 3
-rw-r--r--  net/rds/ib_rdma.c | 5
-rw-r--r--  net/rds/ib_recv.c | 4
-rw-r--r--  net/rds/ib_send.c | 20
-rw-r--r--  net/rds/iw_cm.c | 4
-rw-r--r--  net/rds/iw_recv.c | 4
-rw-r--r--  net/rds/iw_send.c | 3
-rw-r--r--  net/rds/loop.c | 7
-rw-r--r--  net/rds/rdma.c | 4
-rw-r--r--  net/rds/rdma_transport.c | 5
-rw-r--r--  net/rds/rds.h | 2
-rw-r--r--  net/rds/send.c | 38
-rw-r--r--  net/rds/tcp_recv.c | 1
-rw-r--r--  net/rds/tcp_send.c | 4
-rw-r--r--  net/rds/threads.c | 2
-rw-r--r--  net/rfkill/core.c | 53
-rw-r--r--  net/sched/act_api.c | 45
-rw-r--r--  net/sched/cls_api.c | 30
-rw-r--r--  net/sched/cls_u32.c | 4
-rw-r--r--  net/sched/sch_api.c | 112
-rw-r--r--  net/sched/sch_generic.c | 15
-rw-r--r--  net/sctp/ipv6.c | 25
-rw-r--r--  net/sctp/socket.c | 1
-rw-r--r--  net/socket.c | 5
-rw-r--r--  net/sunrpc/auth_gss/gss_spkm3_token.c | 2
-rw-r--r--  net/sunrpc/bc_svc.c | 2
-rw-r--r--  net/tipc/bcast.c | 35
-rw-r--r--  net/tipc/core.c | 2
-rw-r--r--  net/tipc/link.c | 19
-rw-r--r--  net/tipc/net.c | 4
-rw-r--r--  net/tipc/node.c | 2
-rw-r--r--  net/tipc/subscr.c | 15
-rw-r--r--  net/wimax/op-reset.c | 2
-rw-r--r--  net/wimax/op-state-get.c | 2
-rw-r--r--  net/wireless/mlme.c | 13
-rw-r--r--  net/wireless/nl80211.c | 131
-rw-r--r--  net/wireless/nl80211.h | 6
-rw-r--r--  net/wireless/wext-core.c | 134
-rw-r--r--  net/xfrm/xfrm_policy.c | 847
-rw-r--r--  net/xfrm/xfrm_state.c | 5
-rw-r--r--  net/xfrm/xfrm_user.c | 10
143 files changed, 15283 insertions, 2475 deletions
diff --git a/net/802/garp.c b/net/802/garp.c
index 9ed7c0e7dc17..941f2a324d3a 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -576,7 +576,7 @@ int garp_init_applicant(struct net_device *dev, struct garp_application *appl)
 	if (!app)
 		goto err2;
 
-	err = dev_mc_add(dev, appl->proto.group_address, ETH_ALEN, 0);
+	err = dev_mc_add(dev, appl->proto.group_address);
 	if (err < 0)
 		goto err3;
 
@@ -616,7 +616,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
 	garp_pdu_queue(app);
 	garp_queue_xmit(app);
 
-	dev_mc_delete(dev, appl->proto.group_address, ETH_ALEN, 0);
+	dev_mc_del(dev, appl->proto.group_address);
 	kfree(app);
 	garp_release_port(dev);
 }
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 97da977c2a23..3c1c8c14e929 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -357,13 +357,13 @@ static void vlan_sync_address(struct net_device *dev,
 	 * the new address */
 	if (compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) &&
 	    !compare_ether_addr(vlandev->dev_addr, dev->dev_addr))
-		dev_unicast_delete(dev, vlandev->dev_addr);
+		dev_uc_del(dev, vlandev->dev_addr);
 
 	/* vlan address was equal to the old address and is different from
 	 * the new address */
 	if (!compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) &&
 	    compare_ether_addr(vlandev->dev_addr, dev->dev_addr))
-		dev_unicast_add(dev, vlandev->dev_addr);
+		dev_uc_add(dev, vlandev->dev_addr);
 
 	memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN);
 }
@@ -533,6 +533,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		}
 		unregister_netdevice_many(&list);
 		break;
+
+	case NETDEV_PRE_TYPE_CHANGE:
+		/* Forbid underlaying device to change its type. */
+		return NOTIFY_BAD;
 	}
 
 out:
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 29b6348c8d4d..b5249c5fd4d3 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -470,7 +470,7 @@ static int vlan_dev_open(struct net_device *dev)
 		return -ENETDOWN;
 
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) {
-		err = dev_unicast_add(real_dev, dev->dev_addr);
+		err = dev_uc_add(real_dev, dev->dev_addr);
 		if (err < 0)
 			goto out;
 	}
@@ -499,7 +499,7 @@ clear_allmulti:
 	dev_set_allmulti(real_dev, -1);
 del_unicast:
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
-		dev_unicast_delete(real_dev, dev->dev_addr);
+		dev_uc_del(real_dev, dev->dev_addr);
 out:
 	netif_carrier_off(dev);
 	return err;
@@ -514,14 +514,14 @@ static int vlan_dev_stop(struct net_device *dev)
 		vlan_gvrp_request_leave(dev);
 
 	dev_mc_unsync(real_dev, dev);
-	dev_unicast_unsync(real_dev, dev);
+	dev_uc_unsync(real_dev, dev);
 	if (dev->flags & IFF_ALLMULTI)
 		dev_set_allmulti(real_dev, -1);
 	if (dev->flags & IFF_PROMISC)
 		dev_set_promiscuity(real_dev, -1);
 
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
-		dev_unicast_delete(real_dev, dev->dev_addr);
+		dev_uc_del(real_dev, dev->dev_addr);
 
 	netif_carrier_off(dev);
 	return 0;
@@ -540,13 +540,13 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
 		goto out;
 
 	if (compare_ether_addr(addr->sa_data, real_dev->dev_addr)) {
-		err = dev_unicast_add(real_dev, addr->sa_data);
+		err = dev_uc_add(real_dev, addr->sa_data);
 		if (err < 0)
 			return err;
 	}
 
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
-		dev_unicast_delete(real_dev, dev->dev_addr);
+		dev_uc_del(real_dev, dev->dev_addr);
 
 out:
 	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
@@ -663,7 +663,7 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
 static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
 {
 	dev_mc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
-	dev_unicast_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
+	dev_uc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
 }
 
 /*
diff --git a/net/Kconfig b/net/Kconfig
index 041c35edb763..0d68b40fc0e6 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -186,6 +186,7 @@ source "net/sctp/Kconfig"
186source "net/rds/Kconfig" 186source "net/rds/Kconfig"
187source "net/tipc/Kconfig" 187source "net/tipc/Kconfig"
188source "net/atm/Kconfig" 188source "net/atm/Kconfig"
189source "net/l2tp/Kconfig"
189source "net/802/Kconfig" 190source "net/802/Kconfig"
190source "net/bridge/Kconfig" 191source "net/bridge/Kconfig"
191source "net/dsa/Kconfig" 192source "net/dsa/Kconfig"
@@ -203,6 +204,11 @@ source "net/ieee802154/Kconfig"
203source "net/sched/Kconfig" 204source "net/sched/Kconfig"
204source "net/dcb/Kconfig" 205source "net/dcb/Kconfig"
205 206
207config RPS
208 boolean
209 depends on SMP && SYSFS
210 default y
211
206menu "Network testing" 212menu "Network testing"
207 213
208config NET_PKTGEN 214config NET_PKTGEN
@@ -275,5 +281,7 @@ source "net/wimax/Kconfig"
 
 source "net/rfkill/Kconfig"
 source "net/9p/Kconfig"
+source "net/caif/Kconfig"
+
 
 endif # if NET
diff --git a/net/Makefile b/net/Makefile
index 1542e7268a7b..cb7bdc1210cb 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_BT) += bluetooth/
 obj-$(CONFIG_SUNRPC)		+= sunrpc/
 obj-$(CONFIG_AF_RXRPC)		+= rxrpc/
 obj-$(CONFIG_ATM)		+= atm/
+obj-$(CONFIG_L2TP)		+= l2tp/
 obj-$(CONFIG_DECNET)		+= decnet/
 obj-$(CONFIG_ECONET)		+= econet/
 obj-$(CONFIG_PHONET)		+= phonet/
@@ -56,6 +57,7 @@ obj-$(CONFIG_NETLABEL) += netlabel/
 obj-$(CONFIG_IUCV)		+= iucv/
 obj-$(CONFIG_RFKILL)		+= rfkill/
 obj-$(CONFIG_NET_9P)		+= 9p/
+obj-$(CONFIG_CAIF)		+= caif/
 ifneq ($(CONFIG_DCB),)
 obj-y				+= dcb/
 endif
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 7b02967fbbe7..c410b93fda2e 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -782,7 +782,7 @@ static int atif_ioctl(int cmd, void __user *arg)
 				atrtr_create(&rtdef, dev);
 			}
 		}
-		dev_mc_add(dev, aarp_mcast, 6, 1);
+		dev_mc_add_global(dev, aarp_mcast);
 		return 0;
 
 	case SIOCGIFADDR:
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 696e218436e5..6262aeae398e 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -407,7 +407,6 @@ EXPORT_SYMBOL(atm_proc_root);
 
 int atm_proc_dev_register(struct atm_dev *dev)
 {
-	int digits, num;
 	int error;
 
 	/* No proc info */
@@ -415,16 +414,9 @@ int atm_proc_dev_register(struct atm_dev *dev)
 		return 0;
 
 	error = -ENOMEM;
-	digits = 0;
-	for (num = dev->number; num; num /= 10)
-		digits++;
-	if (!digits)
-		digits++;
-
-	dev->proc_name = kmalloc(strlen(dev->type) + digits + 2, GFP_KERNEL);
+	dev->proc_name = kasprintf(GFP_KERNEL, "%s:%d", dev->type, dev->number);
 	if (!dev->proc_name)
 		goto err_out;
-	sprintf(dev->proc_name, "%s:%d", dev->type, dev->number);
 
 	dev->proc_entry = proc_create_data(dev->proc_name, 0, atm_proc_root,
 					   &proc_atm_dev_ops, dev);
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 5643a2391e76..d48b33f4d4ba 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -88,7 +88,7 @@ static void bnep_net_set_mc_list(struct net_device *dev)
 		memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN);
 		r->len = htons(ETH_ALEN * 2);
 	} else {
-		struct dev_mc_list *dmi = dev->mc_list;
+		struct netdev_hw_addr *ha;
 		int i, len = skb->len;
 
 		if (dev->flags & IFF_BROADCAST) {
@@ -98,12 +98,12 @@ static void bnep_net_set_mc_list(struct net_device *dev)
 
 		/* FIXME: We should group addresses here. */
 
-		for (i = 0;
-		     i < netdev_mc_count(dev) && i < BNEP_MAX_MULTICAST_FILTERS;
-		     i++) {
-			memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN);
-			memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN);
-			dmi = dmi->next;
+		i = 0;
+		netdev_for_each_mc_addr(ha, dev) {
+			if (i == BNEP_MAX_MULTICAST_FILTERS)
+				break;
+			memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN);
+			memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN);
 		}
 		r->len = htons(skb->len - len);
 	}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 90a9024e5c1e..5b8a6e73b02f 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -26,11 +26,12 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	const unsigned char *dest = skb->data;
 	struct net_bridge_fdb_entry *dst;
 	struct net_bridge_mdb_entry *mdst;
+	struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
 
-	BR_INPUT_SKB_CB(skb)->brdev = dev;
+	brstats->tx_packets++;
+	brstats->tx_bytes += skb->len;
 
-	dev->stats.tx_packets++;
-	dev->stats.tx_bytes += skb->len;
+	BR_INPUT_SKB_CB(skb)->brdev = dev;
 
 	skb_reset_mac_header(skb);
 	skb_pull(skb, ETH_HLEN);
@@ -81,6 +82,31 @@ static int br_dev_stop(struct net_device *dev)
 	return 0;
 }
 
+static struct net_device_stats *br_get_stats(struct net_device *dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
+	struct br_cpu_netstats sum = { 0 };
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		const struct br_cpu_netstats *bstats
+			= per_cpu_ptr(br->stats, cpu);
+
+		sum.tx_bytes   += bstats->tx_bytes;
+		sum.tx_packets += bstats->tx_packets;
+		sum.rx_bytes   += bstats->rx_bytes;
+		sum.rx_packets += bstats->rx_packets;
+	}
+
+	stats->tx_bytes   = sum.tx_bytes;
+	stats->tx_packets = sum.tx_packets;
+	stats->rx_bytes   = sum.rx_bytes;
+	stats->rx_packets = sum.rx_packets;
+
+	return stats;
+}
+
 static int br_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct net_bridge *br = netdev_priv(dev);
@@ -180,19 +206,28 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_open		 = br_dev_open,
 	.ndo_stop		 = br_dev_stop,
 	.ndo_start_xmit		 = br_dev_xmit,
+	.ndo_get_stats		 = br_get_stats,
 	.ndo_set_mac_address	 = br_set_mac_address,
 	.ndo_set_multicast_list	 = br_dev_set_multicast_list,
 	.ndo_change_mtu		 = br_change_mtu,
 	.ndo_do_ioctl		 = br_dev_ioctl,
 };
 
+static void br_dev_free(struct net_device *dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	free_percpu(br->stats);
+	free_netdev(dev);
+}
+
 void br_dev_setup(struct net_device *dev)
 {
 	random_ether_addr(dev->dev_addr);
 	ether_setup(dev);
 
 	dev->netdev_ops = &br_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->destructor = br_dev_free;
 	SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
 	dev->tx_queue_len = 0;
 	dev->priv_flags = IFF_EBRIDGE;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 0b6b1f2ff7ac..521439333316 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -186,6 +186,12 @@ static struct net_device *new_bridge_dev(struct net *net, const char *name)
 	br = netdev_priv(dev);
 	br->dev = dev;
 
+	br->stats = alloc_percpu(struct br_cpu_netstats);
+	if (!br->stats) {
+		free_netdev(dev);
+		return NULL;
+	}
+
 	spin_lock_init(&br->lock);
 	INIT_LIST_HEAD(&br->port_list);
 	spin_lock_init(&br->hash_lock);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index a82dde2d2ead..e7f4c1d02f57 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -24,9 +24,11 @@ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
 static int br_pass_frame_up(struct sk_buff *skb)
 {
 	struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
+	struct net_bridge *br = netdev_priv(brdev);
+	struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
 
-	brdev->stats.rx_packets++;
-	brdev->stats.rx_bytes += skb->len;
+	brstats->rx_packets++;
+	brstats->rx_bytes += skb->len;
 
 	indev = skb->dev;
 	skb->dev = brdev;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index f29ada827a6a..3fe86ffc069c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1003,8 +1003,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 	if (!pskb_may_pull(skb2, sizeof(*ih)))
 		goto out;
 
-	iph = ip_hdr(skb2);
-
 	switch (skb2->ip_summed) {
 	case CHECKSUM_COMPLETE:
 		if (!csum_fold(skb2->csum))
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 763a3ec292e5..1413b72acc7f 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -82,6 +82,10 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 	case NETDEV_UNREGISTER:
 		br_del_if(br, dev);
 		break;
+
+	case NETDEV_PRE_TYPE_CHANGE:
+		/* Forbid underlaying device to change its type. */
+		return NOTIFY_BAD;
 	}
 
 	/* Events that may cause spanning tree to refresh */
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 846d7d1e2075..791d4ab0fd4d 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -135,6 +135,14 @@ struct net_bridge
 	spinlock_t			lock;
 	struct list_head		port_list;
 	struct net_device		*dev;
+
+	struct br_cpu_netstats __percpu {
+		unsigned long	rx_packets;
+		unsigned long	rx_bytes;
+		unsigned long	tx_packets;
+		unsigned long	tx_bytes;
+	} *stats;
+
 	spinlock_t			hash_lock;
 	struct hlist_head		hash[BR_HASH_SIZE];
 	unsigned long			feature_mask;
diff --git a/net/caif/Kconfig b/net/caif/Kconfig
new file mode 100644
index 000000000000..cd1daf6008bd
--- /dev/null
+++ b/net/caif/Kconfig
@@ -0,0 +1,48 @@
1#
2# CAIF net configurations
3#
4
5#menu "CAIF Support"
6comment "CAIF Support"
7menuconfig CAIF
8 tristate "Enable CAIF support"
9 select CRC_CCITT
10 default n
11 ---help---
12 The "Communication CPU to Application CPU Interface" (CAIF) is a packet
13 based connection-oriented MUX protocol developed by ST-Ericsson for use
14 with its modems. It is accessed from user space as sockets (PF_CAIF).
15
16 Say Y (or M) here if you build for a phone product (e.g. Android or
17 MeeGo ) that uses CAIF as transport, if unsure say N.
18
19 If you select to build it as module then CAIF_NETDEV also needs to be
20 built as modules. You will also need to say yes to any CAIF physical
21 devices that your platform requires.
22
23 See Documentation/networking/caif for a further explanation on how to
24 use and configure CAIF.
25
26if CAIF
27
28config CAIF_DEBUG
29 bool "Enable Debug"
30 default n
31 --- help ---
32 Enable the inclusion of debug code in the CAIF stack.
33 Be aware that doing this will impact performance.
34 If unsure say N.
35
36
37config CAIF_NETDEV
38 tristate "CAIF GPRS Network device"
39 default CAIF
40 ---help---
41 Say Y if you will be using a CAIF based GPRS network device.
42 This can be either built-in or a loadable module,
43 If you select to build it as a built-in then the main CAIF device must
44 also be a built-in.
45 If unsure say Y.
46
47endif
48#endmenu
diff --git a/net/caif/Makefile b/net/caif/Makefile
new file mode 100644
index 000000000000..34852af2595e
--- /dev/null
+++ b/net/caif/Makefile
@@ -0,0 +1,26 @@
1ifeq ($(CONFIG_CAIF_DEBUG),1)
2CAIF_DBG_FLAGS := -DDEBUG
3endif
4
5ccflags-y := $(CAIF_FLAGS) $(CAIF_DBG_FLAGS)
6
7caif-objs := caif_dev.o \
8 cfcnfg.o cfmuxl.o cfctrl.o \
9 cffrml.o cfveil.o cfdbgl.o\
10 cfserl.o cfdgml.o \
11 cfrfml.o cfvidl.o cfutill.o \
12 cfsrvl.o cfpkt_skbuff.o caif_config_util.o
13clean-dirs:= .tmp_versions
14
15clean-files:= \
16 Module.symvers \
17 modules.order \
18 *.cmd \
19 *.o \
20 *~
21
22obj-$(CONFIG_CAIF) += caif.o
23obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o
24obj-$(CONFIG_CAIF) += caif_socket.o
25
26export-objs := caif.o
diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c
new file mode 100644
index 000000000000..6f36580366f0
--- /dev/null
+++ b/net/caif/caif_config_util.c
@@ -0,0 +1,87 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/module.h>
8#include <linux/spinlock.h>
9#include <net/caif/cfctrl.h>
10#include <net/caif/cfcnfg.h>
11#include <net/caif/caif_dev.h>
12
13int connect_req_to_link_param(struct cfcnfg *cnfg,
14 struct caif_connect_request *s,
15 struct cfctrl_link_param *l)
16{
17 struct dev_info *dev_info;
18 enum cfcnfg_phy_preference pref;
19 memset(l, 0, sizeof(*l));
20 l->priority = s->priority;
21
22 if (s->link_name[0] != '\0')
23 l->phyid = cfcnfg_get_named(cnfg, s->link_name);
24 else {
25 switch (s->link_selector) {
26 case CAIF_LINK_HIGH_BANDW:
27 pref = CFPHYPREF_HIGH_BW;
28 break;
29 case CAIF_LINK_LOW_LATENCY:
30 pref = CFPHYPREF_LOW_LAT;
31 break;
32 default:
33 return -EINVAL;
34 }
35 dev_info = cfcnfg_get_phyid(cnfg, pref);
36 if (dev_info == NULL)
37 return -ENODEV;
38 l->phyid = dev_info->id;
39 }
40 switch (s->protocol) {
41 case CAIFPROTO_AT:
42 l->linktype = CFCTRL_SRV_VEI;
43 if (s->sockaddr.u.at.type == CAIF_ATTYPE_PLAIN)
44 l->chtype = 0x02;
45 else
46 l->chtype = s->sockaddr.u.at.type;
47 l->endpoint = 0x00;
48 break;
49 case CAIFPROTO_DATAGRAM:
50 l->linktype = CFCTRL_SRV_DATAGRAM;
51 l->chtype = 0x00;
52 l->u.datagram.connid = s->sockaddr.u.dgm.connection_id;
53 break;
54 case CAIFPROTO_DATAGRAM_LOOP:
55 l->linktype = CFCTRL_SRV_DATAGRAM;
56 l->chtype = 0x03;
57 l->endpoint = 0x00;
58 l->u.datagram.connid = s->sockaddr.u.dgm.connection_id;
59 break;
60 case CAIFPROTO_RFM:
61 l->linktype = CFCTRL_SRV_RFM;
62 l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
63 strncpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
64 sizeof(l->u.rfm.volume)-1);
65 l->u.rfm.volume[sizeof(l->u.rfm.volume)-1] = 0;
66 break;
67 case CAIFPROTO_UTIL:
68 l->linktype = CFCTRL_SRV_UTIL;
69 l->endpoint = 0x00;
70 l->chtype = 0x00;
71 strncpy(l->u.utility.name, s->sockaddr.u.util.service,
72 sizeof(l->u.utility.name)-1);
73 l->u.utility.name[sizeof(l->u.utility.name)-1] = 0;
74 caif_assert(sizeof(l->u.utility.name) > 10);
75 l->u.utility.paramlen = s->param.size;
76 if (l->u.utility.paramlen > sizeof(l->u.utility.params))
77 l->u.utility.paramlen = sizeof(l->u.utility.params);
78
79 memcpy(l->u.utility.params, s->param.data,
80 l->u.utility.paramlen);
81
82 break;
83 default:
84 return -EINVAL;
85 }
86 return 0;
87}
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
new file mode 100644
index 000000000000..e84837e1bc86
--- /dev/null
+++ b/net/caif/caif_dev.c
@@ -0,0 +1,413 @@
1/*
2 * CAIF Interface registration.
3 * Copyright (C) ST-Ericsson AB 2010
4 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
5 * License terms: GNU General Public License (GPL) version 2
6 *
7 * Borrowed heavily from file: pn_dev.c. Thanks to
8 * Remi Denis-Courmont <remi.denis-courmont@nokia.com>
9 * and Sakari Ailus <sakari.ailus@nokia.com>
10 */
11
12#include <linux/version.h>
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/if_arp.h>
16#include <linux/net.h>
17#include <linux/netdevice.h>
18#include <linux/skbuff.h>
19#include <linux/sched.h>
20#include <linux/wait.h>
21#include <net/netns/generic.h>
22#include <net/net_namespace.h>
23#include <net/pkt_sched.h>
24#include <net/caif/caif_device.h>
25#include <net/caif/caif_dev.h>
26#include <net/caif/caif_layer.h>
27#include <net/caif/cfpkt.h>
28#include <net/caif/cfcnfg.h>
29
30MODULE_LICENSE("GPL");
31#define TIMEOUT (HZ*5)
32
33/* Used for local tracking of the CAIF net devices */
34struct caif_device_entry {
35 struct cflayer layer;
36 struct list_head list;
37 atomic_t in_use;
38 atomic_t state;
39 u16 phyid;
40 struct net_device *netdev;
41 wait_queue_head_t event;
42};
43
44struct caif_device_entry_list {
45 struct list_head list;
46 /* Protects simulanous deletes in list */
47 spinlock_t lock;
48};
49
50struct caif_net {
51 struct caif_device_entry_list caifdevs;
52};
53
54static int caif_net_id;
55static struct cfcnfg *cfg;
56
57static struct caif_device_entry_list *caif_device_list(struct net *net)
58{
59 struct caif_net *caifn;
60 BUG_ON(!net);
61 caifn = net_generic(net, caif_net_id);
62 BUG_ON(!caifn);
63 return &caifn->caifdevs;
64}
65
66/* Allocate new CAIF device. */
67static struct caif_device_entry *caif_device_alloc(struct net_device *dev)
68{
69 struct caif_device_entry_list *caifdevs;
70 struct caif_device_entry *caifd;
71 caifdevs = caif_device_list(dev_net(dev));
72 BUG_ON(!caifdevs);
73 caifd = kzalloc(sizeof(*caifd), GFP_ATOMIC);
74 if (!caifd)
75 return NULL;
76 caifd->netdev = dev;
77 list_add(&caifd->list, &caifdevs->list);
78 init_waitqueue_head(&caifd->event);
79 return caifd;
80}
81
82static struct caif_device_entry *caif_get(struct net_device *dev)
83{
84 struct caif_device_entry_list *caifdevs =
85 caif_device_list(dev_net(dev));
86 struct caif_device_entry *caifd;
87 BUG_ON(!caifdevs);
88 list_for_each_entry(caifd, &caifdevs->list, list) {
89 if (caifd->netdev == dev)
90 return caifd;
91 }
92 return NULL;
93}
94
95static void caif_device_destroy(struct net_device *dev)
96{
97 struct caif_device_entry_list *caifdevs =
98 caif_device_list(dev_net(dev));
99 struct caif_device_entry *caifd;
100 ASSERT_RTNL();
101 if (dev->type != ARPHRD_CAIF)
102 return;
103
104 spin_lock_bh(&caifdevs->lock);
105 caifd = caif_get(dev);
106 if (caifd == NULL) {
107 spin_unlock_bh(&caifdevs->lock);
108 return;
109 }
110
111 list_del(&caifd->list);
112 spin_unlock_bh(&caifdevs->lock);
113
114 kfree(caifd);
115 return;
116}
117
118static int transmit(struct cflayer *layer, struct cfpkt *pkt)
119{
120 struct caif_device_entry *caifd =
121 container_of(layer, struct caif_device_entry, layer);
122 struct sk_buff *skb, *skb2;
123 int ret = -EINVAL;
124 skb = cfpkt_tonative(pkt);
125 skb->dev = caifd->netdev;
126 /*
127 * Don't allow SKB to be destroyed upon error, but signal resend
128 * notification to clients. We can't rely on the return value as
129 * congestion (NET_XMIT_CN) sometimes drops the packet, sometimes don't.
130 */
131 if (netif_queue_stopped(caifd->netdev))
132 return -EAGAIN;
133 skb2 = skb_get(skb);
134
135 ret = dev_queue_xmit(skb2);
136
137 if (!ret)
138 kfree_skb(skb);
139 else
140 return -EAGAIN;
141
142 return 0;
143}
144
145static int modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
146{
147 struct caif_device_entry *caifd;
148 struct caif_dev_common *caifdev;
149 caifd = container_of(layr, struct caif_device_entry, layer);
150 caifdev = netdev_priv(caifd->netdev);
151 if (ctrl == _CAIF_MODEMCMD_PHYIF_USEFULL) {
152 atomic_set(&caifd->in_use, 1);
153 wake_up_interruptible(&caifd->event);
154
155 } else if (ctrl == _CAIF_MODEMCMD_PHYIF_USELESS) {
156 atomic_set(&caifd->in_use, 0);
157 wake_up_interruptible(&caifd->event);
158 }
159 return 0;
160}
161
162/*
163 * Stuff received packets to associated sockets.
164 * On error, returns non-zero and releases the skb.
165 */
166static int receive(struct sk_buff *skb, struct net_device *dev,
167 struct packet_type *pkttype, struct net_device *orig_dev)
168{
169 struct net *net;
170 struct cfpkt *pkt;
171 struct caif_device_entry *caifd;
172 net = dev_net(dev);
173 pkt = cfpkt_fromnative(CAIF_DIR_IN, skb);
174 caifd = caif_get(dev);
175 if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd)
176 return NET_RX_DROP;
177
178 if (caifd->layer.up->receive(caifd->layer.up, pkt))
179 return NET_RX_DROP;
180
181 return 0;
182}
183
184static struct packet_type caif_packet_type __read_mostly = {
185 .type = cpu_to_be16(ETH_P_CAIF),
186 .func = receive,
187};
188
189static void dev_flowctrl(struct net_device *dev, int on)
190{
191 struct caif_device_entry *caifd = caif_get(dev);
192 if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd)
193 return;
194
195 caifd->layer.up->ctrlcmd(caifd->layer.up,
196 on ?
197 _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND :
198 _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND,
199 caifd->layer.id);
200}
201
202/* notify Caif of device events */
203static int caif_device_notify(struct notifier_block *me, unsigned long what,
204 void *arg)
205{
206 struct net_device *dev = arg;
207 struct caif_device_entry *caifd = NULL;
208 struct caif_dev_common *caifdev;
209 enum cfcnfg_phy_preference pref;
210 int res = -EINVAL;
211 enum cfcnfg_phy_type phy_type;
212
213 if (dev->type != ARPHRD_CAIF)
214 return 0;
215
216 switch (what) {
217 case NETDEV_REGISTER:
218 pr_info("CAIF: %s():register %s\n", __func__, dev->name);
219 caifd = caif_device_alloc(dev);
220 if (caifd == NULL)
221 break;
222 caifdev = netdev_priv(dev);
223 caifdev->flowctrl = dev_flowctrl;
224 atomic_set(&caifd->state, what);
225 res = 0;
226 break;
227
228 case NETDEV_UP:
229 pr_info("CAIF: %s(): up %s\n", __func__, dev->name);
230 caifd = caif_get(dev);
231 if (caifd == NULL)
232 break;
233 caifdev = netdev_priv(dev);
234 if (atomic_read(&caifd->state) == NETDEV_UP) {
235 pr_info("CAIF: %s():%s already up\n",
236 __func__, dev->name);
237 break;
238 }
239 atomic_set(&caifd->state, what);
240 caifd->layer.transmit = transmit;
241 caifd->layer.modemcmd = modemcmd;
242
243 if (caifdev->use_frag)
244 phy_type = CFPHYTYPE_FRAG;
245 else
246 phy_type = CFPHYTYPE_CAIF;
247
248 switch (caifdev->link_select) {
249 case CAIF_LINK_HIGH_BANDW:
250 pref = CFPHYPREF_LOW_LAT;
251 break;
252 case CAIF_LINK_LOW_LATENCY:
253 pref = CFPHYPREF_HIGH_BW;
254 break;
255 default:
256 pref = CFPHYPREF_HIGH_BW;
257 break;
258 }
259
260 cfcnfg_add_phy_layer(get_caif_conf(),
261 phy_type,
262 dev,
263 &caifd->layer,
264 &caifd->phyid,
265 pref,
266 caifdev->use_fcs,
267 caifdev->use_stx);
268 strncpy(caifd->layer.name, dev->name,
269 sizeof(caifd->layer.name) - 1);
270 caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
271 break;
272
273 case NETDEV_GOING_DOWN:
274 caifd = caif_get(dev);
275 if (caifd == NULL)
276 break;
277 pr_info("CAIF: %s():going down %s\n", __func__, dev->name);
278
279 if (atomic_read(&caifd->state) == NETDEV_GOING_DOWN ||
280 atomic_read(&caifd->state) == NETDEV_DOWN)
281 break;
282
283 atomic_set(&caifd->state, what);
284 if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd)
285 return -EINVAL;
286 caifd->layer.up->ctrlcmd(caifd->layer.up,
287 _CAIF_CTRLCMD_PHYIF_DOWN_IND,
288 caifd->layer.id);
289 res = wait_event_interruptible_timeout(caifd->event,
290 atomic_read(&caifd->in_use) == 0,
291 TIMEOUT);
292 break;
293
294 case NETDEV_DOWN:
295 caifd = caif_get(dev);
296 if (caifd == NULL)
297 break;
298 pr_info("CAIF: %s(): down %s\n", __func__, dev->name);
299 if (atomic_read(&caifd->in_use))
300 pr_warning("CAIF: %s(): "
301 "Unregistering an active CAIF device: %s\n",
302 __func__, dev->name);
303 cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer);
304 atomic_set(&caifd->state, what);
305 break;
306
307 case NETDEV_UNREGISTER:
308 caifd = caif_get(dev);
309 pr_info("CAIF: %s(): unregister %s\n", __func__, dev->name);
310 atomic_set(&caifd->state, what);
311 caif_device_destroy(dev);
312 break;
313 }
314 return 0;
315}
316
317static struct notifier_block caif_device_notifier = {
318 .notifier_call = caif_device_notify,
319 .priority = 0,
320};
321
322
323struct cfcnfg *get_caif_conf(void)
324{
325 return cfg;
326}
327EXPORT_SYMBOL(get_caif_conf);
328
329int caif_connect_client(struct caif_connect_request *conn_req,
330 struct cflayer *client_layer)
331{
332 struct cfctrl_link_param param;
333 if (connect_req_to_link_param(get_caif_conf(), conn_req, &param) == 0)
334 /* Hook up the adaptation layer. */
335 return cfcnfg_add_adaptation_layer(get_caif_conf(),
336 &param, client_layer);
337
338 return -EINVAL;
339
340 caif_assert(0);
341}
342EXPORT_SYMBOL(caif_connect_client);
343
344int caif_disconnect_client(struct cflayer *adap_layer)
345{
346 return cfcnfg_del_adapt_layer(get_caif_conf(), adap_layer);
347}
348EXPORT_SYMBOL(caif_disconnect_client);
349
350/* Per-namespace Caif devices handling */
351static int caif_init_net(struct net *net)
352{
353 struct caif_net *caifn = net_generic(net, caif_net_id);
354 INIT_LIST_HEAD(&caifn->caifdevs.list);
355 spin_lock_init(&caifn->caifdevs.lock);
356 return 0;
357}
358
359static void caif_exit_net(struct net *net)
360{
361 struct net_device *dev;
362 int res;
363 rtnl_lock();
364 for_each_netdev(net, dev) {
365 if (dev->type != ARPHRD_CAIF)
366 continue;
367 res = dev_close(dev);
368 caif_device_destroy(dev);
369 }
370 rtnl_unlock();
371}
372
373static struct pernet_operations caif_net_ops = {
374 .init = caif_init_net,
375 .exit = caif_exit_net,
376 .id = &caif_net_id,
377 .size = sizeof(struct caif_net),
378};
379
380/* Initialize Caif devices list */
381static int __init caif_device_init(void)
382{
383 int result;
384 cfg = cfcnfg_create();
385 if (!cfg) {
386 pr_warning("CAIF: %s(): can't create cfcnfg.\n", __func__);
387 goto err_cfcnfg_create_failed;
388 }
389 result = register_pernet_device(&caif_net_ops);
390
391 if (result) {
392 kfree(cfg);
393 cfg = NULL;
394 return result;
395 }
396 dev_add_pack(&caif_packet_type);
397 register_netdevice_notifier(&caif_device_notifier);
398
399 return result;
400err_cfcnfg_create_failed:
401 return -ENODEV;
402}
403
404static void __exit caif_device_exit(void)
405{
406 dev_remove_pack(&caif_packet_type);
407 unregister_pernet_device(&caif_net_ops);
408 unregister_netdevice_notifier(&caif_device_notifier);
409 cfcnfg_remove(cfg);
410}
411
412module_init(caif_device_init);
413module_exit(caif_device_exit);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
new file mode 100644
index 000000000000..cdf62b9fefac
--- /dev/null
+++ b/net/caif/caif_socket.c
@@ -0,0 +1,1391 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland sjur.brandeland@stericsson.com
4 * Per Sigmond per.sigmond@stericsson.com
5 * License terms: GNU General Public License (GPL) version 2
6 */
7
8#include <linux/fs.h>
9#include <linux/init.h>
10#include <linux/module.h>
11#include <linux/sched.h>
12#include <linux/spinlock.h>
13#include <linux/mutex.h>
14#include <linux/list.h>
15#include <linux/wait.h>
16#include <linux/poll.h>
17#include <linux/tcp.h>
18#include <linux/uaccess.h>
19#include <asm/atomic.h>
20
21#include <linux/caif/caif_socket.h>
22#include <net/caif/caif_layer.h>
23#include <net/caif/caif_dev.h>
24#include <net/caif/cfpkt.h>
25
26MODULE_LICENSE("GPL");
27
28#define CHNL_SKT_READ_QUEUE_HIGH 200
29#define CHNL_SKT_READ_QUEUE_LOW 100
30
31static int caif_sockbuf_size = 40000;
32static atomic_t caif_nr_socks = ATOMIC_INIT(0);
33
34#define CONN_STATE_OPEN_BIT 1
35#define CONN_STATE_PENDING_BIT 2
36#define CONN_STATE_PEND_DESTROY_BIT 3
37#define CONN_REMOTE_SHUTDOWN_BIT 4
38
39#define TX_FLOW_ON_BIT 1
40#define RX_FLOW_ON_BIT 2
41
42#define STATE_IS_OPEN(cf_sk) test_bit(CONN_STATE_OPEN_BIT,\
43 (void *) &(cf_sk)->conn_state)
44#define STATE_IS_REMOTE_SHUTDOWN(cf_sk) test_bit(CONN_REMOTE_SHUTDOWN_BIT,\
45 (void *) &(cf_sk)->conn_state)
46#define STATE_IS_PENDING(cf_sk) test_bit(CONN_STATE_PENDING_BIT,\
47 (void *) &(cf_sk)->conn_state)
48#define STATE_IS_PENDING_DESTROY(cf_sk) test_bit(CONN_STATE_PEND_DESTROY_BIT,\
49 (void *) &(cf_sk)->conn_state)
50
51#define SET_STATE_PENDING_DESTROY(cf_sk) set_bit(CONN_STATE_PEND_DESTROY_BIT,\
52 (void *) &(cf_sk)->conn_state)
53#define SET_STATE_OPEN(cf_sk) set_bit(CONN_STATE_OPEN_BIT,\
54 (void *) &(cf_sk)->conn_state)
55#define SET_STATE_CLOSED(cf_sk) clear_bit(CONN_STATE_OPEN_BIT,\
56 (void *) &(cf_sk)->conn_state)
57#define SET_PENDING_ON(cf_sk) set_bit(CONN_STATE_PENDING_BIT,\
58 (void *) &(cf_sk)->conn_state)
59#define SET_PENDING_OFF(cf_sk) clear_bit(CONN_STATE_PENDING_BIT,\
60 (void *) &(cf_sk)->conn_state)
61#define SET_REMOTE_SHUTDOWN(cf_sk) set_bit(CONN_REMOTE_SHUTDOWN_BIT,\
62 (void *) &(cf_sk)->conn_state)
63
64#define SET_REMOTE_SHUTDOWN_OFF(dev) clear_bit(CONN_REMOTE_SHUTDOWN_BIT,\
65 (void *) &(dev)->conn_state)
66#define RX_FLOW_IS_ON(cf_sk) test_bit(RX_FLOW_ON_BIT,\
67 (void *) &(cf_sk)->flow_state)
68#define TX_FLOW_IS_ON(cf_sk) test_bit(TX_FLOW_ON_BIT,\
69 (void *) &(cf_sk)->flow_state)
70
71#define SET_RX_FLOW_OFF(cf_sk) clear_bit(RX_FLOW_ON_BIT,\
72 (void *) &(cf_sk)->flow_state)
73#define SET_RX_FLOW_ON(cf_sk) set_bit(RX_FLOW_ON_BIT,\
74 (void *) &(cf_sk)->flow_state)
75#define SET_TX_FLOW_OFF(cf_sk) clear_bit(TX_FLOW_ON_BIT,\
76 (void *) &(cf_sk)->flow_state)
77#define SET_TX_FLOW_ON(cf_sk) set_bit(TX_FLOW_ON_BIT,\
78 (void *) &(cf_sk)->flow_state)
79
80#define SKT_READ_FLAG 0x01
81#define SKT_WRITE_FLAG 0x02
82static struct dentry *debugfsdir;
83#include <linux/debugfs.h>
84
85#ifdef CONFIG_DEBUG_FS
86struct debug_fs_counter {
87 atomic_t num_open;
88 atomic_t num_close;
89 atomic_t num_init;
90 atomic_t num_init_resp;
91 atomic_t num_init_fail_resp;
92 atomic_t num_deinit;
93 atomic_t num_deinit_resp;
94 atomic_t num_remote_shutdown_ind;
95 atomic_t num_tx_flow_off_ind;
96 atomic_t num_tx_flow_on_ind;
97 atomic_t num_rx_flow_off;
98 atomic_t num_rx_flow_on;
99 atomic_t skb_in_use;
100 atomic_t skb_alloc;
101 atomic_t skb_free;
102};
103static struct debug_fs_counter cnt;
104#define dbfs_atomic_inc(v) atomic_inc(v)
105#define dbfs_atomic_dec(v) atomic_dec(v)
106#else
107#define dbfs_atomic_inc(v)
108#define dbfs_atomic_dec(v)
109#endif
110
111/* The AF_CAIF socket */
112struct caifsock {
113 /* NOTE: sk has to be the first member */
114 struct sock sk;
115 struct cflayer layer;
116 char name[CAIF_LAYER_NAME_SZ];
117 u32 conn_state;
118 u32 flow_state;
119 struct cfpktq *pktq;
120 int file_mode;
121 struct caif_connect_request conn_req;
122 int read_queue_len;
123 /* protect updates of read_queue_len */
124 spinlock_t read_queue_len_lock;
125 struct dentry *debugfs_socket_dir;
126};
127
128static void drain_queue(struct caifsock *cf_sk);
129
130/* Packet Receive Callback function called from CAIF Stack */
131static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt)
132{
133 struct caifsock *cf_sk;
134 int read_queue_high;
135 cf_sk = container_of(layr, struct caifsock, layer);
136
137 if (!STATE_IS_OPEN(cf_sk)) {
138 /*FIXME: This should be allowed finally!*/
139 pr_debug("CAIF: %s(): called after close request\n", __func__);
140 cfpkt_destroy(pkt);
141 return 0;
142 }
143 /* NOTE: This function may be called in Tasklet context! */
144
145 /* The queue has its own lock */
146 cfpkt_queue(cf_sk->pktq, pkt, 0);
147
148 spin_lock(&cf_sk->read_queue_len_lock);
149 cf_sk->read_queue_len++;
150
151 read_queue_high = (cf_sk->read_queue_len > CHNL_SKT_READ_QUEUE_HIGH);
152 spin_unlock(&cf_sk->read_queue_len_lock);
153
154 if (RX_FLOW_IS_ON(cf_sk) && read_queue_high) {
155 dbfs_atomic_inc(&cnt.num_rx_flow_off);
156 SET_RX_FLOW_OFF(cf_sk);
157
158 /* Send flow off (NOTE: must not sleep) */
159 pr_debug("CAIF: %s():"
160 " sending flow OFF (queue len = %d)\n",
161 __func__,
162 cf_sk->read_queue_len);
163 caif_assert(cf_sk->layer.dn);
164 caif_assert(cf_sk->layer.dn->ctrlcmd);
165
166 (void) cf_sk->layer.dn->modemcmd(cf_sk->layer.dn,
167 CAIF_MODEMCMD_FLOW_OFF_REQ);
168 }
169
170 /* Signal reader that data is available. */
171
172 wake_up_interruptible(cf_sk->sk.sk_sleep);
173
174 return 0;
175}
176
177/* Packet Flow Control Callback function called from CAIF */
178static void caif_sktflowctrl_cb(struct cflayer *layr,
179 enum caif_ctrlcmd flow,
180 int phyid)
181{
182 struct caifsock *cf_sk;
183
184 /* NOTE: This function may be called in Tasklet context! */
185 pr_debug("CAIF: %s(): flowctrl func called: %s.\n",
186 __func__,
187 flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" :
188 flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" :
189 flow == CAIF_CTRLCMD_INIT_RSP ? "INIT_RSP" :
190 flow == CAIF_CTRLCMD_DEINIT_RSP ? "DEINIT_RSP" :
191 flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "INIT_FAIL_RSP" :
192 flow ==
193 CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ? "REMOTE_SHUTDOWN" :
194 "UKNOWN CTRL COMMAND");
195
196 if (layr == NULL)
197 return;
198
199 cf_sk = container_of(layr, struct caifsock, layer);
200
201 switch (flow) {
202 case CAIF_CTRLCMD_FLOW_ON_IND:
203 dbfs_atomic_inc(&cnt.num_tx_flow_on_ind);
204 /* Signal reader that data is available. */
205 SET_TX_FLOW_ON(cf_sk);
206 wake_up_interruptible(cf_sk->sk.sk_sleep);
207 break;
208
209 case CAIF_CTRLCMD_FLOW_OFF_IND:
210 dbfs_atomic_inc(&cnt.num_tx_flow_off_ind);
211 SET_TX_FLOW_OFF(cf_sk);
212 break;
213
214 case CAIF_CTRLCMD_INIT_RSP:
215 dbfs_atomic_inc(&cnt.num_init_resp);
216 /* Signal reader that data is available. */
217 caif_assert(STATE_IS_OPEN(cf_sk));
218 SET_PENDING_OFF(cf_sk);
219 SET_TX_FLOW_ON(cf_sk);
220 wake_up_interruptible(cf_sk->sk.sk_sleep);
221 break;
222
223 case CAIF_CTRLCMD_DEINIT_RSP:
224 dbfs_atomic_inc(&cnt.num_deinit_resp);
225 caif_assert(!STATE_IS_OPEN(cf_sk));
226 SET_PENDING_OFF(cf_sk);
227 if (!STATE_IS_PENDING_DESTROY(cf_sk)) {
228 if (cf_sk->sk.sk_sleep != NULL)
229 wake_up_interruptible(cf_sk->sk.sk_sleep);
230 }
231 dbfs_atomic_inc(&cnt.num_deinit);
232 sock_put(&cf_sk->sk);
233 break;
234
235 case CAIF_CTRLCMD_INIT_FAIL_RSP:
236 dbfs_atomic_inc(&cnt.num_init_fail_resp);
237 caif_assert(STATE_IS_OPEN(cf_sk));
238 SET_STATE_CLOSED(cf_sk);
239 SET_PENDING_OFF(cf_sk);
240 SET_TX_FLOW_OFF(cf_sk);
241 wake_up_interruptible(cf_sk->sk.sk_sleep);
242 break;
243
244 case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:
245 dbfs_atomic_inc(&cnt.num_remote_shutdown_ind);
246 SET_REMOTE_SHUTDOWN(cf_sk);
247 /* Use sk_shutdown to indicate remote shutdown indication */
248 cf_sk->sk.sk_shutdown |= RCV_SHUTDOWN;
249 cf_sk->file_mode = 0;
250 wake_up_interruptible(cf_sk->sk.sk_sleep);
251 break;
252
253 default:
254 pr_debug("CAIF: %s(): Unexpected flow command %d\n",
255 __func__, flow);
256 }
257}
258
259static void skb_destructor(struct sk_buff *skb)
260{
261 dbfs_atomic_inc(&cnt.skb_free);
262 dbfs_atomic_dec(&cnt.skb_in_use);
263}
264
265
266static int caif_recvmsg(struct kiocb *iocb, struct socket *sock,
267 struct msghdr *m, size_t buf_len, int flags)
268
269{
270 struct sock *sk = sock->sk;
271 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
272 struct cfpkt *pkt = NULL;
273 size_t len;
274 int result;
275 struct sk_buff *skb;
276 ssize_t ret = -EIO;
277 int read_queue_low;
278
279 if (cf_sk == NULL) {
280 pr_debug("CAIF: %s(): private_data not set!\n",
281 __func__);
282 ret = -EBADFD;
283 goto read_error;
284 }
285
286 /* Don't do multiple iovec entries yet */
287 if (m->msg_iovlen != 1)
288 return -EOPNOTSUPP;
289
290 if (unlikely(!buf_len))
291 return -EINVAL;
292
293 lock_sock(&(cf_sk->sk));
294
295 caif_assert(cf_sk->pktq);
296
297 if (!STATE_IS_OPEN(cf_sk)) {
298 /* Socket is closed or closing. */
299 if (!STATE_IS_PENDING(cf_sk)) {
300 pr_debug("CAIF: %s(): socket is closed (by remote)\n",
301 __func__);
302 ret = -EPIPE;
303 } else {
304 pr_debug("CAIF: %s(): socket is closing..\n", __func__);
305 ret = -EBADF;
306 }
307 goto read_error;
308 }
309 /* Socket is open or opening. */
310 if (STATE_IS_PENDING(cf_sk)) {
311 pr_debug("CAIF: %s(): socket is opening...\n", __func__);
312
313 if (flags & MSG_DONTWAIT) {
314 /* We can't block. */
315 pr_debug("CAIF: %s():state pending and MSG_DONTWAIT\n",
316 __func__);
317 ret = -EAGAIN;
318 goto read_error;
319 }
320
321 /*
322 * Blocking mode; state is pending and we need to wait
323 * for its conclusion.
324 */
325 release_sock(&cf_sk->sk);
326
327 result =
328 wait_event_interruptible(*cf_sk->sk.sk_sleep,
329 !STATE_IS_PENDING(cf_sk));
330
331 lock_sock(&(cf_sk->sk));
332
333 if (result == -ERESTARTSYS) {
334 pr_debug("CAIF: %s(): wait_event_interruptible"
335 " woken by a signal (1)", __func__);
336 ret = -ERESTARTSYS;
337 goto read_error;
338 }
339 }
340
341 if (STATE_IS_REMOTE_SHUTDOWN(cf_sk) ||
342 !STATE_IS_OPEN(cf_sk) ||
343 STATE_IS_PENDING(cf_sk)) {
344
345 pr_debug("CAIF: %s(): socket closed\n",
346 __func__);
347 ret = -ESHUTDOWN;
348 goto read_error;
349 }
350
351 /*
352 * Block if we don't have any received buffers.
353 * The queue has its own lock.
354 */
355 while ((pkt = cfpkt_qpeek(cf_sk->pktq)) == NULL) {
356
357 if (flags & MSG_DONTWAIT) {
358 pr_debug("CAIF: %s(): MSG_DONTWAIT\n", __func__);
359 ret = -EAGAIN;
360 goto read_error;
361 }
362 trace_printk("CAIF: %s() wait_event\n", __func__);
363
364 /* Let writers in. */
365 release_sock(&cf_sk->sk);
366
367 /* Block reader until data arrives or socket is closed. */
368 if (wait_event_interruptible(*cf_sk->sk.sk_sleep,
369 cfpkt_qpeek(cf_sk->pktq)
370 || STATE_IS_REMOTE_SHUTDOWN(cf_sk)
371 || !STATE_IS_OPEN(cf_sk)) ==
372 -ERESTARTSYS) {
373 pr_debug("CAIF: %s():"
374 " wait_event_interruptible woken by "
375 "a signal, signal_pending(current) = %d\n",
376 __func__,
377 signal_pending(current));
378 return -ERESTARTSYS;
379 }
380
381 trace_printk("CAIF: %s() awake\n", __func__);
382 if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) {
383 pr_debug("CAIF: %s(): "
384 "received remote_shutdown indication\n",
385 __func__);
386 ret = -ESHUTDOWN;
387 goto read_error_no_unlock;
388 }
389
390 /* I want to be alone on cf_sk (except status and queue). */
391 lock_sock(&(cf_sk->sk));
392
393 if (!STATE_IS_OPEN(cf_sk)) {
394 /* Someone closed the link, report error. */
395 pr_debug("CAIF: %s(): remote end shutdown!\n",
396 __func__);
397 ret = -EPIPE;
398 goto read_error;
399 }
400 }
401
402 /* The queue has its own lock. */
403 len = cfpkt_getlen(pkt);
404
405 /* Check max length that can be copied. */
406 if (len <= buf_len)
407 pkt = cfpkt_dequeue(cf_sk->pktq);
408 else {
409 pr_debug("CAIF: %s(): user buffer too small (%ld,%ld)\n",
410 __func__, (long) len, (long) buf_len);
411 if (sock->type == SOCK_SEQPACKET) {
412 ret = -EMSGSIZE;
413 goto read_error;
414 }
415 len = buf_len;
416 }
417
418
419 spin_lock(&cf_sk->read_queue_len_lock);
420 cf_sk->read_queue_len--;
421 read_queue_low = (cf_sk->read_queue_len < CHNL_SKT_READ_QUEUE_LOW);
422 spin_unlock(&cf_sk->read_queue_len_lock);
423
424 if (!RX_FLOW_IS_ON(cf_sk) && read_queue_low) {
425 dbfs_atomic_inc(&cnt.num_rx_flow_on);
426 SET_RX_FLOW_ON(cf_sk);
427
428 /* Send flow on. */
429 pr_debug("CAIF: %s(): sending flow ON (queue len = %d)\n",
430 __func__, cf_sk->read_queue_len);
431 caif_assert(cf_sk->layer.dn);
432 caif_assert(cf_sk->layer.dn->ctrlcmd);
433 (void) cf_sk->layer.dn->modemcmd(cf_sk->layer.dn,
434 CAIF_MODEMCMD_FLOW_ON_REQ);
435
436 caif_assert(cf_sk->read_queue_len >= 0);
437 }
438
439 skb = cfpkt_tonative(pkt);
440 result = skb_copy_datagram_iovec(skb, 0, m->msg_iov, len);
441 skb_pull(skb, len);
442
443 if (result) {
444 pr_debug("CAIF: %s(): copy to_iovec failed\n", __func__);
445 cfpkt_destroy(pkt);
446 ret = -EFAULT;
447 goto read_error;
448 }
449
450 /* Free packet and remove from queue */
451 if (skb->len == 0)
452 skb_free_datagram(sk, skb);
453
454 /* Let the others in. */
455 release_sock(&cf_sk->sk);
456 return len;
457
458read_error:
459 release_sock(&cf_sk->sk);
460read_error_no_unlock:
461 return ret;
462}
463
464/* Send a signal as a consequence of sendmsg, sendto or caif_sendmsg. */
465static int caif_sendmsg(struct kiocb *kiocb, struct socket *sock,
466 struct msghdr *msg, size_t len)
467{
468
469 struct sock *sk = sock->sk;
470 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
471 size_t payload_size = msg->msg_iov->iov_len;
472 struct cfpkt *pkt = NULL;
473 struct caif_payload_info info;
474 unsigned char *txbuf;
475 ssize_t ret = -EIO;
476 int result;
477 struct sk_buff *skb;
478 caif_assert(msg->msg_iovlen == 1);
479
480 if (cf_sk == NULL) {
481 pr_debug("CAIF: %s(): private_data not set!\n",
482 __func__);
483 ret = -EBADFD;
484 goto write_error_no_unlock;
485 }
486
487 if (unlikely(msg->msg_iov->iov_base == NULL)) {
488 pr_warning("CAIF: %s(): Buffer is NULL.\n", __func__);
489 ret = -EINVAL;
490 goto write_error_no_unlock;
491 }
492
493 if (payload_size > CAIF_MAX_PAYLOAD_SIZE) {
494 pr_debug("CAIF: %s(): buffer too long\n", __func__);
495 if (sock->type == SOCK_SEQPACKET) {
496 ret = -EINVAL;
497 goto write_error_no_unlock;
498 }
499 payload_size = CAIF_MAX_PAYLOAD_SIZE;
500 }
501
502 /* I want to be alone on cf_sk (except status and queue) */
503 lock_sock(&(cf_sk->sk));
504
505 caif_assert(cf_sk->pktq);
506
507 if (!STATE_IS_OPEN(cf_sk)) {
508 /* Socket is closed or closing */
509 if (!STATE_IS_PENDING(cf_sk)) {
510 pr_debug("CAIF: %s(): socket is closed (by remote)\n",
511 __func__);
512 ret = -EPIPE;
513 } else {
514 pr_debug("CAIF: %s(): socket is closing...\n",
515 __func__);
516 ret = -EBADF;
517 }
518 goto write_error;
519 }
520
521 /* Socket is open or opening */
522 if (STATE_IS_PENDING(cf_sk)) {
523 pr_debug("CAIF: %s(): socket is opening...\n", __func__);
524
525 if (msg->msg_flags & MSG_DONTWAIT) {
526 /* We can't block */
527 trace_printk("CAIF: %s():state pending:"
528 "state=MSG_DONTWAIT\n", __func__);
529 ret = -EAGAIN;
530 goto write_error;
531 }
532 /* Let readers in */
533 release_sock(&cf_sk->sk);
534
535 /*
536 * Blocking mode; state is pending and we need to wait
537 * for its conclusion.
538 */
539 result =
540 wait_event_interruptible(*cf_sk->sk.sk_sleep,
541 !STATE_IS_PENDING(cf_sk));
542 /* I want to be alone on cf_sk (except status and queue) */
543 lock_sock(&(cf_sk->sk));
544
545 if (result == -ERESTARTSYS) {
546 pr_debug("CAIF: %s(): wait_event_interruptible"
547 " woken by a signal (1)", __func__);
548 ret = -ERESTARTSYS;
549 goto write_error;
550 }
551 }
552 if (STATE_IS_REMOTE_SHUTDOWN(cf_sk) ||
553 !STATE_IS_OPEN(cf_sk) ||
554 STATE_IS_PENDING(cf_sk)) {
555
556 pr_debug("CAIF: %s(): socket closed\n",
557 __func__);
558 ret = -ESHUTDOWN;
559 goto write_error;
560 }
561
562 if (!TX_FLOW_IS_ON(cf_sk)) {
563
564 /* Flow is off. Check non-block flag */
565 if (msg->msg_flags & MSG_DONTWAIT) {
566 trace_printk("CAIF: %s(): MSG_DONTWAIT and tx flow off",
567 __func__);
568 ret = -EAGAIN;
569 goto write_error;
570 }
571
572 /* release lock before waiting */
573 release_sock(&cf_sk->sk);
574
575 /* Wait until flow is on or socket is closed */
576 if (wait_event_interruptible(*cf_sk->sk.sk_sleep,
577 TX_FLOW_IS_ON(cf_sk)
578 || !STATE_IS_OPEN(cf_sk)
579 || STATE_IS_REMOTE_SHUTDOWN(cf_sk)
580 ) == -ERESTARTSYS) {
581 pr_debug("CAIF: %s():"
582 " wait_event_interruptible woken by a signal",
583 __func__);
584 ret = -ERESTARTSYS;
585 goto write_error_no_unlock;
586 }
587
588 /* I want to be alone on cf_sk (except status and queue) */
589 lock_sock(&(cf_sk->sk));
590
591 if (!STATE_IS_OPEN(cf_sk)) {
592 /* someone closed the link, report error */
593 pr_debug("CAIF: %s(): remote end shutdown!\n",
594 __func__);
595 ret = -EPIPE;
596 goto write_error;
597 }
598
599 if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) {
600 pr_debug("CAIF: %s(): "
601 "received remote_shutdown indication\n",
602 __func__);
603 ret = -ESHUTDOWN;
604 goto write_error;
605 }
606 }
607
608 pkt = cfpkt_create(payload_size);
609 skb = (struct sk_buff *)pkt;
610 skb->destructor = skb_destructor;
611 skb->sk = sk;
612 dbfs_atomic_inc(&cnt.skb_alloc);
613 dbfs_atomic_inc(&cnt.skb_in_use);
614 if (cfpkt_raw_append(pkt, (void **) &txbuf, payload_size) < 0) {
615 pr_debug("CAIF: %s(): cfpkt_raw_append failed\n", __func__);
616 cfpkt_destroy(pkt);
617 ret = -EINVAL;
618 goto write_error;
619 }
620
621 /* Copy data into buffer. */
622 if (copy_from_user(txbuf, msg->msg_iov->iov_base, payload_size)) {
623 pr_debug("CAIF: %s(): copy_from_user returned non zero.\n",
624 __func__);
625 cfpkt_destroy(pkt);
626 ret = -EINVAL;
627 goto write_error;
628 }
629 memset(&info, 0, sizeof(info));
630
631 /* Send the packet down the stack. */
632 caif_assert(cf_sk->layer.dn);
633 caif_assert(cf_sk->layer.dn->transmit);
634
635 do {
636 ret = cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt);
637
638 if (likely((ret >= 0) || (ret != -EAGAIN)))
639 break;
640
641 /* EAGAIN - retry */
642 if (msg->msg_flags & MSG_DONTWAIT) {
643 pr_debug("CAIF: %s(): NONBLOCK and transmit failed,"
644 " error = %ld\n", __func__, (long) ret);
645 ret = -EAGAIN;
646 goto write_error;
647 }
648
649 /* Let readers in */
650 release_sock(&cf_sk->sk);
651
652 /* Wait until flow is on or socket is closed */
653 if (wait_event_interruptible(*cf_sk->sk.sk_sleep,
654 TX_FLOW_IS_ON(cf_sk)
655 || !STATE_IS_OPEN(cf_sk)
656 || STATE_IS_REMOTE_SHUTDOWN(cf_sk)
657 ) == -ERESTARTSYS) {
658 pr_debug("CAIF: %s(): wait_event_interruptible"
659 " woken by a signal", __func__);
660 ret = -ERESTARTSYS;
661 goto write_error_no_unlock;
662 }
663
664 /* I want to be alone on cf_sk (except status and queue) */
665 lock_sock(&(cf_sk->sk));
666
667 } while (ret == -EAGAIN);
668
669 if (ret < 0) {
670 cfpkt_destroy(pkt);
671 pr_debug("CAIF: %s(): transmit failed, error = %ld\n",
672 __func__, (long) ret);
673
674 goto write_error;
675 }
676
677 release_sock(&cf_sk->sk);
678 return payload_size;
679
680write_error:
681 release_sock(&cf_sk->sk);
682write_error_no_unlock:
683 return ret;
684}
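
The flow-control handling above means a non-blocking writer sees -EAGAIN both while the connection is still pending and while the transmit flow is off. A minimal user-space sketch of the corresponding retry loop (illustrative only, not part of this patch; AF_CAIF sockets are created as shown later in caif_create()):

	#include <errno.h>
	#include <poll.h>
	#include <sys/socket.h>

	/* Illustrative sketch: retry a non-blocking send until tx flow is on. */
	static ssize_t caif_send_retry(int fd, const void *buf, size_t len)
	{
		for (;;) {
			ssize_t n = send(fd, buf, len, MSG_DONTWAIT);
			if (n >= 0 || errno != EAGAIN)
				return n;
			/* -EAGAIN: flow is off, wait until the socket is writable. */
			struct pollfd pfd = { .fd = fd, .events = POLLOUT };
			if (poll(&pfd, 1, -1) < 0)
				return -1;
		}
	}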
685
686static unsigned int caif_poll(struct file *file, struct socket *sock,
687 poll_table *wait)
688{
689 struct sock *sk = sock->sk;
690 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
691 u32 mask = 0;
692 poll_wait(file, sk->sk_sleep, wait);
693 lock_sock(&(cf_sk->sk));
694 if (!STATE_IS_OPEN(cf_sk)) {
695 if (!STATE_IS_PENDING(cf_sk))
696 mask |= POLLHUP;
697 } else {
698 if (cfpkt_qpeek(cf_sk->pktq) != NULL)
699 mask |= (POLLIN | POLLRDNORM);
700 if (TX_FLOW_IS_ON(cf_sk))
701 mask |= (POLLOUT | POLLWRNORM);
702 }
703 release_sock(&cf_sk->sk);
704 trace_printk("CAIF: %s(): poll mask=0x%04x\n",
705 __func__, mask);
706 return mask;
707}
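
Given the mask bits set above (POLLIN/POLLRDNORM when the receive queue is non-empty, POLLOUT/POLLWRNORM when tx flow is on, POLLHUP when the channel is closed and no longer pending), a reader can drive the socket with an ordinary poll() loop. A hedged user-space sketch:

	#include <poll.h>
	#include <sys/socket.h>

	/* Illustrative sketch: wait for data, then read one CAIF packet. */
	static ssize_t caif_recv_one(int fd, void *buf, size_t len)
	{
		struct pollfd pfd = { .fd = fd, .events = POLLIN };

		if (poll(&pfd, 1, -1) < 0)
			return -1;
		if (pfd.revents & POLLHUP)	/* channel torn down, see caif_poll() */
			return 0;
		return recv(fd, buf, len, MSG_DONTWAIT);
	}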
708
709static void drain_queue(struct caifsock *cf_sk)
710{
711 struct cfpkt *pkt = NULL;
712
713 /* Empty the queue */
714 do {
715 /* The queue has its own lock */
716 if (!cf_sk->pktq)
717 break;
718
719 pkt = cfpkt_dequeue(cf_sk->pktq);
720 if (!pkt)
721 break;
722 pr_debug("CAIF: %s(): freeing packet from read queue\n",
723 __func__);
724 cfpkt_destroy(pkt);
725
726 } while (1);
727
728 cf_sk->read_queue_len = 0;
729}
730
731static int setsockopt(struct socket *sock,
732 int lvl, int opt, char __user *ov, unsigned int ol)
733{
734 struct sock *sk = sock->sk;
735 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
736 int prio, linksel;
737 struct ifreq ifreq;
738
739 if (STATE_IS_OPEN(cf_sk)) {
740 pr_debug("CAIF: %s(): setsockopt "
741 "cannot be done on a connected socket\n",
742 __func__);
743 return -ENOPROTOOPT;
744 }
745 switch (opt) {
746 case CAIFSO_LINK_SELECT:
747 if (ol < sizeof(int)) {
748 pr_debug("CAIF: %s(): setsockopt"
749 " CAIFSO_CHANNEL_CONFIG bad size\n", __func__);
750 return -EINVAL;
751 }
752 if (lvl != SOL_CAIF)
753 goto bad_sol;
754 if (copy_from_user(&linksel, ov, sizeof(int)))
755 return -EINVAL;
756 lock_sock(&(cf_sk->sk));
757 cf_sk->conn_req.link_selector = linksel;
758 release_sock(&cf_sk->sk);
759 return 0;
760
761 case SO_PRIORITY:
762 if (lvl != SOL_SOCKET)
763 goto bad_sol;
764 if (ol < sizeof(int)) {
765 pr_debug("CAIF: %s(): setsockopt"
766 " SO_PRIORITY bad size\n", __func__);
767 return -EINVAL;
768 }
769 if (copy_from_user(&prio, ov, sizeof(int)))
770 return -EINVAL;
771 lock_sock(&(cf_sk->sk));
772 cf_sk->conn_req.priority = prio;
773 pr_debug("CAIF: %s(): Setting sockopt priority=%d\n", __func__,
774 cf_sk->conn_req.priority);
775 release_sock(&cf_sk->sk);
776 return 0;
777
778 case SO_BINDTODEVICE:
779 if (lvl != SOL_SOCKET)
780 goto bad_sol;
781 if (ol < sizeof(struct ifreq)) {
782 pr_debug("CAIF: %s(): setsockopt"
783 " SO_PRIORITY bad size\n", __func__);
784 return -EINVAL;
785 }
786 if (copy_from_user(&ifreq, ov, sizeof(ifreq)))
787 return -EFAULT;
788 lock_sock(&(cf_sk->sk));
789 strncpy(cf_sk->conn_req.link_name, ifreq.ifr_name,
790 sizeof(cf_sk->conn_req.link_name));
791 cf_sk->conn_req.link_name
792 [sizeof(cf_sk->conn_req.link_name)-1] = 0;
793 release_sock(&cf_sk->sk);
794 return 0;
795
796 case CAIFSO_REQ_PARAM:
797 if (lvl != SOL_CAIF)
798 goto bad_sol;
799 if (cf_sk->sk.sk_protocol != CAIFPROTO_UTIL)
800 return -ENOPROTOOPT;
801 if (ol > sizeof(cf_sk->conn_req.param.data))
802 goto req_param_bad_size;
803
804 lock_sock(&(cf_sk->sk));
805 cf_sk->conn_req.param.size = ol;
806 if (copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) {
807 release_sock(&cf_sk->sk);
808req_param_bad_size:
809 pr_debug("CAIF: %s(): setsockopt"
810 " CAIFSO_CHANNEL_CONFIG bad size\n", __func__);
811 return -EINVAL;
812 }
813
814 release_sock(&cf_sk->sk);
815 return 0;
816
817 default:
818 pr_debug("CAIF: %s(): unhandled option %d\n", __func__, opt);
819 return -EINVAL;
820 }
821
822 return 0;
823bad_sol:
824 pr_debug("CAIF: %s(): setsockopt bad level\n", __func__);
825 return -ENOPROTOOPT;
826
827}
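
Because the options above are only accepted on an unconnected socket, an application sets them before connect(). A hedged user-space sketch for the SOL_SOCKET options handled here (SO_BINDTODEVICE takes a struct ifreq, SO_PRIORITY an int):

	#include <net/if.h>
	#include <string.h>
	#include <sys/socket.h>

	/* Illustrative sketch: pin the socket to a CAIF link and set its priority. */
	static int caif_configure(int fd, const char *ifname, int prio)
	{
		struct ifreq ifr;

		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
		if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
			return -1;
		return setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio));
	}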
828
829static int caif_connect(struct socket *sock, struct sockaddr *uservaddr,
830 int sockaddr_len, int flags)
831{
832 struct caifsock *cf_sk = NULL;
833 int result = -1;
834 int mode = 0;
835 int ret = -EIO;
836 struct sock *sk = sock->sk;
837 BUG_ON(sk == NULL);
838
839 cf_sk = container_of(sk, struct caifsock, sk);
840
841 trace_printk("CAIF: %s(): cf_sk=%p OPEN=%d, TX_FLOW=%d, RX_FLOW=%d\n",
842 __func__, cf_sk,
843 STATE_IS_OPEN(cf_sk),
844 TX_FLOW_IS_ON(cf_sk), RX_FLOW_IS_ON(cf_sk));
845
846
847 if (sock->type == SOCK_SEQPACKET || sock->type == SOCK_STREAM)
848 sock->state = SS_CONNECTING;
849 else
850 goto out;
851
852 /* I want to be alone on cf_sk (except status and queue) */
853 lock_sock(&(cf_sk->sk));
854
855 if (sockaddr_len != sizeof(struct sockaddr_caif)) {
856 pr_debug("CAIF: %s(): Bad address len (%ld,%lu)\n",
857 __func__, (long) sockaddr_len,
858 (long unsigned) sizeof(struct sockaddr_caif));
859 ret = -EINVAL;
860 goto open_error;
861 }
862
863 if (uservaddr->sa_family != AF_CAIF) {
864 pr_debug("CAIF: %s(): Bad address family (%d)\n",
865 __func__, uservaddr->sa_family);
866 ret = -EAFNOSUPPORT;
867 goto open_error;
868 }
869
870 memcpy(&cf_sk->conn_req.sockaddr, uservaddr,
871 sizeof(struct sockaddr_caif));
872
873 dbfs_atomic_inc(&cnt.num_open);
874 mode = SKT_READ_FLAG | SKT_WRITE_FLAG;
875
876 /* If socket is not open, make sure socket is in fully closed state */
877 if (!STATE_IS_OPEN(cf_sk)) {
878 /* Has link close response been received (if we ever sent it)?*/
879 if (STATE_IS_PENDING(cf_sk)) {
880 /*
881 * Still waiting for close response from remote.
882 * If opened non-blocking, report "would block"
883 */
884 if (flags & O_NONBLOCK) {
885 pr_debug("CAIF: %s(): O_NONBLOCK"
886 " && close pending\n", __func__);
887 ret = -EAGAIN;
888 goto open_error;
889 }
890
891 pr_debug("CAIF: %s(): Wait for close response"
892 " from remote...\n", __func__);
893
894 release_sock(&cf_sk->sk);
895
896 /*
897 * Blocking mode; close is pending and we need to wait
898 * for its conclusion.
899 */
900 result =
901 wait_event_interruptible(*cf_sk->sk.sk_sleep,
902 !STATE_IS_PENDING(cf_sk));
903
904 lock_sock(&(cf_sk->sk));
905 if (result == -ERESTARTSYS) {
906 pr_debug("CAIF: %s(): wait_event_interruptible"
907 "woken by a signal (1)", __func__);
908 ret = -ERESTARTSYS;
909 goto open_error;
910 }
911 }
912 }
913
914 /* socket is now either closed, pending open or open */
915 if (STATE_IS_OPEN(cf_sk) && !STATE_IS_PENDING(cf_sk)) {
916 /* Open */
917 pr_debug("CAIF: %s(): Socket is already opened (cf_sk=%p)"
918 " check access f_flags = 0x%x file_mode = 0x%x\n",
919 __func__, cf_sk, mode, cf_sk->file_mode);
920
921 } else {
922 /* We are closed or pending open.
923 * If closed: send link setup
924 * If pending open: link setup already sent (we could have been
925 * interrupted by a signal last time)
926 */
927 if (!STATE_IS_OPEN(cf_sk)) {
928 /* First opening of file; connect lower layers: */
929 /* Drain queue (very unlikely) */
930 drain_queue(cf_sk);
931
932 cf_sk->layer.receive = caif_sktrecv_cb;
933 SET_STATE_OPEN(cf_sk);
934 SET_PENDING_ON(cf_sk);
935
936 /* Register this channel. */
937 result =
938 caif_connect_client(&cf_sk->conn_req,
939 &cf_sk->layer);
940 if (result < 0) {
941 pr_debug("CAIF: %s(): can't register channel\n",
942 __func__);
943 ret = -EIO;
944 SET_STATE_CLOSED(cf_sk);
945 SET_PENDING_OFF(cf_sk);
946 goto open_error;
947 }
948 dbfs_atomic_inc(&cnt.num_init);
949 }
950
951 /* If opened non-blocking, report "success".
952 */
953 if (flags & O_NONBLOCK) {
954 pr_debug("CAIF: %s(): O_NONBLOCK success\n",
955 __func__);
956 ret = -EINPROGRESS;
957 cf_sk->sk.sk_err = -EINPROGRESS;
958 goto open_error;
959 }
960
961 trace_printk("CAIF: %s(): Wait for connect response\n",
962 __func__);
963
964 /* release lock before waiting */
965 release_sock(&cf_sk->sk);
966
967 result =
968 wait_event_interruptible(*cf_sk->sk.sk_sleep,
969 !STATE_IS_PENDING(cf_sk));
970
971 lock_sock(&(cf_sk->sk));
972
973 if (result == -ERESTARTSYS) {
974 pr_debug("CAIF: %s(): wait_event_interruptible"
975 "woken by a signal (2)", __func__);
976 ret = -ERESTARTSYS;
977 goto open_error;
978 }
979
980 if (!STATE_IS_OPEN(cf_sk)) {
981 /* Lower layers said "no" */
982 pr_debug("CAIF: %s(): Closed received\n", __func__);
983 ret = -EPIPE;
984 goto open_error;
985 }
986
987 trace_printk("CAIF: %s(): Connect received\n", __func__);
988 }
989 /* Open is ok */
990 cf_sk->file_mode |= mode;
991
992 trace_printk("CAIF: %s(): Connected - file mode = %x\n",
993 __func__, cf_sk->file_mode);
994
995 release_sock(&cf_sk->sk);
996 return 0;
997open_error:
998 sock->state = SS_UNCONNECTED;
999 release_sock(&cf_sk->sk);
1000out:
1001 return ret;
1002}
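
For reference, the corresponding user-space call sequence: create the socket, fill in the CAIF address and let the blocking connect() wait for the link-setup response handled above. This is a sketch only; struct sockaddr_caif, AF_CAIF and the CAIFPROTO_* constants are assumed to come from linux/caif/caif_socket.h.

	#include <string.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include <linux/caif/caif_socket.h>

	/* Illustrative sketch: open a CAIF utility channel by service name. */
	static int caif_open_util(const char *service)
	{
		struct sockaddr_caif addr;
		int fd = socket(AF_CAIF, SOCK_SEQPACKET, CAIFPROTO_UTIL);

		if (fd < 0)
			return -1;
		memset(&addr, 0, sizeof(addr));
		addr.family = AF_CAIF;
		strncpy(addr.u.util.service, service, sizeof(addr.u.util.service) - 1);
		if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
			close(fd);
			return -1;
		}
		return fd;
	}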
1003
1004static int caif_shutdown(struct socket *sock, int how)
1005{
1006 struct caifsock *cf_sk = NULL;
1007 int result = 0;
1008 int tx_flow_state_was_on;
1009 struct sock *sk = sock->sk;
1010
1011 trace_printk("CAIF: %s(): enter\n", __func__);
1012 pr_debug("f_flags=%x\n", sock->file->f_flags);
1013
1014 if (how != SHUT_RDWR)
1015 return -EOPNOTSUPP;
1016
1017 cf_sk = container_of(sk, struct caifsock, sk);
1018 if (cf_sk == NULL) {
1019 pr_debug("CAIF: %s(): COULD NOT FIND SOCKET\n", __func__);
1020 return -EBADF;
1021 }
1022
1023 /* I want to be alone on cf_sk (except status queue) */
1024 lock_sock(&(cf_sk->sk));
1025 sock_hold(&cf_sk->sk);
1026
1027 /* IS_CLOSED has a double meaning:
1028 * 1) Spontaneous remote shutdown request.
1029 * 2) Ack on a channel teardown (disconnect).
1030 * Must clear the bit in case we previously received
1031 * a remote shutdown request.
1032 */
1033 if (STATE_IS_OPEN(cf_sk) && !STATE_IS_PENDING(cf_sk)) {
1034 SET_STATE_CLOSED(cf_sk);
1035 SET_PENDING_ON(cf_sk);
1036 tx_flow_state_was_on = TX_FLOW_IS_ON(cf_sk);
1037 SET_TX_FLOW_OFF(cf_sk);
1038
1039 /* Hold the socket until DEINIT_RSP is received */
1040 sock_hold(&cf_sk->sk);
1041 result = caif_disconnect_client(&cf_sk->layer);
1042
1043 if (result < 0) {
1044 pr_debug("CAIF: %s(): "
1045 "caif_disconnect_client() failed\n",
1046 __func__);
1047 SET_STATE_CLOSED(cf_sk);
1048 SET_PENDING_OFF(cf_sk);
1049 SET_TX_FLOW_OFF(cf_sk);
1050 release_sock(&cf_sk->sk);
1051 sock_put(&cf_sk->sk);
1052 return -EIO;
1053 }
1054
1055 }
1056 if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) {
1057 SET_PENDING_OFF(cf_sk);
1058 SET_REMOTE_SHUTDOWN_OFF(cf_sk);
1059 }
1060
1061 /*
1062 * Socket is no longer in state pending close,
1063 * and we can release the reference.
1064 */
1065
1066 dbfs_atomic_inc(&cnt.num_close);
1067 drain_queue(cf_sk);
1068 SET_RX_FLOW_ON(cf_sk);
1069 cf_sk->file_mode = 0;
1070 sock_put(&cf_sk->sk);
1071 release_sock(&cf_sk->sk);
1072 if (!result && (sock->file->f_flags & O_NONBLOCK)) {
1073 pr_debug("nonblocking shutdown returing -EAGAIN\n");
1074 return -EAGAIN;
1075 } else
1076 return result;
1077}
1078
1079static ssize_t caif_sock_no_sendpage(struct socket *sock,
1080 struct page *page,
1081 int offset, size_t size, int flags)
1082{
1083 return -EOPNOTSUPP;
1084}
1085
1086/* This function is called as part of close. */
1087static int caif_release(struct socket *sock)
1088{
1089 struct sock *sk = sock->sk;
1090 struct caifsock *cf_sk = NULL;
1091 int res;
1092 caif_assert(sk != NULL);
1093 cf_sk = container_of(sk, struct caifsock, sk);
1094
1095 if (cf_sk->debugfs_socket_dir != NULL)
1096 debugfs_remove_recursive(cf_sk->debugfs_socket_dir);
1097
1098 res = caif_shutdown(sock, SHUT_RDWR);
1099 if (res && res != -EINPROGRESS)
1100 return res;
1101
1102 /*
1103 * FIXME: Shutdown should probably be possible to do async
1104 * without flushing queues, allowing reception of frames while
1105 * waiting for DEINIT_IND.
1106 * Release should always block, to allow secure decoupling of
1107 * CAIF stack.
1108 */
1109 if (!(sock->file->f_flags & O_NONBLOCK)) {
1110 res = wait_event_interruptible(*cf_sk->sk.sk_sleep,
1111 !STATE_IS_PENDING(cf_sk));
1112
1113 if (res == -ERESTARTSYS) {
1114 pr_debug("CAIF: %s(): wait_event_interruptible"
1115 "woken by a signal (1)", __func__);
1116 }
1117 }
1118 lock_sock(&(cf_sk->sk));
1119
1120 sock->sk = NULL;
1121
1122 /* Detach the socket from its process context by making it orphan. */
1123 sock_orphan(sk);
1124
1125 /*
1126 * Setting SHUTDOWN_MASK means that both send and receive are shutdown
1127 * for the socket.
1128 */
1129 sk->sk_shutdown = SHUTDOWN_MASK;
1130
1131 /*
1132 * Set the socket state to closed, the TCP_CLOSE macro is used when
1133 * closing any socket.
1134 */
1135
1136 /* Flush out this sockets receive queue. */
1137 drain_queue(cf_sk);
1138
1139 /* Finally release the socket. */
1140 SET_STATE_PENDING_DESTROY(cf_sk);
1141
1142 release_sock(&cf_sk->sk);
1143
1144 sock_put(sk);
1145
1146 /*
1147 * The rest of the cleanup will be handled from the
1148 * caif_sock_destructor
1149 */
1150 return res;
1151}
1152
1153static const struct proto_ops caif_ops = {
1154 .family = PF_CAIF,
1155 .owner = THIS_MODULE,
1156 .release = caif_release,
1157 .bind = sock_no_bind,
1158 .connect = caif_connect,
1159 .socketpair = sock_no_socketpair,
1160 .accept = sock_no_accept,
1161 .getname = sock_no_getname,
1162 .poll = caif_poll,
1163 .ioctl = sock_no_ioctl,
1164 .listen = sock_no_listen,
1165 .shutdown = caif_shutdown,
1166 .setsockopt = setsockopt,
1167 .getsockopt = sock_no_getsockopt,
1168 .sendmsg = caif_sendmsg,
1169 .recvmsg = caif_recvmsg,
1170 .mmap = sock_no_mmap,
1171 .sendpage = caif_sock_no_sendpage,
1172};
1173
1174/* This function is called when a socket is finally destroyed. */
1175static void caif_sock_destructor(struct sock *sk)
1176{
1177 struct caifsock *cf_sk = NULL;
1178 cf_sk = container_of(sk, struct caifsock, sk);
1179 /* Error checks. */
1180 caif_assert(!atomic_read(&sk->sk_wmem_alloc));
1181 caif_assert(sk_unhashed(sk));
1182 caif_assert(!sk->sk_socket);
1183 if (!sock_flag(sk, SOCK_DEAD)) {
1184 pr_debug("CAIF: %s(): 0x%p", __func__, sk);
1185 return;
1186 }
1187
1188 if (STATE_IS_OPEN(cf_sk)) {
1189 pr_debug("CAIF: %s(): socket is opened (cf_sk=%p)"
1190 " file_mode = 0x%x\n", __func__,
1191 cf_sk, cf_sk->file_mode);
1192 return;
1193 }
1194 drain_queue(cf_sk);
1195 kfree(cf_sk->pktq);
1196
1197 trace_printk("CAIF: %s(): caif_sock_destructor: Removing socket %s\n",
1198 __func__, cf_sk->name);
1199 atomic_dec(&caif_nr_socks);
1200}
1201
1202static int caif_create(struct net *net, struct socket *sock, int protocol,
1203 int kern)
1204{
1205 struct sock *sk = NULL;
1206 struct caifsock *cf_sk = NULL;
1207 int result = 0;
1208 static struct proto prot = {.name = "PF_CAIF",
1209 .owner = THIS_MODULE,
1210 .obj_size = sizeof(struct caifsock),
1211 };
1212
1213 /*
1214 * The sock->type specifies the socket type to use.
1215 * in SEQPACKET mode packet boundaries are enforced.
1216 */
1217 if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM)
1218 return -ESOCKTNOSUPPORT;
1219
1220 if (net != &init_net)
1221 return -EAFNOSUPPORT;
1222
1223 if (protocol < 0 || protocol >= CAIFPROTO_MAX)
1224 return -EPROTONOSUPPORT;
1225 /*
1226 * Set the socket state to unconnected. The socket state is really
1227 * not used at all in the net/core or socket.c but the
1228 * initialization makes sure that sock->state is not uninitialized.
1229 */
1230 sock->state = SS_UNCONNECTED;
1231
1232 sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot);
1233 if (!sk)
1234 return -ENOMEM;
1235
1236 cf_sk = container_of(sk, struct caifsock, sk);
1237
1238 /* Store the protocol */
1239 sk->sk_protocol = (unsigned char) protocol;
1240
1241 spin_lock_init(&cf_sk->read_queue_len_lock);
1242
1243 /* Fill in some information concerning the misc socket. */
1244 snprintf(cf_sk->name, sizeof(cf_sk->name), "cf_sk%d",
1245 atomic_read(&caif_nr_socks));
1246
1247 /*
1248 * Lock in order to try to stop someone from opening the socket
1249 * too early.
1250 */
1251 lock_sock(&(cf_sk->sk));
1252
1253 /* Initialize the nonzero default sock structure data. */
1254 sock_init_data(sock, sk);
1255 sock->ops = &caif_ops;
1256 sk->sk_destruct = caif_sock_destructor;
1257 sk->sk_sndbuf = caif_sockbuf_size;
1258 sk->sk_rcvbuf = caif_sockbuf_size;
1259
1260 cf_sk->pktq = cfpktq_create();
1261
1262 if (!cf_sk->pktq) {
1263 pr_err("CAIF: %s(): queue create failed.\n", __func__);
1264 result = -ENOMEM;
1265 release_sock(&cf_sk->sk);
1266 goto err_failed;
1267 }
1268 cf_sk->layer.ctrlcmd = caif_sktflowctrl_cb;
1269 SET_STATE_CLOSED(cf_sk);
1270 SET_PENDING_OFF(cf_sk);
1271 SET_TX_FLOW_OFF(cf_sk);
1272 SET_RX_FLOW_ON(cf_sk);
1273
1274 /* Set default options on configuration */
1275 cf_sk->conn_req.priority = CAIF_PRIO_NORMAL;
1276 cf_sk->conn_req.link_selector = CAIF_LINK_HIGH_BANDW;
1277 cf_sk->conn_req.protocol = protocol;
1278 /* Increase the number of sockets created. */
1279 atomic_inc(&caif_nr_socks);
1280 if (!IS_ERR(debugfsdir)) {
1281 cf_sk->debugfs_socket_dir =
1282 debugfs_create_dir(cf_sk->name, debugfsdir);
1283 debugfs_create_u32("conn_state", S_IRUSR | S_IWUSR,
1284 cf_sk->debugfs_socket_dir, &cf_sk->conn_state);
1285 debugfs_create_u32("flow_state", S_IRUSR | S_IWUSR,
1286 cf_sk->debugfs_socket_dir, &cf_sk->flow_state);
1287 debugfs_create_u32("read_queue_len", S_IRUSR | S_IWUSR,
1288 cf_sk->debugfs_socket_dir,
1289 (u32 *) &cf_sk->read_queue_len);
1290 debugfs_create_u32("identity", S_IRUSR | S_IWUSR,
1291 cf_sk->debugfs_socket_dir,
1292 (u32 *) &cf_sk->layer.id);
1293 }
1294 release_sock(&cf_sk->sk);
1295 return 0;
1296err_failed:
1297 sk_free(sk);
1298 return result;
1299}
1300
1301static struct net_proto_family caif_family_ops = {
1302 .family = PF_CAIF,
1303 .create = caif_create,
1304 .owner = THIS_MODULE,
1305};
1306
1307static int af_caif_init(void)
1308{
1309 int err;
1310 err = sock_register(&caif_family_ops);
1311
1312 if (err)
1313 return err;
1314
1315 return 0;
1316}
1317
1318static int __init caif_sktinit_module(void)
1319{
1320 int stat;
1321#ifdef CONFIG_DEBUG_FS
1322 debugfsdir = debugfs_create_dir("chnl_skt", NULL);
1323 if (!IS_ERR(debugfsdir)) {
1324 debugfs_create_u32("skb_inuse", S_IRUSR | S_IWUSR,
1325 debugfsdir,
1326 (u32 *) &cnt.skb_in_use);
1327 debugfs_create_u32("skb_alloc", S_IRUSR | S_IWUSR,
1328 debugfsdir,
1329 (u32 *) &cnt.skb_alloc);
1330 debugfs_create_u32("skb_free", S_IRUSR | S_IWUSR,
1331 debugfsdir,
1332 (u32 *) &cnt.skb_free);
1333 debugfs_create_u32("num_sockets", S_IRUSR | S_IWUSR,
1334 debugfsdir,
1335 (u32 *) &caif_nr_socks);
1336 debugfs_create_u32("num_open", S_IRUSR | S_IWUSR,
1337 debugfsdir,
1338 (u32 *) &cnt.num_open);
1339 debugfs_create_u32("num_close", S_IRUSR | S_IWUSR,
1340 debugfsdir,
1341 (u32 *) &cnt.num_close);
1342 debugfs_create_u32("num_init", S_IRUSR | S_IWUSR,
1343 debugfsdir,
1344 (u32 *) &cnt.num_init);
1345 debugfs_create_u32("num_init_resp", S_IRUSR | S_IWUSR,
1346 debugfsdir,
1347 (u32 *) &cnt.num_init_resp);
1348 debugfs_create_u32("num_init_fail_resp", S_IRUSR | S_IWUSR,
1349 debugfsdir,
1350 (u32 *) &cnt.num_init_fail_resp);
1351 debugfs_create_u32("num_deinit", S_IRUSR | S_IWUSR,
1352 debugfsdir,
1353 (u32 *) &cnt.num_deinit);
1354 debugfs_create_u32("num_deinit_resp", S_IRUSR | S_IWUSR,
1355 debugfsdir,
1356 (u32 *) &cnt.num_deinit_resp);
1357 debugfs_create_u32("num_remote_shutdown_ind",
1358 S_IRUSR | S_IWUSR, debugfsdir,
1359 (u32 *) &cnt.num_remote_shutdown_ind);
1360 debugfs_create_u32("num_tx_flow_off_ind", S_IRUSR | S_IWUSR,
1361 debugfsdir,
1362 (u32 *) &cnt.num_tx_flow_off_ind);
1363 debugfs_create_u32("num_tx_flow_on_ind", S_IRUSR | S_IWUSR,
1364 debugfsdir,
1365 (u32 *) &cnt.num_tx_flow_on_ind);
1366 debugfs_create_u32("num_rx_flow_off", S_IRUSR | S_IWUSR,
1367 debugfsdir,
1368 (u32 *) &cnt.num_rx_flow_off);
1369 debugfs_create_u32("num_rx_flow_on", S_IRUSR | S_IWUSR,
1370 debugfsdir,
1371 (u32 *) &cnt.num_rx_flow_on);
1372 }
1373#endif
1374 stat = af_caif_init();
1375 if (stat) {
1376 pr_err("CAIF: %s(): Failed to initialize CAIF socket layer.",
1377 __func__);
1378 return stat;
1379 }
1380 return 0;
1381}
1382
1383static void __exit caif_sktexit_module(void)
1384{
1385 sock_unregister(PF_CAIF);
1386 if (debugfsdir != NULL)
1387 debugfs_remove_recursive(debugfsdir);
1388}
1389
1390module_init(caif_sktinit_module);
1391module_exit(caif_sktexit_module);
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
new file mode 100644
index 000000000000..c873e3d4387c
--- /dev/null
+++ b/net/caif/cfcnfg.c
@@ -0,0 +1,530 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6#include <linux/kernel.h>
7#include <linux/stddef.h>
8#include <linux/slab.h>
9#include <net/caif/caif_layer.h>
10#include <net/caif/cfpkt.h>
11#include <net/caif/cfcnfg.h>
12#include <net/caif/cfctrl.h>
13#include <net/caif/cfmuxl.h>
14#include <net/caif/cffrml.h>
15#include <net/caif/cfserl.h>
16#include <net/caif/cfsrvl.h>
17
18#include <linux/module.h>
19#include <asm/atomic.h>
20
21#define MAX_PHY_LAYERS 7
22#define PHY_NAME_LEN 20
23
24#define container_obj(layr) container_of(layr, struct cfcnfg, layer)
25
26/* Information about CAIF physical interfaces held by Config Module in order
27 * to manage physical interfaces
28 */
29struct cfcnfg_phyinfo {
30 /* Pointer to the layer below the MUX (framing layer) */
31 struct cflayer *frm_layer;
32 /* Pointer to the lowest actual physical layer */
33 struct cflayer *phy_layer;
34 /* Unique identifier of the physical interface */
35 unsigned int id;
36 /* Preference of the physical interface */
37 enum cfcnfg_phy_preference pref;
38
39 /* Reference count, number of channels using the device */
40 int phy_ref_count;
41
42 /* Information about the physical device */
43 struct dev_info dev_info;
44};
45
46struct cfcnfg {
47 struct cflayer layer;
48 struct cflayer *ctrl;
49 struct cflayer *mux;
50 u8 last_phyid;
51 struct cfcnfg_phyinfo phy_layers[MAX_PHY_LAYERS];
52};
53
54static void cncfg_linkup_rsp(struct cflayer *layer, u8 linkid,
55 enum cfctrl_srv serv, u8 phyid,
56 struct cflayer *adapt_layer);
57static void cncfg_linkdestroy_rsp(struct cflayer *layer, u8 linkid,
58 struct cflayer *client_layer);
59static void cncfg_reject_rsp(struct cflayer *layer, u8 linkid,
60 struct cflayer *adapt_layer);
61static void cfctrl_resp_func(void);
62static void cfctrl_enum_resp(void);
63
64struct cfcnfg *cfcnfg_create(void)
65{
66 struct cfcnfg *this;
67 struct cfctrl_rsp *resp;
68 /* Initiate this layer */
69 this = kmalloc(sizeof(struct cfcnfg), GFP_ATOMIC);
70 if (!this) {
71 pr_warning("CAIF: %s(): Out of memory\n", __func__);
72 return NULL;
73 }
74 memset(this, 0, sizeof(struct cfcnfg));
75 this->mux = cfmuxl_create();
76 if (!this->mux)
77 goto out_of_mem;
78 this->ctrl = cfctrl_create();
79 if (!this->ctrl)
80 goto out_of_mem;
81 /* Initiate response functions */
82 resp = cfctrl_get_respfuncs(this->ctrl);
83 resp->enum_rsp = cfctrl_enum_resp;
84 resp->linkerror_ind = cfctrl_resp_func;
85 resp->linkdestroy_rsp = cncfg_linkdestroy_rsp;
86 resp->sleep_rsp = cfctrl_resp_func;
87 resp->wake_rsp = cfctrl_resp_func;
88 resp->restart_rsp = cfctrl_resp_func;
89 resp->radioset_rsp = cfctrl_resp_func;
90 resp->linksetup_rsp = cncfg_linkup_rsp;
91 resp->reject_rsp = cncfg_reject_rsp;
92
93 this->last_phyid = 1;
94
95 cfmuxl_set_uplayer(this->mux, this->ctrl, 0);
96 layer_set_dn(this->ctrl, this->mux);
97 layer_set_up(this->ctrl, this);
98 return this;
99out_of_mem:
100 pr_warning("CAIF: %s(): Out of memory\n", __func__);
101 kfree(this->mux);
102 kfree(this->ctrl);
103 kfree(this);
104 return NULL;
105}
106EXPORT_SYMBOL(cfcnfg_create);
107
108void cfcnfg_remove(struct cfcnfg *cfg)
109{
110 if (cfg) {
111 kfree(cfg->mux);
112 kfree(cfg->ctrl);
113 kfree(cfg);
114 }
115}
116
117static void cfctrl_resp_func(void)
118{
119}
120
121static void cfctrl_enum_resp(void)
122{
123}
124
125struct dev_info *cfcnfg_get_phyid(struct cfcnfg *cnfg,
126 enum cfcnfg_phy_preference phy_pref)
127{
128 u16 i;
129
130 /* Try to match with specified preference */
131 for (i = 1; i < MAX_PHY_LAYERS; i++) {
132 if (cnfg->phy_layers[i].id == i &&
133 cnfg->phy_layers[i].pref == phy_pref &&
134 cnfg->phy_layers[i].frm_layer != NULL) {
135 caif_assert(cnfg->phy_layers != NULL);
136 caif_assert(cnfg->phy_layers[i].id == i);
137 return &cnfg->phy_layers[i].dev_info;
138 }
139 }
140 /* Otherwise just return something */
141 for (i = 1; i < MAX_PHY_LAYERS; i++) {
142 if (cnfg->phy_layers[i].id == i) {
143 caif_assert(cnfg->phy_layers != NULL);
144 caif_assert(cnfg->phy_layers[i].id == i);
145 return &cnfg->phy_layers[i].dev_info;
146 }
147 }
148
149 return NULL;
150}
151
152static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo(struct cfcnfg *cnfg,
153 u8 phyid)
154{
155 int i;
156 /* Try to match with specified preference */
157 for (i = 0; i < MAX_PHY_LAYERS; i++)
158 if (cnfg->phy_layers[i].frm_layer != NULL &&
159 cnfg->phy_layers[i].id == phyid)
160 return &cnfg->phy_layers[i];
161 return NULL;
162}
163
164int cfcnfg_get_named(struct cfcnfg *cnfg, char *name)
165{
166 int i;
167
168 /* Try to match with specified name */
169 for (i = 0; i < MAX_PHY_LAYERS; i++) {
170 if (cnfg->phy_layers[i].frm_layer != NULL
171 && strcmp(cnfg->phy_layers[i].phy_layer->name,
172 name) == 0)
173 return cnfg->phy_layers[i].frm_layer->id;
174 }
175 return 0;
176}
177
178/*
179 * NOTE: What happens on destroy failure:
180 * 1a) No response - Too early
181 * This will not happen because enumerate has already
182 * completed.
183 * 1b) No response - FATAL
184 * Not handled, but this should be a CAIF PROTOCOL ERROR
185 * Modem error, response is really expected - this
186 * case is not really handled.
187 * 2) O/E-bit indicates an error
188 * Ignored - this link is destroyed anyway.
189 * 3) Not able to match on request
190 * Not handled, but this should be a CAIF PROTOCOL ERROR
191 * 4) Link-Error - (no response)
192 * Not handled, but this should be a CAIF PROTOCOL ERROR
193 */
194
195int cfcnfg_del_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
196{
197 u8 channel_id = 0;
198 int ret = 0;
199 struct cfcnfg_phyinfo *phyinfo = NULL;
200 u8 phyid = 0;
201
202 caif_assert(adap_layer != NULL);
203 channel_id = adap_layer->id;
204 if (channel_id == 0) {
205 pr_err("CAIF: %s():adap_layer->id is 0\n", __func__);
206 ret = -ENOTCONN;
207 goto end;
208 }
209
210 if (adap_layer->dn == NULL) {
211 pr_err("CAIF: %s():adap_layer->dn is NULL\n", __func__);
212 ret = -ENODEV;
213 goto end;
214 }
215
216 if (adap_layer->dn != NULL)
217 phyid = cfsrvl_getphyid(adap_layer->dn);
218
219 phyinfo = cfcnfg_get_phyinfo(cnfg, phyid);
220 if (phyinfo == NULL) {
221 pr_warning("CAIF: %s(): No interface to send disconnect to\n",
222 __func__);
223 ret = -ENODEV;
224 goto end;
225 }
226
227 if (phyinfo->id != phyid
228 || phyinfo->phy_layer->id != phyid
229 || phyinfo->frm_layer->id != phyid) {
230
231 pr_err("CAIF: %s(): Inconsistency in phy registration\n",
232 __func__);
233 ret = -EINVAL;
234 goto end;
235 }
236
237 ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer);
238
239end:
240 if (phyinfo != NULL && --phyinfo->phy_ref_count == 0 &&
241 phyinfo->phy_layer != NULL &&
242 phyinfo->phy_layer->modemcmd != NULL) {
243 phyinfo->phy_layer->modemcmd(phyinfo->phy_layer,
244 _CAIF_MODEMCMD_PHYIF_USELESS);
245 }
246 return ret;
247
248}
249EXPORT_SYMBOL(cfcnfg_del_adapt_layer);
250
251static void cncfg_linkdestroy_rsp(struct cflayer *layer, u8 linkid,
252 struct cflayer *client_layer)
253{
254 struct cfcnfg *cnfg = container_obj(layer);
255 struct cflayer *servl;
256
257 /*
258 * 1) Remove service from the MUX layer. The MUX must
259 * guarantee that no more payload is sent "upwards" (receive).
260 */
261 servl = cfmuxl_remove_uplayer(cnfg->mux, linkid);
262
263 if (servl == NULL) {
264 pr_err("CAIF: %s(): PROTOCOL ERROR "
265 "- Error removing service_layer Linkid(%d)",
266 __func__, linkid);
267 return;
268 }
269 caif_assert(linkid == servl->id);
270
271 if (servl != client_layer && servl->up != client_layer) {
272 pr_err("CAIF: %s(): Error removing service_layer "
273 "Linkid(%d) %p %p",
274 __func__, linkid, (void *) servl,
275 (void *) client_layer);
276 return;
277 }
278
279 /*
280 * 2) DEINIT_RSP must guarantee that no more packets are transmitted
281 * from client (adap_layer) when it returns.
282 */
283
284 if (servl->ctrlcmd == NULL) {
285 pr_err("CAIF: %s(): Error servl->ctrlcmd == NULL", __func__);
286 return;
287 }
288
289 servl->ctrlcmd(servl, CAIF_CTRLCMD_DEINIT_RSP, 0);
290
291 /* 3) It is now safe to destroy the service layer. */
292 cfservl_destroy(servl);
293}
294
295/*
296 * NOTE: What happens on linksetup failure:
297 * 1a) No response - Too early
298 * This will not happen because enumerate is secured
299 * before using interface.
300 * 1b) No response - FATAL
301 * Not handled, but this should be a CAIF PROTOCOL ERROR
302 * Modem error, response is really expected - this case is
303 * not really handled.
305 * 2) O/E-bit indicates an error
305 * Handled in cnfg_reject_rsp
306 * 3) Not able to match on request
307 * Not handled, but this should be a CAIF PROTOCOL ERROR
308 * 4) Link-Error - (no response)
309 * Not handled, but this should be a CAIF PROTOCOL ERROR
310 */
311
312int
313cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
314 struct cfctrl_link_param *param,
315 struct cflayer *adap_layer)
316{
317 struct cflayer *frml;
318 if (adap_layer == NULL) {
319 pr_err("CAIF: %s(): adap_layer is zero", __func__);
320 return -EINVAL;
321 }
322 if (adap_layer->receive == NULL) {
323 pr_err("CAIF: %s(): adap_layer->receive is NULL", __func__);
324 return -EINVAL;
325 }
326 if (adap_layer->ctrlcmd == NULL) {
327 pr_err("CAIF: %s(): adap_layer->ctrlcmd == NULL", __func__);
328 return -EINVAL;
329 }
330 frml = cnfg->phy_layers[param->phyid].frm_layer;
331 if (frml == NULL) {
332 pr_err("CAIF: %s(): Specified PHY type does not exist!",
333 __func__);
334 return -ENODEV;
335 }
336 caif_assert(param->phyid == cnfg->phy_layers[param->phyid].id);
337 caif_assert(cnfg->phy_layers[param->phyid].frm_layer->id ==
338 param->phyid);
339 caif_assert(cnfg->phy_layers[param->phyid].phy_layer->id ==
340 param->phyid);
341 /* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */
342 cfctrl_enum_req(cnfg->ctrl, param->phyid);
343 cfctrl_linkup_request(cnfg->ctrl, param, adap_layer);
344 return 0;
345}
346EXPORT_SYMBOL(cfcnfg_add_adaptation_layer);
347
348static void cncfg_reject_rsp(struct cflayer *layer, u8 linkid,
349 struct cflayer *adapt_layer)
350{
351 if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL)
352 adapt_layer->ctrlcmd(adapt_layer,
353 CAIF_CTRLCMD_INIT_FAIL_RSP, 0);
354}
355
356static void
357cncfg_linkup_rsp(struct cflayer *layer, u8 linkid, enum cfctrl_srv serv,
358 u8 phyid, struct cflayer *adapt_layer)
359{
360 struct cfcnfg *cnfg = container_obj(layer);
361 struct cflayer *servicel = NULL;
362 struct cfcnfg_phyinfo *phyinfo;
363 if (adapt_layer == NULL) {
364 pr_err("CAIF: %s(): PROTOCOL ERROR "
365 "- LinkUp Request/Response did not match\n", __func__);
366 return;
367 }
368
369 caif_assert(cnfg != NULL);
370 caif_assert(phyid != 0);
371 phyinfo = &cnfg->phy_layers[phyid];
372 caif_assert(phyinfo != NULL);
373 caif_assert(phyinfo->id == phyid);
374 caif_assert(phyinfo->phy_layer != NULL);
375 caif_assert(phyinfo->phy_layer->id == phyid);
376
377 if (phyinfo != NULL &&
378 phyinfo->phy_ref_count++ == 0 &&
379 phyinfo->phy_layer != NULL &&
380 phyinfo->phy_layer->modemcmd != NULL) {
381 caif_assert(phyinfo->phy_layer->id == phyid);
382 phyinfo->phy_layer->modemcmd(phyinfo->phy_layer,
383 _CAIF_MODEMCMD_PHYIF_USEFULL);
384
385 }
386 adapt_layer->id = linkid;
387
388 switch (serv) {
389 case CFCTRL_SRV_VEI:
390 servicel = cfvei_create(linkid, &phyinfo->dev_info);
391 break;
392 case CFCTRL_SRV_DATAGRAM:
393 servicel = cfdgml_create(linkid, &phyinfo->dev_info);
394 break;
395 case CFCTRL_SRV_RFM:
396 servicel = cfrfml_create(linkid, &phyinfo->dev_info);
397 break;
398 case CFCTRL_SRV_UTIL:
399 servicel = cfutill_create(linkid, &phyinfo->dev_info);
400 break;
401 case CFCTRL_SRV_VIDEO:
402 servicel = cfvidl_create(linkid, &phyinfo->dev_info);
403 break;
404 case CFCTRL_SRV_DBG:
405 servicel = cfdbgl_create(linkid, &phyinfo->dev_info);
406 break;
407 default:
408 pr_err("CAIF: %s(): Protocol error. "
409 "Link setup response - unknown channel type\n",
410 __func__);
411 return;
412 }
413 if (!servicel) {
414 pr_warning("CAIF: %s(): Out of memory\n", __func__);
415 return;
416 }
417 layer_set_dn(servicel, cnfg->mux);
418 cfmuxl_set_uplayer(cnfg->mux, servicel, linkid);
419 layer_set_up(servicel, adapt_layer);
420 layer_set_dn(adapt_layer, servicel);
421 servicel->ctrlcmd(servicel, CAIF_CTRLCMD_INIT_RSP, 0);
422}
423
424void
425cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
426 void *dev, struct cflayer *phy_layer, u16 *phyid,
427 enum cfcnfg_phy_preference pref,
428 bool fcs, bool stx)
429{
430 struct cflayer *frml;
431 struct cflayer *phy_driver = NULL;
432 int i;
433
434
435 if (cnfg->phy_layers[cnfg->last_phyid].frm_layer == NULL) {
436 *phyid = cnfg->last_phyid;
437
438 /* range: 1..(MAX_PHY_LAYERS-1) */
439 cnfg->last_phyid =
440 (cnfg->last_phyid % (MAX_PHY_LAYERS - 1)) + 1;
441 } else {
442 *phyid = 0;
443 for (i = 1; i < MAX_PHY_LAYERS; i++) {
444 if (cnfg->phy_layers[i].frm_layer == NULL) {
445 *phyid = i;
446 break;
447 }
448 }
449 }
450 if (*phyid == 0) {
451 pr_err("CAIF: %s(): No Available PHY ID\n", __func__);
452 return;
453 }
454
455 switch (phy_type) {
456 case CFPHYTYPE_FRAG:
457 phy_driver =
458 cfserl_create(CFPHYTYPE_FRAG, *phyid, stx);
459 if (!phy_driver) {
460 pr_warning("CAIF: %s(): Out of memory\n", __func__);
461 return;
462 }
463
464 break;
465 case CFPHYTYPE_CAIF:
466 phy_driver = NULL;
467 break;
468 default:
469 pr_err("CAIF: %s(): %d", __func__, phy_type);
470 return;
471 break;
472 }
473
474 phy_layer->id = *phyid;
475 cnfg->phy_layers[*phyid].pref = pref;
476 cnfg->phy_layers[*phyid].id = *phyid;
477 cnfg->phy_layers[*phyid].dev_info.id = *phyid;
478 cnfg->phy_layers[*phyid].dev_info.dev = dev;
479 cnfg->phy_layers[*phyid].phy_layer = phy_layer;
480 cnfg->phy_layers[*phyid].phy_ref_count = 0;
481 phy_layer->type = phy_type;
482 frml = cffrml_create(*phyid, fcs);
483 if (!frml) {
484 pr_warning("CAIF: %s(): Out of memory\n", __func__);
485 return;
486 }
487 cnfg->phy_layers[*phyid].frm_layer = frml;
488 cfmuxl_set_dnlayer(cnfg->mux, frml, *phyid);
489 layer_set_up(frml, cnfg->mux);
490
491 if (phy_driver != NULL) {
492 phy_driver->id = *phyid;
493 layer_set_dn(frml, phy_driver);
494 layer_set_up(phy_driver, frml);
495 layer_set_dn(phy_driver, phy_layer);
496 layer_set_up(phy_layer, phy_driver);
497 } else {
498 layer_set_dn(frml, phy_layer);
499 layer_set_up(phy_layer, frml);
500 }
501}
502EXPORT_SYMBOL(cfcnfg_add_phy_layer);
503
504int cfcnfg_del_phy_layer(struct cfcnfg *cnfg, struct cflayer *phy_layer)
505{
506 struct cflayer *frml, *frml_dn;
507 u16 phyid;
508 phyid = phy_layer->id;
509 caif_assert(phyid == cnfg->phy_layers[phyid].id);
510 caif_assert(phy_layer == cnfg->phy_layers[phyid].phy_layer);
511 caif_assert(phy_layer->id == phyid);
512 caif_assert(cnfg->phy_layers[phyid].frm_layer->id == phyid);
513
514 memset(&cnfg->phy_layers[phy_layer->id], 0,
515 sizeof(struct cfcnfg_phyinfo));
516 frml = cfmuxl_remove_dnlayer(cnfg->mux, phy_layer->id);
517 frml_dn = frml->dn;
518 cffrml_set_uplayer(frml, NULL);
519 cffrml_set_dnlayer(frml, NULL);
520 kfree(frml);
521
522 if (phy_layer != frml_dn) {
523 layer_set_up(frml_dn, NULL);
524 layer_set_dn(frml_dn, NULL);
525 kfree(frml_dn);
526 }
527 layer_set_up(phy_layer, NULL);
528 return 0;
529}
530EXPORT_SYMBOL(cfcnfg_del_phy_layer);
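
A hedged sketch of how a CAIF link-layer driver might use cfcnfg_add_phy_layer() above to hook its lowest layer into the framing/mux stack; the cflayer callbacks and preference value are placeholders supplied by the driver, not part of this patch.

	#include <linux/kernel.h>
	#include <net/caif/caif_layer.h>
	#include <net/caif/cfcnfg.h>

	/* Illustrative sketch only: register a device that delivers complete CAIF
	 * frames, so no fragmentation layer (CFPHYTYPE_FRAG/cfserl) is inserted;
	 * fcs=true enables the checksum in the framing layer (cffrml). */
	static void example_register_phy(struct cfcnfg *cnfg, void *dev,
					 struct cflayer *phy_layer,
					 enum cfcnfg_phy_preference pref)
	{
		u16 phyid;

		cfcnfg_add_phy_layer(cnfg, CFPHYTYPE_CAIF, dev, phy_layer,
				     &phyid, pref, true /* fcs */, false /* stx */);
		pr_info("CAIF: example: registered phy id %u\n", phyid);
	}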
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
new file mode 100644
index 000000000000..11f80140f3cb
--- /dev/null
+++ b/net/caif/cfctrl.c
@@ -0,0 +1,664 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/stddef.h>
8#include <linux/spinlock.h>
9#include <linux/slab.h>
10#include <net/caif/caif_layer.h>
11#include <net/caif/cfpkt.h>
12#include <net/caif/cfctrl.h>
13
14#define container_obj(layr) container_of(layr, struct cfctrl, serv.layer)
15#define UTILITY_NAME_LENGTH 16
16#define CFPKT_CTRL_PKT_LEN 20
17
18
19#ifdef CAIF_NO_LOOP
20static int handle_loop(struct cfctrl *ctrl,
21 int cmd, struct cfpkt *pkt){
22 return CAIF_FAILURE;
23}
24#else
25static int handle_loop(struct cfctrl *ctrl,
26 int cmd, struct cfpkt *pkt);
27#endif
28static int cfctrl_recv(struct cflayer *layr, struct cfpkt *pkt);
29static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
30 int phyid);
31
32
33struct cflayer *cfctrl_create(void)
34{
35 struct cfctrl *this =
36 kmalloc(sizeof(struct cfctrl), GFP_ATOMIC);
37 if (!this) {
38 pr_warning("CAIF: %s(): Out of memory\n", __func__);
39 return NULL;
40 }
41 caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
42 memset(this, 0, sizeof(*this));
43 spin_lock_init(&this->info_list_lock);
44 atomic_set(&this->req_seq_no, 1);
45 atomic_set(&this->rsp_seq_no, 1);
46 this->serv.dev_info.id = 0xff;
47 this->serv.layer.id = 0;
48 this->serv.layer.receive = cfctrl_recv;
49 sprintf(this->serv.layer.name, "ctrl");
50 this->serv.layer.ctrlcmd = cfctrl_ctrlcmd;
51 spin_lock_init(&this->loop_linkid_lock);
52 this->loop_linkid = 1;
53 return &this->serv.layer;
54}
55
56static bool param_eq(struct cfctrl_link_param *p1, struct cfctrl_link_param *p2)
57{
58 bool eq =
59 p1->linktype == p2->linktype &&
60 p1->priority == p2->priority &&
61 p1->phyid == p2->phyid &&
62 p1->endpoint == p2->endpoint && p1->chtype == p2->chtype;
63
64 if (!eq)
65 return false;
66
67 switch (p1->linktype) {
68 case CFCTRL_SRV_VEI:
69 return true;
70 case CFCTRL_SRV_DATAGRAM:
71 return p1->u.datagram.connid == p2->u.datagram.connid;
72 case CFCTRL_SRV_RFM:
73 return
74 p1->u.rfm.connid == p2->u.rfm.connid &&
75 strcmp(p1->u.rfm.volume, p2->u.rfm.volume) == 0;
76 case CFCTRL_SRV_UTIL:
77 return
78 p1->u.utility.fifosize_kb == p2->u.utility.fifosize_kb
79 && p1->u.utility.fifosize_bufs ==
80 p2->u.utility.fifosize_bufs
81 && strcmp(p1->u.utility.name, p2->u.utility.name) == 0
82 && p1->u.utility.paramlen == p2->u.utility.paramlen
83 && memcmp(p1->u.utility.params, p2->u.utility.params,
84 p1->u.utility.paramlen) == 0;
85
86 case CFCTRL_SRV_VIDEO:
87 return p1->u.video.connid == p2->u.video.connid;
88 case CFCTRL_SRV_DBG:
89 return true;
90 case CFCTRL_SRV_DECM:
91 return false;
92 default:
93 return false;
94 }
95 return false;
96}
97
98bool cfctrl_req_eq(struct cfctrl_request_info *r1,
99 struct cfctrl_request_info *r2)
100{
101 if (r1->cmd != r2->cmd)
102 return false;
103 if (r1->cmd == CFCTRL_CMD_LINK_SETUP)
104 return param_eq(&r1->param, &r2->param);
105 else
106 return r1->channel_id == r2->channel_id;
107}
108
109/* Insert request at the end */
110void cfctrl_insert_req(struct cfctrl *ctrl,
111 struct cfctrl_request_info *req)
112{
113 struct cfctrl_request_info *p;
114 spin_lock(&ctrl->info_list_lock);
115 req->next = NULL;
116 atomic_inc(&ctrl->req_seq_no);
117 req->sequence_no = atomic_read(&ctrl->req_seq_no);
118 if (ctrl->first_req == NULL) {
119 ctrl->first_req = req;
120 spin_unlock(&ctrl->info_list_lock);
121 return;
122 }
123 p = ctrl->first_req;
124 while (p->next != NULL)
125 p = p->next;
126 p->next = req;
127 spin_unlock(&ctrl->info_list_lock);
128}
129
130static void cfctrl_insert_req2(struct cfctrl *ctrl, enum cfctrl_cmd cmd,
131 u8 linkid, struct cflayer *user_layer)
132{
133 struct cfctrl_request_info *req = kmalloc(sizeof(*req), GFP_KERNEL);
134 if (!req) {
135 pr_warning("CAIF: %s(): Out of memory\n", __func__);
136 return;
137 }
138 req->client_layer = user_layer;
139 req->cmd = cmd;
140 req->channel_id = linkid;
141 cfctrl_insert_req(ctrl, req);
142}
143
144/* Compare and remove request */
145struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl,
146 struct cfctrl_request_info *req)
147{
148 struct cfctrl_request_info *p;
149 struct cfctrl_request_info *ret;
150
151 spin_lock(&ctrl->info_list_lock);
152 if (ctrl->first_req == NULL) {
153 spin_unlock(&ctrl->info_list_lock);
154 return NULL;
155 }
156
157 if (cfctrl_req_eq(req, ctrl->first_req)) {
158 ret = ctrl->first_req;
159 caif_assert(ctrl->first_req);
160 atomic_set(&ctrl->rsp_seq_no,
161 ctrl->first_req->sequence_no);
162 ctrl->first_req = ctrl->first_req->next;
163 spin_unlock(&ctrl->info_list_lock);
164 return ret;
165 }
166
167 p = ctrl->first_req;
168
169 while (p->next != NULL) {
170 if (cfctrl_req_eq(req, p->next)) {
171 pr_warning("CAIF: %s(): Requests are not "
172 "received in order\n",
173 __func__);
174 ret = p->next;
175 atomic_set(&ctrl->rsp_seq_no,
176 p->next->sequence_no);
177 p->next = p->next->next;
178 spin_unlock(&ctrl->info_list_lock);
179 return ret;
180 }
181 p = p->next;
182 }
183 spin_unlock(&ctrl->info_list_lock);
184
185 pr_warning("CAIF: %s(): Request does not match\n",
186 __func__);
187 return NULL;
188}
189
190struct cfctrl_rsp *cfctrl_get_respfuncs(struct cflayer *layer)
191{
192 struct cfctrl *this = container_obj(layer);
193 return &this->res;
194}
195
196void cfctrl_set_dnlayer(struct cflayer *this, struct cflayer *dn)
197{
198 this->dn = dn;
199}
200
201void cfctrl_set_uplayer(struct cflayer *this, struct cflayer *up)
202{
203 this->up = up;
204}
205
206static void init_info(struct caif_payload_info *info, struct cfctrl *cfctrl)
207{
208 info->hdr_len = 0;
209 info->channel_id = cfctrl->serv.layer.id;
210 info->dev_info = &cfctrl->serv.dev_info;
211}
212
213void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
214{
215 struct cfctrl *cfctrl = container_obj(layer);
216 int ret;
217 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
218 if (!pkt) {
219 pr_warning("CAIF: %s(): Out of memory\n", __func__);
220 return;
221 }
222 caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
223 init_info(cfpkt_info(pkt), cfctrl);
224 cfpkt_info(pkt)->dev_info->id = physlinkid;
225 cfctrl->serv.dev_info.id = physlinkid;
226 cfpkt_addbdy(pkt, CFCTRL_CMD_ENUM);
227 cfpkt_addbdy(pkt, physlinkid);
228 ret =
229 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
230 if (ret < 0) {
231 pr_err("CAIF: %s(): Could not transmit enum message\n",
232 __func__);
233 cfpkt_destroy(pkt);
234 }
235}
236
237void cfctrl_linkup_request(struct cflayer *layer,
238 struct cfctrl_link_param *param,
239 struct cflayer *user_layer)
240{
241 struct cfctrl *cfctrl = container_obj(layer);
242 u32 tmp32;
243 u16 tmp16;
244 u8 tmp8;
245 struct cfctrl_request_info *req;
246 int ret;
247 char utility_name[16];
248 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
249 if (!pkt) {
250 pr_warning("CAIF: %s(): Out of memory\n", __func__);
251 return;
252 }
253 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP);
254 cfpkt_addbdy(pkt, (param->chtype << 4) + param->linktype);
255 cfpkt_addbdy(pkt, (param->priority << 3) + param->phyid);
256 cfpkt_addbdy(pkt, param->endpoint & 0x03);
257
258 switch (param->linktype) {
259 case CFCTRL_SRV_VEI:
260 break;
261 case CFCTRL_SRV_VIDEO:
262 cfpkt_addbdy(pkt, (u8) param->u.video.connid);
263 break;
264 case CFCTRL_SRV_DBG:
265 break;
266 case CFCTRL_SRV_DATAGRAM:
267 tmp32 = cpu_to_le32(param->u.datagram.connid);
268 cfpkt_add_body(pkt, &tmp32, 4);
269 break;
270 case CFCTRL_SRV_RFM:
271 /* Construct a frame, convert DatagramConnectionID to network
272 * format long and copy it out...
273 */
274 tmp32 = cpu_to_le32(param->u.rfm.connid);
275 cfpkt_add_body(pkt, &tmp32, 4);
276 /* Add volume name, including zero termination... */
277 cfpkt_add_body(pkt, param->u.rfm.volume,
278 strlen(param->u.rfm.volume) + 1);
279 break;
280 case CFCTRL_SRV_UTIL:
281 tmp16 = cpu_to_le16(param->u.utility.fifosize_kb);
282 cfpkt_add_body(pkt, &tmp16, 2);
283 tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs);
284 cfpkt_add_body(pkt, &tmp16, 2);
285 memset(utility_name, 0, sizeof(utility_name));
286 strncpy(utility_name, param->u.utility.name,
287 UTILITY_NAME_LENGTH - 1);
288 cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH);
289 tmp8 = param->u.utility.paramlen;
290 cfpkt_add_body(pkt, &tmp8, 1);
291 cfpkt_add_body(pkt, param->u.utility.params,
292 param->u.utility.paramlen);
293 break;
294 default:
295 pr_warning("CAIF: %s():Request setup of bad link type = %d\n",
296 __func__, param->linktype);
297 }
298 req = kmalloc(sizeof(*req), GFP_KERNEL);
299 if (!req) {
300 pr_warning("CAIF: %s(): Out of memory\n", __func__);
301 return;
302 }
303 memset(req, 0, sizeof(*req));
304 req->client_layer = user_layer;
305 req->cmd = CFCTRL_CMD_LINK_SETUP;
306 req->param = *param;
307 cfctrl_insert_req(cfctrl, req);
308 init_info(cfpkt_info(pkt), cfctrl);
309 cfpkt_info(pkt)->dev_info->id = param->phyid;
310 ret =
311 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
312 if (ret < 0) {
313 pr_err("CAIF: %s(): Could not transmit linksetup request\n",
314 __func__);
315 cfpkt_destroy(pkt);
316 }
317}
318
319int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
320 struct cflayer *client)
321{
322 int ret;
323 struct cfctrl *cfctrl = container_obj(layer);
324 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
325 if (!pkt) {
326 pr_warning("CAIF: %s(): Out of memory\n", __func__);
327 return -ENOMEM;
328 }
329 cfctrl_insert_req2(cfctrl, CFCTRL_CMD_LINK_DESTROY, channelid, client);
330 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY);
331 cfpkt_addbdy(pkt, channelid);
332 init_info(cfpkt_info(pkt), cfctrl);
333 ret =
334 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
335 if (ret < 0) {
336 pr_err("CAIF: %s(): Could not transmit link-down request\n",
337 __func__);
338 cfpkt_destroy(pkt);
339 }
340 return ret;
341}
342
343void cfctrl_sleep_req(struct cflayer *layer)
344{
345 int ret;
346 struct cfctrl *cfctrl = container_obj(layer);
347 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
348 if (!pkt) {
349 pr_warning("CAIF: %s(): Out of memory\n", __func__);
350 return;
351 }
352 cfpkt_addbdy(pkt, CFCTRL_CMD_SLEEP);
353 init_info(cfpkt_info(pkt), cfctrl);
354 ret =
355 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
356 if (ret < 0)
357 cfpkt_destroy(pkt);
358}
359
360void cfctrl_wake_req(struct cflayer *layer)
361{
362 int ret;
363 struct cfctrl *cfctrl = container_obj(layer);
364 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
365 if (!pkt) {
366 pr_warning("CAIF: %s(): Out of memory\n", __func__);
367 return;
368 }
369 cfpkt_addbdy(pkt, CFCTRL_CMD_WAKE);
370 init_info(cfpkt_info(pkt), cfctrl);
371 ret =
372 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
373 if (ret < 0)
374 cfpkt_destroy(pkt);
375}
376
377void cfctrl_getstartreason_req(struct cflayer *layer)
378{
379 int ret;
380 struct cfctrl *cfctrl = container_obj(layer);
381 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
382 if (!pkt) {
383 pr_warning("CAIF: %s(): Out of memory\n", __func__);
384 return;
385 }
386 cfpkt_addbdy(pkt, CFCTRL_CMD_START_REASON);
387 init_info(cfpkt_info(pkt), cfctrl);
388 ret =
389 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
390 if (ret < 0)
391 cfpkt_destroy(pkt);
392}
393
394
395static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
396{
397 u8 cmdrsp;
398 u8 cmd;
399 int ret = -1;
400 u16 tmp16;
401 u8 len;
402 u8 param[255];
403 u8 linkid;
404 struct cfctrl *cfctrl = container_obj(layer);
405 struct cfctrl_request_info rsp, *req;
406
407
408 cfpkt_extr_head(pkt, &cmdrsp, 1);
409 cmd = cmdrsp & CFCTRL_CMD_MASK;
410 if (cmd != CFCTRL_CMD_LINK_ERR
411 && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp)) {
412 if (handle_loop(cfctrl, cmd, pkt) == CAIF_FAILURE) {
413 pr_info("CAIF: %s() CAIF Protocol error:"
414 "Response bit not set\n", __func__);
415 goto error;
416 }
417 }
418
419 switch (cmd) {
420 case CFCTRL_CMD_LINK_SETUP:
421 {
422 enum cfctrl_srv serv;
423 enum cfctrl_srv servtype;
424 u8 endpoint;
425 u8 physlinkid;
426 u8 prio;
427 u8 tmp;
428 u32 tmp32;
429 u8 *cp;
430 int i;
431 struct cfctrl_link_param linkparam;
432 memset(&linkparam, 0, sizeof(linkparam));
433
434 cfpkt_extr_head(pkt, &tmp, 1);
435
436 serv = tmp & CFCTRL_SRV_MASK;
437 linkparam.linktype = serv;
438
439 servtype = tmp >> 4;
440 linkparam.chtype = servtype;
441
442 cfpkt_extr_head(pkt, &tmp, 1);
443 physlinkid = tmp & 0x07;
444 prio = tmp >> 3;
445
446 linkparam.priority = prio;
447 linkparam.phyid = physlinkid;
448 cfpkt_extr_head(pkt, &endpoint, 1);
449 linkparam.endpoint = endpoint & 0x03;
450
451 switch (serv) {
452 case CFCTRL_SRV_VEI:
453 case CFCTRL_SRV_DBG:
454 /* Link ID */
455 cfpkt_extr_head(pkt, &linkid, 1);
456 break;
457 case CFCTRL_SRV_VIDEO:
458 cfpkt_extr_head(pkt, &tmp, 1);
459 linkparam.u.video.connid = tmp;
460 /* Link ID */
461 cfpkt_extr_head(pkt, &linkid, 1);
462 break;
463
464 case CFCTRL_SRV_DATAGRAM:
465 cfpkt_extr_head(pkt, &tmp32, 4);
466 linkparam.u.datagram.connid =
467 le32_to_cpu(tmp32);
468 /* Link ID */
469 cfpkt_extr_head(pkt, &linkid, 1);
470 break;
471 case CFCTRL_SRV_RFM:
472 /* Construct a frame, convert
473 * DatagramConnectionID
474 * to network format long and copy it out...
475 */
476 cfpkt_extr_head(pkt, &tmp32, 4);
477 linkparam.u.rfm.connid =
478 le32_to_cpu(tmp32);
479 cp = (u8 *) linkparam.u.rfm.volume;
480 for (cfpkt_extr_head(pkt, &tmp, 1);
481 cfpkt_more(pkt) && tmp != '\0';
482 cfpkt_extr_head(pkt, &tmp, 1))
483 *cp++ = tmp;
484 *cp = '\0';
485
486 /* Link ID */
487 cfpkt_extr_head(pkt, &linkid, 1);
488
489 break;
490 case CFCTRL_SRV_UTIL:
491 /* Construct a frame, convert
492 * DatagramConnectionID
493 * to network format long and copy it out...
494 */
495 /* Fifosize KB */
496 cfpkt_extr_head(pkt, &tmp16, 2);
497 linkparam.u.utility.fifosize_kb =
498 le16_to_cpu(tmp16);
499 /* Fifosize bufs */
500 cfpkt_extr_head(pkt, &tmp16, 2);
501 linkparam.u.utility.fifosize_bufs =
502 le16_to_cpu(tmp16);
503 /* name */
504 cp = (u8 *) linkparam.u.utility.name;
505 caif_assert(sizeof(linkparam.u.utility.name)
506 >= UTILITY_NAME_LENGTH);
507 for (i = 0;
508 i < UTILITY_NAME_LENGTH
509 && cfpkt_more(pkt); i++) {
510 cfpkt_extr_head(pkt, &tmp, 1);
511 *cp++ = tmp;
512 }
513 /* Length */
514 cfpkt_extr_head(pkt, &len, 1);
515 linkparam.u.utility.paramlen = len;
516 /* Param Data */
517 cp = linkparam.u.utility.params;
518 while (cfpkt_more(pkt) && len--) {
519 cfpkt_extr_head(pkt, &tmp, 1);
520 *cp++ = tmp;
521 }
522 /* Link ID */
523 cfpkt_extr_head(pkt, &linkid, 1);
524 /* Length */
525 cfpkt_extr_head(pkt, &len, 1);
526 /* Param Data */
527 cfpkt_extr_head(pkt, &param, len);
528 break;
529 default:
530 pr_warning("CAIF: %s(): Request setup "
531 "- invalid link type (%d)",
532 __func__, serv);
533 goto error;
534 }
535
536 rsp.cmd = cmd;
537 rsp.param = linkparam;
538 req = cfctrl_remove_req(cfctrl, &rsp);
539
540 if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) ||
541 cfpkt_erroneous(pkt)) {
542 pr_err("CAIF: %s(): Invalid O/E bit or parse "
543 "error on CAIF control channel",
544 __func__);
545 cfctrl->res.reject_rsp(cfctrl->serv.layer.up,
546 0,
547 req ? req->client_layer
548 : NULL);
549 } else {
550 cfctrl->res.linksetup_rsp(cfctrl->serv.
551 layer.up, linkid,
552 serv, physlinkid,
553 req ? req->
554 client_layer : NULL);
555 }
556
557 if (req != NULL)
558 kfree(req);
559 }
560 break;
561 case CFCTRL_CMD_LINK_DESTROY:
562 cfpkt_extr_head(pkt, &linkid, 1);
563 rsp.cmd = cmd;
564 rsp.channel_id = linkid;
565 req = cfctrl_remove_req(cfctrl, &rsp);
566 cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid,
567 req ? req->client_layer : NULL);
568 if (req != NULL)
569 kfree(req);
570 break;
571 case CFCTRL_CMD_LINK_ERR:
572 pr_err("CAIF: %s(): Frame Error Indication received\n",
573 __func__);
574 cfctrl->res.linkerror_ind();
575 break;
576 case CFCTRL_CMD_ENUM:
577 cfctrl->res.enum_rsp();
578 break;
579 case CFCTRL_CMD_SLEEP:
580 cfctrl->res.sleep_rsp();
581 break;
582 case CFCTRL_CMD_WAKE:
583 cfctrl->res.wake_rsp();
584 break;
585 case CFCTRL_CMD_LINK_RECONF:
586 cfctrl->res.restart_rsp();
587 break;
588 case CFCTRL_CMD_RADIO_SET:
589 cfctrl->res.radioset_rsp();
590 break;
591 default:
592 pr_err("CAIF: %s(): Unrecognized Control Frame\n", __func__);
593 goto error;
594 break;
595 }
596 ret = 0;
597error:
598 cfpkt_destroy(pkt);
599 return ret;
600}
601
602static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
603 int phyid)
604{
605 struct cfctrl *this = container_obj(layr);
606 switch (ctrl) {
607 case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND:
608 case CAIF_CTRLCMD_FLOW_OFF_IND:
609 spin_lock(&this->info_list_lock);
610 if (this->first_req != NULL) {
611 pr_warning("CAIF: %s(): Received flow off in "
612 "control layer", __func__);
613 }
614 spin_unlock(&this->info_list_lock);
615 break;
616 default:
617 break;
618 }
619}
620
621#ifndef CAIF_NO_LOOP
622static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt)
623{
624 static int last_linkid;
625 u8 linkid, linktype, tmp;
626 switch (cmd) {
627 case CFCTRL_CMD_LINK_SETUP:
628 spin_lock(&ctrl->loop_linkid_lock);
629 for (linkid = last_linkid + 1; linkid < 255; linkid++)
630 if (!ctrl->loop_linkused[linkid])
631 goto found;
632 for (linkid = last_linkid - 1; linkid > 0; linkid--)
633 if (!ctrl->loop_linkused[linkid])
634 goto found;
635 spin_unlock(&ctrl->loop_linkid_lock);
636 return -EINVAL;
637found:
638 if (!ctrl->loop_linkused[linkid])
639 ctrl->loop_linkused[linkid] = 1;
640
641 last_linkid = linkid;
642
643 cfpkt_add_trail(pkt, &linkid, 1);
644 spin_unlock(&ctrl->loop_linkid_lock);
645 cfpkt_peek_head(pkt, &linktype, 1);
646 if (linktype == CFCTRL_SRV_UTIL) {
647 tmp = 0x01;
648 cfpkt_add_trail(pkt, &tmp, 1);
649 cfpkt_add_trail(pkt, &tmp, 1);
650 }
651 break;
652
653 case CFCTRL_CMD_LINK_DESTROY:
654 spin_lock(&ctrl->loop_linkid_lock);
655 cfpkt_peek_head(pkt, &linkid, 1);
656 ctrl->loop_linkused[linkid] = 0;
657 spin_unlock(&ctrl->loop_linkid_lock);
658 break;
659 default:
660 break;
661 }
662 return CAIF_SUCCESS;
663}
664#endif
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
new file mode 100644
index 000000000000..ab6b6dc34cf8
--- /dev/null
+++ b/net/caif/cfdbgl.c
@@ -0,0 +1,40 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/stddef.h>
8#include <linux/slab.h>
9#include <net/caif/caif_layer.h>
10#include <net/caif/cfsrvl.h>
11#include <net/caif/cfpkt.h>
12
13static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt);
14static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt);
15
16struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info)
17{
18 struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
19 if (!dbg) {
20 pr_warning("CAIF: %s(): Out of memory\n", __func__);
21 return NULL;
22 }
23 caif_assert(offsetof(struct cfsrvl, layer) == 0);
24 memset(dbg, 0, sizeof(struct cfsrvl));
25 cfsrvl_init(dbg, channel_id, dev_info);
26 dbg->layer.receive = cfdbgl_receive;
27 dbg->layer.transmit = cfdbgl_transmit;
28 snprintf(dbg->layer.name, CAIF_LAYER_NAME_SZ - 1, "dbg%d", channel_id);
29 return &dbg->layer;
30}
31
32static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt)
33{
34 return layr->up->receive(layr->up, pkt);
35}
36
37static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt)
38{
39 return layr->dn->transmit(layr->dn, pkt);
40}
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
new file mode 100644
index 000000000000..53194840ecb6
--- /dev/null
+++ b/net/caif/cfdgml.c
@@ -0,0 +1,108 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/stddef.h>
8#include <linux/spinlock.h>
9#include <linux/slab.h>
10#include <net/caif/caif_layer.h>
11#include <net/caif/cfsrvl.h>
12#include <net/caif/cfpkt.h>
13
14#define container_obj(layr) ((struct cfsrvl *) layr)
15
16#define DGM_CMD_BIT 0x80
17#define DGM_FLOW_OFF 0x81
18#define DGM_FLOW_ON 0x80
19#define DGM_CTRL_PKT_SIZE 1
20
21static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt);
22static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt);
23
24struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info)
25{
26 struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
27 if (!dgm) {
28 pr_warning("CAIF: %s(): Out of memory\n", __func__);
29 return NULL;
30 }
31 caif_assert(offsetof(struct cfsrvl, layer) == 0);
32 memset(dgm, 0, sizeof(struct cfsrvl));
33 cfsrvl_init(dgm, channel_id, dev_info);
34 dgm->layer.receive = cfdgml_receive;
35 dgm->layer.transmit = cfdgml_transmit;
36 snprintf(dgm->layer.name, CAIF_LAYER_NAME_SZ - 1, "dgm%d", channel_id);
37 dgm->layer.name[CAIF_LAYER_NAME_SZ - 1] = '\0';
38 return &dgm->layer;
39}
40
41static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
42{
43 u8 cmd = -1;
44 u8 dgmhdr[3];
45 int ret;
46 caif_assert(layr->up != NULL);
47 caif_assert(layr->receive != NULL);
48 caif_assert(layr->ctrlcmd != NULL);
49
50 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
51 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
52 cfpkt_destroy(pkt);
53 return -EPROTO;
54 }
55
56 if ((cmd & DGM_CMD_BIT) == 0) {
57 if (cfpkt_extr_head(pkt, &dgmhdr, 3) < 0) {
58 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
59 cfpkt_destroy(pkt);
60 return -EPROTO;
61 }
62 ret = layr->up->receive(layr->up, pkt);
63 return ret;
64 }
65
66 switch (cmd) {
67 case DGM_FLOW_OFF: /* FLOW OFF */
68 layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0);
69 cfpkt_destroy(pkt);
70 return 0;
71 case DGM_FLOW_ON: /* FLOW ON */
72 layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0);
73 cfpkt_destroy(pkt);
74 return 0;
75 default:
76 cfpkt_destroy(pkt);
77 pr_info("CAIF: %s(): Unknown datagram control %d (0x%x)\n",
78 __func__, cmd, cmd);
79 return -EPROTO;
80 }
81}
82
83static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt)
84{
85 u32 zero = 0;
86 struct caif_payload_info *info;
87 struct cfsrvl *service = container_obj(layr);
88 int ret;
89 if (!cfsrvl_ready(service, &ret))
90 return ret;
91
92 cfpkt_add_head(pkt, &zero, 4);
93
94 /* Add info for MUX-layer to route the packet out. */
95 info = cfpkt_info(pkt);
96 info->channel_id = service->layer.id;
97 /* To optimize alignment, we add up the size of CAIF header
98 * before payload.
99 */
100 info->hdr_len = 4;
101 info->dev_info = &service->dev_info;
102 ret = layr->dn->transmit(layr->dn, pkt);
103 if (ret < 0) {
104 u32 tmp32;
105 cfpkt_extr_head(pkt, &tmp32, 4);
106 }
107 return ret;
108}
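
In the datagram service above, the first byte of every received packet either
carries payload (command bit clear, with a 3-byte datagram header following)
or a flow-control command (DGM_FLOW_ON/DGM_FLOW_OFF). A small illustrative
sketch of that classification, reusing the DGM_* values from cfdgml.c:

#include <stdio.h>
#include <stdint.h>

#define DGM_CMD_BIT  0x80
#define DGM_FLOW_OFF 0x81
#define DGM_FLOW_ON  0x80

static const char *classify(uint8_t first_byte)
{
	if (!(first_byte & DGM_CMD_BIT))
		return "payload (3-byte datagram header follows)";
	if (first_byte == DGM_FLOW_ON)
		return "flow-on indication";
	if (first_byte == DGM_FLOW_OFF)
		return "flow-off indication";
	return "unknown control command";
}

int main(void)
{
	printf("0x00 -> %s\n", classify(0x00));
	printf("0x81 -> %s\n", classify(0x81));
	return 0;
}
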
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
new file mode 100644
index 000000000000..e86a4ca3b217
--- /dev/null
+++ b/net/caif/cffrml.c
@@ -0,0 +1,151 @@
1/*
2 * CAIF Framing Layer.
3 *
4 * Copyright (C) ST-Ericsson AB 2010
5 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
6 * License terms: GNU General Public License (GPL) version 2
7 */
8
9#include <linux/stddef.h>
10#include <linux/spinlock.h>
11#include <linux/slab.h>
12#include <linux/crc-ccitt.h>
13#include <net/caif/caif_layer.h>
14#include <net/caif/cfpkt.h>
15#include <net/caif/cffrml.h>
16
17#define container_obj(layr) container_of(layr, struct cffrml, layer)
18
19struct cffrml {
20 struct cflayer layer;
21 bool dofcs; /* !< FCS active */
22};
23
24static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt);
25static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt);
26static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
27 int phyid);
28
29static u32 cffrml_rcv_error;
30static u32 cffrml_rcv_checsum_error;
31struct cflayer *cffrml_create(u16 phyid, bool use_fcs)
32{
33 struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC);
34 if (!this) {
35 pr_warning("CAIF: %s(): Out of memory\n", __func__);
36 return NULL;
37 }
38 caif_assert(offsetof(struct cffrml, layer) == 0);
39
40 memset(this, 0, sizeof(struct cffrml));
41 this->layer.receive = cffrml_receive;
42 this->layer.transmit = cffrml_transmit;
43 this->layer.ctrlcmd = cffrml_ctrlcmd;
44 snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "frm%d", phyid);
45 this->dofcs = use_fcs;
46 this->layer.id = phyid;
47 return (struct cflayer *) this;
48}
49
50void cffrml_set_uplayer(struct cflayer *this, struct cflayer *up)
51{
52 this->up = up;
53}
54
55void cffrml_set_dnlayer(struct cflayer *this, struct cflayer *dn)
56{
57 this->dn = dn;
58}
59
60static u16 cffrml_checksum(u16 chks, void *buf, u16 len)
61{
62 /* FIXME: FCS should be moved to glue in order to use OS-Specific
63 * solutions
64 */
65 return crc_ccitt(chks, buf, len);
66}
67
68static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
69{
70 u16 tmp;
71 u16 len;
72 u16 hdrchks;
73 u16 pktchks;
74 struct cffrml *this;
75 this = container_obj(layr);
76
77 cfpkt_extr_head(pkt, &tmp, 2);
78 len = le16_to_cpu(tmp);
79
80 /* Subtract for FCS on length if FCS is not used. */
81 if (!this->dofcs)
82 len -= 2;
83
84 if (cfpkt_setlen(pkt, len) < 0) {
85 ++cffrml_rcv_error;
86 pr_err("CAIF: %s():Framing length error (%d)\n", __func__, len);
87 cfpkt_destroy(pkt);
88 return -EPROTO;
89 }
90 /*
91 * Don't do extract if FCS is false, rather do setlen - then we don't
92 * get a cache-miss.
93 */
94 if (this->dofcs) {
95 cfpkt_extr_trail(pkt, &tmp, 2);
96 hdrchks = le16_to_cpu(tmp);
97 pktchks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff);
98 if (pktchks != hdrchks) {
99 cfpkt_add_trail(pkt, &tmp, 2);
100 ++cffrml_rcv_error;
101 ++cffrml_rcv_checsum_error;
102 pr_info("CAIF: %s(): Frame checksum error "
103 "(0x%x != 0x%x)\n", __func__, hdrchks, pktchks);
104 return -EILSEQ;
105 }
106 }
107 if (cfpkt_erroneous(pkt)) {
108 ++cffrml_rcv_error;
109 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
110 cfpkt_destroy(pkt);
111 return -EPROTO;
112 }
113 return layr->up->receive(layr->up, pkt);
114}
115
116static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)
117{
118 u16 tmp;
119 u16 chks;
120 u16 len;
121 int ret;
122 struct cffrml *this = container_obj(layr);
123 if (this->dofcs) {
124 chks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff);
125 tmp = cpu_to_le16(chks);
126 cfpkt_add_trail(pkt, &tmp, 2);
127 } else {
128 cfpkt_pad_trail(pkt, 2);
129 }
130 len = cfpkt_getlen(pkt);
131 tmp = cpu_to_le16(len);
132 cfpkt_add_head(pkt, &tmp, 2);
133 cfpkt_info(pkt)->hdr_len += 2;
134 if (cfpkt_erroneous(pkt)) {
135 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
136 return -EPROTO;
137 }
138 ret = layr->dn->transmit(layr->dn, pkt);
139 if (ret < 0) {
140 /* Remove header on faulty packet. */
141 cfpkt_extr_head(pkt, &tmp, 2);
142 }
143 return ret;
144}
145
146static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
147 int phyid)
148{
149 if (layr->up->ctrlcmd)
150 layr->up->ctrlcmd(layr->up, ctrl, layr->id);
151}
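
The framing layer above prepends a 2-byte little-endian length (covering the
payload plus the 2 trailing FCS or padding bytes) and, when FCS is enabled,
appends a CRC-CCITT computed with seed 0xffff over the payload. A userspace
sketch of that frame layout; the bit-wise CRC is assumed to be equivalent to
the kernel's crc_ccitt() (reflected polynomial 0x8408, seed supplied by the
caller), and all function names here are illustrative:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint16_t crc_ccitt_sketch(uint16_t crc, const uint8_t *buf, size_t len)
{
	while (len--) {
		crc ^= *buf++;
		for (int bit = 0; bit < 8; bit++)
			crc = (crc & 1) ? (crc >> 1) ^ 0x8408 : crc >> 1;
	}
	return crc;
}

/* Build [len lo][len hi][payload ...][fcs lo][fcs hi], as cffrml_transmit()
 * does when this->dofcs is set. */
static size_t frame_build(uint8_t *out, const uint8_t *pay, uint16_t paylen)
{
	uint16_t len = paylen + 2;	/* payload + FCS, as on the wire */
	uint16_t fcs = crc_ccitt_sketch(0xffff, pay, paylen);

	out[0] = len & 0xff;
	out[1] = len >> 8;
	memcpy(out + 2, pay, paylen);
	out[2 + paylen] = fcs & 0xff;
	out[3 + paylen] = fcs >> 8;
	return 2 + paylen + 2;
}

int main(void)
{
	uint8_t buf[64];
	const uint8_t pay[] = { 0x01, 0x02, 0x03 };
	size_t n = frame_build(buf, pay, sizeof(pay));

	printf("frame is %zu bytes, length field is %u\n",
	       n, buf[0] | buf[1] << 8);
	return 0;
}
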
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
new file mode 100644
index 000000000000..6fb9f9e96cf8
--- /dev/null
+++ b/net/caif/cfmuxl.c
@@ -0,0 +1,246 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6#include <linux/stddef.h>
7#include <linux/spinlock.h>
8#include <linux/slab.h>
9#include <net/caif/cfpkt.h>
10#include <net/caif/cfmuxl.h>
11#include <net/caif/cfsrvl.h>
12#include <net/caif/cffrml.h>
13
14#define container_obj(layr) container_of(layr, struct cfmuxl, layer)
15
16#define CAIF_CTRL_CHANNEL 0
17#define UP_CACHE_SIZE 8
18#define DN_CACHE_SIZE 8
19
20struct cfmuxl {
21 struct cflayer layer;
22 struct list_head srvl_list;
23 struct list_head frml_list;
24 struct cflayer *up_cache[UP_CACHE_SIZE];
25 struct cflayer *dn_cache[DN_CACHE_SIZE];
26 /*
27 * Set when inserting or removing downwards layers.
28 */
29 spinlock_t transmit_lock;
30
31 /*
32 * Set when inserting or removing upwards layers.
33 */
34 spinlock_t receive_lock;
35
36};
37
38static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt);
39static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt);
40static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
41 int phyid);
42static struct cflayer *get_up(struct cfmuxl *muxl, u16 id);
43
44struct cflayer *cfmuxl_create(void)
45{
46 struct cfmuxl *this = kmalloc(sizeof(struct cfmuxl), GFP_ATOMIC);
47 if (!this)
48 return NULL;
49 memset(this, 0, sizeof(*this));
50 this->layer.receive = cfmuxl_receive;
51 this->layer.transmit = cfmuxl_transmit;
52 this->layer.ctrlcmd = cfmuxl_ctrlcmd;
53 INIT_LIST_HEAD(&this->srvl_list);
54 INIT_LIST_HEAD(&this->frml_list);
55 spin_lock_init(&this->transmit_lock);
56 spin_lock_init(&this->receive_lock);
57 snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "mux");
58 return &this->layer;
59}
60
61int cfmuxl_set_uplayer(struct cflayer *layr, struct cflayer *up, u8 linkid)
62{
63 struct cfmuxl *muxl = container_obj(layr);
64 spin_lock(&muxl->receive_lock);
65 list_add(&up->node, &muxl->srvl_list);
66 spin_unlock(&muxl->receive_lock);
67 return 0;
68}
69
70bool cfmuxl_is_phy_inuse(struct cflayer *layr, u8 phyid)
71{
72 struct list_head *node;
73 struct cflayer *layer;
74 struct cfmuxl *muxl = container_obj(layr);
75 bool match = false;
76 spin_lock(&muxl->receive_lock);
77
78 list_for_each(node, &muxl->srvl_list) {
79 layer = list_entry(node, struct cflayer, node);
80 if (cfsrvl_phyid_match(layer, phyid)) {
81 match = true;
82 break;
83 }
84
85 }
86 spin_unlock(&muxl->receive_lock);
87 return match;
88}
89
90u8 cfmuxl_get_phyid(struct cflayer *layr, u8 channel_id)
91{
92 struct cflayer *up;
93 int phyid;
94 struct cfmuxl *muxl = container_obj(layr);
95 spin_lock(&muxl->receive_lock);
96 up = get_up(muxl, channel_id);
97 if (up != NULL)
98 phyid = cfsrvl_getphyid(up);
99 else
100 phyid = 0;
101 spin_unlock(&muxl->receive_lock);
102 return phyid;
103}
104
105int cfmuxl_set_dnlayer(struct cflayer *layr, struct cflayer *dn, u8 phyid)
106{
107 struct cfmuxl *muxl = (struct cfmuxl *) layr;
108 spin_lock(&muxl->transmit_lock);
109 list_add(&dn->node, &muxl->frml_list);
110 spin_unlock(&muxl->transmit_lock);
111 return 0;
112}
113
114static struct cflayer *get_from_id(struct list_head *list, u16 id)
115{
116 struct list_head *node;
117 struct cflayer *layer;
118 list_for_each(node, list) {
119 layer = list_entry(node, struct cflayer, node);
120 if (layer->id == id)
121 return layer;
122 }
123 return NULL;
124}
125
126struct cflayer *cfmuxl_remove_dnlayer(struct cflayer *layr, u8 phyid)
127{
128 struct cfmuxl *muxl = container_obj(layr);
129 struct cflayer *dn;
130 spin_lock(&muxl->transmit_lock);
131 memset(muxl->dn_cache, 0, sizeof(muxl->dn_cache));
132 dn = get_from_id(&muxl->frml_list, phyid);
133 if (dn == NULL) {
134 spin_unlock(&muxl->transmit_lock);
135 return NULL;
136 }
137 list_del(&dn->node);
138 caif_assert(dn != NULL);
139 spin_unlock(&muxl->transmit_lock);
140 return dn;
141}
142
143/* Invariant: lock is taken */
144static struct cflayer *get_up(struct cfmuxl *muxl, u16 id)
145{
146 struct cflayer *up;
147 int idx = id % UP_CACHE_SIZE;
148 up = muxl->up_cache[idx];
149 if (up == NULL || up->id != id) {
150 up = get_from_id(&muxl->srvl_list, id);
151 muxl->up_cache[idx] = up;
152 }
153 return up;
154}
155
156/* Invariant: lock is taken */
157static struct cflayer *get_dn(struct cfmuxl *muxl, struct dev_info *dev_info)
158{
159 struct cflayer *dn;
160 int idx = dev_info->id % DN_CACHE_SIZE;
161 dn = muxl->dn_cache[idx];
162 if (dn == NULL || dn->id != dev_info->id) {
163 dn = get_from_id(&muxl->frml_list, dev_info->id);
164 muxl->dn_cache[idx] = dn;
165 }
166 return dn;
167}
168
169struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id)
170{
171 struct cflayer *up;
172 struct cfmuxl *muxl = container_obj(layr);
173 spin_lock(&muxl->receive_lock);
174 up = get_up(muxl, id);
175 memset(muxl->up_cache, 0, sizeof(muxl->up_cache));
176 list_del(&up->node);
177 spin_unlock(&muxl->receive_lock);
178 return up;
179}
180
181static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
182{
183 int ret;
184 struct cfmuxl *muxl = container_obj(layr);
185 u8 id;
186 struct cflayer *up;
187 if (cfpkt_extr_head(pkt, &id, 1) < 0) {
188 pr_err("CAIF: %s(): erroneous Caif Packet\n", __func__);
189 cfpkt_destroy(pkt);
190 return -EPROTO;
191 }
192
193 spin_lock(&muxl->receive_lock);
194 up = get_up(muxl, id);
195 spin_unlock(&muxl->receive_lock);
196 if (up == NULL) {
197 pr_info("CAIF: %s():Received data on unknown link ID = %d "
198 "(0x%x) up == NULL", __func__, id, id);
199 cfpkt_destroy(pkt);
200 /*
201 * Don't return ERROR, since modem misbehaves and sends out
202 * flow on before linksetup response.
203 */
204 return /* CFGLU_EPROT; */ 0;
205 }
206
207 ret = up->receive(up, pkt);
208 return ret;
209}
210
211static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt)
212{
213 int ret;
214 struct cfmuxl *muxl = container_obj(layr);
215 u8 linkid;
216 struct cflayer *dn;
217 struct caif_payload_info *info = cfpkt_info(pkt);
218 dn = get_dn(muxl, cfpkt_info(pkt)->dev_info);
219 if (dn == NULL) {
220 pr_warning("CAIF: %s(): Send data on unknown phy "
221 "ID = %d (0x%x)\n",
222 __func__, info->dev_info->id, info->dev_info->id);
223 return -ENOTCONN;
224 }
225 info->hdr_len += 1;
226 linkid = info->channel_id;
227 cfpkt_add_head(pkt, &linkid, 1);
228 ret = dn->transmit(dn, pkt);
229 /* Remove MUX protocol header upon error. */
230 if (ret < 0)
231 cfpkt_extr_head(pkt, &linkid, 1);
232 return ret;
233}
234
235static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
236 int phyid)
237{
238 struct cfmuxl *muxl = container_obj(layr);
239 struct list_head *node;
240 struct cflayer *layer;
241 list_for_each(node, &muxl->srvl_list) {
242 layer = list_entry(node, struct cflayer, node);
243 if (cfsrvl_phyid_match(layer, phyid))
244 layer->ctrlcmd(layer, ctrl, phyid);
245 }
246}
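
The mux layer above routes on a single byte: on transmit it prepends the
channel id in front of the packet, on receive it strips that byte and looks up
the matching service layer. Lookups go through a small direct-mapped cache
indexed by id modulo the cache size, falling back to a list walk on a miss.
A minimal sketch of that cache policy with illustrative names (layer_lookup
stands in for get_from_id()):

#include <stdio.h>

#define UP_CACHE_SIZE 8

struct layer {
	int id;
};

static struct layer layers[] = { { 3 }, { 11 } };	/* 11 % 8 == 3: collides */
static struct layer *cache[UP_CACHE_SIZE];

static struct layer *layer_lookup(int id)		/* slow path (list walk) */
{
	for (unsigned int i = 0; i < sizeof(layers) / sizeof(layers[0]); i++)
		if (layers[i].id == id)
			return &layers[i];
	return NULL;
}

static struct layer *get_cached(int id)
{
	int idx = id % UP_CACHE_SIZE;
	struct layer *l = cache[idx];

	if (l == NULL || l->id != id) {			/* miss or collision */
		l = layer_lookup(id);
		cache[idx] = l;
	}
	return l;
}

int main(void)
{
	printf("id 3  -> %p\n", (void *)get_cached(3));
	printf("id 11 -> %p\n", (void *)get_cached(11));	/* evicts id 3 */
	return 0;
}
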
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
new file mode 100644
index 000000000000..83fff2ff6658
--- /dev/null
+++ b/net/caif/cfpkt_skbuff.c
@@ -0,0 +1,571 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/string.h>
8#include <linux/skbuff.h>
9#include <linux/hardirq.h>
10#include <net/caif/cfpkt.h>
11
12#define PKT_PREFIX CAIF_NEEDED_HEADROOM
13#define PKT_POSTFIX CAIF_NEEDED_TAILROOM
14#define PKT_LEN_WHEN_EXTENDING 128
15#define PKT_ERROR(pkt, errmsg) do { \
16 cfpkt_priv(pkt)->erronous = true; \
17 skb_reset_tail_pointer(&pkt->skb); \
18 pr_warning("CAIF: " errmsg);\
19 } while (0)
20
21struct cfpktq {
22 struct sk_buff_head head;
23 atomic_t count;
24 /* Lock protects count updates */
25 spinlock_t lock;
26};
27
28/*
29 * net/caif/ is generic and does not
30 * understand SKB, so we do this typecast
31 */
32struct cfpkt {
33 struct sk_buff skb;
34};
35
36/* Private data inside SKB */
37struct cfpkt_priv_data {
38 struct dev_info dev_info;
39 bool erronous;
40};
41
42inline struct cfpkt_priv_data *cfpkt_priv(struct cfpkt *pkt)
43{
44 return (struct cfpkt_priv_data *) pkt->skb.cb;
45}
46
47inline bool is_erronous(struct cfpkt *pkt)
48{
49 return cfpkt_priv(pkt)->erronous;
50}
51
52inline struct sk_buff *pkt_to_skb(struct cfpkt *pkt)
53{
54 return &pkt->skb;
55}
56
57inline struct cfpkt *skb_to_pkt(struct sk_buff *skb)
58{
59 return (struct cfpkt *) skb;
60}
61
62
63struct cfpkt *cfpkt_fromnative(enum caif_direction dir, void *nativepkt)
64{
65 struct cfpkt *pkt = skb_to_pkt(nativepkt);
66 cfpkt_priv(pkt)->erronous = false;
67 return pkt;
68}
69EXPORT_SYMBOL(cfpkt_fromnative);
70
71void *cfpkt_tonative(struct cfpkt *pkt)
72{
73 return (void *) pkt;
74}
75EXPORT_SYMBOL(cfpkt_tonative);
76
77static struct cfpkt *cfpkt_create_pfx(u16 len, u16 pfx)
78{
79 struct sk_buff *skb;
80
81 if (likely(in_interrupt()))
82 skb = alloc_skb(len + pfx, GFP_ATOMIC);
83 else
84 skb = alloc_skb(len + pfx, GFP_KERNEL);
85
86 if (unlikely(skb == NULL))
87 return NULL;
88
89 skb_reserve(skb, pfx);
90 return skb_to_pkt(skb);
91}
92
93inline struct cfpkt *cfpkt_create(u16 len)
94{
95 return cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX);
96}
97EXPORT_SYMBOL(cfpkt_create);
98
99void cfpkt_destroy(struct cfpkt *pkt)
100{
101 struct sk_buff *skb = pkt_to_skb(pkt);
102 kfree_skb(skb);
103}
104EXPORT_SYMBOL(cfpkt_destroy);
105
106inline bool cfpkt_more(struct cfpkt *pkt)
107{
108 struct sk_buff *skb = pkt_to_skb(pkt);
109 return skb->len > 0;
110}
111EXPORT_SYMBOL(cfpkt_more);
112
113int cfpkt_peek_head(struct cfpkt *pkt, void *data, u16 len)
114{
115 struct sk_buff *skb = pkt_to_skb(pkt);
116 if (skb_headlen(skb) >= len) {
117 memcpy(data, skb->data, len);
118 return 0;
119 }
120 return !cfpkt_extr_head(pkt, data, len) &&
121 !cfpkt_add_head(pkt, data, len);
122}
123EXPORT_SYMBOL(cfpkt_peek_head);
124
125int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len)
126{
127 struct sk_buff *skb = pkt_to_skb(pkt);
128 u8 *from;
129 if (unlikely(is_erronous(pkt)))
130 return -EPROTO;
131
132 if (unlikely(len > skb->len)) {
133 PKT_ERROR(pkt, "cfpkt_extr_head read beyond end of packet\n");
134 return -EPROTO;
135 }
136
137 if (unlikely(len > skb_headlen(skb))) {
138 if (unlikely(skb_linearize(skb) != 0)) {
139 PKT_ERROR(pkt, "cfpkt_extr_head linearize failed\n");
140 return -EPROTO;
141 }
142 }
143 from = skb_pull(skb, len);
144 from -= len;
145 memcpy(data, from, len);
146 return 0;
147}
148EXPORT_SYMBOL(cfpkt_extr_head);
149
150int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len)
151{
152 struct sk_buff *skb = pkt_to_skb(pkt);
153 u8 *data = dta;
154 u8 *from;
155 if (unlikely(is_erronous(pkt)))
156 return -EPROTO;
157
158 if (unlikely(skb_linearize(skb) != 0)) {
159 PKT_ERROR(pkt, "cfpkt_extr_trail linearize failed\n");
160 return -EPROTO;
161 }
162 if (unlikely(skb->data + len > skb_tail_pointer(skb))) {
163 PKT_ERROR(pkt, "cfpkt_extr_trail read beyond end of packet\n");
164 return -EPROTO;
165 }
166 from = skb_tail_pointer(skb) - len;
167 skb_trim(skb, skb->len - len);
168 memcpy(data, from, len);
169 return 0;
170}
171EXPORT_SYMBOL(cfpkt_extr_trail);
172
173int cfpkt_pad_trail(struct cfpkt *pkt, u16 len)
174{
175 return cfpkt_add_body(pkt, NULL, len);
176}
177EXPORT_SYMBOL(cfpkt_pad_trail);
178
179int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
180{
181 struct sk_buff *skb = pkt_to_skb(pkt);
182 struct sk_buff *lastskb;
183 u8 *to;
184 u16 addlen = 0;
185
186
187 if (unlikely(is_erronous(pkt)))
188 return -EPROTO;
189
190 lastskb = skb;
191
192 /* Check whether we need to add space at the tail */
193 if (unlikely(skb_tailroom(skb) < len)) {
194 if (likely(len < PKT_LEN_WHEN_EXTENDING))
195 addlen = PKT_LEN_WHEN_EXTENDING;
196 else
197 addlen = len;
198 }
199
200 /* Check whether we need to change the SKB before writing to the tail */
201 if (unlikely((addlen > 0) || skb_cloned(skb) || skb_shared(skb))) {
202
203 /* Make sure data is writable */
204 if (unlikely(skb_cow_data(skb, addlen, &lastskb) < 0)) {
205 PKT_ERROR(pkt, "cfpkt_add_body: cow failed\n");
206 return -EPROTO;
207 }
208 /*
209 * Is the SKB non-linear after skb_cow_data()? If so, we are
210 * going to add data to the last SKB, so we need to adjust
211 * lengths of the top SKB.
212 */
213 if (lastskb != skb) {
214 pr_warning("CAIF: %s(): Packet is non-linear\n",
215 __func__);
216 skb->len += len;
217 skb->data_len += len;
218 }
219 }
220
221 /* All set to put the last SKB and optionally write data there. */
222 to = skb_put(lastskb, len);
223 if (likely(data))
224 memcpy(to, data, len);
225 return 0;
226}
227EXPORT_SYMBOL(cfpkt_add_body);
228
229inline int cfpkt_addbdy(struct cfpkt *pkt, u8 data)
230{
231 return cfpkt_add_body(pkt, &data, 1);
232}
233EXPORT_SYMBOL(cfpkt_addbdy);
234
235int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len)
236{
237 struct sk_buff *skb = pkt_to_skb(pkt);
238 struct sk_buff *lastskb;
239 u8 *to;
240 const u8 *data = data2;
241 if (unlikely(is_erronous(pkt)))
242 return -EPROTO;
243 if (unlikely(skb_headroom(skb) < len)) {
244 PKT_ERROR(pkt, "cfpkt_add_head: no headroom\n");
245 return -EPROTO;
246 }
247
248 /* Make sure data is writable */
249 if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) {
250 PKT_ERROR(pkt, "cfpkt_add_head: cow failed\n");
251 return -EPROTO;
252 }
253
254 to = skb_push(skb, len);
255 memcpy(to, data, len);
256 return 0;
257}
258EXPORT_SYMBOL(cfpkt_add_head);
259
260inline int cfpkt_add_trail(struct cfpkt *pkt, const void *data, u16 len)
261{
262 return cfpkt_add_body(pkt, data, len);
263}
264EXPORT_SYMBOL(cfpkt_add_trail);
265
266inline u16 cfpkt_getlen(struct cfpkt *pkt)
267{
268 struct sk_buff *skb = pkt_to_skb(pkt);
269 return skb->len;
270}
271EXPORT_SYMBOL(cfpkt_getlen);
272
273inline u16 cfpkt_iterate(struct cfpkt *pkt,
274 u16 (*iter_func)(u16, void *, u16),
275 u16 data)
276{
277 /*
278 * Don't care about the performance hit of linearizing,
279 * Checksum should not be used on high-speed interfaces anyway.
280 */
281 if (unlikely(is_erronous(pkt)))
282 return -EPROTO;
283 if (unlikely(skb_linearize(&pkt->skb) != 0)) {
284 PKT_ERROR(pkt, "cfpkt_iterate: linearize failed\n");
285 return -EPROTO;
286 }
287 return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt));
288}
289EXPORT_SYMBOL(cfpkt_iterate);
290
291int cfpkt_setlen(struct cfpkt *pkt, u16 len)
292{
293 struct sk_buff *skb = pkt_to_skb(pkt);
294
295
296 if (unlikely(is_erronous(pkt)))
297 return -EPROTO;
298
299 if (likely(len <= skb->len)) {
300 if (unlikely(skb->data_len))
301 ___pskb_trim(skb, len);
302 else
303 skb_trim(skb, len);
304
305 return cfpkt_getlen(pkt);
306 }
307
308 /* Need to expand SKB */
309 if (unlikely(cfpkt_pad_trail(pkt, len - skb->len) != 0))
310 PKT_ERROR(pkt, "cfpkt_setlen: skb_pad_trail failed\n");
311
312 return cfpkt_getlen(pkt);
313}
314EXPORT_SYMBOL(cfpkt_setlen);
315
316struct cfpkt *cfpkt_create_uplink(const unsigned char *data, unsigned int len)
317{
318 struct cfpkt *pkt = cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX);
319 if (unlikely(data != NULL))
320 cfpkt_add_body(pkt, data, len);
321 return pkt;
322}
323EXPORT_SYMBOL(cfpkt_create_uplink);
324
325struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
326 struct cfpkt *addpkt,
327 u16 expectlen)
328{
329 struct sk_buff *dst = pkt_to_skb(dstpkt);
330 struct sk_buff *add = pkt_to_skb(addpkt);
331 u16 addlen = skb_headlen(add);
332 u16 neededtailspace;
333 struct sk_buff *tmp;
334 u16 dstlen;
335 u16 createlen;
336 if (unlikely(is_erronous(dstpkt) || is_erronous(addpkt))) {
337 cfpkt_destroy(addpkt);
338 return dstpkt;
339 }
340 if (expectlen > addlen)
341 neededtailspace = expectlen;
342 else
343 neededtailspace = addlen;
344
345 if (dst->tail + neededtailspace > dst->end) {
346 /* Create a duplicate of 'dst' with more tail space */
347 dstlen = skb_headlen(dst);
348 createlen = dstlen + neededtailspace;
349 tmp = pkt_to_skb(
350 cfpkt_create(createlen + PKT_PREFIX + PKT_POSTFIX));
351 if (!tmp)
352 return NULL;
353 skb_set_tail_pointer(tmp, dstlen);
354 tmp->len = dstlen;
355 memcpy(tmp->data, dst->data, dstlen);
356 cfpkt_destroy(dstpkt);
357 dst = tmp;
358 }
359 memcpy(skb_tail_pointer(dst), add->data, skb_headlen(add));
360 cfpkt_destroy(addpkt);
361 dst->tail += addlen;
362 dst->len += addlen;
363 return skb_to_pkt(dst);
364}
365EXPORT_SYMBOL(cfpkt_append);
366
367struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos)
368{
369 struct sk_buff *skb2;
370 struct sk_buff *skb = pkt_to_skb(pkt);
371 u8 *split = skb->data + pos;
372 u16 len2nd = skb_tail_pointer(skb) - split;
373
374 if (unlikely(is_erronous(pkt)))
375 return NULL;
376
377 if (skb->data + pos > skb_tail_pointer(skb)) {
378 PKT_ERROR(pkt,
379 "cfpkt_split: trying to split beyond end of packet\n");
380 return NULL;
381 }
382
383 /* Create a new packet for the second part of the data */
384 skb2 = pkt_to_skb(
385 cfpkt_create_pfx(len2nd + PKT_PREFIX + PKT_POSTFIX,
386 PKT_PREFIX));
387
388 if (skb2 == NULL)
389 return NULL;
390
391 /* Reduce the length of the original packet */
392 skb_set_tail_pointer(skb, pos);
393 skb->len = pos;
394
395 memcpy(skb2->data, split, len2nd);
396 skb2->tail += len2nd;
397 skb2->len += len2nd;
398 return skb_to_pkt(skb2);
399}
400EXPORT_SYMBOL(cfpkt_split);
401
402char *cfpkt_log_pkt(struct cfpkt *pkt, char *buf, int buflen)
403{
404 struct sk_buff *skb = pkt_to_skb(pkt);
405 char *p = buf;
406 int i;
407
408 /*
409 * Sanity check buffer length, it needs to be at least as large as
410 * the header info: ~=50+ bytes
411 */
412 if (buflen < 50)
413 return NULL;
414
415 snprintf(buf, buflen, "%s: pkt:%p len:%ld(%ld+%ld) {%ld,%ld} data: [",
416 is_erronous(pkt) ? "ERRONOUS-SKB" :
417 (skb->data_len != 0 ? "COMPLEX-SKB" : "SKB"),
418 skb,
419 (long) skb->len,
420 (long) (skb_tail_pointer(skb) - skb->data),
421 (long) skb->data_len,
422 (long) (skb->data - skb->head),
423 (long) (skb_tail_pointer(skb) - skb->head));
424 p = buf + strlen(buf);
425
426 for (i = 0; i < skb_tail_pointer(skb) - skb->data && i < 300; i++) {
427 if (p > buf + buflen - 10) {
428 sprintf(p, "...");
429 p = buf + strlen(buf);
430 break;
431 }
432 sprintf(p, "%02x,", skb->data[i]);
433 p = buf + strlen(buf);
434 }
435 sprintf(p, "]\n");
436 return buf;
437}
438EXPORT_SYMBOL(cfpkt_log_pkt);
439
440int cfpkt_raw_append(struct cfpkt *pkt, void **buf, unsigned int buflen)
441{
442 struct sk_buff *skb = pkt_to_skb(pkt);
443 struct sk_buff *lastskb;
444
445 caif_assert(buf != NULL);
446 if (unlikely(is_erronous(pkt)))
447 return -EPROTO;
448 /* Make sure SKB is writable */
449 if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) {
450 PKT_ERROR(pkt, "cfpkt_raw_append: skb_cow_data failed\n");
451 return -EPROTO;
452 }
453
454 if (unlikely(skb_linearize(skb) != 0)) {
455 PKT_ERROR(pkt, "cfpkt_raw_append: linearize failed\n");
456 return -EPROTO;
457 }
458
459 if (unlikely(skb_tailroom(skb) < buflen)) {
460 PKT_ERROR(pkt, "cfpkt_raw_append: buffer too short - failed\n");
461 return -EPROTO;
462 }
463
464 *buf = skb_put(skb, buflen);
465 return 1;
466}
467EXPORT_SYMBOL(cfpkt_raw_append);
468
469int cfpkt_raw_extract(struct cfpkt *pkt, void **buf, unsigned int buflen)
470{
471 struct sk_buff *skb = pkt_to_skb(pkt);
472
473 caif_assert(buf != NULL);
474 if (unlikely(is_erronous(pkt)))
475 return -EPROTO;
476
477 if (unlikely(buflen > skb->len)) {
478 PKT_ERROR(pkt, "cfpkt_raw_extract: buflen too large "
479 "- failed\n");
480 return -EPROTO;
481 }
482
483 if (unlikely(buflen > skb_headlen(skb))) {
484 if (unlikely(skb_linearize(skb) != 0)) {
485 PKT_ERROR(pkt, "cfpkt_raw_extract: linearize failed\n");
486 return -EPROTO;
487 }
488 }
489
490 *buf = skb->data;
491 skb_pull(skb, buflen);
492
493 return 1;
494}
495EXPORT_SYMBOL(cfpkt_raw_extract);
496
497inline bool cfpkt_erroneous(struct cfpkt *pkt)
498{
499 return cfpkt_priv(pkt)->erronous;
500}
501EXPORT_SYMBOL(cfpkt_erroneous);
502
503struct cfpktq *cfpktq_create(void)
504{
505 struct cfpktq *q = kmalloc(sizeof(struct cfpktq), GFP_ATOMIC);
506 if (!q)
507 return NULL;
508 skb_queue_head_init(&q->head);
509 atomic_set(&q->count, 0);
510 spin_lock_init(&q->lock);
511 return q;
512}
513EXPORT_SYMBOL(cfpktq_create);
514
515void cfpkt_queue(struct cfpktq *pktq, struct cfpkt *pkt, unsigned short prio)
516{
517 atomic_inc(&pktq->count);
518 spin_lock(&pktq->lock);
519 skb_queue_tail(&pktq->head, pkt_to_skb(pkt));
520 spin_unlock(&pktq->lock);
521
522}
523EXPORT_SYMBOL(cfpkt_queue);
524
525struct cfpkt *cfpkt_qpeek(struct cfpktq *pktq)
526{
527 struct cfpkt *tmp;
528 spin_lock(&pktq->lock);
529 tmp = skb_to_pkt(skb_peek(&pktq->head));
530 spin_unlock(&pktq->lock);
531 return tmp;
532}
533EXPORT_SYMBOL(cfpkt_qpeek);
534
535struct cfpkt *cfpkt_dequeue(struct cfpktq *pktq)
536{
537 struct cfpkt *pkt;
538 spin_lock(&pktq->lock);
539 pkt = skb_to_pkt(skb_dequeue(&pktq->head));
540 if (pkt) {
541 atomic_dec(&pktq->count);
542 caif_assert(atomic_read(&pktq->count) >= 0);
543 }
544 spin_unlock(&pktq->lock);
545 return pkt;
546}
547EXPORT_SYMBOL(cfpkt_dequeue);
548
549int cfpkt_qcount(struct cfpktq *pktq)
550{
551 return atomic_read(&pktq->count);
552}
553EXPORT_SYMBOL(cfpkt_qcount);
554
555struct cfpkt *cfpkt_clone_release(struct cfpkt *pkt)
556{
557 struct cfpkt *clone;
558 clone = skb_to_pkt(skb_clone(pkt_to_skb(pkt), GFP_ATOMIC));
559 /* Free original packet. */
560 cfpkt_destroy(pkt);
561 if (!clone)
562 return NULL;
563 return clone;
564}
565EXPORT_SYMBOL(cfpkt_clone_release);
566
567struct caif_payload_info *cfpkt_info(struct cfpkt *pkt)
568{
569 return (struct caif_payload_info *)&pkt_to_skb(pkt)->cb;
570}
571EXPORT_SYMBOL(cfpkt_info);
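
cfpkt is a thin wrapper around sk_buff, and the layers above it follow a
strict push/pop discipline: every transmit() that prepends a header with
cfpkt_add_head() strips it again with cfpkt_extr_head() when the layer below
fails, so the caller gets its packet back unchanged. A plain byte-buffer
illustration of that discipline (not the kernel API; all names are made up):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

struct buf {
	uint8_t data[64];
	size_t  off;		/* headroom still available */
	size_t  len;		/* bytes currently in the buffer */
};

static void add_head(struct buf *b, const void *hdr, size_t n)
{
	b->off -= n;
	memcpy(b->data + b->off, hdr, n);
	b->len += n;
}

static void extr_head(struct buf *b, void *hdr, size_t n)
{
	memcpy(hdr, b->data + b->off, n);
	b->off += n;
	b->len -= n;
}

static int lower_transmit(struct buf *b)
{
	(void)b;
	return -1;		/* pretend the link below failed */
}

int main(void)
{
	struct buf b = { .off = 16, .len = 0 };
	uint8_t linkid = 5, undo;

	add_head(&b, &linkid, 1);
	if (lower_transmit(&b) < 0)
		extr_head(&b, &undo, 1);	/* roll our header back */
	printf("len after rollback: %zu\n", b.len);
	return 0;
}
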
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
new file mode 100644
index 000000000000..cd2830fec935
--- /dev/null
+++ b/net/caif/cfrfml.c
@@ -0,0 +1,108 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/stddef.h>
8#include <linux/spinlock.h>
9#include <linux/slab.h>
10#include <net/caif/caif_layer.h>
11#include <net/caif/cfsrvl.h>
12#include <net/caif/cfpkt.h>
13
14#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
15
16#define RFM_SEGMENTATION_BIT 0x01
17#define RFM_PAYLOAD 0x00
18#define RFM_CMD_BIT 0x80
19#define RFM_FLOW_OFF 0x81
20#define RFM_FLOW_ON 0x80
21#define RFM_SET_PIN 0x82
22#define RFM_CTRL_PKT_SIZE 1
23
24static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt);
25static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt);
26static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl);
27
28struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info)
29{
30 struct cfsrvl *rfm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
31 if (!rfm) {
32 pr_warning("CAIF: %s(): Out of memory\n", __func__);
33 return NULL;
34 }
35 caif_assert(offsetof(struct cfsrvl, layer) == 0);
36 memset(rfm, 0, sizeof(struct cfsrvl));
37 cfsrvl_init(rfm, channel_id, dev_info);
38 rfm->layer.modemcmd = cfservl_modemcmd;
39 rfm->layer.receive = cfrfml_receive;
40 rfm->layer.transmit = cfrfml_transmit;
41 snprintf(rfm->layer.name, CAIF_LAYER_NAME_SZ, "rfm%d", channel_id);
42 return &rfm->layer;
43}
44
45static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
46{
47 return -EPROTO;
48}
49
50static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt)
51{
52 u8 tmp;
53 bool segmented;
54 int ret;
55 caif_assert(layr->up != NULL);
56 caif_assert(layr->receive != NULL);
57
58 /*
59 * RFM is taking care of segmentation and stripping of
60 * segmentation bit.
61 */
62 if (cfpkt_extr_head(pkt, &tmp, 1) < 0) {
63 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
64 cfpkt_destroy(pkt);
65 return -EPROTO;
66 }
67 segmented = tmp & RFM_SEGMENTATION_BIT;
68 caif_assert(!segmented);
69
70 ret = layr->up->receive(layr->up, pkt);
71 return ret;
72}
73
74static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
75{
76 u8 tmp = 0;
77 int ret;
78 struct cfsrvl *service = container_obj(layr);
79
80 caif_assert(layr->dn != NULL);
81 caif_assert(layr->dn->transmit != NULL);
82
83 if (!cfsrvl_ready(service, &ret))
84 return ret;
85
86 if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
87 pr_err("CAIF: %s():Packet too large - size=%d\n",
88 __func__, cfpkt_getlen(pkt));
89 return -EOVERFLOW;
90 }
91 if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
92 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
93 return -EPROTO;
94 }
95
96 /* Add info for MUX-layer to route the packet out. */
97 cfpkt_info(pkt)->channel_id = service->layer.id;
98 /*
99 * To optimize alignment, we add up the size of CAIF header before
100 * payload.
101 */
102 cfpkt_info(pkt)->hdr_len = 1;
103 cfpkt_info(pkt)->dev_info = &service->dev_info;
104 ret = layr->dn->transmit(layr->dn, pkt);
105 if (ret < 0)
106 cfpkt_extr_head(pkt, &tmp, 1);
107 return ret;
108}
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
new file mode 100644
index 000000000000..06029ea2da2f
--- /dev/null
+++ b/net/caif/cfserl.c
@@ -0,0 +1,192 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/stddef.h>
8#include <linux/spinlock.h>
9#include <linux/slab.h>
10#include <net/caif/caif_layer.h>
11#include <net/caif/cfpkt.h>
12#include <net/caif/cfserl.h>
13
14#define container_obj(layr) ((struct cfserl *) layr)
15
16#define CFSERL_STX 0x02
17#define CAIF_MINIUM_PACKET_SIZE 4
18struct cfserl {
19 struct cflayer layer;
20 struct cfpkt *incomplete_frm;
21 /* Protects parallel processing of incoming packets */
22 spinlock_t sync;
23 bool usestx;
24};
25#define STXLEN(layr) (layr->usestx ? 1 : 0)
26
27static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt);
28static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
29static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
30 int phyid);
31
32struct cflayer *cfserl_create(int type, int instance, bool use_stx)
33{
34 struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC);
35 if (!this) {
36 pr_warning("CAIF: %s(): Out of memory\n", __func__);
37 return NULL;
38 }
39 caif_assert(offsetof(struct cfserl, layer) == 0);
40 memset(this, 0, sizeof(struct cfserl));
41 this->layer.receive = cfserl_receive;
42 this->layer.transmit = cfserl_transmit;
43 this->layer.ctrlcmd = cfserl_ctrlcmd;
44 this->layer.type = type;
45 this->usestx = use_stx;
46 spin_lock_init(&this->sync);
47 snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "ser1");
48 return &this->layer;
49}
50
51static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt)
52{
53 struct cfserl *layr = container_obj(l);
54 u16 pkt_len;
55 struct cfpkt *pkt = NULL;
56 struct cfpkt *tail_pkt = NULL;
57 u8 tmp8;
58 u16 tmp;
59 u8 stx = CFSERL_STX;
60 int ret;
61 u16 expectlen = 0;
62 caif_assert(newpkt != NULL);
63 spin_lock(&layr->sync);
64
65 if (layr->incomplete_frm != NULL) {
66
67 layr->incomplete_frm =
68 cfpkt_append(layr->incomplete_frm, newpkt, expectlen);
69 pkt = layr->incomplete_frm;
70 } else {
71 pkt = newpkt;
72 }
73 layr->incomplete_frm = NULL;
74
75 do {
76 /* Search for STX at start of pkt if STX is used */
77 if (layr->usestx) {
78 cfpkt_extr_head(pkt, &tmp8, 1);
79 if (tmp8 != CFSERL_STX) {
80 while (cfpkt_more(pkt)
81 && tmp8 != CFSERL_STX) {
82 cfpkt_extr_head(pkt, &tmp8, 1);
83 }
84 if (!cfpkt_more(pkt)) {
85 cfpkt_destroy(pkt);
86 layr->incomplete_frm = NULL;
87 spin_unlock(&layr->sync);
88 return -EPROTO;
89 }
90 }
91 }
92
93 pkt_len = cfpkt_getlen(pkt);
94
95 /*
96 * pkt_len is the accumulated length of the packet data
97 * we have received so far.
98 * Exit if frame doesn't hold length.
99 */
100
101 if (pkt_len < 2) {
102 if (layr->usestx)
103 cfpkt_add_head(pkt, &stx, 1);
104 layr->incomplete_frm = pkt;
105 spin_unlock(&layr->sync);
106 return 0;
107 }
108
109 /*
110 * Find length of frame.
111 * expectlen is the length we need for a full frame.
112 */
113 cfpkt_peek_head(pkt, &tmp, 2);
114 expectlen = le16_to_cpu(tmp) + 2;
115 /*
116 * Frame error handling
117 */
118 if (expectlen < CAIF_MINIUM_PACKET_SIZE
119 || expectlen > CAIF_MAX_FRAMESIZE) {
120 if (!layr->usestx) {
121 if (pkt != NULL)
122 cfpkt_destroy(pkt);
123 layr->incomplete_frm = NULL;
124 expectlen = 0;
125 spin_unlock(&layr->sync);
126 return -EPROTO;
127 }
128 continue;
129 }
130
131 if (pkt_len < expectlen) {
132 /* Too little received data */
133 if (layr->usestx)
134 cfpkt_add_head(pkt, &stx, 1);
135 layr->incomplete_frm = pkt;
136 spin_unlock(&layr->sync);
137 return 0;
138 }
139
140 /*
141 * Enough data for at least one frame.
142 * Split the frame, if too long
143 */
144 if (pkt_len > expectlen)
145 tail_pkt = cfpkt_split(pkt, expectlen);
146 else
147 tail_pkt = NULL;
148
149 /* Send the first part of packet upwards.*/
150 spin_unlock(&layr->sync);
151 ret = layr->layer.up->receive(layr->layer.up, pkt);
152 spin_lock(&layr->sync);
153 if (ret == -EILSEQ) {
154 if (layr->usestx) {
155 if (tail_pkt != NULL)
156 pkt = cfpkt_append(pkt, tail_pkt, 0);
157
158 /* Start search for next STX if frame failed */
159 continue;
160 } else {
161 cfpkt_destroy(pkt);
162 pkt = NULL;
163 }
164 }
165
166 pkt = tail_pkt;
167
168 } while (pkt != NULL);
169
170 spin_unlock(&layr->sync);
171 return 0;
172}
173
174static int cfserl_transmit(struct cflayer *layer, struct cfpkt *newpkt)
175{
176 struct cfserl *layr = container_obj(layer);
177 int ret;
178 u8 tmp8 = CFSERL_STX;
179 if (layr->usestx)
180 cfpkt_add_head(newpkt, &tmp8, 1);
181 ret = layer->dn->transmit(layer->dn, newpkt);
182 if (ret < 0)
183 cfpkt_extr_head(newpkt, &tmp8, 1);
184
185 return ret;
186}
187
188static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
189 int phyid)
190{
191 layr->up->ctrlcmd(layr->up, ctrl, phyid);
192}
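
The serial layer above resynchronises on an STX byte (0x02) and then relies on
the framing layer's 2-byte little-endian length field to decide whether a full
frame has been buffered; short reads are kept in incomplete_frm until more
data arrives. A sketch of that receive-side logic on a flat byte array, with
illustrative names only:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define CFSERL_STX 0x02

/* Return the frame length following the STX byte if a complete frame is
 * buffered, 0 if more data is needed; *start is set to the STX offset. */
static size_t next_frame(const uint8_t *buf, size_t n, size_t *start)
{
	size_t i = 0;
	size_t framelen;

	while (i < n && buf[i] != CFSERL_STX)	/* hunt for the next STX */
		i++;
	*start = i;
	if (n - i < 3)				/* need STX + 2 length bytes */
		return 0;
	framelen = 2 + (buf[i + 1] | buf[i + 2] << 8);
	return (n - i - 1 >= framelen) ? framelen : 0;
}

int main(void)
{
	/* one garbage byte, then STX, length field 3 (payload + 2-byte FCS) */
	const uint8_t stream[] = { 0x00, 0x02, 0x03, 0x00, 0xAA, 0x12, 0x34 };
	size_t start, len = next_frame(stream, sizeof(stream), &start);

	printf("STX at offset %zu, frame length %zu\n", start, len);
	return 0;
}
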
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
new file mode 100644
index 000000000000..d470c51c6431
--- /dev/null
+++ b/net/caif/cfsrvl.c
@@ -0,0 +1,185 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/kernel.h>
8#include <linux/types.h>
9#include <linux/errno.h>
10#include <linux/slab.h>
11#include <net/caif/caif_layer.h>
12#include <net/caif/cfsrvl.h>
13#include <net/caif/cfpkt.h>
14
15#define SRVL_CTRL_PKT_SIZE 1
16#define SRVL_FLOW_OFF 0x81
17#define SRVL_FLOW_ON 0x80
18#define SRVL_SET_PIN 0x82
19#define SRVL_CTRL_PKT_SIZE 1
20
21#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
22
23static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
24 int phyid)
25{
26 struct cfsrvl *service = container_obj(layr);
27 caif_assert(layr->up != NULL);
28 caif_assert(layr->up->ctrlcmd != NULL);
29 switch (ctrl) {
30 case CAIF_CTRLCMD_INIT_RSP:
31 service->open = true;
32 layr->up->ctrlcmd(layr->up, ctrl, phyid);
33 break;
34 case CAIF_CTRLCMD_DEINIT_RSP:
35 case CAIF_CTRLCMD_INIT_FAIL_RSP:
36 service->open = false;
37 layr->up->ctrlcmd(layr->up, ctrl, phyid);
38 break;
39 case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND:
40 if (phyid != service->dev_info.id)
41 break;
42 if (service->modem_flow_on)
43 layr->up->ctrlcmd(layr->up,
44 CAIF_CTRLCMD_FLOW_OFF_IND, phyid);
45 service->phy_flow_on = false;
46 break;
47 case _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND:
48 if (phyid != service->dev_info.id)
49 return;
50 if (service->modem_flow_on) {
51 layr->up->ctrlcmd(layr->up,
52 CAIF_CTRLCMD_FLOW_ON_IND,
53 phyid);
54 }
55 service->phy_flow_on = true;
56 break;
57 case CAIF_CTRLCMD_FLOW_OFF_IND:
58 if (service->phy_flow_on) {
59 layr->up->ctrlcmd(layr->up,
60 CAIF_CTRLCMD_FLOW_OFF_IND, phyid);
61 }
62 service->modem_flow_on = false;
63 break;
64 case CAIF_CTRLCMD_FLOW_ON_IND:
65 if (service->phy_flow_on) {
66 layr->up->ctrlcmd(layr->up,
67 CAIF_CTRLCMD_FLOW_ON_IND, phyid);
68 }
69 service->modem_flow_on = true;
70 break;
71 case _CAIF_CTRLCMD_PHYIF_DOWN_IND:
72 /* In case interface is down, let's fake a remote shutdown */
73 layr->up->ctrlcmd(layr->up,
74 CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, phyid);
75 break;
76 case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:
77 layr->up->ctrlcmd(layr->up, ctrl, phyid);
78 break;
79 default:
80 pr_warning("CAIF: %s(): "
81 "Unexpected ctrl in cfsrvl (%d)\n", __func__, ctrl);
82 /* We have both modem and phy flow on, send flow on */
83 layr->up->ctrlcmd(layr->up, ctrl, phyid);
84 service->phy_flow_on = true;
85 break;
86 }
87}
88
89static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
90{
91 struct cfsrvl *service = container_obj(layr);
92 caif_assert(layr != NULL);
93 caif_assert(layr->dn != NULL);
94 caif_assert(layr->dn->transmit != NULL);
95 switch (ctrl) {
96 case CAIF_MODEMCMD_FLOW_ON_REQ:
97 {
98 struct cfpkt *pkt;
99 struct caif_payload_info *info;
100 u8 flow_on = SRVL_FLOW_ON;
101 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
102 if (!pkt) {
103 pr_warning("CAIF: %s(): Out of memory\n",
104 __func__);
105 return -ENOMEM;
106 }
107
108 if (cfpkt_add_head(pkt, &flow_on, 1) < 0) {
109 pr_err("CAIF: %s(): Packet is erroneous!\n",
110 __func__);
111 cfpkt_destroy(pkt);
112 return -EPROTO;
113 }
114 info = cfpkt_info(pkt);
115 info->channel_id = service->layer.id;
116 info->hdr_len = 1;
117 info->dev_info = &service->dev_info;
118 return layr->dn->transmit(layr->dn, pkt);
119 }
120 case CAIF_MODEMCMD_FLOW_OFF_REQ:
121 {
122 struct cfpkt *pkt;
123 struct caif_payload_info *info;
124 u8 flow_off = SRVL_FLOW_OFF;
125 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
 if (!pkt) {
 pr_warning("CAIF: %s(): Out of memory\n", __func__);
 return -ENOMEM;
 }
126 if (cfpkt_add_head(pkt, &flow_off, 1) < 0) {
127 pr_err("CAIF: %s(): Packet is erroneous!\n",
128 __func__);
129 cfpkt_destroy(pkt);
130 return -EPROTO;
131 }
132 info = cfpkt_info(pkt);
133 info->channel_id = service->layer.id;
134 info->hdr_len = 1;
135 info->dev_info = &service->dev_info;
136 return layr->dn->transmit(layr->dn, pkt);
137 }
138 default:
139 break;
140 }
141 return -EINVAL;
142}
143
144void cfservl_destroy(struct cflayer *layer)
145{
146 kfree(layer);
147}
148
149void cfsrvl_init(struct cfsrvl *service,
150 u8 channel_id,
151 struct dev_info *dev_info)
152{
153 caif_assert(offsetof(struct cfsrvl, layer) == 0);
154 service->open = false;
155 service->modem_flow_on = true;
156 service->phy_flow_on = true;
157 service->layer.id = channel_id;
158 service->layer.ctrlcmd = cfservl_ctrlcmd;
159 service->layer.modemcmd = cfservl_modemcmd;
160 service->dev_info = *dev_info;
161}
162
163bool cfsrvl_ready(struct cfsrvl *service, int *err)
164{
165 if (service->open && service->modem_flow_on && service->phy_flow_on)
166 return true;
167 if (!service->open) {
168 *err = -ENOTCONN;
169 return false;
170 }
171 caif_assert(!(service->modem_flow_on && service->phy_flow_on));
172 *err = -EAGAIN;
173 return false;
174}
175u8 cfsrvl_getphyid(struct cflayer *layer)
176{
177 struct cfsrvl *servl = container_obj(layer);
178 return servl->dev_info.id;
179}
180
181bool cfsrvl_phyid_match(struct cflayer *layer, int phyid)
182{
183 struct cfsrvl *servl = container_obj(layer);
184 return servl->dev_info.id == phyid;
185}
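
A service channel above may only transmit when it is open and neither the
modem nor the physical layer has flow-controlled it off; cfsrvl_ready()
encodes that rule and picks the errno reported back to the caller. A tiny
illustration of the same rule outside the kernel (struct and function names
are made up):

#include <stdio.h>
#include <stdbool.h>
#include <errno.h>

struct srvl_state {
	bool open, modem_flow_on, phy_flow_on;
};

static bool srvl_ready(const struct srvl_state *s, int *err)
{
	if (s->open && s->modem_flow_on && s->phy_flow_on)
		return true;
	*err = s->open ? -EAGAIN : -ENOTCONN;	/* flow off vs. not connected */
	return false;
}

int main(void)
{
	struct srvl_state s = { .open = true, .modem_flow_on = false,
				.phy_flow_on = true };
	int err = 0;

	printf("ready: %d err: %d\n", srvl_ready(&s, &err), err);
	return 0;
}
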
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
new file mode 100644
index 000000000000..5fd2c9ea8b42
--- /dev/null
+++ b/net/caif/cfutill.c
@@ -0,0 +1,115 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/kernel.h>
8#include <linux/types.h>
9#include <linux/slab.h>
10#include <linux/errno.h>
11#include <net/caif/caif_layer.h>
12#include <net/caif/cfsrvl.h>
13#include <net/caif/cfpkt.h>
14
15#define container_obj(layr) ((struct cfsrvl *) layr)
16#define UTIL_PAYLOAD 0x00
17#define UTIL_CMD_BIT 0x80
18#define UTIL_REMOTE_SHUTDOWN 0x82
19#define UTIL_FLOW_OFF 0x81
20#define UTIL_FLOW_ON 0x80
21#define UTIL_CTRL_PKT_SIZE 1
22static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt);
23static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt);
24
25struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info)
26{
27 struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
28 if (!util) {
29 pr_warning("CAIF: %s(): Out of memory\n", __func__);
30 return NULL;
31 }
32 caif_assert(offsetof(struct cfsrvl, layer) == 0);
33 memset(util, 0, sizeof(struct cfsrvl));
34 cfsrvl_init(util, channel_id, dev_info);
35 util->layer.receive = cfutill_receive;
36 util->layer.transmit = cfutill_transmit;
37 snprintf(util->layer.name, CAIF_LAYER_NAME_SZ - 1, "util1");
38 return &util->layer;
39}
40
41static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt)
42{
43 u8 cmd = -1;
44 struct cfsrvl *service = container_obj(layr);
45 caif_assert(layr != NULL);
46 caif_assert(layr->up != NULL);
47 caif_assert(layr->up->receive != NULL);
48 caif_assert(layr->up->ctrlcmd != NULL);
49 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
50 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
51 cfpkt_destroy(pkt);
52 return -EPROTO;
53 }
54
55 switch (cmd) {
56 case UTIL_PAYLOAD:
57 return layr->up->receive(layr->up, pkt);
58 case UTIL_FLOW_OFF:
59 layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0);
60 cfpkt_destroy(pkt);
61 return 0;
62 case UTIL_FLOW_ON:
63 layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0);
64 cfpkt_destroy(pkt);
65 return 0;
66 case UTIL_REMOTE_SHUTDOWN: /* Remote Shutdown Request */
67 pr_err("CAIF: %s(): REMOTE SHUTDOWN REQUEST RECEIVED\n",
68 __func__);
69 layr->ctrlcmd(layr, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 0);
70 service->open = false;
71 cfpkt_destroy(pkt);
72 return 0;
73 default:
74 cfpkt_destroy(pkt);
75 pr_warning("CAIF: %s(): Unknown service control %d (0x%x)\n",
76 __func__, cmd, cmd);
77 return -EPROTO;
78 }
79}
80
81static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt)
82{
83 u8 zero = 0;
84 struct caif_payload_info *info;
85 int ret;
86 struct cfsrvl *service = container_obj(layr);
87 caif_assert(layr != NULL);
88 caif_assert(layr->dn != NULL);
89 caif_assert(layr->dn->transmit != NULL);
90 if (!cfsrvl_ready(service, &ret))
91 return ret;
92
93 if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
94 pr_err("CAIF: %s(): packet too large size=%d\n",
95 __func__, cfpkt_getlen(pkt));
96 return -EOVERFLOW;
97 }
98
99 cfpkt_add_head(pkt, &zero, 1);
100 /* Add info for MUX-layer to route the packet out. */
101 info = cfpkt_info(pkt);
102 info->channel_id = service->layer.id;
103 /*
104 * To optimize alignment, we add up the size of CAIF header before
105 * payload.
106 */
107 info->hdr_len = 1;
108 info->dev_info = &service->dev_info;
109 ret = layr->dn->transmit(layr->dn, pkt);
110 if (ret < 0) {
111 u8 tmp8;
112 cfpkt_extr_head(pkt, &tmp8, 1);
113 }
114 return ret;
115}
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
new file mode 100644
index 000000000000..0fd827f49491
--- /dev/null
+++ b/net/caif/cfveil.c
@@ -0,0 +1,107 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/stddef.h>
8#include <linux/slab.h>
9#include <net/caif/caif_layer.h>
10#include <net/caif/cfsrvl.h>
11#include <net/caif/cfpkt.h>
12
13#define VEI_PAYLOAD 0x00
14#define VEI_CMD_BIT 0x80
15#define VEI_FLOW_OFF 0x81
16#define VEI_FLOW_ON 0x80
17#define VEI_SET_PIN 0x82
18#define VEI_CTRL_PKT_SIZE 1
19#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
20
21static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt);
22static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt);
23
24struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info)
25{
26 struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
27 if (!vei) {
28 pr_warning("CAIF: %s(): Out of memory\n", __func__);
29 return NULL;
30 }
31 caif_assert(offsetof(struct cfsrvl, layer) == 0);
32 memset(vei, 0, sizeof(struct cfsrvl));
33 cfsrvl_init(vei, channel_id, dev_info);
34 vei->layer.receive = cfvei_receive;
35 vei->layer.transmit = cfvei_transmit;
36 snprintf(vei->layer.name, CAIF_LAYER_NAME_SZ - 1, "vei%d", channel_id);
37 return &vei->layer;
38}
39
40static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt)
41{
42 u8 cmd;
43 int ret;
44 caif_assert(layr->up != NULL);
45 caif_assert(layr->receive != NULL);
46 caif_assert(layr->ctrlcmd != NULL);
47
48
49 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
50 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
51 cfpkt_destroy(pkt);
52 return -EPROTO;
53 }
54 switch (cmd) {
55 case VEI_PAYLOAD:
56 ret = layr->up->receive(layr->up, pkt);
57 return ret;
58 case VEI_FLOW_OFF:
59 layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0);
60 cfpkt_destroy(pkt);
61 return 0;
62 case VEI_FLOW_ON:
63 layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0);
64 cfpkt_destroy(pkt);
65 return 0;
66 case VEI_SET_PIN: /* SET RS232 PIN */
67 cfpkt_destroy(pkt);
68 return 0;
69 default: /* Unknown command */
70 pr_warning("CAIF: %s():Unknown VEI control packet %d (0x%x)!\n",
71 __func__, cmd, cmd);
72 cfpkt_destroy(pkt);
73 return -EPROTO;
74 }
75}
76
77static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt)
78{
79 u8 tmp = 0;
80 struct caif_payload_info *info;
81 int ret;
82 struct cfsrvl *service = container_obj(layr);
83 if (!cfsrvl_ready(service, &ret))
84 return ret;
85 caif_assert(layr->dn != NULL);
86 caif_assert(layr->dn->transmit != NULL);
87 if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
88 pr_warning("CAIF: %s(): Packet too large - size=%d\n",
89 __func__, cfpkt_getlen(pkt));
90 return -EOVERFLOW;
91 }
92
93 if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
94 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
95 return -EPROTO;
96 }
97
98 /* Add info-> for MUX-layer to route the packet out. */
99 info = cfpkt_info(pkt);
100 info->channel_id = service->layer.id;
101 info->hdr_len = 1;
102 info->dev_info = &service->dev_info;
103 ret = layr->dn->transmit(layr->dn, pkt);
104 if (ret < 0)
105 cfpkt_extr_head(pkt, &tmp, 1);
106 return ret;
107}
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
new file mode 100644
index 000000000000..89ad4ea239f1
--- /dev/null
+++ b/net/caif/cfvidl.c
@@ -0,0 +1,65 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/kernel.h>
8#include <linux/types.h>
9#include <linux/slab.h>
10#include <linux/errno.h>
11#include <net/caif/caif_layer.h>
12#include <net/caif/cfsrvl.h>
13#include <net/caif/cfpkt.h>
14
15#define container_obj(layr) ((struct cfsrvl *) layr)
16
17static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt);
18static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt);
19
20struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info)
21{
22 struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
23 if (!vid) {
24 pr_warning("CAIF: %s(): Out of memory\n", __func__);
25 return NULL;
26 }
27 caif_assert(offsetof(struct cfsrvl, layer) == 0);
28
29 memset(vid, 0, sizeof(struct cfsrvl));
30 cfsrvl_init(vid, channel_id, dev_info);
31 vid->layer.receive = cfvidl_receive;
32 vid->layer.transmit = cfvidl_transmit;
33 snprintf(vid->layer.name, CAIF_LAYER_NAME_SZ - 1, "vid1");
34 return &vid->layer;
35}
36
37static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt)
38{
39 u32 videoheader;
40 if (cfpkt_extr_head(pkt, &videoheader, 4) < 0) {
41 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
42 cfpkt_destroy(pkt);
43 return -EPROTO;
44 }
45 return layr->up->receive(layr->up, pkt);
46}
47
48static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt)
49{
50 struct cfsrvl *service = container_obj(layr);
51 struct caif_payload_info *info;
52 u32 videoheader = 0;
53 int ret;
54 if (!cfsrvl_ready(service, &ret))
55 return ret;
56 cfpkt_add_head(pkt, &videoheader, 4);
57 /* Add info for MUX-layer to route the packet out */
58 info = cfpkt_info(pkt);
59 info->channel_id = service->layer.id;
60 info->dev_info = &service->dev_info;
61 ret = layr->dn->transmit(layr->dn, pkt);
62 if (ret < 0)
63 cfpkt_extr_head(pkt, &videoheader, 4);
64 return ret;
65}
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
new file mode 100644
index 000000000000..f622ff1d39ba
--- /dev/null
+++ b/net/caif/chnl_net.c
@@ -0,0 +1,451 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Authors: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * Daniel Martensson / Daniel.Martensson@stericsson.com
5 * License terms: GNU General Public License (GPL) version 2
6 */
7
8#include <linux/version.h>
9#include <linux/fs.h>
10#include <linux/init.h>
11#include <linux/module.h>
12#include <linux/netdevice.h>
13#include <linux/if_ether.h>
14#include <linux/moduleparam.h>
15#include <linux/ip.h>
16#include <linux/sched.h>
17#include <linux/sockios.h>
18#include <linux/caif/if_caif.h>
19#include <net/rtnetlink.h>
20#include <net/caif/caif_layer.h>
21#include <net/caif/cfcnfg.h>
22#include <net/caif/cfpkt.h>
23#include <net/caif/caif_dev.h>
24
25#define CAIF_CONNECT_TIMEOUT 30
26#define SIZE_MTU 1500
27#define SIZE_MTU_MAX 4080
28#define SIZE_MTU_MIN 68
29#define CAIF_NET_DEFAULT_QUEUE_LEN 500
30
31#undef pr_debug
32#define pr_debug pr_warning
33
34/*This list is protected by the rtnl lock. */
35static LIST_HEAD(chnl_net_list);
36
37MODULE_LICENSE("GPL");
38MODULE_ALIAS_RTNL_LINK("caif");
39
40struct chnl_net {
41 struct cflayer chnl;
42 struct net_device_stats stats;
43 struct caif_connect_request conn_req;
44 struct list_head list_field;
45 struct net_device *netdev;
46 char name[256];
47 wait_queue_head_t netmgmt_wq;
48 /* Flow status to remember and control the transmission. */
49 bool flowenabled;
50 bool pending_close;
51};
52
53static void robust_list_del(struct list_head *delete_node)
54{
55 struct list_head *list_node;
56 struct list_head *n;
57 ASSERT_RTNL();
58 list_for_each_safe(list_node, n, &chnl_net_list) {
59 if (list_node == delete_node) {
60 list_del(list_node);
61 break;
62 }
63 }
64}
65
66static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
67{
68 struct sk_buff *skb;
69 struct chnl_net *priv = NULL;
70 int pktlen;
71 int err = 0;
72
73 priv = container_of(layr, struct chnl_net, chnl);
74
75 if (!priv)
76 return -EINVAL;
77
78 /* Get length of CAIF packet. */
79 pktlen = cfpkt_getlen(pkt);
80
81 skb = (struct sk_buff *) cfpkt_tonative(pkt);
82 /* Pass some minimum information and
83 * send the packet to the net stack.
84 */
85 skb->dev = priv->netdev;
86 skb->protocol = htons(ETH_P_IP);
87
88 /* If we change the header in loop mode, the checksum is corrupted. */
89 if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP)
90 skb->ip_summed = CHECKSUM_UNNECESSARY;
91 else
92 skb->ip_summed = CHECKSUM_NONE;
93
94 /* FIXME: Drivers should call this in tasklet context. */
95 if (in_interrupt())
96 netif_rx(skb);
97 else
98 netif_rx_ni(skb);
99
100 /* Update statistics. */
101 priv->netdev->stats.rx_packets++;
102 priv->netdev->stats.rx_bytes += pktlen;
103
104 return err;
105}
106
107static int delete_device(struct chnl_net *dev)
108{
109 ASSERT_RTNL();
110 if (dev->netdev)
111 unregister_netdevice(dev->netdev);
112 return 0;
113}
114
115static void close_work(struct work_struct *work)
116{
117 struct chnl_net *dev = NULL;
118 struct list_head *list_node;
119 struct list_head *_tmp;
120 rtnl_lock();
121 list_for_each_safe(list_node, _tmp, &chnl_net_list) {
122 dev = list_entry(list_node, struct chnl_net, list_field);
123 if (!dev->pending_close)
124 continue;
125 list_del(list_node);
126 delete_device(dev);
127 }
128 rtnl_unlock();
129}
130static DECLARE_WORK(close_worker, close_work);
131
132static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
133 int phyid)
134{
135 struct chnl_net *priv;
136 pr_debug("CAIF: %s(): NET flowctrl func called flow: %s.\n",
137 __func__,
138 flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" :
139 flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" :
140 flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" :
141 flow == CAIF_CTRLCMD_DEINIT_RSP ? "CLOSE/DEINIT" :
142 flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "OPEN_FAIL" :
143 flow == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ?
144 "REMOTE_SHUTDOWN" : "UNKNOWN CTRL COMMAND");
145
146 priv = container_of(layr, struct chnl_net, chnl);
147
148 switch (flow) {
149 case CAIF_CTRLCMD_FLOW_OFF_IND:
150 case CAIF_CTRLCMD_DEINIT_RSP:
151 case CAIF_CTRLCMD_INIT_FAIL_RSP:
152 case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:
153 priv->flowenabled = false;
154 netif_tx_disable(priv->netdev);
155 pr_warning("CAIF: %s(): done\n", __func__);
156 priv->pending_close = 1;
157 schedule_work(&close_worker);
158 break;
159 case CAIF_CTRLCMD_FLOW_ON_IND:
160 case CAIF_CTRLCMD_INIT_RSP:
161 priv->flowenabled = true;
162 netif_wake_queue(priv->netdev);
163 wake_up_interruptible(&priv->netmgmt_wq);
164 break;
165 default:
166 break;
167 }
168}
169
170static int chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
171{
172 struct chnl_net *priv;
173 struct cfpkt *pkt = NULL;
174 int len;
175 int result = -1;
176 /* Get our private data. */
177 priv = netdev_priv(dev);
178
179 if (skb->len > priv->netdev->mtu) {
180 pr_warning("CAIF: %s(): Size of skb exceeded MTU\n", __func__);
 kfree_skb(skb);
 dev->stats.tx_errors++;
181 return NETDEV_TX_OK;
182 }
183
184 if (!priv->flowenabled) {
185 pr_debug("CAIF: %s(): dropping packets flow off\n", __func__);
186 return NETDEV_TX_BUSY;
187 }
188
189 if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP)
190 swap(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
191
192 /* Store original SKB length. */
193 len = skb->len;
194
195 pkt = cfpkt_fromnative(CAIF_DIR_OUT, (void *) skb);
196
197 pr_debug("CAIF: %s(): transmit inst %s %d,%p\n",
198 __func__, dev->name, priv->chnl.dn->id, &priv->chnl.dn);
199
200 /* Send the packet down the stack. */
201 result = priv->chnl.dn->transmit(priv->chnl.dn, pkt);
202 if (result) {
203 if (result == -EAGAIN)
204 result = NETDEV_TX_BUSY;
205 return result;
206 }
207
208 /* Update statistics. */
209 dev->stats.tx_packets++;
210 dev->stats.tx_bytes += len;
211
212 return NETDEV_TX_OK;
213}
214
215static int chnl_net_open(struct net_device *dev)
216{
217 struct chnl_net *priv = NULL;
218 int result = -1;
219 ASSERT_RTNL();
220
221 priv = netdev_priv(dev);
222	if (!priv) {
223		pr_debug("CAIF: %s(): chnl_net_open: no priv\n", __func__);
224		return -ENODEV;
225	}
226	pr_debug("CAIF: %s(): dev name: %s\n", __func__, priv->name);
227
228 result = caif_connect_client(&priv->conn_req, &priv->chnl);
229 if (result != 0) {
230 pr_debug("CAIF: %s(): err: "
231 "Unable to register and open device, Err:%d\n",
232 __func__,
233 result);
234 return -ENODEV;
235 }
236 result = wait_event_interruptible(priv->netmgmt_wq, priv->flowenabled);
237
238 if (result == -ERESTARTSYS) {
239 pr_debug("CAIF: %s(): wait_event_interruptible"
240 " woken by a signal\n", __func__);
241 return -ERESTARTSYS;
242 } else
243		pr_debug("CAIF: %s(): Flow on received\n", __func__);
244
245 return 0;
246}
247
248static int chnl_net_stop(struct net_device *dev)
249{
250 struct chnl_net *priv;
251 int result = -1;
252 ASSERT_RTNL();
253 priv = netdev_priv(dev);
254
255 result = caif_disconnect_client(&priv->chnl);
256 if (result != 0) {
257 pr_debug("CAIF: %s(): chnl_net_stop: err: "
258 "Unable to STOP device, Err:%d\n",
259 __func__, result);
260 return -EBUSY;
261 }
262 result = wait_event_interruptible(priv->netmgmt_wq,
263 !priv->flowenabled);
264
265 if (result == -ERESTARTSYS) {
266 pr_debug("CAIF: %s(): wait_event_interruptible woken by"
267 " signal, signal_pending(current) = %d\n",
268 __func__,
269 signal_pending(current));
270 } else {
271 pr_debug("CAIF: %s(): disconnect received\n", __func__);
272
273 }
274
275 return 0;
276}
277
278static int chnl_net_init(struct net_device *dev)
279{
280 struct chnl_net *priv;
281 ASSERT_RTNL();
282 priv = netdev_priv(dev);
283 strncpy(priv->name, dev->name, sizeof(priv->name));
284 return 0;
285}
286
287static void chnl_net_uninit(struct net_device *dev)
288{
289 struct chnl_net *priv;
290 ASSERT_RTNL();
291 priv = netdev_priv(dev);
292 robust_list_del(&priv->list_field);
293}
294
295static const struct net_device_ops netdev_ops = {
296 .ndo_open = chnl_net_open,
297 .ndo_stop = chnl_net_stop,
298 .ndo_init = chnl_net_init,
299 .ndo_uninit = chnl_net_uninit,
300 .ndo_start_xmit = chnl_net_start_xmit,
301};
302
303static void ipcaif_net_setup(struct net_device *dev)
304{
305 struct chnl_net *priv;
306 dev->netdev_ops = &netdev_ops;
307 dev->destructor = free_netdev;
308 dev->flags |= IFF_NOARP;
309 dev->flags |= IFF_POINTOPOINT;
310 dev->needed_headroom = CAIF_NEEDED_HEADROOM;
311 dev->needed_tailroom = CAIF_NEEDED_TAILROOM;
312 dev->mtu = SIZE_MTU;
313 dev->tx_queue_len = CAIF_NET_DEFAULT_QUEUE_LEN;
314
315 priv = netdev_priv(dev);
316 priv->chnl.receive = chnl_recv_cb;
317 priv->chnl.ctrlcmd = chnl_flowctrl_cb;
318 priv->netdev = dev;
319 priv->conn_req.protocol = CAIFPROTO_DATAGRAM;
320 priv->conn_req.link_selector = CAIF_LINK_HIGH_BANDW;
321 priv->conn_req.priority = CAIF_PRIO_LOW;
322 /* Insert illegal value */
323 priv->conn_req.sockaddr.u.dgm.connection_id = -1;
324 priv->flowenabled = false;
325
326 ASSERT_RTNL();
327 init_waitqueue_head(&priv->netmgmt_wq);
328 list_add(&priv->list_field, &chnl_net_list);
329}
330
331
332static int ipcaif_fill_info(struct sk_buff *skb, const struct net_device *dev)
333{
334 struct chnl_net *priv;
335 u8 loop;
336 priv = netdev_priv(dev);
337 NLA_PUT_U32(skb, IFLA_CAIF_IPV4_CONNID,
338 priv->conn_req.sockaddr.u.dgm.connection_id);
339 NLA_PUT_U32(skb, IFLA_CAIF_IPV6_CONNID,
340 priv->conn_req.sockaddr.u.dgm.connection_id);
341 loop = priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP;
342 NLA_PUT_U8(skb, IFLA_CAIF_LOOPBACK, loop);
343
344
345 return 0;
346nla_put_failure:
347 return -EMSGSIZE;
348
349}
350
351static void caif_netlink_parms(struct nlattr *data[],
352 struct caif_connect_request *conn_req)
353{
354 if (!data) {
355 pr_warning("CAIF: %s: no params data found\n", __func__);
356 return;
357 }
358 if (data[IFLA_CAIF_IPV4_CONNID])
359 conn_req->sockaddr.u.dgm.connection_id =
360 nla_get_u32(data[IFLA_CAIF_IPV4_CONNID]);
361 if (data[IFLA_CAIF_IPV6_CONNID])
362 conn_req->sockaddr.u.dgm.connection_id =
363 nla_get_u32(data[IFLA_CAIF_IPV6_CONNID]);
364 if (data[IFLA_CAIF_LOOPBACK]) {
365 if (nla_get_u8(data[IFLA_CAIF_LOOPBACK]))
366 conn_req->protocol = CAIFPROTO_DATAGRAM_LOOP;
367 else
368 conn_req->protocol = CAIFPROTO_DATAGRAM;
369 }
370}
371
372static int ipcaif_newlink(struct net *src_net, struct net_device *dev,
373 struct nlattr *tb[], struct nlattr *data[])
374{
375 int ret;
376 struct chnl_net *caifdev;
377 ASSERT_RTNL();
378 caifdev = netdev_priv(dev);
379 caif_netlink_parms(data, &caifdev->conn_req);
380 ret = register_netdevice(dev);
381 if (ret)
382		pr_warning("CAIF: %s(): device rtnl registration failed\n",
383 __func__);
384 return ret;
385}
386
387static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[],
388 struct nlattr *data[])
389{
390 struct chnl_net *caifdev;
391 ASSERT_RTNL();
392 caifdev = netdev_priv(dev);
393 caif_netlink_parms(data, &caifdev->conn_req);
394 netdev_state_change(dev);
395 return 0;
396}
397
398static size_t ipcaif_get_size(const struct net_device *dev)
399{
400 return
401 /* IFLA_CAIF_IPV4_CONNID */
402 nla_total_size(4) +
403 /* IFLA_CAIF_IPV6_CONNID */
404 nla_total_size(4) +
405 /* IFLA_CAIF_LOOPBACK */
406 nla_total_size(2) +
407 0;
408}
409
410static const struct nla_policy ipcaif_policy[IFLA_CAIF_MAX + 1] = {
411 [IFLA_CAIF_IPV4_CONNID] = { .type = NLA_U32 },
412 [IFLA_CAIF_IPV6_CONNID] = { .type = NLA_U32 },
413 [IFLA_CAIF_LOOPBACK] = { .type = NLA_U8 }
414};
415
416
417static struct rtnl_link_ops ipcaif_link_ops __read_mostly = {
418 .kind = "caif",
419 .priv_size = sizeof(struct chnl_net),
420 .setup = ipcaif_net_setup,
421 .maxtype = IFLA_CAIF_MAX,
422 .policy = ipcaif_policy,
423 .newlink = ipcaif_newlink,
424 .changelink = ipcaif_changelink,
425 .get_size = ipcaif_get_size,
426 .fill_info = ipcaif_fill_info,
427
428};
429
430static int __init chnl_init_module(void)
431{
432 return rtnl_link_register(&ipcaif_link_ops);
433}
434
435static void __exit chnl_exit_module(void)
436{
437 struct chnl_net *dev = NULL;
438 struct list_head *list_node;
439 struct list_head *_tmp;
440 rtnl_link_unregister(&ipcaif_link_ops);
441 rtnl_lock();
442 list_for_each_safe(list_node, _tmp, &chnl_net_list) {
443 dev = list_entry(list_node, struct chnl_net, list_field);
444 list_del(list_node);
445 delete_device(dev);
446 }
447 rtnl_unlock();
448}
449
450module_init(chnl_init_module);
451module_exit(chnl_exit_module);
diff --git a/net/core/Makefile b/net/core/Makefile
index 08791ac3e05a..51c3eec850ef 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -7,7 +7,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
7 7
8obj-$(CONFIG_SYSCTL) += sysctl_net_core.o 8obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
9 9
10obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ 10obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
11 neighbour.o rtnetlink.o utils.o link_watch.o filter.o 11 neighbour.o rtnetlink.o utils.o link_watch.o filter.o
12 12
13obj-$(CONFIG_XFRM) += flow.o 13obj-$(CONFIG_XFRM) += flow.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 1c8a0ce473a8..a10a21619ae3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -130,6 +130,7 @@
130#include <linux/jhash.h> 130#include <linux/jhash.h>
131#include <linux/random.h> 131#include <linux/random.h>
132#include <trace/events/napi.h> 132#include <trace/events/napi.h>
133#include <linux/pci.h>
133 134
134#include "net-sysfs.h" 135#include "net-sysfs.h"
135 136
@@ -207,6 +208,20 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
207 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; 208 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
208} 209}
209 210
211static inline void rps_lock(struct softnet_data *queue)
212{
213#ifdef CONFIG_RPS
214 spin_lock(&queue->input_pkt_queue.lock);
215#endif
216}
217
218static inline void rps_unlock(struct softnet_data *queue)
219{
220#ifdef CONFIG_RPS
221 spin_unlock(&queue->input_pkt_queue.lock);
222#endif
223}
224
210/* Device list insertion */ 225/* Device list insertion */
211static int list_netdevice(struct net_device *dev) 226static int list_netdevice(struct net_device *dev)
212{ 227{
@@ -773,14 +788,17 @@ EXPORT_SYMBOL(__dev_getfirstbyhwtype);
773 788
774struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) 789struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
775{ 790{
776 struct net_device *dev; 791 struct net_device *dev, *ret = NULL;
777 792
778 rtnl_lock(); 793 rcu_read_lock();
779 dev = __dev_getfirstbyhwtype(net, type); 794 for_each_netdev_rcu(net, dev)
780 if (dev) 795 if (dev->type == type) {
781 dev_hold(dev); 796 dev_hold(dev);
782 rtnl_unlock(); 797 ret = dev;
783 return dev; 798 break;
799 }
800 rcu_read_unlock();
801 return ret;
784} 802}
785EXPORT_SYMBOL(dev_getfirstbyhwtype); 803EXPORT_SYMBOL(dev_getfirstbyhwtype);
786 804
@@ -1085,9 +1103,9 @@ void netdev_state_change(struct net_device *dev)
1085} 1103}
1086EXPORT_SYMBOL(netdev_state_change); 1104EXPORT_SYMBOL(netdev_state_change);
1087 1105
1088void netdev_bonding_change(struct net_device *dev, unsigned long event) 1106int netdev_bonding_change(struct net_device *dev, unsigned long event)
1089{ 1107{
1090 call_netdevice_notifiers(event, dev); 1108 return call_netdevice_notifiers(event, dev);
1091} 1109}
1092EXPORT_SYMBOL(netdev_bonding_change); 1110EXPORT_SYMBOL(netdev_bonding_change);
1093 1111
@@ -1784,18 +1802,27 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
1784 * 2. No high memory really exists on this machine. 1802 * 2. No high memory really exists on this machine.
1785 */ 1803 */
1786 1804
1787static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) 1805static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1788{ 1806{
1789#ifdef CONFIG_HIGHMEM 1807#ifdef CONFIG_HIGHMEM
1790 int i; 1808 int i;
1809 if (!(dev->features & NETIF_F_HIGHDMA)) {
1810 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1811 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1812 return 1;
1813 }
1791 1814
1792 if (dev->features & NETIF_F_HIGHDMA) 1815 if (PCI_DMA_BUS_IS_PHYS) {
1793 return 0; 1816 struct device *pdev = dev->dev.parent;
1794
1795 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1796 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1797 return 1;
1798 1817
1818 if (!pdev)
1819 return 0;
1820 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1821 dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
1822 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
1823 return 1;
1824 }
1825 }
1799#endif 1826#endif
1800 return 0; 1827 return 0;
1801} 1828}
@@ -1932,7 +1959,7 @@ out_kfree_skb:
1932 return rc; 1959 return rc;
1933} 1960}
1934 1961
1935static u32 skb_tx_hashrnd; 1962static u32 hashrnd __read_mostly;
1936 1963
1937u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) 1964u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1938{ 1965{
@@ -1950,7 +1977,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1950 else 1977 else
1951 hash = skb->protocol; 1978 hash = skb->protocol;
1952 1979
1953 hash = jhash_1word(hash, skb_tx_hashrnd); 1980 hash = jhash_1word(hash, hashrnd);
1954 1981
1955 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); 1982 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1956} 1983}
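
The return statement above is worth calling out: instead of hash % n, skb_tx_hash() treats the 32-bit hash as a fraction of the full 2^32 range and multiplies it by the queue count, so (hash * n) >> 32 always lands uniformly in [0, n). A minimal, self-contained illustration of just that arithmetic (not kernel code; the function name below is invented for the sketch):

#include <stdint.h>
#include <stdio.h>

/* Same multiply-and-shift as skb_tx_hash() above: map a 32-bit hash
 * onto [0, nqueues) without a modulo. */
static uint16_t scale_to_queue(uint32_t hash, uint16_t nqueues)
{
	return (uint16_t)(((uint64_t)hash * nqueues) >> 32);
}

int main(void)
{
	printf("%u\n", scale_to_queue(0x00000000u, 8));	/* prints 0 */
	printf("%u\n", scale_to_queue(0x80000000u, 8));	/* prints 4 */
	printf("%u\n", scale_to_queue(0xffffffffu, 8));	/* prints 7 */
	return 0;
}

The RPS code added later in this patch maps skb->rxhash onto map->cpus[] with the same trick.
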
@@ -1960,10 +1987,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
1960{ 1987{
1961 if (unlikely(queue_index >= dev->real_num_tx_queues)) { 1988 if (unlikely(queue_index >= dev->real_num_tx_queues)) {
1962 if (net_ratelimit()) { 1989 if (net_ratelimit()) {
1963 WARN(1, "%s selects TX queue %d, but " 1990 netdev_warn(dev, "selects TX queue %d, but "
1964 "real number of TX queues is %d\n", 1991 "real number of TX queues is %d\n",
1965 dev->name, queue_index, 1992 queue_index, dev->real_num_tx_queues);
1966 dev->real_num_tx_queues);
1967 } 1993 }
1968 return 0; 1994 return 0;
1969 } 1995 }
@@ -2176,6 +2202,178 @@ int weight_p __read_mostly = 64; /* old backlog weight */
2176 2202
2177DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; 2203DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
2178 2204
2205#ifdef CONFIG_RPS
2206/*
2207 * get_rps_cpu is called from netif_receive_skb and returns the target
2208 * CPU from the RPS map of the receiving queue for a given skb.
2209 */
2210static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
2211{
2212 struct ipv6hdr *ip6;
2213 struct iphdr *ip;
2214 struct netdev_rx_queue *rxqueue;
2215 struct rps_map *map;
2216 int cpu = -1;
2217 u8 ip_proto;
2218 u32 addr1, addr2, ports, ihl;
2219
2220 rcu_read_lock();
2221
2222 if (skb_rx_queue_recorded(skb)) {
2223 u16 index = skb_get_rx_queue(skb);
2224 if (unlikely(index >= dev->num_rx_queues)) {
2225 if (net_ratelimit()) {
2226 netdev_warn(dev, "received packet on queue "
2227 "%u, but number of RX queues is %u\n",
2228 index, dev->num_rx_queues);
2229 }
2230 goto done;
2231 }
2232 rxqueue = dev->_rx + index;
2233 } else
2234 rxqueue = dev->_rx;
2235
2236 if (!rxqueue->rps_map)
2237 goto done;
2238
2239 if (skb->rxhash)
2240 goto got_hash; /* Skip hash computation on packet header */
2241
2242 switch (skb->protocol) {
2243 case __constant_htons(ETH_P_IP):
2244 if (!pskb_may_pull(skb, sizeof(*ip)))
2245 goto done;
2246
2247 ip = (struct iphdr *) skb->data;
2248 ip_proto = ip->protocol;
2249 addr1 = ip->saddr;
2250 addr2 = ip->daddr;
2251 ihl = ip->ihl;
2252 break;
2253 case __constant_htons(ETH_P_IPV6):
2254 if (!pskb_may_pull(skb, sizeof(*ip6)))
2255 goto done;
2256
2257 ip6 = (struct ipv6hdr *) skb->data;
2258 ip_proto = ip6->nexthdr;
2259 addr1 = ip6->saddr.s6_addr32[3];
2260 addr2 = ip6->daddr.s6_addr32[3];
2261 ihl = (40 >> 2);
2262 break;
2263 default:
2264 goto done;
2265 }
2266 ports = 0;
2267 switch (ip_proto) {
2268 case IPPROTO_TCP:
2269 case IPPROTO_UDP:
2270 case IPPROTO_DCCP:
2271 case IPPROTO_ESP:
2272 case IPPROTO_AH:
2273 case IPPROTO_SCTP:
2274 case IPPROTO_UDPLITE:
2275 if (pskb_may_pull(skb, (ihl * 4) + 4))
2276 ports = *((u32 *) (skb->data + (ihl * 4)));
2277 break;
2278
2279 default:
2280 break;
2281 }
2282
2283 skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd);
2284 if (!skb->rxhash)
2285 skb->rxhash = 1;
2286
2287got_hash:
2288 map = rcu_dereference(rxqueue->rps_map);
2289 if (map) {
2290 u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
2291
2292 if (cpu_online(tcpu)) {
2293 cpu = tcpu;
2294 goto done;
2295 }
2296 }
2297
2298done:
2299 rcu_read_unlock();
2300 return cpu;
2301}
2302
2303/*
2304 * This structure holds the per-CPU mask of CPUs for which IPIs are scheduled
2305 * to be sent to kick remote softirq processing. There are two masks since
2306 * the sending of IPIs must be done with interrupts enabled. The select field
2307 * indicates the current mask that enqueue_backlog uses to schedule IPIs.
2308 * select is flipped before net_rps_action is called while still under lock,
2309 * net_rps_action then uses the non-selected mask to send the IPIs and clears
2310 * it without conflicting with enqueue_backlog operation.
2311 */
2312struct rps_remote_softirq_cpus {
2313 cpumask_t mask[2];
2314 int select;
2315};
2316static DEFINE_PER_CPU(struct rps_remote_softirq_cpus, rps_remote_softirq_cpus);
2317
2318/* Called from hardirq (IPI) context */
2319static void trigger_softirq(void *data)
2320{
2321 struct softnet_data *queue = data;
2322 __napi_schedule(&queue->backlog);
2323 __get_cpu_var(netdev_rx_stat).received_rps++;
2324}
2325#endif /* CONFIG_RPS */
2326
2327/*
2328 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
2329 * queue (may be a remote CPU queue).
2330 */
2331static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
2332{
2333 struct softnet_data *queue;
2334 unsigned long flags;
2335
2336 queue = &per_cpu(softnet_data, cpu);
2337
2338 local_irq_save(flags);
2339 __get_cpu_var(netdev_rx_stat).total++;
2340
2341 rps_lock(queue);
2342 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
2343 if (queue->input_pkt_queue.qlen) {
2344enqueue:
2345 __skb_queue_tail(&queue->input_pkt_queue, skb);
2346 rps_unlock(queue);
2347 local_irq_restore(flags);
2348 return NET_RX_SUCCESS;
2349 }
2350
2351 /* Schedule NAPI for backlog device */
2352 if (napi_schedule_prep(&queue->backlog)) {
2353#ifdef CONFIG_RPS
2354 if (cpu != smp_processor_id()) {
2355 struct rps_remote_softirq_cpus *rcpus =
2356 &__get_cpu_var(rps_remote_softirq_cpus);
2357
2358 cpu_set(cpu, rcpus->mask[rcpus->select]);
2359 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2360 } else
2361 __napi_schedule(&queue->backlog);
2362#else
2363 __napi_schedule(&queue->backlog);
2364#endif
2365 }
2366 goto enqueue;
2367 }
2368
2369 rps_unlock(queue);
2370
2371 __get_cpu_var(netdev_rx_stat).dropped++;
2372 local_irq_restore(flags);
2373
2374 kfree_skb(skb);
2375 return NET_RX_DROP;
2376}
2179 2377
2180/** 2378/**
2181 * netif_rx - post buffer to the network code 2379 * netif_rx - post buffer to the network code
@@ -2194,8 +2392,7 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
2194 2392
2195int netif_rx(struct sk_buff *skb) 2393int netif_rx(struct sk_buff *skb)
2196{ 2394{
2197 struct softnet_data *queue; 2395 int cpu;
2198 unsigned long flags;
2199 2396
2200 /* if netpoll wants it, pretend we never saw it */ 2397 /* if netpoll wants it, pretend we never saw it */
2201 if (netpoll_rx(skb)) 2398 if (netpoll_rx(skb))
@@ -2204,31 +2401,15 @@ int netif_rx(struct sk_buff *skb)
2204 if (!skb->tstamp.tv64) 2401 if (!skb->tstamp.tv64)
2205 net_timestamp(skb); 2402 net_timestamp(skb);
2206 2403
2207 /* 2404#ifdef CONFIG_RPS
2208 * The code is rearranged so that the path is the most 2405 cpu = get_rps_cpu(skb->dev, skb);
2209 * short when CPU is congested, but is still operating. 2406 if (cpu < 0)
2210 */ 2407 cpu = smp_processor_id();
2211 local_irq_save(flags); 2408#else
2212 queue = &__get_cpu_var(softnet_data); 2409 cpu = smp_processor_id();
2213 2410#endif
2214 __get_cpu_var(netdev_rx_stat).total++;
2215 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
2216 if (queue->input_pkt_queue.qlen) {
2217enqueue:
2218 __skb_queue_tail(&queue->input_pkt_queue, skb);
2219 local_irq_restore(flags);
2220 return NET_RX_SUCCESS;
2221 }
2222
2223 napi_schedule(&queue->backlog);
2224 goto enqueue;
2225 }
2226
2227 __get_cpu_var(netdev_rx_stat).dropped++;
2228 local_irq_restore(flags);
2229 2411
2230 kfree_skb(skb); 2412 return enqueue_to_backlog(skb, cpu);
2231 return NET_RX_DROP;
2232} 2413}
2233EXPORT_SYMBOL(netif_rx); 2414EXPORT_SYMBOL(netif_rx);
2234 2415
@@ -2465,22 +2646,7 @@ void netif_nit_deliver(struct sk_buff *skb)
2465 rcu_read_unlock(); 2646 rcu_read_unlock();
2466} 2647}
2467 2648
2468/** 2649static int __netif_receive_skb(struct sk_buff *skb)
2469 * netif_receive_skb - process receive buffer from network
2470 * @skb: buffer to process
2471 *
2472 * netif_receive_skb() is the main receive data processing function.
2473 * It always succeeds. The buffer may be dropped during processing
2474 * for congestion control or by the protocol layers.
2475 *
2476 * This function may only be called from softirq context and interrupts
2477 * should be enabled.
2478 *
2479 * Return values (usually ignored):
2480 * NET_RX_SUCCESS: no congestion
2481 * NET_RX_DROP: packet was dropped
2482 */
2483int netif_receive_skb(struct sk_buff *skb)
2484{ 2650{
2485 struct packet_type *ptype, *pt_prev; 2651 struct packet_type *ptype, *pt_prev;
2486 struct net_device *orig_dev; 2652 struct net_device *orig_dev;
@@ -2591,6 +2757,37 @@ out:
2591 rcu_read_unlock(); 2757 rcu_read_unlock();
2592 return ret; 2758 return ret;
2593} 2759}
2760
2761/**
2762 * netif_receive_skb - process receive buffer from network
2763 * @skb: buffer to process
2764 *
2765 * netif_receive_skb() is the main receive data processing function.
2766 * It always succeeds. The buffer may be dropped during processing
2767 * for congestion control or by the protocol layers.
2768 *
2769 * This function may only be called from softirq context and interrupts
2770 * should be enabled.
2771 *
2772 * Return values (usually ignored):
2773 * NET_RX_SUCCESS: no congestion
2774 * NET_RX_DROP: packet was dropped
2775 */
2776int netif_receive_skb(struct sk_buff *skb)
2777{
2778#ifdef CONFIG_RPS
2779 int cpu;
2780
2781 cpu = get_rps_cpu(skb->dev, skb);
2782
2783 if (cpu < 0)
2784 return __netif_receive_skb(skb);
2785 else
2786 return enqueue_to_backlog(skb, cpu);
2787#else
2788 return __netif_receive_skb(skb);
2789#endif
2790}
2594EXPORT_SYMBOL(netif_receive_skb); 2791EXPORT_SYMBOL(netif_receive_skb);
2595 2792
2596/* Network device is going away, flush any packets still pending */ 2793/* Network device is going away, flush any packets still pending */
@@ -2600,11 +2797,13 @@ static void flush_backlog(void *arg)
2600 struct softnet_data *queue = &__get_cpu_var(softnet_data); 2797 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2601 struct sk_buff *skb, *tmp; 2798 struct sk_buff *skb, *tmp;
2602 2799
2800 rps_lock(queue);
2603 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp) 2801 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2604 if (skb->dev == dev) { 2802 if (skb->dev == dev) {
2605 __skb_unlink(skb, &queue->input_pkt_queue); 2803 __skb_unlink(skb, &queue->input_pkt_queue);
2606 kfree_skb(skb); 2804 kfree_skb(skb);
2607 } 2805 }
2806 rps_unlock(queue);
2608} 2807}
2609 2808
2610static int napi_gro_complete(struct sk_buff *skb) 2809static int napi_gro_complete(struct sk_buff *skb)
@@ -2918,15 +3117,18 @@ static int process_backlog(struct napi_struct *napi, int quota)
2918 struct sk_buff *skb; 3117 struct sk_buff *skb;
2919 3118
2920 local_irq_disable(); 3119 local_irq_disable();
3120 rps_lock(queue);
2921 skb = __skb_dequeue(&queue->input_pkt_queue); 3121 skb = __skb_dequeue(&queue->input_pkt_queue);
2922 if (!skb) { 3122 if (!skb) {
2923 __napi_complete(napi); 3123 __napi_complete(napi);
3124 rps_unlock(queue);
2924 local_irq_enable(); 3125 local_irq_enable();
2925 break; 3126 break;
2926 } 3127 }
3128 rps_unlock(queue);
2927 local_irq_enable(); 3129 local_irq_enable();
2928 3130
2929 netif_receive_skb(skb); 3131 __netif_receive_skb(skb);
2930 } while (++work < quota && jiffies == start_time); 3132 } while (++work < quota && jiffies == start_time);
2931 3133
2932 return work; 3134 return work;
@@ -3015,6 +3217,24 @@ void netif_napi_del(struct napi_struct *napi)
3015} 3217}
3016EXPORT_SYMBOL(netif_napi_del); 3218EXPORT_SYMBOL(netif_napi_del);
3017 3219
3220#ifdef CONFIG_RPS
3221/*
3222 * net_rps_action sends any pending IPI's for rps. This is only called from
3223 * softirq and interrupts must be enabled.
3224 */
3225static void net_rps_action(cpumask_t *mask)
3226{
3227 int cpu;
3228
3229 /* Send pending IPI's to kick RPS processing on remote cpus. */
3230 for_each_cpu_mask_nr(cpu, *mask) {
3231 struct softnet_data *queue = &per_cpu(softnet_data, cpu);
3232 if (cpu_online(cpu))
3233 __smp_call_function_single(cpu, &queue->csd, 0);
3234 }
3235 cpus_clear(*mask);
3236}
3237#endif
3018 3238
3019static void net_rx_action(struct softirq_action *h) 3239static void net_rx_action(struct softirq_action *h)
3020{ 3240{
@@ -3022,6 +3242,10 @@ static void net_rx_action(struct softirq_action *h)
3022 unsigned long time_limit = jiffies + 2; 3242 unsigned long time_limit = jiffies + 2;
3023 int budget = netdev_budget; 3243 int budget = netdev_budget;
3024 void *have; 3244 void *have;
3245#ifdef CONFIG_RPS
3246 int select;
3247 struct rps_remote_softirq_cpus *rcpus;
3248#endif
3025 3249
3026 local_irq_disable(); 3250 local_irq_disable();
3027 3251
@@ -3084,7 +3308,17 @@ static void net_rx_action(struct softirq_action *h)
3084 netpoll_poll_unlock(have); 3308 netpoll_poll_unlock(have);
3085 } 3309 }
3086out: 3310out:
3311#ifdef CONFIG_RPS
3312 rcpus = &__get_cpu_var(rps_remote_softirq_cpus);
3313 select = rcpus->select;
3314 rcpus->select ^= 1;
3315
3316 local_irq_enable();
3317
3318 net_rps_action(&rcpus->mask[select]);
3319#else
3087 local_irq_enable(); 3320 local_irq_enable();
3321#endif
3088 3322
3089#ifdef CONFIG_NET_DMA 3323#ifdef CONFIG_NET_DMA
3090 /* 3324 /*
@@ -3330,10 +3564,10 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
3330{ 3564{
3331 struct netif_rx_stats *s = v; 3565 struct netif_rx_stats *s = v;
3332 3566
3333 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", 3567 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3334 s->total, s->dropped, s->time_squeeze, 0, 3568 s->total, s->dropped, s->time_squeeze, 0,
3335 0, 0, 0, 0, /* was fastroute */ 3569 0, 0, 0, 0, /* was fastroute */
3336 s->cpu_collision); 3570 s->cpu_collision, s->received_rps);
3337 return 0; 3571 return 0;
3338} 3572}
3339 3573
@@ -3556,11 +3790,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
3556 3790
3557 slave->master = master; 3791 slave->master = master;
3558 3792
3559 synchronize_net(); 3793 if (old) {
3560 3794 synchronize_net();
3561 if (old)
3562 dev_put(old); 3795 dev_put(old);
3563 3796 }
3564 if (master) 3797 if (master)
3565 slave->flags |= IFF_SLAVE; 3798 slave->flags |= IFF_SLAVE;
3566 else 3799 else
@@ -3737,562 +3970,6 @@ void dev_set_rx_mode(struct net_device *dev)
3737 netif_addr_unlock_bh(dev); 3970 netif_addr_unlock_bh(dev);
3738} 3971}
3739 3972
3740/* hw addresses list handling functions */
3741
3742static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
3743 int addr_len, unsigned char addr_type)
3744{
3745 struct netdev_hw_addr *ha;
3746 int alloc_size;
3747
3748 if (addr_len > MAX_ADDR_LEN)
3749 return -EINVAL;
3750
3751 list_for_each_entry(ha, &list->list, list) {
3752 if (!memcmp(ha->addr, addr, addr_len) &&
3753 ha->type == addr_type) {
3754 ha->refcount++;
3755 return 0;
3756 }
3757 }
3758
3759
3760 alloc_size = sizeof(*ha);
3761 if (alloc_size < L1_CACHE_BYTES)
3762 alloc_size = L1_CACHE_BYTES;
3763 ha = kmalloc(alloc_size, GFP_ATOMIC);
3764 if (!ha)
3765 return -ENOMEM;
3766 memcpy(ha->addr, addr, addr_len);
3767 ha->type = addr_type;
3768 ha->refcount = 1;
3769 ha->synced = false;
3770 list_add_tail_rcu(&ha->list, &list->list);
3771 list->count++;
3772 return 0;
3773}
3774
3775static void ha_rcu_free(struct rcu_head *head)
3776{
3777 struct netdev_hw_addr *ha;
3778
3779 ha = container_of(head, struct netdev_hw_addr, rcu_head);
3780 kfree(ha);
3781}
3782
3783static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
3784 int addr_len, unsigned char addr_type)
3785{
3786 struct netdev_hw_addr *ha;
3787
3788 list_for_each_entry(ha, &list->list, list) {
3789 if (!memcmp(ha->addr, addr, addr_len) &&
3790 (ha->type == addr_type || !addr_type)) {
3791 if (--ha->refcount)
3792 return 0;
3793 list_del_rcu(&ha->list);
3794 call_rcu(&ha->rcu_head, ha_rcu_free);
3795 list->count--;
3796 return 0;
3797 }
3798 }
3799 return -ENOENT;
3800}
3801
3802static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
3803 struct netdev_hw_addr_list *from_list,
3804 int addr_len,
3805 unsigned char addr_type)
3806{
3807 int err;
3808 struct netdev_hw_addr *ha, *ha2;
3809 unsigned char type;
3810
3811 list_for_each_entry(ha, &from_list->list, list) {
3812 type = addr_type ? addr_type : ha->type;
3813 err = __hw_addr_add(to_list, ha->addr, addr_len, type);
3814 if (err)
3815 goto unroll;
3816 }
3817 return 0;
3818
3819unroll:
3820 list_for_each_entry(ha2, &from_list->list, list) {
3821 if (ha2 == ha)
3822 break;
3823 type = addr_type ? addr_type : ha2->type;
3824 __hw_addr_del(to_list, ha2->addr, addr_len, type);
3825 }
3826 return err;
3827}
3828
3829static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
3830 struct netdev_hw_addr_list *from_list,
3831 int addr_len,
3832 unsigned char addr_type)
3833{
3834 struct netdev_hw_addr *ha;
3835 unsigned char type;
3836
3837 list_for_each_entry(ha, &from_list->list, list) {
3838 type = addr_type ? addr_type : ha->type;
3839 __hw_addr_del(to_list, ha->addr, addr_len, addr_type);
3840 }
3841}
3842
3843static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
3844 struct netdev_hw_addr_list *from_list,
3845 int addr_len)
3846{
3847 int err = 0;
3848 struct netdev_hw_addr *ha, *tmp;
3849
3850 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3851 if (!ha->synced) {
3852 err = __hw_addr_add(to_list, ha->addr,
3853 addr_len, ha->type);
3854 if (err)
3855 break;
3856 ha->synced = true;
3857 ha->refcount++;
3858 } else if (ha->refcount == 1) {
3859 __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
3860 __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
3861 }
3862 }
3863 return err;
3864}
3865
3866static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
3867 struct netdev_hw_addr_list *from_list,
3868 int addr_len)
3869{
3870 struct netdev_hw_addr *ha, *tmp;
3871
3872 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3873 if (ha->synced) {
3874 __hw_addr_del(to_list, ha->addr,
3875 addr_len, ha->type);
3876 ha->synced = false;
3877 __hw_addr_del(from_list, ha->addr,
3878 addr_len, ha->type);
3879 }
3880 }
3881}
3882
3883static void __hw_addr_flush(struct netdev_hw_addr_list *list)
3884{
3885 struct netdev_hw_addr *ha, *tmp;
3886
3887 list_for_each_entry_safe(ha, tmp, &list->list, list) {
3888 list_del_rcu(&ha->list);
3889 call_rcu(&ha->rcu_head, ha_rcu_free);
3890 }
3891 list->count = 0;
3892}
3893
3894static void __hw_addr_init(struct netdev_hw_addr_list *list)
3895{
3896 INIT_LIST_HEAD(&list->list);
3897 list->count = 0;
3898}
3899
3900/* Device addresses handling functions */
3901
3902static void dev_addr_flush(struct net_device *dev)
3903{
3904 /* rtnl_mutex must be held here */
3905
3906 __hw_addr_flush(&dev->dev_addrs);
3907 dev->dev_addr = NULL;
3908}
3909
3910static int dev_addr_init(struct net_device *dev)
3911{
3912 unsigned char addr[MAX_ADDR_LEN];
3913 struct netdev_hw_addr *ha;
3914 int err;
3915
3916 /* rtnl_mutex must be held here */
3917
3918 __hw_addr_init(&dev->dev_addrs);
3919 memset(addr, 0, sizeof(addr));
3920 err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
3921 NETDEV_HW_ADDR_T_LAN);
3922 if (!err) {
3923 /*
3924 * Get the first (previously created) address from the list
3925 * and set dev_addr pointer to this location.
3926 */
3927 ha = list_first_entry(&dev->dev_addrs.list,
3928 struct netdev_hw_addr, list);
3929 dev->dev_addr = ha->addr;
3930 }
3931 return err;
3932}
3933
3934/**
3935 * dev_addr_add - Add a device address
3936 * @dev: device
3937 * @addr: address to add
3938 * @addr_type: address type
3939 *
3940 * Add a device address to the device or increase the reference count if
3941 * it already exists.
3942 *
3943 * The caller must hold the rtnl_mutex.
3944 */
3945int dev_addr_add(struct net_device *dev, unsigned char *addr,
3946 unsigned char addr_type)
3947{
3948 int err;
3949
3950 ASSERT_RTNL();
3951
3952 err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
3953 if (!err)
3954 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3955 return err;
3956}
3957EXPORT_SYMBOL(dev_addr_add);
3958
3959/**
3960 * dev_addr_del - Release a device address.
3961 * @dev: device
3962 * @addr: address to delete
3963 * @addr_type: address type
3964 *
3965 * Release reference to a device address and remove it from the device
3966 * if the reference count drops to zero.
3967 *
3968 * The caller must hold the rtnl_mutex.
3969 */
3970int dev_addr_del(struct net_device *dev, unsigned char *addr,
3971 unsigned char addr_type)
3972{
3973 int err;
3974 struct netdev_hw_addr *ha;
3975
3976 ASSERT_RTNL();
3977
3978 /*
3979 * We can not remove the first address from the list because
3980 * dev->dev_addr points to that.
3981 */
3982 ha = list_first_entry(&dev->dev_addrs.list,
3983 struct netdev_hw_addr, list);
3984 if (ha->addr == dev->dev_addr && ha->refcount == 1)
3985 return -ENOENT;
3986
3987 err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
3988 addr_type);
3989 if (!err)
3990 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3991 return err;
3992}
3993EXPORT_SYMBOL(dev_addr_del);
3994
3995/**
3996 * dev_addr_add_multiple - Add device addresses from another device
3997 * @to_dev: device to which addresses will be added
3998 * @from_dev: device from which addresses will be added
3999 * @addr_type: address type - 0 means type will be used from from_dev
4000 *
4001 * Add device addresses of the one device to another.
4002 **
4003 * The caller must hold the rtnl_mutex.
4004 */
4005int dev_addr_add_multiple(struct net_device *to_dev,
4006 struct net_device *from_dev,
4007 unsigned char addr_type)
4008{
4009 int err;
4010
4011 ASSERT_RTNL();
4012
4013 if (from_dev->addr_len != to_dev->addr_len)
4014 return -EINVAL;
4015 err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
4016 to_dev->addr_len, addr_type);
4017 if (!err)
4018 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
4019 return err;
4020}
4021EXPORT_SYMBOL(dev_addr_add_multiple);
4022
4023/**
4024 * dev_addr_del_multiple - Delete device addresses by another device
4025 * @to_dev: device where the addresses will be deleted
4026 * @from_dev: device by which addresses the addresses will be deleted
4027 * @addr_type: address type - 0 means type will used from from_dev
4028 *
4029 * Deletes addresses in to device by the list of addresses in from device.
4030 *
4031 * The caller must hold the rtnl_mutex.
4032 */
4033int dev_addr_del_multiple(struct net_device *to_dev,
4034 struct net_device *from_dev,
4035 unsigned char addr_type)
4036{
4037 ASSERT_RTNL();
4038
4039 if (from_dev->addr_len != to_dev->addr_len)
4040 return -EINVAL;
4041 __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
4042 to_dev->addr_len, addr_type);
4043 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
4044 return 0;
4045}
4046EXPORT_SYMBOL(dev_addr_del_multiple);
4047
4048/* multicast addresses handling functions */
4049
4050int __dev_addr_delete(struct dev_addr_list **list, int *count,
4051 void *addr, int alen, int glbl)
4052{
4053 struct dev_addr_list *da;
4054
4055 for (; (da = *list) != NULL; list = &da->next) {
4056 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4057 alen == da->da_addrlen) {
4058 if (glbl) {
4059 int old_glbl = da->da_gusers;
4060 da->da_gusers = 0;
4061 if (old_glbl == 0)
4062 break;
4063 }
4064 if (--da->da_users)
4065 return 0;
4066
4067 *list = da->next;
4068 kfree(da);
4069 (*count)--;
4070 return 0;
4071 }
4072 }
4073 return -ENOENT;
4074}
4075
4076int __dev_addr_add(struct dev_addr_list **list, int *count,
4077 void *addr, int alen, int glbl)
4078{
4079 struct dev_addr_list *da;
4080
4081 for (da = *list; da != NULL; da = da->next) {
4082 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4083 da->da_addrlen == alen) {
4084 if (glbl) {
4085 int old_glbl = da->da_gusers;
4086 da->da_gusers = 1;
4087 if (old_glbl)
4088 return 0;
4089 }
4090 da->da_users++;
4091 return 0;
4092 }
4093 }
4094
4095 da = kzalloc(sizeof(*da), GFP_ATOMIC);
4096 if (da == NULL)
4097 return -ENOMEM;
4098 memcpy(da->da_addr, addr, alen);
4099 da->da_addrlen = alen;
4100 da->da_users = 1;
4101 da->da_gusers = glbl ? 1 : 0;
4102 da->next = *list;
4103 *list = da;
4104 (*count)++;
4105 return 0;
4106}
4107
4108/**
4109 * dev_unicast_delete - Release secondary unicast address.
4110 * @dev: device
4111 * @addr: address to delete
4112 *
4113 * Release reference to a secondary unicast address and remove it
4114 * from the device if the reference count drops to zero.
4115 *
4116 * The caller must hold the rtnl_mutex.
4117 */
4118int dev_unicast_delete(struct net_device *dev, void *addr)
4119{
4120 int err;
4121
4122 ASSERT_RTNL();
4123
4124 netif_addr_lock_bh(dev);
4125 err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
4126 NETDEV_HW_ADDR_T_UNICAST);
4127 if (!err)
4128 __dev_set_rx_mode(dev);
4129 netif_addr_unlock_bh(dev);
4130 return err;
4131}
4132EXPORT_SYMBOL(dev_unicast_delete);
4133
4134/**
4135 * dev_unicast_add - add a secondary unicast address
4136 * @dev: device
4137 * @addr: address to add
4138 *
4139 * Add a secondary unicast address to the device or increase
4140 * the reference count if it already exists.
4141 *
4142 * The caller must hold the rtnl_mutex.
4143 */
4144int dev_unicast_add(struct net_device *dev, void *addr)
4145{
4146 int err;
4147
4148 ASSERT_RTNL();
4149
4150 netif_addr_lock_bh(dev);
4151 err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
4152 NETDEV_HW_ADDR_T_UNICAST);
4153 if (!err)
4154 __dev_set_rx_mode(dev);
4155 netif_addr_unlock_bh(dev);
4156 return err;
4157}
4158EXPORT_SYMBOL(dev_unicast_add);
4159
4160int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
4161 struct dev_addr_list **from, int *from_count)
4162{
4163 struct dev_addr_list *da, *next;
4164 int err = 0;
4165
4166 da = *from;
4167 while (da != NULL) {
4168 next = da->next;
4169 if (!da->da_synced) {
4170 err = __dev_addr_add(to, to_count,
4171 da->da_addr, da->da_addrlen, 0);
4172 if (err < 0)
4173 break;
4174 da->da_synced = 1;
4175 da->da_users++;
4176 } else if (da->da_users == 1) {
4177 __dev_addr_delete(to, to_count,
4178 da->da_addr, da->da_addrlen, 0);
4179 __dev_addr_delete(from, from_count,
4180 da->da_addr, da->da_addrlen, 0);
4181 }
4182 da = next;
4183 }
4184 return err;
4185}
4186EXPORT_SYMBOL_GPL(__dev_addr_sync);
4187
4188void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
4189 struct dev_addr_list **from, int *from_count)
4190{
4191 struct dev_addr_list *da, *next;
4192
4193 da = *from;
4194 while (da != NULL) {
4195 next = da->next;
4196 if (da->da_synced) {
4197 __dev_addr_delete(to, to_count,
4198 da->da_addr, da->da_addrlen, 0);
4199 da->da_synced = 0;
4200 __dev_addr_delete(from, from_count,
4201 da->da_addr, da->da_addrlen, 0);
4202 }
4203 da = next;
4204 }
4205}
4206EXPORT_SYMBOL_GPL(__dev_addr_unsync);
4207
4208/**
4209 * dev_unicast_sync - Synchronize device's unicast list to another device
4210 * @to: destination device
4211 * @from: source device
4212 *
4213 * Add newly added addresses to the destination device and release
4214 * addresses that have no users left. The source device must be
4215 * locked by netif_tx_lock_bh.
4216 *
4217 * This function is intended to be called from the dev->set_rx_mode
4218 * function of layered software devices.
4219 */
4220int dev_unicast_sync(struct net_device *to, struct net_device *from)
4221{
4222 int err = 0;
4223
4224 if (to->addr_len != from->addr_len)
4225 return -EINVAL;
4226
4227 netif_addr_lock_bh(to);
4228 err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
4229 if (!err)
4230 __dev_set_rx_mode(to);
4231 netif_addr_unlock_bh(to);
4232 return err;
4233}
4234EXPORT_SYMBOL(dev_unicast_sync);
4235
4236/**
4237 * dev_unicast_unsync - Remove synchronized addresses from the destination device
4238 * @to: destination device
4239 * @from: source device
4240 *
4241 * Remove all addresses that were added to the destination device by
4242 * dev_unicast_sync(). This function is intended to be called from the
4243 * dev->stop function of layered software devices.
4244 */
4245void dev_unicast_unsync(struct net_device *to, struct net_device *from)
4246{
4247 if (to->addr_len != from->addr_len)
4248 return;
4249
4250 netif_addr_lock_bh(from);
4251 netif_addr_lock(to);
4252 __hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
4253 __dev_set_rx_mode(to);
4254 netif_addr_unlock(to);
4255 netif_addr_unlock_bh(from);
4256}
4257EXPORT_SYMBOL(dev_unicast_unsync);
4258
4259static void dev_unicast_flush(struct net_device *dev)
4260{
4261 netif_addr_lock_bh(dev);
4262 __hw_addr_flush(&dev->uc);
4263 netif_addr_unlock_bh(dev);
4264}
4265
4266static void dev_unicast_init(struct net_device *dev)
4267{
4268 __hw_addr_init(&dev->uc);
4269}
4270
4271
4272static void __dev_addr_discard(struct dev_addr_list **list)
4273{
4274 struct dev_addr_list *tmp;
4275
4276 while (*list != NULL) {
4277 tmp = *list;
4278 *list = tmp->next;
4279 if (tmp->da_users > tmp->da_gusers)
4280 printk("__dev_addr_discard: address leakage! "
4281 "da_users=%d\n", tmp->da_users);
4282 kfree(tmp);
4283 }
4284}
4285
4286static void dev_addr_discard(struct net_device *dev)
4287{
4288 netif_addr_lock_bh(dev);
4289
4290 __dev_addr_discard(&dev->mc_list);
4291 netdev_mc_count(dev) = 0;
4292
4293 netif_addr_unlock_bh(dev);
4294}
4295
4296/** 3973/**
4297 * dev_get_flags - get flags reported to userspace 3974 * dev_get_flags - get flags reported to userspace
4298 * @dev: device 3975 * @dev: device
@@ -4603,8 +4280,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4603 return -EINVAL; 4280 return -EINVAL;
4604 if (!netif_device_present(dev)) 4281 if (!netif_device_present(dev))
4605 return -ENODEV; 4282 return -ENODEV;
4606 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, 4283 return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
4607 dev->addr_len, 1);
4608 4284
4609 case SIOCDELMULTI: 4285 case SIOCDELMULTI:
4610 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || 4286 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
@@ -4612,8 +4288,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4612 return -EINVAL; 4288 return -EINVAL;
4613 if (!netif_device_present(dev)) 4289 if (!netif_device_present(dev))
4614 return -ENODEV; 4290 return -ENODEV;
4615 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, 4291 return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
4616 dev->addr_len, 1);
4617 4292
4618 case SIOCSIFTXQLEN: 4293 case SIOCSIFTXQLEN:
4619 if (ifr->ifr_qlen < 0) 4294 if (ifr->ifr_qlen < 0)
@@ -4920,8 +4595,8 @@ static void rollback_registered_many(struct list_head *head)
4920 /* 4595 /*
4921 * Flush the unicast and multicast chains 4596 * Flush the unicast and multicast chains
4922 */ 4597 */
4923 dev_unicast_flush(dev); 4598 dev_uc_flush(dev);
4924 dev_addr_discard(dev); 4599 dev_mc_flush(dev);
4925 4600
4926 if (dev->netdev_ops->ndo_uninit) 4601 if (dev->netdev_ops->ndo_uninit)
4927 dev->netdev_ops->ndo_uninit(dev); 4602 dev->netdev_ops->ndo_uninit(dev);
@@ -5070,6 +4745,24 @@ int register_netdevice(struct net_device *dev)
5070 4745
5071 dev->iflink = -1; 4746 dev->iflink = -1;
5072 4747
4748#ifdef CONFIG_RPS
4749 if (!dev->num_rx_queues) {
4750 /*
4751 * Allocate a single RX queue if driver never called
4752 * alloc_netdev_mq
4753 */
4754
4755 dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
4756 if (!dev->_rx) {
4757 ret = -ENOMEM;
4758 goto out;
4759 }
4760
4761 dev->_rx->first = dev->_rx;
4762 atomic_set(&dev->_rx->count, 1);
4763 dev->num_rx_queues = 1;
4764 }
4765#endif
5073 /* Init, if this function is available */ 4766 /* Init, if this function is available */
5074 if (dev->netdev_ops->ndo_init) { 4767 if (dev->netdev_ops->ndo_init) {
5075 ret = dev->netdev_ops->ndo_init(dev); 4768 ret = dev->netdev_ops->ndo_init(dev);
@@ -5430,6 +5123,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5430 struct net_device *dev; 5123 struct net_device *dev;
5431 size_t alloc_size; 5124 size_t alloc_size;
5432 struct net_device *p; 5125 struct net_device *p;
5126#ifdef CONFIG_RPS
5127 struct netdev_rx_queue *rx;
5128 int i;
5129#endif
5433 5130
5434 BUG_ON(strlen(name) >= sizeof(dev->name)); 5131 BUG_ON(strlen(name) >= sizeof(dev->name));
5435 5132
@@ -5455,13 +5152,32 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5455 goto free_p; 5152 goto free_p;
5456 } 5153 }
5457 5154
5155#ifdef CONFIG_RPS
5156 rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5157 if (!rx) {
5158 printk(KERN_ERR "alloc_netdev: Unable to allocate "
5159 "rx queues.\n");
5160 goto free_tx;
5161 }
5162
5163 atomic_set(&rx->count, queue_count);
5164
5165 /*
5166 * Set a pointer to first element in the array which holds the
5167 * reference count.
5168 */
5169 for (i = 0; i < queue_count; i++)
5170 rx[i].first = rx;
5171#endif
5172
5458 dev = PTR_ALIGN(p, NETDEV_ALIGN); 5173 dev = PTR_ALIGN(p, NETDEV_ALIGN);
5459 dev->padded = (char *)dev - (char *)p; 5174 dev->padded = (char *)dev - (char *)p;
5460 5175
5461 if (dev_addr_init(dev)) 5176 if (dev_addr_init(dev))
5462 goto free_tx; 5177 goto free_rx;
5463 5178
5464 dev_unicast_init(dev); 5179 dev_mc_init(dev);
5180 dev_uc_init(dev);
5465 5181
5466 dev_net_set(dev, &init_net); 5182 dev_net_set(dev, &init_net);
5467 5183
@@ -5469,6 +5185,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5469 dev->num_tx_queues = queue_count; 5185 dev->num_tx_queues = queue_count;
5470 dev->real_num_tx_queues = queue_count; 5186 dev->real_num_tx_queues = queue_count;
5471 5187
5188#ifdef CONFIG_RPS
5189 dev->_rx = rx;
5190 dev->num_rx_queues = queue_count;
5191#endif
5192
5472 dev->gso_max_size = GSO_MAX_SIZE; 5193 dev->gso_max_size = GSO_MAX_SIZE;
5473 5194
5474 netdev_init_queues(dev); 5195 netdev_init_queues(dev);
@@ -5483,9 +5204,12 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5483 strcpy(dev->name, name); 5204 strcpy(dev->name, name);
5484 return dev; 5205 return dev;
5485 5206
5207free_rx:
5208#ifdef CONFIG_RPS
5209 kfree(rx);
5486free_tx: 5210free_tx:
5211#endif
5487 kfree(tx); 5212 kfree(tx);
5488
5489free_p: 5213free_p:
5490 kfree(p); 5214 kfree(p);
5491 return NULL; 5215 return NULL;
@@ -5687,8 +5411,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5687 /* 5411 /*
5688 * Flush the unicast and multicast chains 5412 * Flush the unicast and multicast chains
5689 */ 5413 */
5690 dev_unicast_flush(dev); 5414 dev_uc_flush(dev);
5691 dev_addr_discard(dev); 5415 dev_mc_flush(dev);
5692 5416
5693 netdev_unregister_kobject(dev); 5417 netdev_unregister_kobject(dev);
5694 5418
@@ -5988,6 +5712,12 @@ static int __init net_dev_init(void)
5988 queue->completion_queue = NULL; 5712 queue->completion_queue = NULL;
5989 INIT_LIST_HEAD(&queue->poll_list); 5713 INIT_LIST_HEAD(&queue->poll_list);
5990 5714
5715#ifdef CONFIG_RPS
5716 queue->csd.func = trigger_softirq;
5717 queue->csd.info = queue;
5718 queue->csd.flags = 0;
5719#endif
5720
5991 queue->backlog.poll = process_backlog; 5721 queue->backlog.poll = process_backlog;
5992 queue->backlog.weight = weight_p; 5722 queue->backlog.weight = weight_p;
5993 queue->backlog.gro_list = NULL; 5723 queue->backlog.gro_list = NULL;
@@ -6026,7 +5756,7 @@ subsys_initcall(net_dev_init);
6026 5756
6027static int __init initialize_hashrnd(void) 5757static int __init initialize_hashrnd(void)
6028{ 5758{
6029 get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); 5759 get_random_bytes(&hashrnd, sizeof(hashrnd));
6030 return 0; 5760 return 0;
6031} 5761}
6032 5762
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
new file mode 100644
index 000000000000..508f9c18992f
--- /dev/null
+++ b/net/core/dev_addr_lists.c
@@ -0,0 +1,741 @@
1/*
2 * net/core/dev_addr_lists.c - Functions for handling net device lists
3 * Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com>
4 *
5 * This file contains functions for working with unicast, multicast and device
6 * addresses lists.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14#include <linux/netdevice.h>
15#include <linux/rtnetlink.h>
16#include <linux/list.h>
17#include <linux/proc_fs.h>
18
19/*
20 * General list handling functions
21 */
22
23static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
24 unsigned char *addr, int addr_len,
25 unsigned char addr_type, bool global)
26{
27 struct netdev_hw_addr *ha;
28 int alloc_size;
29
30 if (addr_len > MAX_ADDR_LEN)
31 return -EINVAL;
32
33 list_for_each_entry(ha, &list->list, list) {
34 if (!memcmp(ha->addr, addr, addr_len) &&
35 ha->type == addr_type) {
36 if (global) {
37 /* check if addr is already used as global */
38 if (ha->global_use)
39 return 0;
40 else
41 ha->global_use = true;
42 }
43 ha->refcount++;
44 return 0;
45 }
46 }
47
48
49 alloc_size = sizeof(*ha);
50 if (alloc_size < L1_CACHE_BYTES)
51 alloc_size = L1_CACHE_BYTES;
52 ha = kmalloc(alloc_size, GFP_ATOMIC);
53 if (!ha)
54 return -ENOMEM;
55 memcpy(ha->addr, addr, addr_len);
56 ha->type = addr_type;
57 ha->refcount = 1;
58 ha->global_use = global;
59 ha->synced = false;
60 list_add_tail_rcu(&ha->list, &list->list);
61 list->count++;
62 return 0;
63}
64
65static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
66 int addr_len, unsigned char addr_type)
67{
68 return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
69}
70
71static void ha_rcu_free(struct rcu_head *head)
72{
73 struct netdev_hw_addr *ha;
74
75 ha = container_of(head, struct netdev_hw_addr, rcu_head);
76 kfree(ha);
77}
78
79static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
80 unsigned char *addr, int addr_len,
81 unsigned char addr_type, bool global)
82{
83 struct netdev_hw_addr *ha;
84
85 list_for_each_entry(ha, &list->list, list) {
86 if (!memcmp(ha->addr, addr, addr_len) &&
87 (ha->type == addr_type || !addr_type)) {
88 if (global) {
89 if (!ha->global_use)
90 break;
91 else
92 ha->global_use = false;
93 }
94 if (--ha->refcount)
95 return 0;
96 list_del_rcu(&ha->list);
97 call_rcu(&ha->rcu_head, ha_rcu_free);
98 list->count--;
99 return 0;
100 }
101 }
102 return -ENOENT;
103}
104
105static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
106 int addr_len, unsigned char addr_type)
107{
108 return __hw_addr_del_ex(list, addr, addr_len, addr_type, false);
109}
110
111int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
112 struct netdev_hw_addr_list *from_list,
113 int addr_len, unsigned char addr_type)
114{
115 int err;
116 struct netdev_hw_addr *ha, *ha2;
117 unsigned char type;
118
119 list_for_each_entry(ha, &from_list->list, list) {
120 type = addr_type ? addr_type : ha->type;
121 err = __hw_addr_add(to_list, ha->addr, addr_len, type);
122 if (err)
123 goto unroll;
124 }
125 return 0;
126
127unroll:
128 list_for_each_entry(ha2, &from_list->list, list) {
129 if (ha2 == ha)
130 break;
131 type = addr_type ? addr_type : ha2->type;
132 __hw_addr_del(to_list, ha2->addr, addr_len, type);
133 }
134 return err;
135}
136EXPORT_SYMBOL(__hw_addr_add_multiple);
137
138void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
139 struct netdev_hw_addr_list *from_list,
140 int addr_len, unsigned char addr_type)
141{
142 struct netdev_hw_addr *ha;
143 unsigned char type;
144
145 list_for_each_entry(ha, &from_list->list, list) {
146 type = addr_type ? addr_type : ha->type;
147		__hw_addr_del(to_list, ha->addr, addr_len, type);
148 }
149}
150EXPORT_SYMBOL(__hw_addr_del_multiple);
151
152int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
153 struct netdev_hw_addr_list *from_list,
154 int addr_len)
155{
156 int err = 0;
157 struct netdev_hw_addr *ha, *tmp;
158
159 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
160 if (!ha->synced) {
161 err = __hw_addr_add(to_list, ha->addr,
162 addr_len, ha->type);
163 if (err)
164 break;
165 ha->synced = true;
166 ha->refcount++;
167 } else if (ha->refcount == 1) {
168 __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
169 __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
170 }
171 }
172 return err;
173}
174EXPORT_SYMBOL(__hw_addr_sync);
175
176void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
177 struct netdev_hw_addr_list *from_list,
178 int addr_len)
179{
180 struct netdev_hw_addr *ha, *tmp;
181
182 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
183 if (ha->synced) {
184 __hw_addr_del(to_list, ha->addr,
185 addr_len, ha->type);
186 ha->synced = false;
187 __hw_addr_del(from_list, ha->addr,
188 addr_len, ha->type);
189 }
190 }
191}
192EXPORT_SYMBOL(__hw_addr_unsync);
193
194void __hw_addr_flush(struct netdev_hw_addr_list *list)
195{
196 struct netdev_hw_addr *ha, *tmp;
197
198 list_for_each_entry_safe(ha, tmp, &list->list, list) {
199 list_del_rcu(&ha->list);
200 call_rcu(&ha->rcu_head, ha_rcu_free);
201 }
202 list->count = 0;
203}
204EXPORT_SYMBOL(__hw_addr_flush);
205
206void __hw_addr_init(struct netdev_hw_addr_list *list)
207{
208 INIT_LIST_HEAD(&list->list);
209 list->count = 0;
210}
211EXPORT_SYMBOL(__hw_addr_init);
212
213/*
214 * Device addresses handling functions
215 */
216
217/**
218 * dev_addr_flush - Flush device address list
219 * @dev: device
220 *
221 * Flush device address list and reset ->dev_addr.
222 *
223 * The caller must hold the rtnl_mutex.
224 */
225void dev_addr_flush(struct net_device *dev)
226{
227 /* rtnl_mutex must be held here */
228
229 __hw_addr_flush(&dev->dev_addrs);
230 dev->dev_addr = NULL;
231}
232EXPORT_SYMBOL(dev_addr_flush);
233
234/**
235 * dev_addr_init - Init device address list
236 * @dev: device
237 *
238 * Init device address list and create the first element,
239 * used by ->dev_addr.
240 *
241 * The caller must hold the rtnl_mutex.
242 */
243int dev_addr_init(struct net_device *dev)
244{
245 unsigned char addr[MAX_ADDR_LEN];
246 struct netdev_hw_addr *ha;
247 int err;
248
249 /* rtnl_mutex must be held here */
250
251 __hw_addr_init(&dev->dev_addrs);
252 memset(addr, 0, sizeof(addr));
253 err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
254 NETDEV_HW_ADDR_T_LAN);
255 if (!err) {
256 /*
257 * Get the first (previously created) address from the list
258 * and set dev_addr pointer to this location.
259 */
260 ha = list_first_entry(&dev->dev_addrs.list,
261 struct netdev_hw_addr, list);
262 dev->dev_addr = ha->addr;
263 }
264 return err;
265}
266EXPORT_SYMBOL(dev_addr_init);
267
268/**
269 * dev_addr_add - Add a device address
270 * @dev: device
271 * @addr: address to add
272 * @addr_type: address type
273 *
274 * Add a device address to the device or increase the reference count if
275 * it already exists.
276 *
277 * The caller must hold the rtnl_mutex.
278 */
279int dev_addr_add(struct net_device *dev, unsigned char *addr,
280 unsigned char addr_type)
281{
282 int err;
283
284 ASSERT_RTNL();
285
286 err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
287 if (!err)
288 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
289 return err;
290}
291EXPORT_SYMBOL(dev_addr_add);
292
293/**
294 * dev_addr_del - Release a device address.
295 * @dev: device
296 * @addr: address to delete
297 * @addr_type: address type
298 *
299 * Release reference to a device address and remove it from the device
300 * if the reference count drops to zero.
301 *
302 * The caller must hold the rtnl_mutex.
303 */
304int dev_addr_del(struct net_device *dev, unsigned char *addr,
305 unsigned char addr_type)
306{
307 int err;
308 struct netdev_hw_addr *ha;
309
310 ASSERT_RTNL();
311
312 /*
313 * We can not remove the first address from the list because
314 * dev->dev_addr points to that.
315 */
316 ha = list_first_entry(&dev->dev_addrs.list,
317 struct netdev_hw_addr, list);
318 if (ha->addr == dev->dev_addr && ha->refcount == 1)
319 return -ENOENT;
320
321 err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
322 addr_type);
323 if (!err)
324 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
325 return err;
326}
327EXPORT_SYMBOL(dev_addr_del);
328
329/**
330 * dev_addr_add_multiple - Add device addresses from another device
331 * @to_dev: device to which addresses will be added
332 * @from_dev: device from which addresses will be added
333 * @addr_type: address type - 0 means type will be used from from_dev
334 *
335 * Add device addresses of one device to another.
336 *
337 * The caller must hold the rtnl_mutex.
338 */
339int dev_addr_add_multiple(struct net_device *to_dev,
340 struct net_device *from_dev,
341 unsigned char addr_type)
342{
343 int err;
344
345 ASSERT_RTNL();
346
347 if (from_dev->addr_len != to_dev->addr_len)
348 return -EINVAL;
349 err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
350 to_dev->addr_len, addr_type);
351 if (!err)
352 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
353 return err;
354}
355EXPORT_SYMBOL(dev_addr_add_multiple);
356
357/**
358 * dev_addr_del_multiple - Delete device addresses by another device
359 * @to_dev: device where the addresses will be deleted
360 * @from_dev: device whose addresses will be deleted from @to_dev
361 * @addr_type: address type - 0 means type will be used from from_dev
362 *
363 * Deletes the addresses in the to device that are listed in the from device.
364 *
365 * The caller must hold the rtnl_mutex.
366 */
367int dev_addr_del_multiple(struct net_device *to_dev,
368 struct net_device *from_dev,
369 unsigned char addr_type)
370{
371 ASSERT_RTNL();
372
373 if (from_dev->addr_len != to_dev->addr_len)
374 return -EINVAL;
375 __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
376 to_dev->addr_len, addr_type);
377 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
378 return 0;
379}
380EXPORT_SYMBOL(dev_addr_del_multiple);
381
382/*
383 * Unicast list handling functions
384 */
385
386/**
387 * dev_uc_add - Add a secondary unicast address
388 * @dev: device
389 * @addr: address to add
390 *
391 * Add a secondary unicast address to the device or increase
392 * the reference count if it already exists.
393 */
394int dev_uc_add(struct net_device *dev, unsigned char *addr)
395{
396 int err;
397
398 netif_addr_lock_bh(dev);
399 err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
400 NETDEV_HW_ADDR_T_UNICAST);
401 if (!err)
402 __dev_set_rx_mode(dev);
403 netif_addr_unlock_bh(dev);
404 return err;
405}
406EXPORT_SYMBOL(dev_uc_add);
407
408/**
409 * dev_uc_del - Release secondary unicast address.
410 * @dev: device
411 * @addr: address to delete
412 *
413 * Release reference to a secondary unicast address and remove it
414 * from the device if the reference count drops to zero.
415 */
416int dev_uc_del(struct net_device *dev, unsigned char *addr)
417{
418 int err;
419
420 netif_addr_lock_bh(dev);
421 err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
422 NETDEV_HW_ADDR_T_UNICAST);
423 if (!err)
424 __dev_set_rx_mode(dev);
425 netif_addr_unlock_bh(dev);
426 return err;
427}
428EXPORT_SYMBOL(dev_uc_del);
429
430/**
431 * dev_uc_sync - Synchronize device's unicast list to another device
432 * @to: destination device
433 * @from: source device
434 *
435 * Add newly added addresses to the destination device and release
436 * addresses that have no users left. The source device must be
437 * locked by netif_tx_lock_bh.
438 *
439 * This function is intended to be called from the dev->set_rx_mode
440 * function of layered software devices.
441 */
442int dev_uc_sync(struct net_device *to, struct net_device *from)
443{
444 int err = 0;
445
446 if (to->addr_len != from->addr_len)
447 return -EINVAL;
448
449 netif_addr_lock_bh(to);
450 err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
451 if (!err)
452 __dev_set_rx_mode(to);
453 netif_addr_unlock_bh(to);
454 return err;
455}
456EXPORT_SYMBOL(dev_uc_sync);
457
458/**
459 * dev_uc_unsync - Remove synchronized addresses from the destination device
460 * @to: destination device
461 * @from: source device
462 *
463 * Remove all addresses that were added to the destination device by
464 * dev_uc_sync(). This function is intended to be called from the
465 * dev->stop function of layered software devices.
466 */
467void dev_uc_unsync(struct net_device *to, struct net_device *from)
468{
469 if (to->addr_len != from->addr_len)
470 return;
471
472 netif_addr_lock_bh(from);
473 netif_addr_lock(to);
474 __hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
475 __dev_set_rx_mode(to);
476 netif_addr_unlock(to);
477 netif_addr_unlock_bh(from);
478}
479EXPORT_SYMBOL(dev_uc_unsync);
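/*
 * Editorial sketch, not part of the original patch: how a layered (stacked)
 * device would typically use dev_uc_sync()/dev_uc_unsync(). "lower" stands
 * for the underlying real device; the function names are illustrative.
 */
static void example_upper_set_rx_mode(struct net_device *upper,
				      struct net_device *lower)
{
	/* from the upper device's ndo_set_rx_mode: push new addresses down */
	dev_uc_sync(lower, upper);
}

static void example_upper_stop(struct net_device *upper,
			       struct net_device *lower)
{
	/* from the upper device's ndo_stop: drop what dev_uc_sync() added */
	dev_uc_unsync(lower, upper);
}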
480
481/**
482 * dev_uc_flush - Flush unicast addresses
483 * @dev: device
484 *
485 * Flush unicast addresses.
486 */
487void dev_uc_flush(struct net_device *dev)
488{
489 netif_addr_lock_bh(dev);
490 __hw_addr_flush(&dev->uc);
491 netif_addr_unlock_bh(dev);
492}
493EXPORT_SYMBOL(dev_uc_flush);
494
495/**
496 * dev_uc_init - Init unicast address list
497 * @dev: device
498 *
499 * Init unicast address list.
500 */
501void dev_uc_init(struct net_device *dev)
502{
503 __hw_addr_init(&dev->uc);
504}
505EXPORT_SYMBOL(dev_uc_init);
506
507/*
508 * Multicast list handling functions
509 */
510
511static int __dev_mc_add(struct net_device *dev, unsigned char *addr,
512 bool global)
513{
514 int err;
515
516 netif_addr_lock_bh(dev);
517 err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
518 NETDEV_HW_ADDR_T_MULTICAST, global);
519 if (!err)
520 __dev_set_rx_mode(dev);
521 netif_addr_unlock_bh(dev);
522 return err;
523}
524/**
525 * dev_mc_add - Add a multicast address
526 * @dev: device
527 * @addr: address to add
528 *
529 * Add a multicast address to the device or increase
530 * the reference count if it already exists.
531 */
532int dev_mc_add(struct net_device *dev, unsigned char *addr)
533{
534 return __dev_mc_add(dev, addr, false);
535}
536EXPORT_SYMBOL(dev_mc_add);
537
538/**
539 * dev_mc_add_global - Add a global multicast address
540 * @dev: device
541 * @addr: address to add
542 *
543 * Add a global multicast address to the device.
544 */
545int dev_mc_add_global(struct net_device *dev, unsigned char *addr)
546{
547 return __dev_mc_add(dev, addr, true);
548}
549EXPORT_SYMBOL(dev_mc_add_global);
550
551static int __dev_mc_del(struct net_device *dev, unsigned char *addr,
552 bool global)
553{
554 int err;
555
556 netif_addr_lock_bh(dev);
557 err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len,
558 NETDEV_HW_ADDR_T_MULTICAST, global);
559 if (!err)
560 __dev_set_rx_mode(dev);
561 netif_addr_unlock_bh(dev);
562 return err;
563}
564
565/**
566 * dev_mc_del - Delete a multicast address.
567 * @dev: device
568 * @addr: address to delete
569 *
570 * Release reference to a multicast address and remove it
571 * from the device if the reference count drops to zero.
572 */
573int dev_mc_del(struct net_device *dev, unsigned char *addr)
574{
575 return __dev_mc_del(dev, addr, false);
576}
577EXPORT_SYMBOL(dev_mc_del);
578
579/**
580 * dev_mc_del_global - Delete a global multicast address.
581 * @dev: device
582 * @addr: address to delete
583 *
584 * Release reference to a multicast address and remove it
585 * from the device if the reference count drops to zero.
586 */
587int dev_mc_del_global(struct net_device *dev, unsigned char *addr)
588{
589 return __dev_mc_del(dev, addr, true);
590}
591EXPORT_SYMBOL(dev_mc_del_global);
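/*
 * Editorial sketch, not part of the original patch: joining and leaving a
 * link-layer multicast group with dev_mc_add()/dev_mc_del(). The example
 * address (the Ethernet mapping of 224.0.0.1) is an illustrative assumption.
 */
static unsigned char example_mc_addr[ETH_ALEN] = {
	0x01, 0x00, 0x5e, 0x00, 0x00, 0x01
};

static int example_join_group(struct net_device *dev)
{
	/* adds the address or just bumps its refcount, then reprograms rx mode */
	return dev_mc_add(dev, example_mc_addr);
}

static void example_leave_group(struct net_device *dev)
{
	/* drops the reference; the address is removed when the count hits zero */
	dev_mc_del(dev, example_mc_addr);
}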
592
593/**
594 * dev_mc_sync - Synchronize device's multicast list to another device
595 * @to: destination device
596 * @from: source device
597 *
598 * Add newly added addresses to the destination device and release
599 * addresses that have no users left. The source device must be
600 * locked by netif_tx_lock_bh.
601 *
602 * This function is intended to be called from the dev->set_multicast_list
603 * or dev->set_rx_mode function of layered software devices.
604 */
605int dev_mc_sync(struct net_device *to, struct net_device *from)
606{
607 int err = 0;
608
609 if (to->addr_len != from->addr_len)
610 return -EINVAL;
611
612 netif_addr_lock_bh(to);
613 err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
614 if (!err)
615 __dev_set_rx_mode(to);
616 netif_addr_unlock_bh(to);
617 return err;
618}
619EXPORT_SYMBOL(dev_mc_sync);
620
621/**
622 * dev_mc_unsync - Remove synchronized addresses from the destination device
623 * @to: destination device
624 * @from: source device
625 *
626 * Remove all addresses that were added to the destination device by
627 * dev_mc_sync(). This function is intended to be called from the
628 * dev->stop function of layered software devices.
629 */
630void dev_mc_unsync(struct net_device *to, struct net_device *from)
631{
632 if (to->addr_len != from->addr_len)
633 return;
634
635 netif_addr_lock_bh(from);
636 netif_addr_lock(to);
637 __hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
638 __dev_set_rx_mode(to);
639 netif_addr_unlock(to);
640 netif_addr_unlock_bh(from);
641}
642EXPORT_SYMBOL(dev_mc_unsync);
643
644/**
645 * dev_mc_flush - Flush multicast addresses
646 * @dev: device
647 *
648 * Flush multicast addresses.
649 */
650void dev_mc_flush(struct net_device *dev)
651{
652 netif_addr_lock_bh(dev);
653 __hw_addr_flush(&dev->mc);
654 netif_addr_unlock_bh(dev);
655}
656EXPORT_SYMBOL(dev_mc_flush);
657
658/**
659 * dev_mc_init - Init multicast address list
660 * @dev: device
661 *
662 * Init multicast address list.
663 */
664void dev_mc_init(struct net_device *dev)
665{
666 __hw_addr_init(&dev->mc);
667}
668EXPORT_SYMBOL(dev_mc_init);
669
670#ifdef CONFIG_PROC_FS
671#include <linux/seq_file.h>
672
673static int dev_mc_seq_show(struct seq_file *seq, void *v)
674{
675 struct netdev_hw_addr *ha;
676 struct net_device *dev = v;
677
678 if (v == SEQ_START_TOKEN)
679 return 0;
680
681 netif_addr_lock_bh(dev);
682 netdev_for_each_mc_addr(ha, dev) {
683 int i;
684
685 seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
686 dev->name, ha->refcount, ha->global_use);
687
688 for (i = 0; i < dev->addr_len; i++)
689 seq_printf(seq, "%02x", ha->addr[i]);
690
691 seq_putc(seq, '\n');
692 }
693 netif_addr_unlock_bh(dev);
694 return 0;
695}
696
697static const struct seq_operations dev_mc_seq_ops = {
698 .start = dev_seq_start,
699 .next = dev_seq_next,
700 .stop = dev_seq_stop,
701 .show = dev_mc_seq_show,
702};
703
704static int dev_mc_seq_open(struct inode *inode, struct file *file)
705{
706 return seq_open_net(inode, file, &dev_mc_seq_ops,
707 sizeof(struct seq_net_private));
708}
709
710static const struct file_operations dev_mc_seq_fops = {
711 .owner = THIS_MODULE,
712 .open = dev_mc_seq_open,
713 .read = seq_read,
714 .llseek = seq_lseek,
715 .release = seq_release_net,
716};
717
718#endif
719
720static int __net_init dev_mc_net_init(struct net *net)
721{
722 if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
723 return -ENOMEM;
724 return 0;
725}
726
727static void __net_exit dev_mc_net_exit(struct net *net)
728{
729 proc_net_remove(net, "dev_mcast");
730}
731
732static struct pernet_operations __net_initdata dev_mc_net_ops = {
733 .init = dev_mc_net_init,
734 .exit = dev_mc_net_exit,
735};
736
737void __init dev_mcast_init(void)
738{
739 register_pernet_subsys(&dev_mc_net_ops);
740}
741
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
deleted file mode 100644
index 3dc295beb483..000000000000
--- a/net/core/dev_mcast.c
+++ /dev/null
@@ -1,232 +0,0 @@
1/*
2 * Linux NET3: Multicast List maintenance.
3 *
4 * Authors:
5 * Tim Kordas <tjk@nostromo.eeap.cwru.edu>
6 * Richard Underwood <richard@wuzz.demon.co.uk>
7 *
8 * Stir fried together from the IP multicast and CAP patches above
9 * Alan Cox <alan@lxorguk.ukuu.org.uk>
10 *
11 * Fixes:
12 * Alan Cox : Update the device on a real delete
13 * rather than any time but...
14 * Alan Cox : IFF_ALLMULTI support.
15 * Alan Cox : New format set_multicast_list() calls.
16 * Gleb Natapov : Remove dev_mc_lock.
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version.
22 */
23
24#include <linux/module.h>
25#include <asm/uaccess.h>
26#include <asm/system.h>
27#include <linux/bitops.h>
28#include <linux/types.h>
29#include <linux/kernel.h>
30#include <linux/string.h>
31#include <linux/mm.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/in.h>
35#include <linux/errno.h>
36#include <linux/interrupt.h>
37#include <linux/if_ether.h>
38#include <linux/inet.h>
39#include <linux/netdevice.h>
40#include <linux/etherdevice.h>
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
43#include <linux/init.h>
44#include <net/net_namespace.h>
45#include <net/ip.h>
46#include <net/route.h>
47#include <linux/skbuff.h>
48#include <net/sock.h>
49#include <net/arp.h>
50
51
52/*
53 * Device multicast list maintenance.
54 *
55 * This is used both by IP and by the user level maintenance functions.
56 * Unlike BSD we maintain a usage count on a given multicast address so
57 * that a casual user application can add/delete multicasts used by
58 * protocols without doing damage to the protocols when it deletes the
59 * entries. It also helps IP as it tracks overlapping maps.
60 *
61 * Device mc lists are changed by bh at least if IPv6 is enabled,
62 * so that it must be bh protected.
63 *
64 * We block accesses to device mc filters with netif_tx_lock.
65 */
66
67/*
68 * Delete a device level multicast
69 */
70
71int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
72{
73 int err;
74
75 netif_addr_lock_bh(dev);
76 err = __dev_addr_delete(&dev->mc_list, &dev->mc_count,
77 addr, alen, glbl);
78 if (!err) {
79 /*
80 * We have altered the list, so the card
81 * loaded filter is now wrong. Fix it
82 */
83
84 __dev_set_rx_mode(dev);
85 }
86 netif_addr_unlock_bh(dev);
87 return err;
88}
89
90/*
91 * Add a device level multicast
92 */
93
94int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
95{
96 int err;
97
98 netif_addr_lock_bh(dev);
99 if (alen != dev->addr_len)
100 err = -EINVAL;
101 else
102 err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
103 if (!err)
104 __dev_set_rx_mode(dev);
105 netif_addr_unlock_bh(dev);
106 return err;
107}
108
109/**
110 * dev_mc_sync - Synchronize device's multicast list to another device
111 * @to: destination device
112 * @from: source device
113 *
114 * Add newly added addresses to the destination device and release
115 * addresses that have no users left. The source device must be
116 * locked by netif_tx_lock_bh.
117 *
118 * This function is intended to be called from the dev->set_multicast_list
119 * or dev->set_rx_mode function of layered software devices.
120 */
121int dev_mc_sync(struct net_device *to, struct net_device *from)
122{
123 int err = 0;
124
125 netif_addr_lock_bh(to);
126 err = __dev_addr_sync(&to->mc_list, &to->mc_count,
127 &from->mc_list, &from->mc_count);
128 if (!err)
129 __dev_set_rx_mode(to);
130 netif_addr_unlock_bh(to);
131
132 return err;
133}
134EXPORT_SYMBOL(dev_mc_sync);
135
136
137/**
138 * dev_mc_unsync - Remove synchronized addresses from the destination
139 * device
140 * @to: destination device
141 * @from: source device
142 *
143 * Remove all addresses that were added to the destination device by
144 * dev_mc_sync(). This function is intended to be called from the
145 * dev->stop function of layered software devices.
146 */
147void dev_mc_unsync(struct net_device *to, struct net_device *from)
148{
149 netif_addr_lock_bh(from);
150 netif_addr_lock(to);
151
152 __dev_addr_unsync(&to->mc_list, &to->mc_count,
153 &from->mc_list, &from->mc_count);
154 __dev_set_rx_mode(to);
155
156 netif_addr_unlock(to);
157 netif_addr_unlock_bh(from);
158}
159EXPORT_SYMBOL(dev_mc_unsync);
160
161#ifdef CONFIG_PROC_FS
162static int dev_mc_seq_show(struct seq_file *seq, void *v)
163{
164 struct dev_addr_list *m;
165 struct net_device *dev = v;
166
167 if (v == SEQ_START_TOKEN)
168 return 0;
169
170 netif_addr_lock_bh(dev);
171 for (m = dev->mc_list; m; m = m->next) {
172 int i;
173
174 seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
175 dev->name, m->dmi_users, m->dmi_gusers);
176
177 for (i = 0; i < m->dmi_addrlen; i++)
178 seq_printf(seq, "%02x", m->dmi_addr[i]);
179
180 seq_putc(seq, '\n');
181 }
182 netif_addr_unlock_bh(dev);
183 return 0;
184}
185
186static const struct seq_operations dev_mc_seq_ops = {
187 .start = dev_seq_start,
188 .next = dev_seq_next,
189 .stop = dev_seq_stop,
190 .show = dev_mc_seq_show,
191};
192
193static int dev_mc_seq_open(struct inode *inode, struct file *file)
194{
195 return seq_open_net(inode, file, &dev_mc_seq_ops,
196 sizeof(struct seq_net_private));
197}
198
199static const struct file_operations dev_mc_seq_fops = {
200 .owner = THIS_MODULE,
201 .open = dev_mc_seq_open,
202 .read = seq_read,
203 .llseek = seq_lseek,
204 .release = seq_release_net,
205};
206
207#endif
208
209static int __net_init dev_mc_net_init(struct net *net)
210{
211 if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
212 return -ENOMEM;
213 return 0;
214}
215
216static void __net_exit dev_mc_net_exit(struct net *net)
217{
218 proc_net_remove(net, "dev_mcast");
219}
220
221static struct pernet_operations __net_initdata dev_mc_net_ops = {
222 .init = dev_mc_net_init,
223 .exit = dev_mc_net_exit,
224};
225
226void __init dev_mcast_init(void)
227{
228 register_pernet_subsys(&dev_mc_net_ops);
229}
230
231EXPORT_SYMBOL(dev_mc_add);
232EXPORT_SYMBOL(dev_mc_delete);
diff --git a/net/core/dst.c b/net/core/dst.c
index f307bc18f6a0..b8c22f0f9373 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -44,7 +44,7 @@ static atomic_t dst_total = ATOMIC_INIT(0);
44 */ 44 */
45static struct { 45static struct {
46 spinlock_t lock; 46 spinlock_t lock;
47 struct dst_entry *list; 47 struct dst_entry *list;
48 unsigned long timer_inc; 48 unsigned long timer_inc;
49 unsigned long timer_expires; 49 unsigned long timer_expires;
50} dst_garbage = { 50} dst_garbage = {
@@ -52,7 +52,7 @@ static struct {
52 .timer_inc = DST_GC_MAX, 52 .timer_inc = DST_GC_MAX,
53}; 53};
54static void dst_gc_task(struct work_struct *work); 54static void dst_gc_task(struct work_struct *work);
55static void ___dst_free(struct dst_entry * dst); 55static void ___dst_free(struct dst_entry *dst);
56 56
57static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task); 57static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task);
58 58
@@ -136,8 +136,8 @@ loop:
136 } 136 }
137 expires = dst_garbage.timer_expires; 137 expires = dst_garbage.timer_expires;
138 /* 138 /*
139 * if the next desired timer is more than 4 seconds in the future 139 * if the next desired timer is more than 4 seconds in the
140 * then round the timer to whole seconds 140 * future then round the timer to whole seconds
141 */ 141 */
142 if (expires > 4*HZ) 142 if (expires > 4*HZ)
143 expires = round_jiffies_relative(expires); 143 expires = round_jiffies_relative(expires);
@@ -152,7 +152,8 @@ loop:
152 " expires: %lu elapsed: %lu us\n", 152 " expires: %lu elapsed: %lu us\n",
153 atomic_read(&dst_total), delayed, work_performed, 153 atomic_read(&dst_total), delayed, work_performed,
154 expires, 154 expires,
155 elapsed.tv_sec * USEC_PER_SEC + elapsed.tv_nsec / NSEC_PER_USEC); 155 elapsed.tv_sec * USEC_PER_SEC +
156 elapsed.tv_nsec / NSEC_PER_USEC);
156#endif 157#endif
157} 158}
158 159
@@ -163,9 +164,9 @@ int dst_discard(struct sk_buff *skb)
163} 164}
164EXPORT_SYMBOL(dst_discard); 165EXPORT_SYMBOL(dst_discard);
165 166
166void * dst_alloc(struct dst_ops * ops) 167void *dst_alloc(struct dst_ops *ops)
167{ 168{
168 struct dst_entry * dst; 169 struct dst_entry *dst;
169 170
170 if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { 171 if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
171 if (ops->gc(ops)) 172 if (ops->gc(ops))
@@ -185,19 +186,20 @@ void * dst_alloc(struct dst_ops * ops)
185 atomic_inc(&ops->entries); 186 atomic_inc(&ops->entries);
186 return dst; 187 return dst;
187} 188}
189EXPORT_SYMBOL(dst_alloc);
188 190
189static void ___dst_free(struct dst_entry * dst) 191static void ___dst_free(struct dst_entry *dst)
190{ 192{
191 /* The first case (dev==NULL) is required, when 193 /* The first case (dev==NULL) is required, when
192 protocol module is unloaded. 194 protocol module is unloaded.
193 */ 195 */
194 if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { 196 if (dst->dev == NULL || !(dst->dev->flags&IFF_UP))
195 dst->input = dst->output = dst_discard; 197 dst->input = dst->output = dst_discard;
196 }
197 dst->obsolete = 2; 198 dst->obsolete = 2;
198} 199}
200EXPORT_SYMBOL(__dst_free);
199 201
200void __dst_free(struct dst_entry * dst) 202void __dst_free(struct dst_entry *dst)
201{ 203{
202 spin_lock_bh(&dst_garbage.lock); 204 spin_lock_bh(&dst_garbage.lock);
203 ___dst_free(dst); 205 ___dst_free(dst);
@@ -262,15 +264,16 @@ again:
262 } 264 }
263 return NULL; 265 return NULL;
264} 266}
267EXPORT_SYMBOL(dst_destroy);
265 268
266void dst_release(struct dst_entry *dst) 269void dst_release(struct dst_entry *dst)
267{ 270{
268 if (dst) { 271 if (dst) {
269 int newrefcnt; 272 int newrefcnt;
270 273
271 smp_mb__before_atomic_dec(); 274 smp_mb__before_atomic_dec();
272 newrefcnt = atomic_dec_return(&dst->__refcnt); 275 newrefcnt = atomic_dec_return(&dst->__refcnt);
273 WARN_ON(newrefcnt < 0); 276 WARN_ON(newrefcnt < 0);
274 } 277 }
275} 278}
276EXPORT_SYMBOL(dst_release); 279EXPORT_SYMBOL(dst_release);
@@ -306,7 +309,8 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
306 } 309 }
307} 310}
308 311
309static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr) 312static int dst_dev_event(struct notifier_block *this, unsigned long event,
313 void *ptr)
310{ 314{
311 struct net_device *dev = ptr; 315 struct net_device *dev = ptr;
312 struct dst_entry *dst, *last = NULL; 316 struct dst_entry *dst, *last = NULL;
@@ -329,9 +333,8 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void
329 last->next = dst; 333 last->next = dst;
330 else 334 else
331 dst_busy_list = dst; 335 dst_busy_list = dst;
332 for (; dst; dst = dst->next) { 336 for (; dst; dst = dst->next)
333 dst_ifdown(dst, dev, event != NETDEV_DOWN); 337 dst_ifdown(dst, dev, event != NETDEV_DOWN);
334 }
335 mutex_unlock(&dst_gc_mutex); 338 mutex_unlock(&dst_gc_mutex);
336 break; 339 break;
337 } 340 }
@@ -346,7 +349,3 @@ void __init dst_init(void)
346{ 349{
347 register_netdevice_notifier(&dst_dev_notifier); 350 register_netdevice_notifier(&dst_dev_notifier);
348} 351}
349
350EXPORT_SYMBOL(__dst_free);
351EXPORT_SYMBOL(dst_alloc);
352EXPORT_SYMBOL(dst_destroy);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 9d55c57f318a..1a7db92037fa 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -18,8 +18,8 @@
18#include <linux/ethtool.h> 18#include <linux/ethtool.h>
19#include <linux/netdevice.h> 19#include <linux/netdevice.h>
20#include <linux/bitops.h> 20#include <linux/bitops.h>
21#include <linux/uaccess.h>
21#include <linux/slab.h> 22#include <linux/slab.h>
22#include <asm/uaccess.h>
23 23
24/* 24/*
25 * Some useful ethtool_ops methods that're device independent. 25 * Some useful ethtool_ops methods that're device independent.
@@ -31,6 +31,7 @@ u32 ethtool_op_get_link(struct net_device *dev)
31{ 31{
32 return netif_carrier_ok(dev) ? 1 : 0; 32 return netif_carrier_ok(dev) ? 1 : 0;
33} 33}
34EXPORT_SYMBOL(ethtool_op_get_link);
34 35
35u32 ethtool_op_get_rx_csum(struct net_device *dev) 36u32 ethtool_op_get_rx_csum(struct net_device *dev)
36{ 37{
@@ -63,6 +64,7 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
63 64
64 return 0; 65 return 0;
65} 66}
67EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
66 68
67int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) 69int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
68{ 70{
@@ -73,11 +75,13 @@ int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
73 75
74 return 0; 76 return 0;
75} 77}
78EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
76 79
77u32 ethtool_op_get_sg(struct net_device *dev) 80u32 ethtool_op_get_sg(struct net_device *dev)
78{ 81{
79 return (dev->features & NETIF_F_SG) != 0; 82 return (dev->features & NETIF_F_SG) != 0;
80} 83}
84EXPORT_SYMBOL(ethtool_op_get_sg);
81 85
82int ethtool_op_set_sg(struct net_device *dev, u32 data) 86int ethtool_op_set_sg(struct net_device *dev, u32 data)
83{ 87{
@@ -88,11 +92,13 @@ int ethtool_op_set_sg(struct net_device *dev, u32 data)
88 92
89 return 0; 93 return 0;
90} 94}
95EXPORT_SYMBOL(ethtool_op_set_sg);
91 96
92u32 ethtool_op_get_tso(struct net_device *dev) 97u32 ethtool_op_get_tso(struct net_device *dev)
93{ 98{
94 return (dev->features & NETIF_F_TSO) != 0; 99 return (dev->features & NETIF_F_TSO) != 0;
95} 100}
101EXPORT_SYMBOL(ethtool_op_get_tso);
96 102
97int ethtool_op_set_tso(struct net_device *dev, u32 data) 103int ethtool_op_set_tso(struct net_device *dev, u32 data)
98{ 104{
@@ -103,11 +109,13 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data)
103 109
104 return 0; 110 return 0;
105} 111}
112EXPORT_SYMBOL(ethtool_op_set_tso);
106 113
107u32 ethtool_op_get_ufo(struct net_device *dev) 114u32 ethtool_op_get_ufo(struct net_device *dev)
108{ 115{
109 return (dev->features & NETIF_F_UFO) != 0; 116 return (dev->features & NETIF_F_UFO) != 0;
110} 117}
118EXPORT_SYMBOL(ethtool_op_get_ufo);
111 119
112int ethtool_op_set_ufo(struct net_device *dev, u32 data) 120int ethtool_op_set_ufo(struct net_device *dev, u32 data)
113{ 121{
@@ -117,12 +125,13 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data)
117 dev->features &= ~NETIF_F_UFO; 125 dev->features &= ~NETIF_F_UFO;
118 return 0; 126 return 0;
119} 127}
128EXPORT_SYMBOL(ethtool_op_set_ufo);
120 129
121/* the following list of flags are the same as their associated 130/* the following list of flags are the same as their associated
122 * NETIF_F_xxx values in include/linux/netdevice.h 131 * NETIF_F_xxx values in include/linux/netdevice.h
123 */ 132 */
124static const u32 flags_dup_features = 133static const u32 flags_dup_features =
125 (ETH_FLAG_LRO | ETH_FLAG_NTUPLE); 134 (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
126 135
127u32 ethtool_op_get_flags(struct net_device *dev) 136u32 ethtool_op_get_flags(struct net_device *dev)
128{ 137{
@@ -133,6 +142,7 @@ u32 ethtool_op_get_flags(struct net_device *dev)
133 142
134 return dev->features & flags_dup_features; 143 return dev->features & flags_dup_features;
135} 144}
145EXPORT_SYMBOL(ethtool_op_get_flags);
136 146
137int ethtool_op_set_flags(struct net_device *dev, u32 data) 147int ethtool_op_set_flags(struct net_device *dev, u32 data)
138{ 148{
@@ -153,9 +163,15 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data)
153 features &= ~NETIF_F_NTUPLE; 163 features &= ~NETIF_F_NTUPLE;
154 } 164 }
155 165
166 if (data & ETH_FLAG_RXHASH)
167 features |= NETIF_F_RXHASH;
168 else
169 features &= ~NETIF_F_RXHASH;
170
156 dev->features = features; 171 dev->features = features;
157 return 0; 172 return 0;
158} 173}
174EXPORT_SYMBOL(ethtool_op_set_flags);
159 175
160void ethtool_ntuple_flush(struct net_device *dev) 176void ethtool_ntuple_flush(struct net_device *dev)
161{ 177{
@@ -201,7 +217,8 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
201 return dev->ethtool_ops->set_settings(dev, &cmd); 217 return dev->ethtool_ops->set_settings(dev, &cmd);
202} 218}
203 219
204static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) 220static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
221 void __user *useraddr)
205{ 222{
206 struct ethtool_drvinfo info; 223 struct ethtool_drvinfo info;
207 const struct ethtool_ops *ops = dev->ethtool_ops; 224 const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -241,7 +258,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void _
241} 258}
242 259
243static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, 260static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
244 void __user *useraddr) 261 void __user *useraddr)
245{ 262{
246 struct ethtool_sset_info info; 263 struct ethtool_sset_info info;
247 const struct ethtool_ops *ops = dev->ethtool_ops; 264 const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -300,7 +317,8 @@ out:
300 return ret; 317 return ret;
301} 318}
302 319
303static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) 320static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
321 void __user *useraddr)
304{ 322{
305 struct ethtool_rxnfc cmd; 323 struct ethtool_rxnfc cmd;
306 324
@@ -313,7 +331,8 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __u
313 return dev->ethtool_ops->set_rxnfc(dev, &cmd); 331 return dev->ethtool_ops->set_rxnfc(dev, &cmd);
314} 332}
315 333
316static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) 334static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
335 void __user *useraddr)
317{ 336{
318 struct ethtool_rxnfc info; 337 struct ethtool_rxnfc info;
319 const struct ethtool_ops *ops = dev->ethtool_ops; 338 const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -358,8 +377,8 @@ err_out:
358} 377}
359 378
360static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, 379static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
361 struct ethtool_rx_ntuple_flow_spec *spec, 380 struct ethtool_rx_ntuple_flow_spec *spec,
362 struct ethtool_rx_ntuple_flow_spec_container *fsc) 381 struct ethtool_rx_ntuple_flow_spec_container *fsc)
363{ 382{
364 383
365 /* don't add filters forever */ 384 /* don't add filters forever */
@@ -385,7 +404,8 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
385 list->count++; 404 list->count++;
386} 405}
387 406
388static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) 407static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
408 void __user *useraddr)
389{ 409{
390 struct ethtool_rx_ntuple cmd; 410 struct ethtool_rx_ntuple cmd;
391 const struct ethtool_ops *ops = dev->ethtool_ops; 411 const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -510,125 +530,125 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
510 case UDP_V4_FLOW: 530 case UDP_V4_FLOW:
511 case SCTP_V4_FLOW: 531 case SCTP_V4_FLOW:
512 sprintf(p, "\tSrc IP addr: 0x%x\n", 532 sprintf(p, "\tSrc IP addr: 0x%x\n",
513 fsc->fs.h_u.tcp_ip4_spec.ip4src); 533 fsc->fs.h_u.tcp_ip4_spec.ip4src);
514 p += ETH_GSTRING_LEN; 534 p += ETH_GSTRING_LEN;
515 num_strings++; 535 num_strings++;
516 sprintf(p, "\tSrc IP mask: 0x%x\n", 536 sprintf(p, "\tSrc IP mask: 0x%x\n",
517 fsc->fs.m_u.tcp_ip4_spec.ip4src); 537 fsc->fs.m_u.tcp_ip4_spec.ip4src);
518 p += ETH_GSTRING_LEN; 538 p += ETH_GSTRING_LEN;
519 num_strings++; 539 num_strings++;
520 sprintf(p, "\tDest IP addr: 0x%x\n", 540 sprintf(p, "\tDest IP addr: 0x%x\n",
521 fsc->fs.h_u.tcp_ip4_spec.ip4dst); 541 fsc->fs.h_u.tcp_ip4_spec.ip4dst);
522 p += ETH_GSTRING_LEN; 542 p += ETH_GSTRING_LEN;
523 num_strings++; 543 num_strings++;
524 sprintf(p, "\tDest IP mask: 0x%x\n", 544 sprintf(p, "\tDest IP mask: 0x%x\n",
525 fsc->fs.m_u.tcp_ip4_spec.ip4dst); 545 fsc->fs.m_u.tcp_ip4_spec.ip4dst);
526 p += ETH_GSTRING_LEN; 546 p += ETH_GSTRING_LEN;
527 num_strings++; 547 num_strings++;
528 sprintf(p, "\tSrc Port: %d, mask: 0x%x\n", 548 sprintf(p, "\tSrc Port: %d, mask: 0x%x\n",
529 fsc->fs.h_u.tcp_ip4_spec.psrc, 549 fsc->fs.h_u.tcp_ip4_spec.psrc,
530 fsc->fs.m_u.tcp_ip4_spec.psrc); 550 fsc->fs.m_u.tcp_ip4_spec.psrc);
531 p += ETH_GSTRING_LEN; 551 p += ETH_GSTRING_LEN;
532 num_strings++; 552 num_strings++;
533 sprintf(p, "\tDest Port: %d, mask: 0x%x\n", 553 sprintf(p, "\tDest Port: %d, mask: 0x%x\n",
534 fsc->fs.h_u.tcp_ip4_spec.pdst, 554 fsc->fs.h_u.tcp_ip4_spec.pdst,
535 fsc->fs.m_u.tcp_ip4_spec.pdst); 555 fsc->fs.m_u.tcp_ip4_spec.pdst);
536 p += ETH_GSTRING_LEN; 556 p += ETH_GSTRING_LEN;
537 num_strings++; 557 num_strings++;
538 sprintf(p, "\tTOS: %d, mask: 0x%x\n", 558 sprintf(p, "\tTOS: %d, mask: 0x%x\n",
539 fsc->fs.h_u.tcp_ip4_spec.tos, 559 fsc->fs.h_u.tcp_ip4_spec.tos,
540 fsc->fs.m_u.tcp_ip4_spec.tos); 560 fsc->fs.m_u.tcp_ip4_spec.tos);
541 p += ETH_GSTRING_LEN; 561 p += ETH_GSTRING_LEN;
542 num_strings++; 562 num_strings++;
543 break; 563 break;
544 case AH_ESP_V4_FLOW: 564 case AH_ESP_V4_FLOW:
545 case ESP_V4_FLOW: 565 case ESP_V4_FLOW:
546 sprintf(p, "\tSrc IP addr: 0x%x\n", 566 sprintf(p, "\tSrc IP addr: 0x%x\n",
547 fsc->fs.h_u.ah_ip4_spec.ip4src); 567 fsc->fs.h_u.ah_ip4_spec.ip4src);
548 p += ETH_GSTRING_LEN; 568 p += ETH_GSTRING_LEN;
549 num_strings++; 569 num_strings++;
550 sprintf(p, "\tSrc IP mask: 0x%x\n", 570 sprintf(p, "\tSrc IP mask: 0x%x\n",
551 fsc->fs.m_u.ah_ip4_spec.ip4src); 571 fsc->fs.m_u.ah_ip4_spec.ip4src);
552 p += ETH_GSTRING_LEN; 572 p += ETH_GSTRING_LEN;
553 num_strings++; 573 num_strings++;
554 sprintf(p, "\tDest IP addr: 0x%x\n", 574 sprintf(p, "\tDest IP addr: 0x%x\n",
555 fsc->fs.h_u.ah_ip4_spec.ip4dst); 575 fsc->fs.h_u.ah_ip4_spec.ip4dst);
556 p += ETH_GSTRING_LEN; 576 p += ETH_GSTRING_LEN;
557 num_strings++; 577 num_strings++;
558 sprintf(p, "\tDest IP mask: 0x%x\n", 578 sprintf(p, "\tDest IP mask: 0x%x\n",
559 fsc->fs.m_u.ah_ip4_spec.ip4dst); 579 fsc->fs.m_u.ah_ip4_spec.ip4dst);
560 p += ETH_GSTRING_LEN; 580 p += ETH_GSTRING_LEN;
561 num_strings++; 581 num_strings++;
562 sprintf(p, "\tSPI: %d, mask: 0x%x\n", 582 sprintf(p, "\tSPI: %d, mask: 0x%x\n",
563 fsc->fs.h_u.ah_ip4_spec.spi, 583 fsc->fs.h_u.ah_ip4_spec.spi,
564 fsc->fs.m_u.ah_ip4_spec.spi); 584 fsc->fs.m_u.ah_ip4_spec.spi);
565 p += ETH_GSTRING_LEN; 585 p += ETH_GSTRING_LEN;
566 num_strings++; 586 num_strings++;
567 sprintf(p, "\tTOS: %d, mask: 0x%x\n", 587 sprintf(p, "\tTOS: %d, mask: 0x%x\n",
568 fsc->fs.h_u.ah_ip4_spec.tos, 588 fsc->fs.h_u.ah_ip4_spec.tos,
569 fsc->fs.m_u.ah_ip4_spec.tos); 589 fsc->fs.m_u.ah_ip4_spec.tos);
570 p += ETH_GSTRING_LEN; 590 p += ETH_GSTRING_LEN;
571 num_strings++; 591 num_strings++;
572 break; 592 break;
573 case IP_USER_FLOW: 593 case IP_USER_FLOW:
574 sprintf(p, "\tSrc IP addr: 0x%x\n", 594 sprintf(p, "\tSrc IP addr: 0x%x\n",
575 fsc->fs.h_u.raw_ip4_spec.ip4src); 595 fsc->fs.h_u.raw_ip4_spec.ip4src);
576 p += ETH_GSTRING_LEN; 596 p += ETH_GSTRING_LEN;
577 num_strings++; 597 num_strings++;
578 sprintf(p, "\tSrc IP mask: 0x%x\n", 598 sprintf(p, "\tSrc IP mask: 0x%x\n",
579 fsc->fs.m_u.raw_ip4_spec.ip4src); 599 fsc->fs.m_u.raw_ip4_spec.ip4src);
580 p += ETH_GSTRING_LEN; 600 p += ETH_GSTRING_LEN;
581 num_strings++; 601 num_strings++;
582 sprintf(p, "\tDest IP addr: 0x%x\n", 602 sprintf(p, "\tDest IP addr: 0x%x\n",
583 fsc->fs.h_u.raw_ip4_spec.ip4dst); 603 fsc->fs.h_u.raw_ip4_spec.ip4dst);
584 p += ETH_GSTRING_LEN; 604 p += ETH_GSTRING_LEN;
585 num_strings++; 605 num_strings++;
586 sprintf(p, "\tDest IP mask: 0x%x\n", 606 sprintf(p, "\tDest IP mask: 0x%x\n",
587 fsc->fs.m_u.raw_ip4_spec.ip4dst); 607 fsc->fs.m_u.raw_ip4_spec.ip4dst);
588 p += ETH_GSTRING_LEN; 608 p += ETH_GSTRING_LEN;
589 num_strings++; 609 num_strings++;
590 break; 610 break;
591 case IPV4_FLOW: 611 case IPV4_FLOW:
592 sprintf(p, "\tSrc IP addr: 0x%x\n", 612 sprintf(p, "\tSrc IP addr: 0x%x\n",
593 fsc->fs.h_u.usr_ip4_spec.ip4src); 613 fsc->fs.h_u.usr_ip4_spec.ip4src);
594 p += ETH_GSTRING_LEN; 614 p += ETH_GSTRING_LEN;
595 num_strings++; 615 num_strings++;
596 sprintf(p, "\tSrc IP mask: 0x%x\n", 616 sprintf(p, "\tSrc IP mask: 0x%x\n",
597 fsc->fs.m_u.usr_ip4_spec.ip4src); 617 fsc->fs.m_u.usr_ip4_spec.ip4src);
598 p += ETH_GSTRING_LEN; 618 p += ETH_GSTRING_LEN;
599 num_strings++; 619 num_strings++;
600 sprintf(p, "\tDest IP addr: 0x%x\n", 620 sprintf(p, "\tDest IP addr: 0x%x\n",
601 fsc->fs.h_u.usr_ip4_spec.ip4dst); 621 fsc->fs.h_u.usr_ip4_spec.ip4dst);
602 p += ETH_GSTRING_LEN; 622 p += ETH_GSTRING_LEN;
603 num_strings++; 623 num_strings++;
604 sprintf(p, "\tDest IP mask: 0x%x\n", 624 sprintf(p, "\tDest IP mask: 0x%x\n",
605 fsc->fs.m_u.usr_ip4_spec.ip4dst); 625 fsc->fs.m_u.usr_ip4_spec.ip4dst);
606 p += ETH_GSTRING_LEN; 626 p += ETH_GSTRING_LEN;
607 num_strings++; 627 num_strings++;
608 sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n", 628 sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n",
609 fsc->fs.h_u.usr_ip4_spec.l4_4_bytes, 629 fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
610 fsc->fs.m_u.usr_ip4_spec.l4_4_bytes); 630 fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
611 p += ETH_GSTRING_LEN; 631 p += ETH_GSTRING_LEN;
612 num_strings++; 632 num_strings++;
613 sprintf(p, "\tTOS: %d, mask: 0x%x\n", 633 sprintf(p, "\tTOS: %d, mask: 0x%x\n",
614 fsc->fs.h_u.usr_ip4_spec.tos, 634 fsc->fs.h_u.usr_ip4_spec.tos,
615 fsc->fs.m_u.usr_ip4_spec.tos); 635 fsc->fs.m_u.usr_ip4_spec.tos);
616 p += ETH_GSTRING_LEN; 636 p += ETH_GSTRING_LEN;
617 num_strings++; 637 num_strings++;
618 sprintf(p, "\tIP Version: %d, mask: 0x%x\n", 638 sprintf(p, "\tIP Version: %d, mask: 0x%x\n",
619 fsc->fs.h_u.usr_ip4_spec.ip_ver, 639 fsc->fs.h_u.usr_ip4_spec.ip_ver,
620 fsc->fs.m_u.usr_ip4_spec.ip_ver); 640 fsc->fs.m_u.usr_ip4_spec.ip_ver);
621 p += ETH_GSTRING_LEN; 641 p += ETH_GSTRING_LEN;
622 num_strings++; 642 num_strings++;
623 sprintf(p, "\tProtocol: %d, mask: 0x%x\n", 643 sprintf(p, "\tProtocol: %d, mask: 0x%x\n",
624 fsc->fs.h_u.usr_ip4_spec.proto, 644 fsc->fs.h_u.usr_ip4_spec.proto,
625 fsc->fs.m_u.usr_ip4_spec.proto); 645 fsc->fs.m_u.usr_ip4_spec.proto);
626 p += ETH_GSTRING_LEN; 646 p += ETH_GSTRING_LEN;
627 num_strings++; 647 num_strings++;
628 break; 648 break;
629 }; 649 };
630 sprintf(p, "\tVLAN: %d, mask: 0x%x\n", 650 sprintf(p, "\tVLAN: %d, mask: 0x%x\n",
631 fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask); 651 fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
632 p += ETH_GSTRING_LEN; 652 p += ETH_GSTRING_LEN;
633 num_strings++; 653 num_strings++;
634 sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data); 654 sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data);
@@ -641,7 +661,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
641 sprintf(p, "\tAction: Drop\n"); 661 sprintf(p, "\tAction: Drop\n");
642 else 662 else
643 sprintf(p, "\tAction: Direct to queue %d\n", 663 sprintf(p, "\tAction: Direct to queue %d\n",
644 fsc->fs.action); 664 fsc->fs.action);
645 p += ETH_GSTRING_LEN; 665 p += ETH_GSTRING_LEN;
646 num_strings++; 666 num_strings++;
647unknown_filter: 667unknown_filter:
@@ -853,7 +873,8 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
853 return ret; 873 return ret;
854} 874}
855 875
856static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) 876static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev,
877 void __user *useraddr)
857{ 878{
858 struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; 879 struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
859 880
@@ -867,7 +888,8 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void
867 return 0; 888 return 0;
868} 889}
869 890
870static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) 891static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev,
892 void __user *useraddr)
871{ 893{
872 struct ethtool_coalesce coalesce; 894 struct ethtool_coalesce coalesce;
873 895
@@ -971,6 +993,7 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
971 993
972 return dev->ethtool_ops->set_tx_csum(dev, edata.data); 994 return dev->ethtool_ops->set_tx_csum(dev, edata.data);
973} 995}
996EXPORT_SYMBOL(ethtool_op_set_tx_csum);
974 997
975static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) 998static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
976{ 999{
@@ -1042,7 +1065,7 @@ static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
1042 1065
1043 edata.data = dev->features & NETIF_F_GSO; 1066 edata.data = dev->features & NETIF_F_GSO;
1044 if (copy_to_user(useraddr, &edata, sizeof(edata))) 1067 if (copy_to_user(useraddr, &edata, sizeof(edata)))
1045 return -EFAULT; 1068 return -EFAULT;
1046 return 0; 1069 return 0;
1047} 1070}
1048 1071
@@ -1065,7 +1088,7 @@ static int ethtool_get_gro(struct net_device *dev, char __user *useraddr)
1065 1088
1066 edata.data = dev->features & NETIF_F_GRO; 1089 edata.data = dev->features & NETIF_F_GRO;
1067 if (copy_to_user(useraddr, &edata, sizeof(edata))) 1090 if (copy_to_user(useraddr, &edata, sizeof(edata)))
1068 return -EFAULT; 1091 return -EFAULT;
1069 return 0; 1092 return 0;
1070} 1093}
1071 1094
@@ -1277,7 +1300,8 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr,
1277 return actor(dev, edata.data); 1300 return actor(dev, edata.data);
1278} 1301}
1279 1302
1280static noinline_for_stack int ethtool_flash_device(struct net_device *dev, char __user *useraddr) 1303static noinline_for_stack int ethtool_flash_device(struct net_device *dev,
1304 char __user *useraddr)
1281{ 1305{
1282 struct ethtool_flash efl; 1306 struct ethtool_flash efl;
1283 1307
@@ -1306,11 +1330,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1306 if (!dev->ethtool_ops) 1330 if (!dev->ethtool_ops)
1307 return -EOPNOTSUPP; 1331 return -EOPNOTSUPP;
1308 1332
1309 if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd))) 1333 if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
1310 return -EFAULT; 1334 return -EFAULT;
1311 1335
1312 /* Allow some commands to be done by anyone */ 1336 /* Allow some commands to be done by anyone */
1313 switch(ethcmd) { 1337 switch (ethcmd) {
1314 case ETHTOOL_GDRVINFO: 1338 case ETHTOOL_GDRVINFO:
1315 case ETHTOOL_GMSGLVL: 1339 case ETHTOOL_GMSGLVL:
1316 case ETHTOOL_GCOALESCE: 1340 case ETHTOOL_GCOALESCE:
@@ -1338,10 +1362,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1338 return -EPERM; 1362 return -EPERM;
1339 } 1363 }
1340 1364
1341 if (dev->ethtool_ops->begin) 1365 if (dev->ethtool_ops->begin) {
1342 if ((rc = dev->ethtool_ops->begin(dev)) < 0) 1366 rc = dev->ethtool_ops->begin(dev);
1367 if (rc < 0)
1343 return rc; 1368 return rc;
1344 1369 }
1345 old_features = dev->features; 1370 old_features = dev->features;
1346 1371
1347 switch (ethcmd) { 1372 switch (ethcmd) {
@@ -1531,16 +1556,3 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1531 1556
1532 return rc; 1557 return rc;
1533} 1558}
1534
1535EXPORT_SYMBOL(ethtool_op_get_link);
1536EXPORT_SYMBOL(ethtool_op_get_sg);
1537EXPORT_SYMBOL(ethtool_op_get_tso);
1538EXPORT_SYMBOL(ethtool_op_set_sg);
1539EXPORT_SYMBOL(ethtool_op_set_tso);
1540EXPORT_SYMBOL(ethtool_op_set_tx_csum);
1541EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
1542EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
1543EXPORT_SYMBOL(ethtool_op_set_ufo);
1544EXPORT_SYMBOL(ethtool_op_get_ufo);
1545EXPORT_SYMBOL(ethtool_op_set_flags);
1546EXPORT_SYMBOL(ethtool_op_get_flags);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index d2c3e7dc2e5f..05cce4ec84dd 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -109,7 +109,7 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net)
109 struct fib_rules_ops *ops; 109 struct fib_rules_ops *ops;
110 int err; 110 int err;
111 111
112 ops = kmemdup(tmpl, sizeof (*ops), GFP_KERNEL); 112 ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
113 if (ops == NULL) 113 if (ops == NULL)
114 return ERR_PTR(-ENOMEM); 114 return ERR_PTR(-ENOMEM);
115 115
@@ -124,7 +124,6 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net)
124 124
125 return ops; 125 return ops;
126} 126}
127
128EXPORT_SYMBOL_GPL(fib_rules_register); 127EXPORT_SYMBOL_GPL(fib_rules_register);
129 128
130void fib_rules_cleanup_ops(struct fib_rules_ops *ops) 129void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
@@ -158,7 +157,6 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
158 157
159 call_rcu(&ops->rcu, fib_rules_put_rcu); 158 call_rcu(&ops->rcu, fib_rules_put_rcu);
160} 159}
161
162EXPORT_SYMBOL_GPL(fib_rules_unregister); 160EXPORT_SYMBOL_GPL(fib_rules_unregister);
163 161
164static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, 162static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
@@ -221,7 +219,6 @@ out:
221 219
222 return err; 220 return err;
223} 221}
224
225EXPORT_SYMBOL_GPL(fib_rules_lookup); 222EXPORT_SYMBOL_GPL(fib_rules_lookup);
226 223
227static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, 224static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
@@ -614,7 +611,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
614 break; 611 break;
615 612
616 cb->args[1] = 0; 613 cb->args[1] = 0;
617 skip: 614skip:
618 idx++; 615 idx++;
619 } 616 }
620 rcu_read_unlock(); 617 rcu_read_unlock();
@@ -686,7 +683,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
686 struct fib_rules_ops *ops; 683 struct fib_rules_ops *ops;
687 684
688 ASSERT_RTNL(); 685 ASSERT_RTNL();
689 rcu_read_lock();
690 686
691 switch (event) { 687 switch (event) {
692 case NETDEV_REGISTER: 688 case NETDEV_REGISTER:
@@ -700,8 +696,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
700 break; 696 break;
701 } 697 }
702 698
703 rcu_read_unlock();
704
705 return NOTIFY_DONE; 699 return NOTIFY_DONE;
706} 700}
707 701
diff --git a/net/core/flow.c b/net/core/flow.c
index 96015871ecea..161900674009 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -26,113 +26,158 @@
26#include <linux/security.h> 26#include <linux/security.h>
27 27
28struct flow_cache_entry { 28struct flow_cache_entry {
29 struct flow_cache_entry *next; 29 union {
30 u16 family; 30 struct hlist_node hlist;
31 u8 dir; 31 struct list_head gc_list;
32 u32 genid; 32 } u;
33 struct flowi key; 33 u16 family;
34 void *object; 34 u8 dir;
35 atomic_t *object_ref; 35 u32 genid;
36 struct flowi key;
37 struct flow_cache_object *object;
36}; 38};
37 39
38atomic_t flow_cache_genid = ATOMIC_INIT(0); 40struct flow_cache_percpu {
39 41 struct hlist_head *hash_table;
40static u32 flow_hash_shift; 42 int hash_count;
41#define flow_hash_size (1 << flow_hash_shift) 43 u32 hash_rnd;
42static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; 44 int hash_rnd_recalc;
43 45 struct tasklet_struct flush_tasklet;
44#define flow_table(cpu) (per_cpu(flow_tables, cpu)) 46};
45
46static struct kmem_cache *flow_cachep __read_mostly;
47 47
48static int flow_lwm, flow_hwm; 48struct flow_flush_info {
49 struct flow_cache *cache;
50 atomic_t cpuleft;
51 struct completion completion;
52};
49 53
50struct flow_percpu_info { 54struct flow_cache {
51 int hash_rnd_recalc; 55 u32 hash_shift;
52 u32 hash_rnd; 56 unsigned long order;
53 int count; 57 struct flow_cache_percpu *percpu;
58 struct notifier_block hotcpu_notifier;
59 int low_watermark;
60 int high_watermark;
61 struct timer_list rnd_timer;
54}; 62};
55static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 };
56 63
57#define flow_hash_rnd_recalc(cpu) \ 64atomic_t flow_cache_genid = ATOMIC_INIT(0);
58 (per_cpu(flow_hash_info, cpu).hash_rnd_recalc) 65static struct flow_cache flow_cache_global;
59#define flow_hash_rnd(cpu) \ 66static struct kmem_cache *flow_cachep;
60 (per_cpu(flow_hash_info, cpu).hash_rnd)
61#define flow_count(cpu) \
62 (per_cpu(flow_hash_info, cpu).count)
63 67
64static struct timer_list flow_hash_rnd_timer; 68static DEFINE_SPINLOCK(flow_cache_gc_lock);
69static LIST_HEAD(flow_cache_gc_list);
65 70
66#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) 71#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
67 72#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
68struct flow_flush_info {
69 atomic_t cpuleft;
70 struct completion completion;
71};
72static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL };
73
74#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
75 73
76static void flow_cache_new_hashrnd(unsigned long arg) 74static void flow_cache_new_hashrnd(unsigned long arg)
77{ 75{
76 struct flow_cache *fc = (void *) arg;
78 int i; 77 int i;
79 78
80 for_each_possible_cpu(i) 79 for_each_possible_cpu(i)
81 flow_hash_rnd_recalc(i) = 1; 80 per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
82 81
83 flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; 82 fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
84 add_timer(&flow_hash_rnd_timer); 83 add_timer(&fc->rnd_timer);
84}
85
86static int flow_entry_valid(struct flow_cache_entry *fle)
87{
88 if (atomic_read(&flow_cache_genid) != fle->genid)
89 return 0;
90 if (fle->object && !fle->object->ops->check(fle->object))
91 return 0;
92 return 1;
85} 93}
86 94
87static void flow_entry_kill(int cpu, struct flow_cache_entry *fle) 95static void flow_entry_kill(struct flow_cache_entry *fle)
88{ 96{
89 if (fle->object) 97 if (fle->object)
90 atomic_dec(fle->object_ref); 98 fle->object->ops->delete(fle->object);
91 kmem_cache_free(flow_cachep, fle); 99 kmem_cache_free(flow_cachep, fle);
92 flow_count(cpu)--;
93} 100}
94 101
95static void __flow_cache_shrink(int cpu, int shrink_to) 102static void flow_cache_gc_task(struct work_struct *work)
96{ 103{
97 struct flow_cache_entry *fle, **flp; 104 struct list_head gc_list;
98 int i; 105 struct flow_cache_entry *fce, *n;
99 106
100 for (i = 0; i < flow_hash_size; i++) { 107 INIT_LIST_HEAD(&gc_list);
101 int k = 0; 108 spin_lock_bh(&flow_cache_gc_lock);
109 list_splice_tail_init(&flow_cache_gc_list, &gc_list);
110 spin_unlock_bh(&flow_cache_gc_lock);
102 111
103 flp = &flow_table(cpu)[i]; 112 list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
104 while ((fle = *flp) != NULL && k < shrink_to) { 113 flow_entry_kill(fce);
105 k++; 114}
106 flp = &fle->next; 115static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task);
107 } 116
108 while ((fle = *flp) != NULL) { 117static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
109 *flp = fle->next; 118 int deleted, struct list_head *gc_list)
110 flow_entry_kill(cpu, fle); 119{
111 } 120 if (deleted) {
121 fcp->hash_count -= deleted;
122 spin_lock_bh(&flow_cache_gc_lock);
123 list_splice_tail(gc_list, &flow_cache_gc_list);
124 spin_unlock_bh(&flow_cache_gc_lock);
125 schedule_work(&flow_cache_gc_work);
112 } 126 }
113} 127}
114 128
115static void flow_cache_shrink(int cpu) 129static void __flow_cache_shrink(struct flow_cache *fc,
130 struct flow_cache_percpu *fcp,
131 int shrink_to)
116{ 132{
117 int shrink_to = flow_lwm / flow_hash_size; 133 struct flow_cache_entry *fle;
134 struct hlist_node *entry, *tmp;
135 LIST_HEAD(gc_list);
136 int i, deleted = 0;
137
138 for (i = 0; i < flow_cache_hash_size(fc); i++) {
139 int saved = 0;
140
141 hlist_for_each_entry_safe(fle, entry, tmp,
142 &fcp->hash_table[i], u.hlist) {
143 if (saved < shrink_to &&
144 flow_entry_valid(fle)) {
145 saved++;
146 } else {
147 deleted++;
148 hlist_del(&fle->u.hlist);
149 list_add_tail(&fle->u.gc_list, &gc_list);
150 }
151 }
152 }
118 153
119 __flow_cache_shrink(cpu, shrink_to); 154 flow_cache_queue_garbage(fcp, deleted, &gc_list);
120} 155}
121 156
122static void flow_new_hash_rnd(int cpu) 157static void flow_cache_shrink(struct flow_cache *fc,
158 struct flow_cache_percpu *fcp)
123{ 159{
124 get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32)); 160 int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
125 flow_hash_rnd_recalc(cpu) = 0;
126 161
127 __flow_cache_shrink(cpu, 0); 162 __flow_cache_shrink(fc, fcp, shrink_to);
128} 163}
129 164
130static u32 flow_hash_code(struct flowi *key, int cpu) 165static void flow_new_hash_rnd(struct flow_cache *fc,
166 struct flow_cache_percpu *fcp)
167{
168 get_random_bytes(&fcp->hash_rnd, sizeof(u32));
169 fcp->hash_rnd_recalc = 0;
170 __flow_cache_shrink(fc, fcp, 0);
171}
172
173static u32 flow_hash_code(struct flow_cache *fc,
174 struct flow_cache_percpu *fcp,
175 struct flowi *key)
131{ 176{
132 u32 *k = (u32 *) key; 177 u32 *k = (u32 *) key;
133 178
134 return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) & 179 return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
135 (flow_hash_size - 1)); 180 & (flow_cache_hash_size(fc) - 1));
136} 181}
137 182
138#if (BITS_PER_LONG == 64) 183#if (BITS_PER_LONG == 64)
@@ -165,114 +210,117 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
165 return 0; 210 return 0;
166} 211}
167 212
168void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, 213struct flow_cache_object *
169 flow_resolve_t resolver) 214flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
215 flow_resolve_t resolver, void *ctx)
170{ 216{
171 struct flow_cache_entry *fle, **head; 217 struct flow_cache *fc = &flow_cache_global;
218 struct flow_cache_percpu *fcp;
219 struct flow_cache_entry *fle, *tfle;
220 struct hlist_node *entry;
221 struct flow_cache_object *flo;
172 unsigned int hash; 222 unsigned int hash;
173 int cpu;
174 223
175 local_bh_disable(); 224 local_bh_disable();
176 cpu = smp_processor_id(); 225 fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
177 226
178 fle = NULL; 227 fle = NULL;
228 flo = NULL;
179 /* Packet really early in init? Making flow_cache_init a 229 /* Packet really early in init? Making flow_cache_init a
180 * pre-smp initcall would solve this. --RR */ 230 * pre-smp initcall would solve this. --RR */
181 if (!flow_table(cpu)) 231 if (!fcp->hash_table)
182 goto nocache; 232 goto nocache;
183 233
184 if (flow_hash_rnd_recalc(cpu)) 234 if (fcp->hash_rnd_recalc)
185 flow_new_hash_rnd(cpu); 235 flow_new_hash_rnd(fc, fcp);
186 hash = flow_hash_code(key, cpu);
187 236
188 head = &flow_table(cpu)[hash]; 237 hash = flow_hash_code(fc, fcp, key);
189 for (fle = *head; fle; fle = fle->next) { 238 hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) {
190 if (fle->family == family && 239 if (tfle->family == family &&
191 fle->dir == dir && 240 tfle->dir == dir &&
192 flow_key_compare(key, &fle->key) == 0) { 241 flow_key_compare(key, &tfle->key) == 0) {
193 if (fle->genid == atomic_read(&flow_cache_genid)) { 242 fle = tfle;
194 void *ret = fle->object;
195
196 if (ret)
197 atomic_inc(fle->object_ref);
198 local_bh_enable();
199
200 return ret;
201 }
202 break; 243 break;
203 } 244 }
204 } 245 }
205 246
206 if (!fle) { 247 if (unlikely(!fle)) {
207 if (flow_count(cpu) > flow_hwm) 248 if (fcp->hash_count > fc->high_watermark)
208 flow_cache_shrink(cpu); 249 flow_cache_shrink(fc, fcp);
209 250
210 fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); 251 fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
211 if (fle) { 252 if (fle) {
212 fle->next = *head;
213 *head = fle;
214 fle->family = family; 253 fle->family = family;
215 fle->dir = dir; 254 fle->dir = dir;
216 memcpy(&fle->key, key, sizeof(*key)); 255 memcpy(&fle->key, key, sizeof(*key));
217 fle->object = NULL; 256 fle->object = NULL;
218 flow_count(cpu)++; 257 hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
258 fcp->hash_count++;
219 } 259 }
260 } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
261 flo = fle->object;
262 if (!flo)
263 goto ret_object;
264 flo = flo->ops->get(flo);
265 if (flo)
266 goto ret_object;
267 } else if (fle->object) {
268 flo = fle->object;
269 flo->ops->delete(flo);
270 fle->object = NULL;
220 } 271 }
221 272
222nocache: 273nocache:
223 { 274 flo = NULL;
224 int err; 275 if (fle) {
225 void *obj; 276 flo = fle->object;
226 atomic_t *obj_ref; 277 fle->object = NULL;
227
228 err = resolver(net, key, family, dir, &obj, &obj_ref);
229
230 if (fle && !err) {
231 fle->genid = atomic_read(&flow_cache_genid);
232
233 if (fle->object)
234 atomic_dec(fle->object_ref);
235
236 fle->object = obj;
237 fle->object_ref = obj_ref;
238 if (obj)
239 atomic_inc(fle->object_ref);
240 }
241 local_bh_enable();
242
243 if (err)
244 obj = ERR_PTR(err);
245 return obj;
246 } 278 }
279 flo = resolver(net, key, family, dir, flo, ctx);
280 if (fle) {
281 fle->genid = atomic_read(&flow_cache_genid);
282 if (!IS_ERR(flo))
283 fle->object = flo;
284 else
285 fle->genid--;
286 } else {
287 if (flo && !IS_ERR(flo))
288 flo->ops->delete(flo);
289 }
290ret_object:
291 local_bh_enable();
292 return flo;
247} 293}
248 294
249static void flow_cache_flush_tasklet(unsigned long data) 295static void flow_cache_flush_tasklet(unsigned long data)
250{ 296{
251 struct flow_flush_info *info = (void *)data; 297 struct flow_flush_info *info = (void *)data;
252 int i; 298 struct flow_cache *fc = info->cache;
253 int cpu; 299 struct flow_cache_percpu *fcp;
254 300 struct flow_cache_entry *fle;
255 cpu = smp_processor_id(); 301 struct hlist_node *entry, *tmp;
256 for (i = 0; i < flow_hash_size; i++) { 302 LIST_HEAD(gc_list);
257 struct flow_cache_entry *fle; 303 int i, deleted = 0;
258 304
259 fle = flow_table(cpu)[i]; 305 fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
260 for (; fle; fle = fle->next) { 306 for (i = 0; i < flow_cache_hash_size(fc); i++) {
261 unsigned genid = atomic_read(&flow_cache_genid); 307 hlist_for_each_entry_safe(fle, entry, tmp,
262 308 &fcp->hash_table[i], u.hlist) {
263 if (!fle->object || fle->genid == genid) 309 if (flow_entry_valid(fle))
264 continue; 310 continue;
265 311
266 fle->object = NULL; 312 deleted++;
267 atomic_dec(fle->object_ref); 313 hlist_del(&fle->u.hlist);
314 list_add_tail(&fle->u.gc_list, &gc_list);
268 } 315 }
269 } 316 }
270 317
318 flow_cache_queue_garbage(fcp, deleted, &gc_list);
319
271 if (atomic_dec_and_test(&info->cpuleft)) 320 if (atomic_dec_and_test(&info->cpuleft))
272 complete(&info->completion); 321 complete(&info->completion);
273} 322}
274 323
275static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__));
276static void flow_cache_flush_per_cpu(void *data) 324static void flow_cache_flush_per_cpu(void *data)
277{ 325{
278 struct flow_flush_info *info = data; 326 struct flow_flush_info *info = data;
@@ -280,8 +328,7 @@ static void flow_cache_flush_per_cpu(void *data)
280 struct tasklet_struct *tasklet; 328 struct tasklet_struct *tasklet;
281 329
282 cpu = smp_processor_id(); 330 cpu = smp_processor_id();
283 331 tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet;
284 tasklet = flow_flush_tasklet(cpu);
285 tasklet->data = (unsigned long)info; 332 tasklet->data = (unsigned long)info;
286 tasklet_schedule(tasklet); 333 tasklet_schedule(tasklet);
287} 334}
@@ -294,6 +341,7 @@ void flow_cache_flush(void)
294 /* Don't want cpus going down or up during this. */ 341 /* Don't want cpus going down or up during this. */
295 get_online_cpus(); 342 get_online_cpus();
296 mutex_lock(&flow_flush_sem); 343 mutex_lock(&flow_flush_sem);
344 info.cache = &flow_cache_global;
297 atomic_set(&info.cpuleft, num_online_cpus()); 345 atomic_set(&info.cpuleft, num_online_cpus());
298 init_completion(&info.completion); 346 init_completion(&info.completion);
299 347
@@ -307,62 +355,75 @@ void flow_cache_flush(void)
307 put_online_cpus(); 355 put_online_cpus();
308} 356}
309 357
310static void __init flow_cache_cpu_prepare(int cpu) 358static void __init flow_cache_cpu_prepare(struct flow_cache *fc,
359 struct flow_cache_percpu *fcp)
311{ 360{
312 struct tasklet_struct *tasklet; 361 fcp->hash_table = (struct hlist_head *)
313 unsigned long order; 362 __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order);
314 363 if (!fcp->hash_table)
315 for (order = 0; 364 panic("NET: failed to allocate flow cache order %lu\n", fc->order);
316 (PAGE_SIZE << order) < 365
317 (sizeof(struct flow_cache_entry *)*flow_hash_size); 366 fcp->hash_rnd_recalc = 1;
318 order++) 367 fcp->hash_count = 0;
319 /* NOTHING */; 368 tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
320
321 flow_table(cpu) = (struct flow_cache_entry **)
322 __get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
323 if (!flow_table(cpu))
324 panic("NET: failed to allocate flow cache order %lu\n", order);
325
326 flow_hash_rnd_recalc(cpu) = 1;
327 flow_count(cpu) = 0;
328
329 tasklet = flow_flush_tasklet(cpu);
330 tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
331} 369}
332 370
333static int flow_cache_cpu(struct notifier_block *nfb, 371static int flow_cache_cpu(struct notifier_block *nfb,
334 unsigned long action, 372 unsigned long action,
335 void *hcpu) 373 void *hcpu)
336{ 374{
375 struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
376 int cpu = (unsigned long) hcpu;
377 struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
378
337 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) 379 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
338 __flow_cache_shrink((unsigned long)hcpu, 0); 380 __flow_cache_shrink(fc, fcp, 0);
339 return NOTIFY_OK; 381 return NOTIFY_OK;
340} 382}
341 383
342static int __init flow_cache_init(void) 384static int flow_cache_init(struct flow_cache *fc)
343{ 385{
386 unsigned long order;
344 int i; 387 int i;
345 388
346 flow_cachep = kmem_cache_create("flow_cache", 389 fc->hash_shift = 10;
347 sizeof(struct flow_cache_entry), 390 fc->low_watermark = 2 * flow_cache_hash_size(fc);
348 0, SLAB_PANIC, 391 fc->high_watermark = 4 * flow_cache_hash_size(fc);
349 NULL); 392
350 flow_hash_shift = 10; 393 for (order = 0;
351 flow_lwm = 2 * flow_hash_size; 394 (PAGE_SIZE << order) <
352 flow_hwm = 4 * flow_hash_size; 395 (sizeof(struct hlist_head)*flow_cache_hash_size(fc));
396 order++)
397 /* NOTHING */;
398 fc->order = order;
399 fc->percpu = alloc_percpu(struct flow_cache_percpu);
353 400
354 setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0); 401 setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
355 flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; 402 (unsigned long) fc);
356 add_timer(&flow_hash_rnd_timer); 403 fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
404 add_timer(&fc->rnd_timer);
357 405
358 for_each_possible_cpu(i) 406 for_each_possible_cpu(i)
359 flow_cache_cpu_prepare(i); 407 flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i));
408
409 fc->hotcpu_notifier = (struct notifier_block){
410 .notifier_call = flow_cache_cpu,
411 };
412 register_hotcpu_notifier(&fc->hotcpu_notifier);
360 413
361 hotcpu_notifier(flow_cache_cpu, 0);
362 return 0; 414 return 0;
363} 415}
364 416
365module_init(flow_cache_init); 417static int __init flow_cache_init_global(void)
418{
419 flow_cachep = kmem_cache_create("flow_cache",
420 sizeof(struct flow_cache_entry),
421 0, SLAB_PANIC, NULL);
422
423 return flow_cache_init(&flow_cache_global);
424}
425
426module_init(flow_cache_init_global);
366 427
367EXPORT_SYMBOL(flow_cache_genid); 428EXPORT_SYMBOL(flow_cache_genid);
368EXPORT_SYMBOL(flow_cache_lookup); 429EXPORT_SYMBOL(flow_cache_lookup);
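
The flow.c hunks above replace the old resolver interface, which filled in an (obj, obj_ref) pair, with reference-counted flow cache objects: each cached entry now points at an object whose ops provide get() (take a reference on a cache hit, or refuse and force a re-resolve) and delete() (drop a reference), and the resolver itself returns the object to cache, taking the stale one as input. The following is a minimal userspace sketch of that ownership protocol only; the demo_* names, the simplified struct layouts and the single-argument resolver are illustrative assumptions, not the kernel definitions.

#include <stdio.h>
#include <stdlib.h>

struct flow_cache_object;

struct flow_cache_ops {
	struct flow_cache_object *(*get)(struct flow_cache_object *flo);
	void (*delete)(struct flow_cache_object *flo);
};

struct flow_cache_object {
	const struct flow_cache_ops *ops;
	int refcnt;
};

static struct flow_cache_object *demo_get(struct flow_cache_object *flo)
{
	flo->refcnt++;			/* cache hit: hand out another reference */
	return flo;			/* returning NULL instead would force a re-resolve */
}

static void demo_delete(struct flow_cache_object *flo)
{
	if (--flo->refcnt == 0)		/* last reference gone: release the object */
		free(flo);
}

static const struct flow_cache_ops demo_ops = { demo_get, demo_delete };

static struct flow_cache_object *demo_resolver(struct flow_cache_object *old)
{
	struct flow_cache_object *flo = malloc(sizeof(*flo));

	if (!flo)
		return old;		/* keep whatever was cached on allocation failure */
	if (old)
		old->ops->delete(old);	/* resolver owns, and here replaces, the stale object */
	flo->ops = &demo_ops;
	flo->refcnt = 1;		/* reference held by the cache */
	return flo;
}

int main(void)
{
	struct flow_cache_object *cached = demo_resolver(NULL);
	struct flow_cache_object *hit = cached->ops->get(cached);

	printf("references after a cache hit: %d\n", hit->refcnt);
	hit->ops->delete(hit);		/* lookup caller drops its reference */
	cached->ops->delete(cached);	/* cache eventually drops its own */
	return 0;
}
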
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 59cfc7d8fc45..96ed6905b823 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -467,6 +467,217 @@ static struct attribute_group wireless_group = {
467}; 467};
468#endif 468#endif
469 469
470#ifdef CONFIG_RPS
471/*
472 * RX queue sysfs structures and functions.
473 */
474struct rx_queue_attribute {
475 struct attribute attr;
476 ssize_t (*show)(struct netdev_rx_queue *queue,
477 struct rx_queue_attribute *attr, char *buf);
478 ssize_t (*store)(struct netdev_rx_queue *queue,
479 struct rx_queue_attribute *attr, const char *buf, size_t len);
480};
481#define to_rx_queue_attr(_attr) container_of(_attr, \
482 struct rx_queue_attribute, attr)
483
484#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)
485
486static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
487 char *buf)
488{
489 struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
490 struct netdev_rx_queue *queue = to_rx_queue(kobj);
491
492 if (!attribute->show)
493 return -EIO;
494
495 return attribute->show(queue, attribute, buf);
496}
497
498static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
499 const char *buf, size_t count)
500{
501 struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
502 struct netdev_rx_queue *queue = to_rx_queue(kobj);
503
504 if (!attribute->store)
505 return -EIO;
506
507 return attribute->store(queue, attribute, buf, count);
508}
509
510static struct sysfs_ops rx_queue_sysfs_ops = {
511 .show = rx_queue_attr_show,
512 .store = rx_queue_attr_store,
513};
514
515static ssize_t show_rps_map(struct netdev_rx_queue *queue,
516 struct rx_queue_attribute *attribute, char *buf)
517{
518 struct rps_map *map;
519 cpumask_var_t mask;
520 size_t len = 0;
521 int i;
522
523 if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
524 return -ENOMEM;
525
526 rcu_read_lock();
527 map = rcu_dereference(queue->rps_map);
528 if (map)
529 for (i = 0; i < map->len; i++)
530 cpumask_set_cpu(map->cpus[i], mask);
531
532 len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
533 if (PAGE_SIZE - len < 3) {
534 rcu_read_unlock();
535 free_cpumask_var(mask);
536 return -EINVAL;
537 }
538 rcu_read_unlock();
539
540 free_cpumask_var(mask);
541 len += sprintf(buf + len, "\n");
542 return len;
543}
544
545static void rps_map_release(struct rcu_head *rcu)
546{
547 struct rps_map *map = container_of(rcu, struct rps_map, rcu);
548
549 kfree(map);
550}
551
552ssize_t store_rps_map(struct netdev_rx_queue *queue,
553 struct rx_queue_attribute *attribute,
554 const char *buf, size_t len)
555{
556 struct rps_map *old_map, *map;
557 cpumask_var_t mask;
558 int err, cpu, i;
559 static DEFINE_SPINLOCK(rps_map_lock);
560
561 if (!capable(CAP_NET_ADMIN))
562 return -EPERM;
563
564 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
565 return -ENOMEM;
566
567 err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
568 if (err) {
569 free_cpumask_var(mask);
570 return err;
571 }
572
573 map = kzalloc(max_t(unsigned,
574 RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
575 GFP_KERNEL);
576 if (!map) {
577 free_cpumask_var(mask);
578 return -ENOMEM;
579 }
580
581 i = 0;
582 for_each_cpu_and(cpu, mask, cpu_online_mask)
583 map->cpus[i++] = cpu;
584
585 if (i)
586 map->len = i;
587 else {
588 kfree(map);
589 map = NULL;
590 }
591
592 spin_lock(&rps_map_lock);
593 old_map = queue->rps_map;
594 rcu_assign_pointer(queue->rps_map, map);
595 spin_unlock(&rps_map_lock);
596
597 if (old_map)
598 call_rcu(&old_map->rcu, rps_map_release);
599
600 free_cpumask_var(mask);
601 return len;
602}
603
604static struct rx_queue_attribute rps_cpus_attribute =
605 __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
606
607static struct attribute *rx_queue_default_attrs[] = {
608 &rps_cpus_attribute.attr,
609 NULL
610};
611
612static void rx_queue_release(struct kobject *kobj)
613{
614 struct netdev_rx_queue *queue = to_rx_queue(kobj);
615 struct rps_map *map = queue->rps_map;
616 struct netdev_rx_queue *first = queue->first;
617
618 if (map)
619 call_rcu(&map->rcu, rps_map_release);
620
621 if (atomic_dec_and_test(&first->count))
622 kfree(first);
623}
624
625static struct kobj_type rx_queue_ktype = {
626 .sysfs_ops = &rx_queue_sysfs_ops,
627 .release = rx_queue_release,
628 .default_attrs = rx_queue_default_attrs,
629};
630
631static int rx_queue_add_kobject(struct net_device *net, int index)
632{
633 struct netdev_rx_queue *queue = net->_rx + index;
634 struct kobject *kobj = &queue->kobj;
635 int error = 0;
636
637 kobj->kset = net->queues_kset;
638 error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
639 "rx-%u", index);
640 if (error) {
641 kobject_put(kobj);
642 return error;
643 }
644
645 kobject_uevent(kobj, KOBJ_ADD);
646
647 return error;
648}
649
650static int rx_queue_register_kobjects(struct net_device *net)
651{
652 int i;
653 int error = 0;
654
655 net->queues_kset = kset_create_and_add("queues",
656 NULL, &net->dev.kobj);
657 if (!net->queues_kset)
658 return -ENOMEM;
659 for (i = 0; i < net->num_rx_queues; i++) {
660 error = rx_queue_add_kobject(net, i);
661 if (error)
662 break;
663 }
664
665 if (error)
666 while (--i >= 0)
667 kobject_put(&net->_rx[i].kobj);
668
669 return error;
670}
671
672static void rx_queue_remove_kobjects(struct net_device *net)
673{
674 int i;
675
676 for (i = 0; i < net->num_rx_queues; i++)
677 kobject_put(&net->_rx[i].kobj);
678 kset_unregister(net->queues_kset);
679}
680#endif /* CONFIG_RPS */
470#endif /* CONFIG_SYSFS */ 681#endif /* CONFIG_SYSFS */
471 682
472#ifdef CONFIG_HOTPLUG 683#ifdef CONFIG_HOTPLUG
@@ -530,6 +741,10 @@ void netdev_unregister_kobject(struct net_device * net)
530 if (!net_eq(dev_net(net), &init_net)) 741 if (!net_eq(dev_net(net), &init_net))
531 return; 742 return;
532 743
744#ifdef CONFIG_RPS
745 rx_queue_remove_kobjects(net);
746#endif
747
533 device_del(dev); 748 device_del(dev);
534} 749}
535 750
@@ -538,6 +753,7 @@ int netdev_register_kobject(struct net_device *net)
538{ 753{
539 struct device *dev = &(net->dev); 754 struct device *dev = &(net->dev);
540 const struct attribute_group **groups = net->sysfs_groups; 755 const struct attribute_group **groups = net->sysfs_groups;
756 int error = 0;
541 757
542 dev->class = &net_class; 758 dev->class = &net_class;
543 dev->platform_data = net; 759 dev->platform_data = net;
@@ -564,7 +780,19 @@ int netdev_register_kobject(struct net_device *net)
564 if (!net_eq(dev_net(net), &init_net)) 780 if (!net_eq(dev_net(net), &init_net))
565 return 0; 781 return 0;
566 782
567 return device_add(dev); 783 error = device_add(dev);
784 if (error)
785 return error;
786
787#ifdef CONFIG_RPS
788 error = rx_queue_register_kobjects(net);
789 if (error) {
790 device_del(dev);
791 return error;
792 }
793#endif
794
795 return error;
568} 796}
569 797
570int netdev_class_create_file(struct class_attribute *class_attr) 798int netdev_class_create_file(struct class_attribute *class_attr)
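
The RPS hunks in net-sysfs.c above hang per-rx-queue attributes off a kobject by embedding a struct attribute inside a typed rx_queue_attribute and recovering the typed wrapper with container_of() in the generic show/store entry points. Below is a self-contained userspace illustration of that container_of dispatch idea only; the demo_* types, names and the trivial return convention are invented for the example and are not the kernel sysfs API.

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct attribute {
	const char *name;
};

struct demo_queue {
	int index;
};

struct demo_queue_attribute {
	struct attribute attr;
	int (*show)(struct demo_queue *q, struct demo_queue_attribute *a);
};

/* Generic entry point: it only sees the embedded struct attribute... */
static int demo_attr_show(struct demo_queue *q, struct attribute *attr)
{
	struct demo_queue_attribute *a =
		container_of(attr, struct demo_queue_attribute, attr);

	if (!a->show)
		return -1;
	return a->show(q, a);	/* ...and dispatches to the typed handler */
}

static int show_index(struct demo_queue *q, struct demo_queue_attribute *a)
{
	printf("%s of queue: %d\n", a->attr.name, q->index);
	return 0;
}

static struct demo_queue_attribute index_attribute = {
	.attr = { .name = "index" },
	.show = show_index,
};

int main(void)
{
	struct demo_queue q = { .index = 3 };

	return demo_attr_show(&q, &index_attribute.attr);
}
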
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 43923811bd6a..2ad68da418df 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -169,7 +169,7 @@
169#include <asm/dma.h> 169#include <asm/dma.h>
170#include <asm/div64.h> /* do_div */ 170#include <asm/div64.h> /* do_div */
171 171
172#define VERSION "2.72" 172#define VERSION "2.73"
173#define IP_NAME_SZ 32 173#define IP_NAME_SZ 32
174#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ 174#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
175#define MPLS_STACK_BOTTOM htonl(0x00000100) 175#define MPLS_STACK_BOTTOM htonl(0x00000100)
@@ -190,6 +190,7 @@
190#define F_IPSEC_ON (1<<12) /* ipsec on for flows */ 190#define F_IPSEC_ON (1<<12) /* ipsec on for flows */
191#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ 191#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */
192#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ 192#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
193#define F_NODE (1<<15) /* Node memory alloc*/
193 194
194/* Thread control flag bits */ 195/* Thread control flag bits */
195#define T_STOP (1<<0) /* Stop run */ 196#define T_STOP (1<<0) /* Stop run */
@@ -372,6 +373,7 @@ struct pktgen_dev {
372 373
373 u16 queue_map_min; 374 u16 queue_map_min;
374 u16 queue_map_max; 375 u16 queue_map_max;
376 int node; /* Memory node */
375 377
376#ifdef CONFIG_XFRM 378#ifdef CONFIG_XFRM
377 __u8 ipsmode; /* IPSEC mode (config) */ 379 __u8 ipsmode; /* IPSEC mode (config) */
@@ -607,6 +609,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
607 if (pkt_dev->traffic_class) 609 if (pkt_dev->traffic_class)
608 seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class); 610 seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class);
609 611
612 if (pkt_dev->node >= 0)
613 seq_printf(seq, " node: %d\n", pkt_dev->node);
614
610 seq_printf(seq, " Flags: "); 615 seq_printf(seq, " Flags: ");
611 616
612 if (pkt_dev->flags & F_IPV6) 617 if (pkt_dev->flags & F_IPV6)
@@ -660,6 +665,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
660 if (pkt_dev->flags & F_SVID_RND) 665 if (pkt_dev->flags & F_SVID_RND)
661 seq_printf(seq, "SVID_RND "); 666 seq_printf(seq, "SVID_RND ");
662 667
668 if (pkt_dev->flags & F_NODE)
669 seq_printf(seq, "NODE_ALLOC ");
670
663 seq_puts(seq, "\n"); 671 seq_puts(seq, "\n");
664 672
665 /* not really stopped, more like last-running-at */ 673 /* not really stopped, more like last-running-at */
@@ -1074,6 +1082,21 @@ static ssize_t pktgen_if_write(struct file *file,
1074 pkt_dev->dst_mac_count); 1082 pkt_dev->dst_mac_count);
1075 return count; 1083 return count;
1076 } 1084 }
1085 if (!strcmp(name, "node")) {
1086 len = num_arg(&user_buffer[i], 10, &value);
1087 if (len < 0)
1088 return len;
1089
1090 i += len;
1091
1092 if (node_possible(value)) {
1093 pkt_dev->node = value;
1094 sprintf(pg_result, "OK: node=%d", pkt_dev->node);
1095 }
1096 else
1097 sprintf(pg_result, "ERROR: node not possible");
1098 return count;
1099 }
1077 if (!strcmp(name, "flag")) { 1100 if (!strcmp(name, "flag")) {
1078 char f[32]; 1101 char f[32];
1079 memset(f, 0, 32); 1102 memset(f, 0, 32);
@@ -1166,12 +1189,18 @@ static ssize_t pktgen_if_write(struct file *file,
1166 else if (strcmp(f, "!IPV6") == 0) 1189 else if (strcmp(f, "!IPV6") == 0)
1167 pkt_dev->flags &= ~F_IPV6; 1190 pkt_dev->flags &= ~F_IPV6;
1168 1191
1192 else if (strcmp(f, "NODE_ALLOC") == 0)
1193 pkt_dev->flags |= F_NODE;
1194
1195 else if (strcmp(f, "!NODE_ALLOC") == 0)
1196 pkt_dev->flags &= ~F_NODE;
1197
1169 else { 1198 else {
1170 sprintf(pg_result, 1199 sprintf(pg_result,
1171 "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", 1200 "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
1172 f, 1201 f,
1173 "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " 1202 "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, "
1174 "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n"); 1203 "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n");
1175 return count; 1204 return count;
1176 } 1205 }
1177 sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); 1206 sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
@@ -2572,9 +2601,27 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2572 mod_cur_headers(pkt_dev); 2601 mod_cur_headers(pkt_dev);
2573 2602
2574 datalen = (odev->hard_header_len + 16) & ~0xf; 2603 datalen = (odev->hard_header_len + 16) & ~0xf;
2575 skb = __netdev_alloc_skb(odev, 2604
2576 pkt_dev->cur_pkt_size + 64 2605 if (pkt_dev->flags & F_NODE) {
2577 + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); 2606 int node;
2607
2608 if (pkt_dev->node >= 0)
2609 node = pkt_dev->node;
2610 else
2611 node = numa_node_id();
2612
2613 skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64
2614 + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node);
2615 if (likely(skb)) {
2616 skb_reserve(skb, NET_SKB_PAD);
2617 skb->dev = odev;
2618 }
2619 }
2620 else
2621 skb = __netdev_alloc_skb(odev,
2622 pkt_dev->cur_pkt_size + 64
2623 + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT);
2624
2578 if (!skb) { 2625 if (!skb) {
2579 sprintf(pkt_dev->result, "No memory"); 2626 sprintf(pkt_dev->result, "No memory");
2580 return NULL; 2627 return NULL;
@@ -3674,6 +3721,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3674 pkt_dev->svlan_p = 0; 3721 pkt_dev->svlan_p = 0;
3675 pkt_dev->svlan_cfi = 0; 3722 pkt_dev->svlan_cfi = 0;
3676 pkt_dev->svlan_id = 0xffff; 3723 pkt_dev->svlan_id = 0xffff;
3724 pkt_dev->node = -1;
3677 3725
3678 err = pktgen_setup_dev(pkt_dev, ifname); 3726 err = pktgen_setup_dev(pkt_dev, ifname);
3679 if (err) 3727 if (err)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4568120d8533..bf919b6acea2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -600,7 +600,41 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
600 600
601 a->rx_compressed = b->rx_compressed; 601 a->rx_compressed = b->rx_compressed;
602 a->tx_compressed = b->tx_compressed; 602 a->tx_compressed = b->tx_compressed;
603}; 603}
604
605static void copy_rtnl_link_stats64(void *v, const struct net_device_stats *b)
606{
607 struct rtnl_link_stats64 a;
608
609 a.rx_packets = b->rx_packets;
610 a.tx_packets = b->tx_packets;
611 a.rx_bytes = b->rx_bytes;
612 a.tx_bytes = b->tx_bytes;
613 a.rx_errors = b->rx_errors;
614 a.tx_errors = b->tx_errors;
615 a.rx_dropped = b->rx_dropped;
616 a.tx_dropped = b->tx_dropped;
617
618 a.multicast = b->multicast;
619 a.collisions = b->collisions;
620
621 a.rx_length_errors = b->rx_length_errors;
622 a.rx_over_errors = b->rx_over_errors;
623 a.rx_crc_errors = b->rx_crc_errors;
624 a.rx_frame_errors = b->rx_frame_errors;
625 a.rx_fifo_errors = b->rx_fifo_errors;
626 a.rx_missed_errors = b->rx_missed_errors;
627
628 a.tx_aborted_errors = b->tx_aborted_errors;
629 a.tx_carrier_errors = b->tx_carrier_errors;
630 a.tx_fifo_errors = b->tx_fifo_errors;
631 a.tx_heartbeat_errors = b->tx_heartbeat_errors;
632 a.tx_window_errors = b->tx_window_errors;
633
634 a.rx_compressed = b->rx_compressed;
635 a.tx_compressed = b->tx_compressed;
636 memcpy(v, &a, sizeof(a));
637}
604 638
605static inline int rtnl_vfinfo_size(const struct net_device *dev) 639static inline int rtnl_vfinfo_size(const struct net_device *dev)
606{ 640{
@@ -619,6 +653,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
619 + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ 653 + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
620 + nla_total_size(sizeof(struct rtnl_link_ifmap)) 654 + nla_total_size(sizeof(struct rtnl_link_ifmap))
621 + nla_total_size(sizeof(struct rtnl_link_stats)) 655 + nla_total_size(sizeof(struct rtnl_link_stats))
656 + nla_total_size(sizeof(struct rtnl_link_stats64))
622 + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ 657 + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
623 + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ 658 + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */
624 + nla_total_size(4) /* IFLA_TXQLEN */ 659 + nla_total_size(4) /* IFLA_TXQLEN */
@@ -698,6 +733,12 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
698 stats = dev_get_stats(dev); 733 stats = dev_get_stats(dev);
699 copy_rtnl_link_stats(nla_data(attr), stats); 734 copy_rtnl_link_stats(nla_data(attr), stats);
700 735
736 attr = nla_reserve(skb, IFLA_STATS64,
737 sizeof(struct rtnl_link_stats64));
738 if (attr == NULL)
739 goto nla_put_failure;
740 copy_rtnl_link_stats64(nla_data(attr), stats);
741
701 if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { 742 if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
702 int i; 743 int i;
703 struct ifla_vf_info ivi; 744 struct ifla_vf_info ivi;
@@ -1473,6 +1514,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
1473 case NETDEV_POST_INIT: 1514 case NETDEV_POST_INIT:
1474 case NETDEV_REGISTER: 1515 case NETDEV_REGISTER:
1475 case NETDEV_CHANGE: 1516 case NETDEV_CHANGE:
1517 case NETDEV_PRE_TYPE_CHANGE:
1476 case NETDEV_GOING_DOWN: 1518 case NETDEV_GOING_DOWN:
1477 case NETDEV_UNREGISTER: 1519 case NETDEV_UNREGISTER:
1478 case NETDEV_UNREGISTER_BATCH: 1520 case NETDEV_UNREGISTER_BATCH:
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 93c4e060c91e..bdea0efdf8cb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -534,6 +534,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
534 new->network_header = old->network_header; 534 new->network_header = old->network_header;
535 new->mac_header = old->mac_header; 535 new->mac_header = old->mac_header;
536 skb_dst_set(new, dst_clone(skb_dst(old))); 536 skb_dst_set(new, dst_clone(skb_dst(old)));
537 new->rxhash = old->rxhash;
537#ifdef CONFIG_XFRM 538#ifdef CONFIG_XFRM
538 new->sp = secpath_get(old->sp); 539 new->sp = secpath_get(old->sp);
539#endif 540#endif
@@ -581,6 +582,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
581 C(len); 582 C(len);
582 C(data_len); 583 C(data_len);
583 C(mac_len); 584 C(mac_len);
585 C(rxhash);
584 n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; 586 n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
585 n->cloned = 1; 587 n->cloned = 1;
586 n->nohdr = 0; 588 n->nohdr = 0;
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index bcd7632299f5..d3235899c7e3 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -208,7 +208,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
208 goto restart_timer; 208 goto restart_timer;
209 } 209 }
210 210
211 ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk, 211 ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk,
212 ccid3_tx_state_name(hc->tx_state)); 212 ccid3_tx_state_name(hc->tx_state));
213 213
214 if (hc->tx_state == TFRC_SSTATE_FBACK) 214 if (hc->tx_state == TFRC_SSTATE_FBACK)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 5ef32c2f0d6a..53f8e12d0c10 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -189,7 +189,7 @@ enum {
189#define DCCP_MIB_MAX __DCCP_MIB_MAX 189#define DCCP_MIB_MAX __DCCP_MIB_MAX
190struct dccp_mib { 190struct dccp_mib {
191 unsigned long mibs[DCCP_MIB_MAX]; 191 unsigned long mibs[DCCP_MIB_MAX];
192} __SNMP_MIB_ALIGN__; 192};
193 193
194DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); 194DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
195#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) 195#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field)
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 9ec717426024..58f7bc156850 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -415,7 +415,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
415 if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, 415 if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
416 dp->dccps_awl, dp->dccps_awh)) { 416 dp->dccps_awl, dp->dccps_awh)) {
417 dccp_pr_debug("invalid ackno: S.AWL=%llu, " 417 dccp_pr_debug("invalid ackno: S.AWL=%llu, "
418 "P.ackno=%llu, S.AWH=%llu \n", 418 "P.ackno=%llu, S.AWH=%llu\n",
419 (unsigned long long)dp->dccps_awl, 419 (unsigned long long)dp->dccps_awl,
420 (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq, 420 (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
421 (unsigned long long)dp->dccps_awh); 421 (unsigned long long)dp->dccps_awh);
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index cead68eb254c..615dbe3b43f9 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -350,7 +350,7 @@ static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr **ifap, int de
350 if (dn_db->dev->type == ARPHRD_ETHER) { 350 if (dn_db->dev->type == ARPHRD_ETHER) {
351 if (ifa1->ifa_local != dn_eth2dn(dev->dev_addr)) { 351 if (ifa1->ifa_local != dn_eth2dn(dev->dev_addr)) {
352 dn_dn2eth(mac_addr, ifa1->ifa_local); 352 dn_dn2eth(mac_addr, ifa1->ifa_local);
353 dev_mc_delete(dev, mac_addr, ETH_ALEN, 0); 353 dev_mc_del(dev, mac_addr);
354 } 354 }
355 } 355 }
356 356
@@ -381,7 +381,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
381 if (dev->type == ARPHRD_ETHER) { 381 if (dev->type == ARPHRD_ETHER) {
382 if (ifa->ifa_local != dn_eth2dn(dev->dev_addr)) { 382 if (ifa->ifa_local != dn_eth2dn(dev->dev_addr)) {
383 dn_dn2eth(mac_addr, ifa->ifa_local); 383 dn_dn2eth(mac_addr, ifa->ifa_local);
384 dev_mc_add(dev, mac_addr, ETH_ALEN, 0); 384 dev_mc_add(dev, mac_addr);
385 } 385 }
386 } 386 }
387 387
@@ -1001,9 +1001,9 @@ static int dn_eth_up(struct net_device *dev)
1001 struct dn_dev *dn_db = dev->dn_ptr; 1001 struct dn_dev *dn_db = dev->dn_ptr;
1002 1002
1003 if (dn_db->parms.forwarding == 0) 1003 if (dn_db->parms.forwarding == 0)
1004 dev_mc_add(dev, dn_rt_all_end_mcast, ETH_ALEN, 0); 1004 dev_mc_add(dev, dn_rt_all_end_mcast);
1005 else 1005 else
1006 dev_mc_add(dev, dn_rt_all_rt_mcast, ETH_ALEN, 0); 1006 dev_mc_add(dev, dn_rt_all_rt_mcast);
1007 1007
1008 dn_db->use_long = 1; 1008 dn_db->use_long = 1;
1009 1009
@@ -1015,9 +1015,9 @@ static void dn_eth_down(struct net_device *dev)
1015 struct dn_dev *dn_db = dev->dn_ptr; 1015 struct dn_dev *dn_db = dev->dn_ptr;
1016 1016
1017 if (dn_db->parms.forwarding == 0) 1017 if (dn_db->parms.forwarding == 0)
1018 dev_mc_delete(dev, dn_rt_all_end_mcast, ETH_ALEN, 0); 1018 dev_mc_del(dev, dn_rt_all_end_mcast);
1019 else 1019 else
1020 dev_mc_delete(dev, dn_rt_all_rt_mcast, ETH_ALEN, 0); 1020 dev_mc_del(dev, dn_rt_all_rt_mcast);
1021} 1021}
1022 1022
1023static void dn_dev_set_timer(struct net_device *dev); 1023static void dn_dev_set_timer(struct net_device *dev);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 2175e6d5cc8d..8fdca56bb08f 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -67,7 +67,7 @@ static int dsa_slave_open(struct net_device *dev)
67 return -ENETDOWN; 67 return -ENETDOWN;
68 68
69 if (compare_ether_addr(dev->dev_addr, master->dev_addr)) { 69 if (compare_ether_addr(dev->dev_addr, master->dev_addr)) {
70 err = dev_unicast_add(master, dev->dev_addr); 70 err = dev_uc_add(master, dev->dev_addr);
71 if (err < 0) 71 if (err < 0)
72 goto out; 72 goto out;
73 } 73 }
@@ -90,7 +90,7 @@ clear_allmulti:
90 dev_set_allmulti(master, -1); 90 dev_set_allmulti(master, -1);
91del_unicast: 91del_unicast:
92 if (compare_ether_addr(dev->dev_addr, master->dev_addr)) 92 if (compare_ether_addr(dev->dev_addr, master->dev_addr))
93 dev_unicast_delete(master, dev->dev_addr); 93 dev_uc_del(master, dev->dev_addr);
94out: 94out:
95 return err; 95 return err;
96} 96}
@@ -101,14 +101,14 @@ static int dsa_slave_close(struct net_device *dev)
101 struct net_device *master = p->parent->dst->master_netdev; 101 struct net_device *master = p->parent->dst->master_netdev;
102 102
103 dev_mc_unsync(master, dev); 103 dev_mc_unsync(master, dev);
104 dev_unicast_unsync(master, dev); 104 dev_uc_unsync(master, dev);
105 if (dev->flags & IFF_ALLMULTI) 105 if (dev->flags & IFF_ALLMULTI)
106 dev_set_allmulti(master, -1); 106 dev_set_allmulti(master, -1);
107 if (dev->flags & IFF_PROMISC) 107 if (dev->flags & IFF_PROMISC)
108 dev_set_promiscuity(master, -1); 108 dev_set_promiscuity(master, -1);
109 109
110 if (compare_ether_addr(dev->dev_addr, master->dev_addr)) 110 if (compare_ether_addr(dev->dev_addr, master->dev_addr))
111 dev_unicast_delete(master, dev->dev_addr); 111 dev_uc_del(master, dev->dev_addr);
112 112
113 return 0; 113 return 0;
114} 114}
@@ -130,7 +130,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev)
130 struct net_device *master = p->parent->dst->master_netdev; 130 struct net_device *master = p->parent->dst->master_netdev;
131 131
132 dev_mc_sync(master, dev); 132 dev_mc_sync(master, dev);
133 dev_unicast_sync(master, dev); 133 dev_uc_sync(master, dev);
134} 134}
135 135
136static int dsa_slave_set_mac_address(struct net_device *dev, void *a) 136static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
@@ -147,13 +147,13 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
147 goto out; 147 goto out;
148 148
149 if (compare_ether_addr(addr->sa_data, master->dev_addr)) { 149 if (compare_ether_addr(addr->sa_data, master->dev_addr)) {
150 err = dev_unicast_add(master, addr->sa_data); 150 err = dev_uc_add(master, addr->sa_data);
151 if (err < 0) 151 if (err < 0)
152 return err; 152 return err;
153 } 153 }
154 154
155 if (compare_ether_addr(dev->dev_addr, master->dev_addr)) 155 if (compare_ether_addr(dev->dev_addr, master->dev_addr))
156 dev_unicast_delete(master, dev->dev_addr); 156 dev_uc_del(master, dev->dev_addr);
157 157
158out: 158out:
159 memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); 159 memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 0c94a1ac2946..c9a1c68767ff 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -587,9 +587,15 @@ choice
587 config DEFAULT_HTCP 587 config DEFAULT_HTCP
588 bool "Htcp" if TCP_CONG_HTCP=y 588 bool "Htcp" if TCP_CONG_HTCP=y
589 589
590 config DEFAULT_HYBLA
591 bool "Hybla" if TCP_CONG_HYBLA=y
592
590 config DEFAULT_VEGAS 593 config DEFAULT_VEGAS
591 bool "Vegas" if TCP_CONG_VEGAS=y 594 bool "Vegas" if TCP_CONG_VEGAS=y
592 595
596 config DEFAULT_VENO
597 bool "Veno" if TCP_CONG_VENO=y
598
593 config DEFAULT_WESTWOOD 599 config DEFAULT_WESTWOOD
594 bool "Westwood" if TCP_CONG_WESTWOOD=y 600 bool "Westwood" if TCP_CONG_WESTWOOD=y
595 601
@@ -610,8 +616,10 @@ config DEFAULT_TCP_CONG
610 default "bic" if DEFAULT_BIC 616 default "bic" if DEFAULT_BIC
611 default "cubic" if DEFAULT_CUBIC 617 default "cubic" if DEFAULT_CUBIC
612 default "htcp" if DEFAULT_HTCP 618 default "htcp" if DEFAULT_HTCP
619 default "hybla" if DEFAULT_HYBLA
613 default "vegas" if DEFAULT_VEGAS 620 default "vegas" if DEFAULT_VEGAS
614 default "westwood" if DEFAULT_WESTWOOD 621 default "westwood" if DEFAULT_WESTWOOD
622 default "veno" if DEFAULT_VENO
615 default "reno" if DEFAULT_RENO 623 default "reno" if DEFAULT_RENO
616 default "cubic" 624 default "cubic"
617 625
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f71357422380..a0beb32beaa3 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1407,10 +1407,10 @@ EXPORT_SYMBOL_GPL(snmp_fold_field);
1407int snmp_mib_init(void __percpu *ptr[2], size_t mibsize) 1407int snmp_mib_init(void __percpu *ptr[2], size_t mibsize)
1408{ 1408{
1409 BUG_ON(ptr == NULL); 1409 BUG_ON(ptr == NULL);
1410 ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); 1410 ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long));
1411 if (!ptr[0]) 1411 if (!ptr[0])
1412 goto err0; 1412 goto err0;
1413 ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); 1413 ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long));
1414 if (!ptr[1]) 1414 if (!ptr[1])
1415 goto err1; 1415 goto err1;
1416 return 0; 1416 return 0;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 90e3d6379a42..382bc768ed56 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1096,10 +1096,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1096 case NETDEV_DOWN: 1096 case NETDEV_DOWN:
1097 ip_mc_down(in_dev); 1097 ip_mc_down(in_dev);
1098 break; 1098 break;
1099 case NETDEV_BONDING_OLDTYPE: 1099 case NETDEV_PRE_TYPE_CHANGE:
1100 ip_mc_unmap(in_dev); 1100 ip_mc_unmap(in_dev);
1101 break; 1101 break;
1102 case NETDEV_BONDING_NEWTYPE: 1102 case NETDEV_POST_TYPE_CHANGE:
1103 ip_mc_remap(in_dev); 1103 ip_mc_remap(in_dev);
1104 break; 1104 break;
1105 case NETDEV_CHANGEMTU: 1105 case NETDEV_CHANGEMTU:
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ac4dec132735..f3d339f728b0 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -331,9 +331,10 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
331 if (ip_append_data(sk, icmp_glue_bits, icmp_param, 331 if (ip_append_data(sk, icmp_glue_bits, icmp_param,
332 icmp_param->data_len+icmp_param->head_len, 332 icmp_param->data_len+icmp_param->head_len,
333 icmp_param->head_len, 333 icmp_param->head_len,
334 ipc, rt, MSG_DONTWAIT) < 0) 334 ipc, rt, MSG_DONTWAIT) < 0) {
335 ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_OUTERRORS);
335 ip_flush_pending_frames(sk); 336 ip_flush_pending_frames(sk);
336 else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { 337 } else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
337 struct icmphdr *icmph = icmp_hdr(skb); 338 struct icmphdr *icmph = icmp_hdr(skb);
338 __wsum csum = 0; 339 __wsum csum = 0;
339 struct sk_buff *skb1; 340 struct sk_buff *skb1;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 15d3eeda92f5..5fff865a4fa7 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -998,7 +998,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr)
998 --ANK 998 --ANK
999 */ 999 */
1000 if (arp_mc_map(addr, buf, dev, 0) == 0) 1000 if (arp_mc_map(addr, buf, dev, 0) == 0)
1001 dev_mc_add(dev, buf, dev->addr_len, 0); 1001 dev_mc_add(dev, buf);
1002} 1002}
1003 1003
1004/* 1004/*
@@ -1011,7 +1011,7 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr)
1011 struct net_device *dev = in_dev->dev; 1011 struct net_device *dev = in_dev->dev;
1012 1012
1013 if (arp_mc_map(addr, buf, dev, 0) == 0) 1013 if (arp_mc_map(addr, buf, dev, 0) == 0)
1014 dev_mc_delete(dev, buf, dev->addr_len, 0); 1014 dev_mc_del(dev, buf);
1015} 1015}
1016 1016
1017#ifdef CONFIG_IP_MULTICAST 1017#ifdef CONFIG_IP_MULTICAST
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 1e64dabbd232..b0aa0546a3b3 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -287,12 +287,8 @@ int ip_ra_control(struct sock *sk, unsigned char on,
287void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, 287void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
288 __be16 port, u32 info, u8 *payload) 288 __be16 port, u32 info, u8 *payload)
289{ 289{
290 struct inet_sock *inet = inet_sk(sk);
291 struct sock_exterr_skb *serr; 290 struct sock_exterr_skb *serr;
292 291
293 if (!inet->recverr)
294 return;
295
296 skb = skb_clone(skb, GFP_ATOMIC); 292 skb = skb_clone(skb, GFP_ATOMIC);
297 if (!skb) 293 if (!skb)
298 return; 294 return;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 067ce9e043dc..b9d84e800cf4 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -976,7 +976,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
976 /* Is it a reply for the device we are configuring? */ 976 /* Is it a reply for the device we are configuring? */
977 if (b->xid != ic_dev_xid) { 977 if (b->xid != ic_dev_xid) {
978 if (net_ratelimit()) 978 if (net_ratelimit())
979 printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet \n"); 979 printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet\n");
980 goto drop_unlock; 980 goto drop_unlock;
981 } 981 }
982 982
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index ab828400ed71..a992dc826f1c 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -88,7 +88,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
88 list_del(&c->list); 88 list_del(&c->list);
89 write_unlock_bh(&clusterip_lock); 89 write_unlock_bh(&clusterip_lock);
90 90
91 dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); 91 dev_mc_del(c->dev, c->clustermac);
92 dev_put(c->dev); 92 dev_put(c->dev);
93 93
94 /* In case anyone still accesses the file, the open/close 94 /* In case anyone still accesses the file, the open/close
@@ -397,7 +397,7 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par)
397 dev_put(dev); 397 dev_put(dev);
398 return false; 398 return false;
399 } 399 }
400 dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); 400 dev_mc_add(config->dev, config->clustermac);
401 } 401 }
402 } 402 }
403 cipinfo->config = config; 403 cipinfo->config = config;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4f1f337f4337..3dc9914c1dce 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -251,6 +251,7 @@ static const struct snmp_mib snmp4_net_list[] = {
251 SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), 251 SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK),
252 SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), 252 SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP),
253 SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), 253 SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
254 SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
254 SNMP_MIB_SENTINEL 255 SNMP_MIB_SENTINEL
255}; 256};
256 257
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f240f57b2199..4000b10610b7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4319,7 +4319,7 @@ static void tcp_ofo_queue(struct sock *sk)
4319 } 4319 }
4320 4320
4321 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { 4321 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4322 SOCK_DEBUG(sk, "ofo packet was already received \n"); 4322 SOCK_DEBUG(sk, "ofo packet was already received\n");
4323 __skb_unlink(skb, &tp->out_of_order_queue); 4323 __skb_unlink(skb, &tp->out_of_order_queue);
4324 __kfree_skb(skb); 4324 __kfree_skb(skb);
4325 continue; 4325 continue;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 5fabff9ac6d6..794c2e122a41 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -672,6 +672,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
672 if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && 672 if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
673 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { 673 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
674 inet_rsk(req)->acked = 1; 674 inet_rsk(req)->acked = 1;
675 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
675 return NULL; 676 return NULL;
676 } 677 }
677 678
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index e4a1483fba77..1705476670ef 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -59,27 +59,6 @@ static int xfrm4_get_saddr(struct net *net,
59 return 0; 59 return 0;
60} 60}
61 61
62static struct dst_entry *
63__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
64{
65 struct dst_entry *dst;
66
67 read_lock_bh(&policy->lock);
68 for (dst = policy->bundles; dst; dst = dst->next) {
69 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
70 if (xdst->u.rt.fl.oif == fl->oif && /*XXX*/
71 xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
72 xdst->u.rt.fl.fl4_src == fl->fl4_src &&
73 xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
74 xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) {
75 dst_clone(dst);
76 break;
77 }
78 }
79 read_unlock_bh(&policy->lock);
80 return dst;
81}
82
83static int xfrm4_get_tos(struct flowi *fl) 62static int xfrm4_get_tos(struct flowi *fl)
84{ 63{
85 return fl->fl4_tos; 64 return fl->fl4_tos;
@@ -259,7 +238,6 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
259 .dst_ops = &xfrm4_dst_ops, 238 .dst_ops = &xfrm4_dst_ops,
260 .dst_lookup = xfrm4_dst_lookup, 239 .dst_lookup = xfrm4_dst_lookup,
261 .get_saddr = xfrm4_get_saddr, 240 .get_saddr = xfrm4_get_saddr,
262 .find_bundle = __xfrm4_find_bundle,
263 .decode_session = _decode_session4, 241 .decode_session = _decode_session4,
264 .get_tos = xfrm4_get_tos, 242 .get_tos = xfrm4_get_tos,
265 .init_path = xfrm4_init_path, 243 .init_path = xfrm4_init_path,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 413054f02aab..1b00bfef268e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -82,7 +82,7 @@
82#include <linux/random.h> 82#include <linux/random.h>
83#endif 83#endif
84 84
85#include <asm/uaccess.h> 85#include <linux/uaccess.h>
86#include <asm/unaligned.h> 86#include <asm/unaligned.h>
87 87
88#include <linux/proc_fs.h> 88#include <linux/proc_fs.h>
@@ -98,7 +98,11 @@
98#endif 98#endif
99 99
100#define INFINITY_LIFE_TIME 0xFFFFFFFF 100#define INFINITY_LIFE_TIME 0xFFFFFFFF
101#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b))) 101#define TIME_DELTA(a, b) ((unsigned long)((long)(a) - (long)(b)))
102
103#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1)
104#define ADDRCONF_TIMER_FUZZ (HZ / 4)
105#define ADDRCONF_TIMER_FUZZ_MAX (HZ)
102 106
103#ifdef CONFIG_SYSCTL 107#ifdef CONFIG_SYSCTL
104static void addrconf_sysctl_register(struct inet6_dev *idev); 108static void addrconf_sysctl_register(struct inet6_dev *idev);
@@ -127,8 +131,8 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
127/* 131/*
128 * Configured unicast address hash table 132 * Configured unicast address hash table
129 */ 133 */
130static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE]; 134static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
131static DEFINE_RWLOCK(addrconf_hash_lock); 135static DEFINE_SPINLOCK(addrconf_hash_lock);
132 136
133static void addrconf_verify(unsigned long); 137static void addrconf_verify(unsigned long);
134 138
@@ -138,8 +142,8 @@ static DEFINE_SPINLOCK(addrconf_verify_lock);
138static void addrconf_join_anycast(struct inet6_ifaddr *ifp); 142static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
139static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); 143static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
140 144
141static void addrconf_bonding_change(struct net_device *dev, 145static void addrconf_type_change(struct net_device *dev,
142 unsigned long event); 146 unsigned long event);
143static int addrconf_ifdown(struct net_device *dev, int how); 147static int addrconf_ifdown(struct net_device *dev, int how);
144 148
145static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags); 149static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
@@ -152,8 +156,8 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
152 156
153static void inet6_prefix_notify(int event, struct inet6_dev *idev, 157static void inet6_prefix_notify(int event, struct inet6_dev *idev,
154 struct prefix_info *pinfo); 158 struct prefix_info *pinfo);
155static int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, 159static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
156 struct net_device *dev); 160 struct net_device *dev);
157 161
158static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); 162static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
159 163
@@ -250,8 +254,7 @@ static void addrconf_del_timer(struct inet6_ifaddr *ifp)
250 __in6_ifa_put(ifp); 254 __in6_ifa_put(ifp);
251} 255}
252 256
253enum addrconf_timer_t 257enum addrconf_timer_t {
254{
255 AC_NONE, 258 AC_NONE,
256 AC_DAD, 259 AC_DAD,
257 AC_RS, 260 AC_RS,
@@ -271,7 +274,8 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
271 case AC_RS: 274 case AC_RS:
272 ifp->timer.function = addrconf_rs_timer; 275 ifp->timer.function = addrconf_rs_timer;
273 break; 276 break;
274 default:; 277 default:
278 break;
275 } 279 }
276 ifp->timer.expires = jiffies + when; 280 ifp->timer.expires = jiffies + when;
277 add_timer(&ifp->timer); 281 add_timer(&ifp->timer);
@@ -318,7 +322,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
318{ 322{
319 struct net_device *dev = idev->dev; 323 struct net_device *dev = idev->dev;
320 324
321 WARN_ON(idev->addr_list != NULL); 325 WARN_ON(!list_empty(&idev->addr_list));
322 WARN_ON(idev->mc_list != NULL); 326 WARN_ON(idev->mc_list != NULL);
323 327
324#ifdef NET_REFCNT_DEBUG 328#ifdef NET_REFCNT_DEBUG
@@ -326,7 +330,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
326#endif 330#endif
327 dev_put(dev); 331 dev_put(dev);
328 if (!idev->dead) { 332 if (!idev->dead) {
329 printk("Freeing alive inet6 device %p\n", idev); 333 pr_warning("Freeing alive inet6 device %p\n", idev);
330 return; 334 return;
331 } 335 }
332 snmp6_free_dev(idev); 336 snmp6_free_dev(idev);
@@ -351,6 +355,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
351 355
352 rwlock_init(&ndev->lock); 356 rwlock_init(&ndev->lock);
353 ndev->dev = dev; 357 ndev->dev = dev;
358 INIT_LIST_HEAD(&ndev->addr_list);
359
354 memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); 360 memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
355 ndev->cnf.mtu6 = dev->mtu; 361 ndev->cnf.mtu6 = dev->mtu;
356 ndev->cnf.sysctl = NULL; 362 ndev->cnf.sysctl = NULL;
@@ -402,6 +408,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
402#endif 408#endif
403 409
404#ifdef CONFIG_IPV6_PRIVACY 410#ifdef CONFIG_IPV6_PRIVACY
411 INIT_LIST_HEAD(&ndev->tempaddr_list);
405 setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); 412 setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev);
406 if ((dev->flags&IFF_LOOPBACK) || 413 if ((dev->flags&IFF_LOOPBACK) ||
407 dev->type == ARPHRD_TUNNEL || 414 dev->type == ARPHRD_TUNNEL ||
@@ -439,8 +446,10 @@ static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
439 446
440 ASSERT_RTNL(); 447 ASSERT_RTNL();
441 448
442 if ((idev = __in6_dev_get(dev)) == NULL) { 449 idev = __in6_dev_get(dev);
443 if ((idev = ipv6_add_dev(dev)) == NULL) 450 if (!idev) {
451 idev = ipv6_add_dev(dev);
452 if (!idev)
444 return NULL; 453 return NULL;
445 } 454 }
446 455
@@ -466,7 +475,8 @@ static void dev_forward_change(struct inet6_dev *idev)
466 else 475 else
467 ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters); 476 ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters);
468 } 477 }
469 for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) { 478
479 list_for_each_entry(ifa, &idev->addr_list, if_list) {
470 if (ifa->flags&IFA_F_TENTATIVE) 480 if (ifa->flags&IFA_F_TENTATIVE)
471 continue; 481 continue;
472 if (idev->cnf.forwarding) 482 if (idev->cnf.forwarding)
@@ -523,12 +533,16 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
523} 533}
524#endif 534#endif
525 535
526/* Nobody refers to this ifaddr, destroy it */ 536static void inet6_ifa_finish_destroy_rcu(struct rcu_head *head)
537{
538 struct inet6_ifaddr *ifp = container_of(head, struct inet6_ifaddr, rcu);
539 kfree(ifp);
540}
527 541
542/* Nobody refers to this ifaddr, destroy it */
528void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) 543void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
529{ 544{
530 WARN_ON(ifp->if_next != NULL); 545 WARN_ON(!hlist_unhashed(&ifp->addr_lst));
531 WARN_ON(ifp->lst_next != NULL);
532 546
533#ifdef NET_REFCNT_DEBUG 547#ifdef NET_REFCNT_DEBUG
534 printk(KERN_DEBUG "inet6_ifa_finish_destroy\n"); 548 printk(KERN_DEBUG "inet6_ifa_finish_destroy\n");
@@ -537,54 +551,45 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
537 in6_dev_put(ifp->idev); 551 in6_dev_put(ifp->idev);
538 552
539 if (del_timer(&ifp->timer)) 553 if (del_timer(&ifp->timer))
540 printk("Timer is still running, when freeing ifa=%p\n", ifp); 554 pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
541 555
542 if (!ifp->dead) { 556 if (!ifp->dead) {
543 printk("Freeing alive inet6 address %p\n", ifp); 557 pr_warning("Freeing alive inet6 address %p\n", ifp);
544 return; 558 return;
545 } 559 }
546 dst_release(&ifp->rt->u.dst); 560 dst_release(&ifp->rt->u.dst);
547 561
548 kfree(ifp); 562 call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu);
549} 563}
550 564
551static void 565static void
552ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) 566ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
553{ 567{
554 struct inet6_ifaddr *ifa, **ifap; 568 struct list_head *p;
555 int ifp_scope = ipv6_addr_src_scope(&ifp->addr); 569 int ifp_scope = ipv6_addr_src_scope(&ifp->addr);
556 570
557 /* 571 /*
558 * Each device address list is sorted in order of scope - 572 * Each device address list is sorted in order of scope -
559 * global before linklocal. 573 * global before linklocal.
560 */ 574 */
561 for (ifap = &idev->addr_list; (ifa = *ifap) != NULL; 575 list_for_each(p, &idev->addr_list) {
562 ifap = &ifa->if_next) { 576 struct inet6_ifaddr *ifa
577 = list_entry(p, struct inet6_ifaddr, if_list);
563 if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr)) 578 if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr))
564 break; 579 break;
565 } 580 }
566 581
567 ifp->if_next = *ifap; 582 list_add_tail(&ifp->if_list, p);
568 *ifap = ifp;
569} 583}
570 584
571/* 585static u32 ipv6_addr_hash(const struct in6_addr *addr)
572 * Hash function taken from net_alias.c
573 */
574static u8 ipv6_addr_hash(const struct in6_addr *addr)
575{ 586{
576 __u32 word;
577
578 /* 587 /*
579 * We perform the hash function over the last 64 bits of the address 588 * We perform the hash function over the last 64 bits of the address
580 * This will include the IEEE address token on links that support it. 589 * This will include the IEEE address token on links that support it.
581 */ 590 */
582 591 return jhash_2words(addr->s6_addr32[2], addr->s6_addr32[3], 0)
583 word = (__force u32)(addr->s6_addr32[2] ^ addr->s6_addr32[3]); 592 & (IN6_ADDR_HSIZE - 1);
584 word ^= (word >> 16);
585 word ^= (word >> 8);
586
587 return ((word ^ (word >> 4)) & 0x0f);
588} 593}
589 594
590/* On success it returns ifp with increased reference count */ 595/* On success it returns ifp with increased reference count */
@@ -595,7 +600,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
595{ 600{
596 struct inet6_ifaddr *ifa = NULL; 601 struct inet6_ifaddr *ifa = NULL;
597 struct rt6_info *rt; 602 struct rt6_info *rt;
598 int hash; 603 unsigned int hash;
599 int err = 0; 604 int err = 0;
600 int addr_type = ipv6_addr_type(addr); 605 int addr_type = ipv6_addr_type(addr);
601 606
@@ -616,7 +621,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
616 goto out2; 621 goto out2;
617 } 622 }
618 623
619 write_lock(&addrconf_hash_lock); 624 spin_lock(&addrconf_hash_lock);
620 625
621 /* Ignore adding duplicate addresses on an interface */ 626 /* Ignore adding duplicate addresses on an interface */
622 if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) { 627 if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
@@ -643,6 +648,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
643 648
644 spin_lock_init(&ifa->lock); 649 spin_lock_init(&ifa->lock);
645 init_timer(&ifa->timer); 650 init_timer(&ifa->timer);
651 INIT_HLIST_NODE(&ifa->addr_lst);
646 ifa->timer.data = (unsigned long) ifa; 652 ifa->timer.data = (unsigned long) ifa;
647 ifa->scope = scope; 653 ifa->scope = scope;
648 ifa->prefix_len = pfxlen; 654 ifa->prefix_len = pfxlen;
@@ -669,10 +675,9 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
669 /* Add to big hash table */ 675 /* Add to big hash table */
670 hash = ipv6_addr_hash(addr); 676 hash = ipv6_addr_hash(addr);
671 677
672 ifa->lst_next = inet6_addr_lst[hash]; 678 hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
673 inet6_addr_lst[hash] = ifa;
674 in6_ifa_hold(ifa); 679 in6_ifa_hold(ifa);
675 write_unlock(&addrconf_hash_lock); 680 spin_unlock(&addrconf_hash_lock);
676 681
677 write_lock(&idev->lock); 682 write_lock(&idev->lock);
678 /* Add to inet6_dev unicast addr list. */ 683 /* Add to inet6_dev unicast addr list. */
@@ -680,8 +685,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
680 685
681#ifdef CONFIG_IPV6_PRIVACY 686#ifdef CONFIG_IPV6_PRIVACY
682 if (ifa->flags&IFA_F_TEMPORARY) { 687 if (ifa->flags&IFA_F_TEMPORARY) {
683 ifa->tmp_next = idev->tempaddr_list; 688 list_add(&ifa->tmp_list, &idev->tempaddr_list);
684 idev->tempaddr_list = ifa;
685 in6_ifa_hold(ifa); 689 in6_ifa_hold(ifa);
686 } 690 }
687#endif 691#endif
@@ -700,7 +704,7 @@ out2:
700 704
701 return ifa; 705 return ifa;
702out: 706out:
703 write_unlock(&addrconf_hash_lock); 707 spin_unlock(&addrconf_hash_lock);
704 goto out2; 708 goto out2;
705} 709}
706 710
@@ -708,7 +712,7 @@ out:
708 712
709static void ipv6_del_addr(struct inet6_ifaddr *ifp) 713static void ipv6_del_addr(struct inet6_ifaddr *ifp)
710{ 714{
711 struct inet6_ifaddr *ifa, **ifap; 715 struct inet6_ifaddr *ifa, *ifn;
712 struct inet6_dev *idev = ifp->idev; 716 struct inet6_dev *idev = ifp->idev;
713 int hash; 717 int hash;
714 int deleted = 0, onlink = 0; 718 int deleted = 0, onlink = 0;
@@ -718,42 +722,28 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
718 722
719 ifp->dead = 1; 723 ifp->dead = 1;
720 724
721 write_lock_bh(&addrconf_hash_lock); 725 spin_lock_bh(&addrconf_hash_lock);
722 for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL; 726 hlist_del_init_rcu(&ifp->addr_lst);
723 ifap = &ifa->lst_next) { 727 __in6_ifa_put(ifp);
724 if (ifa == ifp) { 728 spin_unlock_bh(&addrconf_hash_lock);
725 *ifap = ifa->lst_next;
726 __in6_ifa_put(ifp);
727 ifa->lst_next = NULL;
728 break;
729 }
730 }
731 write_unlock_bh(&addrconf_hash_lock);
732 729
733 write_lock_bh(&idev->lock); 730 write_lock_bh(&idev->lock);
734#ifdef CONFIG_IPV6_PRIVACY 731#ifdef CONFIG_IPV6_PRIVACY
735 if (ifp->flags&IFA_F_TEMPORARY) { 732 if (ifp->flags&IFA_F_TEMPORARY) {
736 for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL; 733 list_del(&ifp->tmp_list);
737 ifap = &ifa->tmp_next) { 734 if (ifp->ifpub) {
738 if (ifa == ifp) { 735 in6_ifa_put(ifp->ifpub);
739 *ifap = ifa->tmp_next; 736 ifp->ifpub = NULL;
740 if (ifp->ifpub) {
741 in6_ifa_put(ifp->ifpub);
742 ifp->ifpub = NULL;
743 }
744 __in6_ifa_put(ifp);
745 ifa->tmp_next = NULL;
746 break;
747 }
748 } 737 }
738 __in6_ifa_put(ifp);
749 } 739 }
750#endif 740#endif
751 741
752 for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) { 742 list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) {
753 if (ifa == ifp) { 743 if (ifa == ifp) {
754 *ifap = ifa->if_next; 744 list_del_init(&ifp->if_list);
755 __in6_ifa_put(ifp); 745 __in6_ifa_put(ifp);
756 ifa->if_next = NULL; 746
757 if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0) 747 if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
758 break; 748 break;
759 deleted = 1; 749 deleted = 1;
@@ -786,7 +776,6 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
786 } 776 }
787 } 777 }
788 } 778 }
789 ifap = &ifa->if_next;
790 } 779 }
791 write_unlock_bh(&idev->lock); 780 write_unlock_bh(&idev->lock);
792 781
@@ -1165,7 +1154,7 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
1165 continue; 1154 continue;
1166 1155
1167 read_lock_bh(&idev->lock); 1156 read_lock_bh(&idev->lock);
1168 for (score->ifa = idev->addr_list; score->ifa; score->ifa = score->ifa->if_next) { 1157 list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
1169 int i; 1158 int i;
1170 1159
1171 /* 1160 /*
@@ -1243,7 +1232,6 @@ try_nextdev:
1243 in6_ifa_put(hiscore->ifa); 1232 in6_ifa_put(hiscore->ifa);
1244 return 0; 1233 return 0;
1245} 1234}
1246
1247EXPORT_SYMBOL(ipv6_dev_get_saddr); 1235EXPORT_SYMBOL(ipv6_dev_get_saddr);
1248 1236
1249int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, 1237int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
@@ -1253,12 +1241,14 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
1253 int err = -EADDRNOTAVAIL; 1241 int err = -EADDRNOTAVAIL;
1254 1242
1255 rcu_read_lock(); 1243 rcu_read_lock();
1256 if ((idev = __in6_dev_get(dev)) != NULL) { 1244 idev = __in6_dev_get(dev);
1245 if (idev) {
1257 struct inet6_ifaddr *ifp; 1246 struct inet6_ifaddr *ifp;
1258 1247
1259 read_lock_bh(&idev->lock); 1248 read_lock_bh(&idev->lock);
1260 for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { 1249 list_for_each_entry(ifp, &idev->addr_list, if_list) {
1261 if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) { 1250 if (ifp->scope == IFA_LINK &&
1251 !(ifp->flags & banned_flags)) {
1262 ipv6_addr_copy(addr, &ifp->addr); 1252 ipv6_addr_copy(addr, &ifp->addr);
1263 err = 0; 1253 err = 0;
1264 break; 1254 break;
@@ -1276,7 +1266,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
1276 struct inet6_ifaddr *ifp; 1266 struct inet6_ifaddr *ifp;
1277 1267
1278 read_lock_bh(&idev->lock); 1268 read_lock_bh(&idev->lock);
1279 for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) 1269 list_for_each_entry(ifp, &idev->addr_list, if_list)
1280 cnt++; 1270 cnt++;
1281 read_unlock_bh(&idev->lock); 1271 read_unlock_bh(&idev->lock);
1282 return cnt; 1272 return cnt;
@@ -1285,11 +1275,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
1285int ipv6_chk_addr(struct net *net, struct in6_addr *addr, 1275int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
1286 struct net_device *dev, int strict) 1276 struct net_device *dev, int strict)
1287{ 1277{
1288 struct inet6_ifaddr * ifp; 1278 struct inet6_ifaddr *ifp = NULL;
1289 u8 hash = ipv6_addr_hash(addr); 1279 struct hlist_node *node;
1280 unsigned int hash = ipv6_addr_hash(addr);
1290 1281
1291 read_lock_bh(&addrconf_hash_lock); 1282 rcu_read_lock_bh();
1292 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { 1283 hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
1293 if (!net_eq(dev_net(ifp->idev->dev), net)) 1284 if (!net_eq(dev_net(ifp->idev->dev), net))
1294 continue; 1285 continue;
1295 if (ipv6_addr_equal(&ifp->addr, addr) && 1286 if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -1299,27 +1290,28 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
1299 break; 1290 break;
1300 } 1291 }
1301 } 1292 }
1302 read_unlock_bh(&addrconf_hash_lock); 1293 rcu_read_unlock_bh();
1294
1303 return ifp != NULL; 1295 return ifp != NULL;
1304} 1296}
1305EXPORT_SYMBOL(ipv6_chk_addr); 1297EXPORT_SYMBOL(ipv6_chk_addr);
1306 1298
1307static 1299static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
1308int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, 1300 struct net_device *dev)
1309 struct net_device *dev)
1310{ 1301{
1311 struct inet6_ifaddr * ifp; 1302 unsigned int hash = ipv6_addr_hash(addr);
1312 u8 hash = ipv6_addr_hash(addr); 1303 struct inet6_ifaddr *ifp;
1304 struct hlist_node *node;
1313 1305
1314 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { 1306 hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) {
1315 if (!net_eq(dev_net(ifp->idev->dev), net)) 1307 if (!net_eq(dev_net(ifp->idev->dev), net))
1316 continue; 1308 continue;
1317 if (ipv6_addr_equal(&ifp->addr, addr)) { 1309 if (ipv6_addr_equal(&ifp->addr, addr)) {
1318 if (dev == NULL || ifp->idev->dev == dev) 1310 if (dev == NULL || ifp->idev->dev == dev)
1319 break; 1311 return true;
1320 } 1312 }
1321 } 1313 }
1322 return ifp != NULL; 1314 return false;
1323} 1315}
1324 1316
1325int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev) 1317int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev)
@@ -1333,7 +1325,7 @@ int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev)
1333 idev = __in6_dev_get(dev); 1325 idev = __in6_dev_get(dev);
1334 if (idev) { 1326 if (idev) {
1335 read_lock_bh(&idev->lock); 1327 read_lock_bh(&idev->lock);
1336 for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) { 1328 list_for_each_entry(ifa, &idev->addr_list, if_list) {
1337 onlink = ipv6_prefix_equal(addr, &ifa->addr, 1329 onlink = ipv6_prefix_equal(addr, &ifa->addr,
1338 ifa->prefix_len); 1330 ifa->prefix_len);
1339 if (onlink) 1331 if (onlink)
@@ -1350,24 +1342,26 @@ EXPORT_SYMBOL(ipv6_chk_prefix);
1350struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, 1342struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr,
1351 struct net_device *dev, int strict) 1343 struct net_device *dev, int strict)
1352{ 1344{
1353 struct inet6_ifaddr * ifp; 1345 struct inet6_ifaddr *ifp, *result = NULL;
1354 u8 hash = ipv6_addr_hash(addr); 1346 unsigned int hash = ipv6_addr_hash(addr);
1347 struct hlist_node *node;
1355 1348
1356 read_lock_bh(&addrconf_hash_lock); 1349 rcu_read_lock_bh();
1357 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { 1350 hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
1358 if (!net_eq(dev_net(ifp->idev->dev), net)) 1351 if (!net_eq(dev_net(ifp->idev->dev), net))
1359 continue; 1352 continue;
1360 if (ipv6_addr_equal(&ifp->addr, addr)) { 1353 if (ipv6_addr_equal(&ifp->addr, addr)) {
1361 if (dev == NULL || ifp->idev->dev == dev || 1354 if (dev == NULL || ifp->idev->dev == dev ||
1362 !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { 1355 !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
1356 result = ifp;
1363 in6_ifa_hold(ifp); 1357 in6_ifa_hold(ifp);
1364 break; 1358 break;
1365 } 1359 }
1366 } 1360 }
1367 } 1361 }
1368 read_unlock_bh(&addrconf_hash_lock); 1362 rcu_read_unlock_bh();
1369 1363
1370 return ifp; 1364 return result;
1371} 1365}
1372 1366
1373/* Gets referenced address, destroys ifaddr */ 1367/* Gets referenced address, destroys ifaddr */
@@ -1570,7 +1564,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
1570 struct inet6_ifaddr *ifp; 1564 struct inet6_ifaddr *ifp;
1571 1565
1572 read_lock_bh(&idev->lock); 1566 read_lock_bh(&idev->lock);
1573 for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { 1567 list_for_each_entry(ifp, &idev->addr_list, if_list) {
1574 if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) { 1568 if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
1575 memcpy(eui, ifp->addr.s6_addr+8, 8); 1569 memcpy(eui, ifp->addr.s6_addr+8, 8);
1576 err = 0; 1570 err = 0;
@@ -1738,7 +1732,8 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
1738 1732
1739 ASSERT_RTNL(); 1733 ASSERT_RTNL();
1740 1734
1741 if ((idev = ipv6_find_idev(dev)) == NULL) 1735 idev = ipv6_find_idev(dev);
1736 if (!idev)
1742 return NULL; 1737 return NULL;
1743 1738
1744 /* Add default multicast route */ 1739 /* Add default multicast route */
@@ -1971,7 +1966,7 @@ ok:
1971#ifdef CONFIG_IPV6_PRIVACY 1966#ifdef CONFIG_IPV6_PRIVACY
1972 read_lock_bh(&in6_dev->lock); 1967 read_lock_bh(&in6_dev->lock);
1973 /* update all temporary addresses in the list */ 1968 /* update all temporary addresses in the list */
1974 for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) { 1969 list_for_each_entry(ift, &in6_dev->tempaddr_list, tmp_list) {
1975 /* 1970 /*
1976 * When adjusting the lifetimes of an existing 1971 * When adjusting the lifetimes of an existing
1977 * temporary address, only lower the lifetimes. 1972 * temporary address, only lower the lifetimes.
@@ -2174,7 +2169,7 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
2174 return -ENXIO; 2169 return -ENXIO;
2175 2170
2176 read_lock_bh(&idev->lock); 2171 read_lock_bh(&idev->lock);
2177 for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) { 2172 list_for_each_entry(ifp, &idev->addr_list, if_list) {
2178 if (ifp->prefix_len == plen && 2173 if (ifp->prefix_len == plen &&
2179 ipv6_addr_equal(pfx, &ifp->addr)) { 2174 ipv6_addr_equal(pfx, &ifp->addr)) {
2180 in6_ifa_hold(ifp); 2175 in6_ifa_hold(ifp);
@@ -2185,7 +2180,7 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
2185 /* If the last address is deleted administratively, 2180 /* If the last address is deleted administratively,
2186 disable IPv6 on this interface. 2181 disable IPv6 on this interface.
2187 */ 2182 */
2188 if (idev->addr_list == NULL) 2183 if (list_empty(&idev->addr_list))
2189 addrconf_ifdown(idev->dev, 1); 2184 addrconf_ifdown(idev->dev, 1);
2190 return 0; 2185 return 0;
2191 } 2186 }
@@ -2446,7 +2441,8 @@ static void addrconf_ip6_tnl_config(struct net_device *dev)
2446 2441
2447 ASSERT_RTNL(); 2442 ASSERT_RTNL();
2448 2443
2449 if ((idev = addrconf_add_dev(dev)) == NULL) { 2444 idev = addrconf_add_dev(dev);
2445 if (!idev) {
2450 printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n"); 2446 printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n");
2451 return; 2447 return;
2452 } 2448 }
@@ -2461,7 +2457,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2461 int run_pending = 0; 2457 int run_pending = 0;
2462 int err; 2458 int err;
2463 2459
2464 switch(event) { 2460 switch (event) {
2465 case NETDEV_REGISTER: 2461 case NETDEV_REGISTER:
2466 if (!idev && dev->mtu >= IPV6_MIN_MTU) { 2462 if (!idev && dev->mtu >= IPV6_MIN_MTU) {
2467 idev = ipv6_add_dev(dev); 2463 idev = ipv6_add_dev(dev);
@@ -2469,6 +2465,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2469 return notifier_from_errno(-ENOMEM); 2465 return notifier_from_errno(-ENOMEM);
2470 } 2466 }
2471 break; 2467 break;
2468
2472 case NETDEV_UP: 2469 case NETDEV_UP:
2473 case NETDEV_CHANGE: 2470 case NETDEV_CHANGE:
2474 if (dev->flags & IFF_SLAVE) 2471 if (dev->flags & IFF_SLAVE)
@@ -2498,10 +2495,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2498 } 2495 }
2499 2496
2500 if (idev) { 2497 if (idev) {
2501 if (idev->if_flags & IF_READY) { 2498 if (idev->if_flags & IF_READY)
2502 /* device is already configured. */ 2499 /* device is already configured. */
2503 break; 2500 break;
2504 }
2505 idev->if_flags |= IF_READY; 2501 idev->if_flags |= IF_READY;
2506 } 2502 }
2507 2503
@@ -2513,7 +2509,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2513 run_pending = 1; 2509 run_pending = 1;
2514 } 2510 }
2515 2511
2516 switch(dev->type) { 2512 switch (dev->type) {
2517#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) 2513#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
2518 case ARPHRD_SIT: 2514 case ARPHRD_SIT:
2519 addrconf_sit_config(dev); 2515 addrconf_sit_config(dev);
@@ -2530,25 +2526,30 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2530 addrconf_dev_config(dev); 2526 addrconf_dev_config(dev);
2531 break; 2527 break;
2532 } 2528 }
2529
2533 if (idev) { 2530 if (idev) {
2534 if (run_pending) 2531 if (run_pending)
2535 addrconf_dad_run(idev); 2532 addrconf_dad_run(idev);
2536 2533
2537 /* If the MTU changed during the interface down, when the 2534 /*
2538 interface up, the changed MTU must be reflected in the 2535 * If the MTU changed during the interface down,
2539 idev as well as routers. 2536 * when the interface up, the changed MTU must be
2537 * reflected in the idev as well as routers.
2540 */ 2538 */
2541 if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) { 2539 if (idev->cnf.mtu6 != dev->mtu &&
2540 dev->mtu >= IPV6_MIN_MTU) {
2542 rt6_mtu_change(dev, dev->mtu); 2541 rt6_mtu_change(dev, dev->mtu);
2543 idev->cnf.mtu6 = dev->mtu; 2542 idev->cnf.mtu6 = dev->mtu;
2544 } 2543 }
2545 idev->tstamp = jiffies; 2544 idev->tstamp = jiffies;
2546 inet6_ifinfo_notify(RTM_NEWLINK, idev); 2545 inet6_ifinfo_notify(RTM_NEWLINK, idev);
2547 /* If the changed mtu during down is lower than IPV6_MIN_MTU 2546
2548 stop IPv6 on this interface. 2547 /*
2548 * If the changed mtu during down is lower than
2549 * IPV6_MIN_MTU stop IPv6 on this interface.
2549 */ 2550 */
2550 if (dev->mtu < IPV6_MIN_MTU) 2551 if (dev->mtu < IPV6_MIN_MTU)
2551 addrconf_ifdown(dev, event != NETDEV_DOWN); 2552 addrconf_ifdown(dev, 1);
2552 } 2553 }
2553 break; 2554 break;
2554 2555
@@ -2565,7 +2566,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2565 break; 2566 break;
2566 } 2567 }
2567 2568
2568 /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */ 2569 /*
2570 * MTU falled under IPV6_MIN_MTU.
2571 * Stop IPv6 on this interface.
2572 */
2569 2573
2570 case NETDEV_DOWN: 2574 case NETDEV_DOWN:
2571 case NETDEV_UNREGISTER: 2575 case NETDEV_UNREGISTER:
@@ -2585,9 +2589,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2585 return notifier_from_errno(err); 2589 return notifier_from_errno(err);
2586 } 2590 }
2587 break; 2591 break;
2588 case NETDEV_BONDING_OLDTYPE: 2592
2589 case NETDEV_BONDING_NEWTYPE: 2593 case NETDEV_PRE_TYPE_CHANGE:
2590 addrconf_bonding_change(dev, event); 2594 case NETDEV_POST_TYPE_CHANGE:
2595 addrconf_type_change(dev, event);
2591 break; 2596 break;
2592 } 2597 }
2593 2598
@@ -2599,28 +2604,27 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2599 */ 2604 */
2600static struct notifier_block ipv6_dev_notf = { 2605static struct notifier_block ipv6_dev_notf = {
2601 .notifier_call = addrconf_notify, 2606 .notifier_call = addrconf_notify,
2602 .priority = 0
2603}; 2607};
2604 2608
2605static void addrconf_bonding_change(struct net_device *dev, unsigned long event) 2609static void addrconf_type_change(struct net_device *dev, unsigned long event)
2606{ 2610{
2607 struct inet6_dev *idev; 2611 struct inet6_dev *idev;
2608 ASSERT_RTNL(); 2612 ASSERT_RTNL();
2609 2613
2610 idev = __in6_dev_get(dev); 2614 idev = __in6_dev_get(dev);
2611 2615
2612 if (event == NETDEV_BONDING_NEWTYPE) 2616 if (event == NETDEV_POST_TYPE_CHANGE)
2613 ipv6_mc_remap(idev); 2617 ipv6_mc_remap(idev);
2614 else if (event == NETDEV_BONDING_OLDTYPE) 2618 else if (event == NETDEV_PRE_TYPE_CHANGE)
2615 ipv6_mc_unmap(idev); 2619 ipv6_mc_unmap(idev);
2616} 2620}
2617 2621
2618static int addrconf_ifdown(struct net_device *dev, int how) 2622static int addrconf_ifdown(struct net_device *dev, int how)
2619{ 2623{
2620 struct inet6_dev *idev;
2621 struct inet6_ifaddr *ifa, *keep_list, **bifa;
2622 struct net *net = dev_net(dev); 2624 struct net *net = dev_net(dev);
2623 int i; 2625 struct inet6_dev *idev;
2626 struct inet6_ifaddr *ifa;
2627 LIST_HEAD(keep_list);
2624 2628
2625 ASSERT_RTNL(); 2629 ASSERT_RTNL();
2626 2630
@@ -2631,8 +2635,9 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2631 if (idev == NULL) 2635 if (idev == NULL)
2632 return -ENODEV; 2636 return -ENODEV;
2633 2637
2634 /* Step 1: remove reference to ipv6 device from parent device. 2638 /*
2635 Do not dev_put! 2639 * Step 1: remove reference to ipv6 device from parent device.
2640 * Do not dev_put!
2636 */ 2641 */
2637 if (how) { 2642 if (how) {
2638 idev->dead = 1; 2643 idev->dead = 1;
@@ -2645,40 +2650,21 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2645 2650
2646 } 2651 }
2647 2652
2648 /* Step 2: clear hash table */
2649 for (i=0; i<IN6_ADDR_HSIZE; i++) {
2650 bifa = &inet6_addr_lst[i];
2651
2652 write_lock_bh(&addrconf_hash_lock);
2653 while ((ifa = *bifa) != NULL) {
2654 if (ifa->idev == idev &&
2655 (how || !(ifa->flags&IFA_F_PERMANENT) ||
2656 ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) {
2657 *bifa = ifa->lst_next;
2658 ifa->lst_next = NULL;
2659 __in6_ifa_put(ifa);
2660 continue;
2661 }
2662 bifa = &ifa->lst_next;
2663 }
2664 write_unlock_bh(&addrconf_hash_lock);
2665 }
2666
2667 write_lock_bh(&idev->lock); 2653 write_lock_bh(&idev->lock);
2668 2654
2669 /* Step 3: clear flags for stateless addrconf */ 2655 /* Step 2: clear flags for stateless addrconf */
2670 if (!how) 2656 if (!how)
2671 idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); 2657 idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
2672 2658
2673 /* Step 4: clear address list */
2674#ifdef CONFIG_IPV6_PRIVACY 2659#ifdef CONFIG_IPV6_PRIVACY
2675 if (how && del_timer(&idev->regen_timer)) 2660 if (how && del_timer(&idev->regen_timer))
2676 in6_dev_put(idev); 2661 in6_dev_put(idev);
2677 2662
2678 /* clear tempaddr list */ 2663 /* Step 3: clear tempaddr list */
2679 while ((ifa = idev->tempaddr_list) != NULL) { 2664 while (!list_empty(&idev->tempaddr_list)) {
2680 idev->tempaddr_list = ifa->tmp_next; 2665 ifa = list_first_entry(&idev->tempaddr_list,
2681 ifa->tmp_next = NULL; 2666 struct inet6_ifaddr, tmp_list);
2667 list_del(&ifa->tmp_list);
2682 ifa->dead = 1; 2668 ifa->dead = 1;
2683 write_unlock_bh(&idev->lock); 2669 write_unlock_bh(&idev->lock);
2684 spin_lock_bh(&ifa->lock); 2670 spin_lock_bh(&ifa->lock);
@@ -2692,23 +2678,18 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2692 write_lock_bh(&idev->lock); 2678 write_lock_bh(&idev->lock);
2693 } 2679 }
2694#endif 2680#endif
2695 keep_list = NULL;
2696 bifa = &keep_list;
2697 while ((ifa = idev->addr_list) != NULL) {
2698 idev->addr_list = ifa->if_next;
2699 ifa->if_next = NULL;
2700 2681
2682 while (!list_empty(&idev->addr_list)) {
2683 ifa = list_first_entry(&idev->addr_list,
2684 struct inet6_ifaddr, if_list);
2701 addrconf_del_timer(ifa); 2685 addrconf_del_timer(ifa);
2702 2686
2703 /* If just doing link down, and address is permanent 2687 /* If just doing link down, and address is permanent
2704 and not link-local, then retain it. */ 2688 and not link-local, then retain it. */
2705 if (how == 0 && 2689 if (!how &&
2706 (ifa->flags&IFA_F_PERMANENT) && 2690 (ifa->flags&IFA_F_PERMANENT) &&
2707 !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { 2691 !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) {
2708 2692 list_move_tail(&ifa->if_list, &keep_list);
2709 /* Move to holding list */
2710 *bifa = ifa;
2711 bifa = &ifa->if_next;
2712 2693
2713 /* If not doing DAD on this address, just keep it. */ 2694 /* If not doing DAD on this address, just keep it. */
2714 if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) || 2695 if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) ||
@@ -2724,10 +2705,17 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2724 ifa->flags |= IFA_F_TENTATIVE; 2705 ifa->flags |= IFA_F_TENTATIVE;
2725 in6_ifa_hold(ifa); 2706 in6_ifa_hold(ifa);
2726 } else { 2707 } else {
2708 list_del(&ifa->if_list);
2727 ifa->dead = 1; 2709 ifa->dead = 1;
2728 } 2710 }
2729 write_unlock_bh(&idev->lock); 2711 write_unlock_bh(&idev->lock);
2730 2712
2713 /* clear hash table */
2714 spin_lock_bh(&addrconf_hash_lock);
2715 hlist_del_init_rcu(&ifa->addr_lst);
2716 __in6_ifa_put(ifa);
2717 spin_unlock_bh(&addrconf_hash_lock);
2718
2731 __ipv6_ifa_notify(RTM_DELADDR, ifa); 2719 __ipv6_ifa_notify(RTM_DELADDR, ifa);
2732 atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); 2720 atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
2733 in6_ifa_put(ifa); 2721 in6_ifa_put(ifa);
@@ -2735,12 +2723,11 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2735 write_lock_bh(&idev->lock); 2723 write_lock_bh(&idev->lock);
2736 } 2724 }
2737 2725
2738 idev->addr_list = keep_list; 2726 list_splice(&keep_list, &idev->addr_list);
2739 2727
2740 write_unlock_bh(&idev->lock); 2728 write_unlock_bh(&idev->lock);
2741 2729
2742 /* Step 5: Discard multicast list */ 2730 /* Step 5: Discard multicast list */
2743
2744 if (how) 2731 if (how)
2745 ipv6_mc_destroy_dev(idev); 2732 ipv6_mc_destroy_dev(idev);
2746 else 2733 else
@@ -2748,8 +2735,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2748 2735
2749 idev->tstamp = jiffies; 2736 idev->tstamp = jiffies;
2750 2737
2751 /* Shot the device (if unregistered) */ 2738 /* Last: Shot the device (if unregistered) */
2752
2753 if (how) { 2739 if (how) {
2754 addrconf_sysctl_unregister(idev); 2740 addrconf_sysctl_unregister(idev);
2755 neigh_parms_release(&nd_tbl, idev->nd_parms); 2741 neigh_parms_release(&nd_tbl, idev->nd_parms);
@@ -2860,7 +2846,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
2860 * Optimistic nodes can start receiving 2846 * Optimistic nodes can start receiving
2861 * Frames right away 2847 * Frames right away
2862 */ 2848 */
2863 if(ifp->flags & IFA_F_OPTIMISTIC) 2849 if (ifp->flags & IFA_F_OPTIMISTIC)
2864 ip6_ins_rt(ifp->rt); 2850 ip6_ins_rt(ifp->rt);
2865 2851
2866 addrconf_dad_kick(ifp); 2852 addrconf_dad_kick(ifp);
@@ -2910,7 +2896,7 @@ out:
2910 2896
2911static void addrconf_dad_completed(struct inet6_ifaddr *ifp) 2897static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
2912{ 2898{
2913 struct net_device * dev = ifp->idev->dev; 2899 struct net_device *dev = ifp->idev->dev;
2914 2900
2915 /* 2901 /*
2916 * Configure the address for reception. Now it is valid. 2902 * Configure the address for reception. Now it is valid.
@@ -2941,11 +2927,12 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
2941 } 2927 }
2942} 2928}
2943 2929
2944static void addrconf_dad_run(struct inet6_dev *idev) { 2930static void addrconf_dad_run(struct inet6_dev *idev)
2931{
2945 struct inet6_ifaddr *ifp; 2932 struct inet6_ifaddr *ifp;
2946 2933
2947 read_lock_bh(&idev->lock); 2934 read_lock_bh(&idev->lock);
2948 for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) { 2935 list_for_each_entry(ifp, &idev->addr_list, if_list) {
2949 spin_lock(&ifp->lock); 2936 spin_lock(&ifp->lock);
2950 if (!(ifp->flags & IFA_F_TENTATIVE)) { 2937 if (!(ifp->flags & IFA_F_TENTATIVE)) {
2951 spin_unlock(&ifp->lock); 2938 spin_unlock(&ifp->lock);
@@ -2970,36 +2957,35 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
2970 struct net *net = seq_file_net(seq); 2957 struct net *net = seq_file_net(seq);
2971 2958
2972 for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { 2959 for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
2973 ifa = inet6_addr_lst[state->bucket]; 2960 struct hlist_node *n;
2974 2961 hlist_for_each_entry_rcu(ifa, n, &inet6_addr_lst[state->bucket],
2975 while (ifa && !net_eq(dev_net(ifa->idev->dev), net)) 2962 addr_lst)
2976 ifa = ifa->lst_next; 2963 if (net_eq(dev_net(ifa->idev->dev), net))
2977 if (ifa) 2964 return ifa;
2978 break;
2979 } 2965 }
2980 return ifa; 2966 return NULL;
2981} 2967}
2982 2968
2983static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa) 2969static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
2970 struct inet6_ifaddr *ifa)
2984{ 2971{
2985 struct if6_iter_state *state = seq->private; 2972 struct if6_iter_state *state = seq->private;
2986 struct net *net = seq_file_net(seq); 2973 struct net *net = seq_file_net(seq);
2974 struct hlist_node *n = &ifa->addr_lst;
2987 2975
2988 ifa = ifa->lst_next; 2976 hlist_for_each_entry_continue_rcu(ifa, n, addr_lst)
2989try_again: 2977 if (net_eq(dev_net(ifa->idev->dev), net))
2990 if (ifa) { 2978 return ifa;
2991 if (!net_eq(dev_net(ifa->idev->dev), net)) {
2992 ifa = ifa->lst_next;
2993 goto try_again;
2994 }
2995 }
2996 2979
2997 if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) { 2980 while (++state->bucket < IN6_ADDR_HSIZE) {
2998 ifa = inet6_addr_lst[state->bucket]; 2981 hlist_for_each_entry(ifa, n,
2999 goto try_again; 2982 &inet6_addr_lst[state->bucket], addr_lst) {
2983 if (net_eq(dev_net(ifa->idev->dev), net))
2984 return ifa;
2985 }
3000 } 2986 }
3001 2987
3002 return ifa; 2988 return NULL;
3003} 2989}
3004 2990
3005static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) 2991static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
@@ -3007,15 +2993,15 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
3007 struct inet6_ifaddr *ifa = if6_get_first(seq); 2993 struct inet6_ifaddr *ifa = if6_get_first(seq);
3008 2994
3009 if (ifa) 2995 if (ifa)
3010 while(pos && (ifa = if6_get_next(seq, ifa)) != NULL) 2996 while (pos && (ifa = if6_get_next(seq, ifa)) != NULL)
3011 --pos; 2997 --pos;
3012 return pos ? NULL : ifa; 2998 return pos ? NULL : ifa;
3013} 2999}
3014 3000
3015static void *if6_seq_start(struct seq_file *seq, loff_t *pos) 3001static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
3016 __acquires(addrconf_hash_lock) 3002 __acquires(rcu)
3017{ 3003{
3018 read_lock_bh(&addrconf_hash_lock); 3004 rcu_read_lock_bh();
3019 return if6_get_idx(seq, *pos); 3005 return if6_get_idx(seq, *pos);
3020} 3006}
3021 3007
@@ -3029,9 +3015,9 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3029} 3015}
3030 3016
3031static void if6_seq_stop(struct seq_file *seq, void *v) 3017static void if6_seq_stop(struct seq_file *seq, void *v)
3032 __releases(addrconf_hash_lock) 3018 __releases(rcu)
3033{ 3019{
3034 read_unlock_bh(&addrconf_hash_lock); 3020 rcu_read_unlock_bh();
3035} 3021}
3036 3022
3037static int if6_seq_show(struct seq_file *seq, void *v) 3023static int if6_seq_show(struct seq_file *seq, void *v)
@@ -3101,10 +3087,12 @@ void if6_proc_exit(void)
3101int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) 3087int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
3102{ 3088{
3103 int ret = 0; 3089 int ret = 0;
3104 struct inet6_ifaddr * ifp; 3090 struct inet6_ifaddr *ifp = NULL;
3105 u8 hash = ipv6_addr_hash(addr); 3091 struct hlist_node *n;
3106 read_lock_bh(&addrconf_hash_lock); 3092 unsigned int hash = ipv6_addr_hash(addr);
3107 for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { 3093
3094 rcu_read_lock_bh();
3095 hlist_for_each_entry_rcu(ifp, n, &inet6_addr_lst[hash], addr_lst) {
3108 if (!net_eq(dev_net(ifp->idev->dev), net)) 3096 if (!net_eq(dev_net(ifp->idev->dev), net))
3109 continue; 3097 continue;
3110 if (ipv6_addr_equal(&ifp->addr, addr) && 3098 if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -3113,7 +3101,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
3113 break; 3101 break;
3114 } 3102 }
3115 } 3103 }
3116 read_unlock_bh(&addrconf_hash_lock); 3104 rcu_read_unlock_bh();
3117 return ret; 3105 return ret;
3118} 3106}
3119#endif 3107#endif
@@ -3124,43 +3112,35 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
3124 3112
3125static void addrconf_verify(unsigned long foo) 3113static void addrconf_verify(unsigned long foo)
3126{ 3114{
3115 unsigned long now, next, next_sec, next_sched;
3127 struct inet6_ifaddr *ifp; 3116 struct inet6_ifaddr *ifp;
3128 unsigned long now, next; 3117 struct hlist_node *node;
3129 int i; 3118 int i;
3130 3119
3131 spin_lock_bh(&addrconf_verify_lock); 3120 rcu_read_lock_bh();
3121 spin_lock(&addrconf_verify_lock);
3132 now = jiffies; 3122 now = jiffies;
3133 next = now + ADDR_CHECK_FREQUENCY; 3123 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
3134 3124
3135 del_timer(&addr_chk_timer); 3125 del_timer(&addr_chk_timer);
3136 3126
3137 for (i=0; i < IN6_ADDR_HSIZE; i++) { 3127 for (i = 0; i < IN6_ADDR_HSIZE; i++) {
3138
3139restart: 3128restart:
3140 read_lock(&addrconf_hash_lock); 3129 hlist_for_each_entry_rcu(ifp, node,
3141 for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) { 3130 &inet6_addr_lst[i], addr_lst) {
3142 unsigned long age; 3131 unsigned long age;
3143#ifdef CONFIG_IPV6_PRIVACY
3144 unsigned long regen_advance;
3145#endif
3146 3132
3147 if (ifp->flags & IFA_F_PERMANENT) 3133 if (ifp->flags & IFA_F_PERMANENT)
3148 continue; 3134 continue;
3149 3135
3150 spin_lock(&ifp->lock); 3136 spin_lock(&ifp->lock);
3151 age = (now - ifp->tstamp) / HZ; 3137 /* We try to batch several events at once. */
3152 3138 age = (now - ifp->tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
3153#ifdef CONFIG_IPV6_PRIVACY
3154 regen_advance = ifp->idev->cnf.regen_max_retry *
3155 ifp->idev->cnf.dad_transmits *
3156 ifp->idev->nd_parms->retrans_time / HZ;
3157#endif
3158 3139
3159 if (ifp->valid_lft != INFINITY_LIFE_TIME && 3140 if (ifp->valid_lft != INFINITY_LIFE_TIME &&
3160 age >= ifp->valid_lft) { 3141 age >= ifp->valid_lft) {
3161 spin_unlock(&ifp->lock); 3142 spin_unlock(&ifp->lock);
3162 in6_ifa_hold(ifp); 3143 in6_ifa_hold(ifp);
3163 read_unlock(&addrconf_hash_lock);
3164 ipv6_del_addr(ifp); 3144 ipv6_del_addr(ifp);
3165 goto restart; 3145 goto restart;
3166 } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) { 3146 } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) {
@@ -3182,7 +3162,6 @@ restart:
3182 3162
3183 if (deprecate) { 3163 if (deprecate) {
3184 in6_ifa_hold(ifp); 3164 in6_ifa_hold(ifp);
3185 read_unlock(&addrconf_hash_lock);
3186 3165
3187 ipv6_ifa_notify(0, ifp); 3166 ipv6_ifa_notify(0, ifp);
3188 in6_ifa_put(ifp); 3167 in6_ifa_put(ifp);
@@ -3191,6 +3170,10 @@ restart:
3191#ifdef CONFIG_IPV6_PRIVACY 3170#ifdef CONFIG_IPV6_PRIVACY
3192 } else if ((ifp->flags&IFA_F_TEMPORARY) && 3171 } else if ((ifp->flags&IFA_F_TEMPORARY) &&
3193 !(ifp->flags&IFA_F_TENTATIVE)) { 3172 !(ifp->flags&IFA_F_TENTATIVE)) {
3173 unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
3174 ifp->idev->cnf.dad_transmits *
3175 ifp->idev->nd_parms->retrans_time / HZ;
3176
3194 if (age >= ifp->prefered_lft - regen_advance) { 3177 if (age >= ifp->prefered_lft - regen_advance) {
3195 struct inet6_ifaddr *ifpub = ifp->ifpub; 3178 struct inet6_ifaddr *ifpub = ifp->ifpub;
3196 if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) 3179 if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
@@ -3200,7 +3183,7 @@ restart:
3200 in6_ifa_hold(ifp); 3183 in6_ifa_hold(ifp);
3201 in6_ifa_hold(ifpub); 3184 in6_ifa_hold(ifpub);
3202 spin_unlock(&ifp->lock); 3185 spin_unlock(&ifp->lock);
3203 read_unlock(&addrconf_hash_lock); 3186
3204 spin_lock(&ifpub->lock); 3187 spin_lock(&ifpub->lock);
3205 ifpub->regen_count = 0; 3188 ifpub->regen_count = 0;
3206 spin_unlock(&ifpub->lock); 3189 spin_unlock(&ifpub->lock);
@@ -3220,12 +3203,26 @@ restart:
3220 spin_unlock(&ifp->lock); 3203 spin_unlock(&ifp->lock);
3221 } 3204 }
3222 } 3205 }
3223 read_unlock(&addrconf_hash_lock);
3224 } 3206 }
3225 3207
3226 addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next; 3208 next_sec = round_jiffies_up(next);
3209 next_sched = next;
3210
3211 /* If rounded timeout is accurate enough, accept it. */
3212 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
3213 next_sched = next_sec;
3214
3215 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
3216 if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
3217 next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
3218
3219 ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
3220 now, next, next_sec, next_sched));
3221
3222 addr_chk_timer.expires = next_sched;
3227 add_timer(&addr_chk_timer); 3223 add_timer(&addr_chk_timer);
3228 spin_unlock_bh(&addrconf_verify_lock); 3224 spin_unlock(&addrconf_verify_lock);
3225 rcu_read_unlock_bh();
3229} 3226}
3230 3227
3231static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) 3228static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
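The rescheduling added to addrconf_verify() above first rounds the computed deadline up to a whole second, keeps that rounded value only when it stays within ADDRCONF_TIMER_FUZZ of the exact deadline, and then clamps the result so the timer never fires sooner than ADDRCONF_TIMER_FUZZ_MAX from now. A rough standalone model of that clamp, with plain integers standing in for jiffies and made-up fuzz values (the HZ, TIMER_FUZZ* constants and round_up_to_second() below are illustrative stand-ins, not the kernel's, and wraparound-safe time_before() is simplified to a plain comparison):

	#include <stdio.h>

	#define HZ              250        /* assumed tick rate for this model */
	#define TIMER_FUZZ      (HZ / 10)  /* stand-in for ADDRCONF_TIMER_FUZZ */
	#define TIMER_FUZZ_MAX  (HZ)       /* stand-in for ADDRCONF_TIMER_FUZZ_MAX */

	/* Crude model of round_jiffies_up(): round up to the next whole second. */
	static unsigned long round_up_to_second(unsigned long t)
	{
		return ((t + HZ - 1) / HZ) * HZ;
	}

	static unsigned long next_sched_time(unsigned long now, unsigned long next)
	{
		unsigned long next_sec = round_up_to_second(next);
		unsigned long next_sched = next;

		/* If the rounded timeout is accurate enough, accept it. */
		if (next_sec < next + TIMER_FUZZ)
			next_sched = next_sec;

		/* And never schedule sooner than the minimum interval from now. */
		if (next_sched < now + TIMER_FUZZ_MAX)
			next_sched = now + TIMER_FUZZ_MAX;

		return next_sched;
	}

	int main(void)
	{
		/* Deadline only 10 ticks away: the minimum-interval clamp pushes it to 1250. */
		printf("now=1000 next=1010 -> %lu\n", next_sched_time(1000, 1010));
		/* Deadline at 1490: rounding to 1500 is within the fuzz, so 1500 is used. */
		printf("now=1000 next=1490 -> %lu\n", next_sched_time(1000, 1490));
		return 0;
	}

The point of the heuristic is that many idle systems can then batch the verify timer onto second boundaries without ever delaying an address expiry by more than the fuzz window.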
@@ -3515,8 +3512,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
3515 return nlmsg_end(skb, nlh); 3512 return nlmsg_end(skb, nlh);
3516} 3513}
3517 3514
3518enum addr_type_t 3515enum addr_type_t {
3519{
3520 UNICAST_ADDR, 3516 UNICAST_ADDR,
3521 MULTICAST_ADDR, 3517 MULTICAST_ADDR,
3522 ANYCAST_ADDR, 3518 ANYCAST_ADDR,
@@ -3527,7 +3523,6 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
3527 struct netlink_callback *cb, enum addr_type_t type, 3523 struct netlink_callback *cb, enum addr_type_t type,
3528 int s_ip_idx, int *p_ip_idx) 3524 int s_ip_idx, int *p_ip_idx)
3529{ 3525{
3530 struct inet6_ifaddr *ifa;
3531 struct ifmcaddr6 *ifmca; 3526 struct ifmcaddr6 *ifmca;
3532 struct ifacaddr6 *ifaca; 3527 struct ifacaddr6 *ifaca;
3533 int err = 1; 3528 int err = 1;
@@ -3535,11 +3530,12 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
3535 3530
3536 read_lock_bh(&idev->lock); 3531 read_lock_bh(&idev->lock);
3537 switch (type) { 3532 switch (type) {
3538 case UNICAST_ADDR: 3533 case UNICAST_ADDR: {
3534 struct inet6_ifaddr *ifa;
3535
3539 /* unicast address incl. temp addr */ 3536 /* unicast address incl. temp addr */
3540 for (ifa = idev->addr_list; ifa; 3537 list_for_each_entry(ifa, &idev->addr_list, if_list) {
3541 ifa = ifa->if_next, ip_idx++) { 3538 if (++ip_idx < s_ip_idx)
3542 if (ip_idx < s_ip_idx)
3543 continue; 3539 continue;
3544 err = inet6_fill_ifaddr(skb, ifa, 3540 err = inet6_fill_ifaddr(skb, ifa,
3545 NETLINK_CB(cb->skb).pid, 3541 NETLINK_CB(cb->skb).pid,
@@ -3550,6 +3546,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
3550 break; 3546 break;
3551 } 3547 }
3552 break; 3548 break;
3549 }
3553 case MULTICAST_ADDR: 3550 case MULTICAST_ADDR:
3554 /* multicast address */ 3551 /* multicast address */
3555 for (ifmca = idev->mc_list; ifmca; 3552 for (ifmca = idev->mc_list; ifmca;
@@ -3614,7 +3611,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
3614 if (h > s_h || idx > s_idx) 3611 if (h > s_h || idx > s_idx)
3615 s_ip_idx = 0; 3612 s_ip_idx = 0;
3616 ip_idx = 0; 3613 ip_idx = 0;
3617 if ((idev = __in6_dev_get(dev)) == NULL) 3614 idev = __in6_dev_get(dev);
3615 if (!idev)
3618 goto cont; 3616 goto cont;
3619 3617
3620 if (in6_dump_addrs(idev, skb, cb, type, 3618 if (in6_dump_addrs(idev, skb, cb, type,
@@ -3681,12 +3679,14 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
3681 if (ifm->ifa_index) 3679 if (ifm->ifa_index)
3682 dev = __dev_get_by_index(net, ifm->ifa_index); 3680 dev = __dev_get_by_index(net, ifm->ifa_index);
3683 3681
3684 if ((ifa = ipv6_get_ifaddr(net, addr, dev, 1)) == NULL) { 3682 ifa = ipv6_get_ifaddr(net, addr, dev, 1);
3683 if (!ifa) {
3685 err = -EADDRNOTAVAIL; 3684 err = -EADDRNOTAVAIL;
3686 goto errout; 3685 goto errout;
3687 } 3686 }
3688 3687
3689 if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) { 3688 skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL);
3689 if (!skb) {
3690 err = -ENOBUFS; 3690 err = -ENOBUFS;
3691 goto errout_ifa; 3691 goto errout_ifa;
3692 } 3692 }
@@ -3811,7 +3811,7 @@ static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib,
3811static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, 3811static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
3812 int bytes) 3812 int bytes)
3813{ 3813{
3814 switch(attrtype) { 3814 switch (attrtype) {
3815 case IFLA_INET6_STATS: 3815 case IFLA_INET6_STATS:
3816 __snmp6_fill_stats(stats, (void __percpu **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes); 3816 __snmp6_fill_stats(stats, (void __percpu **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
3817 break; 3817 break;
@@ -4163,211 +4163,211 @@ static struct addrconf_sysctl_table
4163 .sysctl_header = NULL, 4163 .sysctl_header = NULL,
4164 .addrconf_vars = { 4164 .addrconf_vars = {
4165 { 4165 {
4166 .procname = "forwarding", 4166 .procname = "forwarding",
4167 .data = &ipv6_devconf.forwarding, 4167 .data = &ipv6_devconf.forwarding,
4168 .maxlen = sizeof(int), 4168 .maxlen = sizeof(int),
4169 .mode = 0644, 4169 .mode = 0644,
4170 .proc_handler = addrconf_sysctl_forward, 4170 .proc_handler = addrconf_sysctl_forward,
4171 }, 4171 },
4172 { 4172 {
4173 .procname = "hop_limit", 4173 .procname = "hop_limit",
4174 .data = &ipv6_devconf.hop_limit, 4174 .data = &ipv6_devconf.hop_limit,
4175 .maxlen = sizeof(int), 4175 .maxlen = sizeof(int),
4176 .mode = 0644, 4176 .mode = 0644,
4177 .proc_handler = proc_dointvec, 4177 .proc_handler = proc_dointvec,
4178 }, 4178 },
4179 { 4179 {
4180 .procname = "mtu", 4180 .procname = "mtu",
4181 .data = &ipv6_devconf.mtu6, 4181 .data = &ipv6_devconf.mtu6,
4182 .maxlen = sizeof(int), 4182 .maxlen = sizeof(int),
4183 .mode = 0644, 4183 .mode = 0644,
4184 .proc_handler = proc_dointvec, 4184 .proc_handler = proc_dointvec,
4185 }, 4185 },
4186 { 4186 {
4187 .procname = "accept_ra", 4187 .procname = "accept_ra",
4188 .data = &ipv6_devconf.accept_ra, 4188 .data = &ipv6_devconf.accept_ra,
4189 .maxlen = sizeof(int), 4189 .maxlen = sizeof(int),
4190 .mode = 0644, 4190 .mode = 0644,
4191 .proc_handler = proc_dointvec, 4191 .proc_handler = proc_dointvec,
4192 }, 4192 },
4193 { 4193 {
4194 .procname = "accept_redirects", 4194 .procname = "accept_redirects",
4195 .data = &ipv6_devconf.accept_redirects, 4195 .data = &ipv6_devconf.accept_redirects,
4196 .maxlen = sizeof(int), 4196 .maxlen = sizeof(int),
4197 .mode = 0644, 4197 .mode = 0644,
4198 .proc_handler = proc_dointvec, 4198 .proc_handler = proc_dointvec,
4199 }, 4199 },
4200 { 4200 {
4201 .procname = "autoconf", 4201 .procname = "autoconf",
4202 .data = &ipv6_devconf.autoconf, 4202 .data = &ipv6_devconf.autoconf,
4203 .maxlen = sizeof(int), 4203 .maxlen = sizeof(int),
4204 .mode = 0644, 4204 .mode = 0644,
4205 .proc_handler = proc_dointvec, 4205 .proc_handler = proc_dointvec,
4206 }, 4206 },
4207 { 4207 {
4208 .procname = "dad_transmits", 4208 .procname = "dad_transmits",
4209 .data = &ipv6_devconf.dad_transmits, 4209 .data = &ipv6_devconf.dad_transmits,
4210 .maxlen = sizeof(int), 4210 .maxlen = sizeof(int),
4211 .mode = 0644, 4211 .mode = 0644,
4212 .proc_handler = proc_dointvec, 4212 .proc_handler = proc_dointvec,
4213 }, 4213 },
4214 { 4214 {
4215 .procname = "router_solicitations", 4215 .procname = "router_solicitations",
4216 .data = &ipv6_devconf.rtr_solicits, 4216 .data = &ipv6_devconf.rtr_solicits,
4217 .maxlen = sizeof(int), 4217 .maxlen = sizeof(int),
4218 .mode = 0644, 4218 .mode = 0644,
4219 .proc_handler = proc_dointvec, 4219 .proc_handler = proc_dointvec,
4220 }, 4220 },
4221 { 4221 {
4222 .procname = "router_solicitation_interval", 4222 .procname = "router_solicitation_interval",
4223 .data = &ipv6_devconf.rtr_solicit_interval, 4223 .data = &ipv6_devconf.rtr_solicit_interval,
4224 .maxlen = sizeof(int), 4224 .maxlen = sizeof(int),
4225 .mode = 0644, 4225 .mode = 0644,
4226 .proc_handler = proc_dointvec_jiffies, 4226 .proc_handler = proc_dointvec_jiffies,
4227 }, 4227 },
4228 { 4228 {
4229 .procname = "router_solicitation_delay", 4229 .procname = "router_solicitation_delay",
4230 .data = &ipv6_devconf.rtr_solicit_delay, 4230 .data = &ipv6_devconf.rtr_solicit_delay,
4231 .maxlen = sizeof(int), 4231 .maxlen = sizeof(int),
4232 .mode = 0644, 4232 .mode = 0644,
4233 .proc_handler = proc_dointvec_jiffies, 4233 .proc_handler = proc_dointvec_jiffies,
4234 }, 4234 },
4235 { 4235 {
4236 .procname = "force_mld_version", 4236 .procname = "force_mld_version",
4237 .data = &ipv6_devconf.force_mld_version, 4237 .data = &ipv6_devconf.force_mld_version,
4238 .maxlen = sizeof(int), 4238 .maxlen = sizeof(int),
4239 .mode = 0644, 4239 .mode = 0644,
4240 .proc_handler = proc_dointvec, 4240 .proc_handler = proc_dointvec,
4241 }, 4241 },
4242#ifdef CONFIG_IPV6_PRIVACY 4242#ifdef CONFIG_IPV6_PRIVACY
4243 { 4243 {
4244 .procname = "use_tempaddr", 4244 .procname = "use_tempaddr",
4245 .data = &ipv6_devconf.use_tempaddr, 4245 .data = &ipv6_devconf.use_tempaddr,
4246 .maxlen = sizeof(int), 4246 .maxlen = sizeof(int),
4247 .mode = 0644, 4247 .mode = 0644,
4248 .proc_handler = proc_dointvec, 4248 .proc_handler = proc_dointvec,
4249 }, 4249 },
4250 { 4250 {
4251 .procname = "temp_valid_lft", 4251 .procname = "temp_valid_lft",
4252 .data = &ipv6_devconf.temp_valid_lft, 4252 .data = &ipv6_devconf.temp_valid_lft,
4253 .maxlen = sizeof(int), 4253 .maxlen = sizeof(int),
4254 .mode = 0644, 4254 .mode = 0644,
4255 .proc_handler = proc_dointvec, 4255 .proc_handler = proc_dointvec,
4256 }, 4256 },
4257 { 4257 {
4258 .procname = "temp_prefered_lft", 4258 .procname = "temp_prefered_lft",
4259 .data = &ipv6_devconf.temp_prefered_lft, 4259 .data = &ipv6_devconf.temp_prefered_lft,
4260 .maxlen = sizeof(int), 4260 .maxlen = sizeof(int),
4261 .mode = 0644, 4261 .mode = 0644,
4262 .proc_handler = proc_dointvec, 4262 .proc_handler = proc_dointvec,
4263 }, 4263 },
4264 { 4264 {
4265 .procname = "regen_max_retry", 4265 .procname = "regen_max_retry",
4266 .data = &ipv6_devconf.regen_max_retry, 4266 .data = &ipv6_devconf.regen_max_retry,
4267 .maxlen = sizeof(int), 4267 .maxlen = sizeof(int),
4268 .mode = 0644, 4268 .mode = 0644,
4269 .proc_handler = proc_dointvec, 4269 .proc_handler = proc_dointvec,
4270 }, 4270 },
4271 { 4271 {
4272 .procname = "max_desync_factor", 4272 .procname = "max_desync_factor",
4273 .data = &ipv6_devconf.max_desync_factor, 4273 .data = &ipv6_devconf.max_desync_factor,
4274 .maxlen = sizeof(int), 4274 .maxlen = sizeof(int),
4275 .mode = 0644, 4275 .mode = 0644,
4276 .proc_handler = proc_dointvec, 4276 .proc_handler = proc_dointvec,
4277 }, 4277 },
4278#endif 4278#endif
4279 { 4279 {
4280 .procname = "max_addresses", 4280 .procname = "max_addresses",
4281 .data = &ipv6_devconf.max_addresses, 4281 .data = &ipv6_devconf.max_addresses,
4282 .maxlen = sizeof(int), 4282 .maxlen = sizeof(int),
4283 .mode = 0644, 4283 .mode = 0644,
4284 .proc_handler = proc_dointvec, 4284 .proc_handler = proc_dointvec,
4285 }, 4285 },
4286 { 4286 {
4287 .procname = "accept_ra_defrtr", 4287 .procname = "accept_ra_defrtr",
4288 .data = &ipv6_devconf.accept_ra_defrtr, 4288 .data = &ipv6_devconf.accept_ra_defrtr,
4289 .maxlen = sizeof(int), 4289 .maxlen = sizeof(int),
4290 .mode = 0644, 4290 .mode = 0644,
4291 .proc_handler = proc_dointvec, 4291 .proc_handler = proc_dointvec,
4292 }, 4292 },
4293 { 4293 {
4294 .procname = "accept_ra_pinfo", 4294 .procname = "accept_ra_pinfo",
4295 .data = &ipv6_devconf.accept_ra_pinfo, 4295 .data = &ipv6_devconf.accept_ra_pinfo,
4296 .maxlen = sizeof(int), 4296 .maxlen = sizeof(int),
4297 .mode = 0644, 4297 .mode = 0644,
4298 .proc_handler = proc_dointvec, 4298 .proc_handler = proc_dointvec,
4299 }, 4299 },
4300#ifdef CONFIG_IPV6_ROUTER_PREF 4300#ifdef CONFIG_IPV6_ROUTER_PREF
4301 { 4301 {
4302 .procname = "accept_ra_rtr_pref", 4302 .procname = "accept_ra_rtr_pref",
4303 .data = &ipv6_devconf.accept_ra_rtr_pref, 4303 .data = &ipv6_devconf.accept_ra_rtr_pref,
4304 .maxlen = sizeof(int), 4304 .maxlen = sizeof(int),
4305 .mode = 0644, 4305 .mode = 0644,
4306 .proc_handler = proc_dointvec, 4306 .proc_handler = proc_dointvec,
4307 }, 4307 },
4308 { 4308 {
4309 .procname = "router_probe_interval", 4309 .procname = "router_probe_interval",
4310 .data = &ipv6_devconf.rtr_probe_interval, 4310 .data = &ipv6_devconf.rtr_probe_interval,
4311 .maxlen = sizeof(int), 4311 .maxlen = sizeof(int),
4312 .mode = 0644, 4312 .mode = 0644,
4313 .proc_handler = proc_dointvec_jiffies, 4313 .proc_handler = proc_dointvec_jiffies,
4314 }, 4314 },
4315#ifdef CONFIG_IPV6_ROUTE_INFO 4315#ifdef CONFIG_IPV6_ROUTE_INFO
4316 { 4316 {
4317 .procname = "accept_ra_rt_info_max_plen", 4317 .procname = "accept_ra_rt_info_max_plen",
4318 .data = &ipv6_devconf.accept_ra_rt_info_max_plen, 4318 .data = &ipv6_devconf.accept_ra_rt_info_max_plen,
4319 .maxlen = sizeof(int), 4319 .maxlen = sizeof(int),
4320 .mode = 0644, 4320 .mode = 0644,
4321 .proc_handler = proc_dointvec, 4321 .proc_handler = proc_dointvec,
4322 }, 4322 },
4323#endif 4323#endif
4324#endif 4324#endif
4325 { 4325 {
4326 .procname = "proxy_ndp", 4326 .procname = "proxy_ndp",
4327 .data = &ipv6_devconf.proxy_ndp, 4327 .data = &ipv6_devconf.proxy_ndp,
4328 .maxlen = sizeof(int), 4328 .maxlen = sizeof(int),
4329 .mode = 0644, 4329 .mode = 0644,
4330 .proc_handler = proc_dointvec, 4330 .proc_handler = proc_dointvec,
4331 }, 4331 },
4332 { 4332 {
4333 .procname = "accept_source_route", 4333 .procname = "accept_source_route",
4334 .data = &ipv6_devconf.accept_source_route, 4334 .data = &ipv6_devconf.accept_source_route,
4335 .maxlen = sizeof(int), 4335 .maxlen = sizeof(int),
4336 .mode = 0644, 4336 .mode = 0644,
4337 .proc_handler = proc_dointvec, 4337 .proc_handler = proc_dointvec,
4338 }, 4338 },
4339#ifdef CONFIG_IPV6_OPTIMISTIC_DAD 4339#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
4340 { 4340 {
4341 .procname = "optimistic_dad", 4341 .procname = "optimistic_dad",
4342 .data = &ipv6_devconf.optimistic_dad, 4342 .data = &ipv6_devconf.optimistic_dad,
4343 .maxlen = sizeof(int), 4343 .maxlen = sizeof(int),
4344 .mode = 0644, 4344 .mode = 0644,
4345 .proc_handler = proc_dointvec, 4345 .proc_handler = proc_dointvec,
4346 4346
4347 }, 4347 },
4348#endif 4348#endif
4349#ifdef CONFIG_IPV6_MROUTE 4349#ifdef CONFIG_IPV6_MROUTE
4350 { 4350 {
4351 .procname = "mc_forwarding", 4351 .procname = "mc_forwarding",
4352 .data = &ipv6_devconf.mc_forwarding, 4352 .data = &ipv6_devconf.mc_forwarding,
4353 .maxlen = sizeof(int), 4353 .maxlen = sizeof(int),
4354 .mode = 0444, 4354 .mode = 0444,
4355 .proc_handler = proc_dointvec, 4355 .proc_handler = proc_dointvec,
4356 }, 4356 },
4357#endif 4357#endif
4358 { 4358 {
4359 .procname = "disable_ipv6", 4359 .procname = "disable_ipv6",
4360 .data = &ipv6_devconf.disable_ipv6, 4360 .data = &ipv6_devconf.disable_ipv6,
4361 .maxlen = sizeof(int), 4361 .maxlen = sizeof(int),
4362 .mode = 0644, 4362 .mode = 0644,
4363 .proc_handler = addrconf_sysctl_disable, 4363 .proc_handler = addrconf_sysctl_disable,
4364 }, 4364 },
4365 { 4365 {
4366 .procname = "accept_dad", 4366 .procname = "accept_dad",
4367 .data = &ipv6_devconf.accept_dad, 4367 .data = &ipv6_devconf.accept_dad,
4368 .maxlen = sizeof(int), 4368 .maxlen = sizeof(int),
4369 .mode = 0644, 4369 .mode = 0644,
4370 .proc_handler = proc_dointvec, 4370 .proc_handler = proc_dointvec,
4371 }, 4371 },
4372 { 4372 {
4373 .procname = "force_tllao", 4373 .procname = "force_tllao",
@@ -4403,8 +4403,8 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
4403 if (t == NULL) 4403 if (t == NULL)
4404 goto out; 4404 goto out;
4405 4405
4406 for (i=0; t->addrconf_vars[i].data; i++) { 4406 for (i = 0; t->addrconf_vars[i].data; i++) {
4407 t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; 4407 t->addrconf_vars[i].data += (char *)p - (char *)&ipv6_devconf;
4408 t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ 4408 t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
4409 t->addrconf_vars[i].extra2 = net; 4409 t->addrconf_vars[i].extra2 = net;
4410 } 4410 }
@@ -4541,14 +4541,12 @@ int register_inet6addr_notifier(struct notifier_block *nb)
4541{ 4541{
4542 return atomic_notifier_chain_register(&inet6addr_chain, nb); 4542 return atomic_notifier_chain_register(&inet6addr_chain, nb);
4543} 4543}
4544
4545EXPORT_SYMBOL(register_inet6addr_notifier); 4544EXPORT_SYMBOL(register_inet6addr_notifier);
4546 4545
4547int unregister_inet6addr_notifier(struct notifier_block *nb) 4546int unregister_inet6addr_notifier(struct notifier_block *nb)
4548{ 4547{
4549 return atomic_notifier_chain_unregister(&inet6addr_chain,nb); 4548 return atomic_notifier_chain_unregister(&inet6addr_chain, nb);
4550} 4549}
4551
4552EXPORT_SYMBOL(unregister_inet6addr_notifier); 4550EXPORT_SYMBOL(unregister_inet6addr_notifier);
4553 4551
4554/* 4552/*
@@ -4557,11 +4555,12 @@ EXPORT_SYMBOL(unregister_inet6addr_notifier);
4557 4555
4558int __init addrconf_init(void) 4556int __init addrconf_init(void)
4559{ 4557{
4560 int err; 4558 int i, err;
4561 4559
4562 if ((err = ipv6_addr_label_init()) < 0) { 4560 err = ipv6_addr_label_init();
4563 printk(KERN_CRIT "IPv6 Addrconf: cannot initialize default policy table: %d.\n", 4561 if (err < 0) {
4564 err); 4562 printk(KERN_CRIT "IPv6 Addrconf:"
4563 " cannot initialize default policy table: %d.\n", err);
4565 return err; 4564 return err;
4566 } 4565 }
4567 4566
@@ -4592,6 +4591,9 @@ int __init addrconf_init(void)
4592 if (err) 4591 if (err)
4593 goto errlo; 4592 goto errlo;
4594 4593
4594 for (i = 0; i < IN6_ADDR_HSIZE; i++)
4595 INIT_HLIST_HEAD(&inet6_addr_lst[i]);
4596
4595 register_netdevice_notifier(&ipv6_dev_notf); 4597 register_netdevice_notifier(&ipv6_dev_notf);
4596 4598
4597 addrconf_verify(0); 4599 addrconf_verify(0);
@@ -4620,7 +4622,6 @@ errlo:
4620 4622
4621void addrconf_cleanup(void) 4623void addrconf_cleanup(void)
4622{ 4624{
4623 struct inet6_ifaddr *ifa;
4624 struct net_device *dev; 4625 struct net_device *dev;
4625 int i; 4626 int i;
4626 4627
@@ -4640,20 +4641,10 @@ void addrconf_cleanup(void)
4640 /* 4641 /*
4641 * Check hash table. 4642 * Check hash table.
4642 */ 4643 */
4643 write_lock_bh(&addrconf_hash_lock); 4644 spin_lock_bh(&addrconf_hash_lock);
4644 for (i=0; i < IN6_ADDR_HSIZE; i++) { 4645 for (i = 0; i < IN6_ADDR_HSIZE; i++)
4645 for (ifa=inet6_addr_lst[i]; ifa; ) { 4646 WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
4646 struct inet6_ifaddr *bifa; 4647 spin_unlock_bh(&addrconf_hash_lock);
4647
4648 bifa = ifa;
4649 ifa = ifa->lst_next;
4650 printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa);
4651 /* Do not free it; something is wrong.
4652 Now we can investigate it with debugger.
4653 */
4654 }
4655 }
4656 write_unlock_bh(&addrconf_hash_lock);
4657 4648
4658 del_timer(&addr_chk_timer); 4649 del_timer(&addr_chk_timer);
4659 rtnl_unlock(); 4650 rtnl_unlock();
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 3330a4bd6157..12d2fa42657d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -483,6 +483,7 @@ route_done:
483 np->tclass, NULL, &fl, (struct rt6_info*)dst, 483 np->tclass, NULL, &fl, (struct rt6_info*)dst,
484 MSG_DONTWAIT); 484 MSG_DONTWAIT);
485 if (err) { 485 if (err) {
486 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
486 ip6_flush_pending_frames(sk); 487 ip6_flush_pending_frames(sk);
487 goto out_put; 488 goto out_put;
488 } 489 }
@@ -563,6 +564,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
563 (struct rt6_info*)dst, MSG_DONTWAIT); 564 (struct rt6_info*)dst, MSG_DONTWAIT);
564 565
565 if (err) { 566 if (err) {
567 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
566 ip6_flush_pending_frames(sk); 568 ip6_flush_pending_frames(sk);
567 goto out_put; 569 goto out_put;
568 } 570 }
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 6b82e02158c6..dc6e0b8f260d 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -128,12 +128,23 @@ static __inline__ u32 fib6_new_sernum(void)
128/* 128/*
129 * test bit 129 * test bit
130 */ 130 */
131#if defined(__LITTLE_ENDIAN)
132# define BITOP_BE32_SWIZZLE (0x1F & ~7)
133#else
134# define BITOP_BE32_SWIZZLE 0
135#endif
131 136
132static __inline__ __be32 addr_bit_set(void *token, int fn_bit) 137static __inline__ __be32 addr_bit_set(void *token, int fn_bit)
133{ 138{
134 __be32 *addr = token; 139 __be32 *addr = token;
135 140 /*
136 return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5]; 141 * Here,
142 * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
143 * is optimized version of
144 * htonl(1 << ((~fn_bit)&0x1F))
145 * See include/asm-generic/bitops/le.h.
146 */
147 return (1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & addr[fn_bit >> 5];
137} 148}
138 149
139static __inline__ struct fib6_node * node_alloc(void) 150static __inline__ struct fib6_node * node_alloc(void)
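The new comment in addr_bit_set() above claims the swizzled shift is just the little-endian expansion of htonl(1 << ((~fn_bit) & 0x1F)). A small userspace check of that equivalence (this program is only illustrative and assumes a little-endian host, where BITOP_BE32_SWIZZLE works out to 0x18):

	#include <stdio.h>
	#include <stdint.h>
	#include <arpa/inet.h>

	int main(void)
	{
		int bit;

		for (bit = 0; bit < 32; bit++) {
			/* Swizzled form used by addr_bit_set() on little-endian. */
			uint32_t fast = 1u << ((~bit ^ 0x18) & 0x1f);
			/* Portable form it replaces. */
			uint32_t slow = htonl(1u << ((~bit) & 0x1f));

			if (fast != slow) {
				printf("mismatch at fn_bit=%d\n", bit);
				return 1;
			}
		}
		printf("equivalent for all 32 bit positions\n");
		return 0;
	}

Only the low five bits of fn_bit matter to either expression, so checking bit values 0..31 covers every case; XOR-ing with 0x18 flips which byte of the 32-bit word holds the set bit, which is exactly what the byte swap in htonl() would do.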
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index c483ab9fd67b..62ed08213d91 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -715,7 +715,7 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
715 if (!(mc->mca_flags&MAF_LOADED)) { 715 if (!(mc->mca_flags&MAF_LOADED)) {
716 mc->mca_flags |= MAF_LOADED; 716 mc->mca_flags |= MAF_LOADED;
717 if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0) 717 if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
718 dev_mc_add(dev, buf, dev->addr_len, 0); 718 dev_mc_add(dev, buf);
719 } 719 }
720 spin_unlock_bh(&mc->mca_lock); 720 spin_unlock_bh(&mc->mca_lock);
721 721
@@ -741,7 +741,7 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
741 if (mc->mca_flags&MAF_LOADED) { 741 if (mc->mca_flags&MAF_LOADED) {
742 mc->mca_flags &= ~MAF_LOADED; 742 mc->mca_flags &= ~MAF_LOADED;
743 if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0) 743 if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
744 dev_mc_delete(dev, buf, dev->addr_len, 0); 744 dev_mc_del(dev, buf);
745 } 745 }
746 746
747 if (mc->mca_flags & MAF_NOREPORT) 747 if (mc->mca_flags & MAF_NOREPORT)
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index cbe8dec9744b..e60677519e40 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -141,11 +141,11 @@ hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
141 } 141 }
142 142
143 /* Step to the next */ 143 /* Step to the next */
144 pr_debug("len%04X \n", optlen); 144 pr_debug("len%04X\n", optlen);
145 145
146 if ((ptr > skb->len - optlen || hdrlen < optlen) && 146 if ((ptr > skb->len - optlen || hdrlen < optlen) &&
147 temp < optinfo->optsnr - 1) { 147 temp < optinfo->optsnr - 1) {
148 pr_debug("new pointer is too large! \n"); 148 pr_debug("new pointer is too large!\n");
149 break; 149 break;
150 } 150 }
151 ptr += optlen; 151 ptr += optlen;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 58344c0fbd13..458eabfbe130 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -97,6 +97,7 @@ static const struct snmp_mib snmp6_icmp6_list[] = {
97 SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS), 97 SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS),
98 SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS), 98 SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS),
99 SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS), 99 SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),
100 SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS),
100 SNMP_MIB_SENTINEL 101 SNMP_MIB_SENTINEL
101}; 102};
102 103
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ae181651c75a..8c452fd5ceae 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -67,36 +67,6 @@ static int xfrm6_get_saddr(struct net *net,
67 return 0; 67 return 0;
68} 68}
69 69
70static struct dst_entry *
71__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
72{
73 struct dst_entry *dst;
74
75 /* Still not clear if we should set fl->fl6_{src,dst}... */
76 read_lock_bh(&policy->lock);
77 for (dst = policy->bundles; dst; dst = dst->next) {
78 struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
79 struct in6_addr fl_dst_prefix, fl_src_prefix;
80
81 ipv6_addr_prefix(&fl_dst_prefix,
82 &fl->fl6_dst,
83 xdst->u.rt6.rt6i_dst.plen);
84 ipv6_addr_prefix(&fl_src_prefix,
85 &fl->fl6_src,
86 xdst->u.rt6.rt6i_src.plen);
87 if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) &&
88 ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) &&
89 xfrm_bundle_ok(policy, xdst, fl, AF_INET6,
90 (xdst->u.rt6.rt6i_dst.plen != 128 ||
91 xdst->u.rt6.rt6i_src.plen != 128))) {
92 dst_clone(dst);
93 break;
94 }
95 }
96 read_unlock_bh(&policy->lock);
97 return dst;
98}
99
100static int xfrm6_get_tos(struct flowi *fl) 70static int xfrm6_get_tos(struct flowi *fl)
101{ 71{
102 return 0; 72 return 0;
@@ -291,7 +261,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
291 .dst_ops = &xfrm6_dst_ops, 261 .dst_ops = &xfrm6_dst_ops,
292 .dst_lookup = xfrm6_dst_lookup, 262 .dst_lookup = xfrm6_dst_lookup,
293 .get_saddr = xfrm6_get_saddr, 263 .get_saddr = xfrm6_get_saddr,
294 .find_bundle = __xfrm6_find_bundle,
295 .decode_session = _decode_session6, 264 .decode_session = _decode_session6,
296 .get_tos = xfrm6_get_tos, 265 .get_tos = xfrm6_get_tos,
297 .init_path = xfrm6_init_path, 266 .init_path = xfrm6_init_path,
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
index e2e893b474e9..8b915f3ac3b9 100644
--- a/net/irda/ircomm/ircomm_param.c
+++ b/net/irda/ircomm/ircomm_param.c
@@ -475,7 +475,7 @@ static int ircomm_param_dce(void *instance, irda_param_t *param, int get)
475 /* Check if any of the settings have changed */ 475 /* Check if any of the settings have changed */
476 if (dce & 0x0f) { 476 if (dce & 0x0f) {
477 if (dce & IRCOMM_DELTA_CTS) { 477 if (dce & IRCOMM_DELTA_CTS) {
478 IRDA_DEBUG(2, "%s(), CTS \n", __func__ ); 478 IRDA_DEBUG(2, "%s(), CTS\n", __func__ );
479 } 479 }
480 } 480 }
481 481
diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
new file mode 100644
index 000000000000..4b1e71751e10
--- /dev/null
+++ b/net/l2tp/Kconfig
@@ -0,0 +1,107 @@
1#
2# Layer Two Tunneling Protocol (L2TP)
3#
4
5menuconfig L2TP
6 tristate "Layer Two Tunneling Protocol (L2TP)"
7 depends on INET
8 ---help---
9 Layer Two Tunneling Protocol
10
11 From RFC 2661 <http://www.ietf.org/rfc/rfc2661.txt>.
12
13 L2TP facilitates the tunneling of packets across an
14 intervening network in a way that is as transparent as
15 possible to both end-users and applications.
16
17 L2TP is often used to tunnel PPP traffic over IP
18 tunnels. One IP tunnel may carry thousands of individual PPP
19 connections. L2TP is also used as a VPN protocol, popular
20 with home workers to connect to their offices.
21
22 L2TPv3 allows other protocols as well as PPP to be carried
23 over L2TP tunnels. L2TPv3 is defined in RFC 3931
24 <http://www.ietf.org/rfc/rfc3931.txt>.
25
26 The kernel component handles only L2TP data packets: a
 27	  userland daemon handles the L2TP control protocol (tunnel
28 and session setup). One such daemon is OpenL2TP
29 (http://openl2tp.org/).
30
31 If you don't need L2TP, say N. To compile all L2TP code as
32 modules, choose M here.
33
34config L2TP_DEBUGFS
35 tristate "L2TP debugfs support"
36 depends on L2TP && DEBUG_FS
37 help
38 Support for l2tp directory in debugfs filesystem. This may be
39 used to dump internal state of the l2tp drivers for problem
40 analysis.
41
42 If unsure, say 'Y'.
43
44 To compile this driver as a module, choose M here. The module
45 will be called l2tp_debugfs.
46
47config L2TP_V3
48 bool "L2TPv3 support (EXPERIMENTAL)"
49 depends on EXPERIMENTAL && L2TP
50 help
51 Layer Two Tunneling Protocol Version 3
52
53 From RFC 3931 <http://www.ietf.org/rfc/rfc3931.txt>.
54
55 The Layer Two Tunneling Protocol (L2TP) provides a dynamic
56 mechanism for tunneling Layer 2 (L2) "circuits" across a
57 packet-oriented data network (e.g., over IP). L2TP, as
58 originally defined in RFC 2661, is a standard method for
59 tunneling Point-to-Point Protocol (PPP) [RFC1661] sessions.
60 L2TP has since been adopted for tunneling a number of other
61 L2 protocols, including ATM, Frame Relay, HDLC and even raw
62 ethernet frames.
63
64 If you are connecting to L2TPv3 equipment, or you want to
65 tunnel raw ethernet frames using L2TP, say Y here. If
66 unsure, say N.
67
68config L2TP_IP
69 tristate "L2TP IP encapsulation for L2TPv3"
70 depends on L2TP_V3
71 help
72 Support for L2TP-over-IP socket family.
73
74 The L2TPv3 protocol defines two possible encapsulations for
75 L2TP frames, namely UDP and plain IP (without UDP). This
76 driver provides a new L2TPIP socket family with which
77 userspace L2TPv3 daemons may create L2TP/IP tunnel sockets
78 when UDP encapsulation is not required. When L2TP is carried
 79	  in IP packets, it uses IP protocol number 115, so this protocol
80 must be enabled in firewalls.
81
82 To compile this driver as a module, choose M here. The module
83 will be called l2tp_ip.
84
85config L2TP_ETH
86 tristate "L2TP ethernet pseudowire support for L2TPv3"
87 depends on L2TP_V3
88 help
89 Support for carrying raw ethernet frames over L2TPv3.
90
91 From RFC 4719 <http://www.ietf.org/rfc/rfc4719.txt>.
92
93 The Layer 2 Tunneling Protocol, Version 3 (L2TPv3) can be
94 used as a control protocol and for data encapsulation to set
95 up Pseudowires for transporting layer 2 Packet Data Units
96 across an IP network [RFC3931].
97
98 This driver provides an ethernet virtual interface for each
99 L2TP ethernet pseudowire instance. Standard Linux tools may
100 be used to assign an IP address to the local virtual
101 interface, or add the interface to a bridge.
102
103 If you are using L2TPv3, you will almost certainly want to
104 enable this option.
105
106 To compile this driver as a module, choose M here. The module
107 will be called l2tp_eth.
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
new file mode 100644
index 000000000000..110e7bc2de5e
--- /dev/null
+++ b/net/l2tp/Makefile
@@ -0,0 +1,12 @@
1#
2# Makefile for the L2TP subsystem.
3#
4
5obj-$(CONFIG_L2TP) += l2tp_core.o
6
7# Build l2tp as modules if L2TP is M
8obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o
9obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_IP)) += l2tp_ip.o
10obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_V3)) += l2tp_netlink.o
11obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_ETH)) += l2tp_eth.o
12obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_DEBUGFS)) += l2tp_debugfs.o
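Note on the $(subst y,$(CONFIG_L2TP),...) idiom used above: it rewrites a 'y' in each sub-option's value to the value of CONFIG_L2TP, so the sub-drivers are demoted to modules whenever the core is modular. For example, with CONFIG_L2TP=m and CONFIG_L2TP_ETH=y the expression expands to m and l2tp_eth.o lands in obj-m rather than obj-y; when both are y it stays obj-y.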
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
new file mode 100644
index 000000000000..98dfcce1a5fc
--- /dev/null
+++ b/net/l2tp/l2tp_core.c
@@ -0,0 +1,1692 @@
1/*
2 * L2TP core.
3 *
4 * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
5 *
6 * This file contains some code of the original L2TPv2 pppol2tp
7 * driver, which has the following copyright:
8 *
9 * Authors: Martijn van Oosterhout <kleptog@svana.org>
10 * James Chapman (jchapman@katalix.com)
11 * Contributors:
12 * Michal Ostrowski <mostrows@speakeasy.net>
13 * Arnaldo Carvalho de Melo <acme@xconectiva.com.br>
14 * David S. Miller (davem@redhat.com)
15 *
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License version 2 as
18 * published by the Free Software Foundation.
19 */
20
21#include <linux/module.h>
22#include <linux/string.h>
23#include <linux/list.h>
24#include <linux/rculist.h>
25#include <linux/uaccess.h>
26
27#include <linux/kernel.h>
28#include <linux/spinlock.h>
29#include <linux/kthread.h>
30#include <linux/sched.h>
31#include <linux/slab.h>
32#include <linux/errno.h>
33#include <linux/jiffies.h>
34
35#include <linux/netdevice.h>
36#include <linux/net.h>
37#include <linux/inetdevice.h>
38#include <linux/skbuff.h>
39#include <linux/init.h>
40#include <linux/in.h>
41#include <linux/ip.h>
42#include <linux/udp.h>
43#include <linux/l2tp.h>
44#include <linux/hash.h>
45#include <linux/sort.h>
46#include <linux/file.h>
47#include <linux/nsproxy.h>
48#include <net/net_namespace.h>
49#include <net/netns/generic.h>
50#include <net/dst.h>
51#include <net/ip.h>
52#include <net/udp.h>
53#include <net/inet_common.h>
54#include <net/xfrm.h>
55#include <net/protocol.h>
56
57#include <asm/byteorder.h>
58#include <asm/atomic.h>
59
60#include "l2tp_core.h"
61
62#define L2TP_DRV_VERSION "V2.0"
63
64/* L2TP header constants */
65#define L2TP_HDRFLAG_T 0x8000
66#define L2TP_HDRFLAG_L 0x4000
67#define L2TP_HDRFLAG_S 0x0800
68#define L2TP_HDRFLAG_O 0x0200
69#define L2TP_HDRFLAG_P 0x0100
70
71#define L2TP_HDR_VER_MASK 0x000F
72#define L2TP_HDR_VER_2 0x0002
73#define L2TP_HDR_VER_3 0x0003
74
75/* L2TPv3 default L2-specific sublayer */
76#define L2TP_SLFLAG_S 0x40000000
77#define L2TP_SL_SEQ_MASK 0x00ffffff
78
79#define L2TP_HDR_SIZE_SEQ 10
80#define L2TP_HDR_SIZE_NOSEQ 6
81
82/* Default trace flags */
83#define L2TP_DEFAULT_DEBUG_FLAGS 0
84
85#define PRINTK(_mask, _type, _lvl, _fmt, args...) \
86 do { \
87 if ((_mask) & (_type)) \
88 printk(_lvl "L2TP: " _fmt, ##args); \
89 } while (0)
90
91/* Private data stored for received packets in the skb.
92 */
93struct l2tp_skb_cb {
94 u32 ns;
95 u16 has_seq;
96 u16 length;
97 unsigned long expires;
98};
99
100#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)])
101
102static atomic_t l2tp_tunnel_count;
103static atomic_t l2tp_session_count;
104
105/* per-net private data for this module */
106static unsigned int l2tp_net_id;
107struct l2tp_net {
108 struct list_head l2tp_tunnel_list;
109 spinlock_t l2tp_tunnel_list_lock;
110 struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
111 spinlock_t l2tp_session_hlist_lock;
112};
113
114static inline struct l2tp_net *l2tp_pernet(struct net *net)
115{
116 BUG_ON(!net);
117
118 return net_generic(net, l2tp_net_id);
119}
120
121/* Session hash global list for L2TPv3.
122 * The session_id SHOULD be random according to RFC3931, but several
123 * L2TP implementations use incrementing session_ids. So we do a real
124 * hash on the session_id, rather than a simple bitmask.
125 */
126static inline struct hlist_head *
127l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id)
128{
129 return &pn->l2tp_session_hlist[hash_32(session_id, L2TP_HASH_BITS_2)];
130
131}
132
133/* Lookup a session by id in the global session list
134 */
135static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id)
136{
137 struct l2tp_net *pn = l2tp_pernet(net);
138 struct hlist_head *session_list =
139 l2tp_session_id_hash_2(pn, session_id);
140 struct l2tp_session *session;
141 struct hlist_node *walk;
142
143 rcu_read_lock_bh();
144 hlist_for_each_entry_rcu(session, walk, session_list, global_hlist) {
145 if (session->session_id == session_id) {
146 rcu_read_unlock_bh();
147 return session;
148 }
149 }
150 rcu_read_unlock_bh();
151
152 return NULL;
153}
154
155/* Session hash list.
156 * The session_id SHOULD be random according to RFC2661, but several
157 * L2TP implementations (Cisco and Microsoft) use incrementing
158 * session_ids. So we do a real hash on the session_id, rather than a
159 * simple bitmask.
160 */
161static inline struct hlist_head *
162l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
163{
164 return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)];
165}
166
167/* Lookup a session by id
168 */
169struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id)
170{
171 struct hlist_head *session_list;
172 struct l2tp_session *session;
173 struct hlist_node *walk;
174
175 /* In L2TPv3, session_ids are unique over all tunnels and we
176 * sometimes need to look them up before we know the
177 * tunnel.
178 */
179 if (tunnel == NULL)
180 return l2tp_session_find_2(net, session_id);
181
182 session_list = l2tp_session_id_hash(tunnel, session_id);
183 read_lock_bh(&tunnel->hlist_lock);
184 hlist_for_each_entry(session, walk, session_list, hlist) {
185 if (session->session_id == session_id) {
186 read_unlock_bh(&tunnel->hlist_lock);
187 return session;
188 }
189 }
190 read_unlock_bh(&tunnel->hlist_lock);
191
192 return NULL;
193}
194EXPORT_SYMBOL_GPL(l2tp_session_find);
195
196struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
197{
198 int hash;
199 struct hlist_node *walk;
200 struct l2tp_session *session;
201 int count = 0;
202
203 read_lock_bh(&tunnel->hlist_lock);
204 for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
205 hlist_for_each_entry(session, walk, &tunnel->session_hlist[hash], hlist) {
206 if (++count > nth) {
207 read_unlock_bh(&tunnel->hlist_lock);
208 return session;
209 }
210 }
211 }
212
213 read_unlock_bh(&tunnel->hlist_lock);
214
215 return NULL;
216}
217EXPORT_SYMBOL_GPL(l2tp_session_find_nth);
218
219/* Lookup a session by interface name.
220 * This is very inefficient but is only used by management interfaces.
221 */
222struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
223{
224 struct l2tp_net *pn = l2tp_pernet(net);
225 int hash;
226 struct hlist_node *walk;
227 struct l2tp_session *session;
228
229 rcu_read_lock_bh();
230 for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) {
231 hlist_for_each_entry_rcu(session, walk, &pn->l2tp_session_hlist[hash], global_hlist) {
232 if (!strcmp(session->ifname, ifname)) {
233 rcu_read_unlock_bh();
234 return session;
235 }
236 }
237 }
238
239 rcu_read_unlock_bh();
240
241 return NULL;
242}
243EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname);
244
245/* Lookup a tunnel by id
246 */
247struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id)
248{
249 struct l2tp_tunnel *tunnel;
250 struct l2tp_net *pn = l2tp_pernet(net);
251
252 rcu_read_lock_bh();
253 list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
254 if (tunnel->tunnel_id == tunnel_id) {
255 rcu_read_unlock_bh();
256 return tunnel;
257 }
258 }
259 rcu_read_unlock_bh();
260
261 return NULL;
262}
263EXPORT_SYMBOL_GPL(l2tp_tunnel_find);
264
265struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth)
266{
267 struct l2tp_net *pn = l2tp_pernet(net);
268 struct l2tp_tunnel *tunnel;
269 int count = 0;
270
271 rcu_read_lock_bh();
272 list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
273 if (++count > nth) {
274 rcu_read_unlock_bh();
275 return tunnel;
276 }
277 }
278
279 rcu_read_unlock_bh();
280
281 return NULL;
282}
283EXPORT_SYMBOL_GPL(l2tp_tunnel_find_nth);
284
285/*****************************************************************************
286 * Receive data handling
287 *****************************************************************************/
288
289/* Queue a skb in order. We come here only if the skb has an L2TP sequence
290 * number.
291 */
292static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *skb)
293{
294 struct sk_buff *skbp;
295 struct sk_buff *tmp;
296 u32 ns = L2TP_SKB_CB(skb)->ns;
297
298 spin_lock_bh(&session->reorder_q.lock);
299 skb_queue_walk_safe(&session->reorder_q, skbp, tmp) {
300 if (L2TP_SKB_CB(skbp)->ns > ns) {
301 __skb_queue_before(&session->reorder_q, skbp, skb);
302 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
303			   "%s: pkt %u, inserted before %u, reorder_q len=%d\n",
304 session->name, ns, L2TP_SKB_CB(skbp)->ns,
305 skb_queue_len(&session->reorder_q));
306 session->stats.rx_oos_packets++;
307 goto out;
308 }
309 }
310
311 __skb_queue_tail(&session->reorder_q, skb);
312
313out:
314 spin_unlock_bh(&session->reorder_q.lock);
315}
316
317/* Dequeue a single skb.
318 */
319static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *skb)
320{
321 struct l2tp_tunnel *tunnel = session->tunnel;
322 int length = L2TP_SKB_CB(skb)->length;
323
324 /* We're about to requeue the skb, so return resources
325 * to its current owner (a socket receive buffer).
326 */
327 skb_orphan(skb);
328
329 tunnel->stats.rx_packets++;
330 tunnel->stats.rx_bytes += length;
331 session->stats.rx_packets++;
332 session->stats.rx_bytes += length;
333
334 if (L2TP_SKB_CB(skb)->has_seq) {
335 /* Bump our Nr */
336 session->nr++;
337 if (tunnel->version == L2TP_HDR_VER_2)
338 session->nr &= 0xffff;
339 else
340 session->nr &= 0xffffff;
341
342 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
343		       "%s: updated nr to %u\n", session->name, session->nr);
344 }
345
346 /* call private receive handler */
347 if (session->recv_skb != NULL)
348 (*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length);
349 else
350 kfree_skb(skb);
351
352 if (session->deref)
353 (*session->deref)(session);
354}
355
356/* Dequeue skbs from the session's reorder_q, subject to packet order.
357 * Skbs that have been in the queue for too long are simply discarded.
358 */
359static void l2tp_recv_dequeue(struct l2tp_session *session)
360{
361 struct sk_buff *skb;
362 struct sk_buff *tmp;
363
364 /* If the pkt at the head of the queue has the nr that we
365 * expect to send up next, dequeue it and any other
366 * in-sequence packets behind it.
367 */
368 spin_lock_bh(&session->reorder_q.lock);
369 skb_queue_walk_safe(&session->reorder_q, skb, tmp) {
370 if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) {
371 session->stats.rx_seq_discards++;
372 session->stats.rx_errors++;
373 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
374 "%s: oos pkt %u len %d discarded (too old), "
375 "waiting for %u, reorder_q_len=%d\n",
376 session->name, L2TP_SKB_CB(skb)->ns,
377 L2TP_SKB_CB(skb)->length, session->nr,
378 skb_queue_len(&session->reorder_q));
379 __skb_unlink(skb, &session->reorder_q);
380 kfree_skb(skb);
381 if (session->deref)
382 (*session->deref)(session);
383 continue;
384 }
385
386 if (L2TP_SKB_CB(skb)->has_seq) {
387 if (L2TP_SKB_CB(skb)->ns != session->nr) {
388 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
389 "%s: holding oos pkt %u len %d, "
390 "waiting for %u, reorder_q_len=%d\n",
391 session->name, L2TP_SKB_CB(skb)->ns,
392 L2TP_SKB_CB(skb)->length, session->nr,
393 skb_queue_len(&session->reorder_q));
394 goto out;
395 }
396 }
397 __skb_unlink(skb, &session->reorder_q);
398
399 /* Process the skb. We release the queue lock while we
400 * do so to let other contexts process the queue.
401 */
402 spin_unlock_bh(&session->reorder_q.lock);
403 l2tp_recv_dequeue_skb(session, skb);
404 spin_lock_bh(&session->reorder_q.lock);
405 }
406
407out:
408 spin_unlock_bh(&session->reorder_q.lock);
409}
410
411static inline int l2tp_verify_udp_checksum(struct sock *sk,
412 struct sk_buff *skb)
413{
414 struct udphdr *uh = udp_hdr(skb);
415 u16 ulen = ntohs(uh->len);
416 struct inet_sock *inet;
417 __wsum psum;
418
419 if (sk->sk_no_check || skb_csum_unnecessary(skb) || !uh->check)
420 return 0;
421
422 inet = inet_sk(sk);
423 psum = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr, ulen,
424 IPPROTO_UDP, 0);
425
426 if ((skb->ip_summed == CHECKSUM_COMPLETE) &&
427 !csum_fold(csum_add(psum, skb->csum)))
428 return 0;
429
430 skb->csum = psum;
431
432 return __skb_checksum_complete(skb);
433}
434
435/* Do receive processing of L2TP data frames. We handle both L2TPv2
436 * and L2TPv3 data frames here.
437 *
438 * L2TPv2 Data Message Header
439 *
440 * 0 1 2 3
441 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
442 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
443 * |T|L|x|x|S|x|O|P|x|x|x|x| Ver | Length (opt) |
444 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
445 * | Tunnel ID | Session ID |
446 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
447 * | Ns (opt) | Nr (opt) |
448 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
449 * | Offset Size (opt) | Offset pad... (opt)
450 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
451 *
452 * Data frames are marked by T=0. All other fields are the same as
453 * those in L2TP control frames.
454 *
455 * L2TPv3 Data Message Header
456 *
457 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
458 * | L2TP Session Header |
459 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
460 * | L2-Specific Sublayer |
461 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
462 * | Tunnel Payload ...
463 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
464 *
465 * L2TPv3 Session Header Over IP
466 *
467 * 0 1 2 3
468 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
469 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
470 * | Session ID |
471 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
472 * | Cookie (optional, maximum 64 bits)...
473 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
474 * |
475 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
476 *
477 * L2TPv3 L2-Specific Sublayer Format
478 *
479 * 0 1 2 3
480 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
481 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
482 * |x|S|x|x|x|x|x|x| Sequence Number |
483 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
484 *
485 * Cookie value, sublayer format and offset (pad) are negotiated with
486 * the peer when the session is set up. Unlike L2TPv2, we do not need
487 * to parse the packet header to determine if optional fields are
488 * present.
489 *
490 * Caller must already have parsed the frame and determined that it is
491 * a data (not control) frame before coming here. Fields up to the
492 * session-id have already been parsed and ptr points to the data
493 * after the session-id.
494 */
495void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
496 unsigned char *ptr, unsigned char *optr, u16 hdrflags,
497 int length, int (*payload_hook)(struct sk_buff *skb))
498{
499 struct l2tp_tunnel *tunnel = session->tunnel;
500 int offset;
501 u32 ns, nr;
502
503 /* The ref count is increased since we now hold a pointer to
504 * the session. Take care to decrement the refcnt when exiting
505 * this function from now on...
506 */
507 l2tp_session_inc_refcount(session);
508 if (session->ref)
509 (*session->ref)(session);
510
511 /* Parse and check optional cookie */
512 if (session->peer_cookie_len > 0) {
513 if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) {
514 PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
515 "%s: cookie mismatch (%u/%u). Discarding.\n",
516 tunnel->name, tunnel->tunnel_id, session->session_id);
517 session->stats.rx_cookie_discards++;
518 goto discard;
519 }
520 ptr += session->peer_cookie_len;
521 }
522
523 /* Handle the optional sequence numbers. Sequence numbers are
524 * in different places for L2TPv2 and L2TPv3.
525 *
526 * If we are the LAC, enable/disable sequence numbers under
527 * the control of the LNS. If no sequence numbers present but
528 * we were expecting them, discard frame.
529 */
530 ns = nr = 0;
531 L2TP_SKB_CB(skb)->has_seq = 0;
532 if (tunnel->version == L2TP_HDR_VER_2) {
533 if (hdrflags & L2TP_HDRFLAG_S) {
534 ns = ntohs(*(__be16 *) ptr);
535 ptr += 2;
536 nr = ntohs(*(__be16 *) ptr);
537 ptr += 2;
538
539 /* Store L2TP info in the skb */
540 L2TP_SKB_CB(skb)->ns = ns;
541 L2TP_SKB_CB(skb)->has_seq = 1;
542
543 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
544 "%s: recv data ns=%u, nr=%u, session nr=%u\n",
545 session->name, ns, nr, session->nr);
546 }
547 } else if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) {
548 u32 l2h = ntohl(*(__be32 *) ptr);
549
550 if (l2h & 0x40000000) {
551 ns = l2h & 0x00ffffff;
552
553 /* Store L2TP info in the skb */
554 L2TP_SKB_CB(skb)->ns = ns;
555 L2TP_SKB_CB(skb)->has_seq = 1;
556
557 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
558 "%s: recv data ns=%u, session nr=%u\n",
559 session->name, ns, session->nr);
560 }
561 }
562
563 /* Advance past L2-specific header, if present */
564 ptr += session->l2specific_len;
565
566 if (L2TP_SKB_CB(skb)->has_seq) {
567 /* Received a packet with sequence numbers. If we're the LNS,
568		 * check if we are sending sequence numbers and if not,
569 * configure it so.
570 */
571 if ((!session->lns_mode) && (!session->send_seq)) {
572 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_INFO,
573 "%s: requested to enable seq numbers by LNS\n",
574 session->name);
575 session->send_seq = -1;
576 l2tp_session_set_header_len(session, tunnel->version);
577 }
578 } else {
579 /* No sequence numbers.
580 * If user has configured mandatory sequence numbers, discard.
581 */
582 if (session->recv_seq) {
583 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_WARNING,
584 "%s: recv data has no seq numbers when required. "
585 "Discarding\n", session->name);
586 session->stats.rx_seq_discards++;
587 goto discard;
588 }
589
590 /* If we're the LAC and we're sending sequence numbers, the
591 * LNS has requested that we no longer send sequence numbers.
592 * If we're the LNS and we're sending sequence numbers, the
593 * LAC is broken. Discard the frame.
594 */
595 if ((!session->lns_mode) && (session->send_seq)) {
596 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_INFO,
597 "%s: requested to disable seq numbers by LNS\n",
598 session->name);
599 session->send_seq = 0;
600 l2tp_session_set_header_len(session, tunnel->version);
601 } else if (session->send_seq) {
602 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_WARNING,
603 "%s: recv data has no seq numbers when required. "
604 "Discarding\n", session->name);
605 session->stats.rx_seq_discards++;
606 goto discard;
607 }
608 }
609
610 /* Session data offset is handled differently for L2TPv2 and
611 * L2TPv3. For L2TPv2, there is an optional 16-bit value in
612 * the header. For L2TPv3, the offset is negotiated using AVPs
613 * in the session setup control protocol.
614 */
615 if (tunnel->version == L2TP_HDR_VER_2) {
616 /* If offset bit set, skip it. */
617 if (hdrflags & L2TP_HDRFLAG_O) {
618 offset = ntohs(*(__be16 *)ptr);
619 ptr += 2 + offset;
620 }
621 } else
622 ptr += session->offset;
623
624 offset = ptr - optr;
625 if (!pskb_may_pull(skb, offset))
626 goto discard;
627
628 __skb_pull(skb, offset);
629
630 /* If caller wants to process the payload before we queue the
631 * packet, do so now.
632 */
633 if (payload_hook)
634 if ((*payload_hook)(skb))
635 goto discard;
636
637 /* Prepare skb for adding to the session's reorder_q. Hold
638 * packets for max reorder_timeout or 1 second if not
639 * reordering.
640 */
641 L2TP_SKB_CB(skb)->length = length;
642 L2TP_SKB_CB(skb)->expires = jiffies +
643 (session->reorder_timeout ? session->reorder_timeout : HZ);
644
645 /* Add packet to the session's receive queue. Reordering is done here, if
646	 * enabled. Saved L2TP protocol info is stored in skb->cb[].
647 */
648 if (L2TP_SKB_CB(skb)->has_seq) {
649 if (session->reorder_timeout != 0) {
650 /* Packet reordering enabled. Add skb to session's
651 * reorder queue, in order of ns.
652 */
653 l2tp_recv_queue_skb(session, skb);
654 } else {
655 /* Packet reordering disabled. Discard out-of-sequence
656 * packets
657 */
658 if (L2TP_SKB_CB(skb)->ns != session->nr) {
659 session->stats.rx_seq_discards++;
660 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
661 "%s: oos pkt %u len %d discarded, "
662 "waiting for %u, reorder_q_len=%d\n",
663 session->name, L2TP_SKB_CB(skb)->ns,
664 L2TP_SKB_CB(skb)->length, session->nr,
665 skb_queue_len(&session->reorder_q));
666 goto discard;
667 }
668 skb_queue_tail(&session->reorder_q, skb);
669 }
670 } else {
671 /* No sequence numbers. Add the skb to the tail of the
672 * reorder queue. This ensures that it will be
673 * delivered after all previous sequenced skbs.
674 */
675 skb_queue_tail(&session->reorder_q, skb);
676 }
677
678 /* Try to dequeue as many skbs from reorder_q as we can. */
679 l2tp_recv_dequeue(session);
680
681 l2tp_session_dec_refcount(session);
682
683 return;
684
685discard:
686 session->stats.rx_errors++;
687 kfree_skb(skb);
688
689 if (session->deref)
690 (*session->deref)(session);
691
692 l2tp_session_dec_refcount(session);
693}
694EXPORT_SYMBOL(l2tp_recv_common);
695
696/* Internal UDP receive frame. Do the real work of receiving an L2TP data frame
697 * here. The skb is not on a list when we get here.
698 * Returns 0 if the packet was a data packet and was successfully passed on.
699 * Returns 1 if the packet was not a good data packet and could not be
700 * forwarded. All such packets are passed up to userspace to deal with.
701 */
702int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
703 int (*payload_hook)(struct sk_buff *skb))
704{
705 struct l2tp_session *session = NULL;
706 unsigned char *ptr, *optr;
707 u16 hdrflags;
708 u32 tunnel_id, session_id;
709 int offset;
710 u16 version;
711 int length;
712
713 if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb))
714 goto discard_bad_csum;
715
716 /* UDP always verifies the packet length. */
717 __skb_pull(skb, sizeof(struct udphdr));
718
719 /* Short packet? */
720 if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) {
721 PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
722 "%s: recv short packet (len=%d)\n", tunnel->name, skb->len);
723 goto error;
724 }
725
726 /* Point to L2TP header */
727 optr = ptr = skb->data;
728
729 /* Trace packet contents, if enabled */
730 if (tunnel->debug & L2TP_MSG_DATA) {
731 length = min(32u, skb->len);
732 if (!pskb_may_pull(skb, length))
733 goto error;
734
735 printk(KERN_DEBUG "%s: recv: ", tunnel->name);
736
737 offset = 0;
738 do {
739 printk(" %02X", ptr[offset]);
740 } while (++offset < length);
741
742 printk("\n");
743 }
744
745 /* Get L2TP header flags */
746 hdrflags = ntohs(*(__be16 *) ptr);
747
748 /* Check protocol version */
749 version = hdrflags & L2TP_HDR_VER_MASK;
750 if (version != tunnel->version) {
751 PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
752 "%s: recv protocol version mismatch: got %d expected %d\n",
753 tunnel->name, version, tunnel->version);
754 goto error;
755 }
756
757 /* Get length of L2TP packet */
758 length = skb->len;
759
760 /* If type is control packet, it is handled by userspace. */
761 if (hdrflags & L2TP_HDRFLAG_T) {
762 PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_DEBUG,
763 "%s: recv control packet, len=%d\n", tunnel->name, length);
764 goto error;
765 }
766
767 /* Skip flags */
768 ptr += 2;
769
770 if (tunnel->version == L2TP_HDR_VER_2) {
771 /* If length is present, skip it */
772 if (hdrflags & L2TP_HDRFLAG_L)
773 ptr += 2;
774
775 /* Extract tunnel and session ID */
776 tunnel_id = ntohs(*(__be16 *) ptr);
777 ptr += 2;
778 session_id = ntohs(*(__be16 *) ptr);
779 ptr += 2;
780 } else {
781 ptr += 2; /* skip reserved bits */
782 tunnel_id = tunnel->tunnel_id;
783 session_id = ntohl(*(__be32 *) ptr);
784 ptr += 4;
785 }
786
787 /* Find the session context */
788 session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id);
789 if (!session || !session->recv_skb) {
790 /* Not found? Pass to userspace to deal with */
791 PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
792 "%s: no session found (%u/%u). Passing up.\n",
793 tunnel->name, tunnel_id, session_id);
794 goto error;
795 }
796
797 l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook);
798
799 return 0;
800
801discard_bad_csum:
802 LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name);
803 UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0);
804 tunnel->stats.rx_errors++;
805 kfree_skb(skb);
806
807 return 0;
808
809error:
810 /* Put UDP header back */
811 __skb_push(skb, sizeof(struct udphdr));
812
813 return 1;
814}
815EXPORT_SYMBOL_GPL(l2tp_udp_recv_core);
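/* Illustrative only (not part of this driver): a minimal userspace sketch
 * of the L2TPv2 data-header walk performed above. It assumes a well-formed
 * packet and does no bounds checking; the flag bits and field layout follow
 * the header diagram documented before l2tp_recv_common().
 */
#include <stdint.h>

static uint16_t rd16(const uint8_t *p)
{
	return (uint16_t)((p[0] << 8) | p[1]);	/* fields are big-endian */
}

/* Returns the offset of the payload within pkt, or -1 for control frames. */
static int l2tpv2_payload_offset(const uint8_t *pkt)
{
	const uint8_t *p = pkt;
	uint16_t flags = rd16(p);

	p += 2;				/* flags + version */
	if (flags & 0x8000)		/* T bit: control message */
		return -1;
	if (flags & 0x4000)		/* L bit: optional length field */
		p += 2;
	p += 4;				/* tunnel id + session id */
	if (flags & 0x0800)		/* S bit: Ns + Nr */
		p += 4;
	if (flags & 0x0200) {		/* O bit: offset size + pad */
		uint16_t pad = rd16(p);

		p += 2 + pad;
	}
	return (int)(p - pkt);
}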
816
817/* UDP encapsulation receive handler. See net/ipv4/udp.c.
818 * Return codes:
819 * 0 : success.
820 * <0: error
821 * >0: skb should be passed up to userspace as UDP.
822 */
823int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
824{
825 struct l2tp_tunnel *tunnel;
826
827 tunnel = l2tp_sock_to_tunnel(sk);
828 if (tunnel == NULL)
829 goto pass_up;
830
831 PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_DEBUG,
832 "%s: received %d bytes\n", tunnel->name, skb->len);
833
834 if (l2tp_udp_recv_core(tunnel, skb, tunnel->recv_payload_hook))
835 goto pass_up_put;
836
837 sock_put(sk);
838 return 0;
839
840pass_up_put:
841 sock_put(sk);
842pass_up:
843 return 1;
844}
845EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv);
846
847/************************************************************************
848 * Transmit handling
849 ***********************************************************************/
850
851/* Build an L2TP header for the session into the buffer provided.
852 */
853static int l2tp_build_l2tpv2_header(struct l2tp_session *session, void *buf)
854{
855 struct l2tp_tunnel *tunnel = session->tunnel;
856 __be16 *bufp = buf;
857 __be16 *optr = buf;
858 u16 flags = L2TP_HDR_VER_2;
859 u32 tunnel_id = tunnel->peer_tunnel_id;
860 u32 session_id = session->peer_session_id;
861
862 if (session->send_seq)
863 flags |= L2TP_HDRFLAG_S;
864
865 /* Setup L2TP header. */
866 *bufp++ = htons(flags);
867 *bufp++ = htons(tunnel_id);
868 *bufp++ = htons(session_id);
869 if (session->send_seq) {
870 *bufp++ = htons(session->ns);
871 *bufp++ = 0;
872 session->ns++;
873 session->ns &= 0xffff;
874 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
875 "%s: updated ns to %u\n", session->name, session->ns);
876 }
877
878 return bufp - optr;
879}
880
881static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
882{
883 struct l2tp_tunnel *tunnel = session->tunnel;
884 char *bufp = buf;
885 char *optr = bufp;
886
887 /* Setup L2TP header. The header differs slightly for UDP and
888 * IP encapsulations. For UDP, there is 4 bytes of flags.
889 */
890 if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
891 u16 flags = L2TP_HDR_VER_3;
892 *((__be16 *) bufp) = htons(flags);
893 bufp += 2;
894 *((__be16 *) bufp) = 0;
895 bufp += 2;
896 }
897
898 *((__be32 *) bufp) = htonl(session->peer_session_id);
899 bufp += 4;
900 if (session->cookie_len) {
901 memcpy(bufp, &session->cookie[0], session->cookie_len);
902 bufp += session->cookie_len;
903 }
904 if (session->l2specific_len) {
905 if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) {
906 u32 l2h = 0;
907 if (session->send_seq) {
908 l2h = 0x40000000 | session->ns;
909 session->ns++;
910 session->ns &= 0xffffff;
911 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
912 "%s: updated ns to %u\n", session->name, session->ns);
913 }
914
915 *((__be32 *) bufp) = htonl(l2h);
916 }
917 bufp += session->l2specific_len;
918 }
919 if (session->offset)
920 bufp += session->offset;
921
922 return bufp - optr;
923}
924
925int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len)
926{
927 struct l2tp_tunnel *tunnel = session->tunnel;
928 unsigned int len = skb->len;
929 int error;
930
931 /* Debug */
932 if (session->send_seq)
933 PRINTK(session->debug, L2TP_MSG_DATA, KERN_DEBUG,
934 "%s: send %Zd bytes, ns=%u\n", session->name,
935 data_len, session->ns - 1);
936 else
937 PRINTK(session->debug, L2TP_MSG_DATA, KERN_DEBUG,
938 "%s: send %Zd bytes\n", session->name, data_len);
939
940 if (session->debug & L2TP_MSG_DATA) {
941 int i;
942 int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
943 unsigned char *datap = skb->data + uhlen;
944
945 printk(KERN_DEBUG "%s: xmit:", session->name);
946 for (i = 0; i < (len - uhlen); i++) {
947 printk(" %02X", *datap++);
948 if (i == 31) {
949 printk(" ...");
950 break;
951 }
952 }
953 printk("\n");
954 }
955
956 /* Queue the packet to IP for output */
957 error = ip_queue_xmit(skb, 1);
958
959 /* Update stats */
960 if (error >= 0) {
961 tunnel->stats.tx_packets++;
962 tunnel->stats.tx_bytes += len;
963 session->stats.tx_packets++;
964 session->stats.tx_bytes += len;
965 } else {
966 tunnel->stats.tx_errors++;
967 session->stats.tx_errors++;
968 }
969
970 return 0;
971}
972EXPORT_SYMBOL_GPL(l2tp_xmit_core);
973
974/* Automatically called when the skb is freed.
975 */
976static void l2tp_sock_wfree(struct sk_buff *skb)
977{
978 sock_put(skb->sk);
979}
980
981/* For data skbs that we transmit, we associate with the tunnel socket
982 * but don't do accounting.
983 */
984static inline void l2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
985{
986 sock_hold(sk);
987 skb->sk = sk;
988 skb->destructor = l2tp_sock_wfree;
989}
990
991/* If caller requires the skb to have a ppp header, the header must be
992 * inserted in the skb data before calling this function.
993 */
994int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len)
995{
996 int data_len = skb->len;
997 struct l2tp_tunnel *tunnel = session->tunnel;
998 struct sock *sk = tunnel->sock;
999 struct udphdr *uh;
1000 struct inet_sock *inet;
1001 __wsum csum;
1002 int old_headroom;
1003 int new_headroom;
1004 int headroom;
1005 int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
1006 int udp_len;
1007
1008 /* Check that there's enough headroom in the skb to insert IP,
1009 * UDP and L2TP headers. If not enough, expand it to
1010 * make room. Adjust truesize.
1011 */
1012 headroom = NET_SKB_PAD + sizeof(struct iphdr) +
1013 uhlen + hdr_len;
1014 old_headroom = skb_headroom(skb);
1015 if (skb_cow_head(skb, headroom))
1016 goto abort;
1017
1018 new_headroom = skb_headroom(skb);
1019 skb_orphan(skb);
1020 skb->truesize += new_headroom - old_headroom;
1021
1022 /* Setup L2TP header */
1023 session->build_header(session, __skb_push(skb, hdr_len));
1024
1025 /* Reset skb netfilter state */
1026 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1027 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
1028 IPSKB_REROUTED);
1029 nf_reset(skb);
1030
1031 /* Get routing info from the tunnel socket */
1032 skb_dst_drop(skb);
1033 skb_dst_set(skb, dst_clone(__sk_dst_get(sk)));
1034
1035 switch (tunnel->encap) {
1036 case L2TP_ENCAPTYPE_UDP:
1037 /* Setup UDP header */
1038 inet = inet_sk(sk);
1039 __skb_push(skb, sizeof(*uh));
1040 skb_reset_transport_header(skb);
1041 uh = udp_hdr(skb);
1042 uh->source = inet->inet_sport;
1043 uh->dest = inet->inet_dport;
1044 udp_len = uhlen + hdr_len + data_len;
1045 uh->len = htons(udp_len);
1046 uh->check = 0;
1047
1048 /* Calculate UDP checksum if configured to do so */
1049 if (sk->sk_no_check == UDP_CSUM_NOXMIT)
1050 skb->ip_summed = CHECKSUM_NONE;
1051 else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
1052 (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
1053 skb->ip_summed = CHECKSUM_COMPLETE;
1054 csum = skb_checksum(skb, 0, udp_len, 0);
1055 uh->check = csum_tcpudp_magic(inet->inet_saddr,
1056 inet->inet_daddr,
1057 udp_len, IPPROTO_UDP, csum);
1058 if (uh->check == 0)
1059 uh->check = CSUM_MANGLED_0;
1060 } else {
1061 skb->ip_summed = CHECKSUM_PARTIAL;
1062 skb->csum_start = skb_transport_header(skb) - skb->head;
1063 skb->csum_offset = offsetof(struct udphdr, check);
1064 uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
1065 inet->inet_daddr,
1066 udp_len, IPPROTO_UDP, 0);
1067 }
1068 break;
1069
1070 case L2TP_ENCAPTYPE_IP:
1071 break;
1072 }
1073
1074 l2tp_skb_set_owner_w(skb, sk);
1075
1076 l2tp_xmit_core(session, skb, data_len);
1077
1078abort:
1079 return 0;
1080}
1081EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
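/* Hypothetical caller sketch (not from this patch): how a pseudowire
 * driver might transmit a payload that is already at skb->data. The my_
 * name is an assumption for illustration. l2tp_xmit_skb() expands the
 * headroom itself if needed, so the caller only supplies the session
 * header length; for PPP pseudowires the PPP header must already have
 * been pushed onto the skb, per the comment above.
 */
static int my_pw_xmit(struct l2tp_session *session, struct sk_buff *skb)
{
	return l2tp_xmit_skb(session, skb, session->hdr_len);
}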
1082
1083/*****************************************************************************
1084 * Tunnel and session create/destroy.
1085 *****************************************************************************/
1086
1087/* Tunnel socket destruct hook.
1088 * The tunnel context is deleted only when all session sockets have been
1089 * closed.
1090 */
1091void l2tp_tunnel_destruct(struct sock *sk)
1092{
1093 struct l2tp_tunnel *tunnel;
1094
1095 tunnel = sk->sk_user_data;
1096 if (tunnel == NULL)
1097 goto end;
1098
1099 PRINTK(tunnel->debug, L2TP_MSG_CONTROL, KERN_INFO,
1100 "%s: closing...\n", tunnel->name);
1101
1102 /* Close all sessions */
1103 l2tp_tunnel_closeall(tunnel);
1104
1105 switch (tunnel->encap) {
1106 case L2TP_ENCAPTYPE_UDP:
1107 /* No longer an encapsulation socket. See net/ipv4/udp.c */
1108 (udp_sk(sk))->encap_type = 0;
1109 (udp_sk(sk))->encap_rcv = NULL;
1110 break;
1111 case L2TP_ENCAPTYPE_IP:
1112 break;
1113 }
1114
1115 /* Remove hooks into tunnel socket */
1116 tunnel->sock = NULL;
1117 sk->sk_destruct = tunnel->old_sk_destruct;
1118 sk->sk_user_data = NULL;
1119
1120 /* Call the original destructor */
1121 if (sk->sk_destruct)
1122 (*sk->sk_destruct)(sk);
1123
1124 /* We're finished with the socket */
1125 l2tp_tunnel_dec_refcount(tunnel);
1126
1127end:
1128 return;
1129}
1130EXPORT_SYMBOL(l2tp_tunnel_destruct);
1131
1132/* When the tunnel is closed, all the attached sessions need to go too.
1133 */
1134void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
1135{
1136 int hash;
1137 struct hlist_node *walk;
1138 struct hlist_node *tmp;
1139 struct l2tp_session *session;
1140
1141 BUG_ON(tunnel == NULL);
1142
1143 PRINTK(tunnel->debug, L2TP_MSG_CONTROL, KERN_INFO,
1144 "%s: closing all sessions...\n", tunnel->name);
1145
1146 write_lock_bh(&tunnel->hlist_lock);
1147 for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
1148again:
1149 hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
1150 session = hlist_entry(walk, struct l2tp_session, hlist);
1151
1152 PRINTK(session->debug, L2TP_MSG_CONTROL, KERN_INFO,
1153 "%s: closing session\n", session->name);
1154
1155 hlist_del_init(&session->hlist);
1156
1157 /* Since we should hold the sock lock while
1158 * doing any unbinding, we need to release the
1159 * lock we're holding before taking that lock.
1160 * Hold a reference to the sock so it doesn't
1161 * disappear as we're jumping between locks.
1162 */
1163 if (session->ref != NULL)
1164 (*session->ref)(session);
1165
1166 write_unlock_bh(&tunnel->hlist_lock);
1167
1168 if (tunnel->version != L2TP_HDR_VER_2) {
1169 struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
1170
1171 spin_lock_bh(&pn->l2tp_session_hlist_lock);
1172 hlist_del_init_rcu(&session->global_hlist);
1173 spin_unlock_bh(&pn->l2tp_session_hlist_lock);
1174 synchronize_rcu();
1175 }
1176
1177 if (session->session_close != NULL)
1178 (*session->session_close)(session);
1179
1180 if (session->deref != NULL)
1181 (*session->deref)(session);
1182
1183 write_lock_bh(&tunnel->hlist_lock);
1184
1185 /* Now restart from the beginning of this hash
1186 * chain. We always remove a session from the
1187 * list so we are guaranteed to make forward
1188 * progress.
1189 */
1190 goto again;
1191 }
1192 }
1193 write_unlock_bh(&tunnel->hlist_lock);
1194}
1195EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall);
1196
1197/* Really kill the tunnel.
1198 * Come here only when all sessions have been cleared from the tunnel.
1199 */
1200void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
1201{
1202 struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
1203
1204 BUG_ON(atomic_read(&tunnel->ref_count) != 0);
1205 BUG_ON(tunnel->sock != NULL);
1206
1207 PRINTK(tunnel->debug, L2TP_MSG_CONTROL, KERN_INFO,
1208 "%s: free...\n", tunnel->name);
1209
1210 /* Remove from tunnel list */
1211 spin_lock_bh(&pn->l2tp_tunnel_list_lock);
1212 list_del_rcu(&tunnel->list);
1213 spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
1214 synchronize_rcu();
1215
1216 atomic_dec(&l2tp_tunnel_count);
1217 kfree(tunnel);
1218}
1219EXPORT_SYMBOL_GPL(l2tp_tunnel_free);
1220
1221/* Create a socket for the tunnel, if one isn't set up by
1222 * userspace. This is used for static tunnels where there is no
1223 * managing L2TP daemon.
1224 */
1225static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct socket **sockp)
1226{
1227 int err = -EINVAL;
1228 struct sockaddr_in udp_addr;
1229 struct sockaddr_l2tpip ip_addr;
1230 struct socket *sock = NULL;
1231
1232 switch (cfg->encap) {
1233 case L2TP_ENCAPTYPE_UDP:
1234 err = sock_create(AF_INET, SOCK_DGRAM, 0, sockp);
1235 if (err < 0)
1236 goto out;
1237
1238 sock = *sockp;
1239
1240 memset(&udp_addr, 0, sizeof(udp_addr));
1241 udp_addr.sin_family = AF_INET;
1242 udp_addr.sin_addr = cfg->local_ip;
1243 udp_addr.sin_port = htons(cfg->local_udp_port);
1244 err = kernel_bind(sock, (struct sockaddr *) &udp_addr, sizeof(udp_addr));
1245 if (err < 0)
1246 goto out;
1247
1248 udp_addr.sin_family = AF_INET;
1249 udp_addr.sin_addr = cfg->peer_ip;
1250 udp_addr.sin_port = htons(cfg->peer_udp_port);
1251 err = kernel_connect(sock, (struct sockaddr *) &udp_addr, sizeof(udp_addr), 0);
1252 if (err < 0)
1253 goto out;
1254
1255 if (!cfg->use_udp_checksums)
1256 sock->sk->sk_no_check = UDP_CSUM_NOXMIT;
1257
1258 break;
1259
1260 case L2TP_ENCAPTYPE_IP:
1261 err = sock_create(AF_INET, SOCK_DGRAM, IPPROTO_L2TP, sockp);
1262 if (err < 0)
1263 goto out;
1264
1265 sock = *sockp;
1266
1267 memset(&ip_addr, 0, sizeof(ip_addr));
1268 ip_addr.l2tp_family = AF_INET;
1269 ip_addr.l2tp_addr = cfg->local_ip;
1270 ip_addr.l2tp_conn_id = tunnel_id;
1271 err = kernel_bind(sock, (struct sockaddr *) &ip_addr, sizeof(ip_addr));
1272 if (err < 0)
1273 goto out;
1274
1275 ip_addr.l2tp_family = AF_INET;
1276 ip_addr.l2tp_addr = cfg->peer_ip;
1277 ip_addr.l2tp_conn_id = peer_tunnel_id;
1278 err = kernel_connect(sock, (struct sockaddr *) &ip_addr, sizeof(ip_addr), 0);
1279 if (err < 0)
1280 goto out;
1281
1282 break;
1283
1284 default:
1285 goto out;
1286 }
1287
1288out:
1289 if ((err < 0) && sock) {
1290 sock_release(sock);
1291 *sockp = NULL;
1292 }
1293
1294 return err;
1295}
1296
1297int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
1298{
1299 struct l2tp_tunnel *tunnel = NULL;
1300 int err;
1301 struct socket *sock = NULL;
1302 struct sock *sk = NULL;
1303 struct l2tp_net *pn;
1304 enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
1305
1306 /* Get the tunnel socket from the fd, which was opened by
1307 * the userspace L2TP daemon. If not specified, create a
1308 * kernel socket.
1309 */
1310 if (fd < 0) {
1311 err = l2tp_tunnel_sock_create(tunnel_id, peer_tunnel_id, cfg, &sock);
1312 if (err < 0)
1313 goto err;
1314 } else {
1315 err = -EBADF;
1316 sock = sockfd_lookup(fd, &err);
1317 if (!sock) {
1318			printk(KERN_ERR "tunl %u: sockfd_lookup(fd=%d) returned %d\n",
1319 tunnel_id, fd, err);
1320 goto err;
1321 }
1322 }
1323
1324 sk = sock->sk;
1325
1326 if (cfg != NULL)
1327 encap = cfg->encap;
1328
1329 /* Quick sanity checks */
1330 switch (encap) {
1331 case L2TP_ENCAPTYPE_UDP:
1332 err = -EPROTONOSUPPORT;
1333 if (sk->sk_protocol != IPPROTO_UDP) {
1334			printk(KERN_ERR "tunl %u: fd %d wrong protocol, got %d, expected %d\n",
1335 tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
1336 goto err;
1337 }
1338 break;
1339 case L2TP_ENCAPTYPE_IP:
1340 err = -EPROTONOSUPPORT;
1341 if (sk->sk_protocol != IPPROTO_L2TP) {
1342			printk(KERN_ERR "tunl %u: fd %d wrong protocol, got %d, expected %d\n",
1343 tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP);
1344 goto err;
1345 }
1346 break;
1347 }
1348
1349 /* Check if this socket has already been prepped */
1350 tunnel = (struct l2tp_tunnel *)sk->sk_user_data;
1351 if (tunnel != NULL) {
1352 /* This socket has already been prepped */
1353 err = -EBUSY;
1354 goto err;
1355 }
1356
1357 tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL);
1358 if (tunnel == NULL) {
1359 err = -ENOMEM;
1360 goto err;
1361 }
1362
1363 tunnel->version = version;
1364 tunnel->tunnel_id = tunnel_id;
1365 tunnel->peer_tunnel_id = peer_tunnel_id;
1366 tunnel->debug = L2TP_DEFAULT_DEBUG_FLAGS;
1367
1368 tunnel->magic = L2TP_TUNNEL_MAGIC;
1369 sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
1370 rwlock_init(&tunnel->hlist_lock);
1371
1372 /* The net we belong to */
1373 tunnel->l2tp_net = net;
1374 pn = l2tp_pernet(net);
1375
1376 if (cfg != NULL)
1377 tunnel->debug = cfg->debug;
1378
1379 /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
1380 tunnel->encap = encap;
1381 if (encap == L2TP_ENCAPTYPE_UDP) {
1382 /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
1383 udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP;
1384 udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
1385 }
1386
1387 sk->sk_user_data = tunnel;
1388
1389 /* Hook on the tunnel socket destructor so that we can cleanup
1390 * if the tunnel socket goes away.
1391 */
1392 tunnel->old_sk_destruct = sk->sk_destruct;
1393 sk->sk_destruct = &l2tp_tunnel_destruct;
1394 tunnel->sock = sk;
1395 sk->sk_allocation = GFP_ATOMIC;
1396
1397 /* Add tunnel to our list */
1398 INIT_LIST_HEAD(&tunnel->list);
1399 spin_lock_bh(&pn->l2tp_tunnel_list_lock);
1400 list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
1401 spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
1402 synchronize_rcu();
1403 atomic_inc(&l2tp_tunnel_count);
1404
1405 /* Bump the reference count. The tunnel context is deleted
1406 * only when this drops to zero.
1407 */
1408 l2tp_tunnel_inc_refcount(tunnel);
1409
1410 err = 0;
1411err:
1412 if (tunnelp)
1413 *tunnelp = tunnel;
1414
1415 /* If tunnel's socket was created by the kernel, it doesn't
1416 * have a file.
1417 */
1418 if (sock && sock->file)
1419 sockfd_put(sock);
1420
1421 return err;
1422}
1423EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
1424
1425/* This function is used by the netlink TUNNEL_DELETE command.
1426 */
1427int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
1428{
1429 int err = 0;
1430 struct socket *sock = tunnel->sock ? tunnel->sock->sk_socket : NULL;
1431
1432 /* Force the tunnel socket to close. This will eventually
1433 * cause the tunnel to be deleted via the normal socket close
1434 * mechanisms when userspace closes the tunnel socket.
1435 */
1436 if (sock != NULL) {
1437 err = inet_shutdown(sock, 2);
1438
1439 /* If the tunnel's socket was created by the kernel,
1440 * close the socket here since the socket was not
1441 * created by userspace.
1442 */
1443 if (sock->file == NULL)
1444 err = inet_release(sock);
1445 }
1446
1447 return err;
1448}
1449EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
1450
1451/* Really kill the session.
1452 */
1453void l2tp_session_free(struct l2tp_session *session)
1454{
1455 struct l2tp_tunnel *tunnel;
1456
1457 BUG_ON(atomic_read(&session->ref_count) != 0);
1458
1459 tunnel = session->tunnel;
1460 if (tunnel != NULL) {
1461 BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
1462
1463 /* Delete the session from the hash */
1464 write_lock_bh(&tunnel->hlist_lock);
1465 hlist_del_init(&session->hlist);
1466 write_unlock_bh(&tunnel->hlist_lock);
1467
1468 /* Unlink from the global hash if not L2TPv2 */
1469 if (tunnel->version != L2TP_HDR_VER_2) {
1470 struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
1471
1472 spin_lock_bh(&pn->l2tp_session_hlist_lock);
1473 hlist_del_init_rcu(&session->global_hlist);
1474 spin_unlock_bh(&pn->l2tp_session_hlist_lock);
1475 synchronize_rcu();
1476 }
1477
1478 if (session->session_id != 0)
1479 atomic_dec(&l2tp_session_count);
1480
1481 sock_put(tunnel->sock);
1482
1483 /* This will delete the tunnel context if this
1484 * is the last session on the tunnel.
1485 */
1486 session->tunnel = NULL;
1487 l2tp_tunnel_dec_refcount(tunnel);
1488 }
1489
1490 kfree(session);
1491
1492 return;
1493}
1494EXPORT_SYMBOL_GPL(l2tp_session_free);
1495
1496/* This function is used by the netlink SESSION_DELETE command and by
1497 * pseudowire modules.
1498 */
1499int l2tp_session_delete(struct l2tp_session *session)
1500{
1501 if (session->session_close != NULL)
1502 (*session->session_close)(session);
1503
1504 l2tp_session_dec_refcount(session);
1505
1506 return 0;
1507}
1508EXPORT_SYMBOL_GPL(l2tp_session_delete);
1509
1510
1511/* We come here whenever a session's send_seq, cookie_len or
1512 * l2specific_len parameters are set.
1513 */
1514void l2tp_session_set_header_len(struct l2tp_session *session, int version)
1515{
1516 if (version == L2TP_HDR_VER_2) {
1517 session->hdr_len = 6;
1518 if (session->send_seq)
1519 session->hdr_len += 4;
1520 } else {
1521 session->hdr_len = 4 + session->cookie_len + session->l2specific_len + session->offset;
1522 if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP)
1523 session->hdr_len += 4;
1524 }
1525
1526}
1527EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
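/* Worked example (illustrative, not from the patch): an L2TPv3 session
 * carried over UDP with a 4-byte cookie, the 4-byte default L2-specific
 * sublayer and no offset pad gets
 *
 *	hdr_len = 4 (session id) + 4 (cookie) + 4 (L2-specific) + 0 (offset)
 *		+ 4 (UDP flags/version + reserved)
 *		= 16 bytes
 *
 * while an L2TPv2 data session with sequence numbers enabled gets
 * 6 + 4 = 10 bytes (L2TP_HDR_SIZE_SEQ above).
 */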
1528
1529struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
1530{
1531 struct l2tp_session *session;
1532
1533 session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL);
1534 if (session != NULL) {
1535 session->magic = L2TP_SESSION_MAGIC;
1536 session->tunnel = tunnel;
1537
1538 session->session_id = session_id;
1539 session->peer_session_id = peer_session_id;
1540 session->nr = 1;
1541
1542 sprintf(&session->name[0], "sess %u/%u",
1543 tunnel->tunnel_id, session->session_id);
1544
1545 skb_queue_head_init(&session->reorder_q);
1546
1547 INIT_HLIST_NODE(&session->hlist);
1548 INIT_HLIST_NODE(&session->global_hlist);
1549
1550 /* Inherit debug options from tunnel */
1551 session->debug = tunnel->debug;
1552
1553 if (cfg) {
1554 session->pwtype = cfg->pw_type;
1555 session->debug = cfg->debug;
1556 session->mtu = cfg->mtu;
1557 session->mru = cfg->mru;
1558 session->send_seq = cfg->send_seq;
1559 session->recv_seq = cfg->recv_seq;
1560 session->lns_mode = cfg->lns_mode;
1561 session->reorder_timeout = cfg->reorder_timeout;
1562 session->offset = cfg->offset;
1563 session->l2specific_type = cfg->l2specific_type;
1564 session->l2specific_len = cfg->l2specific_len;
1565 session->cookie_len = cfg->cookie_len;
1566 memcpy(&session->cookie[0], &cfg->cookie[0], cfg->cookie_len);
1567 session->peer_cookie_len = cfg->peer_cookie_len;
1568 memcpy(&session->peer_cookie[0], &cfg->peer_cookie[0], cfg->peer_cookie_len);
1569 }
1570
1571 if (tunnel->version == L2TP_HDR_VER_2)
1572 session->build_header = l2tp_build_l2tpv2_header;
1573 else
1574 session->build_header = l2tp_build_l2tpv3_header;
1575
1576 l2tp_session_set_header_len(session, tunnel->version);
1577
1578 /* Bump the reference count. The session context is deleted
1579 * only when this drops to zero.
1580 */
1581 l2tp_session_inc_refcount(session);
1582 l2tp_tunnel_inc_refcount(tunnel);
1583
1584 /* Ensure tunnel socket isn't deleted */
1585 sock_hold(tunnel->sock);
1586
1587 /* Add session to the tunnel's hash list */
1588 write_lock_bh(&tunnel->hlist_lock);
1589 hlist_add_head(&session->hlist,
1590 l2tp_session_id_hash(tunnel, session_id));
1591 write_unlock_bh(&tunnel->hlist_lock);
1592
1593 /* And to the global session list if L2TPv3 */
1594 if (tunnel->version != L2TP_HDR_VER_2) {
1595 struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
1596
1597 spin_lock_bh(&pn->l2tp_session_hlist_lock);
1598 hlist_add_head_rcu(&session->global_hlist,
1599 l2tp_session_id_hash_2(pn, session_id));
1600 spin_unlock_bh(&pn->l2tp_session_hlist_lock);
1601 synchronize_rcu();
1602 }
1603
1604 /* Ignore management session in session count value */
1605 if (session->session_id != 0)
1606 atomic_inc(&l2tp_session_count);
1607 }
1608
1609 return session;
1610}
1611EXPORT_SYMBOL_GPL(l2tp_session_create);
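/* Hypothetical usage sketch (not from this patch): how a pseudowire
 * module might create a session and attach its receive handler. The my_
 * names and the private struct are assumptions for illustration; the
 * recv_skb() prototype and l2tp_session_priv() come from l2tp_core.h.
 */
struct my_pw_priv {
	unsigned long	rx_packets;	/* driver-private example state */
};

static void my_pw_recv(struct l2tp_session *session, struct sk_buff *skb,
		       int data_len)
{
	struct my_pw_priv *priv = l2tp_session_priv(session);

	priv->rx_packets++;
	/* Hand the decapsulated payload to the pseudowire here. */
	kfree_skb(skb);
}

static struct l2tp_session *my_pw_add_session(struct l2tp_tunnel *tunnel,
					      u32 session_id,
					      u32 peer_session_id,
					      struct l2tp_session_cfg *cfg)
{
	struct l2tp_session *session;

	session = l2tp_session_create(sizeof(struct my_pw_priv), tunnel,
				      session_id, peer_session_id, cfg);
	if (session)
		session->recv_skb = my_pw_recv;

	return session;
}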
1612
1613/*****************************************************************************
1614 * Init and cleanup
1615 *****************************************************************************/
1616
1617static __net_init int l2tp_init_net(struct net *net)
1618{
1619 struct l2tp_net *pn;
1620 int err;
1621 int hash;
1622
1623 pn = kzalloc(sizeof(*pn), GFP_KERNEL);
1624 if (!pn)
1625 return -ENOMEM;
1626
1627 INIT_LIST_HEAD(&pn->l2tp_tunnel_list);
1628 spin_lock_init(&pn->l2tp_tunnel_list_lock);
1629
1630 for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
1631 INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]);
1632
1633 spin_lock_init(&pn->l2tp_session_hlist_lock);
1634
1635 err = net_assign_generic(net, l2tp_net_id, pn);
1636 if (err)
1637 goto out;
1638
1639 return 0;
1640
1641out:
1642 kfree(pn);
1643 return err;
1644}
1645
1646static __net_exit void l2tp_exit_net(struct net *net)
1647{
1648 struct l2tp_net *pn;
1649
1650 pn = net_generic(net, l2tp_net_id);
1651 /*
1652	 * If someone has cached our net, then a
1653	 * further net_generic() call will return NULL.
1654 */
1655 net_assign_generic(net, l2tp_net_id, NULL);
1656 kfree(pn);
1657}
1658
1659static struct pernet_operations l2tp_net_ops = {
1660 .init = l2tp_init_net,
1661 .exit = l2tp_exit_net,
1662 .id = &l2tp_net_id,
1663 .size = sizeof(struct l2tp_net),
1664};
1665
1666static int __init l2tp_init(void)
1667{
1668 int rc = 0;
1669
1670 rc = register_pernet_device(&l2tp_net_ops);
1671 if (rc)
1672 goto out;
1673
1674 printk(KERN_INFO "L2TP core driver, %s\n", L2TP_DRV_VERSION);
1675
1676out:
1677 return rc;
1678}
1679
1680static void __exit l2tp_exit(void)
1681{
1682 unregister_pernet_device(&l2tp_net_ops);
1683}
1684
1685module_init(l2tp_init);
1686module_exit(l2tp_exit);
1687
1688MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
1689MODULE_DESCRIPTION("L2TP core");
1690MODULE_LICENSE("GPL");
1691MODULE_VERSION(L2TP_DRV_VERSION);
1692
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
new file mode 100644
index 000000000000..f0f318edd3f1
--- /dev/null
+++ b/net/l2tp/l2tp_core.h
@@ -0,0 +1,304 @@
1/*
2 * L2TP internal definitions.
3 *
4 * Copyright (c) 2008,2009 Katalix Systems Ltd
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#ifndef _L2TP_CORE_H_
12#define _L2TP_CORE_H_
13
14/* Just some random numbers */
15#define L2TP_TUNNEL_MAGIC 0x42114DDA
16#define L2TP_SESSION_MAGIC 0x0C04EB7D
17
18/* Per tunnel, session hash table size */
19#define L2TP_HASH_BITS 4
20#define L2TP_HASH_SIZE (1 << L2TP_HASH_BITS)
21
22/* System-wide, session hash table size */
23#define L2TP_HASH_BITS_2 8
24#define L2TP_HASH_SIZE_2 (1 << L2TP_HASH_BITS_2)
25
26/* Debug message categories for the DEBUG socket option */
27enum {
28 L2TP_MSG_DEBUG = (1 << 0), /* verbose debug (if
29 * compiled in) */
30 L2TP_MSG_CONTROL = (1 << 1), /* userspace - kernel
31 * interface */
32 L2TP_MSG_SEQ = (1 << 2), /* sequence numbers */
33 L2TP_MSG_DATA = (1 << 3), /* data packets */
34};
35
36struct sk_buff;
37
38struct l2tp_stats {
39 u64 tx_packets;
40 u64 tx_bytes;
41 u64 tx_errors;
42 u64 rx_packets;
43 u64 rx_bytes;
44 u64 rx_seq_discards;
45 u64 rx_oos_packets;
46 u64 rx_errors;
47 u64 rx_cookie_discards;
48};
49
50struct l2tp_tunnel;
51
52/* Describes a session. Contains information to determine incoming
53 * packets and transmit outgoing ones.
54 */
55struct l2tp_session_cfg {
56 enum l2tp_pwtype pw_type;
57 unsigned data_seq:2; /* data sequencing level
58 * 0 => none, 1 => IP only,
59 * 2 => all
60 */
61 unsigned recv_seq:1; /* expect receive packets with
62 * sequence numbers? */
63 unsigned send_seq:1; /* send packets with sequence
64 * numbers? */
65 unsigned lns_mode:1; /* behave as LNS? LAC enables
66 * sequence numbers under
67 * control of LNS. */
68 int debug; /* bitmask of debug message
69 * categories */
70 u16 vlan_id; /* VLAN pseudowire only */
71 u16 offset; /* offset to payload */
72 u16 l2specific_len; /* Layer 2 specific length */
73 u16 l2specific_type; /* Layer 2 specific type */
74 u8 cookie[8]; /* optional cookie */
75 int cookie_len; /* 0, 4 or 8 bytes */
76 u8 peer_cookie[8]; /* peer's cookie */
77 int peer_cookie_len; /* 0, 4 or 8 bytes */
78 int reorder_timeout; /* configured reorder timeout
79 * (in jiffies) */
80 int mtu;
81 int mru;
82 char *ifname;
83};
84
85struct l2tp_session {
86 int magic; /* should be
87 * L2TP_SESSION_MAGIC */
88
89 struct l2tp_tunnel *tunnel; /* back pointer to tunnel
90 * context */
91 u32 session_id;
92 u32 peer_session_id;
93 u8 cookie[8];
94 int cookie_len;
95 u8 peer_cookie[8];
96 int peer_cookie_len;
97 u16 offset; /* offset from end of L2TP header
98 to beginning of data */
99 u16 l2specific_len;
100 u16 l2specific_type;
101 u16 hdr_len;
102 u32 nr; /* session NR state (receive) */
103 u32 ns; /* session NR state (send) */
104 struct sk_buff_head reorder_q; /* receive reorder queue */
105 struct hlist_node hlist; /* Hash list node */
106 atomic_t ref_count;
107
108 char name[32]; /* for logging */
109 char ifname[IFNAMSIZ];
110 unsigned data_seq:2; /* data sequencing level
111 * 0 => none, 1 => IP only,
112 * 2 => all
113 */
114 unsigned recv_seq:1; /* expect receive packets with
115 * sequence numbers? */
116 unsigned send_seq:1; /* send packets with sequence
117 * numbers? */
118 unsigned lns_mode:1; /* behave as LNS? LAC enables
119 * sequence numbers under
120 * control of LNS. */
121 int debug; /* bitmask of debug message
122 * categories */
123 int reorder_timeout; /* configured reorder timeout
124 * (in jiffies) */
125 int mtu;
126 int mru;
127 enum l2tp_pwtype pwtype;
128 struct l2tp_stats stats;
129 struct hlist_node global_hlist; /* Global hash list node */
130
131 int (*build_header)(struct l2tp_session *session, void *buf);
132 void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len);
133 void (*session_close)(struct l2tp_session *session);
134 void (*ref)(struct l2tp_session *session);
135 void (*deref)(struct l2tp_session *session);
136#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
137 void (*show)(struct seq_file *m, void *priv);
138#endif
139 uint8_t priv[0]; /* private data */
140};
141
142/* Describes the tunnel. It contains info to track all the associated
143 * sessions so incoming packets can be sorted out
144 */
145struct l2tp_tunnel_cfg {
146 int debug; /* bitmask of debug message
147 * categories */
148 enum l2tp_encap_type encap;
149
150 /* Used only for kernel-created sockets */
151 struct in_addr local_ip;
152 struct in_addr peer_ip;
153 u16 local_udp_port;
154 u16 peer_udp_port;
155 unsigned int use_udp_checksums:1;
156};
157
158struct l2tp_tunnel {
159 int magic; /* Should be L2TP_TUNNEL_MAGIC */
160 rwlock_t hlist_lock; /* protect session_hlist */
161 struct hlist_head session_hlist[L2TP_HASH_SIZE];
162 /* hashed list of sessions,
163 * hashed by id */
164 u32 tunnel_id;
165 u32 peer_tunnel_id;
166 int version; /* 2=>L2TPv2, 3=>L2TPv3 */
167
168 char name[20]; /* for logging */
169 int debug; /* bitmask of debug message
170 * categories */
171 enum l2tp_encap_type encap;
172 struct l2tp_stats stats;
173
174 struct list_head list; /* Keep a list of all tunnels */
175 struct net *l2tp_net; /* the net we belong to */
176
177 atomic_t ref_count;
178#ifdef CONFIG_DEBUG_FS
179 void (*show)(struct seq_file *m, void *arg);
180#endif
181 int (*recv_payload_hook)(struct sk_buff *skb);
182 void (*old_sk_destruct)(struct sock *);
183 struct sock *sock; /* Parent socket */
184 int fd;
185
186 uint8_t priv[0]; /* private data */
187};
188
189struct l2tp_nl_cmd_ops {
190 int (*session_create)(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
191 int (*session_delete)(struct l2tp_session *session);
192};
193
194static inline void *l2tp_tunnel_priv(struct l2tp_tunnel *tunnel)
195{
196 return &tunnel->priv[0];
197}
198
199static inline void *l2tp_session_priv(struct l2tp_session *session)
200{
201 return &session->priv[0];
202}
203
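/* Map a tunnel socket back to its l2tp_tunnel. On success a reference is
 * taken on the socket and held for the caller, who should sock_put() it
 * when finished with the tunnel; NULL is returned (and no reference held)
 * if the socket has no tunnel attached.
 */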
204static inline struct l2tp_tunnel *l2tp_sock_to_tunnel(struct sock *sk)
205{
206 struct l2tp_tunnel *tunnel;
207
208 if (sk == NULL)
209 return NULL;
210
211 sock_hold(sk);
212 tunnel = (struct l2tp_tunnel *)(sk->sk_user_data);
213 if (tunnel == NULL) {
214 sock_put(sk);
215 goto out;
216 }
217
218 BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
219
220out:
221 return tunnel;
222}
223
224extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id);
225extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
226extern struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname);
227extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
228extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
229
230extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp);
231extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
232extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
233extern int l2tp_session_delete(struct l2tp_session *session);
234extern void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
235extern void l2tp_session_free(struct l2tp_session *session);
236extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb));
237extern int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, int (*payload_hook)(struct sk_buff *skb));
238extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
239
240extern int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len);
241extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len);
242extern void l2tp_tunnel_destruct(struct sock *sk);
243extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
244extern void l2tp_session_set_header_len(struct l2tp_session *session, int version);
245
246extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops);
247extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
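
/* A pseudowire driver built on this core typically registers a struct
 * l2tp_nl_cmd_ops for its pseudowire type with l2tp_nl_register_ops(),
 * creates sessions from its session_create callback using
 * l2tp_session_create(), transmits with l2tp_xmit_skb() and receives
 * data through the session's recv_skb hook; see l2tp_eth.c for an
 * example of this pattern.
 */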
248
249/* Tunnel reference counts. Incremented per session that is added to
250 * the tunnel.
251 */
252static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
253{
254 atomic_inc(&tunnel->ref_count);
255}
256
257static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
258{
259 if (atomic_dec_and_test(&tunnel->ref_count))
260 l2tp_tunnel_free(tunnel);
261}
262#ifdef L2TP_REFCNT_DEBUG
263#define l2tp_tunnel_inc_refcount(_t) do { \
264 printk(KERN_DEBUG "l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
265 l2tp_tunnel_inc_refcount_1(_t); \
266 } while (0)
267#define l2tp_tunnel_dec_refcount(_t) do { \
268 printk(KERN_DEBUG "l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
269 l2tp_tunnel_dec_refcount_1(_t); \
270 } while (0)
271#else
272#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
273#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
274#endif
275
276/* Session reference counts. Incremented when code obtains a reference
277 * to a session.
278 */
279static inline void l2tp_session_inc_refcount_1(struct l2tp_session *session)
280{
281 atomic_inc(&session->ref_count);
282}
283
284static inline void l2tp_session_dec_refcount_1(struct l2tp_session *session)
285{
286 if (atomic_dec_and_test(&session->ref_count))
287 l2tp_session_free(session);
288}
289
290#ifdef L2TP_REFCNT_DEBUG
291#define l2tp_session_inc_refcount(_s) do { \
292 printk(KERN_DEBUG "l2tp_session_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_s)->name, atomic_read(&_s->ref_count)); \
293 l2tp_session_inc_refcount_1(_s); \
294 } while (0)
295#define l2tp_session_dec_refcount(_s) do { \
296 printk(KERN_DEBUG "l2tp_session_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_s)->name, atomic_read(&_s->ref_count)); \
297 l2tp_session_dec_refcount_1(_s); \
298 } while (0)
299#else
300#define l2tp_session_inc_refcount(s) l2tp_session_inc_refcount_1(s)
301#define l2tp_session_dec_refcount(s) l2tp_session_dec_refcount_1(s)
302#endif
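
/* Typical usage: code that keeps a session pointer obtained from one of
 * the lookup functions takes a reference with l2tp_session_inc_refcount()
 * and drops it with l2tp_session_dec_refcount() when done; the session is
 * freed when the last reference is released.
 */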
303
304#endif /* _L2TP_CORE_H_ */
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
new file mode 100644
index 000000000000..908f10f9720e
--- /dev/null
+++ b/net/l2tp/l2tp_debugfs.c
@@ -0,0 +1,341 @@
1/*
2 * L2TP subsystem debugfs
3 *
4 * Copyright (c) 2010 Katalix Systems Ltd
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/socket.h>
15#include <linux/hash.h>
16#include <linux/l2tp.h>
17#include <linux/in.h>
18#include <linux/etherdevice.h>
19#include <linux/spinlock.h>
20#include <linux/debugfs.h>
21#include <net/sock.h>
22#include <net/ip.h>
23#include <net/icmp.h>
24#include <net/udp.h>
25#include <net/inet_common.h>
26#include <net/inet_hashtables.h>
27#include <net/tcp_states.h>
28#include <net/protocol.h>
29#include <net/xfrm.h>
30#include <net/net_namespace.h>
31#include <net/netns/generic.h>
32
33#include "l2tp_core.h"
34
35static struct dentry *rootdir;
36static struct dentry *tunnels;
37
38struct l2tp_dfs_seq_data {
39 struct net *net;
40 int tunnel_idx; /* current tunnel */
41 int session_idx; /* index of session within current tunnel */
42 struct l2tp_tunnel *tunnel;
43 struct l2tp_session *session; /* NULL means get next tunnel */
44};
45
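/* The seq_file iterator walks every tunnel in the namespace and, within
 * each tunnel, every session: tunnel_idx and session_idx track the current
 * position, and a NULL session after l2tp_dfs_next_session() means the
 * walk advances to the next tunnel.
 */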
46static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
47{
48 pd->tunnel = l2tp_tunnel_find_nth(pd->net, pd->tunnel_idx);
49 pd->tunnel_idx++;
50}
51
52static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
53{
54 pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
55 pd->session_idx++;
56
57 if (pd->session == NULL) {
58 pd->session_idx = 0;
59 l2tp_dfs_next_tunnel(pd);
60 }
61
62}
63
64static void *l2tp_dfs_seq_start(struct seq_file *m, loff_t *offs)
65{
66 struct l2tp_dfs_seq_data *pd = SEQ_START_TOKEN;
67 loff_t pos = *offs;
68
69 if (!pos)
70 goto out;
71
72 BUG_ON(m->private == NULL);
73 pd = m->private;
74
75 if (pd->tunnel == NULL)
76 l2tp_dfs_next_tunnel(pd);
77 else
78 l2tp_dfs_next_session(pd);
79
80 /* NULL tunnel and session indicates end of list */
81 if ((pd->tunnel == NULL) && (pd->session == NULL))
82 pd = NULL;
83
84out:
85 return pd;
86}
87
88
89static void *l2tp_dfs_seq_next(struct seq_file *m, void *v, loff_t *pos)
90{
91 (*pos)++;
92 return NULL;
93}
94
95static void l2tp_dfs_seq_stop(struct seq_file *p, void *v)
96{
97 /* nothing to do */
98}
99
100static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
101{
102 struct l2tp_tunnel *tunnel = v;
103 int session_count = 0;
104 int hash;
105 struct hlist_node *walk;
106 struct hlist_node *tmp;
107
108 read_lock_bh(&tunnel->hlist_lock);
109 for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
110 hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
111 struct l2tp_session *session;
112
113 session = hlist_entry(walk, struct l2tp_session, hlist);
114 if (session->session_id == 0)
115 continue;
116
117 session_count++;
118 }
119 }
120 read_unlock_bh(&tunnel->hlist_lock);
121
122 seq_printf(m, "\nTUNNEL %u peer %u", tunnel->tunnel_id, tunnel->peer_tunnel_id);
123 if (tunnel->sock) {
124 struct inet_sock *inet = inet_sk(tunnel->sock);
125 seq_printf(m, " from " NIPQUAD_FMT " to " NIPQUAD_FMT "\n",
126 NIPQUAD(inet->inet_saddr), NIPQUAD(inet->inet_daddr));
127 if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
128 seq_printf(m, " source port %hu, dest port %hu\n",
129 ntohs(inet->inet_sport), ntohs(inet->inet_dport));
130 }
131 seq_printf(m, " L2TPv%d, %s\n", tunnel->version,
132 tunnel->encap == L2TP_ENCAPTYPE_UDP ? "UDP" :
133 tunnel->encap == L2TP_ENCAPTYPE_IP ? "IP" :
134 "");
135 seq_printf(m, " %d sessions, refcnt %d/%d\n", session_count,
136 tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0,
137 atomic_read(&tunnel->ref_count));
138
139 seq_printf(m, " %08x tx %llu/%llu/%llu rx %llu/%llu/%llu\n",
140 tunnel->debug,
141 (unsigned long long)tunnel->stats.tx_packets,
142 (unsigned long long)tunnel->stats.tx_bytes,
143 (unsigned long long)tunnel->stats.tx_errors,
144 (unsigned long long)tunnel->stats.rx_packets,
145 (unsigned long long)tunnel->stats.rx_bytes,
146 (unsigned long long)tunnel->stats.rx_errors);
147
148 if (tunnel->show != NULL)
149 tunnel->show(m, tunnel);
150}
151
152static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
153{
154 struct l2tp_session *session = v;
155
156 seq_printf(m, " SESSION %u, peer %u, %s\n", session->session_id,
157 session->peer_session_id,
158 session->pwtype == L2TP_PWTYPE_ETH ? "ETH" :
159 session->pwtype == L2TP_PWTYPE_PPP ? "PPP" :
160 "");
161 if (session->send_seq || session->recv_seq)
162 seq_printf(m, " nr %hu, ns %hu\n", session->nr, session->ns);
163 seq_printf(m, " refcnt %d\n", atomic_read(&session->ref_count));
164 seq_printf(m, " config %d/%d/%c/%c/%s/%s %08x %u\n",
165 session->mtu, session->mru,
166 session->recv_seq ? 'R' : '-',
167 session->send_seq ? 'S' : '-',
168 session->data_seq == 1 ? "IPSEQ" :
169 session->data_seq == 2 ? "DATASEQ" : "-",
170 session->lns_mode ? "LNS" : "LAC",
171 session->debug,
172 jiffies_to_msecs(session->reorder_timeout));
173 seq_printf(m, " offset %hu l2specific %hu/%hu\n",
174 session->offset, session->l2specific_type, session->l2specific_len);
175 if (session->cookie_len) {
176 seq_printf(m, " cookie %02x%02x%02x%02x",
177 session->cookie[0], session->cookie[1],
178 session->cookie[2], session->cookie[3]);
179 if (session->cookie_len == 8)
180 seq_printf(m, "%02x%02x%02x%02x",
181 session->cookie[4], session->cookie[5],
182 session->cookie[6], session->cookie[7]);
183 seq_printf(m, "\n");
184 }
185 if (session->peer_cookie_len) {
186 seq_printf(m, " peer cookie %02x%02x%02x%02x",
187 session->peer_cookie[0], session->peer_cookie[1],
188 session->peer_cookie[2], session->peer_cookie[3]);
189 if (session->peer_cookie_len == 8)
190 seq_printf(m, "%02x%02x%02x%02x",
191 session->peer_cookie[4], session->peer_cookie[5],
192 session->peer_cookie[6], session->peer_cookie[7]);
193 seq_printf(m, "\n");
194 }
195
196 seq_printf(m, " %hu/%hu tx %llu/%llu/%llu rx %llu/%llu/%llu\n",
197 session->nr, session->ns,
198 (unsigned long long)session->stats.tx_packets,
199 (unsigned long long)session->stats.tx_bytes,
200 (unsigned long long)session->stats.tx_errors,
201 (unsigned long long)session->stats.rx_packets,
202 (unsigned long long)session->stats.rx_bytes,
203 (unsigned long long)session->stats.rx_errors);
204
205 if (session->show != NULL)
206 session->show(m, session);
207}
208
209static int l2tp_dfs_seq_show(struct seq_file *m, void *v)
210{
211 struct l2tp_dfs_seq_data *pd = v;
212
213 /* display header on line 1 */
214 if (v == SEQ_START_TOKEN) {
215 seq_puts(m, "TUNNEL ID, peer ID from IP to IP\n");
216 seq_puts(m, " L2TPv2/L2TPv3, UDP/IP\n");
217 seq_puts(m, " sessions session-count, refcnt refcnt/sk->refcnt\n");
218 seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
219 seq_puts(m, " SESSION ID, peer ID, PWTYPE\n");
220 seq_puts(m, " refcnt cnt\n");
221 seq_puts(m, " offset OFFSET l2specific TYPE/LEN\n");
222 seq_puts(m, " [ cookie ]\n");
223 seq_puts(m, " [ peer cookie ]\n");
224 seq_puts(m, " config mtu/mru/rcvseq/sendseq/dataseq/lns debug reorderto\n");
225 seq_puts(m, " nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
226 goto out;
227 }
228
229 /* Show the tunnel or session context */
230 if (pd->session == NULL)
231 l2tp_dfs_seq_tunnel_show(m, pd->tunnel);
232 else
233 l2tp_dfs_seq_session_show(m, pd->session);
234
235out:
236 return 0;
237}
238
239static const struct seq_operations l2tp_dfs_seq_ops = {
240 .start = l2tp_dfs_seq_start,
241 .next = l2tp_dfs_seq_next,
242 .stop = l2tp_dfs_seq_stop,
243 .show = l2tp_dfs_seq_show,
244};
245
246static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
247{
248 struct l2tp_dfs_seq_data *pd;
249 struct seq_file *seq;
250 int rc = -ENOMEM;
251
252 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
253 if (pd == NULL)
254 goto out;
255
256 /* Derive the network namespace from the pid opening the
257 * file.
258 */
259 pd->net = get_net_ns_by_pid(current->pid);
260 if (IS_ERR(pd->net)) {
261 rc = PTR_ERR(pd->net);
262 goto err_free_pd;
263 }
264
265 rc = seq_open(file, &l2tp_dfs_seq_ops);
266 if (rc)
267 goto err_free_net;
268
269 seq = file->private_data;
270 seq->private = pd;
271
272out:
273 return rc;
274
275err_free_net:
276 put_net(pd->net);
277err_free_pd:
278 kfree(pd);
279 goto out;
280}
281
282static int l2tp_dfs_seq_release(struct inode *inode, struct file *file)
283{
284 struct l2tp_dfs_seq_data *pd;
285 struct seq_file *seq;
286
287 seq = file->private_data;
288 pd = seq->private;
289 if (pd->net)
290 put_net(pd->net);
291 kfree(pd);
292 seq_release(inode, file);
293
294 return 0;
295}
296
297static const struct file_operations l2tp_dfs_fops = {
298 .owner = THIS_MODULE,
299 .open = l2tp_dfs_seq_open,
300 .read = seq_read,
301 .llseek = seq_lseek,
302 .release = l2tp_dfs_seq_release,
303};
304
305static int __init l2tp_debugfs_init(void)
306{
307 int rc = 0;
308
309 rootdir = debugfs_create_dir("l2tp", NULL);
310 if (IS_ERR(rootdir)) {
311 rc = PTR_ERR(rootdir);
312 rootdir = NULL;
313 goto out;
314 }
315
316 tunnels = debugfs_create_file("tunnels", 0600, rootdir, NULL, &l2tp_dfs_fops);
317 if (tunnels == NULL)
318 rc = -EIO;
319
320 printk(KERN_INFO "L2TP debugfs support\n");
321
322out:
323 if (rc)
324 printk(KERN_WARNING "l2tp debugfs: unable to init\n");
325
326 return rc;
327}
328
329static void __exit l2tp_debugfs_exit(void)
330{
331 debugfs_remove(tunnels);
332 debugfs_remove(rootdir);
333}
334
335module_init(l2tp_debugfs_init);
336module_exit(l2tp_debugfs_exit);
337
338MODULE_LICENSE("GPL");
339MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
340MODULE_DESCRIPTION("L2TP debugfs driver");
341MODULE_VERSION("1.0");
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
new file mode 100644
index 000000000000..ca1164afeb74
--- /dev/null
+++ b/net/l2tp/l2tp_eth.c
@@ -0,0 +1,361 @@
1/*
2 * L2TPv3 ethernet pseudowire driver
3 *
4 * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/socket.h>
15#include <linux/hash.h>
16#include <linux/l2tp.h>
17#include <linux/in.h>
18#include <linux/etherdevice.h>
19#include <linux/spinlock.h>
20#include <net/sock.h>
21#include <net/ip.h>
22#include <net/icmp.h>
23#include <net/udp.h>
24#include <net/inet_common.h>
25#include <net/inet_hashtables.h>
26#include <net/tcp_states.h>
27#include <net/protocol.h>
28#include <net/xfrm.h>
29#include <net/net_namespace.h>
30#include <net/netns/generic.h>
31
32#include "l2tp_core.h"
33
34/* Default device name. May be overridden by name specified by user */
35#define L2TP_ETH_DEV_NAME "l2tpeth%d"
36
37/* via netdev_priv() */
38struct l2tp_eth {
39 struct net_device *dev;
40 struct sock *tunnel_sock;
41 struct l2tp_session *session;
42 struct list_head list;
43};
44
45/* via l2tp_session_priv() */
46struct l2tp_eth_sess {
47 struct net_device *dev;
48};
49
50/* per-net private data for this module */
51static unsigned int l2tp_eth_net_id;
52struct l2tp_eth_net {
53 struct list_head l2tp_eth_dev_list;
54 spinlock_t l2tp_eth_lock;
55};
56
57static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net)
58{
59 return net_generic(net, l2tp_eth_net_id);
60}
61
62static int l2tp_eth_dev_init(struct net_device *dev)
63{
64 struct l2tp_eth *priv = netdev_priv(dev);
65
66 priv->dev = dev;
67 random_ether_addr(dev->dev_addr);
68 memset(&dev->broadcast[0], 0xff, 6);
69
70 return 0;
71}
72
73static void l2tp_eth_dev_uninit(struct net_device *dev)
74{
75 struct l2tp_eth *priv = netdev_priv(dev);
76 struct l2tp_eth_net *pn = l2tp_eth_pernet(dev_net(dev));
77
78 spin_lock(&pn->l2tp_eth_lock);
79 list_del_init(&priv->list);
80 spin_unlock(&pn->l2tp_eth_lock);
81 dev_put(dev);
82}
83
84static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev)
85{
86 struct l2tp_eth *priv = netdev_priv(dev);
87 struct l2tp_session *session = priv->session;
88
89 l2tp_xmit_skb(session, skb, session->hdr_len);
90
91 dev->stats.tx_bytes += skb->len;
92 dev->stats.tx_packets++;
93
94 return 0;
95}
96
97static struct net_device_ops l2tp_eth_netdev_ops = {
98 .ndo_init = l2tp_eth_dev_init,
99 .ndo_uninit = l2tp_eth_dev_uninit,
100 .ndo_start_xmit = l2tp_eth_dev_xmit,
101};
102
103static void l2tp_eth_dev_setup(struct net_device *dev)
104{
105 ether_setup(dev);
106
107 dev->netdev_ops = &l2tp_eth_netdev_ops;
108 dev->destructor = free_netdev;
109}
110
111static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
112{
113 struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
114 struct net_device *dev = spriv->dev;
115
116 if (session->debug & L2TP_MSG_DATA) {
117 unsigned int length;
118 int offset;
119 u8 *ptr = skb->data;
120
121 length = min(32u, skb->len);
122 if (!pskb_may_pull(skb, length))
123 goto error;
124
125 printk(KERN_DEBUG "%s: eth recv: ", session->name);
126
127 offset = 0;
128 do {
129 printk(" %02X", ptr[offset]);
130 } while (++offset < length);
131
132 printk("\n");
133 }
134
135 if (data_len < ETH_HLEN)
136 goto error;
137
138 secpath_reset(skb);
139
140 /* checksums verified by L2TP */
141 skb->ip_summed = CHECKSUM_NONE;
142
143 skb_dst_drop(skb);
144 nf_reset(skb);
145
146 if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
147 dev->last_rx = jiffies;
148 dev->stats.rx_packets++;
149 dev->stats.rx_bytes += data_len;
150 } else
151 dev->stats.rx_errors++;
152
153 return;
154
155error:
156 dev->stats.rx_errors++;
157 kfree_skb(skb);
158}
159
160static void l2tp_eth_delete(struct l2tp_session *session)
161{
162 struct l2tp_eth_sess *spriv;
163 struct net_device *dev;
164
165 if (session) {
166 spriv = l2tp_session_priv(session);
167 dev = spriv->dev;
168 if (dev) {
169 unregister_netdev(dev);
170 spriv->dev = NULL;
171 }
172 }
173}
174
175#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
176static void l2tp_eth_show(struct seq_file *m, void *arg)
177{
178 struct l2tp_session *session = arg;
179 struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
180 struct net_device *dev = spriv->dev;
181
182 seq_printf(m, " interface %s\n", dev->name);
183}
184#endif
185
186static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
187{
188 struct net_device *dev;
189 char name[IFNAMSIZ];
190 struct l2tp_tunnel *tunnel;
191 struct l2tp_session *session;
192 struct l2tp_eth *priv;
193 struct l2tp_eth_sess *spriv;
194 int rc;
195 struct l2tp_eth_net *pn;
196
197 tunnel = l2tp_tunnel_find(net, tunnel_id);
198 if (!tunnel) {
199 rc = -ENODEV;
200 goto out;
201 }
202
203 session = l2tp_session_find(net, tunnel, session_id);
204 if (session) {
205 rc = -EEXIST;
206 goto out;
207 }
208
209 if (cfg->ifname) {
210 dev = dev_get_by_name(net, cfg->ifname);
211 if (dev) {
212 dev_put(dev);
213 rc = -EEXIST;
214 goto out;
215 }
216 strlcpy(name, cfg->ifname, IFNAMSIZ);
217 } else
218 strcpy(name, L2TP_ETH_DEV_NAME);
219
220 session = l2tp_session_create(sizeof(*spriv), tunnel, session_id,
221 peer_session_id, cfg);
222 if (!session) {
223 rc = -ENOMEM;
224 goto out;
225 }
226
227 dev = alloc_netdev(sizeof(*priv), name, l2tp_eth_dev_setup);
228 if (!dev) {
229 rc = -ENOMEM;
230 goto out_del_session;
231 }
232
233 dev_net_set(dev, net);
234 if (session->mtu == 0)
235 session->mtu = dev->mtu - session->hdr_len;
236 dev->mtu = session->mtu;
237 dev->needed_headroom += session->hdr_len;
238
239 priv = netdev_priv(dev);
240 priv->dev = dev;
241 priv->session = session;
242 INIT_LIST_HEAD(&priv->list);
243
244 priv->tunnel_sock = tunnel->sock;
245 session->recv_skb = l2tp_eth_dev_recv;
246 session->session_close = l2tp_eth_delete;
247#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
248 session->show = l2tp_eth_show;
249#endif
250
251 spriv = l2tp_session_priv(session);
252 spriv->dev = dev;
253
254 rc = register_netdev(dev);
255 if (rc < 0)
256 goto out_del_dev;
257
258 /* Must be done after register_netdev() */
259 strlcpy(session->ifname, dev->name, IFNAMSIZ);
260
261 dev_hold(dev);
262 pn = l2tp_eth_pernet(dev_net(dev));
263 spin_lock(&pn->l2tp_eth_lock);
264 list_add(&priv->list, &pn->l2tp_eth_dev_list);
265 spin_unlock(&pn->l2tp_eth_lock);
266
267 return 0;
268
269out_del_dev:
270 free_netdev(dev);
271out_del_session:
272 l2tp_session_delete(session);
273out:
274 return rc;
275}
276
277static __net_init int l2tp_eth_init_net(struct net *net)
278{
279 struct l2tp_eth_net *pn;
280 int err;
281
282 pn = kzalloc(sizeof(*pn), GFP_KERNEL);
283 if (!pn)
284 return -ENOMEM;
285
286 INIT_LIST_HEAD(&pn->l2tp_eth_dev_list);
287 spin_lock_init(&pn->l2tp_eth_lock);
288
289 err = net_assign_generic(net, l2tp_eth_net_id, pn);
290 if (err)
291 goto out;
292
293 return 0;
294
295out:
296 kfree(pn);
297 return err;
298}
299
300static __net_exit void l2tp_eth_exit_net(struct net *net)
301{
302 struct l2tp_eth_net *pn;
303
304 pn = net_generic(net, l2tp_eth_net_id);
305 /*
306 * If someone has cached our net, a further
307 * net_generic() call will return NULL.
308 */
309 net_assign_generic(net, l2tp_eth_net_id, NULL);
310 kfree(pn);
311}
312
313static __net_initdata struct pernet_operations l2tp_eth_net_ops = {
314 .init = l2tp_eth_init_net,
315 .exit = l2tp_eth_exit_net,
316 .id = &l2tp_eth_net_id,
317 .size = sizeof(struct l2tp_eth_net),
318};
319
320
321static const struct l2tp_nl_cmd_ops l2tp_eth_nl_cmd_ops = {
322 .session_create = l2tp_eth_create,
323 .session_delete = l2tp_session_delete,
324};
325
326
327static int __init l2tp_eth_init(void)
328{
329 int err = 0;
330
331 err = l2tp_nl_register_ops(L2TP_PWTYPE_ETH, &l2tp_eth_nl_cmd_ops);
332 if (err)
333 goto out;
334
335 err = register_pernet_device(&l2tp_eth_net_ops);
336 if (err)
337 goto out_unreg;
338
339 printk(KERN_INFO "L2TP ethernet pseudowire support (L2TPv3)\n");
340
341 return 0;
342
343out_unreg:
344 l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
345out:
346 return err;
347}
348
349static void __exit l2tp_eth_exit(void)
350{
351 unregister_pernet_device(&l2tp_eth_net_ops);
352 l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
353}
354
355module_init(l2tp_eth_init);
356module_exit(l2tp_eth_exit);
357
358MODULE_LICENSE("GPL");
359MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
360MODULE_DESCRIPTION("L2TP ethernet pseudowire driver");
361MODULE_VERSION("1.0");
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
new file mode 100644
index 000000000000..75bf784ba18d
--- /dev/null
+++ b/net/l2tp/l2tp_ip.c
@@ -0,0 +1,679 @@
1/*
2 * L2TPv3 IP encapsulation support
3 *
4 * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/icmp.h>
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/random.h>
16#include <linux/socket.h>
17#include <linux/l2tp.h>
18#include <linux/in.h>
19#include <net/sock.h>
20#include <net/ip.h>
21#include <net/icmp.h>
22#include <net/udp.h>
23#include <net/inet_common.h>
24#include <net/inet_hashtables.h>
25#include <net/tcp_states.h>
26#include <net/protocol.h>
27#include <net/xfrm.h>
28
29#include "l2tp_core.h"
30
31struct l2tp_ip_sock {
32 /* inet_sock has to be the first member of l2tp_ip_sock */
33 struct inet_sock inet;
34
35 __u32 conn_id;
36 __u32 peer_conn_id;
37
38 __u64 tx_packets;
39 __u64 tx_bytes;
40 __u64 tx_errors;
41 __u64 rx_packets;
42 __u64 rx_bytes;
43 __u64 rx_errors;
44};
45
46static DEFINE_RWLOCK(l2tp_ip_lock);
47static struct hlist_head l2tp_ip_table;
48static struct hlist_head l2tp_ip_bind_table;
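/* Newly opened L2TP/IP sockets live on l2tp_ip_table; once bound or
 * connected to a tunnel connection id a socket is moved to
 * l2tp_ip_bind_table, which __l2tp_ip_bind_lookup() searches on receive.
 * Both lists are protected by l2tp_ip_lock.
 */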
49
50static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk)
51{
52 return (struct l2tp_ip_sock *)sk;
53}
54
55static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
56{
57 struct hlist_node *node;
58 struct sock *sk;
59
60 sk_for_each_bound(sk, node, &l2tp_ip_bind_table) {
61 struct inet_sock *inet = inet_sk(sk);
62 struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk);
63
64 if (l2tp == NULL)
65 continue;
66
67 if ((l2tp->conn_id == tunnel_id) &&
68#ifdef CONFIG_NET_NS
69 (sk->sk_net == net) &&
70#endif
71 !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
72 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
73 goto found;
74 }
75
76 sk = NULL;
77found:
78 return sk;
79}
80
81static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
82{
83 struct sock *sk = __l2tp_ip_bind_lookup(net, laddr, dif, tunnel_id);
84 if (sk)
85 sock_hold(sk);
86
87 return sk;
88}
89
90/* When processing receive frames, there are two cases to
91 * consider. Data frames consist of a non-zero session-id and an
92 * optional cookie. Control frames consist of a regular L2TP header
93 * preceded by 32-bits of zeros.
94 *
95 * L2TPv3 Session Header Over IP
96 *
97 * 0 1 2 3
98 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
99 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
100 * | Session ID |
101 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
102 * | Cookie (optional, maximum 64 bits)...
103 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
104 * |
105 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
106 *
107 * L2TPv3 Control Message Header Over IP
108 *
109 * 0 1 2 3
110 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
111 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
112 * | (32 bits of zeros) |
113 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
114 * |T|L|x|x|S|x|x|x|x|x|x|x| Ver | Length |
115 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
116 * | Control Connection ID |
117 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
118 * | Ns | Nr |
119 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
120 *
121 * All control frames are passed to userspace.
122 */
123static int l2tp_ip_recv(struct sk_buff *skb)
124{
125 struct sock *sk;
126 u32 session_id;
127 u32 tunnel_id;
128 unsigned char *ptr, *optr;
129 struct l2tp_session *session;
130 struct l2tp_tunnel *tunnel = NULL;
131 int length;
132 int offset;
133
134 /* Point to L2TP header */
135 optr = ptr = skb->data;
136
137 if (!pskb_may_pull(skb, 4))
138 goto discard;
139
140 session_id = ntohl(*((__be32 *) ptr));
141 ptr += 4;
142
143 /* RFC3931: L2TP/IP packets have the first 4 bytes containing
144 * the session_id. If it is 0, the packet is a L2TP control
145 * frame and the session_id value can be discarded.
146 */
147 if (session_id == 0) {
148 __skb_pull(skb, 4);
149 goto pass_up;
150 }
151
152 /* Ok, this is a data packet. Lookup the session. */
153 session = l2tp_session_find(&init_net, NULL, session_id);
154 if (session == NULL)
155 goto discard;
156
157 tunnel = session->tunnel;
158 if (tunnel == NULL)
159 goto discard;
160
161 /* Trace packet contents, if enabled */
162 if (tunnel->debug & L2TP_MSG_DATA) {
163 length = min(32u, skb->len);
164 if (!pskb_may_pull(skb, length))
165 goto discard;
166
167 printk(KERN_DEBUG "%s: ip recv: ", tunnel->name);
168
169 offset = 0;
170 do {
171 printk(" %02X", ptr[offset]);
172 } while (++offset < length);
173
174 printk("\n");
175 }
176
177 l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook);
178
179 return 0;
180
181pass_up:
182 /* Get the tunnel_id from the L2TP header */
183 if (!pskb_may_pull(skb, 12))
184 goto discard;
185
186 if ((skb->data[0] & 0xc0) != 0xc0)
187 goto discard;
188
189 tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
190 tunnel = l2tp_tunnel_find(&init_net, tunnel_id);
191 if (tunnel != NULL)
192 sk = tunnel->sock;
193 else {
194 struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
195
196 read_lock_bh(&l2tp_ip_lock);
197 sk = __l2tp_ip_bind_lookup(&init_net, iph->daddr, 0, tunnel_id);
198 read_unlock_bh(&l2tp_ip_lock);
199 }
200
201 if (sk == NULL)
202 goto discard;
203
204 sock_hold(sk);
205
206 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
207 goto discard_put;
208
209 nf_reset(skb);
210
211 return sk_receive_skb(sk, skb, 1);
212
213discard_put:
214 sock_put(sk);
215
216discard:
217 kfree_skb(skb);
218 return 0;
219}
220
221static int l2tp_ip_open(struct sock *sk)
222{
223 /* Prevent autobind. We don't have ports. */
224 inet_sk(sk)->inet_num = IPPROTO_L2TP;
225
226 write_lock_bh(&l2tp_ip_lock);
227 sk_add_node(sk, &l2tp_ip_table);
228 write_unlock_bh(&l2tp_ip_lock);
229
230 return 0;
231}
232
233static void l2tp_ip_close(struct sock *sk, long timeout)
234{
235 write_lock_bh(&l2tp_ip_lock);
236 hlist_del_init(&sk->sk_bind_node);
237 hlist_del_init(&sk->sk_node);
238 write_unlock_bh(&l2tp_ip_lock);
239 sk_common_release(sk);
240}
241
242static void l2tp_ip_destroy_sock(struct sock *sk)
243{
244 struct sk_buff *skb;
245
246 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
247 kfree_skb(skb);
248
249 sk_refcnt_debug_dec(sk);
250}
251
252static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
253{
254 struct inet_sock *inet = inet_sk(sk);
255 struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *) uaddr;
256 int ret = -EINVAL;
257 int chk_addr_ret;
258
259 ret = -EADDRINUSE;
260 read_lock_bh(&l2tp_ip_lock);
261 if (__l2tp_ip_bind_lookup(&init_net, addr->l2tp_addr.s_addr, sk->sk_bound_dev_if, addr->l2tp_conn_id))
262 goto out_in_use;
263
264 read_unlock_bh(&l2tp_ip_lock);
265
266 lock_sock(sk);
267 if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip))
268 goto out;
269
270 chk_addr_ret = inet_addr_type(&init_net, addr->l2tp_addr.s_addr);
271 ret = -EADDRNOTAVAIL;
272 if (addr->l2tp_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
273 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
274 goto out;
275
276 inet->inet_rcv_saddr = inet->inet_saddr = addr->l2tp_addr.s_addr;
277 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
278 inet->inet_saddr = 0; /* Use device */
279 sk_dst_reset(sk);
280
281 l2tp_ip_sk(sk)->conn_id = addr->l2tp_conn_id;
282
283 write_lock_bh(&l2tp_ip_lock);
284 sk_add_bind_node(sk, &l2tp_ip_bind_table);
285 sk_del_node_init(sk);
286 write_unlock_bh(&l2tp_ip_lock);
287 ret = 0;
288out:
289 release_sock(sk);
290
291 return ret;
292
293out_in_use:
294 read_unlock_bh(&l2tp_ip_lock);
295
296 return ret;
297}
298
299static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
300{
301 int rc;
302 struct inet_sock *inet = inet_sk(sk);
303 struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr;
304 struct rtable *rt;
305 __be32 saddr;
306 int oif;
307
308 rc = -EINVAL;
309 if (addr_len < sizeof(*lsa))
310 goto out;
311
312 rc = -EAFNOSUPPORT;
313 if (lsa->l2tp_family != AF_INET)
314 goto out;
315
316 sk_dst_reset(sk);
317
318 oif = sk->sk_bound_dev_if;
319 saddr = inet->inet_saddr;
320
321 rc = -EINVAL;
322 if (ipv4_is_multicast(lsa->l2tp_addr.s_addr))
323 goto out;
324
325 rc = ip_route_connect(&rt, lsa->l2tp_addr.s_addr, saddr,
326 RT_CONN_FLAGS(sk), oif,
327 IPPROTO_L2TP,
328 0, 0, sk, 1);
329 if (rc) {
330 if (rc == -ENETUNREACH)
331 IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES);
332 goto out;
333 }
334
335 rc = -ENETUNREACH;
336 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
337 ip_rt_put(rt);
338 goto out;
339 }
340
341 l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
342
343 if (!inet->inet_saddr)
344 inet->inet_saddr = rt->rt_src;
345 if (!inet->inet_rcv_saddr)
346 inet->inet_rcv_saddr = rt->rt_src;
347 inet->inet_daddr = rt->rt_dst;
348 sk->sk_state = TCP_ESTABLISHED;
349 inet->inet_id = jiffies;
350
351 sk_dst_set(sk, &rt->u.dst);
352
353 write_lock_bh(&l2tp_ip_lock);
354 hlist_del_init(&sk->sk_bind_node);
355 sk_add_bind_node(sk, &l2tp_ip_bind_table);
356 write_unlock_bh(&l2tp_ip_lock);
357
358 rc = 0;
359out:
360 return rc;
361}
362
363static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
364 int *uaddr_len, int peer)
365{
366 struct sock *sk = sock->sk;
367 struct inet_sock *inet = inet_sk(sk);
368 struct l2tp_ip_sock *lsk = l2tp_ip_sk(sk);
369 struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *)uaddr;
370
371 memset(lsa, 0, sizeof(*lsa));
372 lsa->l2tp_family = AF_INET;
373 if (peer) {
374 if (!inet->inet_dport)
375 return -ENOTCONN;
376 lsa->l2tp_conn_id = lsk->peer_conn_id;
377 lsa->l2tp_addr.s_addr = inet->inet_daddr;
378 } else {
379 __be32 addr = inet->inet_rcv_saddr;
380 if (!addr)
381 addr = inet->inet_saddr;
382 lsa->l2tp_conn_id = lsk->conn_id;
383 lsa->l2tp_addr.s_addr = addr;
384 }
385 *uaddr_len = sizeof(*lsa);
386 return 0;
387}
388
389static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
390{
391 int rc;
392
393 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
394 goto drop;
395
396 nf_reset(skb);
397
398 /* Charge it to the socket, dropping if the queue is full. */
399 rc = sock_queue_rcv_skb(sk, skb);
400 if (rc < 0)
401 goto drop;
402
403 return 0;
404
405drop:
406 IP_INC_STATS(&init_net, IPSTATS_MIB_INDISCARDS);
407 kfree_skb(skb);
408 return -1;
409}
410
411/* Userspace will call sendmsg() on the tunnel socket to send L2TP
412 * control frames.
413 */
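/* The zero session id that marks a control frame is inserted here by the
 * kernel, so userspace supplies only the L2TP control message itself.
 */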
414static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len)
415{
416 struct sk_buff *skb;
417 int rc;
418 struct l2tp_ip_sock *lsa = l2tp_ip_sk(sk);
419 struct inet_sock *inet = inet_sk(sk);
420 struct ip_options *opt = inet->opt;
421 struct rtable *rt = NULL;
422 int connected = 0;
423 __be32 daddr;
424
425 if (sock_flag(sk, SOCK_DEAD))
426 return -ENOTCONN;
427
428 /* Get and verify the address. */
429 if (msg->msg_name) {
430 struct sockaddr_l2tpip *lip = (struct sockaddr_l2tpip *) msg->msg_name;
431 if (msg->msg_namelen < sizeof(*lip))
432 return -EINVAL;
433
434 if (lip->l2tp_family != AF_INET) {
435 if (lip->l2tp_family != AF_UNSPEC)
436 return -EAFNOSUPPORT;
437 }
438
439 daddr = lip->l2tp_addr.s_addr;
440 } else {
441 if (sk->sk_state != TCP_ESTABLISHED)
442 return -EDESTADDRREQ;
443
444 daddr = inet->inet_daddr;
445 connected = 1;
446 }
447
448 /* Allocate a socket buffer */
449 rc = -ENOMEM;
450 skb = sock_wmalloc(sk, 2 + NET_SKB_PAD + sizeof(struct iphdr) +
451 4 + len, 0, GFP_KERNEL);
452 if (!skb)
453 goto error;
454
455 /* Reserve space for headers, putting IP header on 4-byte boundary. */
456 skb_reserve(skb, 2 + NET_SKB_PAD);
457 skb_reset_network_header(skb);
458 skb_reserve(skb, sizeof(struct iphdr));
459 skb_reset_transport_header(skb);
460
461 /* Insert 0 session_id */
462 *((__be32 *) skb_put(skb, 4)) = 0;
463
464 /* Copy user data into skb */
465 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
466 if (rc < 0) {
467 kfree_skb(skb);
468 goto error;
469 }
470
471 if (connected)
472 rt = (struct rtable *) __sk_dst_check(sk, 0);
473
474 if (rt == NULL) {
475 /* Use correct destination address if we have options. */
476 if (opt && opt->srr)
477 daddr = opt->faddr;
478
479 {
480 struct flowi fl = { .oif = sk->sk_bound_dev_if,
481 .nl_u = { .ip4_u = {
482 .daddr = daddr,
483 .saddr = inet->inet_saddr,
484 .tos = RT_CONN_FLAGS(sk) } },
485 .proto = sk->sk_protocol,
486 .flags = inet_sk_flowi_flags(sk),
487 .uli_u = { .ports = {
488 .sport = inet->inet_sport,
489 .dport = inet->inet_dport } } };
490
491 /* If this fails, retransmit mechanism of transport layer will
492 * keep trying until route appears or the connection times
493 * itself out.
494 */
495 security_sk_classify_flow(sk, &fl);
496 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
497 goto no_route;
498 }
499 sk_setup_caps(sk, &rt->u.dst);
500 }
501 skb_dst_set(skb, dst_clone(&rt->u.dst));
502
503 /* Queue the packet to IP for output */
504 rc = ip_queue_xmit(skb, 0);
505
506error:
507 /* Update stats */
508 if (rc >= 0) {
509 lsa->tx_packets++;
510 lsa->tx_bytes += len;
511 rc = len;
512 } else {
513 lsa->tx_errors++;
514 }
515
516 return rc;
517
518no_route:
519 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
520 kfree_skb(skb);
521 return -EHOSTUNREACH;
522}
523
524static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
525 size_t len, int noblock, int flags, int *addr_len)
526{
527 struct inet_sock *inet = inet_sk(sk);
528 struct l2tp_ip_sock *lsk = l2tp_ip_sk(sk);
529 size_t copied = 0;
530 int err = -EOPNOTSUPP;
531 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
532 struct sk_buff *skb;
533
534 if (flags & MSG_OOB)
535 goto out;
536
537 if (addr_len)
538 *addr_len = sizeof(*sin);
539
540 skb = skb_recv_datagram(sk, flags, noblock, &err);
541 if (!skb)
542 goto out;
543
544 copied = skb->len;
545 if (len < copied) {
546 msg->msg_flags |= MSG_TRUNC;
547 copied = len;
548 }
549
550 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
551 if (err)
552 goto done;
553
554 sock_recv_timestamp(msg, sk, skb);
555
556 /* Copy the address. */
557 if (sin) {
558 sin->sin_family = AF_INET;
559 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
560 sin->sin_port = 0;
561 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
562 }
563 if (inet->cmsg_flags)
564 ip_cmsg_recv(msg, skb);
565 if (flags & MSG_TRUNC)
566 copied = skb->len;
567done:
568 skb_free_datagram(sk, skb);
569out:
570 if (err) {
571 lsk->rx_errors++;
572 return err;
573 }
574
575 lsk->rx_packets++;
576 lsk->rx_bytes += copied;
577
578 return copied;
579}
580
581struct proto l2tp_ip_prot = {
582 .name = "L2TP/IP",
583 .owner = THIS_MODULE,
584 .init = l2tp_ip_open,
585 .close = l2tp_ip_close,
586 .bind = l2tp_ip_bind,
587 .connect = l2tp_ip_connect,
588 .disconnect = udp_disconnect,
589 .ioctl = udp_ioctl,
590 .destroy = l2tp_ip_destroy_sock,
591 .setsockopt = ip_setsockopt,
592 .getsockopt = ip_getsockopt,
593 .sendmsg = l2tp_ip_sendmsg,
594 .recvmsg = l2tp_ip_recvmsg,
595 .backlog_rcv = l2tp_ip_backlog_recv,
596 .hash = inet_hash,
597 .unhash = inet_unhash,
598 .obj_size = sizeof(struct l2tp_ip_sock),
599#ifdef CONFIG_COMPAT
600 .compat_setsockopt = compat_ip_setsockopt,
601 .compat_getsockopt = compat_ip_getsockopt,
602#endif
603};
604
605static const struct proto_ops l2tp_ip_ops = {
606 .family = PF_INET,
607 .owner = THIS_MODULE,
608 .release = inet_release,
609 .bind = inet_bind,
610 .connect = inet_dgram_connect,
611 .socketpair = sock_no_socketpair,
612 .accept = sock_no_accept,
613 .getname = l2tp_ip_getname,
614 .poll = datagram_poll,
615 .ioctl = inet_ioctl,
616 .listen = sock_no_listen,
617 .shutdown = inet_shutdown,
618 .setsockopt = sock_common_setsockopt,
619 .getsockopt = sock_common_getsockopt,
620 .sendmsg = inet_sendmsg,
621 .recvmsg = sock_common_recvmsg,
622 .mmap = sock_no_mmap,
623 .sendpage = sock_no_sendpage,
624#ifdef CONFIG_COMPAT
625 .compat_setsockopt = compat_sock_common_setsockopt,
626 .compat_getsockopt = compat_sock_common_getsockopt,
627#endif
628};
629
630static struct inet_protosw l2tp_ip_protosw = {
631 .type = SOCK_DGRAM,
632 .protocol = IPPROTO_L2TP,
633 .prot = &l2tp_ip_prot,
634 .ops = &l2tp_ip_ops,
635 .no_check = 0,
636};
637
638static struct net_protocol l2tp_ip_protocol __read_mostly = {
639 .handler = l2tp_ip_recv,
640};
641
642static int __init l2tp_ip_init(void)
643{
644 int err;
645
646 printk(KERN_INFO "L2TP IP encapsulation support (L2TPv3)\n");
647
648 err = proto_register(&l2tp_ip_prot, 1);
649 if (err != 0)
650 goto out;
651
652 err = inet_add_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
653 if (err)
654 goto out1;
655
656 inet_register_protosw(&l2tp_ip_protosw);
657 return 0;
658
659out1:
660 proto_unregister(&l2tp_ip_prot);
661out:
662 return err;
663}
664
665static void __exit l2tp_ip_exit(void)
666{
667 inet_unregister_protosw(&l2tp_ip_protosw);
668 inet_del_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
669 proto_unregister(&l2tp_ip_prot);
670}
671
672module_init(l2tp_ip_init);
673module_exit(l2tp_ip_exit);
674
675MODULE_LICENSE("GPL");
676MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
677MODULE_DESCRIPTION("L2TP over IP");
678MODULE_VERSION("1.0");
679MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, SOCK_DGRAM, IPPROTO_L2TP);
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
new file mode 100644
index 000000000000..4c1e540732d7
--- /dev/null
+++ b/net/l2tp/l2tp_netlink.c
@@ -0,0 +1,840 @@
1/*
2 * L2TP netlink layer, for management
3 *
4 * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
5 *
6 * Partly based on the IrDA netlink implementation
7 * (see net/irda/irnetlink.c) which is:
8 * Copyright (c) 2007 Samuel Ortiz <samuel@sortiz.org>
9 * which is in turn partly based on the wireless netlink code:
10 * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2 as
14 * published by the Free Software Foundation.
15 */
16
17#include <net/sock.h>
18#include <net/genetlink.h>
19#include <net/udp.h>
20#include <linux/in.h>
21#include <linux/udp.h>
22#include <linux/socket.h>
23#include <linux/module.h>
24#include <linux/list.h>
25#include <net/net_namespace.h>
26
27#include <linux/l2tp.h>
28
29#include "l2tp_core.h"
30
31
32static struct genl_family l2tp_nl_family = {
33 .id = GENL_ID_GENERATE,
34 .name = L2TP_GENL_NAME,
35 .version = L2TP_GENL_VERSION,
36 .hdrsize = 0,
37 .maxattr = L2TP_ATTR_MAX,
38};
39
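/* Pseudowire drivers populate the table below via l2tp_nl_register_ops();
 * the session create/delete commands dispatch to the ops registered for
 * the requested pseudowire type.
 */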
40/* Accessed under genl lock */
41static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX];
42
43static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info)
44{
45 u32 tunnel_id;
46 u32 session_id;
47 char *ifname;
48 struct l2tp_tunnel *tunnel;
49 struct l2tp_session *session = NULL;
50 struct net *net = genl_info_net(info);
51
52 if (info->attrs[L2TP_ATTR_IFNAME]) {
53 ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
54 session = l2tp_session_find_by_ifname(net, ifname);
55 } else if ((info->attrs[L2TP_ATTR_SESSION_ID]) &&
56 (info->attrs[L2TP_ATTR_CONN_ID])) {
57 tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
58 session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
59 tunnel = l2tp_tunnel_find(net, tunnel_id);
60 if (tunnel)
61 session = l2tp_session_find(net, tunnel, session_id);
62 }
63
64 return session;
65}
66
67static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info)
68{
69 struct sk_buff *msg;
70 void *hdr;
71 int ret = -ENOBUFS;
72
73 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
74 if (!msg) {
75 ret = -ENOMEM;
76 goto out;
77 }
78
79 hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
80 &l2tp_nl_family, 0, L2TP_CMD_NOOP);
81 if (!hdr) {
82 ret = -EMSGSIZE;
83 goto err_out;
84 }
85
86 genlmsg_end(msg, hdr);
87
88 return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);
89
90err_out:
91 nlmsg_free(msg);
92
93out:
94 return ret;
95}
96
97static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info)
98{
99 u32 tunnel_id;
100 u32 peer_tunnel_id;
101 int proto_version;
102 int fd;
103 int ret = 0;
104 struct l2tp_tunnel_cfg cfg = { 0, };
105 struct l2tp_tunnel *tunnel;
106 struct net *net = genl_info_net(info);
107
108 if (!info->attrs[L2TP_ATTR_CONN_ID]) {
109 ret = -EINVAL;
110 goto out;
111 }
112 tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
113
114 if (!info->attrs[L2TP_ATTR_PEER_CONN_ID]) {
115 ret = -EINVAL;
116 goto out;
117 }
118 peer_tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_CONN_ID]);
119
120 if (!info->attrs[L2TP_ATTR_PROTO_VERSION]) {
121 ret = -EINVAL;
122 goto out;
123 }
124 proto_version = nla_get_u8(info->attrs[L2TP_ATTR_PROTO_VERSION]);
125
126 if (!info->attrs[L2TP_ATTR_ENCAP_TYPE]) {
127 ret = -EINVAL;
128 goto out;
129 }
130 cfg.encap = nla_get_u16(info->attrs[L2TP_ATTR_ENCAP_TYPE]);
131
132 fd = -1;
133 if (info->attrs[L2TP_ATTR_FD]) {
134 fd = nla_get_u32(info->attrs[L2TP_ATTR_FD]);
135 } else {
136 if (info->attrs[L2TP_ATTR_IP_SADDR])
137 cfg.local_ip.s_addr = nla_get_be32(info->attrs[L2TP_ATTR_IP_SADDR]);
138 if (info->attrs[L2TP_ATTR_IP_DADDR])
139 cfg.peer_ip.s_addr = nla_get_be32(info->attrs[L2TP_ATTR_IP_DADDR]);
140 if (info->attrs[L2TP_ATTR_UDP_SPORT])
141 cfg.local_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_SPORT]);
142 if (info->attrs[L2TP_ATTR_UDP_DPORT])
143 cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]);
144 if (info->attrs[L2TP_ATTR_UDP_CSUM])
145 cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]);
146 }
147
148 if (info->attrs[L2TP_ATTR_DEBUG])
149 cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
150
151 tunnel = l2tp_tunnel_find(net, tunnel_id);
152 if (tunnel != NULL) {
153 ret = -EEXIST;
154 goto out;
155 }
156
157 ret = -EINVAL;
158 switch (cfg.encap) {
159 case L2TP_ENCAPTYPE_UDP:
160 case L2TP_ENCAPTYPE_IP:
161 ret = l2tp_tunnel_create(net, fd, proto_version, tunnel_id,
162 peer_tunnel_id, &cfg, &tunnel);
163 break;
164 }
165
166out:
167 return ret;
168}
169
170static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info)
171{
172 struct l2tp_tunnel *tunnel;
173 u32 tunnel_id;
174 int ret = 0;
175 struct net *net = genl_info_net(info);
176
177 if (!info->attrs[L2TP_ATTR_CONN_ID]) {
178 ret = -EINVAL;
179 goto out;
180 }
181 tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
182
183 tunnel = l2tp_tunnel_find(net, tunnel_id);
184 if (tunnel == NULL) {
185 ret = -ENODEV;
186 goto out;
187 }
188
189 (void) l2tp_tunnel_delete(tunnel);
190
191out:
192 return ret;
193}
194
195static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info)
196{
197 struct l2tp_tunnel *tunnel;
198 u32 tunnel_id;
199 int ret = 0;
200 struct net *net = genl_info_net(info);
201
202 if (!info->attrs[L2TP_ATTR_CONN_ID]) {
203 ret = -EINVAL;
204 goto out;
205 }
206 tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
207
208 tunnel = l2tp_tunnel_find(net, tunnel_id);
209 if (tunnel == NULL) {
210 ret = -ENODEV;
211 goto out;
212 }
213
214 if (info->attrs[L2TP_ATTR_DEBUG])
215 tunnel->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
216
217out:
218 return ret;
219}
220
221static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
222 struct l2tp_tunnel *tunnel)
223{
224 void *hdr;
225 struct nlattr *nest;
226 struct sock *sk = NULL;
227 struct inet_sock *inet;
228
229 hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags,
230 L2TP_CMD_TUNNEL_GET);
231 if (!hdr)
232 return -EMSGSIZE;
233
234 NLA_PUT_U8(skb, L2TP_ATTR_PROTO_VERSION, tunnel->version);
235 NLA_PUT_U32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id);
236 NLA_PUT_U32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id);
237 NLA_PUT_U32(skb, L2TP_ATTR_DEBUG, tunnel->debug);
238 NLA_PUT_U16(skb, L2TP_ATTR_ENCAP_TYPE, tunnel->encap);
239
240 nest = nla_nest_start(skb, L2TP_ATTR_STATS);
241 if (nest == NULL)
242 goto nla_put_failure;
243
244 NLA_PUT_U64(skb, L2TP_ATTR_TX_PACKETS, tunnel->stats.tx_packets);
245 NLA_PUT_U64(skb, L2TP_ATTR_TX_BYTES, tunnel->stats.tx_bytes);
246 NLA_PUT_U64(skb, L2TP_ATTR_TX_ERRORS, tunnel->stats.tx_errors);
247 NLA_PUT_U64(skb, L2TP_ATTR_RX_PACKETS, tunnel->stats.rx_packets);
248 NLA_PUT_U64(skb, L2TP_ATTR_RX_BYTES, tunnel->stats.rx_bytes);
249 NLA_PUT_U64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, tunnel->stats.rx_seq_discards);
250 NLA_PUT_U64(skb, L2TP_ATTR_RX_OOS_PACKETS, tunnel->stats.rx_oos_packets);
251 NLA_PUT_U64(skb, L2TP_ATTR_RX_ERRORS, tunnel->stats.rx_errors);
252 nla_nest_end(skb, nest);
253
254 sk = tunnel->sock;
255 if (!sk)
256 goto out;
257
258 inet = inet_sk(sk);
259
260 switch (tunnel->encap) {
261 case L2TP_ENCAPTYPE_UDP:
262 NLA_PUT_U16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport));
263 NLA_PUT_U16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport));
264 NLA_PUT_U8(skb, L2TP_ATTR_UDP_CSUM, (sk->sk_no_check != UDP_CSUM_NOXMIT));
265 /* NOBREAK */
266 case L2TP_ENCAPTYPE_IP:
267 NLA_PUT_BE32(skb, L2TP_ATTR_IP_SADDR, inet->inet_saddr);
268 NLA_PUT_BE32(skb, L2TP_ATTR_IP_DADDR, inet->inet_daddr);
269 break;
270 }
271
272out:
273 return genlmsg_end(skb, hdr);
274
275nla_put_failure:
276 genlmsg_cancel(skb, hdr);
277 return -1;
278}
279
280static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info)
281{
282 struct l2tp_tunnel *tunnel;
283 struct sk_buff *msg;
284 u32 tunnel_id;
285 int ret = -ENOBUFS;
286 struct net *net = genl_info_net(info);
287
288 if (!info->attrs[L2TP_ATTR_CONN_ID]) {
289 ret = -EINVAL;
290 goto out;
291 }
292
293 tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
294
295 tunnel = l2tp_tunnel_find(net, tunnel_id);
296 if (tunnel == NULL) {
297 ret = -ENODEV;
298 goto out;
299 }
300
301 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
302 if (!msg) {
303 ret = -ENOMEM;
304 goto out;
305 }
306
307 ret = l2tp_nl_tunnel_send(msg, info->snd_pid, info->snd_seq,
308 NLM_F_ACK, tunnel);
309 if (ret < 0)
310 goto err_out;
311
312 return genlmsg_unicast(net, msg, info->snd_pid);
313
314err_out:
315 nlmsg_free(msg);
316
317out:
318 return ret;
319}
320
321static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback *cb)
322{
323 int ti = cb->args[0];
324 struct l2tp_tunnel *tunnel;
325 struct net *net = sock_net(skb->sk);
326
327 for (;;) {
328 tunnel = l2tp_tunnel_find_nth(net, ti);
329 if (tunnel == NULL)
330 goto out;
331
332 if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).pid,
333 cb->nlh->nlmsg_seq, NLM_F_MULTI,
334 tunnel) <= 0)
335 goto out;
336
337 ti++;
338 }
339
340out:
341 cb->args[0] = ti;
342
343 return skb->len;
344}
345
346static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *info)
347{
348 u32 tunnel_id = 0;
349 u32 session_id;
350 u32 peer_session_id;
351 int ret = 0;
352 struct l2tp_tunnel *tunnel;
353 struct l2tp_session *session;
354 struct l2tp_session_cfg cfg = { 0, };
355 struct net *net = genl_info_net(info);
356
357 if (!info->attrs[L2TP_ATTR_CONN_ID]) {
358 ret = -EINVAL;
359 goto out;
360 }
361 tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
362 tunnel = l2tp_tunnel_find(net, tunnel_id);
363 if (!tunnel) {
364 ret = -ENODEV;
365 goto out;
366 }
367
368 if (!info->attrs[L2TP_ATTR_SESSION_ID]) {
369 ret = -EINVAL;
370 goto out;
371 }
372 session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
373 session = l2tp_session_find(net, tunnel, session_id);
374 if (session) {
375 ret = -EEXIST;
376 goto out;
377 }
378
379 if (!info->attrs[L2TP_ATTR_PEER_SESSION_ID]) {
380 ret = -EINVAL;
381 goto out;
382 }
383 peer_session_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_SESSION_ID]);
384
385 if (!info->attrs[L2TP_ATTR_PW_TYPE]) {
386 ret = -EINVAL;
387 goto out;
388 }
389 cfg.pw_type = nla_get_u16(info->attrs[L2TP_ATTR_PW_TYPE]);
390 if (cfg.pw_type >= __L2TP_PWTYPE_MAX) {
391 ret = -EINVAL;
392 goto out;
393 }
394
395 if (tunnel->version > 2) {
396 if (info->attrs[L2TP_ATTR_OFFSET])
397 cfg.offset = nla_get_u16(info->attrs[L2TP_ATTR_OFFSET]);
398
399 if (info->attrs[L2TP_ATTR_DATA_SEQ])
400 cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
401
402 cfg.l2specific_type = L2TP_L2SPECTYPE_DEFAULT;
403 if (info->attrs[L2TP_ATTR_L2SPEC_TYPE])
404 cfg.l2specific_type = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_TYPE]);
405
406 cfg.l2specific_len = 4;
407 if (info->attrs[L2TP_ATTR_L2SPEC_LEN])
408 cfg.l2specific_len = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_LEN]);
409
410 if (info->attrs[L2TP_ATTR_COOKIE]) {
411 u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]);
412 if (len > 8) {
413 ret = -EINVAL;
414 goto out;
415 }
416 cfg.cookie_len = len;
417 memcpy(&cfg.cookie[0], nla_data(info->attrs[L2TP_ATTR_COOKIE]), len);
418 }
419 if (info->attrs[L2TP_ATTR_PEER_COOKIE]) {
420 u16 len = nla_len(info->attrs[L2TP_ATTR_PEER_COOKIE]);
421 if (len > 8) {
422 ret = -EINVAL;
423 goto out;
424 }
425 cfg.peer_cookie_len = len;
426 memcpy(&cfg.peer_cookie[0], nla_data(info->attrs[L2TP_ATTR_PEER_COOKIE]), len);
427 }
428 if (info->attrs[L2TP_ATTR_IFNAME])
429 cfg.ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
430
431 if (info->attrs[L2TP_ATTR_VLAN_ID])
432 cfg.vlan_id = nla_get_u16(info->attrs[L2TP_ATTR_VLAN_ID]);
433 }
434
435 if (info->attrs[L2TP_ATTR_DEBUG])
436 cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
437
438 if (info->attrs[L2TP_ATTR_RECV_SEQ])
439 cfg.recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
440
441 if (info->attrs[L2TP_ATTR_SEND_SEQ])
442 cfg.send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]);
443
444 if (info->attrs[L2TP_ATTR_LNS_MODE])
445 cfg.lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]);
446
447 if (info->attrs[L2TP_ATTR_RECV_TIMEOUT])
448 cfg.reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]);
449
450 if (info->attrs[L2TP_ATTR_MTU])
451 cfg.mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]);
452
453 if (info->attrs[L2TP_ATTR_MRU])
454 cfg.mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]);
455
456 if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) ||
457 (l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) {
458 ret = -EPROTONOSUPPORT;
459 goto out;
460 }
461
462 /* Check that pseudowire-specific params are present */
463 switch (cfg.pw_type) {
464 case L2TP_PWTYPE_NONE:
465 break;
466 case L2TP_PWTYPE_ETH_VLAN:
467 if (!info->attrs[L2TP_ATTR_VLAN_ID]) {
468 ret = -EINVAL;
469 goto out;
470 }
471 break;
472 case L2TP_PWTYPE_ETH:
473 break;
474 case L2TP_PWTYPE_PPP:
475 case L2TP_PWTYPE_PPP_AC:
476 break;
477 case L2TP_PWTYPE_IP:
478 default:
479 ret = -EPROTONOSUPPORT;
480 break;
481 }
482
483 ret = -EPROTONOSUPPORT;
484 if (l2tp_nl_cmd_ops[cfg.pw_type]->session_create)
485 ret = (*l2tp_nl_cmd_ops[cfg.pw_type]->session_create)(net, tunnel_id,
486 session_id, peer_session_id, &cfg);
487
488out:
489 return ret;
490}
491
492static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *info)
493{
494 int ret = 0;
495 struct l2tp_session *session;
496 u16 pw_type;
497
498 session = l2tp_nl_session_find(info);
499 if (session == NULL) {
500 ret = -ENODEV;
501 goto out;
502 }
503
504 pw_type = session->pwtype;
505 if (pw_type < __L2TP_PWTYPE_MAX)
506 if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
507 ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session);
508
509out:
510 return ret;
511}
512
513static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *info)
514{
515 int ret = 0;
516 struct l2tp_session *session;
517
518 session = l2tp_nl_session_find(info);
519 if (session == NULL) {
520 ret = -ENODEV;
521 goto out;
522 }
523
524 if (info->attrs[L2TP_ATTR_DEBUG])
525 session->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
526
527 if (info->attrs[L2TP_ATTR_DATA_SEQ])
528 session->data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
529
530 if (info->attrs[L2TP_ATTR_RECV_SEQ])
531 session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
532
533 if (info->attrs[L2TP_ATTR_SEND_SEQ])
534 session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]);
535
536 if (info->attrs[L2TP_ATTR_LNS_MODE])
537 session->lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]);
538
539 if (info->attrs[L2TP_ATTR_RECV_TIMEOUT])
540 session->reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]);
541
542 if (info->attrs[L2TP_ATTR_MTU])
543 session->mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]);
544
545 if (info->attrs[L2TP_ATTR_MRU])
546 session->mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]);
547
548out:
549 return ret;
550}
551
552static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
553 struct l2tp_session *session)
554{
555 void *hdr;
556 struct nlattr *nest;
557 struct l2tp_tunnel *tunnel = session->tunnel;
558 struct sock *sk = NULL;
559
560 sk = tunnel->sock;
561
562 hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET);
563 if (IS_ERR(hdr))
564 return PTR_ERR(hdr);
565
566 NLA_PUT_U32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id);
567 NLA_PUT_U32(skb, L2TP_ATTR_SESSION_ID, session->session_id);
568 NLA_PUT_U32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id);
569 NLA_PUT_U32(skb, L2TP_ATTR_PEER_SESSION_ID, session->peer_session_id);
570 NLA_PUT_U32(skb, L2TP_ATTR_DEBUG, session->debug);
571 NLA_PUT_U16(skb, L2TP_ATTR_PW_TYPE, session->pwtype);
572 NLA_PUT_U16(skb, L2TP_ATTR_MTU, session->mtu);
573 if (session->mru)
574 NLA_PUT_U16(skb, L2TP_ATTR_MRU, session->mru);
575
576 if (session->ifname && session->ifname[0])
577 NLA_PUT_STRING(skb, L2TP_ATTR_IFNAME, session->ifname);
578 if (session->cookie_len)
579 NLA_PUT(skb, L2TP_ATTR_COOKIE, session->cookie_len, &session->cookie[0]);
580 if (session->peer_cookie_len)
581 NLA_PUT(skb, L2TP_ATTR_PEER_COOKIE, session->peer_cookie_len, &session->peer_cookie[0]);
582 NLA_PUT_U8(skb, L2TP_ATTR_RECV_SEQ, session->recv_seq);
583 NLA_PUT_U8(skb, L2TP_ATTR_SEND_SEQ, session->send_seq);
584 NLA_PUT_U8(skb, L2TP_ATTR_LNS_MODE, session->lns_mode);
585#ifdef CONFIG_XFRM
586 if ((sk) && (sk->sk_policy[0] || sk->sk_policy[1]))
587 NLA_PUT_U8(skb, L2TP_ATTR_USING_IPSEC, 1);
588#endif
589 if (session->reorder_timeout)
590 NLA_PUT_MSECS(skb, L2TP_ATTR_RECV_TIMEOUT, session->reorder_timeout);
591
592 nest = nla_nest_start(skb, L2TP_ATTR_STATS);
593 if (nest == NULL)
594 goto nla_put_failure;
595 NLA_PUT_U64(skb, L2TP_ATTR_TX_PACKETS, session->stats.tx_packets);
596 NLA_PUT_U64(skb, L2TP_ATTR_TX_BYTES, session->stats.tx_bytes);
597 NLA_PUT_U64(skb, L2TP_ATTR_TX_ERRORS, session->stats.tx_errors);
598 NLA_PUT_U64(skb, L2TP_ATTR_RX_PACKETS, session->stats.rx_packets);
599 NLA_PUT_U64(skb, L2TP_ATTR_RX_BYTES, session->stats.rx_bytes);
600 NLA_PUT_U64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, session->stats.rx_seq_discards);
601 NLA_PUT_U64(skb, L2TP_ATTR_RX_OOS_PACKETS, session->stats.rx_oos_packets);
602 NLA_PUT_U64(skb, L2TP_ATTR_RX_ERRORS, session->stats.rx_errors);
603 nla_nest_end(skb, nest);
604
605 return genlmsg_end(skb, hdr);
606
607 nla_put_failure:
608 genlmsg_cancel(skb, hdr);
609 return -1;
610}
611
612static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
613{
614 struct l2tp_session *session;
615 struct sk_buff *msg;
616 int ret;
617
618 session = l2tp_nl_session_find(info);
619 if (session == NULL) {
620 ret = -ENODEV;
621 goto out;
622 }
623
624 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
625 if (!msg) {
626 ret = -ENOMEM;
627 goto out;
628 }
629
630 ret = l2tp_nl_session_send(msg, info->snd_pid, info->snd_seq,
631 0, session);
632 if (ret < 0)
633 goto err_out;
634
635 return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);
636
637err_out:
638 nlmsg_free(msg);
639
640out:
641 return ret;
642}
643
644static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback *cb)
645{
646 struct net *net = sock_net(skb->sk);
647 struct l2tp_session *session;
648 struct l2tp_tunnel *tunnel = NULL;
649 int ti = cb->args[0];
650 int si = cb->args[1];
651
652 for (;;) {
653 if (tunnel == NULL) {
654 tunnel = l2tp_tunnel_find_nth(net, ti);
655 if (tunnel == NULL)
656 goto out;
657 }
658
659 session = l2tp_session_find_nth(tunnel, si);
660 if (session == NULL) {
661 ti++;
662 tunnel = NULL;
663 si = 0;
664 continue;
665 }
666
667 if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).pid,
668 cb->nlh->nlmsg_seq, NLM_F_MULTI,
669 session) <= 0)
670 break;
671
672 si++;
673 }
674
675out:
676 cb->args[0] = ti;
677 cb->args[1] = si;
678
679 return skb->len;
680}
681
682static struct nla_policy l2tp_nl_policy[L2TP_ATTR_MAX + 1] = {
683 [L2TP_ATTR_NONE] = { .type = NLA_UNSPEC, },
684 [L2TP_ATTR_PW_TYPE] = { .type = NLA_U16, },
685 [L2TP_ATTR_ENCAP_TYPE] = { .type = NLA_U16, },
686 [L2TP_ATTR_OFFSET] = { .type = NLA_U16, },
687 [L2TP_ATTR_DATA_SEQ] = { .type = NLA_U8, },
688 [L2TP_ATTR_L2SPEC_TYPE] = { .type = NLA_U8, },
689 [L2TP_ATTR_L2SPEC_LEN] = { .type = NLA_U8, },
690 [L2TP_ATTR_PROTO_VERSION] = { .type = NLA_U8, },
691 [L2TP_ATTR_CONN_ID] = { .type = NLA_U32, },
692 [L2TP_ATTR_PEER_CONN_ID] = { .type = NLA_U32, },
693 [L2TP_ATTR_SESSION_ID] = { .type = NLA_U32, },
694 [L2TP_ATTR_PEER_SESSION_ID] = { .type = NLA_U32, },
695 [L2TP_ATTR_UDP_CSUM] = { .type = NLA_U8, },
696 [L2TP_ATTR_VLAN_ID] = { .type = NLA_U16, },
697 [L2TP_ATTR_DEBUG] = { .type = NLA_U32, },
698 [L2TP_ATTR_RECV_SEQ] = { .type = NLA_U8, },
699 [L2TP_ATTR_SEND_SEQ] = { .type = NLA_U8, },
700 [L2TP_ATTR_LNS_MODE] = { .type = NLA_U8, },
701 [L2TP_ATTR_USING_IPSEC] = { .type = NLA_U8, },
702 [L2TP_ATTR_RECV_TIMEOUT] = { .type = NLA_MSECS, },
703 [L2TP_ATTR_FD] = { .type = NLA_U32, },
704 [L2TP_ATTR_IP_SADDR] = { .type = NLA_U32, },
705 [L2TP_ATTR_IP_DADDR] = { .type = NLA_U32, },
706 [L2TP_ATTR_UDP_SPORT] = { .type = NLA_U16, },
707 [L2TP_ATTR_UDP_DPORT] = { .type = NLA_U16, },
708 [L2TP_ATTR_MTU] = { .type = NLA_U16, },
709 [L2TP_ATTR_MRU] = { .type = NLA_U16, },
710 [L2TP_ATTR_STATS] = { .type = NLA_NESTED, },
711 [L2TP_ATTR_IFNAME] = {
712 .type = NLA_NUL_STRING,
713 .len = IFNAMSIZ - 1,
714 },
715 [L2TP_ATTR_COOKIE] = {
716 .type = NLA_BINARY,
717 .len = 8,
718 },
719 [L2TP_ATTR_PEER_COOKIE] = {
720 .type = NLA_BINARY,
721 .len = 8,
722 },
723};
724
725static struct genl_ops l2tp_nl_ops[] = {
726 {
727 .cmd = L2TP_CMD_NOOP,
728 .doit = l2tp_nl_cmd_noop,
729 .policy = l2tp_nl_policy,
730 /* can be retrieved by unprivileged users */
731 },
732 {
733 .cmd = L2TP_CMD_TUNNEL_CREATE,
734 .doit = l2tp_nl_cmd_tunnel_create,
735 .policy = l2tp_nl_policy,
736 .flags = GENL_ADMIN_PERM,
737 },
738 {
739 .cmd = L2TP_CMD_TUNNEL_DELETE,
740 .doit = l2tp_nl_cmd_tunnel_delete,
741 .policy = l2tp_nl_policy,
742 .flags = GENL_ADMIN_PERM,
743 },
744 {
745 .cmd = L2TP_CMD_TUNNEL_MODIFY,
746 .doit = l2tp_nl_cmd_tunnel_modify,
747 .policy = l2tp_nl_policy,
748 .flags = GENL_ADMIN_PERM,
749 },
750 {
751 .cmd = L2TP_CMD_TUNNEL_GET,
752 .doit = l2tp_nl_cmd_tunnel_get,
753 .dumpit = l2tp_nl_cmd_tunnel_dump,
754 .policy = l2tp_nl_policy,
755 .flags = GENL_ADMIN_PERM,
756 },
757 {
758 .cmd = L2TP_CMD_SESSION_CREATE,
759 .doit = l2tp_nl_cmd_session_create,
760 .policy = l2tp_nl_policy,
761 .flags = GENL_ADMIN_PERM,
762 },
763 {
764 .cmd = L2TP_CMD_SESSION_DELETE,
765 .doit = l2tp_nl_cmd_session_delete,
766 .policy = l2tp_nl_policy,
767 .flags = GENL_ADMIN_PERM,
768 },
769 {
770 .cmd = L2TP_CMD_SESSION_MODIFY,
771 .doit = l2tp_nl_cmd_session_modify,
772 .policy = l2tp_nl_policy,
773 .flags = GENL_ADMIN_PERM,
774 },
775 {
776 .cmd = L2TP_CMD_SESSION_GET,
777 .doit = l2tp_nl_cmd_session_get,
778 .dumpit = l2tp_nl_cmd_session_dump,
779 .policy = l2tp_nl_policy,
780 .flags = GENL_ADMIN_PERM,
781 },
782};
783
784int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops)
785{
786 int ret;
787
788 ret = -EINVAL;
789 if (pw_type >= __L2TP_PWTYPE_MAX)
790 goto err;
791
792 genl_lock();
793 ret = -EBUSY;
794 if (l2tp_nl_cmd_ops[pw_type])
795 goto out;
796
797	l2tp_nl_cmd_ops[pw_type] = ops;
798	ret = 0;
799out:
800	genl_unlock();
801err:
802	return ret;
803}
804EXPORT_SYMBOL_GPL(l2tp_nl_register_ops);
805
806void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type)
807{
808 if (pw_type < __L2TP_PWTYPE_MAX) {
809 genl_lock();
810 l2tp_nl_cmd_ops[pw_type] = NULL;
811 genl_unlock();
812 }
813}
814EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops);
815
816static int l2tp_nl_init(void)
817{
818 int err;
819
820 printk(KERN_INFO "L2TP netlink interface\n");
821 err = genl_register_family_with_ops(&l2tp_nl_family, l2tp_nl_ops,
822 ARRAY_SIZE(l2tp_nl_ops));
823
824 return err;
825}
826
827static void l2tp_nl_cleanup(void)
828{
829 genl_unregister_family(&l2tp_nl_family);
830}
831
832module_init(l2tp_nl_init);
833module_exit(l2tp_nl_cleanup);
834
835MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
836MODULE_DESCRIPTION("L2TP netlink");
837MODULE_LICENSE("GPL");
838MODULE_VERSION("1.0");
839MODULE_ALIAS("net-pf-" __stringify(PF_NETLINK) "-proto-" \
840 __stringify(NETLINK_GENERIC) "-type-" "l2tp");
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
new file mode 100644
index 000000000000..90d82b3f2889
--- /dev/null
+++ b/net/l2tp/l2tp_ppp.c
@@ -0,0 +1,1837 @@
1/*****************************************************************************
2 * Linux PPP over L2TP (PPPoX/PPPoL2TP) Sockets
3 *
4 * PPPoX --- Generic PPP encapsulation socket family
5 * PPPoL2TP --- PPP over L2TP (RFC 2661)
6 *
7 * Version: 2.0.0
8 *
9 * Authors: James Chapman (jchapman@katalix.com)
10 *
11 * Based on original work by Martijn van Oosterhout <kleptog@svana.org>
12 *
13 * License:
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 *
19 */
20
21/* This driver handles only L2TP data frames; control frames are handled by a
22 * userspace application.
23 *
24 * To send data in an L2TP session, userspace opens a PPPoL2TP socket and
25 * attaches it to a bound UDP socket with local tunnel_id / session_id and
26 * peer tunnel_id / session_id set. Data can then be sent or received using
27 * regular socket sendmsg() / recvmsg() calls. Kernel parameters of the socket
28 * can be read or modified using ioctl() or [gs]etsockopt() calls.
29 *
30 * When a PPPoL2TP socket is connected with local and peer session_id values
31 * zero, the socket is treated as a special tunnel management socket.
32 *
33 * Here's example userspace code to create a socket for sending/receiving data
34 * over an L2TP session:-
35 *
36 * struct sockaddr_pppol2tp sax;
37 * int fd;
38 * int session_fd;
39 *
40 * fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
41 *
42 * sax.sa_family = AF_PPPOX;
43 * sax.sa_protocol = PX_PROTO_OL2TP;
44 * sax.pppol2tp.fd = tunnel_fd; // bound UDP socket
45 * sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr;
46 * sax.pppol2tp.addr.sin_port = addr->sin_port;
47 * sax.pppol2tp.addr.sin_family = AF_INET;
48 * sax.pppol2tp.s_tunnel = tunnel_id;
49 * sax.pppol2tp.s_session = session_id;
50 * sax.pppol2tp.d_tunnel = peer_tunnel_id;
51 * sax.pppol2tp.d_session = peer_session_id;
52 *
53 * session_fd = connect(fd, (struct sockaddr *)&sax, sizeof(sax));
54 *
55 * A pppd plugin that allows PPP traffic to be carried over L2TP using
56 * this driver is available from the OpenL2TP project at
57 * http://openl2tp.sourceforge.net.
58 */
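/* A tunnel-management socket (the special case mentioned above, with local
 * and peer session_id both zero) is created the same way; a minimal sketch,
 * reusing the placeholder names from the example above:
 *
 *	struct sockaddr_pppol2tp sax;
 *	int mgmt_fd;
 *
 *	mgmt_fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
 *
 *	sax.sa_family = AF_PPPOX;
 *	sax.sa_protocol = PX_PROTO_OL2TP;
 *	sax.pppol2tp.fd = tunnel_fd;		// bound UDP socket
 *	sax.pppol2tp.addr.sin_family = AF_INET;
 *	sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr;
 *	sax.pppol2tp.addr.sin_port = addr->sin_port;
 *	sax.pppol2tp.s_tunnel = tunnel_id;
 *	sax.pppol2tp.s_session = 0;		// management socket
 *	sax.pppol2tp.d_tunnel = peer_tunnel_id;
 *	sax.pppol2tp.d_session = 0;
 *
 *	connect(mgmt_fd, (struct sockaddr *)&sax, sizeof(sax));
 *
 * No data flows on such a socket; it only exists so that the tunnel-level
 * ioctl(), setsockopt() and getsockopt() calls described later in this
 * file can be issued against the tunnel.
 */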
59
60#include <linux/module.h>
61#include <linux/string.h>
62#include <linux/list.h>
63#include <linux/uaccess.h>
64
65#include <linux/kernel.h>
66#include <linux/spinlock.h>
67#include <linux/kthread.h>
68#include <linux/sched.h>
69#include <linux/slab.h>
70#include <linux/errno.h>
71#include <linux/jiffies.h>
72
73#include <linux/netdevice.h>
74#include <linux/net.h>
75#include <linux/inetdevice.h>
76#include <linux/skbuff.h>
77#include <linux/init.h>
78#include <linux/ip.h>
79#include <linux/udp.h>
80#include <linux/if_pppox.h>
81#include <linux/if_pppol2tp.h>
82#include <net/sock.h>
83#include <linux/ppp_channel.h>
84#include <linux/ppp_defs.h>
85#include <linux/if_ppp.h>
86#include <linux/file.h>
87#include <linux/hash.h>
88#include <linux/sort.h>
89#include <linux/proc_fs.h>
90#include <linux/l2tp.h>
91#include <linux/nsproxy.h>
92#include <net/net_namespace.h>
93#include <net/netns/generic.h>
94#include <net/dst.h>
95#include <net/ip.h>
96#include <net/udp.h>
97#include <net/xfrm.h>
98
99#include <asm/byteorder.h>
100#include <asm/atomic.h>
101
102#include "l2tp_core.h"
103
104#define PPPOL2TP_DRV_VERSION "V2.0"
105
106/* Space for UDP, L2TP and PPP headers */
107#define PPPOL2TP_HEADER_OVERHEAD 40
108
109#define PRINTK(_mask, _type, _lvl, _fmt, args...) \
110 do { \
111 if ((_mask) & (_type)) \
112 printk(_lvl "PPPOL2TP: " _fmt, ##args); \
113 } while (0)
114
115/* Number of bytes to build transmit L2TP headers.
116 * Unfortunately the size is different depending on whether sequence numbers
117 * are enabled.
118 */
119#define PPPOL2TP_L2TP_HDR_SIZE_SEQ 10
120#define PPPOL2TP_L2TP_HDR_SIZE_NOSEQ 6
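/* For reference: the L2TPv2 data header used here is flags/version
 * (2 bytes), tunnel_id (2 bytes) and session_id (2 bytes), which gives the
 * 6-byte NOSEQ size above; when sequence numbers are in use the Ns and Nr
 * fields (2 bytes each) are appended, giving the 10-byte SEQ size.
 */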
121
122/* Private data of each session. This data lives at the end of struct
123 * l2tp_session, referenced via session->priv[].
124 */
125struct pppol2tp_session {
126 int owner; /* pid that opened the socket */
127
128 struct sock *sock; /* Pointer to the session
129 * PPPoX socket */
130 struct sock *tunnel_sock; /* Pointer to the tunnel UDP
131 * socket */
132 int flags; /* accessed by PPPIOCGFLAGS.
133 * Unused. */
134};
135
136static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb);
137
138static struct ppp_channel_ops pppol2tp_chan_ops = { pppol2tp_xmit , NULL };
139static const struct proto_ops pppol2tp_ops;
140
141/* Helpers to obtain tunnel/session contexts from sockets.
142 */
143static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
144{
145 struct l2tp_session *session;
146
147 if (sk == NULL)
148 return NULL;
149
150 sock_hold(sk);
151 session = (struct l2tp_session *)(sk->sk_user_data);
152 if (session == NULL) {
153 sock_put(sk);
154 goto out;
155 }
156
157 BUG_ON(session->magic != L2TP_SESSION_MAGIC);
158
159out:
160 return session;
161}
162
163/*****************************************************************************
164 * Receive data handling
165 *****************************************************************************/
166
167static int pppol2tp_recv_payload_hook(struct sk_buff *skb)
168{
169 /* Skip PPP header, if present. In testing, Microsoft L2TP clients
170 * don't send the PPP header (PPP header compression enabled), but
171 * other clients can include the header. So we cope with both cases
172 * here. The PPP header is always FF03 when using L2TP.
173 *
174 * Note that skb->data[] isn't dereferenced from a u16 ptr here since
175 * the field may be unaligned.
176 */
177 if (!pskb_may_pull(skb, 2))
178 return 1;
179
180 if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03))
181 skb_pull(skb, 2);
182
183 return 0;
184}
185
186/* Receive message. This is the recvmsg for the PPPoL2TP socket.
187 */
188static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock,
189 struct msghdr *msg, size_t len,
190 int flags)
191{
192 int err;
193 struct sk_buff *skb;
194 struct sock *sk = sock->sk;
195
196 err = -EIO;
197 if (sk->sk_state & PPPOX_BOUND)
198 goto end;
199
200 msg->msg_namelen = 0;
201
202 err = 0;
203 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
204 flags & MSG_DONTWAIT, &err);
205 if (!skb)
206 goto end;
207
208 if (len > skb->len)
209 len = skb->len;
210 else if (len < skb->len)
211 msg->msg_flags |= MSG_TRUNC;
212
213 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len);
214 if (likely(err == 0))
215 err = len;
216
217 kfree_skb(skb);
218end:
219 return err;
220}
221
222static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
223{
224 struct pppol2tp_session *ps = l2tp_session_priv(session);
225 struct sock *sk = NULL;
226
227 /* If the socket is bound, send it in to PPP's input queue. Otherwise
228 * queue it on the session socket.
229 */
230 sk = ps->sock;
231 if (sk == NULL)
232 goto no_sock;
233
234 if (sk->sk_state & PPPOX_BOUND) {
235 struct pppox_sock *po;
236 PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
237 "%s: recv %d byte data frame, passing to ppp\n",
238 session->name, data_len);
239
240 /* We need to forget all info related to the L2TP packet
241 * gathered in the skb as we are going to reuse the same
242 * skb for the inner packet.
243 * Namely we need to:
244 * - reset xfrm (IPSec) information as it applies to
245 * the outer L2TP packet and not to the inner one
246 * - release the dst to force a route lookup on the inner
247 * IP packet since skb->dst currently points to the dst
248 * of the UDP tunnel
249 * - reset netfilter information as it doesn't apply
250 * to the inner packet either
251 */
252 secpath_reset(skb);
253 skb_dst_drop(skb);
254 nf_reset(skb);
255
256 po = pppox_sk(sk);
257 ppp_input(&po->chan, skb);
258 } else {
259 PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
260 "%s: socket not bound\n", session->name);
261
262 /* Not bound. Nothing we can do, so discard. */
263 session->stats.rx_errors++;
264 kfree_skb(skb);
265 }
266
267 return;
268
269no_sock:
270 PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
271 "%s: no socket\n", session->name);
272 kfree_skb(skb);
273}
274
275static void pppol2tp_session_sock_hold(struct l2tp_session *session)
276{
277 struct pppol2tp_session *ps = l2tp_session_priv(session);
278
279 if (ps->sock)
280 sock_hold(ps->sock);
281}
282
283static void pppol2tp_session_sock_put(struct l2tp_session *session)
284{
285 struct pppol2tp_session *ps = l2tp_session_priv(session);
286
287 if (ps->sock)
288 sock_put(ps->sock);
289}
290
291/************************************************************************
292 * Transmit handling
293 ***********************************************************************/
294
295/* This is the sendmsg handler for the PPPoL2TP session socket. We come here
296 * when a user application does a sendmsg() on the session socket. L2TP and
297 * PPP headers must be inserted into the user's data.
298 */
299static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
300 size_t total_len)
301{
302 static const unsigned char ppph[2] = { 0xff, 0x03 };
303 struct sock *sk = sock->sk;
304 struct sk_buff *skb;
305 int error;
306 struct l2tp_session *session;
307 struct l2tp_tunnel *tunnel;
308 struct pppol2tp_session *ps;
309 int uhlen;
310
311 error = -ENOTCONN;
312 if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
313 goto error;
314
315 /* Get session and tunnel contexts */
316 error = -EBADF;
317 session = pppol2tp_sock_to_session(sk);
318 if (session == NULL)
319 goto error;
320
321 ps = l2tp_session_priv(session);
322 tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
323 if (tunnel == NULL)
324 goto error_put_sess;
325
326 uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
327
328 /* Allocate a socket buffer */
329 error = -ENOMEM;
330 skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
331 uhlen + session->hdr_len +
332 sizeof(ppph) + total_len,
333 0, GFP_KERNEL);
334 if (!skb)
335 goto error_put_sess_tun;
336
337 /* Reserve space for headers. */
338 skb_reserve(skb, NET_SKB_PAD);
339 skb_reset_network_header(skb);
340 skb_reserve(skb, sizeof(struct iphdr));
341 skb_reset_transport_header(skb);
342 skb_reserve(skb, uhlen);
343
344 /* Add PPP header */
345 skb->data[0] = ppph[0];
346 skb->data[1] = ppph[1];
347 skb_put(skb, 2);
348
349 /* Copy user data into skb */
350 error = memcpy_fromiovec(skb->data, m->msg_iov, total_len);
351 if (error < 0) {
352 kfree_skb(skb);
353 goto error_put_sess_tun;
354 }
355 skb_put(skb, total_len);
356
357 l2tp_xmit_skb(session, skb, session->hdr_len);
358
359 sock_put(ps->tunnel_sock);
360
361 return error;
362
363error_put_sess_tun:
364 sock_put(ps->tunnel_sock);
365error_put_sess:
366 sock_put(sk);
367error:
368 return error;
369}
370
371/* Transmit function called by generic PPP driver. Sends PPP frame
372 * over PPPoL2TP socket.
373 *
374 * This is almost the same as pppol2tp_sendmsg(), but rather than
375 * being called with a msghdr from userspace, it is called with a skb
376 * from the kernel.
377 *
378 * The supplied skb from ppp doesn't have enough headroom for the
379 * insertion of L2TP, UDP and IP headers so we need to allocate more
380 * headroom in the skb. This will create a cloned skb. But we must be
381 * careful in the error case because the caller will expect to free
382 * the skb it supplied, not our cloned skb. So we take care to always
383 * leave the original skb unfreed if we return an error.
384 */
385static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
386{
387 static const u8 ppph[2] = { 0xff, 0x03 };
388 struct sock *sk = (struct sock *) chan->private;
389 struct sock *sk_tun;
390 struct l2tp_session *session;
391 struct l2tp_tunnel *tunnel;
392 struct pppol2tp_session *ps;
393 int old_headroom;
394 int new_headroom;
395
396 if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
397 goto abort;
398
399 /* Get session and tunnel contexts from the socket */
400 session = pppol2tp_sock_to_session(sk);
401 if (session == NULL)
402 goto abort;
403
404 ps = l2tp_session_priv(session);
405 sk_tun = ps->tunnel_sock;
406 if (sk_tun == NULL)
407 goto abort_put_sess;
408 tunnel = l2tp_sock_to_tunnel(sk_tun);
409 if (tunnel == NULL)
410 goto abort_put_sess;
411
412 old_headroom = skb_headroom(skb);
413 if (skb_cow_head(skb, sizeof(ppph)))
414 goto abort_put_sess_tun;
415
416 new_headroom = skb_headroom(skb);
417 skb->truesize += new_headroom - old_headroom;
418
419 /* Setup PPP header */
420 __skb_push(skb, sizeof(ppph));
421 skb->data[0] = ppph[0];
422 skb->data[1] = ppph[1];
423
424 l2tp_xmit_skb(session, skb, session->hdr_len);
425
426 sock_put(sk_tun);
427 sock_put(sk);
428 return 1;
429
430abort_put_sess_tun:
431 sock_put(sk_tun);
432abort_put_sess:
433 sock_put(sk);
434abort:
435 /* Free the original skb */
436 kfree_skb(skb);
437 return 1;
438}
439
440/*****************************************************************************
441 * Session (and tunnel control) socket create/destroy.
442 *****************************************************************************/
443
444/* Called by l2tp_core when a session socket is being closed.
445 */
446static void pppol2tp_session_close(struct l2tp_session *session)
447{
448 struct pppol2tp_session *ps = l2tp_session_priv(session);
449 struct sock *sk = ps->sock;
450 struct sk_buff *skb;
451
452 BUG_ON(session->magic != L2TP_SESSION_MAGIC);
453
454 if (session->session_id == 0)
455 goto out;
456
457 if (sk != NULL) {
458 lock_sock(sk);
459
460 if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
461 pppox_unbind_sock(sk);
462 sk->sk_state = PPPOX_DEAD;
463 sk->sk_state_change(sk);
464 }
465
466 /* Purge any queued data */
467 skb_queue_purge(&sk->sk_receive_queue);
468 skb_queue_purge(&sk->sk_write_queue);
469 while ((skb = skb_dequeue(&session->reorder_q))) {
470 kfree_skb(skb);
471 sock_put(sk);
472 }
473
474 release_sock(sk);
475 }
476
477out:
478 return;
479}
480
481/* Really kill the session socket. (Called from sock_put() if
482 * refcnt == 0.)
483 */
484static void pppol2tp_session_destruct(struct sock *sk)
485{
486 struct l2tp_session *session;
487
488 if (sk->sk_user_data != NULL) {
489 session = sk->sk_user_data;
490 if (session == NULL)
491 goto out;
492
493 sk->sk_user_data = NULL;
494 BUG_ON(session->magic != L2TP_SESSION_MAGIC);
495 l2tp_session_dec_refcount(session);
496 }
497
498out:
499 return;
500}
501
502/* Called when the PPPoX socket (session) is closed.
503 */
504static int pppol2tp_release(struct socket *sock)
505{
506 struct sock *sk = sock->sk;
507 struct l2tp_session *session;
508 int error;
509
510 if (!sk)
511 return 0;
512
513 error = -EBADF;
514 lock_sock(sk);
515 if (sock_flag(sk, SOCK_DEAD) != 0)
516 goto error;
517
518 pppox_unbind_sock(sk);
519
520 /* Signal the death of the socket. */
521 sk->sk_state = PPPOX_DEAD;
522 sock_orphan(sk);
523 sock->sk = NULL;
524
525 session = pppol2tp_sock_to_session(sk);
526
527 /* Purge any queued data */
528 skb_queue_purge(&sk->sk_receive_queue);
529 skb_queue_purge(&sk->sk_write_queue);
530 if (session != NULL) {
531 struct sk_buff *skb;
532 while ((skb = skb_dequeue(&session->reorder_q))) {
533 kfree_skb(skb);
534 sock_put(sk);
535 }
536 sock_put(sk);
537 }
538
539 release_sock(sk);
540
541 /* This will delete the session context via
542 * pppol2tp_session_destruct() if the socket's refcnt drops to
543 * zero.
544 */
545 sock_put(sk);
546
547 return 0;
548
549error:
550 release_sock(sk);
551 return error;
552}
553
554static struct proto pppol2tp_sk_proto = {
555 .name = "PPPOL2TP",
556 .owner = THIS_MODULE,
557 .obj_size = sizeof(struct pppox_sock),
558};
559
560static int pppol2tp_backlog_recv(struct sock *sk, struct sk_buff *skb)
561{
562 int rc;
563
564 rc = l2tp_udp_encap_recv(sk, skb);
565 if (rc)
566 kfree_skb(skb);
567
568 return NET_RX_SUCCESS;
569}
570
571/* socket() handler. Initialize a new struct sock.
572 */
573static int pppol2tp_create(struct net *net, struct socket *sock)
574{
575 int error = -ENOMEM;
576 struct sock *sk;
577
578 sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto);
579 if (!sk)
580 goto out;
581
582 sock_init_data(sock, sk);
583
584 sock->state = SS_UNCONNECTED;
585 sock->ops = &pppol2tp_ops;
586
587 sk->sk_backlog_rcv = pppol2tp_backlog_recv;
588 sk->sk_protocol = PX_PROTO_OL2TP;
589 sk->sk_family = PF_PPPOX;
590 sk->sk_state = PPPOX_NONE;
591 sk->sk_type = SOCK_STREAM;
592 sk->sk_destruct = pppol2tp_session_destruct;
593
594 error = 0;
595
596out:
597 return error;
598}
599
600#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
601static void pppol2tp_show(struct seq_file *m, void *arg)
602{
603 struct l2tp_session *session = arg;
604 struct pppol2tp_session *ps = l2tp_session_priv(session);
605
606 if (ps) {
607 struct pppox_sock *po = pppox_sk(ps->sock);
608 if (po)
609 seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
610 }
611}
612#endif
613
614/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
615 */
616static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
617 int sockaddr_len, int flags)
618{
619 struct sock *sk = sock->sk;
620 struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr;
621 struct sockaddr_pppol2tpv3 *sp3 = (struct sockaddr_pppol2tpv3 *) uservaddr;
622 struct pppox_sock *po = pppox_sk(sk);
623 struct l2tp_session *session = NULL;
624 struct l2tp_tunnel *tunnel;
625 struct pppol2tp_session *ps;
626 struct dst_entry *dst;
627 struct l2tp_session_cfg cfg = { 0, };
628 int error = 0;
629 u32 tunnel_id, peer_tunnel_id;
630 u32 session_id, peer_session_id;
631 int ver = 2;
632 int fd;
633
634 lock_sock(sk);
635
636 error = -EINVAL;
637 if (sp->sa_protocol != PX_PROTO_OL2TP)
638 goto end;
639
640 /* Check for already bound sockets */
641 error = -EBUSY;
642 if (sk->sk_state & PPPOX_CONNECTED)
643 goto end;
644
645	/* We don't support rebinding anyway */
646 error = -EALREADY;
647 if (sk->sk_user_data)
648 goto end; /* socket is already attached */
649
650 /* Get params from socket address. Handle L2TPv2 and L2TPv3 */
651 if (sockaddr_len == sizeof(struct sockaddr_pppol2tp)) {
652 fd = sp->pppol2tp.fd;
653 tunnel_id = sp->pppol2tp.s_tunnel;
654 peer_tunnel_id = sp->pppol2tp.d_tunnel;
655 session_id = sp->pppol2tp.s_session;
656 peer_session_id = sp->pppol2tp.d_session;
657 } else if (sockaddr_len == sizeof(struct sockaddr_pppol2tpv3)) {
658 ver = 3;
659 fd = sp3->pppol2tp.fd;
660 tunnel_id = sp3->pppol2tp.s_tunnel;
661 peer_tunnel_id = sp3->pppol2tp.d_tunnel;
662 session_id = sp3->pppol2tp.s_session;
663 peer_session_id = sp3->pppol2tp.d_session;
664 } else {
665 error = -EINVAL;
666 goto end; /* bad socket address */
667 }
668
669 /* Don't bind if tunnel_id is 0 */
670 error = -EINVAL;
671 if (tunnel_id == 0)
672 goto end;
673
674 tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id);
675
676 /* Special case: create tunnel context if session_id and
677	 * peer_session_id are 0. Otherwise look up the tunnel using the supplied
678 * tunnel id.
679 */
680 if ((session_id == 0) && (peer_session_id == 0)) {
681 if (tunnel == NULL) {
682 struct l2tp_tunnel_cfg tcfg = {
683 .encap = L2TP_ENCAPTYPE_UDP,
684 .debug = 0,
685 };
686 error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, &tcfg, &tunnel);
687 if (error < 0)
688 goto end;
689 }
690 } else {
691 /* Error if we can't find the tunnel */
692 error = -ENOENT;
693 if (tunnel == NULL)
694 goto end;
695
696 /* Error if socket is not prepped */
697 if (tunnel->sock == NULL)
698 goto end;
699 }
700
701 if (tunnel->recv_payload_hook == NULL)
702 tunnel->recv_payload_hook = pppol2tp_recv_payload_hook;
703
704 if (tunnel->peer_tunnel_id == 0) {
705 if (ver == 2)
706 tunnel->peer_tunnel_id = sp->pppol2tp.d_tunnel;
707 else
708 tunnel->peer_tunnel_id = sp3->pppol2tp.d_tunnel;
709 }
710
711 /* Create session if it doesn't already exist. We handle the
712 * case where a session was previously created by the netlink
713 * interface by checking that the session doesn't already have
714	 * a socket and that its tunnel socket is the one we expect. If
715	 * either check fails, return EEXIST to the caller.
716 */
717 session = l2tp_session_find(sock_net(sk), tunnel, session_id);
718 if (session == NULL) {
719 /* Default MTU must allow space for UDP/L2TP/PPP
720 * headers.
721 */
722 cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
723
724 /* Allocate and initialize a new session context. */
725 session = l2tp_session_create(sizeof(struct pppol2tp_session),
726 tunnel, session_id,
727 peer_session_id, &cfg);
728 if (session == NULL) {
729 error = -ENOMEM;
730 goto end;
731 }
732 } else {
733 ps = l2tp_session_priv(session);
734 error = -EEXIST;
735 if (ps->sock != NULL)
736 goto end;
737
738 /* consistency checks */
739 if (ps->tunnel_sock != tunnel->sock)
740 goto end;
741 }
742
743 /* Associate session with its PPPoL2TP socket */
744 ps = l2tp_session_priv(session);
745 ps->owner = current->pid;
746 ps->sock = sk;
747 ps->tunnel_sock = tunnel->sock;
748
749 session->recv_skb = pppol2tp_recv;
750 session->session_close = pppol2tp_session_close;
751#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
752 session->show = pppol2tp_show;
753#endif
754
755 /* We need to know each time a skb is dropped from the reorder
756 * queue.
757 */
758 session->ref = pppol2tp_session_sock_hold;
759 session->deref = pppol2tp_session_sock_put;
760
761 /* If PMTU discovery was enabled, use the MTU that was discovered */
762 dst = sk_dst_get(sk);
763 if (dst != NULL) {
764 u32 pmtu = dst_mtu(__sk_dst_get(sk));
765 if (pmtu != 0)
766 session->mtu = session->mru = pmtu -
767 PPPOL2TP_HEADER_OVERHEAD;
768 dst_release(dst);
769 }
770
771 /* Special case: if source & dest session_id == 0x0000, this
772 * socket is being created to manage the tunnel. Just set up
773 * the internal context for use by ioctl() and sockopt()
774 * handlers.
775 */
776 if ((session->session_id == 0) &&
777 (session->peer_session_id == 0)) {
778 error = 0;
779 goto out_no_ppp;
780 }
781
782 /* The only header we need to worry about is the L2TP
783 * header. This size is different depending on whether
784 * sequence numbers are enabled for the data channel.
785 */
786 po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
787
788 po->chan.private = sk;
789 po->chan.ops = &pppol2tp_chan_ops;
790 po->chan.mtu = session->mtu;
791
792 error = ppp_register_net_channel(sock_net(sk), &po->chan);
793 if (error)
794 goto end;
795
796out_no_ppp:
797 /* This is how we get the session context from the socket. */
798 sk->sk_user_data = session;
799 sk->sk_state = PPPOX_CONNECTED;
800 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
801 "%s: created\n", session->name);
802
803end:
804 release_sock(sk);
805
806 return error;
807}
808
809#ifdef CONFIG_L2TP_V3
810
811/* Called when creating sessions via the netlink interface.
812 */
813static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
814{
815 int error;
816 struct l2tp_tunnel *tunnel;
817 struct l2tp_session *session;
818 struct pppol2tp_session *ps;
819
820 tunnel = l2tp_tunnel_find(net, tunnel_id);
821
822 /* Error if we can't find the tunnel */
823 error = -ENOENT;
824 if (tunnel == NULL)
825 goto out;
826
827 /* Error if tunnel socket is not prepped */
828 if (tunnel->sock == NULL)
829 goto out;
830
831 /* Check that this session doesn't already exist */
832 error = -EEXIST;
833 session = l2tp_session_find(net, tunnel, session_id);
834 if (session != NULL)
835 goto out;
836
837 /* Default MTU values. */
838 if (cfg->mtu == 0)
839 cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
840 if (cfg->mru == 0)
841 cfg->mru = cfg->mtu;
842
843 /* Allocate and initialize a new session context. */
844 error = -ENOMEM;
845 session = l2tp_session_create(sizeof(struct pppol2tp_session),
846 tunnel, session_id,
847 peer_session_id, cfg);
848 if (session == NULL)
849 goto out;
850
851 ps = l2tp_session_priv(session);
852 ps->tunnel_sock = tunnel->sock;
853
854 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
855 "%s: created\n", session->name);
856
857 error = 0;
858
859out:
860 return error;
861}
862
863/* Called when deleting sessions via the netlink interface.
864 */
865static int pppol2tp_session_delete(struct l2tp_session *session)
866{
867 struct pppol2tp_session *ps = l2tp_session_priv(session);
868
869 if (ps->sock == NULL)
870 l2tp_session_dec_refcount(session);
871
872 return 0;
873}
874
875#endif /* CONFIG_L2TP_V3 */
876
877/* getname() support.
878 */
879static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
880 int *usockaddr_len, int peer)
881{
882 int len = 0;
883 int error = 0;
884 struct l2tp_session *session;
885 struct l2tp_tunnel *tunnel;
886 struct sock *sk = sock->sk;
887 struct inet_sock *inet;
888 struct pppol2tp_session *pls;
889
890 error = -ENOTCONN;
891 if (sk == NULL)
892 goto end;
893 if (sk->sk_state != PPPOX_CONNECTED)
894 goto end;
895
896 error = -EBADF;
897 session = pppol2tp_sock_to_session(sk);
898 if (session == NULL)
899 goto end;
900
901 pls = l2tp_session_priv(session);
902 tunnel = l2tp_sock_to_tunnel(pls->tunnel_sock);
903 if (tunnel == NULL) {
904 error = -EBADF;
905 goto end_put_sess;
906 }
907
908 inet = inet_sk(sk);
909 if (tunnel->version == 2) {
910 struct sockaddr_pppol2tp sp;
911 len = sizeof(sp);
912 memset(&sp, 0, len);
913 sp.sa_family = AF_PPPOX;
914 sp.sa_protocol = PX_PROTO_OL2TP;
915 sp.pppol2tp.fd = tunnel->fd;
916 sp.pppol2tp.pid = pls->owner;
917 sp.pppol2tp.s_tunnel = tunnel->tunnel_id;
918 sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id;
919 sp.pppol2tp.s_session = session->session_id;
920 sp.pppol2tp.d_session = session->peer_session_id;
921 sp.pppol2tp.addr.sin_family = AF_INET;
922 sp.pppol2tp.addr.sin_port = inet->inet_dport;
923 sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr;
924 memcpy(uaddr, &sp, len);
925 } else if (tunnel->version == 3) {
926 struct sockaddr_pppol2tpv3 sp;
927 len = sizeof(sp);
928 memset(&sp, 0, len);
929 sp.sa_family = AF_PPPOX;
930 sp.sa_protocol = PX_PROTO_OL2TP;
931 sp.pppol2tp.fd = tunnel->fd;
932 sp.pppol2tp.pid = pls->owner;
933 sp.pppol2tp.s_tunnel = tunnel->tunnel_id;
934 sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id;
935 sp.pppol2tp.s_session = session->session_id;
936 sp.pppol2tp.d_session = session->peer_session_id;
937 sp.pppol2tp.addr.sin_family = AF_INET;
938 sp.pppol2tp.addr.sin_port = inet->inet_dport;
939 sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr;
940 memcpy(uaddr, &sp, len);
941 }
942
943 *usockaddr_len = len;
944
945	error = 0;
946	sock_put(pls->tunnel_sock);
947end_put_sess:
948	sock_put(sk);
949
950end:
951 return error;
952}
953
954/****************************************************************************
955 * ioctl() handlers.
956 *
957 * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
958 * sockets. However, in order to control kernel tunnel features, we allow
959 * userspace to create a special "tunnel" PPPoX socket which is used for
960 * control only. Tunnel PPPoX sockets have session_id == 0 and simply allow
961 * the user application to issue L2TP setsockopt(), getsockopt() and ioctl()
962 * calls.
963 ****************************************************************************/
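/* A rough userspace sketch of the stats ioctl described above (illustrative
 * only; PPPIOCGL2TPSTATS and struct pppol2tp_ioc_stats come from the PPP
 * and PPPoL2TP headers, while "tunnel_mgmt_fd" is a placeholder for a
 * connected tunnel-management PPPoL2TP socket):
 *
 *	struct pppol2tp_ioc_stats stats;
 *
 *	memset(&stats, 0, sizeof(stats));
 *	if (ioctl(tunnel_mgmt_fd, PPPIOCGL2TPSTATS, &stats) == 0)
 *		printf("tx %llu pkts, rx %llu pkts\n",
 *		       (unsigned long long) stats.tx_packets,
 *		       (unsigned long long) stats.rx_packets);
 *
 * Setting stats.session_id to a non-zero value before the call asks the
 * tunnel ioctl handler below to return that session's counters instead.
 */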
964
965static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest,
966 struct l2tp_stats *stats)
967{
968 dest->tx_packets = stats->tx_packets;
969 dest->tx_bytes = stats->tx_bytes;
970 dest->tx_errors = stats->tx_errors;
971 dest->rx_packets = stats->rx_packets;
972 dest->rx_bytes = stats->rx_bytes;
973 dest->rx_seq_discards = stats->rx_seq_discards;
974 dest->rx_oos_packets = stats->rx_oos_packets;
975 dest->rx_errors = stats->rx_errors;
976}
977
978/* Session ioctl helper.
979 */
980static int pppol2tp_session_ioctl(struct l2tp_session *session,
981 unsigned int cmd, unsigned long arg)
982{
983 struct ifreq ifr;
984 int err = 0;
985 struct sock *sk;
986 int val = (int) arg;
987 struct pppol2tp_session *ps = l2tp_session_priv(session);
988 struct l2tp_tunnel *tunnel = session->tunnel;
989 struct pppol2tp_ioc_stats stats;
990
991 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
992 "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
993 session->name, cmd, arg);
994
995 sk = ps->sock;
996 sock_hold(sk);
997
998 switch (cmd) {
999 case SIOCGIFMTU:
1000 err = -ENXIO;
1001 if (!(sk->sk_state & PPPOX_CONNECTED))
1002 break;
1003
1004 err = -EFAULT;
1005 if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
1006 break;
1007 ifr.ifr_mtu = session->mtu;
1008 if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq)))
1009 break;
1010
1011 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1012 "%s: get mtu=%d\n", session->name, session->mtu);
1013 err = 0;
1014 break;
1015
1016 case SIOCSIFMTU:
1017 err = -ENXIO;
1018 if (!(sk->sk_state & PPPOX_CONNECTED))
1019 break;
1020
1021 err = -EFAULT;
1022 if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
1023 break;
1024
1025 session->mtu = ifr.ifr_mtu;
1026
1027 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1028 "%s: set mtu=%d\n", session->name, session->mtu);
1029 err = 0;
1030 break;
1031
1032 case PPPIOCGMRU:
1033 err = -ENXIO;
1034 if (!(sk->sk_state & PPPOX_CONNECTED))
1035 break;
1036
1037 err = -EFAULT;
1038 if (put_user(session->mru, (int __user *) arg))
1039 break;
1040
1041 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1042 "%s: get mru=%d\n", session->name, session->mru);
1043 err = 0;
1044 break;
1045
1046 case PPPIOCSMRU:
1047 err = -ENXIO;
1048 if (!(sk->sk_state & PPPOX_CONNECTED))
1049 break;
1050
1051 err = -EFAULT;
1052 if (get_user(val, (int __user *) arg))
1053 break;
1054
1055 session->mru = val;
1056 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1057 "%s: set mru=%d\n", session->name, session->mru);
1058 err = 0;
1059 break;
1060
1061 case PPPIOCGFLAGS:
1062 err = -EFAULT;
1063 if (put_user(ps->flags, (int __user *) arg))
1064 break;
1065
1066 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1067 "%s: get flags=%d\n", session->name, ps->flags);
1068 err = 0;
1069 break;
1070
1071 case PPPIOCSFLAGS:
1072 err = -EFAULT;
1073 if (get_user(val, (int __user *) arg))
1074 break;
1075 ps->flags = val;
1076 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1077 "%s: set flags=%d\n", session->name, ps->flags);
1078 err = 0;
1079 break;
1080
1081 case PPPIOCGL2TPSTATS:
1082 err = -ENXIO;
1083 if (!(sk->sk_state & PPPOX_CONNECTED))
1084 break;
1085
1086 memset(&stats, 0, sizeof(stats));
1087 stats.tunnel_id = tunnel->tunnel_id;
1088 stats.session_id = session->session_id;
1089 pppol2tp_copy_stats(&stats, &session->stats);
1090 if (copy_to_user((void __user *) arg, &stats,
1091 sizeof(stats)))
1092 break;
1093 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1094 "%s: get L2TP stats\n", session->name);
1095 err = 0;
1096 break;
1097
1098 default:
1099 err = -ENOSYS;
1100 break;
1101 }
1102
1103 sock_put(sk);
1104
1105 return err;
1106}
1107
1108/* Tunnel ioctl helper.
1109 *
1110 * Note the special handling for PPPIOCGL2TPSTATS below. If the ioctl data
1111 * specifies a session_id, the session ioctl handler is called. This allows an
1112 * application to retrieve session stats via a tunnel socket.
1113 */
1114static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
1115 unsigned int cmd, unsigned long arg)
1116{
1117 int err = 0;
1118 struct sock *sk;
1119 struct pppol2tp_ioc_stats stats;
1120
1121 PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
1122 "%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n",
1123 tunnel->name, cmd, arg);
1124
1125 sk = tunnel->sock;
1126 sock_hold(sk);
1127
1128 switch (cmd) {
1129 case PPPIOCGL2TPSTATS:
1130 err = -ENXIO;
1131 if (!(sk->sk_state & PPPOX_CONNECTED))
1132 break;
1133
1134 if (copy_from_user(&stats, (void __user *) arg,
1135 sizeof(stats))) {
1136 err = -EFAULT;
1137 break;
1138 }
1139 if (stats.session_id != 0) {
1140 /* resend to session ioctl handler */
1141 struct l2tp_session *session =
1142 l2tp_session_find(sock_net(sk), tunnel, stats.session_id);
1143 if (session != NULL)
1144 err = pppol2tp_session_ioctl(session, cmd, arg);
1145 else
1146 err = -EBADR;
1147 break;
1148 }
1149#ifdef CONFIG_XFRM
1150 stats.using_ipsec = (sk->sk_policy[0] || sk->sk_policy[1]) ? 1 : 0;
1151#endif
1152 pppol2tp_copy_stats(&stats, &tunnel->stats);
1153 if (copy_to_user((void __user *) arg, &stats, sizeof(stats))) {
1154 err = -EFAULT;
1155 break;
1156 }
1157 PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1158 "%s: get L2TP stats\n", tunnel->name);
1159 err = 0;
1160 break;
1161
1162 default:
1163 err = -ENOSYS;
1164 break;
1165 }
1166
1167 sock_put(sk);
1168
1169 return err;
1170}
1171
1172/* Main ioctl() handler.
1173 * Dispatch to tunnel or session helpers depending on the socket.
1174 */
1175static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
1176 unsigned long arg)
1177{
1178 struct sock *sk = sock->sk;
1179 struct l2tp_session *session;
1180 struct l2tp_tunnel *tunnel;
1181 struct pppol2tp_session *ps;
1182 int err;
1183
1184 if (!sk)
1185 return 0;
1186
1187 err = -EBADF;
1188 if (sock_flag(sk, SOCK_DEAD) != 0)
1189 goto end;
1190
1191 err = -ENOTCONN;
1192 if ((sk->sk_user_data == NULL) ||
1193 (!(sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND))))
1194 goto end;
1195
1196 /* Get session context from the socket */
1197 err = -EBADF;
1198 session = pppol2tp_sock_to_session(sk);
1199 if (session == NULL)
1200 goto end;
1201
1202 /* Special case: if session's session_id is zero, treat ioctl as a
1203 * tunnel ioctl
1204 */
1205 ps = l2tp_session_priv(session);
1206 if ((session->session_id == 0) &&
1207 (session->peer_session_id == 0)) {
1208 err = -EBADF;
1209 tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
1210 if (tunnel == NULL)
1211 goto end_put_sess;
1212
1213 err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg);
1214 sock_put(ps->tunnel_sock);
1215 goto end_put_sess;
1216 }
1217
1218 err = pppol2tp_session_ioctl(session, cmd, arg);
1219
1220end_put_sess:
1221 sock_put(sk);
1222end:
1223 return err;
1224}
1225
1226/*****************************************************************************
1227 * setsockopt() / getsockopt() support.
1228 *
1229 * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
1230 * sockets. In order to control kernel tunnel features, we allow userspace to
1231 * create a special "tunnel" PPPoX socket which is used for control only.
1232 * Tunnel PPPoX sockets have session_id == 0 and simply allow the user
1233 * application to issue L2TP setsockopt(), getsockopt() and ioctl() calls.
1234 *****************************************************************************/
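/* A minimal userspace sketch of the above (illustrative only; the option
 * names come from <linux/if_pppol2tp.h>, and "sess_fd" is assumed to be a
 * connected PPPoL2TP session socket):
 *
 *	int on = 1;
 *	int debug = 0;
 *	socklen_t len = sizeof(debug);
 *
 *	// request sequence numbers on transmitted data packets
 *	setsockopt(sess_fd, SOL_PPPOL2TP, PPPOL2TP_SO_SENDSEQ, &on, sizeof(on));
 *
 *	// read back the current debug mask for this session
 *	getsockopt(sess_fd, SOL_PPPOL2TP, PPPOL2TP_SO_DEBUG, &debug, &len);
 *
 * On a tunnel-management socket (session_id == 0) the same calls are routed
 * to the tunnel helpers below, where only PPPOL2TP_SO_DEBUG is accepted.
 */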
1235
1236/* Tunnel setsockopt() helper.
1237 */
1238static int pppol2tp_tunnel_setsockopt(struct sock *sk,
1239 struct l2tp_tunnel *tunnel,
1240 int optname, int val)
1241{
1242 int err = 0;
1243
1244 switch (optname) {
1245 case PPPOL2TP_SO_DEBUG:
1246 tunnel->debug = val;
1247 PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1248 "%s: set debug=%x\n", tunnel->name, tunnel->debug);
1249 break;
1250
1251 default:
1252 err = -ENOPROTOOPT;
1253 break;
1254 }
1255
1256 return err;
1257}
1258
1259/* Session setsockopt helper.
1260 */
1261static int pppol2tp_session_setsockopt(struct sock *sk,
1262 struct l2tp_session *session,
1263 int optname, int val)
1264{
1265 int err = 0;
1266 struct pppol2tp_session *ps = l2tp_session_priv(session);
1267
1268 switch (optname) {
1269 case PPPOL2TP_SO_RECVSEQ:
1270 if ((val != 0) && (val != 1)) {
1271 err = -EINVAL;
1272 break;
1273 }
1274 session->recv_seq = val ? -1 : 0;
1275 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1276 "%s: set recv_seq=%d\n", session->name, session->recv_seq);
1277 break;
1278
1279 case PPPOL2TP_SO_SENDSEQ:
1280 if ((val != 0) && (val != 1)) {
1281 err = -EINVAL;
1282 break;
1283 }
1284 session->send_seq = val ? -1 : 0;
1285 {
1286 struct sock *ssk = ps->sock;
1287 struct pppox_sock *po = pppox_sk(ssk);
1288 po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
1289 PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
1290 }
1291 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1292 "%s: set send_seq=%d\n", session->name, session->send_seq);
1293 break;
1294
1295 case PPPOL2TP_SO_LNSMODE:
1296 if ((val != 0) && (val != 1)) {
1297 err = -EINVAL;
1298 break;
1299 }
1300 session->lns_mode = val ? -1 : 0;
1301 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1302 "%s: set lns_mode=%d\n", session->name, session->lns_mode);
1303 break;
1304
1305 case PPPOL2TP_SO_DEBUG:
1306 session->debug = val;
1307 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1308 "%s: set debug=%x\n", session->name, session->debug);
1309 break;
1310
1311 case PPPOL2TP_SO_REORDERTO:
1312 session->reorder_timeout = msecs_to_jiffies(val);
1313 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1314 "%s: set reorder_timeout=%d\n", session->name, session->reorder_timeout);
1315 break;
1316
1317 default:
1318 err = -ENOPROTOOPT;
1319 break;
1320 }
1321
1322 return err;
1323}
1324
1325/* Main setsockopt() entry point.
1326 * Does API checks, then calls either the tunnel or session setsockopt
1327 * handler, according to whether the PPPoL2TP socket is for a regular
1328 * session or the special tunnel type.
1329 */
1330static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
1331 char __user *optval, unsigned int optlen)
1332{
1333 struct sock *sk = sock->sk;
1334 struct l2tp_session *session;
1335 struct l2tp_tunnel *tunnel;
1336 struct pppol2tp_session *ps;
1337 int val;
1338 int err;
1339
1340 if (level != SOL_PPPOL2TP)
1341 return udp_prot.setsockopt(sk, level, optname, optval, optlen);
1342
1343 if (optlen < sizeof(int))
1344 return -EINVAL;
1345
1346 if (get_user(val, (int __user *)optval))
1347 return -EFAULT;
1348
1349 err = -ENOTCONN;
1350 if (sk->sk_user_data == NULL)
1351 goto end;
1352
1353 /* Get session context from the socket */
1354 err = -EBADF;
1355 session = pppol2tp_sock_to_session(sk);
1356 if (session == NULL)
1357 goto end;
1358
1359 /* Special case: if session_id == 0x0000, treat as operation on tunnel
1360 */
1361 ps = l2tp_session_priv(session);
1362 if ((session->session_id == 0) &&
1363 (session->peer_session_id == 0)) {
1364 err = -EBADF;
1365 tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
1366 if (tunnel == NULL)
1367 goto end_put_sess;
1368
1369 err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
1370 sock_put(ps->tunnel_sock);
1371 } else
1372 err = pppol2tp_session_setsockopt(sk, session, optname, val);
1373
1374
1375
1376end_put_sess:
1377 sock_put(sk);
1378end:
1379 return err;
1380}
1381
1382/* Tunnel getsockopt helper. Called with sock locked.
1383 */
1384static int pppol2tp_tunnel_getsockopt(struct sock *sk,
1385 struct l2tp_tunnel *tunnel,
1386 int optname, int *val)
1387{
1388 int err = 0;
1389
1390 switch (optname) {
1391 case PPPOL2TP_SO_DEBUG:
1392 *val = tunnel->debug;
1393 PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1394 "%s: get debug=%x\n", tunnel->name, tunnel->debug);
1395 break;
1396
1397 default:
1398 err = -ENOPROTOOPT;
1399 break;
1400 }
1401
1402 return err;
1403}
1404
1405/* Session getsockopt helper. Called with sock locked.
1406 */
1407static int pppol2tp_session_getsockopt(struct sock *sk,
1408 struct l2tp_session *session,
1409 int optname, int *val)
1410{
1411 int err = 0;
1412
1413 switch (optname) {
1414 case PPPOL2TP_SO_RECVSEQ:
1415 *val = session->recv_seq;
1416 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1417 "%s: get recv_seq=%d\n", session->name, *val);
1418 break;
1419
1420 case PPPOL2TP_SO_SENDSEQ:
1421 *val = session->send_seq;
1422 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1423 "%s: get send_seq=%d\n", session->name, *val);
1424 break;
1425
1426 case PPPOL2TP_SO_LNSMODE:
1427 *val = session->lns_mode;
1428 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1429 "%s: get lns_mode=%d\n", session->name, *val);
1430 break;
1431
1432 case PPPOL2TP_SO_DEBUG:
1433 *val = session->debug;
1434 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1435 "%s: get debug=%d\n", session->name, *val);
1436 break;
1437
1438 case PPPOL2TP_SO_REORDERTO:
1439 *val = (int) jiffies_to_msecs(session->reorder_timeout);
1440 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1441 "%s: get reorder_timeout=%d\n", session->name, *val);
1442 break;
1443
1444 default:
1445 err = -ENOPROTOOPT;
1446 }
1447
1448 return err;
1449}
1450
1451/* Main getsockopt() entry point.
1452 * Does API checks, then calls either the tunnel or session getsockopt
1453 * handler, according to whether the PPPoX socket is for a regular session
1454 * or the special tunnel type.
1455 */
1456static int pppol2tp_getsockopt(struct socket *sock, int level,
1457 int optname, char __user *optval, int __user *optlen)
1458{
1459 struct sock *sk = sock->sk;
1460 struct l2tp_session *session;
1461 struct l2tp_tunnel *tunnel;
1462 int val, len;
1463 int err;
1464 struct pppol2tp_session *ps;
1465
1466 if (level != SOL_PPPOL2TP)
1467 return udp_prot.getsockopt(sk, level, optname, optval, optlen);
1468
1469 if (get_user(len, (int __user *) optlen))
1470 return -EFAULT;
1471
1472 len = min_t(unsigned int, len, sizeof(int));
1473
1474 if (len < 0)
1475 return -EINVAL;
1476
1477 err = -ENOTCONN;
1478 if (sk->sk_user_data == NULL)
1479 goto end;
1480
1481 /* Get the session context */
1482 err = -EBADF;
1483 session = pppol2tp_sock_to_session(sk);
1484 if (session == NULL)
1485 goto end;
1486
1487 /* Special case: if session_id == 0x0000, treat as operation on tunnel */
1488 ps = l2tp_session_priv(session);
1489 if ((session->session_id == 0) &&
1490 (session->peer_session_id == 0)) {
1491 err = -EBADF;
1492 tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
1493 if (tunnel == NULL)
1494 goto end_put_sess;
1495
1496 err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
1497 sock_put(ps->tunnel_sock);
1498 } else
1499 err = pppol2tp_session_getsockopt(sk, session, optname, &val);
1500
1501 err = -EFAULT;
1502 if (put_user(len, (int __user *) optlen))
1503 goto end_put_sess;
1504
1505 if (copy_to_user((void __user *) optval, &val, len))
1506 goto end_put_sess;
1507
1508 err = 0;
1509
1510end_put_sess:
1511 sock_put(sk);
1512end:
1513 return err;
1514}
1515
1516/*****************************************************************************
1517 * /proc filesystem for debug
1518 * Since the original pppol2tp driver provided /proc/net/pppol2tp for
1519 * L2TPv2, we dump only L2TPv2 tunnels and sessions here.
1520 *****************************************************************************/
1521
1522static unsigned int pppol2tp_net_id;
1523
1524#ifdef CONFIG_PROC_FS
1525
1526struct pppol2tp_seq_data {
1527 struct seq_net_private p;
1528 int tunnel_idx; /* current tunnel */
1529 int session_idx; /* index of session within current tunnel */
1530 struct l2tp_tunnel *tunnel;
1531 struct l2tp_session *session; /* NULL means get next tunnel */
1532};
1533
1534static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
1535{
1536 for (;;) {
1537 pd->tunnel = l2tp_tunnel_find_nth(net, pd->tunnel_idx);
1538 pd->tunnel_idx++;
1539
1540 if (pd->tunnel == NULL)
1541 break;
1542
1543 /* Ignore L2TPv3 tunnels */
1544 if (pd->tunnel->version < 3)
1545 break;
1546 }
1547}
1548
1549static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
1550{
1551 pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
1552 pd->session_idx++;
1553
1554 if (pd->session == NULL) {
1555 pd->session_idx = 0;
1556 pppol2tp_next_tunnel(net, pd);
1557 }
1558}
1559
1560static void *pppol2tp_seq_start(struct seq_file *m, loff_t *offs)
1561{
1562 struct pppol2tp_seq_data *pd = SEQ_START_TOKEN;
1563 loff_t pos = *offs;
1564 struct net *net;
1565
1566 if (!pos)
1567 goto out;
1568
1569 BUG_ON(m->private == NULL);
1570 pd = m->private;
1571 net = seq_file_net(m);
1572
1573 if (pd->tunnel == NULL)
1574 pppol2tp_next_tunnel(net, pd);
1575 else
1576 pppol2tp_next_session(net, pd);
1577
1578 /* NULL tunnel and session indicates end of list */
1579 if ((pd->tunnel == NULL) && (pd->session == NULL))
1580 pd = NULL;
1581
1582out:
1583 return pd;
1584}
1585
1586static void *pppol2tp_seq_next(struct seq_file *m, void *v, loff_t *pos)
1587{
1588 (*pos)++;
1589 return NULL;
1590}
1591
1592static void pppol2tp_seq_stop(struct seq_file *p, void *v)
1593{
1594 /* nothing to do */
1595}
1596
1597static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
1598{
1599 struct l2tp_tunnel *tunnel = v;
1600
1601 seq_printf(m, "\nTUNNEL '%s', %c %d\n",
1602 tunnel->name,
1603 (tunnel == tunnel->sock->sk_user_data) ? 'Y' : 'N',
1604 atomic_read(&tunnel->ref_count) - 1);
1605 seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n",
1606 tunnel->debug,
1607 (unsigned long long)tunnel->stats.tx_packets,
1608 (unsigned long long)tunnel->stats.tx_bytes,
1609 (unsigned long long)tunnel->stats.tx_errors,
1610 (unsigned long long)tunnel->stats.rx_packets,
1611 (unsigned long long)tunnel->stats.rx_bytes,
1612 (unsigned long long)tunnel->stats.rx_errors);
1613}
1614
1615static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
1616{
1617 struct l2tp_session *session = v;
1618 struct l2tp_tunnel *tunnel = session->tunnel;
1619 struct pppol2tp_session *ps = l2tp_session_priv(session);
1620 struct pppox_sock *po = pppox_sk(ps->sock);
1621 u32 ip = 0;
1622 u16 port = 0;
1623
1624 if (tunnel->sock) {
1625 struct inet_sock *inet = inet_sk(tunnel->sock);
1626 ip = ntohl(inet->inet_saddr);
1627 port = ntohs(inet->inet_sport);
1628 }
1629
1630 seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> "
1631 "%04X/%04X %d %c\n",
1632 session->name, ip, port,
1633 tunnel->tunnel_id,
1634 session->session_id,
1635 tunnel->peer_tunnel_id,
1636 session->peer_session_id,
1637 ps->sock->sk_state,
1638 (session == ps->sock->sk_user_data) ?
1639 'Y' : 'N');
1640 seq_printf(m, " %d/%d/%c/%c/%s %08x %u\n",
1641 session->mtu, session->mru,
1642 session->recv_seq ? 'R' : '-',
1643 session->send_seq ? 'S' : '-',
1644 session->lns_mode ? "LNS" : "LAC",
1645 session->debug,
1646 jiffies_to_msecs(session->reorder_timeout));
1647 seq_printf(m, " %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n",
1648 session->nr, session->ns,
1649 (unsigned long long)session->stats.tx_packets,
1650 (unsigned long long)session->stats.tx_bytes,
1651 (unsigned long long)session->stats.tx_errors,
1652 (unsigned long long)session->stats.rx_packets,
1653 (unsigned long long)session->stats.rx_bytes,
1654 (unsigned long long)session->stats.rx_errors);
1655
1656 if (po)
1657 seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
1658}
1659
1660static int pppol2tp_seq_show(struct seq_file *m, void *v)
1661{
1662 struct pppol2tp_seq_data *pd = v;
1663
1664 /* display header on line 1 */
1665 if (v == SEQ_START_TOKEN) {
1666 seq_puts(m, "PPPoL2TP driver info, " PPPOL2TP_DRV_VERSION "\n");
1667 seq_puts(m, "TUNNEL name, user-data-ok session-count\n");
1668 seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
1669 seq_puts(m, " SESSION name, addr/port src-tid/sid "
1670 "dest-tid/sid state user-data-ok\n");
1671 seq_puts(m, " mtu/mru/rcvseq/sendseq/lns debug reorderto\n");
1672 seq_puts(m, " nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
1673 goto out;
1674 }
1675
1676 /* Show the tunnel or session context.
1677 */
1678 if (pd->session == NULL)
1679 pppol2tp_seq_tunnel_show(m, pd->tunnel);
1680 else
1681 pppol2tp_seq_session_show(m, pd->session);
1682
1683out:
1684 return 0;
1685}
1686
1687static const struct seq_operations pppol2tp_seq_ops = {
1688 .start = pppol2tp_seq_start,
1689 .next = pppol2tp_seq_next,
1690 .stop = pppol2tp_seq_stop,
1691 .show = pppol2tp_seq_show,
1692};
1693
1694/* Called when our /proc file is opened. We allocate data for use when
1695 * iterating our tunnel / session contexts and store it in the private
1696 * data of the seq_file.
1697 */
1698static int pppol2tp_proc_open(struct inode *inode, struct file *file)
1699{
1700 return seq_open_net(inode, file, &pppol2tp_seq_ops,
1701 sizeof(struct pppol2tp_seq_data));
1702}
1703
1704static const struct file_operations pppol2tp_proc_fops = {
1705 .owner = THIS_MODULE,
1706 .open = pppol2tp_proc_open,
1707 .read = seq_read,
1708 .llseek = seq_lseek,
1709 .release = seq_release_net,
1710};
1711
1712#endif /* CONFIG_PROC_FS */
1713
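As a rough usage illustration (not part of this file), a userspace reader of the /proc/net/pppol2tp file created by the per-namespace code below might look like the following; the records it prints follow the header lines emitted by pppol2tp_seq_show() above.

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/net/pppol2tp", "r");
        char line[256];

        if (!f) {
                perror("fopen /proc/net/pppol2tp");
                return 1;
        }
        /* one TUNNEL line per tunnel, followed by its SESSION lines */
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);
        fclose(f);
        return 0;
}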
1714/*****************************************************************************
1715 * Network namespace
1716 *****************************************************************************/
1717
1718static __net_init int pppol2tp_init_net(struct net *net)
1719{
1720 struct proc_dir_entry *pde;
1721 int err = 0;
1722
1723 pde = proc_net_fops_create(net, "pppol2tp", S_IRUGO, &pppol2tp_proc_fops);
1724 if (!pde) {
1725 err = -ENOMEM;
1726 goto out;
1727 }
1728
1729out:
1730 return err;
1731}
1732
1733static __net_exit void pppol2tp_exit_net(struct net *net)
1734{
1735 proc_net_remove(net, "pppol2tp");
1736}
1737
1738static struct pernet_operations pppol2tp_net_ops = {
1739 .init = pppol2tp_init_net,
1740 .exit = pppol2tp_exit_net,
1741 .id = &pppol2tp_net_id,
1742};
1743
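For context, a minimal sketch of the pernet pattern used above; the foo_* names are hypothetical and not from this driver. Each network namespace gets its own init/exit call, so per-namespace resources such as the proc entry are created and torn down as namespaces come and go.

#include <net/net_namespace.h>

static int foo_net_id;                          /* hypothetical */

static __net_init int foo_init_net(struct net *net)
{
        /* allocate or register per-namespace state here */
        return 0;
}

static __net_exit void foo_exit_net(struct net *net)
{
        /* undo exactly what foo_init_net() did for this namespace */
}

static struct pernet_operations foo_net_ops = {
        .init = foo_init_net,
        .exit = foo_exit_net,
        .id   = &foo_net_id,
};

/* register_pernet_device(&foo_net_ops) at module load,
 * unregister_pernet_device(&foo_net_ops) at module unload. */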
1744/*****************************************************************************
1745 * Init and cleanup
1746 *****************************************************************************/
1747
1748static const struct proto_ops pppol2tp_ops = {
1749 .family = AF_PPPOX,
1750 .owner = THIS_MODULE,
1751 .release = pppol2tp_release,
1752 .bind = sock_no_bind,
1753 .connect = pppol2tp_connect,
1754 .socketpair = sock_no_socketpair,
1755 .accept = sock_no_accept,
1756 .getname = pppol2tp_getname,
1757 .poll = datagram_poll,
1758 .listen = sock_no_listen,
1759 .shutdown = sock_no_shutdown,
1760 .setsockopt = pppol2tp_setsockopt,
1761 .getsockopt = pppol2tp_getsockopt,
1762 .sendmsg = pppol2tp_sendmsg,
1763 .recvmsg = pppol2tp_recvmsg,
1764 .mmap = sock_no_mmap,
1765 .ioctl = pppox_ioctl,
1766};
1767
1768static struct pppox_proto pppol2tp_proto = {
1769 .create = pppol2tp_create,
1770 .ioctl = pppol2tp_ioctl
1771};
1772
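As a hedged usage sketch of the proto_ops above: a PPPoL2TP socket sits on top of an already connected UDP tunnel socket and is bound to the tunnel/session IDs negotiated by the L2TP control plane. The tunnel/session IDs (1 and 2) and the peer address handling below are placeholders, not values this driver requires.

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/if_pppox.h>     /* AF_PPPOX, PX_PROTO_OL2TP, sockaddr_pppol2tp */

int open_pppol2tp(int tunnel_fd, struct sockaddr_in *peer)
{
        struct sockaddr_pppol2tp sax;
        int fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);

        if (fd < 0)
                return -1;

        memset(&sax, 0, sizeof(sax));
        sax.sa_family = AF_PPPOX;
        sax.sa_protocol = PX_PROTO_OL2TP;
        sax.pppol2tp.fd = tunnel_fd;            /* connected UDP socket, port 1701 */
        sax.pppol2tp.addr = *peer;              /* peer (LNS) address and port */
        sax.pppol2tp.s_tunnel = 1;              /* local tunnel id (example) */
        sax.pppol2tp.s_session = 1;             /* local session id (example) */
        sax.pppol2tp.d_tunnel = 2;              /* peer tunnel id (example) */
        sax.pppol2tp.d_session = 2;             /* peer session id (example) */

        if (connect(fd, (struct sockaddr *)&sax, sizeof(sax)) < 0) {
                close(fd);
                return -1;
        }
        return fd;      /* typically handed to pppd / ppp_generic from here */
}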
1773#ifdef CONFIG_L2TP_V3
1774
1775static const struct l2tp_nl_cmd_ops pppol2tp_nl_cmd_ops = {
1776 .session_create = pppol2tp_session_create,
1777 .session_delete = pppol2tp_session_delete,
1778};
1779
1780#endif /* CONFIG_L2TP_V3 */
1781
1782static int __init pppol2tp_init(void)
1783{
1784 int err;
1785
1786 err = register_pernet_device(&pppol2tp_net_ops);
1787 if (err)
1788 goto out;
1789
1790 err = proto_register(&pppol2tp_sk_proto, 0);
1791 if (err)
1792 goto out_unregister_pppol2tp_pernet;
1793
1794 err = register_pppox_proto(PX_PROTO_OL2TP, &pppol2tp_proto);
1795 if (err)
1796 goto out_unregister_pppol2tp_proto;
1797
1798#ifdef CONFIG_L2TP_V3
1799 err = l2tp_nl_register_ops(L2TP_PWTYPE_PPP, &pppol2tp_nl_cmd_ops);
1800 if (err)
1801 goto out_unregister_pppox;
1802#endif
1803
1804 printk(KERN_INFO "PPPoL2TP kernel driver, %s\n",
1805 PPPOL2TP_DRV_VERSION);
1806
1807out:
1808 return err;
1809
1810#ifdef CONFIG_L2TP_V3
1811out_unregister_pppox:
1812 unregister_pppox_proto(PX_PROTO_OL2TP);
1813#endif
1814out_unregister_pppol2tp_proto:
1815 proto_unregister(&pppol2tp_sk_proto);
1816out_unregister_pppol2tp_pernet:
1817 unregister_pernet_device(&pppol2tp_net_ops);
1818 goto out;
1819}
1820
1821static void __exit pppol2tp_exit(void)
1822{
1823#ifdef CONFIG_L2TP_V3
1824 l2tp_nl_unregister_ops(L2TP_PWTYPE_PPP);
1825#endif
1826 unregister_pppox_proto(PX_PROTO_OL2TP);
1827 proto_unregister(&pppol2tp_sk_proto);
1828 unregister_pernet_device(&pppol2tp_net_ops);
1829}
1830
1831module_init(pppol2tp_init);
1832module_exit(pppol2tp_exit);
1833
1834MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
1835MODULE_DESCRIPTION("PPP over L2TP over UDP");
1836MODULE_LICENSE("GPL");
1837MODULE_VERSION(PPPOL2TP_DRV_VERSION);
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 78167e81dfeb..2bb0ddff8c0f 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -144,12 +144,6 @@ static struct packet_type llc_tr_packet_type __read_mostly = {
144 144
145static int __init llc_init(void) 145static int __init llc_init(void)
146{ 146{
147 struct net_device *dev;
148
149 dev = first_net_device(&init_net);
150 if (dev != NULL)
151 dev = next_net_device(dev);
152
153 dev_add_pack(&llc_packet_type); 147 dev_add_pack(&llc_packet_type);
154 dev_add_pack(&llc_tr_packet_type); 148 dev_add_pack(&llc_tr_packet_type);
155 return 0; 149 return 0;
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index a952b7f8c648..334c359da5e8 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -15,8 +15,12 @@ comment "CFG80211 needs to be enabled for MAC80211"
15 15
16if MAC80211 != n 16if MAC80211 != n
17 17
18config MAC80211_HAS_RC
19 def_bool n
20
18config MAC80211_RC_PID 21config MAC80211_RC_PID
19 bool "PID controller based rate control algorithm" if EMBEDDED 22 bool "PID controller based rate control algorithm" if EMBEDDED
23 select MAC80211_HAS_RC
20 ---help--- 24 ---help---
21 This option enables a TX rate control algorithm for 25 This option enables a TX rate control algorithm for
22 mac80211 that uses a PID controller to select the TX 26 mac80211 that uses a PID controller to select the TX
@@ -24,12 +28,14 @@ config MAC80211_RC_PID
24 28
25config MAC80211_RC_MINSTREL 29config MAC80211_RC_MINSTREL
26 bool "Minstrel" if EMBEDDED 30 bool "Minstrel" if EMBEDDED
31 select MAC80211_HAS_RC
27 default y 32 default y
28 ---help--- 33 ---help---
29 This option enables the 'minstrel' TX rate control algorithm 34 This option enables the 'minstrel' TX rate control algorithm
30 35
31choice 36choice
32 prompt "Default rate control algorithm" 37 prompt "Default rate control algorithm"
38 depends on MAC80211_HAS_RC
33 default MAC80211_RC_DEFAULT_MINSTREL 39 default MAC80211_RC_DEFAULT_MINSTREL
34 ---help--- 40 ---help---
35 This option selects the default rate control algorithm 41 This option selects the default rate control algorithm
@@ -62,6 +68,9 @@ config MAC80211_RC_DEFAULT
62 68
63endif 69endif
64 70
71comment "Some wireless drivers require a rate control algorithm"
72 depends on MAC80211_HAS_RC=n
73
65config MAC80211_MESH 74config MAC80211_MESH
66 bool "Enable mac80211 mesh networking (pre-802.11s) support" 75 bool "Enable mac80211 mesh networking (pre-802.11s) support"
67 depends on MAC80211 && EXPERIMENTAL 76 depends on MAC80211 && EXPERIMENTAL
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index edc872e22c9b..c41aaba839fa 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1403,6 +1403,32 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
1403 return 0; 1403 return 0;
1404} 1404}
1405 1405
1406static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy,
1407 struct net_device *dev,
1408 s32 rssi_thold, u32 rssi_hyst)
1409{
1410 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1411 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
1412 struct ieee80211_vif *vif = &sdata->vif;
1413 struct ieee80211_bss_conf *bss_conf = &vif->bss_conf;
1414
1415 if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI))
1416 return -EOPNOTSUPP;
1417
1418 if (rssi_thold == bss_conf->cqm_rssi_thold &&
1419 rssi_hyst == bss_conf->cqm_rssi_hyst)
1420 return 0;
1421
1422 bss_conf->cqm_rssi_thold = rssi_thold;
1423 bss_conf->cqm_rssi_hyst = rssi_hyst;
1424
1425 /* tell the driver upon association, unless already associated */
1426 if (sdata->u.mgd.associated)
1427 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_CQM);
1428
1429 return 0;
1430}
1431
1406static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, 1432static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
1407 struct net_device *dev, 1433 struct net_device *dev,
1408 const u8 *addr, 1434 const u8 *addr,
@@ -1507,4 +1533,5 @@ struct cfg80211_ops mac80211_config_ops = {
1507 .remain_on_channel = ieee80211_remain_on_channel, 1533 .remain_on_channel = ieee80211_remain_on_channel,
1508 .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, 1534 .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel,
1509 .action = ieee80211_action, 1535 .action = ieee80211_action,
1536 .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config,
1510}; 1537};
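A driver advertising IEEE80211_HW_SUPPORTS_CQM_RSSI would pick the thresholds configured by ieee80211_set_cqm_rssi_config() up in its bss_info_changed() callback and later report threshold crossings back to mac80211. A rough sketch follows; foo_hw_set_rssi_trigger() and the foo_* names are invented, only the mac80211 entry points are real.

#include <net/mac80211.h>

/* hypothetical hardware hook, not a real mac80211 or driver symbol */
static void foo_hw_set_rssi_trigger(void *priv, s32 thold, u32 hyst) { }

static void foo_bss_info_changed(struct ieee80211_hw *hw,
                                 struct ieee80211_vif *vif,
                                 struct ieee80211_bss_conf *info,
                                 u32 changed)
{
        if (changed & BSS_CHANGED_CQM)
                /* values come straight from ieee80211_set_cqm_rssi_config() */
                foo_hw_set_rssi_trigger(hw->priv, info->cqm_rssi_thold,
                                        info->cqm_rssi_hyst);
}

/* later, from the driver's event path (e.g. a firmware interrupt): */
static void foo_rssi_event(struct ieee80211_vif *vif, bool low)
{
        ieee80211_cqm_rssi_notify(vif,
                                  low ? NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW
                                      : NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH,
                                  GFP_ATOMIC);
}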
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index d92800bb2d2f..23e720034577 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -57,7 +57,6 @@ STA_FILE(tx_filtered, tx_filtered_count, LU);
57STA_FILE(tx_retry_failed, tx_retry_failed, LU); 57STA_FILE(tx_retry_failed, tx_retry_failed, LU);
58STA_FILE(tx_retry_count, tx_retry_count, LU); 58STA_FILE(tx_retry_count, tx_retry_count, LU);
59STA_FILE(last_signal, last_signal, D); 59STA_FILE(last_signal, last_signal, D);
60STA_FILE(last_noise, last_noise, D);
61STA_FILE(wep_weak_iv_count, wep_weak_iv_count, LU); 60STA_FILE(wep_weak_iv_count, wep_weak_iv_count, LU);
62 61
63static ssize_t sta_flags_read(struct file *file, char __user *userbuf, 62static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
@@ -289,7 +288,6 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
289 DEBUGFS_ADD(tx_retry_failed); 288 DEBUGFS_ADD(tx_retry_failed);
290 DEBUGFS_ADD(tx_retry_count); 289 DEBUGFS_ADD(tx_retry_count);
291 DEBUGFS_ADD(last_signal); 290 DEBUGFS_ADD(last_signal);
292 DEBUGFS_ADD(last_noise);
293 DEBUGFS_ADD(wep_weak_iv_count); 291 DEBUGFS_ADD(wep_weak_iv_count);
294 DEBUGFS_ADD(ht_capa); 292 DEBUGFS_ADD(ht_capa);
295} 293}
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index c3d844093a2f..9179196da264 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -84,16 +84,14 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local,
84} 84}
85 85
86static inline u64 drv_prepare_multicast(struct ieee80211_local *local, 86static inline u64 drv_prepare_multicast(struct ieee80211_local *local,
87 int mc_count, 87 struct netdev_hw_addr_list *mc_list)
88 struct dev_addr_list *mc_list)
89{ 88{
90 u64 ret = 0; 89 u64 ret = 0;
91 90
92 if (local->ops->prepare_multicast) 91 if (local->ops->prepare_multicast)
93 ret = local->ops->prepare_multicast(&local->hw, mc_count, 92 ret = local->ops->prepare_multicast(&local->hw, mc_list);
94 mc_list);
95 93
96 trace_drv_prepare_multicast(local, mc_count, ret); 94 trace_drv_prepare_multicast(local, mc_list->count, ret);
97 95
98 return ret; 96 return ret;
99} 97}
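On the driver side, the new prepare_multicast signature means walking a netdev_hw_addr_list instead of a dev_addr_list chain. A hedged sketch; the hash function and the way the result is consumed are made up, only the callback shape and the list-walking macro are assumed from the API.

#include <linux/netdevice.h>
#include <net/mac80211.h>

/* hypothetical 6-bit hash, stands in for whatever the hardware expects */
static unsigned int foo_mc_hash(const u8 *addr)
{
        return addr[5] & 0x3f;
}

static u64 foo_prepare_multicast(struct ieee80211_hw *hw,
                                 struct netdev_hw_addr_list *mc_list)
{
        struct netdev_hw_addr *ha;
        u64 mc_filter = 0;

        netdev_hw_addr_list_for_each(ha, mc_list)
                mc_filter |= 1ULL << foo_mc_hash(ha->addr);

        /* returned value is passed back to configure_filter() as the
         * multicast argument */
        return mc_filter;
}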
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index e2976da4e0d9..e6f3b0c7a71f 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -265,17 +265,16 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
265 sta->sta.supp_rates[band] = supp_rates | 265 sta->sta.supp_rates[band] = supp_rates |
266 ieee80211_mandatory_rates(local, band); 266 ieee80211_mandatory_rates(local, band);
267 267
268 if (sta->sta.supp_rates[band] != prev_rates) {
268#ifdef CONFIG_MAC80211_IBSS_DEBUG 269#ifdef CONFIG_MAC80211_IBSS_DEBUG
269 if (sta->sta.supp_rates[band] != prev_rates)
270 printk(KERN_DEBUG "%s: updated supp_rates set " 270 printk(KERN_DEBUG "%s: updated supp_rates set "
271 "for %pM based on beacon info (0x%llx | " 271 "for %pM based on beacon/probe_response "
272 "0x%llx -> 0x%llx)\n", 272 "(0x%x -> 0x%x)\n",
273 sdata->name, 273 sdata->name, sta->sta.addr,
274 sta->sta.addr, 274 prev_rates, sta->sta.supp_rates[band]);
275 (unsigned long long) prev_rates,
276 (unsigned long long) supp_rates,
277 (unsigned long long) sta->sta.supp_rates[band]);
278#endif 275#endif
276 rate_control_rate_init(sta);
277 }
279 rcu_read_unlock(); 278 rcu_read_unlock();
280 } else { 279 } else {
281 rcu_read_unlock(); 280 rcu_read_unlock();
@@ -371,6 +370,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
371 sdata->name, mgmt->bssid); 370 sdata->name, mgmt->bssid);
372#endif 371#endif
373 ieee80211_sta_join_ibss(sdata, bss); 372 ieee80211_sta_join_ibss(sdata, bss);
373 supp_rates = ieee80211_sta_get_rates(local, elems, band);
374 ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 374 ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa,
375 supp_rates, GFP_KERNEL); 375 supp_rates, GFP_KERNEL);
376 } 376 }
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 241533e1bc03..7fdacf9408b1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -327,7 +327,7 @@ struct ieee80211_if_managed {
327 struct work_struct work; 327 struct work_struct work;
328 struct work_struct monitor_work; 328 struct work_struct monitor_work;
329 struct work_struct chswitch_work; 329 struct work_struct chswitch_work;
330 struct work_struct beacon_loss_work; 330 struct work_struct beacon_connection_loss_work;
331 331
332 unsigned long probe_timeout; 332 unsigned long probe_timeout;
333 int probe_send_count; 333 int probe_send_count;
@@ -646,8 +646,7 @@ struct ieee80211_local {
646 struct work_struct recalc_smps; 646 struct work_struct recalc_smps;
647 647
648 /* aggregated multicast list */ 648 /* aggregated multicast list */
649 struct dev_addr_list *mc_list; 649 struct netdev_hw_addr_list mc_list;
650 int mc_count;
651 650
652 bool tim_in_locked_section; /* see ieee80211_beacon_get() */ 651 bool tim_in_locked_section; /* see ieee80211_beacon_get() */
653 652
@@ -745,6 +744,7 @@ struct ieee80211_local {
745 int scan_channel_idx; 744 int scan_channel_idx;
746 int scan_ies_len; 745 int scan_ies_len;
747 746
747 unsigned long leave_oper_channel_time;
748 enum mac80211_scan_state next_scan_state; 748 enum mac80211_scan_state next_scan_state;
749 struct delayed_work scan_work; 749 struct delayed_work scan_work;
750 struct ieee80211_sub_if_data *scan_sdata; 750 struct ieee80211_sub_if_data *scan_sdata;
@@ -1155,7 +1155,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
1155 int powersave); 1155 int powersave);
1156void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, 1156void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
1157 struct ieee80211_hdr *hdr); 1157 struct ieee80211_hdr *hdr);
1158void ieee80211_beacon_loss_work(struct work_struct *work); 1158void ieee80211_beacon_connection_loss_work(struct work_struct *work);
1159 1159
1160void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, 1160void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
1161 enum queue_stop_reason reason); 1161 enum queue_stop_reason reason);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index e08fa8eda1b3..50deb017fd6e 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -413,8 +413,7 @@ static int ieee80211_stop(struct net_device *dev)
413 413
414 netif_addr_lock_bh(dev); 414 netif_addr_lock_bh(dev);
415 spin_lock_bh(&local->filter_lock); 415 spin_lock_bh(&local->filter_lock);
416 __dev_addr_unsync(&local->mc_list, &local->mc_count, 416 __hw_addr_unsync(&local->mc_list, &dev->mc, dev->addr_len);
417 &dev->mc_list, &dev->mc_count);
418 spin_unlock_bh(&local->filter_lock); 417 spin_unlock_bh(&local->filter_lock);
419 netif_addr_unlock_bh(dev); 418 netif_addr_unlock_bh(dev);
420 419
@@ -487,7 +486,7 @@ static int ieee80211_stop(struct net_device *dev)
487 cancel_work_sync(&sdata->u.mgd.work); 486 cancel_work_sync(&sdata->u.mgd.work);
488 cancel_work_sync(&sdata->u.mgd.chswitch_work); 487 cancel_work_sync(&sdata->u.mgd.chswitch_work);
489 cancel_work_sync(&sdata->u.mgd.monitor_work); 488 cancel_work_sync(&sdata->u.mgd.monitor_work);
490 cancel_work_sync(&sdata->u.mgd.beacon_loss_work); 489 cancel_work_sync(&sdata->u.mgd.beacon_connection_loss_work);
491 490
492 /* 491 /*
493 * When we get here, the interface is marked down. 492 * When we get here, the interface is marked down.
@@ -597,8 +596,7 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
597 sdata->flags ^= IEEE80211_SDATA_PROMISC; 596 sdata->flags ^= IEEE80211_SDATA_PROMISC;
598 } 597 }
599 spin_lock_bh(&local->filter_lock); 598 spin_lock_bh(&local->filter_lock);
600 __dev_addr_sync(&local->mc_list, &local->mc_count, 599 __hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len);
601 &dev->mc_list, &dev->mc_count);
602 spin_unlock_bh(&local->filter_lock); 600 spin_unlock_bh(&local->filter_lock);
603 ieee80211_queue_work(&local->hw, &local->reconfig_filter); 601 ieee80211_queue_work(&local->hw, &local->reconfig_filter);
604} 602}
@@ -816,6 +814,118 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
816 return 0; 814 return 0;
817} 815}
818 816
817static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
818 struct net_device *dev,
819 enum nl80211_iftype type)
820{
821 struct ieee80211_sub_if_data *sdata;
822 u64 mask, start, addr, val, inc;
823 u8 *m;
824 u8 tmp_addr[ETH_ALEN];
825 int i;
826
827 /* default ... something at least */
828 memcpy(dev->perm_addr, local->hw.wiphy->perm_addr, ETH_ALEN);
829
830 if (is_zero_ether_addr(local->hw.wiphy->addr_mask) &&
831 local->hw.wiphy->n_addresses <= 1)
832 return;
833
834
835 mutex_lock(&local->iflist_mtx);
836
837 switch (type) {
838 case NL80211_IFTYPE_MONITOR:
839 /* doesn't matter */
840 break;
841 case NL80211_IFTYPE_WDS:
842 case NL80211_IFTYPE_AP_VLAN:
843 /* match up with an AP interface */
844 list_for_each_entry(sdata, &local->interfaces, list) {
845 if (sdata->vif.type != NL80211_IFTYPE_AP)
846 continue;
847 memcpy(dev->perm_addr, sdata->vif.addr, ETH_ALEN);
848 break;
849 }
850 /* keep default if no AP interface present */
851 break;
852 default:
853 /* assign a new address if possible -- try n_addresses first */
854 for (i = 0; i < local->hw.wiphy->n_addresses; i++) {
855 bool used = false;
856
857 list_for_each_entry(sdata, &local->interfaces, list) {
858 if (memcmp(local->hw.wiphy->addresses[i].addr,
859 sdata->vif.addr, ETH_ALEN) == 0) {
860 used = true;
861 break;
862 }
863 }
864
865 if (!used) {
866 memcpy(dev->perm_addr,
867 local->hw.wiphy->addresses[i].addr,
868 ETH_ALEN);
869 break;
870 }
871 }
872
873 /* try mask if available */
874 if (is_zero_ether_addr(local->hw.wiphy->addr_mask))
875 break;
876
877 m = local->hw.wiphy->addr_mask;
878 mask = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) |
879 ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) |
880 ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8);
881
882 if (__ffs64(mask) + hweight64(mask) != fls64(mask)) {
883 /* not a contiguous mask ... not handled now! */
884 printk(KERN_DEBUG "not contiguous\n");
885 break;
886 }
887
888 m = local->hw.wiphy->perm_addr;
889 start = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) |
890 ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) |
891 ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8);
892
893 inc = 1ULL<<__ffs64(mask);
894 val = (start & mask);
895 addr = (start & ~mask) | (val & mask);
896 do {
897 bool used = false;
898
899 tmp_addr[5] = addr >> 0*8;
900 tmp_addr[4] = addr >> 1*8;
901 tmp_addr[3] = addr >> 2*8;
902 tmp_addr[2] = addr >> 3*8;
903 tmp_addr[1] = addr >> 4*8;
904 tmp_addr[0] = addr >> 5*8;
905
906 val += inc;
907
908 list_for_each_entry(sdata, &local->interfaces, list) {
909 if (memcmp(tmp_addr, sdata->vif.addr,
910 ETH_ALEN) == 0) {
911 used = true;
912 break;
913 }
914 }
915
916 if (!used) {
917 memcpy(dev->perm_addr, tmp_addr, ETH_ALEN);
918 break;
919 }
920 addr = (start & ~mask) | (val & mask);
921 } while (addr != start);
922
923 break;
924 }
925
926 mutex_unlock(&local->iflist_mtx);
927}
928
819int ieee80211_if_add(struct ieee80211_local *local, const char *name, 929int ieee80211_if_add(struct ieee80211_local *local, const char *name,
820 struct net_device **new_dev, enum nl80211_iftype type, 930 struct net_device **new_dev, enum nl80211_iftype type,
821 struct vif_params *params) 931 struct vif_params *params)
@@ -845,8 +955,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
845 if (ret < 0) 955 if (ret < 0)
846 goto fail; 956 goto fail;
847 957
848 memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); 958 ieee80211_assign_perm_addr(local, ndev, type);
849 memcpy(ndev->perm_addr, ndev->dev_addr, ETH_ALEN); 959 memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN);
850 SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); 960 SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy));
851 961
852 /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */ 962 /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */
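The address-mask arithmetic in ieee80211_assign_perm_addr() above is easier to see with concrete numbers. Below is a standalone sketch of the same packing and increment scheme; the base address and mask are invented for illustration and the loop simply prints the candidate addresses the kernel code would try.

#include <stdio.h>
#include <stdint.h>

/* pack a MAC address into a u64, most significant byte first,
 * as the mac80211 code above does */
static uint64_t mac_to_u64(const uint8_t *m)
{
        uint64_t v = 0;
        int i;

        for (i = 0; i < 6; i++)
                v = (v << 8) | m[i];
        return v;
}

int main(void)
{
        uint8_t base[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00 };
        uint8_t msk[6]  = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 }; /* 4 addrs */
        uint64_t start = mac_to_u64(base), mask = mac_to_u64(msk);
        uint64_t inc = mask & -mask;    /* lowest set bit, like 1 << __ffs64 */
        uint64_t val = start & mask, addr;
        int i;

        for (i = 0; i < 4; i++) {
                addr = (start & ~mask) | (val & mask);
                printf("candidate %d: %012llx\n", i,
                       (unsigned long long)addr);
                val += inc;             /* walk the maskable bits only */
        }
        return 0;
}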
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index b887e484ae04..50c1b1ada884 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -71,7 +71,7 @@ void ieee80211_configure_filter(struct ieee80211_local *local)
71 spin_lock_bh(&local->filter_lock); 71 spin_lock_bh(&local->filter_lock);
72 changed_flags = local->filter_flags ^ new_flags; 72 changed_flags = local->filter_flags ^ new_flags;
73 73
74 mc = drv_prepare_multicast(local, local->mc_count, local->mc_list); 74 mc = drv_prepare_multicast(local, &local->mc_list);
75 spin_unlock_bh(&local->filter_lock); 75 spin_unlock_bh(&local->filter_lock);
76 76
77 /* be a bit nasty */ 77 /* be a bit nasty */
@@ -388,6 +388,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
388 local->uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN; 388 local->uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN;
389 389
390 INIT_LIST_HEAD(&local->interfaces); 390 INIT_LIST_HEAD(&local->interfaces);
391
392 __hw_addr_init(&local->mc_list);
393
391 mutex_init(&local->iflist_mtx); 394 mutex_init(&local->iflist_mtx);
392 mutex_init(&local->scan_mtx); 395 mutex_init(&local->scan_mtx);
393 396
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index c8cd169fc10e..71ff42a0465b 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -754,6 +754,11 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
754 /* And the BSSID changed - we're associated now */ 754 /* And the BSSID changed - we're associated now */
755 bss_info_changed |= BSS_CHANGED_BSSID; 755 bss_info_changed |= BSS_CHANGED_BSSID;
756 756
757 /* Tell the driver to monitor connection quality (if supported) */
758 if ((local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI) &&
759 sdata->vif.bss_conf.cqm_rssi_thold)
760 bss_info_changed |= BSS_CHANGED_CQM;
761
757 ieee80211_bss_info_change_notify(sdata, bss_info_changed); 762 ieee80211_bss_info_change_notify(sdata, bss_info_changed);
758 763
759 mutex_lock(&local->iflist_mtx); 764 mutex_lock(&local->iflist_mtx);
@@ -855,6 +860,9 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
855 if (is_multicast_ether_addr(hdr->addr1)) 860 if (is_multicast_ether_addr(hdr->addr1))
856 return; 861 return;
857 862
863 if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
864 return;
865
858 mod_timer(&sdata->u.mgd.conn_mon_timer, 866 mod_timer(&sdata->u.mgd.conn_mon_timer,
859 round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME)); 867 round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME));
860} 868}
@@ -932,23 +940,68 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
932 mutex_unlock(&ifmgd->mtx); 940 mutex_unlock(&ifmgd->mtx);
933} 941}
934 942
935void ieee80211_beacon_loss_work(struct work_struct *work) 943static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
944{
945 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
946 struct ieee80211_local *local = sdata->local;
947 u8 bssid[ETH_ALEN];
948
949 mutex_lock(&ifmgd->mtx);
950 if (!ifmgd->associated) {
951 mutex_unlock(&ifmgd->mtx);
952 return;
953 }
954
955 memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
956
957 printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid);
958
959 ieee80211_set_disassoc(sdata);
960 ieee80211_recalc_idle(local);
961 mutex_unlock(&ifmgd->mtx);
962 /*
963 * must be outside lock due to cfg80211,
964 * but that's not a problem.
965 */
966 ieee80211_send_deauth_disassoc(sdata, bssid,
967 IEEE80211_STYPE_DEAUTH,
968 WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
969 NULL);
970}
971
972void ieee80211_beacon_connection_loss_work(struct work_struct *work)
936{ 973{
937 struct ieee80211_sub_if_data *sdata = 974 struct ieee80211_sub_if_data *sdata =
938 container_of(work, struct ieee80211_sub_if_data, 975 container_of(work, struct ieee80211_sub_if_data,
939 u.mgd.beacon_loss_work); 976 u.mgd.beacon_connection_loss_work);
940 977
941 ieee80211_mgd_probe_ap(sdata, true); 978 if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
979 __ieee80211_connection_loss(sdata);
980 else
981 ieee80211_mgd_probe_ap(sdata, true);
942} 982}
943 983
944void ieee80211_beacon_loss(struct ieee80211_vif *vif) 984void ieee80211_beacon_loss(struct ieee80211_vif *vif)
945{ 985{
946 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); 986 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
987 struct ieee80211_hw *hw = &sdata->local->hw;
947 988
948 ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.beacon_loss_work); 989 WARN_ON(hw->flags & IEEE80211_HW_CONNECTION_MONITOR);
990 ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work);
949} 991}
950EXPORT_SYMBOL(ieee80211_beacon_loss); 992EXPORT_SYMBOL(ieee80211_beacon_loss);
951 993
994void ieee80211_connection_loss(struct ieee80211_vif *vif)
995{
996 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
997 struct ieee80211_hw *hw = &sdata->local->hw;
998
999 WARN_ON(!(hw->flags & IEEE80211_HW_CONNECTION_MONITOR));
1000 ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work);
1001}
1002EXPORT_SYMBOL(ieee80211_connection_loss);
1003
1004
952static enum rx_mgmt_action __must_check 1005static enum rx_mgmt_action __must_check
953ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, 1006ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
954 struct ieee80211_mgmt *mgmt, size_t len) 1007 struct ieee80211_mgmt *mgmt, size_t len)
@@ -1638,7 +1691,8 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data)
1638 if (local->quiescing) 1691 if (local->quiescing)
1639 return; 1692 return;
1640 1693
1641 ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.beacon_loss_work); 1694 ieee80211_queue_work(&sdata->local->hw,
1695 &sdata->u.mgd.beacon_connection_loss_work);
1642} 1696}
1643 1697
1644static void ieee80211_sta_conn_mon_timer(unsigned long data) 1698static void ieee80211_sta_conn_mon_timer(unsigned long data)
@@ -1690,7 +1744,7 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata)
1690 */ 1744 */
1691 1745
1692 cancel_work_sync(&ifmgd->work); 1746 cancel_work_sync(&ifmgd->work);
1693 cancel_work_sync(&ifmgd->beacon_loss_work); 1747 cancel_work_sync(&ifmgd->beacon_connection_loss_work);
1694 if (del_timer_sync(&ifmgd->timer)) 1748 if (del_timer_sync(&ifmgd->timer))
1695 set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); 1749 set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running);
1696 1750
@@ -1724,7 +1778,8 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
1724 INIT_WORK(&ifmgd->work, ieee80211_sta_work); 1778 INIT_WORK(&ifmgd->work, ieee80211_sta_work);
1725 INIT_WORK(&ifmgd->monitor_work, ieee80211_sta_monitor_work); 1779 INIT_WORK(&ifmgd->monitor_work, ieee80211_sta_monitor_work);
1726 INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); 1780 INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work);
1727 INIT_WORK(&ifmgd->beacon_loss_work, ieee80211_beacon_loss_work); 1781 INIT_WORK(&ifmgd->beacon_connection_loss_work,
1782 ieee80211_beacon_connection_loss_work);
1728 setup_timer(&ifmgd->timer, ieee80211_sta_timer, 1783 setup_timer(&ifmgd->timer, ieee80211_sta_timer,
1729 (unsigned long) sdata); 1784 (unsigned long) sdata);
1730 setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 1785 setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer,
@@ -2136,3 +2191,13 @@ int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata,
2136 *cookie = (unsigned long) skb; 2191 *cookie = (unsigned long) skb;
2137 return 0; 2192 return 0;
2138} 2193}
2194
2195void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif,
2196 enum nl80211_cqm_rssi_threshold_event rssi_event,
2197 gfp_t gfp)
2198{
2199 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
2200
2201 cfg80211_cqm_rssi_notify(sdata->dev, rssi_event, gfp);
2202}
2203EXPORT_SYMBOL(ieee80211_cqm_rssi_notify);
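Drivers that offload connection monitoring (IEEE80211_HW_CONNECTION_MONITOR) are expected to call the new ieee80211_connection_loss() rather than ieee80211_beacon_loss(); a minimal hedged sketch of the firmware-event path, with invented foo_* names:

#include <net/mac80211.h>

/* at registration time such a driver advertises the offload with
 *      hw->flags |= IEEE80211_HW_CONNECTION_MONITOR;
 * and then, when its firmware's own keep-alive/beacon tracking declares
 * the AP unreachable: */
static void foo_fw_connection_lost(struct ieee80211_vif *vif)
{
        /* mac80211 disassociates and reports the loss, mirroring what
         * ieee80211_beacon_connection_loss_work() does above */
        ieee80211_connection_loss(vif);
}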
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 818abfae9007..f65ce6dcc8e2 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -542,7 +542,7 @@ minstrel_free(void *priv)
542 kfree(priv); 542 kfree(priv);
543} 543}
544 544
545static struct rate_control_ops mac80211_minstrel = { 545struct rate_control_ops mac80211_minstrel = {
546 .name = "minstrel", 546 .name = "minstrel",
547 .tx_status = minstrel_tx_status, 547 .tx_status = minstrel_tx_status,
548 .get_rate = minstrel_get_rate, 548 .get_rate = minstrel_get_rate,
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index 38bf4168fc3a..0f5a83370aa6 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -80,7 +80,18 @@ struct minstrel_priv {
80 unsigned int lookaround_rate_mrr; 80 unsigned int lookaround_rate_mrr;
81}; 81};
82 82
83struct minstrel_debugfs_info {
84 size_t len;
85 char buf[];
86};
87
88extern struct rate_control_ops mac80211_minstrel;
83void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); 89void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
84void minstrel_remove_sta_debugfs(void *priv, void *priv_sta); 90void minstrel_remove_sta_debugfs(void *priv, void *priv_sta);
85 91
92/* debugfs */
93int minstrel_stats_open(struct inode *inode, struct file *file);
94ssize_t minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos);
95int minstrel_stats_release(struct inode *inode, struct file *file);
96
86#endif 97#endif
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index 0e1f12b1b6dd..241e76f3fdf2 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -53,21 +53,15 @@
53#include <net/mac80211.h> 53#include <net/mac80211.h>
54#include "rc80211_minstrel.h" 54#include "rc80211_minstrel.h"
55 55
56struct minstrel_stats_info { 56int
57 struct minstrel_sta_info *mi;
58 char buf[4096];
59 size_t len;
60};
61
62static int
63minstrel_stats_open(struct inode *inode, struct file *file) 57minstrel_stats_open(struct inode *inode, struct file *file)
64{ 58{
65 struct minstrel_sta_info *mi = inode->i_private; 59 struct minstrel_sta_info *mi = inode->i_private;
66 struct minstrel_stats_info *ms; 60 struct minstrel_debugfs_info *ms;
67 unsigned int i, tp, prob, eprob; 61 unsigned int i, tp, prob, eprob;
68 char *p; 62 char *p;
69 63
70 ms = kmalloc(sizeof(*ms), GFP_KERNEL); 64 ms = kmalloc(sizeof(*ms) + 4096, GFP_KERNEL);
71 if (!ms) 65 if (!ms)
72 return -ENOMEM; 66 return -ENOMEM;
73 67
@@ -107,36 +101,19 @@ minstrel_stats_open(struct inode *inode, struct file *file)
107 return 0; 101 return 0;
108} 102}
109 103
110static ssize_t 104ssize_t
111minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *o) 105minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos)
112{ 106{
113 struct minstrel_stats_info *ms; 107 struct minstrel_debugfs_info *ms;
114 char *src;
115 108
116 ms = file->private_data; 109 ms = file->private_data;
117 src = ms->buf; 110 return simple_read_from_buffer(buf, len, ppos, ms->buf, ms->len);
118
119 len = min(len, ms->len);
120 if (len <= *o)
121 return 0;
122
123 src += *o;
124 len -= *o;
125 *o += len;
126
127 if (copy_to_user(buf, src, len))
128 return -EFAULT;
129
130 return len;
131} 111}
132 112
133static int 113int
134minstrel_stats_release(struct inode *inode, struct file *file) 114minstrel_stats_release(struct inode *inode, struct file *file)
135{ 115{
136 struct minstrel_stats_info *ms = file->private_data; 116 kfree(file->private_data);
137
138 kfree(ms);
139
140 return 0; 117 return 0;
141} 118}
142 119
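The rewritten minstrel debugfs path is an instance of a common pattern: format everything once at open() into a heap buffer, serve reads with simple_read_from_buffer(), free the buffer at release(). A generic hedged sketch with invented foo_* names:

#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

struct foo_dump {                       /* mirrors minstrel_debugfs_info */
        size_t len;
        char buf[];
};

static int foo_open(struct inode *inode, struct file *file)
{
        struct foo_dump *d = kmalloc(sizeof(*d) + 4096, GFP_KERNEL);

        if (!d)
                return -ENOMEM;
        d->len = scnprintf(d->buf, 4096, "example stats for %p\n",
                           inode->i_private);
        file->private_data = d;
        return 0;
}

static ssize_t foo_read(struct file *file, char __user *buf,
                        size_t len, loff_t *ppos)
{
        struct foo_dump *d = file->private_data;

        return simple_read_from_buffer(buf, len, ppos, d->buf, d->len);
}

static int foo_release(struct inode *inode, struct file *file)
{
        kfree(file->private_data);
        return 0;
}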
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 04ea07f0e78a..e0c944fb6fc9 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -179,14 +179,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
179 pos++; 179 pos++;
180 } 180 }
181 181
182 /* IEEE80211_RADIOTAP_DBM_ANTNOISE */
183 if (local->hw.flags & IEEE80211_HW_NOISE_DBM) {
184 *pos = status->noise;
185 rthdr->it_present |=
186 cpu_to_le32(1 << IEEE80211_RADIOTAP_DBM_ANTNOISE);
187 pos++;
188 }
189
190 /* IEEE80211_RADIOTAP_LOCK_QUALITY is missing */ 182 /* IEEE80211_RADIOTAP_LOCK_QUALITY is missing */
191 183
192 /* IEEE80211_RADIOTAP_ANTENNA */ 184 /* IEEE80211_RADIOTAP_ANTENNA */
@@ -1078,7 +1070,6 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
1078 sta->rx_fragments++; 1070 sta->rx_fragments++;
1079 sta->rx_bytes += rx->skb->len; 1071 sta->rx_bytes += rx->skb->len;
1080 sta->last_signal = status->signal; 1072 sta->last_signal = status->signal;
1081 sta->last_noise = status->noise;
1082 1073
1083 /* 1074 /*
1084 * Change STA power saving mode only at the end of a frame 1075 * Change STA power saving mode only at the end of a frame
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 85507bd9e341..1ce4ce8af80f 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -14,6 +14,8 @@
14 14
15#include <linux/if_arp.h> 15#include <linux/if_arp.h>
16#include <linux/rtnetlink.h> 16#include <linux/rtnetlink.h>
17#include <linux/pm_qos_params.h>
18#include <net/sch_generic.h>
17#include <linux/slab.h> 19#include <linux/slab.h>
18#include <net/mac80211.h> 20#include <net/mac80211.h>
19 21
@@ -322,6 +324,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local)
322 324
323 ieee80211_offchannel_stop_beaconing(local); 325 ieee80211_offchannel_stop_beaconing(local);
324 326
327 local->leave_oper_channel_time = 0;
325 local->next_scan_state = SCAN_DECISION; 328 local->next_scan_state = SCAN_DECISION;
326 local->scan_channel_idx = 0; 329 local->scan_channel_idx = 0;
327 330
@@ -426,11 +429,28 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
426 return rc; 429 return rc;
427} 430}
428 431
432static unsigned long
433ieee80211_scan_get_channel_time(struct ieee80211_channel *chan)
434{
435 /*
436 * TODO: channel switching also consumes quite some time,
437 * add that delay as well to get a better estimation
438 */
439 if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN)
440 return IEEE80211_PASSIVE_CHANNEL_TIME;
441 return IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME;
442}
443
429static int ieee80211_scan_state_decision(struct ieee80211_local *local, 444static int ieee80211_scan_state_decision(struct ieee80211_local *local,
430 unsigned long *next_delay) 445 unsigned long *next_delay)
431{ 446{
432 bool associated = false; 447 bool associated = false;
448 bool tx_empty = true;
449 bool bad_latency;
450 bool listen_int_exceeded;
451 unsigned long min_beacon_int = 0;
433 struct ieee80211_sub_if_data *sdata; 452 struct ieee80211_sub_if_data *sdata;
453 struct ieee80211_channel *next_chan;
434 454
435 /* if no more bands/channels left, complete scan and advance to the idle state */ 455 /* if no more bands/channels left, complete scan and advance to the idle state */
436 if (local->scan_channel_idx >= local->scan_req->n_channels) { 456 if (local->scan_channel_idx >= local->scan_req->n_channels) {
@@ -438,7 +458,11 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local,
438 return 1; 458 return 1;
439 } 459 }
440 460
441 /* check if at least one STA interface is associated */ 461 /*
462 * check if at least one STA interface is associated,
463 * check if at least one STA interface has pending tx frames
464 * and grab the lowest used beacon interval
465 */
442 mutex_lock(&local->iflist_mtx); 466 mutex_lock(&local->iflist_mtx);
443 list_for_each_entry(sdata, &local->interfaces, list) { 467 list_for_each_entry(sdata, &local->interfaces, list) {
444 if (!ieee80211_sdata_running(sdata)) 468 if (!ieee80211_sdata_running(sdata))
@@ -447,7 +471,16 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local,
447 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 471 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
448 if (sdata->u.mgd.associated) { 472 if (sdata->u.mgd.associated) {
449 associated = true; 473 associated = true;
450 break; 474
475 if (sdata->vif.bss_conf.beacon_int <
476 min_beacon_int || min_beacon_int == 0)
477 min_beacon_int =
478 sdata->vif.bss_conf.beacon_int;
479
480 if (!qdisc_all_tx_empty(sdata->dev)) {
481 tx_empty = false;
482 break;
483 }
451 } 484 }
452 } 485 }
453 } 486 }
@@ -456,11 +489,34 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local,
456 if (local->scan_channel) { 489 if (local->scan_channel) {
457 /* 490 /*
458 * we're currently scanning a different channel, let's 491 * we're currently scanning a different channel, let's
459 * switch back to the operating channel now if at least 492 * see if we can scan another channel without interfering
460 * one interface is associated. Otherwise just scan the 493 * with the current traffic situation.
461 * next channel 494 *
495 * Since we don't know if the AP has pending frames for us
496 * we can only check for our tx queues and use the current
497 * pm_qos requirements for rx. Hence, if no tx traffic occurs
498 * at all we will scan as many channels in a row as the pm_qos
499 * latency allows us to. Additionally we also check for the
500 * currently negotiated listen interval to prevent losing
501 * frames unnecessarily.
502 *
503 * Otherwise switch back to the operating channel.
462 */ 504 */
463 if (associated) 505 next_chan = local->scan_req->channels[local->scan_channel_idx];
506
507 bad_latency = time_after(jiffies +
508 ieee80211_scan_get_channel_time(next_chan),
509 local->leave_oper_channel_time +
510 usecs_to_jiffies(pm_qos_requirement(PM_QOS_NETWORK_LATENCY)));
511
512 listen_int_exceeded = time_after(jiffies +
513 ieee80211_scan_get_channel_time(next_chan),
514 local->leave_oper_channel_time +
515 usecs_to_jiffies(min_beacon_int * 1024) *
516 local->hw.conf.listen_interval);
517
518 if (associated && ( !tx_empty || bad_latency ||
519 listen_int_exceeded))
464 local->next_scan_state = SCAN_ENTER_OPER_CHANNEL; 520 local->next_scan_state = SCAN_ENTER_OPER_CHANNEL;
465 else 521 else
466 local->next_scan_state = SCAN_SET_CHANNEL; 522 local->next_scan_state = SCAN_SET_CHANNEL;
@@ -492,6 +548,9 @@ static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *loca
492 else 548 else
493 *next_delay = HZ / 10; 549 *next_delay = HZ / 10;
494 550
551 /* remember when we left the operating channel */
552 local->leave_oper_channel_time = jiffies;
553
495 /* advance to the next channel to be scanned */ 554 /* advance to the next channel to be scanned */
496 local->next_scan_state = SCAN_SET_CHANNEL; 555 local->next_scan_state = SCAN_SET_CHANNEL;
497} 556}
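A rough standalone illustration of the two new scan checks, with invented numbers: given a 100 ms PM-QoS network latency budget, a 100 TU beacon interval (about 102 ms) and a listen interval of 1, being away from the operating channel long enough that the next channel visit would overrun either budget forces a return to the operating channel first.

#include <stdio.h>

int main(void)
{
        /* all times in milliseconds; values are only illustrative */
        unsigned long now = 5000;               /* "jiffies" */
        unsigned long left_oper = 4950;         /* leave_oper_channel_time */
        unsigned long next_chan_time = 30 + 100;/* probe delay + channel time */
        unsigned long pm_qos_latency = 100;     /* PM_QOS_NETWORK_LATENCY */
        unsigned long beacon_int_ms = 102;      /* 100 TU is ~102.4 ms */
        unsigned long listen_int = 1;

        int bad_latency = (now + next_chan_time) >
                          (left_oper + pm_qos_latency);
        int listen_int_exceeded = (now + next_chan_time) >
                                  (left_oper + beacon_int_ms * listen_int);

        printf("bad_latency=%d listen_int_exceeded=%d -> %s\n",
               bad_latency, listen_int_exceeded,
               (bad_latency || listen_int_exceeded) ?
               "go back to operating channel" : "scan next channel");
        return 0;
}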
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 822d84522937..2b635909de5c 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -200,7 +200,6 @@ struct sta_ampdu_mlme {
200 * @rx_fragments: number of received MPDUs 200 * @rx_fragments: number of received MPDUs
201 * @rx_dropped: number of dropped MPDUs from this STA 201 * @rx_dropped: number of dropped MPDUs from this STA
202 * @last_signal: signal of last received frame from this STA 202 * @last_signal: signal of last received frame from this STA
203 * @last_noise: noise of last received frame from this STA
204 * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) 203 * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue)
205 * @tx_filtered_count: number of frames the hardware filtered for this STA 204 * @tx_filtered_count: number of frames the hardware filtered for this STA
206 * @tx_retry_failed: number of frames that failed retry 205 * @tx_retry_failed: number of frames that failed retry
@@ -267,7 +266,6 @@ struct sta_info {
267 unsigned long rx_fragments; 266 unsigned long rx_fragments;
268 unsigned long rx_dropped; 267 unsigned long rx_dropped;
269 int last_signal; 268 int last_signal;
270 int last_noise;
271 __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; 269 __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES];
272 270
273 /* Updated from TX status path only, no locking requirements */ 271 /* Updated from TX status path only, no locking requirements */
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 56d5b9a6ec5b..11805a3a626f 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -171,7 +171,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
171 struct net_device *prev_dev = NULL; 171 struct net_device *prev_dev = NULL;
172 struct sta_info *sta, *tmp; 172 struct sta_info *sta, *tmp;
173 int retry_count = -1, i; 173 int retry_count = -1, i;
174 bool injected; 174 bool send_to_cooked;
175 175
176 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { 176 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
177 /* the HW cannot have attempted that rate */ 177 /* the HW cannot have attempted that rate */
@@ -296,11 +296,15 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
296 /* this was a transmitted frame, but now we want to reuse it */ 296 /* this was a transmitted frame, but now we want to reuse it */
297 skb_orphan(skb); 297 skb_orphan(skb);
298 298
299 /* Need to make a copy before skb->cb gets cleared */
300 send_to_cooked = !!(info->flags & IEEE80211_TX_CTL_INJECTED) ||
301 (type != IEEE80211_FTYPE_DATA);
302
299 /* 303 /*
300 * This is a bit racy but we can avoid a lot of work 304 * This is a bit racy but we can avoid a lot of work
301 * with this test... 305 * with this test...
302 */ 306 */
303 if (!local->monitors && !local->cooked_mntrs) { 307 if (!local->monitors && (!send_to_cooked || !local->cooked_mntrs)) {
304 dev_kfree_skb(skb); 308 dev_kfree_skb(skb);
305 return; 309 return;
306 } 310 }
@@ -345,9 +349,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
345 /* for now report the total retry_count */ 349 /* for now report the total retry_count */
346 rthdr->data_retries = retry_count; 350 rthdr->data_retries = retry_count;
347 351
348 /* Need to make a copy before skb->cb gets cleared */
349 injected = !!(info->flags & IEEE80211_TX_CTL_INJECTED);
350
351 /* XXX: is this sufficient for BPF? */ 352 /* XXX: is this sufficient for BPF? */
352 skb_set_mac_header(skb, 0); 353 skb_set_mac_header(skb, 0);
353 skb->ip_summed = CHECKSUM_UNNECESSARY; 354 skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -362,8 +363,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
362 continue; 363 continue;
363 364
364 if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) && 365 if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) &&
365 !injected && 366 !send_to_cooked)
366 (type == IEEE80211_FTYPE_DATA))
367 continue; 367 continue;
368 368
369 if (prev_dev) { 369 if (prev_dev) {
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index cfc473e1b050..db25fa9ef135 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2011,14 +2011,12 @@ void ieee80211_tx_pending(unsigned long data)
2011 while (!skb_queue_empty(&local->pending[i])) { 2011 while (!skb_queue_empty(&local->pending[i])) {
2012 struct sk_buff *skb = __skb_dequeue(&local->pending[i]); 2012 struct sk_buff *skb = __skb_dequeue(&local->pending[i]);
2013 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 2013 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
2014 struct ieee80211_sub_if_data *sdata;
2015 2014
2016 if (WARN_ON(!info->control.vif)) { 2015 if (WARN_ON(!info->control.vif)) {
2017 kfree_skb(skb); 2016 kfree_skb(skb);
2018 continue; 2017 continue;
2019 } 2018 }
2020 2019
2021 sdata = vif_to_sdata(info->control.vif);
2022 spin_unlock_irqrestore(&local->queue_stop_reason_lock, 2020 spin_unlock_irqrestore(&local->queue_stop_reason_lock,
2023 flags); 2021 flags);
2024 2022
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 795424396aff..6464a1972a69 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -545,7 +545,7 @@ static int netlink_autobind(struct socket *sock)
545 struct hlist_head *head; 545 struct hlist_head *head;
546 struct sock *osk; 546 struct sock *osk;
547 struct hlist_node *node; 547 struct hlist_node *node;
548 s32 pid = current->tgid; 548 s32 pid = task_tgid_vnr(current);
549 int err; 549 int err;
550 static s32 rover = -4097; 550 static s32 rover = -4097;
551 551
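For reference, autobind is what happens when userspace binds (or first sends on) a netlink socket with nl_pid left at 0; with this change the kernel's first candidate port id is the caller's tgid as seen in its own pid namespace. A minimal userspace sketch:

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/netlink.h>

int main(void)
{
        struct sockaddr_nl sa;
        socklen_t len = sizeof(sa);
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

        memset(&sa, 0, sizeof(sa));
        sa.nl_family = AF_NETLINK;
        sa.nl_pid = 0;                  /* 0 => let the kernel autobind */

        if (fd < 0 || bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
                perror("netlink bind");
                return 1;
        }
        getsockname(fd, (struct sockaddr *)&sa, &len);
        printf("autobound netlink port id: %u\n", sa.nl_pid);
        return 0;
}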
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 06438fa2b1e5..aa4308afcc7f 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -21,15 +21,17 @@
21 21
22static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */ 22static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */
23 23
24static inline void genl_lock(void) 24void genl_lock(void)
25{ 25{
26 mutex_lock(&genl_mutex); 26 mutex_lock(&genl_mutex);
27} 27}
28EXPORT_SYMBOL(genl_lock);
28 29
29static inline void genl_unlock(void) 30void genl_unlock(void)
30{ 31{
31 mutex_unlock(&genl_mutex); 32 mutex_unlock(&genl_mutex);
32} 33}
34EXPORT_SYMBOL(genl_unlock);
33 35
34#define GENL_FAM_TAB_SIZE 16 36#define GENL_FAM_TAB_SIZE 16
35#define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1) 37#define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index cc90363d7e7a..d7d0310dca9d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1692,9 +1692,9 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1692 if (i->alen != dev->addr_len) 1692 if (i->alen != dev->addr_len)
1693 return -EINVAL; 1693 return -EINVAL;
1694 if (what > 0) 1694 if (what > 0)
1695 return dev_mc_add(dev, i->addr, i->alen, 0); 1695 return dev_mc_add(dev, i->addr);
1696 else 1696 else
1697 return dev_mc_delete(dev, i->addr, i->alen, 0); 1697 return dev_mc_del(dev, i->addr);
1698 break; 1698 break;
1699 case PACKET_MR_PROMISC: 1699 case PACKET_MR_PROMISC:
1700 return dev_set_promiscuity(dev, what); 1700 return dev_set_promiscuity(dev, what);
@@ -1706,9 +1706,9 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1706 if (i->alen != dev->addr_len) 1706 if (i->alen != dev->addr_len)
1707 return -EINVAL; 1707 return -EINVAL;
1708 if (what > 0) 1708 if (what > 0)
1709 return dev_unicast_add(dev, i->addr); 1709 return dev_uc_add(dev, i->addr);
1710 else 1710 else
1711 return dev_unicast_delete(dev, i->addr); 1711 return dev_uc_del(dev, i->addr);
1712 break; 1712 break;
1713 default: 1713 default:
1714 break; 1714 break;
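packet_dev_mc() above is driven from setsockopt(PACKET_ADD_MEMBERSHIP), which is what ultimately lands in the new dev_mc_add()/dev_uc_add() calls. A userspace sketch; the interface index and the multicast address are placeholders.

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>         /* htons */
#include <linux/if_ether.h>     /* ETH_P_ALL, ETH_ALEN */
#include <linux/if_packet.h>    /* struct packet_mreq, PACKET_MR_* */

int main(void)
{
        int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
        struct packet_mreq mr;

        memset(&mr, 0, sizeof(mr));
        mr.mr_ifindex = 2;                      /* placeholder ifindex */
        mr.mr_type = PACKET_MR_MULTICAST;       /* or PACKET_MR_UNICAST */
        mr.mr_alen = ETH_ALEN;
        memcpy(mr.mr_address, "\x01\x00\x5e\x00\x00\xfb", ETH_ALEN);

        if (fd < 0 || setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
                                 &mr, sizeof(mr)) < 0) {
                perror("PACKET_ADD_MEMBERSHIP");
                return 1;
        }
        return 0;
}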
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index f81862baf4d0..7919a9edb8e9 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -160,7 +160,8 @@ static unsigned int rds_poll(struct file *file, struct socket *sock,
160 160
161 poll_wait(file, sk->sk_sleep, wait); 161 poll_wait(file, sk->sk_sleep, wait);
162 162
163 poll_wait(file, &rds_poll_waitq, wait); 163 if (rs->rs_seen_congestion)
164 poll_wait(file, &rds_poll_waitq, wait);
164 165
165 read_lock_irqsave(&rs->rs_recv_lock, flags); 166 read_lock_irqsave(&rs->rs_recv_lock, flags);
166 if (!rs->rs_cong_monitor) { 167 if (!rs->rs_cong_monitor) {
@@ -182,6 +183,10 @@ static unsigned int rds_poll(struct file *file, struct socket *sock,
182 mask |= (POLLOUT | POLLWRNORM); 183 mask |= (POLLOUT | POLLWRNORM);
183 read_unlock_irqrestore(&rs->rs_recv_lock, flags); 184 read_unlock_irqrestore(&rs->rs_recv_lock, flags);
184 185
186 /* clear state any time we wake a seen-congested socket */
187 if (mask)
188 rs->rs_seen_congestion = 0;
189
185 return mask; 190 return mask;
186} 191}
187 192
@@ -447,7 +452,6 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len,
447 struct rds_info_lengths *lens) 452 struct rds_info_lengths *lens)
448{ 453{
449 struct rds_sock *rs; 454 struct rds_sock *rs;
450 struct sock *sk;
451 struct rds_incoming *inc; 455 struct rds_incoming *inc;
452 unsigned long flags; 456 unsigned long flags;
453 unsigned int total = 0; 457 unsigned int total = 0;
@@ -457,7 +461,6 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len,
457 spin_lock_irqsave(&rds_sock_lock, flags); 461 spin_lock_irqsave(&rds_sock_lock, flags);
458 462
459 list_for_each_entry(rs, &rds_sock_list, rs_item) { 463 list_for_each_entry(rs, &rds_sock_list, rs_item) {
460 sk = rds_rs_to_sk(rs);
461 read_lock(&rs->rs_recv_lock); 464 read_lock(&rs->rs_recv_lock);
462 465
463 /* XXX too lazy to maintain counts.. */ 466 /* XXX too lazy to maintain counts.. */
diff --git a/net/rds/cong.c b/net/rds/cong.c
index f1da27ceb064..0871a29f0780 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -219,8 +219,6 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
219 spin_lock_irqsave(&rds_cong_lock, flags); 219 spin_lock_irqsave(&rds_cong_lock, flags);
220 220
221 list_for_each_entry(conn, &map->m_conn_list, c_map_item) { 221 list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
222 if (conn->c_loopback)
223 continue;
224 if (!test_and_set_bit(0, &conn->c_map_queued)) { 222 if (!test_and_set_bit(0, &conn->c_map_queued)) {
225 rds_stats_inc(s_cong_update_queued); 223 rds_stats_inc(s_cong_update_queued);
226 queue_delayed_work(rds_wq, &conn->c_send_w, 0); 224 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 88d0856cb797..10ed0d55f759 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -204,9 +204,10 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
204 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); 204 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
205 break; 205 break;
206 default: 206 default:
207 rds_ib_conn_error(conn, "RDS/IB: Fatal QP Event %u " 207 rdsdebug("Fatal QP Event %u "
208 "- connection %pI4->%pI4, reconnecting\n", 208 "- connection %pI4->%pI4, reconnecting\n",
209 event->event, &conn->c_laddr, &conn->c_faddr); 209 event->event, &conn->c_laddr, &conn->c_faddr);
210 rds_conn_drop(conn);
210 break; 211 break;
211 } 212 }
212} 213}
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 059989fdb7d7..a54cd63f9e35 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -235,8 +235,8 @@ void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
235{ 235{
236 flush_workqueue(rds_wq); 236 flush_workqueue(rds_wq);
237 rds_ib_flush_mr_pool(pool, 1); 237 rds_ib_flush_mr_pool(pool, 1);
238 BUG_ON(atomic_read(&pool->item_count)); 238 WARN_ON(atomic_read(&pool->item_count));
239 BUG_ON(atomic_read(&pool->free_pinned)); 239 WARN_ON(atomic_read(&pool->free_pinned));
240 kfree(pool); 240 kfree(pool);
241} 241}
242 242
@@ -441,6 +441,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
441 441
442 /* FIXME we need a way to tell a r/w MR 442 /* FIXME we need a way to tell a r/w MR
443 * from a r/o MR */ 443 * from a r/o MR */
444 BUG_ON(in_interrupt());
444 set_page_dirty(page); 445 set_page_dirty(page);
445 put_page(page); 446 put_page(page);
446 } 447 }
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index c7dd11b835f0..c74e9904a6b2 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -469,8 +469,8 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi
469 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 469 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
470 470
471 rds_ib_stats_inc(s_ib_ack_send_failure); 471 rds_ib_stats_inc(s_ib_ack_send_failure);
472 /* Need to finesse this later. */ 472
473 BUG(); 473 rds_ib_conn_error(ic->conn, "sending ack failed\n");
474 } else 474 } else
475 rds_ib_stats_inc(s_ib_ack_sent); 475 rds_ib_stats_inc(s_ib_ack_sent);
476} 476}
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index a10fab6886d1..17fa80803ab0 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -243,8 +243,12 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
243 struct rds_message *rm; 243 struct rds_message *rm;
244 244
245 rm = rds_send_get_message(conn, send->s_op); 245 rm = rds_send_get_message(conn, send->s_op);
246 if (rm) 246 if (rm) {
247 if (rm->m_rdma_op)
248 rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
247 rds_ib_send_rdma_complete(rm, wc.status); 249 rds_ib_send_rdma_complete(rm, wc.status);
250 rds_message_put(rm);
251 }
248 } 252 }
249 253
250 oldest = (oldest + 1) % ic->i_send_ring.w_nr; 254 oldest = (oldest + 1) % ic->i_send_ring.w_nr;
@@ -482,6 +486,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
482 BUG_ON(off % RDS_FRAG_SIZE); 486 BUG_ON(off % RDS_FRAG_SIZE);
483 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header)); 487 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
484 488
489 /* Do not send cong updates to IB loopback */
490 if (conn->c_loopback
491 && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
492 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
493 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
494 }
495
485 /* FIXME we may overallocate here */ 496 /* FIXME we may overallocate here */
486 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) 497 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
487 i = 1; 498 i = 1;
@@ -574,8 +585,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
574 rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits); 585 rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
575 adv_credits += posted; 586 adv_credits += posted;
576 BUG_ON(adv_credits > 255); 587 BUG_ON(adv_credits > 255);
577 } else if (ic->i_rm != rm) 588 }
578 BUG();
579 589
580 send = &ic->i_sends[pos]; 590 send = &ic->i_sends[pos];
581 first = send; 591 first = send;
@@ -714,8 +724,8 @@ add_header:
714 ic->i_rm = prev->s_rm; 724 ic->i_rm = prev->s_rm;
715 prev->s_rm = NULL; 725 prev->s_rm = NULL;
716 } 726 }
717 /* Finesse this later */ 727
718 BUG(); 728 rds_ib_conn_error(ic->conn, "ib_post_send failed\n");
719 goto out; 729 goto out;
720 } 730 }
721 731
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index 3e9460f935d8..a9d951b4fbae 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -157,9 +157,11 @@ static void rds_iw_qp_event_handler(struct ib_event *event, void *data)
157 case IB_EVENT_QP_REQ_ERR: 157 case IB_EVENT_QP_REQ_ERR:
158 case IB_EVENT_QP_FATAL: 158 case IB_EVENT_QP_FATAL:
159 default: 159 default:
160 rds_iw_conn_error(conn, "RDS/IW: Fatal QP Event %u - connection %pI4->%pI4...reconnecting\n", 160 rdsdebug("Fatal QP Event %u "
161 "- connection %pI4->%pI4, reconnecting\n",
161 event->event, &conn->c_laddr, 162 event->event, &conn->c_laddr,
162 &conn->c_faddr); 163 &conn->c_faddr);
164 rds_conn_drop(conn);
163 break; 165 break;
164 } 166 }
165} 167}
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index da43ee840ca3..3d479067d54d 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -469,8 +469,8 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi
469 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 469 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
470 470
471 rds_iw_stats_inc(s_iw_ack_send_failure); 471 rds_iw_stats_inc(s_iw_ack_send_failure);
472 /* Need to finesse this later. */ 472
473 BUG(); 473 rds_iw_conn_error(ic->conn, "sending ack failed\n");
474 } else 474 } else
475 rds_iw_stats_inc(s_iw_ack_sent); 475 rds_iw_stats_inc(s_iw_ack_sent);
476} 476}
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 1379e9d66a78..52182ff7519e 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -616,8 +616,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
616 rds_iw_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits); 616 rds_iw_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
617 adv_credits += posted; 617 adv_credits += posted;
618 BUG_ON(adv_credits > 255); 618 BUG_ON(adv_credits > 255);
619 } else if (ic->i_rm != rm) 619 }
620 BUG();
621 620
622 send = &ic->i_sends[pos]; 621 send = &ic->i_sends[pos];
623 first = send; 622 first = send;
diff --git a/net/rds/loop.c b/net/rds/loop.c
index 0d7a159158b8..dd9879379457 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -81,16 +81,9 @@ static int rds_loop_xmit_cong_map(struct rds_connection *conn,
81 struct rds_cong_map *map, 81 struct rds_cong_map *map,
82 unsigned long offset) 82 unsigned long offset)
83{ 83{
84 unsigned long i;
85
86 BUG_ON(offset); 84 BUG_ON(offset);
87 BUG_ON(map != conn->c_lcong); 85 BUG_ON(map != conn->c_lcong);
88 86
89 for (i = 0; i < RDS_CONG_MAP_PAGES; i++) {
90 memcpy((void *)conn->c_fcong->m_page_addrs[i],
91 (void *)map->m_page_addrs[i], PAGE_SIZE);
92 }
93
94 rds_cong_map_updated(conn->c_fcong, ~(u64) 0); 87 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
95 88
96 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; 89 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 5ce9437cad67..75fd13bb631b 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -439,8 +439,10 @@ void rds_rdma_free_op(struct rds_rdma_op *ro)
439 /* Mark page dirty if it was possibly modified, which 439 /* Mark page dirty if it was possibly modified, which
440 * is the case for a RDMA_READ which copies from remote 440 * is the case for a RDMA_READ which copies from remote
441 * to local memory */ 441 * to local memory */
442 if (!ro->r_write) 442 if (!ro->r_write) {
443 BUG_ON(in_interrupt());
443 set_page_dirty(page); 444 set_page_dirty(page);
445 }
444 put_page(page); 446 put_page(page);
445 } 447 }
446 448
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 9ece910ea394..5ea82fc47c3e 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -101,7 +101,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
101 break; 101 break;
102 102
103 case RDMA_CM_EVENT_DISCONNECTED: 103 case RDMA_CM_EVENT_DISCONNECTED:
104 printk(KERN_WARNING "RDS/RDMA: DISCONNECT event - dropping connection " 104 rdsdebug("DISCONNECT event - dropping connection "
105 "%pI4->%pI4\n", &conn->c_laddr, 105 "%pI4->%pI4\n", &conn->c_laddr,
106 &conn->c_faddr); 106 &conn->c_faddr);
107 rds_conn_drop(conn); 107 rds_conn_drop(conn);
@@ -109,8 +109,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
109 109
110 default: 110 default:
111 /* things like device disconnect? */ 111 /* things like device disconnect? */
112 printk(KERN_ERR "unknown event %u\n", event->event); 112 printk(KERN_ERR "RDS: unknown event %u!\n", event->event);
113 BUG();
114 break; 113 break;
115 } 114 }
116 115
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 85d6f897ecc7..4bec6e2ed495 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -388,6 +388,8 @@ struct rds_sock {
388 388
389 /* flag indicating we were congested or not */ 389 /* flag indicating we were congested or not */
390 int rs_congested; 390 int rs_congested;
391 /* seen congestion (ENOBUFS) when sending? */
392 int rs_seen_congestion;
391 393
392 /* rs_lock protects all these adjacent members before the newline */ 394 /* rs_lock protects all these adjacent members before the newline */
393 spinlock_t rs_lock; 395 spinlock_t rs_lock;
diff --git a/net/rds/send.c b/net/rds/send.c
index f04b929ded92..53d6795ac9d0 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -508,12 +508,13 @@ EXPORT_SYMBOL_GPL(rds_send_get_message);
508 */ 508 */
509void rds_send_remove_from_sock(struct list_head *messages, int status) 509void rds_send_remove_from_sock(struct list_head *messages, int status)
510{ 510{
511 unsigned long flags = 0; /* silence gcc :P */ 511 unsigned long flags;
512 struct rds_sock *rs = NULL; 512 struct rds_sock *rs = NULL;
513 struct rds_message *rm; 513 struct rds_message *rm;
514 514
515 local_irq_save(flags);
516 while (!list_empty(messages)) { 515 while (!list_empty(messages)) {
516 int was_on_sock = 0;
517
517 rm = list_entry(messages->next, struct rds_message, 518 rm = list_entry(messages->next, struct rds_message,
518 m_conn_item); 519 m_conn_item);
519 list_del_init(&rm->m_conn_item); 520 list_del_init(&rm->m_conn_item);
@@ -528,20 +529,19 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
528 * while we're messing with it. It does not prevent the 529 * while we're messing with it. It does not prevent the
529 * message from being removed from the socket, though. 530 * message from being removed from the socket, though.
530 */ 531 */
531 spin_lock(&rm->m_rs_lock); 532 spin_lock_irqsave(&rm->m_rs_lock, flags);
532 if (!test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) 533 if (!test_bit(RDS_MSG_ON_SOCK, &rm->m_flags))
533 goto unlock_and_drop; 534 goto unlock_and_drop;
534 535
535 if (rs != rm->m_rs) { 536 if (rs != rm->m_rs) {
536 if (rs) { 537 if (rs) {
537 spin_unlock(&rs->rs_lock);
538 rds_wake_sk_sleep(rs); 538 rds_wake_sk_sleep(rs);
539 sock_put(rds_rs_to_sk(rs)); 539 sock_put(rds_rs_to_sk(rs));
540 } 540 }
541 rs = rm->m_rs; 541 rs = rm->m_rs;
542 spin_lock(&rs->rs_lock);
543 sock_hold(rds_rs_to_sk(rs)); 542 sock_hold(rds_rs_to_sk(rs));
544 } 543 }
544 spin_lock(&rs->rs_lock);
545 545
546 if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) { 546 if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
547 struct rds_rdma_op *ro = rm->m_rdma_op; 547 struct rds_rdma_op *ro = rm->m_rdma_op;
@@ -558,21 +558,22 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
558 notifier->n_status = status; 558 notifier->n_status = status;
559 rm->m_rdma_op->r_notifier = NULL; 559 rm->m_rdma_op->r_notifier = NULL;
560 } 560 }
561 rds_message_put(rm); 561 was_on_sock = 1;
562 rm->m_rs = NULL; 562 rm->m_rs = NULL;
563 } 563 }
564 spin_unlock(&rs->rs_lock);
564 565
565unlock_and_drop: 566unlock_and_drop:
566 spin_unlock(&rm->m_rs_lock); 567 spin_unlock_irqrestore(&rm->m_rs_lock, flags);
567 rds_message_put(rm); 568 rds_message_put(rm);
569 if (was_on_sock)
570 rds_message_put(rm);
568 } 571 }
569 572
570 if (rs) { 573 if (rs) {
571 spin_unlock(&rs->rs_lock);
572 rds_wake_sk_sleep(rs); 574 rds_wake_sk_sleep(rs);
573 sock_put(rds_rs_to_sk(rs)); 575 sock_put(rds_rs_to_sk(rs));
574 } 576 }
575 local_irq_restore(flags);
576} 577}
577 578
578/* 579/*
@@ -634,9 +635,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
634 list_move(&rm->m_sock_item, &list); 635 list_move(&rm->m_sock_item, &list);
635 rds_send_sndbuf_remove(rs, rm); 636 rds_send_sndbuf_remove(rs, rm);
636 clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags); 637 clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
637
638 /* If this is a RDMA operation, notify the app. */
639 __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED);
640 } 638 }
641 639
642 /* order flag updates with the rs lock */ 640 /* order flag updates with the rs lock */
@@ -645,9 +643,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
645 643
646 spin_unlock_irqrestore(&rs->rs_lock, flags); 644 spin_unlock_irqrestore(&rs->rs_lock, flags);
647 645
648 if (wake)
649 rds_wake_sk_sleep(rs);
650
651 conn = NULL; 646 conn = NULL;
652 647
653 /* now remove the messages from the conn list as needed */ 648 /* now remove the messages from the conn list as needed */
@@ -655,6 +650,10 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
655 /* We do this here rather than in the loop above, so that 650 /* We do this here rather than in the loop above, so that
656 * we don't have to nest m_rs_lock under rs->rs_lock */ 651 * we don't have to nest m_rs_lock under rs->rs_lock */
657 spin_lock_irqsave(&rm->m_rs_lock, flags2); 652 spin_lock_irqsave(&rm->m_rs_lock, flags2);
653 /* If this is a RDMA operation, notify the app. */
654 spin_lock(&rs->rs_lock);
655 __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED);
656 spin_unlock(&rs->rs_lock);
658 rm->m_rs = NULL; 657 rm->m_rs = NULL;
659 spin_unlock_irqrestore(&rm->m_rs_lock, flags2); 658 spin_unlock_irqrestore(&rm->m_rs_lock, flags2);
660 659
@@ -683,6 +682,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
683 if (conn) 682 if (conn)
684 spin_unlock_irqrestore(&conn->c_lock, flags); 683 spin_unlock_irqrestore(&conn->c_lock, flags);
685 684
685 if (wake)
686 rds_wake_sk_sleep(rs);
687
686 while (!list_empty(&list)) { 688 while (!list_empty(&list)) {
687 rm = list_entry(list.next, struct rds_message, m_sock_item); 689 rm = list_entry(list.next, struct rds_message, m_sock_item);
688 list_del_init(&rm->m_sock_item); 690 list_del_init(&rm->m_sock_item);
@@ -816,7 +818,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
816 int ret = 0; 818 int ret = 0;
817 int queued = 0, allocated_mr = 0; 819 int queued = 0, allocated_mr = 0;
818 int nonblock = msg->msg_flags & MSG_DONTWAIT; 820 int nonblock = msg->msg_flags & MSG_DONTWAIT;
819 long timeo = sock_rcvtimeo(sk, nonblock); 821 long timeo = sock_sndtimeo(sk, nonblock);
820 822
821 /* Mirror Linux UDP mirror of BSD error message compatibility */ 823 /* Mirror Linux UDP mirror of BSD error message compatibility */
822 /* XXX: Perhaps MSG_MORE someday */ 824 /* XXX: Perhaps MSG_MORE someday */
@@ -895,8 +897,10 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
895 queue_delayed_work(rds_wq, &conn->c_conn_w, 0); 897 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
896 898
897 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); 899 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
898 if (ret) 900 if (ret) {
901 rs->rs_seen_congestion = 1;
899 goto out; 902 goto out;
903 }
900 904
901 while (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port, 905 while (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port,
902 dport, &queued)) { 906 dport, &queued)) {
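Note: the sock_rcvtimeo() -> sock_sndtimeo() switch earlier in this file means a blocking rds_sendmsg() now honors the socket's send timeout rather than its receive timeout. As a rough illustration of the userspace side only, the sketch below arms SO_SNDTIMEO on an ordinary UDP socket; the two-second value and the use of UDP instead of an RDS socket are illustrative assumptions, not part of the patch.

/* Illustrative sketch: SO_SNDTIMEO sets sk->sk_sndtimeo, the value
 * sock_sndtimeo() hands back for blocking sends. A plain UDP socket
 * is used here purely for simplicity. */
#include <stdio.h>
#include <sys/socket.h>
#include <sys/time.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct timeval tv = { .tv_sec = 2, .tv_usec = 0 };

	if (fd < 0 || setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) < 0) {
		perror("SO_SNDTIMEO");
		return 1;
	}
	/* A blocking send that cannot complete within ~2s now fails
	 * with EAGAIN/EWOULDBLOCK instead of sleeping indefinitely. */
	printf("send timeout armed\n");
	return 0;
}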
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index e08ec912d8b0..1aba6878fa5d 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -98,6 +98,7 @@ int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
98 goto out; 98 goto out;
99 } 99 }
100 100
101 rds_stats_add(s_copy_to_user, to_copy);
101 size -= to_copy; 102 size -= to_copy;
102 ret += to_copy; 103 ret += to_copy;
103 skb_off += to_copy; 104 skb_off += to_copy;
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 34fdcc059e54..a28b895ff0d1 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -240,7 +240,9 @@ void rds_tcp_write_space(struct sock *sk)
240 tc->t_last_seen_una = rds_tcp_snd_una(tc); 240 tc->t_last_seen_una = rds_tcp_snd_una(tc);
241 rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked); 241 rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked);
242 242
243 queue_delayed_work(rds_wq, &conn->c_send_w, 0); 243 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
244 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
245
244out: 246out:
245 read_unlock(&sk->sk_callback_lock); 247 read_unlock(&sk->sk_callback_lock);
246 248
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 00fa10e59af8..786c20eaaf5e 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -259,7 +259,7 @@ void rds_threads_exit(void)
259 259
260int __init rds_threads_init(void) 260int __init rds_threads_init(void)
261{ 261{
262 rds_wq = create_singlethread_workqueue("krdsd"); 262 rds_wq = create_workqueue("krdsd");
263 if (rds_wq == NULL) 263 if (rds_wq == NULL)
264 return -ENOMEM; 264 return -ENOMEM;
265 265
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index a9fa86f65983..51875a0c5d48 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -629,6 +629,49 @@ static ssize_t rfkill_persistent_show(struct device *dev,
629 return sprintf(buf, "%d\n", rfkill->persistent); 629 return sprintf(buf, "%d\n", rfkill->persistent);
630} 630}
631 631
632static ssize_t rfkill_hard_show(struct device *dev,
633 struct device_attribute *attr,
634 char *buf)
635{
636 struct rfkill *rfkill = to_rfkill(dev);
637
638 return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_HW) ? 1 : 0 );
639}
640
641static ssize_t rfkill_soft_show(struct device *dev,
642 struct device_attribute *attr,
643 char *buf)
644{
645 struct rfkill *rfkill = to_rfkill(dev);
646
647 return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_SW) ? 1 : 0 );
648}
649
650static ssize_t rfkill_soft_store(struct device *dev,
651 struct device_attribute *attr,
652 const char *buf, size_t count)
653{
654 struct rfkill *rfkill = to_rfkill(dev);
655 unsigned long state;
656 int err;
657
658 if (!capable(CAP_NET_ADMIN))
659 return -EPERM;
660
661 err = strict_strtoul(buf, 0, &state);
662 if (err)
663 return err;
664
665 if (state > 1 )
666 return -EINVAL;
667
668 mutex_lock(&rfkill_global_mutex);
669 rfkill_set_block(rfkill, state);
670 mutex_unlock(&rfkill_global_mutex);
671
672 return err ?: count;
673}
674
632static u8 user_state_from_blocked(unsigned long state) 675static u8 user_state_from_blocked(unsigned long state)
633{ 676{
634 if (state & RFKILL_BLOCK_HW) 677 if (state & RFKILL_BLOCK_HW)
@@ -644,14 +687,8 @@ static ssize_t rfkill_state_show(struct device *dev,
644 char *buf) 687 char *buf)
645{ 688{
646 struct rfkill *rfkill = to_rfkill(dev); 689 struct rfkill *rfkill = to_rfkill(dev);
647 unsigned long flags;
648 u32 state;
649
650 spin_lock_irqsave(&rfkill->lock, flags);
651 state = rfkill->state;
652 spin_unlock_irqrestore(&rfkill->lock, flags);
653 690
654 return sprintf(buf, "%d\n", user_state_from_blocked(state)); 691 return sprintf(buf, "%d\n", user_state_from_blocked(rfkill->state));
655} 692}
656 693
657static ssize_t rfkill_state_store(struct device *dev, 694static ssize_t rfkill_state_store(struct device *dev,
@@ -701,6 +738,8 @@ static struct device_attribute rfkill_dev_attrs[] = {
701 __ATTR(persistent, S_IRUGO, rfkill_persistent_show, NULL), 738 __ATTR(persistent, S_IRUGO, rfkill_persistent_show, NULL),
702 __ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store), 739 __ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store),
703 __ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store), 740 __ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store),
741 __ATTR(soft, S_IRUGO|S_IWUSR, rfkill_soft_show, rfkill_soft_store),
742 __ATTR(hard, S_IRUGO, rfkill_hard_show, NULL),
704 __ATTR_NULL 743 __ATTR_NULL
705}; 744};
706 745
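The two attributes registered above appear as /sys/class/rfkill/rfkillN/soft and .../hard, each reporting the block state as 0 or 1; soft is additionally writable by a process with CAP_NET_ADMIN. A minimal userspace sketch follows, assuming the first rfkill device (rfkill0) and omitting most error handling; it is illustrative only, not part of the patch.

/* Illustrative sketch: query the new rfkill sysfs attributes and then
 * soft-block the radio. The rfkill0 path is an assumption. */
#include <stdio.h>
#include <stdlib.h>

static int read_attr(const char *path)
{
	FILE *f = fopen(path, "r");
	int val = -1;

	if (!f)
		return -1;
	if (fscanf(f, "%d", &val) != 1)
		val = -1;
	fclose(f);
	return val;
}

int main(void)
{
	FILE *f;

	printf("hard=%d soft=%d\n",
	       read_attr("/sys/class/rfkill/rfkill0/hard"),
	       read_attr("/sys/class/rfkill/rfkill0/soft"));

	/* Soft-block the radio; needs CAP_NET_ADMIN, as enforced by
	 * rfkill_soft_store() above. */
	f = fopen("/sys/class/rfkill/rfkill0/soft", "w");
	if (!f)
		return EXIT_FAILURE;
	fputs("1\n", f);
	fclose(f);
	return EXIT_SUCCESS;
}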
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index d8e0171d9a4b..019045174fc3 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -668,7 +668,8 @@ nlmsg_failure:
668} 668}
669 669
670static int 670static int
671act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) 671act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n,
672 struct tc_action *a, int event)
672{ 673{
673 struct sk_buff *skb; 674 struct sk_buff *skb;
674 675
@@ -680,7 +681,7 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
680 return -EINVAL; 681 return -EINVAL;
681 } 682 }
682 683
683 return rtnl_unicast(skb, &init_net, pid); 684 return rtnl_unicast(skb, net, pid);
684} 685}
685 686
686static struct tc_action * 687static struct tc_action *
@@ -750,7 +751,8 @@ static struct tc_action *create_a(int i)
750 return act; 751 return act;
751} 752}
752 753
753static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid) 754static int tca_action_flush(struct net *net, struct nlattr *nla,
755 struct nlmsghdr *n, u32 pid)
754{ 756{
755 struct sk_buff *skb; 757 struct sk_buff *skb;
756 unsigned char *b; 758 unsigned char *b;
@@ -809,7 +811,7 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
809 nlh->nlmsg_flags |= NLM_F_ROOT; 811 nlh->nlmsg_flags |= NLM_F_ROOT;
810 module_put(a->ops->owner); 812 module_put(a->ops->owner);
811 kfree(a); 813 kfree(a);
812 err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 814 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
813 if (err > 0) 815 if (err > 0)
814 return 0; 816 return 0;
815 817
@@ -826,7 +828,8 @@ noflush_out:
826} 828}
827 829
828static int 830static int
829tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event) 831tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
832 u32 pid, int event)
830{ 833{
831 int i, ret; 834 int i, ret;
832 struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; 835 struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
@@ -838,7 +841,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
838 841
839 if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) { 842 if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) {
840 if (tb[1] != NULL) 843 if (tb[1] != NULL)
841 return tca_action_flush(tb[1], n, pid); 844 return tca_action_flush(net, tb[1], n, pid);
842 else 845 else
843 return -EINVAL; 846 return -EINVAL;
844 } 847 }
@@ -859,7 +862,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
859 } 862 }
860 863
861 if (event == RTM_GETACTION) 864 if (event == RTM_GETACTION)
862 ret = act_get_notify(pid, n, head, event); 865 ret = act_get_notify(net, pid, n, head, event);
863 else { /* delete */ 866 else { /* delete */
864 struct sk_buff *skb; 867 struct sk_buff *skb;
865 868
@@ -878,7 +881,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
878 881
879 /* now do the delete */ 882 /* now do the delete */
880 tcf_action_destroy(head, 0); 883 tcf_action_destroy(head, 0);
881 ret = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, 884 ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
882 n->nlmsg_flags&NLM_F_ECHO); 885 n->nlmsg_flags&NLM_F_ECHO);
883 if (ret > 0) 886 if (ret > 0)
884 return 0; 887 return 0;
@@ -889,8 +892,8 @@ err:
889 return ret; 892 return ret;
890} 893}
891 894
892static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, 895static int tcf_add_notify(struct net *net, struct tc_action *a,
893 u16 flags) 896 u32 pid, u32 seq, int event, u16 flags)
894{ 897{
895 struct tcamsg *t; 898 struct tcamsg *t;
896 struct nlmsghdr *nlh; 899 struct nlmsghdr *nlh;
@@ -923,7 +926,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
923 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 926 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
924 NETLINK_CB(skb).dst_group = RTNLGRP_TC; 927 NETLINK_CB(skb).dst_group = RTNLGRP_TC;
925 928
926 err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); 929 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
927 if (err > 0) 930 if (err > 0)
928 err = 0; 931 err = 0;
929 return err; 932 return err;
@@ -936,7 +939,8 @@ nlmsg_failure:
936 939
937 940
938static int 941static int
939tcf_action_add(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int ovr) 942tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
943 u32 pid, int ovr)
940{ 944{
941 int ret = 0; 945 int ret = 0;
942 struct tc_action *act; 946 struct tc_action *act;
@@ -954,7 +958,7 @@ tcf_action_add(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int ovr)
954 /* dump then free all the actions after update; inserted policy 958 /* dump then free all the actions after update; inserted policy
955 * stays intact 959 * stays intact
956 * */ 960 * */
957 ret = tcf_add_notify(act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); 961 ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
958 for (a = act; a; a = act) { 962 for (a = act; a; a = act) {
959 act = a->next; 963 act = a->next;
960 kfree(a); 964 kfree(a);
@@ -970,9 +974,6 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
970 u32 pid = skb ? NETLINK_CB(skb).pid : 0; 974 u32 pid = skb ? NETLINK_CB(skb).pid : 0;
971 int ret = 0, ovr = 0; 975 int ret = 0, ovr = 0;
972 976
973 if (!net_eq(net, &init_net))
974 return -EINVAL;
975
976 ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); 977 ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
977 if (ret < 0) 978 if (ret < 0)
978 return ret; 979 return ret;
@@ -995,15 +996,17 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
995 if (n->nlmsg_flags&NLM_F_REPLACE) 996 if (n->nlmsg_flags&NLM_F_REPLACE)
996 ovr = 1; 997 ovr = 1;
997replay: 998replay:
998 ret = tcf_action_add(tca[TCA_ACT_TAB], n, pid, ovr); 999 ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr);
999 if (ret == -EAGAIN) 1000 if (ret == -EAGAIN)
1000 goto replay; 1001 goto replay;
1001 break; 1002 break;
1002 case RTM_DELACTION: 1003 case RTM_DELACTION:
1003 ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_DELACTION); 1004 ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
1005 pid, RTM_DELACTION);
1004 break; 1006 break;
1005 case RTM_GETACTION: 1007 case RTM_GETACTION:
1006 ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_GETACTION); 1008 ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
1009 pid, RTM_GETACTION);
1007 break; 1010 break;
1008 default: 1011 default:
1009 BUG(); 1012 BUG();
@@ -1043,7 +1046,6 @@ find_dump_kind(const struct nlmsghdr *n)
1043static int 1046static int
1044tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) 1047tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1045{ 1048{
1046 struct net *net = sock_net(skb->sk);
1047 struct nlmsghdr *nlh; 1049 struct nlmsghdr *nlh;
1048 unsigned char *b = skb_tail_pointer(skb); 1050 unsigned char *b = skb_tail_pointer(skb);
1049 struct nlattr *nest; 1051 struct nlattr *nest;
@@ -1053,9 +1055,6 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1053 struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh); 1055 struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh);
1054 struct nlattr *kind = find_dump_kind(cb->nlh); 1056 struct nlattr *kind = find_dump_kind(cb->nlh);
1055 1057
1056 if (!net_eq(net, &init_net))
1057 return 0;
1058
1059 if (kind == NULL) { 1058 if (kind == NULL) {
1060 printk("tc_dump_action: action bad kind\n"); 1059 printk("tc_dump_action: action bad kind\n");
1061 return 0; 1060 return 0;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index f082b27ff46d..5fd0c28ef79a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -99,8 +99,9 @@ out:
99} 99}
100EXPORT_SYMBOL(unregister_tcf_proto_ops); 100EXPORT_SYMBOL(unregister_tcf_proto_ops);
101 101
102static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, 102static int tfilter_notify(struct net *net, struct sk_buff *oskb,
103 struct tcf_proto *tp, unsigned long fh, int event); 103 struct nlmsghdr *n, struct tcf_proto *tp,
104 unsigned long fh, int event);
104 105
105 106
106/* Select new prio value from the range, managed by kernel. */ 107/* Select new prio value from the range, managed by kernel. */
@@ -138,9 +139,6 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
138 int err; 139 int err;
139 int tp_created = 0; 140 int tp_created = 0;
140 141
141 if (!net_eq(net, &init_net))
142 return -EINVAL;
143
144replay: 142replay:
145 t = NLMSG_DATA(n); 143 t = NLMSG_DATA(n);
146 protocol = TC_H_MIN(t->tcm_info); 144 protocol = TC_H_MIN(t->tcm_info);
@@ -159,7 +157,7 @@ replay:
159 /* Find head of filter chain. */ 157 /* Find head of filter chain. */
160 158
161 /* Find link */ 159 /* Find link */
162 dev = __dev_get_by_index(&init_net, t->tcm_ifindex); 160 dev = __dev_get_by_index(net, t->tcm_ifindex);
163 if (dev == NULL) 161 if (dev == NULL)
164 return -ENODEV; 162 return -ENODEV;
165 163
@@ -283,7 +281,7 @@ replay:
283 *back = tp->next; 281 *back = tp->next;
284 spin_unlock_bh(root_lock); 282 spin_unlock_bh(root_lock);
285 283
286 tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER); 284 tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
287 tcf_destroy(tp); 285 tcf_destroy(tp);
288 err = 0; 286 err = 0;
289 goto errout; 287 goto errout;
@@ -306,10 +304,10 @@ replay:
306 case RTM_DELTFILTER: 304 case RTM_DELTFILTER:
307 err = tp->ops->delete(tp, fh); 305 err = tp->ops->delete(tp, fh);
308 if (err == 0) 306 if (err == 0)
309 tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER); 307 tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
310 goto errout; 308 goto errout;
311 case RTM_GETTFILTER: 309 case RTM_GETTFILTER:
312 err = tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); 310 err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
313 goto errout; 311 goto errout;
314 default: 312 default:
315 err = -EINVAL; 313 err = -EINVAL;
@@ -325,7 +323,7 @@ replay:
325 *back = tp; 323 *back = tp;
326 spin_unlock_bh(root_lock); 324 spin_unlock_bh(root_lock);
327 } 325 }
328 tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); 326 tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
329 } else { 327 } else {
330 if (tp_created) 328 if (tp_created)
331 tcf_destroy(tp); 329 tcf_destroy(tp);
@@ -371,8 +369,9 @@ nla_put_failure:
371 return -1; 369 return -1;
372} 370}
373 371
374static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, 372static int tfilter_notify(struct net *net, struct sk_buff *oskb,
375 struct tcf_proto *tp, unsigned long fh, int event) 373 struct nlmsghdr *n, struct tcf_proto *tp,
374 unsigned long fh, int event)
376{ 375{
377 struct sk_buff *skb; 376 struct sk_buff *skb;
378 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; 377 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
@@ -386,7 +385,7 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
386 return -EINVAL; 385 return -EINVAL;
387 } 386 }
388 387
389 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, 388 return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
390 n->nlmsg_flags & NLM_F_ECHO); 389 n->nlmsg_flags & NLM_F_ECHO);
391} 390}
392 391
@@ -419,12 +418,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
419 const struct Qdisc_class_ops *cops; 418 const struct Qdisc_class_ops *cops;
420 struct tcf_dump_args arg; 419 struct tcf_dump_args arg;
421 420
422 if (!net_eq(net, &init_net))
423 return 0;
424
425 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 421 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
426 return skb->len; 422 return skb->len;
427 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) 423 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
428 return skb->len; 424 return skb->len;
429 425
430 if (!tcm->tcm_parent) 426 if (!tcm->tcm_parent)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 17c5dfc67320..593eac056e8d 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -773,10 +773,10 @@ static int __init init_u32(void)
773 printk(" Performance counters on\n"); 773 printk(" Performance counters on\n");
774#endif 774#endif
775#ifdef CONFIG_NET_CLS_IND 775#ifdef CONFIG_NET_CLS_IND
776 printk(" input device check on \n"); 776 printk(" input device check on\n");
777#endif 777#endif
778#ifdef CONFIG_NET_CLS_ACT 778#ifdef CONFIG_NET_CLS_ACT
779 printk(" Actions configured \n"); 779 printk(" Actions configured\n");
780#endif 780#endif
781 return register_tcf_proto_ops(&cls_u32_ops); 781 return register_tcf_proto_ops(&cls_u32_ops);
782} 782}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 145268ca57cf..9839b26674f4 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -35,10 +35,12 @@
35#include <net/netlink.h> 35#include <net/netlink.h>
36#include <net/pkt_sched.h> 36#include <net/pkt_sched.h>
37 37
38static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, 38static int qdisc_notify(struct net *net, struct sk_buff *oskb,
39 struct nlmsghdr *n, u32 clid,
39 struct Qdisc *old, struct Qdisc *new); 40 struct Qdisc *old, struct Qdisc *new);
40static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, 41static int tclass_notify(struct net *net, struct sk_buff *oskb,
41 struct Qdisc *q, unsigned long cl, int event); 42 struct nlmsghdr *n, struct Qdisc *q,
43 unsigned long cl, int event);
42 44
43/* 45/*
44 46
@@ -639,11 +641,12 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
639} 641}
640EXPORT_SYMBOL(qdisc_tree_decrease_qlen); 642EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
641 643
642static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid, 644static void notify_and_destroy(struct net *net, struct sk_buff *skb,
645 struct nlmsghdr *n, u32 clid,
643 struct Qdisc *old, struct Qdisc *new) 646 struct Qdisc *old, struct Qdisc *new)
644{ 647{
645 if (new || old) 648 if (new || old)
646 qdisc_notify(skb, n, clid, old, new); 649 qdisc_notify(net, skb, n, clid, old, new);
647 650
648 if (old) 651 if (old)
649 qdisc_destroy(old); 652 qdisc_destroy(old);
@@ -663,6 +666,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
663 struct Qdisc *new, struct Qdisc *old) 666 struct Qdisc *new, struct Qdisc *old)
664{ 667{
665 struct Qdisc *q = old; 668 struct Qdisc *q = old;
669 struct net *net = dev_net(dev);
666 int err = 0; 670 int err = 0;
667 671
668 if (parent == NULL) { 672 if (parent == NULL) {
@@ -699,12 +703,13 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
699 } 703 }
700 704
701 if (!ingress) { 705 if (!ingress) {
702 notify_and_destroy(skb, n, classid, dev->qdisc, new); 706 notify_and_destroy(net, skb, n, classid,
707 dev->qdisc, new);
703 if (new && !new->ops->attach) 708 if (new && !new->ops->attach)
704 atomic_inc(&new->refcnt); 709 atomic_inc(&new->refcnt);
705 dev->qdisc = new ? : &noop_qdisc; 710 dev->qdisc = new ? : &noop_qdisc;
706 } else { 711 } else {
707 notify_and_destroy(skb, n, classid, old, new); 712 notify_and_destroy(net, skb, n, classid, old, new);
708 } 713 }
709 714
710 if (dev->flags & IFF_UP) 715 if (dev->flags & IFF_UP)
@@ -722,7 +727,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
722 err = -ENOENT; 727 err = -ENOENT;
723 } 728 }
724 if (!err) 729 if (!err)
725 notify_and_destroy(skb, n, classid, old, new); 730 notify_and_destroy(net, skb, n, classid, old, new);
726 } 731 }
727 return err; 732 return err;
728} 733}
@@ -948,10 +953,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
948 struct Qdisc *p = NULL; 953 struct Qdisc *p = NULL;
949 int err; 954 int err;
950 955
951 if (!net_eq(net, &init_net)) 956 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
952 return -EINVAL;
953
954 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
955 return -ENODEV; 957 return -ENODEV;
956 958
957 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); 959 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -991,7 +993,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
991 if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0) 993 if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
992 return err; 994 return err;
993 } else { 995 } else {
994 qdisc_notify(skb, n, clid, NULL, q); 996 qdisc_notify(net, skb, n, clid, NULL, q);
995 } 997 }
996 return 0; 998 return 0;
997} 999}
@@ -1010,16 +1012,13 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1010 struct Qdisc *q, *p; 1012 struct Qdisc *q, *p;
1011 int err; 1013 int err;
1012 1014
1013 if (!net_eq(net, &init_net))
1014 return -EINVAL;
1015
1016replay: 1015replay:
1017 /* Reinit, just in case something touches this. */ 1016 /* Reinit, just in case something touches this. */
1018 tcm = NLMSG_DATA(n); 1017 tcm = NLMSG_DATA(n);
1019 clid = tcm->tcm_parent; 1018 clid = tcm->tcm_parent;
1020 q = p = NULL; 1019 q = p = NULL;
1021 1020
1022 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) 1021 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
1023 return -ENODEV; 1022 return -ENODEV;
1024 1023
1025 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); 1024 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1106,7 +1105,7 @@ replay:
1106 return -EINVAL; 1105 return -EINVAL;
1107 err = qdisc_change(q, tca); 1106 err = qdisc_change(q, tca);
1108 if (err == 0) 1107 if (err == 0)
1109 qdisc_notify(skb, n, clid, NULL, q); 1108 qdisc_notify(net, skb, n, clid, NULL, q);
1110 return err; 1109 return err;
1111 1110
1112create_n_graft: 1111create_n_graft:
@@ -1196,8 +1195,9 @@ nla_put_failure:
1196 return -1; 1195 return -1;
1197} 1196}
1198 1197
1199static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, 1198static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1200 u32 clid, struct Qdisc *old, struct Qdisc *new) 1199 struct nlmsghdr *n, u32 clid,
1200 struct Qdisc *old, struct Qdisc *new)
1201{ 1201{
1202 struct sk_buff *skb; 1202 struct sk_buff *skb;
1203 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; 1203 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
@@ -1216,7 +1216,7 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1216 } 1216 }
1217 1217
1218 if (skb->len) 1218 if (skb->len)
1219 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 1219 return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1220 1220
1221err_out: 1221err_out:
1222 kfree_skb(skb); 1222 kfree_skb(skb);
@@ -1275,15 +1275,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1275 int s_idx, s_q_idx; 1275 int s_idx, s_q_idx;
1276 struct net_device *dev; 1276 struct net_device *dev;
1277 1277
1278 if (!net_eq(net, &init_net))
1279 return 0;
1280
1281 s_idx = cb->args[0]; 1278 s_idx = cb->args[0];
1282 s_q_idx = q_idx = cb->args[1]; 1279 s_q_idx = q_idx = cb->args[1];
1283 1280
1284 rcu_read_lock(); 1281 rcu_read_lock();
1285 idx = 0; 1282 idx = 0;
1286 for_each_netdev_rcu(&init_net, dev) { 1283 for_each_netdev_rcu(net, dev) {
1287 struct netdev_queue *dev_queue; 1284 struct netdev_queue *dev_queue;
1288 1285
1289 if (idx < s_idx) 1286 if (idx < s_idx)
@@ -1335,10 +1332,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1335 u32 qid = TC_H_MAJ(clid); 1332 u32 qid = TC_H_MAJ(clid);
1336 int err; 1333 int err;
1337 1334
1338 if (!net_eq(net, &init_net)) 1335 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
1339 return -EINVAL;
1340
1341 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
1342 return -ENODEV; 1336 return -ENODEV;
1343 1337
1344 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); 1338 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1419,10 +1413,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1419 if (cops->delete) 1413 if (cops->delete)
1420 err = cops->delete(q, cl); 1414 err = cops->delete(q, cl);
1421 if (err == 0) 1415 if (err == 0)
1422 tclass_notify(skb, n, q, cl, RTM_DELTCLASS); 1416 tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS);
1423 goto out; 1417 goto out;
1424 case RTM_GETTCLASS: 1418 case RTM_GETTCLASS:
1425 err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS); 1419 err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
1426 goto out; 1420 goto out;
1427 default: 1421 default:
1428 err = -EINVAL; 1422 err = -EINVAL;
@@ -1435,7 +1429,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1435 if (cops->change) 1429 if (cops->change)
1436 err = cops->change(q, clid, pid, tca, &new_cl); 1430 err = cops->change(q, clid, pid, tca, &new_cl);
1437 if (err == 0) 1431 if (err == 0)
1438 tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS); 1432 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
1439 1433
1440out: 1434out:
1441 if (cl) 1435 if (cl)
@@ -1487,8 +1481,9 @@ nla_put_failure:
1487 return -1; 1481 return -1;
1488} 1482}
1489 1483
1490static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, 1484static int tclass_notify(struct net *net, struct sk_buff *oskb,
1491 struct Qdisc *q, unsigned long cl, int event) 1485 struct nlmsghdr *n, struct Qdisc *q,
1486 unsigned long cl, int event)
1492{ 1487{
1493 struct sk_buff *skb; 1488 struct sk_buff *skb;
1494 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; 1489 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
@@ -1502,7 +1497,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1502 return -EINVAL; 1497 return -EINVAL;
1503 } 1498 }
1504 1499
1505 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 1500 return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1506} 1501}
1507 1502
1508struct qdisc_dump_args 1503struct qdisc_dump_args
@@ -1577,12 +1572,9 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1577 struct net_device *dev; 1572 struct net_device *dev;
1578 int t, s_t; 1573 int t, s_t;
1579 1574
1580 if (!net_eq(net, &init_net))
1581 return 0;
1582
1583 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 1575 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1584 return 0; 1576 return 0;
1585 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) 1577 if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
1586 return 0; 1578 return 0;
1587 1579
1588 s_t = cb->args[0]; 1580 s_t = cb->args[0];
@@ -1692,7 +1684,7 @@ static int psched_show(struct seq_file *seq, void *v)
1692 1684
1693static int psched_open(struct inode *inode, struct file *file) 1685static int psched_open(struct inode *inode, struct file *file)
1694{ 1686{
1695 return single_open(file, psched_show, PDE(inode)->data); 1687 return single_open(file, psched_show, NULL);
1696} 1688}
1697 1689
1698static const struct file_operations psched_fops = { 1690static const struct file_operations psched_fops = {
@@ -1702,15 +1694,53 @@ static const struct file_operations psched_fops = {
1702 .llseek = seq_lseek, 1694 .llseek = seq_lseek,
1703 .release = single_release, 1695 .release = single_release,
1704}; 1696};
1697
1698static int __net_init psched_net_init(struct net *net)
1699{
1700 struct proc_dir_entry *e;
1701
1702 e = proc_net_fops_create(net, "psched", 0, &psched_fops);
1703 if (e == NULL)
1704 return -ENOMEM;
1705
1706 return 0;
1707}
1708
1709static void __net_exit psched_net_exit(struct net *net)
1710{
1711 proc_net_remove(net, "psched");
1712}
1713#else
1714static int __net_init psched_net_init(struct net *net)
1715{
1716 return 0;
1717}
1718
1719static void __net_exit psched_net_exit(struct net *net)
1720{
1721}
1705#endif 1722#endif
1706 1723
1724static struct pernet_operations psched_net_ops = {
1725 .init = psched_net_init,
1726 .exit = psched_net_exit,
1727};
1728
1707static int __init pktsched_init(void) 1729static int __init pktsched_init(void)
1708{ 1730{
1731 int err;
1732
1733 err = register_pernet_subsys(&psched_net_ops);
1734 if (err) {
1735 printk(KERN_ERR "pktsched_init: "
1736 "cannot initialize per netns operations\n");
1737 return err;
1738 }
1739
1709 register_qdisc(&pfifo_qdisc_ops); 1740 register_qdisc(&pfifo_qdisc_ops);
1710 register_qdisc(&bfifo_qdisc_ops); 1741 register_qdisc(&bfifo_qdisc_ops);
1711 register_qdisc(&pfifo_head_drop_qdisc_ops); 1742 register_qdisc(&pfifo_head_drop_qdisc_ops);
1712 register_qdisc(&mq_qdisc_ops); 1743 register_qdisc(&mq_qdisc_ops);
1713 proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
1714 1744
1715 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); 1745 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
1716 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL); 1746 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
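With psched_net_init()/psched_net_exit() registered through register_pernet_subsys(), /proc/net/psched is now created and removed per network namespace rather than once for init_net. A trivial sketch of reading it from userspace is shown below; the file's contents (four hex words describing the scheduler's clock parameters) are unchanged by this patch.

/* Illustrative sketch: dump /proc/net/psched, which each network
 * namespace now gets its own copy of. */
#include <stdio.h>

int main(void)
{
	char line[128];
	FILE *f = fopen("/proc/net/psched", "r");

	if (!f) {
		perror("/proc/net/psched");
		return 1;
	}
	if (fgets(line, sizeof(line), f))
		printf("%s", line);
	fclose(f);
	return 0;
}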
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ff4dd53eeff0..aeddabfb8e4e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -529,7 +529,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
529 unsigned int size; 529 unsigned int size;
530 int err = -ENOBUFS; 530 int err = -ENOBUFS;
531 531
532 /* ensure that the Qdisc and the private data are 32-byte aligned */ 532 /* ensure that the Qdisc and the private data are 64-byte aligned */
533 size = QDISC_ALIGN(sizeof(*sch)); 533 size = QDISC_ALIGN(sizeof(*sch));
534 size += ops->priv_size + (QDISC_ALIGNTO - 1); 534 size += ops->priv_size + (QDISC_ALIGNTO - 1);
535 535
@@ -591,6 +591,13 @@ void qdisc_reset(struct Qdisc *qdisc)
591} 591}
592EXPORT_SYMBOL(qdisc_reset); 592EXPORT_SYMBOL(qdisc_reset);
593 593
594static void qdisc_rcu_free(struct rcu_head *head)
595{
596 struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
597
598 kfree((char *) qdisc - qdisc->padded);
599}
600
594void qdisc_destroy(struct Qdisc *qdisc) 601void qdisc_destroy(struct Qdisc *qdisc)
595{ 602{
596 const struct Qdisc_ops *ops = qdisc->ops; 603 const struct Qdisc_ops *ops = qdisc->ops;
@@ -614,7 +621,11 @@ void qdisc_destroy(struct Qdisc *qdisc)
614 dev_put(qdisc_dev(qdisc)); 621 dev_put(qdisc_dev(qdisc));
615 622
616 kfree_skb(qdisc->gso_skb); 623 kfree_skb(qdisc->gso_skb);
617 kfree((char *) qdisc - qdisc->padded); 624 /*
625 * gen_estimator est_timer() might access qdisc->q.lock,
626 * wait a RCU grace period before freeing qdisc.
627 */
628 call_rcu(&qdisc->rcu_head, qdisc_rcu_free);
618} 629}
619EXPORT_SYMBOL(qdisc_destroy); 630EXPORT_SYMBOL(qdisc_destroy);
620 631
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 9fb5d37c37ad..14db5689fb89 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -277,20 +277,7 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc,
277static inline int sctp_v6_addr_match_len(union sctp_addr *s1, 277static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
278 union sctp_addr *s2) 278 union sctp_addr *s2)
279{ 279{
280 struct in6_addr *a1 = &s1->v6.sin6_addr; 280 return ipv6_addr_diff(&s1->v6.sin6_addr, &s2->v6.sin6_addr);
281 struct in6_addr *a2 = &s2->v6.sin6_addr;
282 int i, j;
283
284 for (i = 0; i < 4 ; i++) {
285 __be32 a1xora2;
286
287 a1xora2 = a1->s6_addr32[i] ^ a2->s6_addr32[i];
288
289 if ((j = fls(ntohl(a1xora2))))
290 return (i * 32 + 32 - j);
291 }
292
293 return (i*32);
294} 281}
295 282
296/* Fills in the source address(saddr) based on the destination address(daddr) 283/* Fills in the source address(saddr) based on the destination address(daddr)
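ipv6_addr_diff() returns the same value the removed loop computed: the number of leading bits the two addresses have in common. A standalone userspace sketch of that computation is given below, using byte-wise access so it builds with plain glibc headers; the sample addresses are arbitrary and only for illustration.

/* Illustrative sketch of the prefix-match length that
 * sctp_v6_addr_match_len() now gets from ipv6_addr_diff(). */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>

static int addr_match_len(const struct in6_addr *a1, const struct in6_addr *a2)
{
	int i, j;

	for (i = 0; i < 16; i++) {
		unsigned int xb = a1->s6_addr[i] ^ a2->s6_addr[i];

		if (xb) {
			for (j = 7; !(xb & (1u << j)); j--)
				;
			return i * 8 + 7 - j;	/* bits matched before the first difference */
		}
	}
	return 128;				/* addresses are identical */
}

int main(void)
{
	struct in6_addr a, b;

	inet_pton(AF_INET6, "2001:db8::1", &a);
	inet_pton(AF_INET6, "2001:db8::2:1", &b);
	printf("common prefix: %d bits\n", addr_match_len(&a, &b));
	return 0;
}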
@@ -372,13 +359,13 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
372 } 359 }
373 360
374 read_lock_bh(&in6_dev->lock); 361 read_lock_bh(&in6_dev->lock);
375 for (ifp = in6_dev->addr_list; ifp; ifp = ifp->if_next) { 362 list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
376 /* Add the address to the local list. */ 363 /* Add the address to the local list. */
377 addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC); 364 addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC);
378 if (addr) { 365 if (addr) {
379 addr->a.v6.sin6_family = AF_INET6; 366 addr->a.v6.sin6_family = AF_INET6;
380 addr->a.v6.sin6_port = 0; 367 addr->a.v6.sin6_port = 0;
381 addr->a.v6.sin6_addr = ifp->addr; 368 ipv6_addr_copy(&addr->a.v6.sin6_addr, &ifp->addr);
382 addr->a.v6.sin6_scope_id = dev->ifindex; 369 addr->a.v6.sin6_scope_id = dev->ifindex;
383 addr->valid = 1; 370 addr->valid = 1;
384 INIT_LIST_HEAD(&addr->list); 371 INIT_LIST_HEAD(&addr->list);
@@ -419,7 +406,7 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk)
419{ 406{
420 addr->v6.sin6_family = AF_INET6; 407 addr->v6.sin6_family = AF_INET6;
421 addr->v6.sin6_port = 0; 408 addr->v6.sin6_port = 0;
422 addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr; 409 ipv6_addr_copy(&addr->v6.sin6_addr, &inet6_sk(sk)->rcv_saddr);
423} 410}
424 411
425/* Initialize sk->sk_rcv_saddr from sctp_addr. */ 412/* Initialize sk->sk_rcv_saddr from sctp_addr. */
@@ -432,7 +419,7 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
432 inet6_sk(sk)->rcv_saddr.s6_addr32[3] = 419 inet6_sk(sk)->rcv_saddr.s6_addr32[3] =
433 addr->v4.sin_addr.s_addr; 420 addr->v4.sin_addr.s_addr;
434 } else { 421 } else {
435 inet6_sk(sk)->rcv_saddr = addr->v6.sin6_addr; 422 ipv6_addr_copy(&inet6_sk(sk)->rcv_saddr, &addr->v6.sin6_addr);
436 } 423 }
437} 424}
438 425
@@ -445,7 +432,7 @@ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
445 inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff); 432 inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff);
446 inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; 433 inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
447 } else { 434 } else {
448 inet6_sk(sk)->daddr = addr->v6.sin6_addr; 435 ipv6_addr_copy(&inet6_sk(sk)->daddr, &addr->v6.sin6_addr);
449 } 436 }
450} 437}
451 438
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 007e8baba089..c1941276f6e3 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5482,7 +5482,6 @@ pp_found:
5482 */ 5482 */
5483 int reuse = sk->sk_reuse; 5483 int reuse = sk->sk_reuse;
5484 struct sock *sk2; 5484 struct sock *sk2;
5485 struct hlist_node *node;
5486 5485
5487 SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n"); 5486 SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n");
5488 if (pp->fastreuse && sk->sk_reuse && 5487 if (pp->fastreuse && sk->sk_reuse &&
diff --git a/net/socket.c b/net/socket.c
index 5e8d0af3c0e7..35bc198bbf68 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -620,10 +620,9 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
620 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, 620 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
621 sizeof(tv), &tv); 621 sizeof(tv), &tv);
622 } else { 622 } else {
623 struct timespec ts; 623 skb_get_timestampns(skb, &ts[0]);
624 skb_get_timestampns(skb, &ts);
625 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, 624 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
626 sizeof(ts), &ts); 625 sizeof(ts[0]), &ts[0]);
627 } 626 }
628 } 627 }
629 628
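The SCM_TIMESTAMPNS branch above is taken when a socket has SO_TIMESTAMPNS enabled; __sock_recv_timestamp() then delivers the packet's receive time as a struct timespec control message. A hedged userspace sketch of the receiving side follows; UDP on port 5555 is an arbitrary choice for illustration.

/* Illustrative sketch: enable nanosecond receive timestamps and read
 * the SCM_TIMESTAMPNS cmsg filled in by __sock_recv_timestamp(). */
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <sys/socket.h>
#include <netinet/in.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0), on = 1;
	struct sockaddr_in sa = { .sin_family = AF_INET,
				  .sin_port = htons(5555) };
	char data[2048], cbuf[CMSG_SPACE(sizeof(struct timespec))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
			      .msg_control = cbuf,
			      .msg_controllen = sizeof(cbuf) };
	struct cmsghdr *cm;

	if (fd < 0)
		return 1;
	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &on, sizeof(on));
	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
	if (recvmsg(fd, &msg, 0) < 0)
		return 1;

	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level == SOL_SOCKET &&
		    cm->cmsg_type == SCM_TIMESTAMPNS) {
			struct timespec ts;

			memcpy(&ts, CMSG_DATA(cm), sizeof(ts));
			printf("received at %ld.%09ld\n",
			       (long)ts.tv_sec, ts.tv_nsec);
		}
	}
	return 0;
}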
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
index 3308157436d2..a99825d7caa0 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_token.c
@@ -223,7 +223,7 @@ spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, unsigned char **ck
223 223
224 /* only support SPKM_MIC_TOK */ 224 /* only support SPKM_MIC_TOK */
225 if((ptr[6] != 0x01) || (ptr[7] != 0x01)) { 225 if((ptr[6] != 0x01) || (ptr[7] != 0x01)) {
226 dprintk("RPC: ERROR unsupported SPKM3 token \n"); 226 dprintk("RPC: ERROR unsupported SPKM3 token\n");
227 goto out; 227 goto out;
228 } 228 }
229 229
diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c
index f0c05d3311c1..7dcfe0cc3500 100644
--- a/net/sunrpc/bc_svc.c
+++ b/net/sunrpc/bc_svc.c
@@ -60,7 +60,7 @@ int bc_send(struct rpc_rqst *req)
60 rpc_put_task(task); 60 rpc_put_task(task);
61 } 61 }
62 return ret; 62 return ret;
63 dprintk("RPC: bc_send ret= %d \n", ret); 63 dprintk("RPC: bc_send ret= %d\n", ret);
64} 64}
65 65
66#endif /* CONFIG_NFS_V4_1 */ 66#endif /* CONFIG_NFS_V4_1 */
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index a3bfd4064912..90a051912c03 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -558,10 +558,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
558 struct tipc_bearer *unused1, 558 struct tipc_bearer *unused1,
559 struct tipc_media_addr *unused2) 559 struct tipc_media_addr *unused2)
560{ 560{
561 static int send_count = 0;
562
563 int bp_index; 561 int bp_index;
564 int swap_time;
565 562
566 /* Prepare buffer for broadcasting (if first time trying to send it) */ 563 /* Prepare buffer for broadcasting (if first time trying to send it) */
567 564
@@ -575,11 +572,6 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
575 msg_set_mc_netid(msg, tipc_net_id); 572 msg_set_mc_netid(msg, tipc_net_id);
576 } 573 }
577 574
578 /* Determine if bearer pairs should be swapped following this attempt */
579
580 if ((swap_time = (++send_count >= 10)))
581 send_count = 0;
582
583 /* Send buffer over bearers until all targets reached */ 575 /* Send buffer over bearers until all targets reached */
584 576
585 bcbearer->remains = tipc_cltr_bcast_nodes; 577 bcbearer->remains = tipc_cltr_bcast_nodes;
@@ -595,21 +587,22 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
595 if (bcbearer->remains_new.count == bcbearer->remains.count) 587 if (bcbearer->remains_new.count == bcbearer->remains.count)
596 continue; /* bearer pair doesn't add anything */ 588 continue; /* bearer pair doesn't add anything */
597 589
598 if (!p->publ.blocked && 590 if (p->publ.blocked ||
599 !p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) { 591 p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) {
600 if (swap_time && s && !s->publ.blocked) 592 /* unable to send on primary bearer */
601 goto swap; 593 if (!s || s->publ.blocked ||
602 else 594 s->media->send_msg(buf, &s->publ,
603 goto update; 595 &s->media->bcast_addr)) {
596 /* unable to send on either bearer */
597 continue;
598 }
599 }
600
601 if (s) {
602 bcbearer->bpairs[bp_index].primary = s;
603 bcbearer->bpairs[bp_index].secondary = p;
604 } 604 }
605 605
606 if (!s || s->publ.blocked ||
607 s->media->send_msg(buf, &s->publ, &s->media->bcast_addr))
608 continue; /* unable to send using bearer pair */
609swap:
610 bcbearer->bpairs[bp_index].primary = s;
611 bcbearer->bpairs[bp_index].secondary = p;
612update:
613 if (bcbearer->remains_new.count == 0) 606 if (bcbearer->remains_new.count == 0)
614 return 0; 607 return 0;
615 608
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 52c571fedbe0..4e84c8431f32 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -49,7 +49,7 @@
49#include "config.h" 49#include "config.h"
50 50
51 51
52#define TIPC_MOD_VER "1.6.4" 52#define TIPC_MOD_VER "2.0.0"
53 53
54#ifndef CONFIG_TIPC_ZONES 54#ifndef CONFIG_TIPC_ZONES
55#define CONFIG_TIPC_ZONES 3 55#define CONFIG_TIPC_ZONES 3
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1a7e4665af80..c76e82e5f982 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -877,7 +877,7 @@ static void link_state_event(struct link *l_ptr, unsigned event)
877 case TIMEOUT_EVT: 877 case TIMEOUT_EVT:
878 dbg_link("TIM "); 878 dbg_link("TIM ");
879 if (l_ptr->next_in_no != l_ptr->checkpoint) { 879 if (l_ptr->next_in_no != l_ptr->checkpoint) {
880 dbg_link("-> WW \n"); 880 dbg_link("-> WW\n");
881 l_ptr->state = WORKING_WORKING; 881 l_ptr->state = WORKING_WORKING;
882 l_ptr->fsm_msg_cnt = 0; 882 l_ptr->fsm_msg_cnt = 0;
883 l_ptr->checkpoint = l_ptr->next_in_no; 883 l_ptr->checkpoint = l_ptr->next_in_no;
@@ -934,7 +934,7 @@ static void link_state_event(struct link *l_ptr, unsigned event)
934 link_set_timer(l_ptr, cont_intv); 934 link_set_timer(l_ptr, cont_intv);
935 break; 935 break;
936 case RESET_MSG: 936 case RESET_MSG:
937 dbg_link("RES \n"); 937 dbg_link("RES\n");
938 dbg_link(" -> RR\n"); 938 dbg_link(" -> RR\n");
939 l_ptr->state = RESET_RESET; 939 l_ptr->state = RESET_RESET;
940 l_ptr->fsm_msg_cnt = 0; 940 l_ptr->fsm_msg_cnt = 0;
@@ -947,7 +947,7 @@ static void link_state_event(struct link *l_ptr, unsigned event)
947 l_ptr->started = 1; 947 l_ptr->started = 1;
948 /* fall through */ 948 /* fall through */
949 case TIMEOUT_EVT: 949 case TIMEOUT_EVT:
950 dbg_link("TIM \n"); 950 dbg_link("TIM\n");
951 tipc_link_send_proto_msg(l_ptr, RESET_MSG, 0, 0, 0, 0, 0); 951 tipc_link_send_proto_msg(l_ptr, RESET_MSG, 0, 0, 0, 0, 0);
952 l_ptr->fsm_msg_cnt++; 952 l_ptr->fsm_msg_cnt++;
953 link_set_timer(l_ptr, cont_intv); 953 link_set_timer(l_ptr, cont_intv);
@@ -1553,7 +1553,7 @@ u32 tipc_link_push_packet(struct link *l_ptr)
1553 1553
1554 /* Continue retransmission now, if there is anything: */ 1554 /* Continue retransmission now, if there is anything: */
1555 1555
1556 if (r_q_size && buf && !skb_cloned(buf)) { 1556 if (r_q_size && buf) {
1557 msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1)); 1557 msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
1558 msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); 1558 msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in);
1559 if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) { 1559 if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
@@ -1722,15 +1722,16 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
1722 dbg("Retransmitting %u in link %x\n", retransmits, l_ptr); 1722 dbg("Retransmitting %u in link %x\n", retransmits, l_ptr);
1723 1723
1724 if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) { 1724 if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) {
1725 if (!skb_cloned(buf)) { 1725 if (l_ptr->retransm_queue_size == 0) {
1726 msg_dbg(msg, ">NO_RETR->BCONG>"); 1726 msg_dbg(msg, ">NO_RETR->BCONG>");
1727 dbg_print_link(l_ptr, " "); 1727 dbg_print_link(l_ptr, " ");
1728 l_ptr->retransm_queue_head = msg_seqno(msg); 1728 l_ptr->retransm_queue_head = msg_seqno(msg);
1729 l_ptr->retransm_queue_size = retransmits; 1729 l_ptr->retransm_queue_size = retransmits;
1730 return;
1731 } else { 1730 } else {
1732 /* Don't retransmit if driver already has the buffer */ 1731 err("Unexpected retransmit on link %s (qsize=%d)\n",
1732 l_ptr->name, l_ptr->retransm_queue_size);
1733 } 1733 }
1734 return;
1734 } else { 1735 } else {
1735 /* Detect repeated retransmit failures on uncongested bearer */ 1736 /* Detect repeated retransmit failures on uncongested bearer */
1736 1737
@@ -1745,7 +1746,7 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
1745 } 1746 }
1746 } 1747 }
1747 1748
1748 while (retransmits && (buf != l_ptr->next_out) && buf && !skb_cloned(buf)) { 1749 while (retransmits && (buf != l_ptr->next_out) && buf) {
1749 msg = buf_msg(buf); 1750 msg = buf_msg(buf);
1750 msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); 1751 msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
1751 msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); 1752 msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
@@ -3294,7 +3295,7 @@ static void link_dump_rec_queue(struct link *l_ptr)
3294 info("buffer %x invalid\n", crs); 3295 info("buffer %x invalid\n", crs);
3295 return; 3296 return;
3296 } 3297 }
3297 msg_dbg(buf_msg(crs), "In rec queue: \n"); 3298 msg_dbg(buf_msg(crs), "In rec queue:\n");
3298 crs = crs->next; 3299 crs = crs->next;
3299 } 3300 }
3300} 3301}
diff --git a/net/tipc/net.c b/net/tipc/net.c
index f25b1cdb64eb..d7cd1e064a80 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -116,7 +116,7 @@
116*/ 116*/
117 117
118DEFINE_RWLOCK(tipc_net_lock); 118DEFINE_RWLOCK(tipc_net_lock);
119struct _zone *tipc_zones[256] = { NULL, }; 119static struct _zone *tipc_zones[256] = { NULL, };
120struct network tipc_net = { tipc_zones }; 120struct network tipc_net = { tipc_zones };
121 121
122struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref) 122struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref)
@@ -291,6 +291,6 @@ void tipc_net_stop(void)
291 tipc_bclink_stop(); 291 tipc_bclink_stop();
292 net_stop(); 292 net_stop();
293 write_unlock_bh(&tipc_net_lock); 293 write_unlock_bh(&tipc_net_lock);
294 info("Left network mode \n"); 294 info("Left network mode\n");
295} 295}
296 296
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2c24e7d6d950..17cc394f424f 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -278,7 +278,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
278 n_ptr->link_cnt++; 278 n_ptr->link_cnt++;
279 return n_ptr; 279 return n_ptr;
280 } 280 }
281 err("Attempt to establish second link on <%s> to %s \n", 281 err("Attempt to establish second link on <%s> to %s\n",
282 l_ptr->b_ptr->publ.name, 282 l_ptr->b_ptr->publ.name,
283 addr_string_fill(addr_string, l_ptr->addr)); 283 addr_string_fill(addr_string, l_ptr->addr));
284 } 284 }
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index ff123e56114a..ab6eab4c45e2 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -274,7 +274,7 @@ static void subscr_cancel(struct tipc_subscr *s,
274{ 274{
275 struct subscription *sub; 275 struct subscription *sub;
276 struct subscription *sub_temp; 276 struct subscription *sub_temp;
277 __u32 type, lower, upper; 277 __u32 type, lower, upper, timeout, filter;
278 int found = 0; 278 int found = 0;
279 279
280 /* Find first matching subscription, exit if not found */ 280 /* Find first matching subscription, exit if not found */
@@ -282,12 +282,18 @@ static void subscr_cancel(struct tipc_subscr *s,
282 type = ntohl(s->seq.type); 282 type = ntohl(s->seq.type);
283 lower = ntohl(s->seq.lower); 283 lower = ntohl(s->seq.lower);
284 upper = ntohl(s->seq.upper); 284 upper = ntohl(s->seq.upper);
285 timeout = ntohl(s->timeout);
286 filter = ntohl(s->filter) & ~TIPC_SUB_CANCEL;
285 287
286 list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, 288 list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list,
287 subscription_list) { 289 subscription_list) {
288 if ((type == sub->seq.type) && 290 if ((type == sub->seq.type) &&
289 (lower == sub->seq.lower) && 291 (lower == sub->seq.lower) &&
290 (upper == sub->seq.upper)) { 292 (upper == sub->seq.upper) &&
293 (timeout == sub->timeout) &&
294 (filter == sub->filter) &&
295 !memcmp(s->usr_handle,sub->evt.s.usr_handle,
296 sizeof(s->usr_handle)) ){
291 found = 1; 297 found = 1;
292 break; 298 break;
293 } 299 }
@@ -304,7 +310,7 @@ static void subscr_cancel(struct tipc_subscr *s,
304 k_term_timer(&sub->timer); 310 k_term_timer(&sub->timer);
305 spin_lock_bh(subscriber->lock); 311 spin_lock_bh(subscriber->lock);
306 } 312 }
307 dbg("Cancel: removing sub %u,%u,%u from subscriber %x list\n", 313 dbg("Cancel: removing sub %u,%u,%u from subscriber %p list\n",
308 sub->seq.type, sub->seq.lower, sub->seq.upper, subscriber); 314 sub->seq.type, sub->seq.lower, sub->seq.upper, subscriber);
309 subscr_del(sub); 315 subscr_del(sub);
310} 316}
@@ -352,8 +358,7 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
352 sub->seq.upper = ntohl(s->seq.upper); 358 sub->seq.upper = ntohl(s->seq.upper);
353 sub->timeout = ntohl(s->timeout); 359 sub->timeout = ntohl(s->timeout);
354 sub->filter = ntohl(s->filter); 360 sub->filter = ntohl(s->filter);
355 if ((!(sub->filter & TIPC_SUB_PORTS) == 361 if ((sub->filter && (sub->filter != TIPC_SUB_PORTS)) ||
356 !(sub->filter & TIPC_SUB_SERVICE)) ||
357 (sub->seq.lower > sub->seq.upper)) { 362 (sub->seq.lower > sub->seq.upper)) {
358 warn("Subscription rejected, illegal request\n"); 363 warn("Subscription rejected, illegal request\n");
359 kfree(sub); 364 kfree(sub);
diff --git a/net/wimax/op-reset.c b/net/wimax/op-reset.c
index 4dc82a54ba30..68bedf3e5443 100644
--- a/net/wimax/op-reset.c
+++ b/net/wimax/op-reset.c
@@ -110,7 +110,6 @@ int wimax_gnl_doit_reset(struct sk_buff *skb, struct genl_info *info)
110{ 110{
111 int result, ifindex; 111 int result, ifindex;
112 struct wimax_dev *wimax_dev; 112 struct wimax_dev *wimax_dev;
113 struct device *dev;
114 113
115 d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info); 114 d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
116 result = -ENODEV; 115 result = -ENODEV;
@@ -123,7 +122,6 @@ int wimax_gnl_doit_reset(struct sk_buff *skb, struct genl_info *info)
123 wimax_dev = wimax_dev_get_by_genl_info(info, ifindex); 122 wimax_dev = wimax_dev_get_by_genl_info(info, ifindex);
124 if (wimax_dev == NULL) 123 if (wimax_dev == NULL)
125 goto error_no_wimax_dev; 124 goto error_no_wimax_dev;
126 dev = wimax_dev_to_dev(wimax_dev);
127 /* Execute the operation and send the result back to user space */ 125 /* Execute the operation and send the result back to user space */
128 result = wimax_reset(wimax_dev); 126 result = wimax_reset(wimax_dev);
129 dev_put(wimax_dev->net_dev); 127 dev_put(wimax_dev->net_dev);
diff --git a/net/wimax/op-state-get.c b/net/wimax/op-state-get.c
index 11ad3356eb56..aff8776e2d41 100644
--- a/net/wimax/op-state-get.c
+++ b/net/wimax/op-state-get.c
@@ -53,7 +53,6 @@ int wimax_gnl_doit_state_get(struct sk_buff *skb, struct genl_info *info)
53{ 53{
54 int result, ifindex; 54 int result, ifindex;
55 struct wimax_dev *wimax_dev; 55 struct wimax_dev *wimax_dev;
56 struct device *dev;
57 56
58 d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info); 57 d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
59 result = -ENODEV; 58 result = -ENODEV;
@@ -66,7 +65,6 @@ int wimax_gnl_doit_state_get(struct sk_buff *skb, struct genl_info *info)
66 wimax_dev = wimax_dev_get_by_genl_info(info, ifindex); 65 wimax_dev = wimax_dev_get_by_genl_info(info, ifindex);
67 if (wimax_dev == NULL) 66 if (wimax_dev == NULL)
68 goto error_no_wimax_dev; 67 goto error_no_wimax_dev;
69 dev = wimax_dev_to_dev(wimax_dev);
70 /* Execute the operation and send the result back to user space */ 68 /* Execute the operation and send the result back to user space */
71 result = wimax_state_get(wimax_dev); 69 result = wimax_state_get(wimax_dev);
72 dev_put(wimax_dev->net_dev); 70 dev_put(wimax_dev->net_dev);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 22139fa46115..4bb734a95f57 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -895,3 +895,16 @@ void cfg80211_action_tx_status(struct net_device *dev, u64 cookie,
895 nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp); 895 nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp);
896} 896}
897EXPORT_SYMBOL(cfg80211_action_tx_status); 897EXPORT_SYMBOL(cfg80211_action_tx_status);
898
899void cfg80211_cqm_rssi_notify(struct net_device *dev,
900 enum nl80211_cqm_rssi_threshold_event rssi_event,
901 gfp_t gfp)
902{
903 struct wireless_dev *wdev = dev->ieee80211_ptr;
904 struct wiphy *wiphy = wdev->wiphy;
905 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
906
907 /* Indicate roaming trigger event to user space */
908 nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp);
909}
910EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
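
The cfg80211_cqm_rssi_notify() helper added above is the driver-facing entry point for connection-quality-monitor events. A minimal driver-side sketch of how it might be called once the RSSI falls below the configured threshold (my_driver_rssi_low is a hypothetical driver function; the NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW value is assumed from enum nl80211_cqm_rssi_threshold_event, which is not shown in this diff):

#include <net/cfg80211.h>

/* Hypothetical driver hook: report that the running RSSI average has
 * dropped below the threshold configured via set_cqm_rssi_config(). */
static void my_driver_rssi_low(struct net_device *dev)
{
	/* May be called from an interrupt/softirq path, hence GFP_ATOMIC. */
	cfg80211_cqm_rssi_notify(dev, NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW,
				 GFP_ATOMIC);
}
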
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 030cf153bea2..596bf189549a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -150,6 +150,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
150 .len = IEEE80211_MAX_DATA_LEN }, 150 .len = IEEE80211_MAX_DATA_LEN },
151 [NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, }, 151 [NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, },
152 [NL80211_ATTR_PS_STATE] = { .type = NLA_U32 }, 152 [NL80211_ATTR_PS_STATE] = { .type = NLA_U32 },
153 [NL80211_ATTR_CQM] = { .type = NLA_NESTED, },
153}; 154};
154 155
155/* policy for the attributes */ 156/* policy for the attributes */
@@ -4779,6 +4780,84 @@ unlock_rtnl:
4779 return err; 4780 return err;
4780} 4781}
4781 4782
4783static struct nla_policy
4784nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] __read_mostly = {
4785 [NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_U32 },
4786 [NL80211_ATTR_CQM_RSSI_HYST] = { .type = NLA_U32 },
4787 [NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT] = { .type = NLA_U32 },
4788};
4789
4790static int nl80211_set_cqm_rssi(struct genl_info *info,
4791 s32 threshold, u32 hysteresis)
4792{
4793 struct cfg80211_registered_device *rdev;
4794 struct wireless_dev *wdev;
4795 struct net_device *dev;
4796 int err;
4797
4798 if (threshold > 0)
4799 return -EINVAL;
4800
4801 rtnl_lock();
4802
4803 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
4804 if (err)
4805 goto unlock_rdev;
4806
4807 wdev = dev->ieee80211_ptr;
4808
4809 if (!rdev->ops->set_cqm_rssi_config) {
4810 err = -EOPNOTSUPP;
4811 goto unlock_rdev;
4812 }
4813
4814 if (wdev->iftype != NL80211_IFTYPE_STATION) {
4815 err = -EOPNOTSUPP;
4816 goto unlock_rdev;
4817 }
4818
4819 err = rdev->ops->set_cqm_rssi_config(wdev->wiphy, dev,
4820 threshold, hysteresis);
4821
4822unlock_rdev:
4823 cfg80211_unlock_rdev(rdev);
4824 dev_put(dev);
4825 rtnl_unlock();
4826
4827 return err;
4828}
4829
4830static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info)
4831{
4832 struct nlattr *attrs[NL80211_ATTR_CQM_MAX + 1];
4833 struct nlattr *cqm;
4834 int err;
4835
4836 cqm = info->attrs[NL80211_ATTR_CQM];
4837 if (!cqm) {
4838 err = -EINVAL;
4839 goto out;
4840 }
4841
4842 err = nla_parse_nested(attrs, NL80211_ATTR_CQM_MAX, cqm,
4843 nl80211_attr_cqm_policy);
4844 if (err)
4845 goto out;
4846
4847 if (attrs[NL80211_ATTR_CQM_RSSI_THOLD] &&
4848 attrs[NL80211_ATTR_CQM_RSSI_HYST]) {
4849 s32 threshold;
4850 u32 hysteresis;
4851 threshold = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
4852 hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]);
4853 err = nl80211_set_cqm_rssi(info, threshold, hysteresis);
4854 } else
4855 err = -EINVAL;
4856
4857out:
4858 return err;
4859}
4860
4782static struct genl_ops nl80211_ops[] = { 4861static struct genl_ops nl80211_ops[] = {
4783 { 4862 {
4784 .cmd = NL80211_CMD_GET_WIPHY, 4863 .cmd = NL80211_CMD_GET_WIPHY,
@@ -5083,6 +5162,12 @@ static struct genl_ops nl80211_ops[] = {
5083 .policy = nl80211_policy, 5162 .policy = nl80211_policy,
5084 /* can be retrieved by unprivileged users */ 5163 /* can be retrieved by unprivileged users */
5085 }, 5164 },
5165 {
5166 .cmd = NL80211_CMD_SET_CQM,
5167 .doit = nl80211_set_cqm,
5168 .policy = nl80211_policy,
5169 .flags = GENL_ADMIN_PERM,
5170 },
5086}; 5171};
5087 5172
5088static struct genl_multicast_group nl80211_mlme_mcgrp = { 5173static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -5833,6 +5918,52 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev,
5833 nlmsg_free(msg); 5918 nlmsg_free(msg);
5834} 5919}
5835 5920
5921void
5922nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
5923 struct net_device *netdev,
5924 enum nl80211_cqm_rssi_threshold_event rssi_event,
5925 gfp_t gfp)
5926{
5927 struct sk_buff *msg;
5928 struct nlattr *pinfoattr;
5929 void *hdr;
5930
5931 msg = nlmsg_new(NLMSG_GOODSIZE, gfp);
5932 if (!msg)
5933 return;
5934
5935 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NOTIFY_CQM);
5936 if (!hdr) {
5937 nlmsg_free(msg);
5938 return;
5939 }
5940
5941 NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
5942 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
5943
5944 pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM);
5945 if (!pinfoattr)
5946 goto nla_put_failure;
5947
5948 NLA_PUT_U32(msg, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT,
5949 rssi_event);
5950
5951 nla_nest_end(msg, pinfoattr);
5952
5953 if (genlmsg_end(msg, hdr) < 0) {
5954 nlmsg_free(msg);
5955 return;
5956 }
5957
5958 genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
5959 nl80211_mlme_mcgrp.id, gfp);
5960 return;
5961
5962 nla_put_failure:
5963 genlmsg_cancel(msg, hdr);
5964 nlmsg_free(msg);
5965}
5966
5836static int nl80211_netlink_notify(struct notifier_block * nb, 5967static int nl80211_netlink_notify(struct notifier_block * nb,
5837 unsigned long state, 5968 unsigned long state,
5838 void *_notify) 5969 void *_notify)
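
The new NL80211_CMD_SET_CQM command expects the RSSI threshold and hysteresis nested inside NL80211_ATTR_CQM, exactly as nl80211_set_cqm() parses them above. A rough userspace sketch of issuing the command, assuming the libnl-3 generic-netlink helpers (set_cqm_rssi() is a hypothetical helper; error handling trimmed):

#include <stdint.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/nl80211.h>

static int set_cqm_rssi(int ifindex, int thold_dbm, unsigned int hyst_db)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg = nlmsg_alloc();
	struct nlattr *cqm;
	int family, err;

	genl_connect(sk);
	family = genl_ctrl_resolve(sk, "nl80211");

	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    NL80211_CMD_SET_CQM, 0);
	nla_put_u32(msg, NL80211_ATTR_IFINDEX, ifindex);

	/* Threshold and hysteresis travel nested under NL80211_ATTR_CQM,
	 * mirroring nl80211_attr_cqm_policy; the threshold must be <= 0. */
	cqm = nla_nest_start(msg, NL80211_ATTR_CQM);
	nla_put_u32(msg, NL80211_ATTR_CQM_RSSI_THOLD, (uint32_t)thold_dbm);
	nla_put_u32(msg, NL80211_ATTR_CQM_RSSI_HYST, hyst_db);
	nla_nest_end(msg, cqm);

	err = nl_send_auto(sk, msg);
	nlmsg_free(msg);
	nl_socket_free(sk);
	return err < 0 ? err : 0;
}
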
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 4ca511102c6c..2ad7fbc7d9f1 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -82,4 +82,10 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev,
82 const u8 *buf, size_t len, bool ack, 82 const u8 *buf, size_t len, bool ack,
83 gfp_t gfp); 83 gfp_t gfp);
84 84
85void
86nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
87 struct net_device *netdev,
88 enum nl80211_cqm_rssi_threshold_event rssi_event,
89 gfp_t gfp);
90
85#endif /* __NET_WIRELESS_NL80211_H */ 91#endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 4f5a47091fde..0ef17bc42bac 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -29,226 +29,226 @@ typedef int (*wext_ioctl_func)(struct net_device *, struct iwreq *,
29 * know about. 29 * know about.
30 */ 30 */
31static const struct iw_ioctl_description standard_ioctl[] = { 31static const struct iw_ioctl_description standard_ioctl[] = {
32 [SIOCSIWCOMMIT - SIOCIWFIRST] = { 32 [IW_IOCTL_IDX(SIOCSIWCOMMIT)] = {
33 .header_type = IW_HEADER_TYPE_NULL, 33 .header_type = IW_HEADER_TYPE_NULL,
34 }, 34 },
35 [SIOCGIWNAME - SIOCIWFIRST] = { 35 [IW_IOCTL_IDX(SIOCGIWNAME)] = {
36 .header_type = IW_HEADER_TYPE_CHAR, 36 .header_type = IW_HEADER_TYPE_CHAR,
37 .flags = IW_DESCR_FLAG_DUMP, 37 .flags = IW_DESCR_FLAG_DUMP,
38 }, 38 },
39 [SIOCSIWNWID - SIOCIWFIRST] = { 39 [IW_IOCTL_IDX(SIOCSIWNWID)] = {
40 .header_type = IW_HEADER_TYPE_PARAM, 40 .header_type = IW_HEADER_TYPE_PARAM,
41 .flags = IW_DESCR_FLAG_EVENT, 41 .flags = IW_DESCR_FLAG_EVENT,
42 }, 42 },
43 [SIOCGIWNWID - SIOCIWFIRST] = { 43 [IW_IOCTL_IDX(SIOCGIWNWID)] = {
44 .header_type = IW_HEADER_TYPE_PARAM, 44 .header_type = IW_HEADER_TYPE_PARAM,
45 .flags = IW_DESCR_FLAG_DUMP, 45 .flags = IW_DESCR_FLAG_DUMP,
46 }, 46 },
47 [SIOCSIWFREQ - SIOCIWFIRST] = { 47 [IW_IOCTL_IDX(SIOCSIWFREQ)] = {
48 .header_type = IW_HEADER_TYPE_FREQ, 48 .header_type = IW_HEADER_TYPE_FREQ,
49 .flags = IW_DESCR_FLAG_EVENT, 49 .flags = IW_DESCR_FLAG_EVENT,
50 }, 50 },
51 [SIOCGIWFREQ - SIOCIWFIRST] = { 51 [IW_IOCTL_IDX(SIOCGIWFREQ)] = {
52 .header_type = IW_HEADER_TYPE_FREQ, 52 .header_type = IW_HEADER_TYPE_FREQ,
53 .flags = IW_DESCR_FLAG_DUMP, 53 .flags = IW_DESCR_FLAG_DUMP,
54 }, 54 },
55 [SIOCSIWMODE - SIOCIWFIRST] = { 55 [IW_IOCTL_IDX(SIOCSIWMODE)] = {
56 .header_type = IW_HEADER_TYPE_UINT, 56 .header_type = IW_HEADER_TYPE_UINT,
57 .flags = IW_DESCR_FLAG_EVENT, 57 .flags = IW_DESCR_FLAG_EVENT,
58 }, 58 },
59 [SIOCGIWMODE - SIOCIWFIRST] = { 59 [IW_IOCTL_IDX(SIOCGIWMODE)] = {
60 .header_type = IW_HEADER_TYPE_UINT, 60 .header_type = IW_HEADER_TYPE_UINT,
61 .flags = IW_DESCR_FLAG_DUMP, 61 .flags = IW_DESCR_FLAG_DUMP,
62 }, 62 },
63 [SIOCSIWSENS - SIOCIWFIRST] = { 63 [IW_IOCTL_IDX(SIOCSIWSENS)] = {
64 .header_type = IW_HEADER_TYPE_PARAM, 64 .header_type = IW_HEADER_TYPE_PARAM,
65 }, 65 },
66 [SIOCGIWSENS - SIOCIWFIRST] = { 66 [IW_IOCTL_IDX(SIOCGIWSENS)] = {
67 .header_type = IW_HEADER_TYPE_PARAM, 67 .header_type = IW_HEADER_TYPE_PARAM,
68 }, 68 },
69 [SIOCSIWRANGE - SIOCIWFIRST] = { 69 [IW_IOCTL_IDX(SIOCSIWRANGE)] = {
70 .header_type = IW_HEADER_TYPE_NULL, 70 .header_type = IW_HEADER_TYPE_NULL,
71 }, 71 },
72 [SIOCGIWRANGE - SIOCIWFIRST] = { 72 [IW_IOCTL_IDX(SIOCGIWRANGE)] = {
73 .header_type = IW_HEADER_TYPE_POINT, 73 .header_type = IW_HEADER_TYPE_POINT,
74 .token_size = 1, 74 .token_size = 1,
75 .max_tokens = sizeof(struct iw_range), 75 .max_tokens = sizeof(struct iw_range),
76 .flags = IW_DESCR_FLAG_DUMP, 76 .flags = IW_DESCR_FLAG_DUMP,
77 }, 77 },
78 [SIOCSIWPRIV - SIOCIWFIRST] = { 78 [IW_IOCTL_IDX(SIOCSIWPRIV)] = {
79 .header_type = IW_HEADER_TYPE_NULL, 79 .header_type = IW_HEADER_TYPE_NULL,
80 }, 80 },
81 [SIOCGIWPRIV - SIOCIWFIRST] = { /* (handled directly by us) */ 81 [IW_IOCTL_IDX(SIOCGIWPRIV)] = { /* (handled directly by us) */
82 .header_type = IW_HEADER_TYPE_POINT, 82 .header_type = IW_HEADER_TYPE_POINT,
83 .token_size = sizeof(struct iw_priv_args), 83 .token_size = sizeof(struct iw_priv_args),
84 .max_tokens = 16, 84 .max_tokens = 16,
85 .flags = IW_DESCR_FLAG_NOMAX, 85 .flags = IW_DESCR_FLAG_NOMAX,
86 }, 86 },
87 [SIOCSIWSTATS - SIOCIWFIRST] = { 87 [IW_IOCTL_IDX(SIOCSIWSTATS)] = {
88 .header_type = IW_HEADER_TYPE_NULL, 88 .header_type = IW_HEADER_TYPE_NULL,
89 }, 89 },
90 [SIOCGIWSTATS - SIOCIWFIRST] = { /* (handled directly by us) */ 90 [IW_IOCTL_IDX(SIOCGIWSTATS)] = { /* (handled directly by us) */
91 .header_type = IW_HEADER_TYPE_POINT, 91 .header_type = IW_HEADER_TYPE_POINT,
92 .token_size = 1, 92 .token_size = 1,
93 .max_tokens = sizeof(struct iw_statistics), 93 .max_tokens = sizeof(struct iw_statistics),
94 .flags = IW_DESCR_FLAG_DUMP, 94 .flags = IW_DESCR_FLAG_DUMP,
95 }, 95 },
96 [SIOCSIWSPY - SIOCIWFIRST] = { 96 [IW_IOCTL_IDX(SIOCSIWSPY)] = {
97 .header_type = IW_HEADER_TYPE_POINT, 97 .header_type = IW_HEADER_TYPE_POINT,
98 .token_size = sizeof(struct sockaddr), 98 .token_size = sizeof(struct sockaddr),
99 .max_tokens = IW_MAX_SPY, 99 .max_tokens = IW_MAX_SPY,
100 }, 100 },
101 [SIOCGIWSPY - SIOCIWFIRST] = { 101 [IW_IOCTL_IDX(SIOCGIWSPY)] = {
102 .header_type = IW_HEADER_TYPE_POINT, 102 .header_type = IW_HEADER_TYPE_POINT,
103 .token_size = sizeof(struct sockaddr) + 103 .token_size = sizeof(struct sockaddr) +
104 sizeof(struct iw_quality), 104 sizeof(struct iw_quality),
105 .max_tokens = IW_MAX_SPY, 105 .max_tokens = IW_MAX_SPY,
106 }, 106 },
107 [SIOCSIWTHRSPY - SIOCIWFIRST] = { 107 [IW_IOCTL_IDX(SIOCSIWTHRSPY)] = {
108 .header_type = IW_HEADER_TYPE_POINT, 108 .header_type = IW_HEADER_TYPE_POINT,
109 .token_size = sizeof(struct iw_thrspy), 109 .token_size = sizeof(struct iw_thrspy),
110 .min_tokens = 1, 110 .min_tokens = 1,
111 .max_tokens = 1, 111 .max_tokens = 1,
112 }, 112 },
113 [SIOCGIWTHRSPY - SIOCIWFIRST] = { 113 [IW_IOCTL_IDX(SIOCGIWTHRSPY)] = {
114 .header_type = IW_HEADER_TYPE_POINT, 114 .header_type = IW_HEADER_TYPE_POINT,
115 .token_size = sizeof(struct iw_thrspy), 115 .token_size = sizeof(struct iw_thrspy),
116 .min_tokens = 1, 116 .min_tokens = 1,
117 .max_tokens = 1, 117 .max_tokens = 1,
118 }, 118 },
119 [SIOCSIWAP - SIOCIWFIRST] = { 119 [IW_IOCTL_IDX(SIOCSIWAP)] = {
120 .header_type = IW_HEADER_TYPE_ADDR, 120 .header_type = IW_HEADER_TYPE_ADDR,
121 }, 121 },
122 [SIOCGIWAP - SIOCIWFIRST] = { 122 [IW_IOCTL_IDX(SIOCGIWAP)] = {
123 .header_type = IW_HEADER_TYPE_ADDR, 123 .header_type = IW_HEADER_TYPE_ADDR,
124 .flags = IW_DESCR_FLAG_DUMP, 124 .flags = IW_DESCR_FLAG_DUMP,
125 }, 125 },
126 [SIOCSIWMLME - SIOCIWFIRST] = { 126 [IW_IOCTL_IDX(SIOCSIWMLME)] = {
127 .header_type = IW_HEADER_TYPE_POINT, 127 .header_type = IW_HEADER_TYPE_POINT,
128 .token_size = 1, 128 .token_size = 1,
129 .min_tokens = sizeof(struct iw_mlme), 129 .min_tokens = sizeof(struct iw_mlme),
130 .max_tokens = sizeof(struct iw_mlme), 130 .max_tokens = sizeof(struct iw_mlme),
131 }, 131 },
132 [SIOCGIWAPLIST - SIOCIWFIRST] = { 132 [IW_IOCTL_IDX(SIOCGIWAPLIST)] = {
133 .header_type = IW_HEADER_TYPE_POINT, 133 .header_type = IW_HEADER_TYPE_POINT,
134 .token_size = sizeof(struct sockaddr) + 134 .token_size = sizeof(struct sockaddr) +
135 sizeof(struct iw_quality), 135 sizeof(struct iw_quality),
136 .max_tokens = IW_MAX_AP, 136 .max_tokens = IW_MAX_AP,
137 .flags = IW_DESCR_FLAG_NOMAX, 137 .flags = IW_DESCR_FLAG_NOMAX,
138 }, 138 },
139 [SIOCSIWSCAN - SIOCIWFIRST] = { 139 [IW_IOCTL_IDX(SIOCSIWSCAN)] = {
140 .header_type = IW_HEADER_TYPE_POINT, 140 .header_type = IW_HEADER_TYPE_POINT,
141 .token_size = 1, 141 .token_size = 1,
142 .min_tokens = 0, 142 .min_tokens = 0,
143 .max_tokens = sizeof(struct iw_scan_req), 143 .max_tokens = sizeof(struct iw_scan_req),
144 }, 144 },
145 [SIOCGIWSCAN - SIOCIWFIRST] = { 145 [IW_IOCTL_IDX(SIOCGIWSCAN)] = {
146 .header_type = IW_HEADER_TYPE_POINT, 146 .header_type = IW_HEADER_TYPE_POINT,
147 .token_size = 1, 147 .token_size = 1,
148 .max_tokens = IW_SCAN_MAX_DATA, 148 .max_tokens = IW_SCAN_MAX_DATA,
149 .flags = IW_DESCR_FLAG_NOMAX, 149 .flags = IW_DESCR_FLAG_NOMAX,
150 }, 150 },
151 [SIOCSIWESSID - SIOCIWFIRST] = { 151 [IW_IOCTL_IDX(SIOCSIWESSID)] = {
152 .header_type = IW_HEADER_TYPE_POINT, 152 .header_type = IW_HEADER_TYPE_POINT,
153 .token_size = 1, 153 .token_size = 1,
154 .max_tokens = IW_ESSID_MAX_SIZE, 154 .max_tokens = IW_ESSID_MAX_SIZE,
155 .flags = IW_DESCR_FLAG_EVENT, 155 .flags = IW_DESCR_FLAG_EVENT,
156 }, 156 },
157 [SIOCGIWESSID - SIOCIWFIRST] = { 157 [IW_IOCTL_IDX(SIOCGIWESSID)] = {
158 .header_type = IW_HEADER_TYPE_POINT, 158 .header_type = IW_HEADER_TYPE_POINT,
159 .token_size = 1, 159 .token_size = 1,
160 .max_tokens = IW_ESSID_MAX_SIZE, 160 .max_tokens = IW_ESSID_MAX_SIZE,
161 .flags = IW_DESCR_FLAG_DUMP, 161 .flags = IW_DESCR_FLAG_DUMP,
162 }, 162 },
163 [SIOCSIWNICKN - SIOCIWFIRST] = { 163 [IW_IOCTL_IDX(SIOCSIWNICKN)] = {
164 .header_type = IW_HEADER_TYPE_POINT, 164 .header_type = IW_HEADER_TYPE_POINT,
165 .token_size = 1, 165 .token_size = 1,
166 .max_tokens = IW_ESSID_MAX_SIZE, 166 .max_tokens = IW_ESSID_MAX_SIZE,
167 }, 167 },
168 [SIOCGIWNICKN - SIOCIWFIRST] = { 168 [IW_IOCTL_IDX(SIOCGIWNICKN)] = {
169 .header_type = IW_HEADER_TYPE_POINT, 169 .header_type = IW_HEADER_TYPE_POINT,
170 .token_size = 1, 170 .token_size = 1,
171 .max_tokens = IW_ESSID_MAX_SIZE, 171 .max_tokens = IW_ESSID_MAX_SIZE,
172 }, 172 },
173 [SIOCSIWRATE - SIOCIWFIRST] = { 173 [IW_IOCTL_IDX(SIOCSIWRATE)] = {
174 .header_type = IW_HEADER_TYPE_PARAM, 174 .header_type = IW_HEADER_TYPE_PARAM,
175 }, 175 },
176 [SIOCGIWRATE - SIOCIWFIRST] = { 176 [IW_IOCTL_IDX(SIOCGIWRATE)] = {
177 .header_type = IW_HEADER_TYPE_PARAM, 177 .header_type = IW_HEADER_TYPE_PARAM,
178 }, 178 },
179 [SIOCSIWRTS - SIOCIWFIRST] = { 179 [IW_IOCTL_IDX(SIOCSIWRTS)] = {
180 .header_type = IW_HEADER_TYPE_PARAM, 180 .header_type = IW_HEADER_TYPE_PARAM,
181 }, 181 },
182 [SIOCGIWRTS - SIOCIWFIRST] = { 182 [IW_IOCTL_IDX(SIOCGIWRTS)] = {
183 .header_type = IW_HEADER_TYPE_PARAM, 183 .header_type = IW_HEADER_TYPE_PARAM,
184 }, 184 },
185 [SIOCSIWFRAG - SIOCIWFIRST] = { 185 [IW_IOCTL_IDX(SIOCSIWFRAG)] = {
186 .header_type = IW_HEADER_TYPE_PARAM, 186 .header_type = IW_HEADER_TYPE_PARAM,
187 }, 187 },
188 [SIOCGIWFRAG - SIOCIWFIRST] = { 188 [IW_IOCTL_IDX(SIOCGIWFRAG)] = {
189 .header_type = IW_HEADER_TYPE_PARAM, 189 .header_type = IW_HEADER_TYPE_PARAM,
190 }, 190 },
191 [SIOCSIWTXPOW - SIOCIWFIRST] = { 191 [IW_IOCTL_IDX(SIOCSIWTXPOW)] = {
192 .header_type = IW_HEADER_TYPE_PARAM, 192 .header_type = IW_HEADER_TYPE_PARAM,
193 }, 193 },
194 [SIOCGIWTXPOW - SIOCIWFIRST] = { 194 [IW_IOCTL_IDX(SIOCGIWTXPOW)] = {
195 .header_type = IW_HEADER_TYPE_PARAM, 195 .header_type = IW_HEADER_TYPE_PARAM,
196 }, 196 },
197 [SIOCSIWRETRY - SIOCIWFIRST] = { 197 [IW_IOCTL_IDX(SIOCSIWRETRY)] = {
198 .header_type = IW_HEADER_TYPE_PARAM, 198 .header_type = IW_HEADER_TYPE_PARAM,
199 }, 199 },
200 [SIOCGIWRETRY - SIOCIWFIRST] = { 200 [IW_IOCTL_IDX(SIOCGIWRETRY)] = {
201 .header_type = IW_HEADER_TYPE_PARAM, 201 .header_type = IW_HEADER_TYPE_PARAM,
202 }, 202 },
203 [SIOCSIWENCODE - SIOCIWFIRST] = { 203 [IW_IOCTL_IDX(SIOCSIWENCODE)] = {
204 .header_type = IW_HEADER_TYPE_POINT, 204 .header_type = IW_HEADER_TYPE_POINT,
205 .token_size = 1, 205 .token_size = 1,
206 .max_tokens = IW_ENCODING_TOKEN_MAX, 206 .max_tokens = IW_ENCODING_TOKEN_MAX,
207 .flags = IW_DESCR_FLAG_EVENT | IW_DESCR_FLAG_RESTRICT, 207 .flags = IW_DESCR_FLAG_EVENT | IW_DESCR_FLAG_RESTRICT,
208 }, 208 },
209 [SIOCGIWENCODE - SIOCIWFIRST] = { 209 [IW_IOCTL_IDX(SIOCGIWENCODE)] = {
210 .header_type = IW_HEADER_TYPE_POINT, 210 .header_type = IW_HEADER_TYPE_POINT,
211 .token_size = 1, 211 .token_size = 1,
212 .max_tokens = IW_ENCODING_TOKEN_MAX, 212 .max_tokens = IW_ENCODING_TOKEN_MAX,
213 .flags = IW_DESCR_FLAG_DUMP | IW_DESCR_FLAG_RESTRICT, 213 .flags = IW_DESCR_FLAG_DUMP | IW_DESCR_FLAG_RESTRICT,
214 }, 214 },
215 [SIOCSIWPOWER - SIOCIWFIRST] = { 215 [IW_IOCTL_IDX(SIOCSIWPOWER)] = {
216 .header_type = IW_HEADER_TYPE_PARAM, 216 .header_type = IW_HEADER_TYPE_PARAM,
217 }, 217 },
218 [SIOCGIWPOWER - SIOCIWFIRST] = { 218 [IW_IOCTL_IDX(SIOCGIWPOWER)] = {
219 .header_type = IW_HEADER_TYPE_PARAM, 219 .header_type = IW_HEADER_TYPE_PARAM,
220 }, 220 },
221 [SIOCSIWGENIE - SIOCIWFIRST] = { 221 [IW_IOCTL_IDX(SIOCSIWGENIE)] = {
222 .header_type = IW_HEADER_TYPE_POINT, 222 .header_type = IW_HEADER_TYPE_POINT,
223 .token_size = 1, 223 .token_size = 1,
224 .max_tokens = IW_GENERIC_IE_MAX, 224 .max_tokens = IW_GENERIC_IE_MAX,
225 }, 225 },
226 [SIOCGIWGENIE - SIOCIWFIRST] = { 226 [IW_IOCTL_IDX(SIOCGIWGENIE)] = {
227 .header_type = IW_HEADER_TYPE_POINT, 227 .header_type = IW_HEADER_TYPE_POINT,
228 .token_size = 1, 228 .token_size = 1,
229 .max_tokens = IW_GENERIC_IE_MAX, 229 .max_tokens = IW_GENERIC_IE_MAX,
230 }, 230 },
231 [SIOCSIWAUTH - SIOCIWFIRST] = { 231 [IW_IOCTL_IDX(SIOCSIWAUTH)] = {
232 .header_type = IW_HEADER_TYPE_PARAM, 232 .header_type = IW_HEADER_TYPE_PARAM,
233 }, 233 },
234 [SIOCGIWAUTH - SIOCIWFIRST] = { 234 [IW_IOCTL_IDX(SIOCGIWAUTH)] = {
235 .header_type = IW_HEADER_TYPE_PARAM, 235 .header_type = IW_HEADER_TYPE_PARAM,
236 }, 236 },
237 [SIOCSIWENCODEEXT - SIOCIWFIRST] = { 237 [IW_IOCTL_IDX(SIOCSIWENCODEEXT)] = {
238 .header_type = IW_HEADER_TYPE_POINT, 238 .header_type = IW_HEADER_TYPE_POINT,
239 .token_size = 1, 239 .token_size = 1,
240 .min_tokens = sizeof(struct iw_encode_ext), 240 .min_tokens = sizeof(struct iw_encode_ext),
241 .max_tokens = sizeof(struct iw_encode_ext) + 241 .max_tokens = sizeof(struct iw_encode_ext) +
242 IW_ENCODING_TOKEN_MAX, 242 IW_ENCODING_TOKEN_MAX,
243 }, 243 },
244 [SIOCGIWENCODEEXT - SIOCIWFIRST] = { 244 [IW_IOCTL_IDX(SIOCGIWENCODEEXT)] = {
245 .header_type = IW_HEADER_TYPE_POINT, 245 .header_type = IW_HEADER_TYPE_POINT,
246 .token_size = 1, 246 .token_size = 1,
247 .min_tokens = sizeof(struct iw_encode_ext), 247 .min_tokens = sizeof(struct iw_encode_ext),
248 .max_tokens = sizeof(struct iw_encode_ext) + 248 .max_tokens = sizeof(struct iw_encode_ext) +
249 IW_ENCODING_TOKEN_MAX, 249 IW_ENCODING_TOKEN_MAX,
250 }, 250 },
251 [SIOCSIWPMKSA - SIOCIWFIRST] = { 251 [IW_IOCTL_IDX(SIOCSIWPMKSA)] = {
252 .header_type = IW_HEADER_TYPE_POINT, 252 .header_type = IW_HEADER_TYPE_POINT,
253 .token_size = 1, 253 .token_size = 1,
254 .min_tokens = sizeof(struct iw_pmksa), 254 .min_tokens = sizeof(struct iw_pmksa),
@@ -262,44 +262,44 @@ static const unsigned standard_ioctl_num = ARRAY_SIZE(standard_ioctl);
262 * we know about. 262 * we know about.
263 */ 263 */
264static const struct iw_ioctl_description standard_event[] = { 264static const struct iw_ioctl_description standard_event[] = {
265 [IWEVTXDROP - IWEVFIRST] = { 265 [IW_EVENT_IDX(IWEVTXDROP)] = {
266 .header_type = IW_HEADER_TYPE_ADDR, 266 .header_type = IW_HEADER_TYPE_ADDR,
267 }, 267 },
268 [IWEVQUAL - IWEVFIRST] = { 268 [IW_EVENT_IDX(IWEVQUAL)] = {
269 .header_type = IW_HEADER_TYPE_QUAL, 269 .header_type = IW_HEADER_TYPE_QUAL,
270 }, 270 },
271 [IWEVCUSTOM - IWEVFIRST] = { 271 [IW_EVENT_IDX(IWEVCUSTOM)] = {
272 .header_type = IW_HEADER_TYPE_POINT, 272 .header_type = IW_HEADER_TYPE_POINT,
273 .token_size = 1, 273 .token_size = 1,
274 .max_tokens = IW_CUSTOM_MAX, 274 .max_tokens = IW_CUSTOM_MAX,
275 }, 275 },
276 [IWEVREGISTERED - IWEVFIRST] = { 276 [IW_EVENT_IDX(IWEVREGISTERED)] = {
277 .header_type = IW_HEADER_TYPE_ADDR, 277 .header_type = IW_HEADER_TYPE_ADDR,
278 }, 278 },
279 [IWEVEXPIRED - IWEVFIRST] = { 279 [IW_EVENT_IDX(IWEVEXPIRED)] = {
280 .header_type = IW_HEADER_TYPE_ADDR, 280 .header_type = IW_HEADER_TYPE_ADDR,
281 }, 281 },
282 [IWEVGENIE - IWEVFIRST] = { 282 [IW_EVENT_IDX(IWEVGENIE)] = {
283 .header_type = IW_HEADER_TYPE_POINT, 283 .header_type = IW_HEADER_TYPE_POINT,
284 .token_size = 1, 284 .token_size = 1,
285 .max_tokens = IW_GENERIC_IE_MAX, 285 .max_tokens = IW_GENERIC_IE_MAX,
286 }, 286 },
287 [IWEVMICHAELMICFAILURE - IWEVFIRST] = { 287 [IW_EVENT_IDX(IWEVMICHAELMICFAILURE)] = {
288 .header_type = IW_HEADER_TYPE_POINT, 288 .header_type = IW_HEADER_TYPE_POINT,
289 .token_size = 1, 289 .token_size = 1,
290 .max_tokens = sizeof(struct iw_michaelmicfailure), 290 .max_tokens = sizeof(struct iw_michaelmicfailure),
291 }, 291 },
292 [IWEVASSOCREQIE - IWEVFIRST] = { 292 [IW_EVENT_IDX(IWEVASSOCREQIE)] = {
293 .header_type = IW_HEADER_TYPE_POINT, 293 .header_type = IW_HEADER_TYPE_POINT,
294 .token_size = 1, 294 .token_size = 1,
295 .max_tokens = IW_GENERIC_IE_MAX, 295 .max_tokens = IW_GENERIC_IE_MAX,
296 }, 296 },
297 [IWEVASSOCRESPIE - IWEVFIRST] = { 297 [IW_EVENT_IDX(IWEVASSOCRESPIE)] = {
298 .header_type = IW_HEADER_TYPE_POINT, 298 .header_type = IW_HEADER_TYPE_POINT,
299 .token_size = 1, 299 .token_size = 1,
300 .max_tokens = IW_GENERIC_IE_MAX, 300 .max_tokens = IW_GENERIC_IE_MAX,
301 }, 301 },
302 [IWEVPMKIDCAND - IWEVFIRST] = { 302 [IW_EVENT_IDX(IWEVPMKIDCAND)] = {
303 .header_type = IW_HEADER_TYPE_POINT, 303 .header_type = IW_HEADER_TYPE_POINT,
304 .token_size = 1, 304 .token_size = 1,
305 .max_tokens = sizeof(struct iw_pmkid_cand), 305 .max_tokens = sizeof(struct iw_pmkid_cand),
@@ -450,11 +450,11 @@ void wireless_send_event(struct net_device * dev,
450 450
451 /* Get the description of the Event */ 451 /* Get the description of the Event */
452 if (cmd <= SIOCIWLAST) { 452 if (cmd <= SIOCIWLAST) {
453 cmd_index = cmd - SIOCIWFIRST; 453 cmd_index = IW_IOCTL_IDX(cmd);
454 if (cmd_index < standard_ioctl_num) 454 if (cmd_index < standard_ioctl_num)
455 descr = &(standard_ioctl[cmd_index]); 455 descr = &(standard_ioctl[cmd_index]);
456 } else { 456 } else {
457 cmd_index = cmd - IWEVFIRST; 457 cmd_index = IW_EVENT_IDX(cmd);
458 if (cmd_index < standard_event_num) 458 if (cmd_index < standard_event_num)
459 descr = &(standard_event[cmd_index]); 459 descr = &(standard_event[cmd_index]);
460 } 460 }
@@ -663,7 +663,7 @@ static iw_handler get_handler(struct net_device *dev, unsigned int cmd)
663 return NULL; 663 return NULL;
664 664
665 /* Try as a standard command */ 665 /* Try as a standard command */
666 index = cmd - SIOCIWFIRST; 666 index = IW_IOCTL_IDX(cmd);
667 if (index < handlers->num_standard) 667 if (index < handlers->num_standard)
668 return handlers->standard[index]; 668 return handlers->standard[index];
669 669
@@ -955,9 +955,9 @@ static int ioctl_standard_call(struct net_device * dev,
955 int ret = -EINVAL; 955 int ret = -EINVAL;
956 956
957 /* Get the description of the IOCTL */ 957 /* Get the description of the IOCTL */
958 if ((cmd - SIOCIWFIRST) >= standard_ioctl_num) 958 if (IW_IOCTL_IDX(cmd) >= standard_ioctl_num)
959 return -EOPNOTSUPP; 959 return -EOPNOTSUPP;
960 descr = &(standard_ioctl[cmd - SIOCIWFIRST]); 960 descr = &(standard_ioctl[IW_IOCTL_IDX(cmd)]);
961 961
962 /* Check if we have a pointer to user space data or not */ 962 /* Check if we have a pointer to user space data or not */
963 if (descr->header_type != IW_HEADER_TYPE_POINT) { 963 if (descr->header_type != IW_HEADER_TYPE_POINT) {
@@ -1013,7 +1013,7 @@ static int compat_standard_call(struct net_device *dev,
1013 struct iw_point iwp; 1013 struct iw_point iwp;
1014 int err; 1014 int err;
1015 1015
1016 descr = standard_ioctl + (cmd - SIOCIWFIRST); 1016 descr = standard_ioctl + IW_IOCTL_IDX(cmd);
1017 1017
1018 if (descr->header_type != IW_HEADER_TYPE_POINT) 1018 if (descr->header_type != IW_HEADER_TYPE_POINT)
1019 return ioctl_standard_call(dev, iwr, cmd, info, handler); 1019 return ioctl_standard_call(dev, iwr, cmd, info, handler);
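
The wext-core.c hunks are a mechanical conversion: every open-coded cmd - SIOCIWFIRST or cmd - IWEVFIRST index computation is replaced by a macro. Judging from the one-for-one substitutions above, the macros (defined in the wireless headers, which are not part of this diff) presumably expand to:

/* Presumed expansions, inferred from the 1:1 replacements above. */
#define IW_IOCTL_IDX(cmd)	((cmd) - SIOCIWFIRST)
#define IW_EVENT_IDX(cmd)	((cmd) - IWEVFIRST)
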
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 843e066649cb..7430ac26ec49 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -37,6 +37,8 @@
37DEFINE_MUTEX(xfrm_cfg_mutex); 37DEFINE_MUTEX(xfrm_cfg_mutex);
38EXPORT_SYMBOL(xfrm_cfg_mutex); 38EXPORT_SYMBOL(xfrm_cfg_mutex);
39 39
40static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock);
41static struct dst_entry *xfrm_policy_sk_bundles;
40static DEFINE_RWLOCK(xfrm_policy_lock); 42static DEFINE_RWLOCK(xfrm_policy_lock);
41 43
42static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); 44static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
@@ -44,12 +46,10 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
44 46
45static struct kmem_cache *xfrm_dst_cache __read_mostly; 47static struct kmem_cache *xfrm_dst_cache __read_mostly;
46 48
47static HLIST_HEAD(xfrm_policy_gc_list);
48static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
49
50static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); 49static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
51static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); 50static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
52static void xfrm_init_pmtu(struct dst_entry *dst); 51static void xfrm_init_pmtu(struct dst_entry *dst);
52static int stale_bundle(struct dst_entry *dst);
53 53
54static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, 54static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
55 int dir); 55 int dir);
@@ -156,7 +156,7 @@ static void xfrm_policy_timer(unsigned long data)
156 156
157 read_lock(&xp->lock); 157 read_lock(&xp->lock);
158 158
159 if (xp->walk.dead) 159 if (unlikely(xp->walk.dead))
160 goto out; 160 goto out;
161 161
162 dir = xfrm_policy_id2dir(xp->index); 162 dir = xfrm_policy_id2dir(xp->index);
@@ -216,6 +216,35 @@ expired:
216 xfrm_pol_put(xp); 216 xfrm_pol_put(xp);
217} 217}
218 218
219static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
220{
221 struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
222
223 if (unlikely(pol->walk.dead))
224 flo = NULL;
225 else
226 xfrm_pol_hold(pol);
227
228 return flo;
229}
230
231static int xfrm_policy_flo_check(struct flow_cache_object *flo)
232{
233 struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
234
235 return !pol->walk.dead;
236}
237
238static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
239{
240 xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
241}
242
243static const struct flow_cache_ops xfrm_policy_fc_ops = {
244 .get = xfrm_policy_flo_get,
245 .check = xfrm_policy_flo_check,
246 .delete = xfrm_policy_flo_delete,
247};
219 248
220/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 249/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
221 * SPD calls. 250 * SPD calls.
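
With this change xfrm policies become first-class flow cache entries: each policy embeds a struct flow_cache_object whose ops (xfrm_policy_fc_ops above) tell the generic cache how to take a reference on, validate, and drop an entry. The cache-side types live outside this file; based on the callbacks assigned here they presumably look like:

/* Presumed declarations from the flow cache rework (net/core/flow.c in
 * the diffstat); inferred from the callbacks assigned above. */
struct flow_cache_object {
	const struct flow_cache_ops *ops;
};

struct flow_cache_ops {
	struct flow_cache_object *(*get)(struct flow_cache_object *obj);
	int (*check)(struct flow_cache_object *obj);
	void (*delete)(struct flow_cache_object *obj);
};
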
@@ -236,6 +265,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
236 atomic_set(&policy->refcnt, 1); 265 atomic_set(&policy->refcnt, 1);
237 setup_timer(&policy->timer, xfrm_policy_timer, 266 setup_timer(&policy->timer, xfrm_policy_timer,
238 (unsigned long)policy); 267 (unsigned long)policy);
268 policy->flo.ops = &xfrm_policy_fc_ops;
239 } 269 }
240 return policy; 270 return policy;
241} 271}
@@ -247,8 +277,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
247{ 277{
248 BUG_ON(!policy->walk.dead); 278 BUG_ON(!policy->walk.dead);
249 279
250 BUG_ON(policy->bundles);
251
252 if (del_timer(&policy->timer)) 280 if (del_timer(&policy->timer))
253 BUG(); 281 BUG();
254 282
@@ -257,63 +285,20 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
257} 285}
258EXPORT_SYMBOL(xfrm_policy_destroy); 286EXPORT_SYMBOL(xfrm_policy_destroy);
259 287
260static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
261{
262 struct dst_entry *dst;
263
264 while ((dst = policy->bundles) != NULL) {
265 policy->bundles = dst->next;
266 dst_free(dst);
267 }
268
269 if (del_timer(&policy->timer))
270 atomic_dec(&policy->refcnt);
271
272 if (atomic_read(&policy->refcnt) > 1)
273 flow_cache_flush();
274
275 xfrm_pol_put(policy);
276}
277
278static void xfrm_policy_gc_task(struct work_struct *work)
279{
280 struct xfrm_policy *policy;
281 struct hlist_node *entry, *tmp;
282 struct hlist_head gc_list;
283
284 spin_lock_bh(&xfrm_policy_gc_lock);
285 gc_list.first = xfrm_policy_gc_list.first;
286 INIT_HLIST_HEAD(&xfrm_policy_gc_list);
287 spin_unlock_bh(&xfrm_policy_gc_lock);
288
289 hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
290 xfrm_policy_gc_kill(policy);
291}
292static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task);
293
294/* Rule must be locked. Release descentant resources, announce 288/* Rule must be locked. Release descentant resources, announce
295 * entry dead. The rule must be unlinked from lists to the moment. 289 * entry dead. The rule must be unlinked from lists to the moment.
296 */ 290 */
297 291
298static void xfrm_policy_kill(struct xfrm_policy *policy) 292static void xfrm_policy_kill(struct xfrm_policy *policy)
299{ 293{
300 int dead;
301
302 write_lock_bh(&policy->lock);
303 dead = policy->walk.dead;
304 policy->walk.dead = 1; 294 policy->walk.dead = 1;
305 write_unlock_bh(&policy->lock);
306 295
307 if (unlikely(dead)) { 296 atomic_inc(&policy->genid);
308 WARN_ON(1);
309 return;
310 }
311 297
312 spin_lock_bh(&xfrm_policy_gc_lock); 298 if (del_timer(&policy->timer))
313 hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); 299 xfrm_pol_put(policy);
314 spin_unlock_bh(&xfrm_policy_gc_lock);
315 300
316 schedule_work(&xfrm_policy_gc_work); 301 xfrm_pol_put(policy);
317} 302}
318 303
319static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; 304static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
@@ -555,7 +540,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
555 struct xfrm_policy *delpol; 540 struct xfrm_policy *delpol;
556 struct hlist_head *chain; 541 struct hlist_head *chain;
557 struct hlist_node *entry, *newpos; 542 struct hlist_node *entry, *newpos;
558 struct dst_entry *gc_list;
559 u32 mark = policy->mark.v & policy->mark.m; 543 u32 mark = policy->mark.v & policy->mark.m;
560 544
561 write_lock_bh(&xfrm_policy_lock); 545 write_lock_bh(&xfrm_policy_lock);
@@ -605,34 +589,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
605 else if (xfrm_bydst_should_resize(net, dir, NULL)) 589 else if (xfrm_bydst_should_resize(net, dir, NULL))
606 schedule_work(&net->xfrm.policy_hash_work); 590 schedule_work(&net->xfrm.policy_hash_work);
607 591
608 read_lock_bh(&xfrm_policy_lock);
609 gc_list = NULL;
610 entry = &policy->bydst;
611 hlist_for_each_entry_continue(policy, entry, bydst) {
612 struct dst_entry *dst;
613
614 write_lock(&policy->lock);
615 dst = policy->bundles;
616 if (dst) {
617 struct dst_entry *tail = dst;
618 while (tail->next)
619 tail = tail->next;
620 tail->next = gc_list;
621 gc_list = dst;
622
623 policy->bundles = NULL;
624 }
625 write_unlock(&policy->lock);
626 }
627 read_unlock_bh(&xfrm_policy_lock);
628
629 while (gc_list) {
630 struct dst_entry *dst = gc_list;
631
632 gc_list = dst->next;
633 dst_free(dst);
634 }
635
636 return 0; 592 return 0;
637} 593}
638EXPORT_SYMBOL(xfrm_policy_insert); 594EXPORT_SYMBOL(xfrm_policy_insert);
@@ -671,10 +627,8 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
671 } 627 }
672 write_unlock_bh(&xfrm_policy_lock); 628 write_unlock_bh(&xfrm_policy_lock);
673 629
674 if (ret && delete) { 630 if (ret && delete)
675 atomic_inc(&flow_cache_genid);
676 xfrm_policy_kill(ret); 631 xfrm_policy_kill(ret);
677 }
678 return ret; 632 return ret;
679} 633}
680EXPORT_SYMBOL(xfrm_policy_bysel_ctx); 634EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
@@ -713,10 +667,8 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
713 } 667 }
714 write_unlock_bh(&xfrm_policy_lock); 668 write_unlock_bh(&xfrm_policy_lock);
715 669
716 if (ret && delete) { 670 if (ret && delete)
717 atomic_inc(&flow_cache_genid);
718 xfrm_policy_kill(ret); 671 xfrm_policy_kill(ret);
719 }
720 return ret; 672 return ret;
721} 673}
722EXPORT_SYMBOL(xfrm_policy_byid); 674EXPORT_SYMBOL(xfrm_policy_byid);
@@ -776,7 +728,6 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi
776int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) 728int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
777{ 729{
778 int dir, err = 0, cnt = 0; 730 int dir, err = 0, cnt = 0;
779 struct xfrm_policy *dp;
780 731
781 write_lock_bh(&xfrm_policy_lock); 732 write_lock_bh(&xfrm_policy_lock);
782 733
@@ -794,10 +745,9 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
794 &net->xfrm.policy_inexact[dir], bydst) { 745 &net->xfrm.policy_inexact[dir], bydst) {
795 if (pol->type != type) 746 if (pol->type != type)
796 continue; 747 continue;
797 dp = __xfrm_policy_unlink(pol, dir); 748 __xfrm_policy_unlink(pol, dir);
798 write_unlock_bh(&xfrm_policy_lock); 749 write_unlock_bh(&xfrm_policy_lock);
799 if (dp) 750 cnt++;
800 cnt++;
801 751
802 xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, 752 xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
803 audit_info->sessionid, 753 audit_info->sessionid,
@@ -816,10 +766,9 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
816 bydst) { 766 bydst) {
817 if (pol->type != type) 767 if (pol->type != type)
818 continue; 768 continue;
819 dp = __xfrm_policy_unlink(pol, dir); 769 __xfrm_policy_unlink(pol, dir);
820 write_unlock_bh(&xfrm_policy_lock); 770 write_unlock_bh(&xfrm_policy_lock);
821 if (dp) 771 cnt++;
822 cnt++;
823 772
824 xfrm_audit_policy_delete(pol, 1, 773 xfrm_audit_policy_delete(pol, 1,
825 audit_info->loginuid, 774 audit_info->loginuid,
@@ -835,7 +784,6 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
835 } 784 }
836 if (!cnt) 785 if (!cnt)
837 err = -ESRCH; 786 err = -ESRCH;
838 atomic_inc(&flow_cache_genid);
839out: 787out:
840 write_unlock_bh(&xfrm_policy_lock); 788 write_unlock_bh(&xfrm_policy_lock);
841 return err; 789 return err;
@@ -989,32 +937,37 @@ fail:
989 return ret; 937 return ret;
990} 938}
991 939
992static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, 940static struct xfrm_policy *
993 u8 dir, void **objp, atomic_t **obj_refp) 941__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir)
994{ 942{
943#ifdef CONFIG_XFRM_SUB_POLICY
995 struct xfrm_policy *pol; 944 struct xfrm_policy *pol;
996 int err = 0;
997 945
998#ifdef CONFIG_XFRM_SUB_POLICY
999 pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); 946 pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
1000 if (IS_ERR(pol)) { 947 if (pol != NULL)
1001 err = PTR_ERR(pol); 948 return pol;
1002 pol = NULL;
1003 }
1004 if (pol || err)
1005 goto end;
1006#endif
1007 pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
1008 if (IS_ERR(pol)) {
1009 err = PTR_ERR(pol);
1010 pol = NULL;
1011 }
1012#ifdef CONFIG_XFRM_SUB_POLICY
1013end:
1014#endif 949#endif
1015 if ((*objp = (void *) pol) != NULL) 950 return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
1016 *obj_refp = &pol->refcnt; 951}
1017 return err; 952
953static struct flow_cache_object *
954xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
955 u8 dir, struct flow_cache_object *old_obj, void *ctx)
956{
957 struct xfrm_policy *pol;
958
959 if (old_obj)
960 xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
961
962 pol = __xfrm_policy_lookup(net, fl, family, dir);
963 if (IS_ERR_OR_NULL(pol))
964 return ERR_CAST(pol);
965
966 /* Resolver returns two references:
967 * one for cache and one for caller of flow_cache_lookup() */
968 xfrm_pol_hold(pol);
969
970 return &pol->flo;
1018} 971}
1019 972
1020static inline int policy_to_flow_dir(int dir) 973static inline int policy_to_flow_dir(int dir)
@@ -1104,8 +1057,6 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1104 pol = __xfrm_policy_unlink(pol, dir); 1057 pol = __xfrm_policy_unlink(pol, dir);
1105 write_unlock_bh(&xfrm_policy_lock); 1058 write_unlock_bh(&xfrm_policy_lock);
1106 if (pol) { 1059 if (pol) {
1107 if (dir < XFRM_POLICY_MAX)
1108 atomic_inc(&flow_cache_genid);
1109 xfrm_policy_kill(pol); 1060 xfrm_policy_kill(pol);
1110 return 0; 1061 return 0;
1111 } 1062 }
@@ -1132,6 +1083,9 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1132 __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); 1083 __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
1133 } 1084 }
1134 if (old_pol) 1085 if (old_pol)
1086 /* Unlinking succeeds always. This is the only function
1087 * allowed to delete or replace socket policy.
1088 */
1135 __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir); 1089 __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
1136 write_unlock_bh(&xfrm_policy_lock); 1090 write_unlock_bh(&xfrm_policy_lock);
1137 1091
@@ -1300,18 +1254,6 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
1300 * still valid. 1254 * still valid.
1301 */ 1255 */
1302 1256
1303static struct dst_entry *
1304xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
1305{
1306 struct dst_entry *x;
1307 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1308 if (unlikely(afinfo == NULL))
1309 return ERR_PTR(-EINVAL);
1310 x = afinfo->find_bundle(fl, policy);
1311 xfrm_policy_put_afinfo(afinfo);
1312 return x;
1313}
1314
1315static inline int xfrm_get_tos(struct flowi *fl, int family) 1257static inline int xfrm_get_tos(struct flowi *fl, int family)
1316{ 1258{
1317 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 1259 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -1327,6 +1269,54 @@ static inline int xfrm_get_tos(struct flowi *fl, int family)
1327 return tos; 1269 return tos;
1328} 1270}
1329 1271
1272static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
1273{
1274 struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1275 struct dst_entry *dst = &xdst->u.dst;
1276
1277 if (xdst->route == NULL) {
1278 /* Dummy bundle - if it has xfrms we were not
1279 * able to build bundle as template resolution failed.
1280 * It means we need to try again resolving. */
1281 if (xdst->num_xfrms > 0)
1282 return NULL;
1283 } else {
1284 /* Real bundle */
1285 if (stale_bundle(dst))
1286 return NULL;
1287 }
1288
1289 dst_hold(dst);
1290 return flo;
1291}
1292
1293static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
1294{
1295 struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1296 struct dst_entry *dst = &xdst->u.dst;
1297
1298 if (!xdst->route)
1299 return 0;
1300 if (stale_bundle(dst))
1301 return 0;
1302
1303 return 1;
1304}
1305
1306static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
1307{
1308 struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1309 struct dst_entry *dst = &xdst->u.dst;
1310
1311 dst_free(dst);
1312}
1313
1314static const struct flow_cache_ops xfrm_bundle_fc_ops = {
1315 .get = xfrm_bundle_flo_get,
1316 .check = xfrm_bundle_flo_check,
1317 .delete = xfrm_bundle_flo_delete,
1318};
1319
1330static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) 1320static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
1331{ 1321{
1332 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); 1322 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -1349,9 +1339,10 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
1349 BUG(); 1339 BUG();
1350 } 1340 }
1351 xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS); 1341 xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS);
1352
1353 xfrm_policy_put_afinfo(afinfo); 1342 xfrm_policy_put_afinfo(afinfo);
1354 1343
1344 xdst->flo.ops = &xfrm_bundle_fc_ops;
1345
1355 return xdst; 1346 return xdst;
1356} 1347}
1357 1348
@@ -1389,6 +1380,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1389 return err; 1380 return err;
1390} 1381}
1391 1382
1383
1392/* Allocate chain of dst_entry's, attach known xfrm's, calculate 1384/* Allocate chain of dst_entry's, attach known xfrm's, calculate
1393 * all the metrics... Shortly, bundle a bundle. 1385 * all the metrics... Shortly, bundle a bundle.
1394 */ 1386 */
@@ -1452,7 +1444,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1452 dst_hold(dst); 1444 dst_hold(dst);
1453 1445
1454 dst1->xfrm = xfrm[i]; 1446 dst1->xfrm = xfrm[i];
1455 xdst->genid = xfrm[i]->genid; 1447 xdst->xfrm_genid = xfrm[i]->genid;
1456 1448
1457 dst1->obsolete = -1; 1449 dst1->obsolete = -1;
1458 dst1->flags |= DST_HOST; 1450 dst1->flags |= DST_HOST;
@@ -1545,7 +1537,186 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
1545#endif 1537#endif
1546} 1538}
1547 1539
1548static int stale_bundle(struct dst_entry *dst); 1540static int xfrm_expand_policies(struct flowi *fl, u16 family,
1541 struct xfrm_policy **pols,
1542 int *num_pols, int *num_xfrms)
1543{
1544 int i;
1545
1546 if (*num_pols == 0 || !pols[0]) {
1547 *num_pols = 0;
1548 *num_xfrms = 0;
1549 return 0;
1550 }
1551 if (IS_ERR(pols[0]))
1552 return PTR_ERR(pols[0]);
1553
1554 *num_xfrms = pols[0]->xfrm_nr;
1555
1556#ifdef CONFIG_XFRM_SUB_POLICY
1557 if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
1558 pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1559 pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
1560 XFRM_POLICY_TYPE_MAIN,
1561 fl, family,
1562 XFRM_POLICY_OUT);
1563 if (pols[1]) {
1564 if (IS_ERR(pols[1])) {
1565 xfrm_pols_put(pols, *num_pols);
1566 return PTR_ERR(pols[1]);
1567 }
1568 (*num_pols) ++;
1569 (*num_xfrms) += pols[1]->xfrm_nr;
1570 }
1571 }
1572#endif
1573 for (i = 0; i < *num_pols; i++) {
1574 if (pols[i]->action != XFRM_POLICY_ALLOW) {
1575 *num_xfrms = -1;
1576 break;
1577 }
1578 }
1579
1580 return 0;
1581
1582}
1583
1584static struct xfrm_dst *
1585xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
1586 struct flowi *fl, u16 family,
1587 struct dst_entry *dst_orig)
1588{
1589 struct net *net = xp_net(pols[0]);
1590 struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
1591 struct dst_entry *dst;
1592 struct xfrm_dst *xdst;
1593 int err;
1594
1595 /* Try to instantiate a bundle */
1596 err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
1597 if (err < 0) {
1598 if (err != -EAGAIN)
1599 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
1600 return ERR_PTR(err);
1601 }
1602
1603 dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
1604 if (IS_ERR(dst)) {
1605 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
1606 return ERR_CAST(dst);
1607 }
1608
1609 xdst = (struct xfrm_dst *)dst;
1610 xdst->num_xfrms = err;
1611 if (num_pols > 1)
1612 err = xfrm_dst_update_parent(dst, &pols[1]->selector);
1613 else
1614 err = xfrm_dst_update_origin(dst, fl);
1615 if (unlikely(err)) {
1616 dst_free(dst);
1617 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1618 return ERR_PTR(err);
1619 }
1620
1621 xdst->num_pols = num_pols;
1622 memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
1623 xdst->policy_genid = atomic_read(&pols[0]->genid);
1624
1625 return xdst;
1626}
1627
1628static struct flow_cache_object *
1629xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir,
1630 struct flow_cache_object *oldflo, void *ctx)
1631{
1632 struct dst_entry *dst_orig = (struct dst_entry *)ctx;
1633 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1634 struct xfrm_dst *xdst, *new_xdst;
1635 int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
1636
1637 /* Check if the policies from old bundle are usable */
1638 xdst = NULL;
1639 if (oldflo) {
1640 xdst = container_of(oldflo, struct xfrm_dst, flo);
1641 num_pols = xdst->num_pols;
1642 num_xfrms = xdst->num_xfrms;
1643 pol_dead = 0;
1644 for (i = 0; i < num_pols; i++) {
1645 pols[i] = xdst->pols[i];
1646 pol_dead |= pols[i]->walk.dead;
1647 }
1648 if (pol_dead) {
1649 dst_free(&xdst->u.dst);
1650 xdst = NULL;
1651 num_pols = 0;
1652 num_xfrms = 0;
1653 oldflo = NULL;
1654 }
1655 }
1656
1657 /* Resolve policies to use if we couldn't get them from
1658 * previous cache entry */
1659 if (xdst == NULL) {
1660 num_pols = 1;
1661 pols[0] = __xfrm_policy_lookup(net, fl, family, dir);
1662 err = xfrm_expand_policies(fl, family, pols,
1663 &num_pols, &num_xfrms);
1664 if (err < 0)
1665 goto inc_error;
1666 if (num_pols == 0)
1667 return NULL;
1668 if (num_xfrms <= 0)
1669 goto make_dummy_bundle;
1670 }
1671
1672 new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
1673 if (IS_ERR(new_xdst)) {
1674 err = PTR_ERR(new_xdst);
1675 if (err != -EAGAIN)
1676 goto error;
1677 if (oldflo == NULL)
1678 goto make_dummy_bundle;
1679 dst_hold(&xdst->u.dst);
1680 return oldflo;
1681 }
1682
1683 /* Kill the previous bundle */
1684 if (xdst) {
1685 /* The policies were stolen for newly generated bundle */
1686 xdst->num_pols = 0;
1687 dst_free(&xdst->u.dst);
1688 }
1689
1690 /* Flow cache does not have reference, it dst_free()'s,
1691 * but we do need to return one reference for original caller */
1692 dst_hold(&new_xdst->u.dst);
1693 return &new_xdst->flo;
1694
1695make_dummy_bundle:
1696 /* We found policies, but there's no bundles to instantiate:
1697 * either because the policy blocks, has no transformations or
1698 * we could not build template (no xfrm_states).*/
1699 xdst = xfrm_alloc_dst(net, family);
1700 if (IS_ERR(xdst)) {
1701 xfrm_pols_put(pols, num_pols);
1702 return ERR_CAST(xdst);
1703 }
1704 xdst->num_pols = num_pols;
1705 xdst->num_xfrms = num_xfrms;
1706 memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
1707
1708 dst_hold(&xdst->u.dst);
1709 return &xdst->flo;
1710
1711inc_error:
1712 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
1713error:
1714 if (xdst != NULL)
1715 dst_free(&xdst->u.dst);
1716 else
1717 xfrm_pols_put(pols, num_pols);
1718 return ERR_PTR(err);
1719}
1549 1720
1550/* Main function: finds/creates a bundle for given flow. 1721/* Main function: finds/creates a bundle for given flow.
1551 * 1722 *
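
The xfrm_bundle_lookup() resolver added above is what the flow cache invokes on a miss or a stale entry; the rewritten __xfrm_lookup() below plugs it in via flow_cache_lookup(net, fl, family, dir, xfrm_bundle_lookup, dst_orig). The presumed lookup interface, inferred from those call sites (declared by the flow cache rework, not in this file), is roughly:

/* Presumed resolver-based lookup interface; per the comment in
 * xfrm_policy_lookup() above, the resolver returns an object holding two
 * references, one kept by the cache and one for the caller. */
typedef struct flow_cache_object *(*flow_resolve_t)(
		struct net *net, struct flowi *key, u16 family, u8 dir,
		struct flow_cache_object *oldobj, void *ctx);

struct flow_cache_object *flow_cache_lookup(struct net *net,
					     struct flowi *key,
					     u16 family, u8 dir,
					     flow_resolve_t resolver,
					     void *ctx);
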
@@ -1555,245 +1726,152 @@ static int stale_bundle(struct dst_entry *dst);
1555int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, 1726int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
1556 struct sock *sk, int flags) 1727 struct sock *sk, int flags)
1557{ 1728{
1558 struct xfrm_policy *policy;
1559 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; 1729 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1560 int npols; 1730 struct flow_cache_object *flo;
1561 int pol_dead; 1731 struct xfrm_dst *xdst;
1562 int xfrm_nr; 1732 struct dst_entry *dst, *dst_orig = *dst_p, *route;
1563 int pi; 1733 u16 family = dst_orig->ops->family;
1564 struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
1565 struct dst_entry *dst, *dst_orig = *dst_p;
1566 int nx = 0;
1567 int err;
1568 u32 genid;
1569 u16 family;
1570 u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); 1734 u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
1735 int i, err, num_pols, num_xfrms, drop_pols = 0;
1571 1736
1572restart: 1737restart:
1573 genid = atomic_read(&flow_cache_genid); 1738 dst = NULL;
1574 policy = NULL; 1739 xdst = NULL;
1575 for (pi = 0; pi < ARRAY_SIZE(pols); pi++) 1740 route = NULL;
1576 pols[pi] = NULL;
1577 npols = 0;
1578 pol_dead = 0;
1579 xfrm_nr = 0;
1580 1741
1581 if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { 1742 if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
1582 policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); 1743 num_pols = 1;
1583 err = PTR_ERR(policy); 1744 pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
1584 if (IS_ERR(policy)) { 1745 err = xfrm_expand_policies(fl, family, pols,
1585 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); 1746 &num_pols, &num_xfrms);
1747 if (err < 0)
1586 goto dropdst; 1748 goto dropdst;
1749
1750 if (num_pols) {
1751 if (num_xfrms <= 0) {
1752 drop_pols = num_pols;
1753 goto no_transform;
1754 }
1755
1756 xdst = xfrm_resolve_and_create_bundle(
1757 pols, num_pols, fl,
1758 family, dst_orig);
1759 if (IS_ERR(xdst)) {
1760 xfrm_pols_put(pols, num_pols);
1761 err = PTR_ERR(xdst);
1762 goto dropdst;
1763 }
1764
1765 spin_lock_bh(&xfrm_policy_sk_bundle_lock);
1766 xdst->u.dst.next = xfrm_policy_sk_bundles;
1767 xfrm_policy_sk_bundles = &xdst->u.dst;
1768 spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
1769
1770 route = xdst->route;
1587 } 1771 }
1588 } 1772 }
1589 1773
1590 if (!policy) { 1774 if (xdst == NULL) {
1591 /* To accelerate a bit... */ 1775 /* To accelerate a bit... */
1592 if ((dst_orig->flags & DST_NOXFRM) || 1776 if ((dst_orig->flags & DST_NOXFRM) ||
1593 !net->xfrm.policy_count[XFRM_POLICY_OUT]) 1777 !net->xfrm.policy_count[XFRM_POLICY_OUT])
1594 goto nopol; 1778 goto nopol;
1595 1779
1596 policy = flow_cache_lookup(net, fl, dst_orig->ops->family, 1780 flo = flow_cache_lookup(net, fl, family, dir,
1597 dir, xfrm_policy_lookup); 1781 xfrm_bundle_lookup, dst_orig);
1598 err = PTR_ERR(policy); 1782 if (flo == NULL)
1599 if (IS_ERR(policy)) { 1783 goto nopol;
1600 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); 1784 if (IS_ERR(flo)) {
+			err = PTR_ERR(flo);
 			goto dropdst;
 		}
+		xdst = container_of(flo, struct xfrm_dst, flo);
+
+		num_pols = xdst->num_pols;
+		num_xfrms = xdst->num_xfrms;
+		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols);
+		route = xdst->route;
+	}
+
+	dst = &xdst->u.dst;
+	if (route == NULL && num_xfrms > 0) {
+		/* The only case when xfrm_bundle_lookup() returns a
+		 * bundle with null route, is when the template could
+		 * not be resolved. It means policies are there, but
+		 * bundle could not be created, since we don't yet
+		 * have the xfrm_state's. We need to wait for KM to
+		 * negotiate new SA's or bail out with error.*/
+		if (net->xfrm.sysctl_larval_drop) {
+			/* EREMOTE tells the caller to generate
+			 * a one-shot blackhole route. */
+			dst_release(dst);
+			xfrm_pols_put(pols, num_pols);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
+			return -EREMOTE;
+		}
+		if (flags & XFRM_LOOKUP_WAIT) {
+			DECLARE_WAITQUEUE(wait, current);
+
+			add_wait_queue(&net->xfrm.km_waitq, &wait);
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule();
+			set_current_state(TASK_RUNNING);
+			remove_wait_queue(&net->xfrm.km_waitq, &wait);
+
+			if (!signal_pending(current)) {
+				dst_release(dst);
+				goto restart;
+			}
+
+			err = -ERESTART;
+		} else
+			err = -EAGAIN;
+
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
+		goto error;
 	}
 
-	if (!policy)
+no_transform:
+	if (num_pols == 0)
 		goto nopol;
 
-	family = dst_orig->ops->family;
-	pols[0] = policy;
-	npols ++;
-	xfrm_nr += pols[0]->xfrm_nr;
-
-	err = -ENOENT;
-	if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
+	if ((flags & XFRM_LOOKUP_ICMP) &&
+	    !(pols[0]->flags & XFRM_POLICY_ICMP)) {
+		err = -ENOENT;
 		goto error;
+	}
 
-	policy->curlft.use_time = get_seconds();
+	for (i = 0; i < num_pols; i++)
+		pols[i]->curlft.use_time = get_seconds();
 
-	switch (policy->action) {
-	default:
-	case XFRM_POLICY_BLOCK:
+	if (num_xfrms < 0) {
 		/* Prohibit the flow */
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
 		err = -EPERM;
 		goto error;
-
-	case XFRM_POLICY_ALLOW:
-#ifndef CONFIG_XFRM_SUB_POLICY
-		if (policy->xfrm_nr == 0) {
-			/* Flow passes not transformed. */
-			xfrm_pol_put(policy);
-			return 0;
-		}
-#endif
-
-		/* Try to find matching bundle.
-		 *
-		 * LATER: help from flow cache. It is optional, this
-		 * is required only for output policy.
-		 */
-		dst = xfrm_find_bundle(fl, policy, family);
-		if (IS_ERR(dst)) {
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-			err = PTR_ERR(dst);
-			goto error;
-		}
-
-		if (dst)
-			break;
-
-#ifdef CONFIG_XFRM_SUB_POLICY
-		if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
-			pols[1] = xfrm_policy_lookup_bytype(net,
-							    XFRM_POLICY_TYPE_MAIN,
-							    fl, family,
-							    XFRM_POLICY_OUT);
-			if (pols[1]) {
-				if (IS_ERR(pols[1])) {
-					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
-					err = PTR_ERR(pols[1]);
-					goto error;
-				}
-				if (pols[1]->action == XFRM_POLICY_BLOCK) {
-					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
-					err = -EPERM;
-					goto error;
-				}
-				npols ++;
-				xfrm_nr += pols[1]->xfrm_nr;
-			}
-		}
-
-		/*
-		 * Because neither flowi nor bundle information knows about
-		 * transformation template size. On more than one policy usage
-		 * we can realize whether all of them is bypass or not after
-		 * they are searched. See above not-transformed bypass
-		 * is surrounded by non-sub policy configuration, too.
-		 */
-		if (xfrm_nr == 0) {
-			/* Flow passes not transformed. */
-			xfrm_pols_put(pols, npols);
-			return 0;
-		}
-
-#endif
-		nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
-
-		if (unlikely(nx<0)) {
-			err = nx;
-			if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) {
-				/* EREMOTE tells the caller to generate
-				 * a one-shot blackhole route.
-				 */
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-				xfrm_pol_put(policy);
-				return -EREMOTE;
-			}
-			if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
-				DECLARE_WAITQUEUE(wait, current);
-
-				add_wait_queue(&net->xfrm.km_waitq, &wait);
-				set_current_state(TASK_INTERRUPTIBLE);
-				schedule();
-				set_current_state(TASK_RUNNING);
-				remove_wait_queue(&net->xfrm.km_waitq, &wait);
-
-				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
-
-				if (nx == -EAGAIN && signal_pending(current)) {
-					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-					err = -ERESTART;
-					goto error;
-				}
-				if (nx == -EAGAIN ||
-				    genid != atomic_read(&flow_cache_genid)) {
-					xfrm_pols_put(pols, npols);
-					goto restart;
-				}
-				err = nx;
-			}
-			if (err < 0) {
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-				goto error;
-			}
-		}
-		if (nx == 0) {
-			/* Flow passes not transformed. */
-			xfrm_pols_put(pols, npols);
-			return 0;
-		}
-
-		dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
-		err = PTR_ERR(dst);
-		if (IS_ERR(dst)) {
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
-			goto error;
-		}
-
-		for (pi = 0; pi < npols; pi++) {
-			read_lock_bh(&pols[pi]->lock);
-			pol_dead |= pols[pi]->walk.dead;
-			read_unlock_bh(&pols[pi]->lock);
-		}
-
-		write_lock_bh(&policy->lock);
-		if (unlikely(pol_dead || stale_bundle(dst))) {
-			/* Wow! While we worked on resolving, this
-			 * policy has gone. Retry. It is not paranoia,
-			 * we just cannot enlist new bundle to dead object.
-			 * We can't enlist stable bundles either.
-			 */
-			write_unlock_bh(&policy->lock);
-			dst_free(dst);
-
-			if (pol_dead)
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD);
-			else
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-			err = -EHOSTUNREACH;
-			goto error;
-		}
-
-		if (npols > 1)
-			err = xfrm_dst_update_parent(dst, &pols[1]->selector);
-		else
-			err = xfrm_dst_update_origin(dst, fl);
-		if (unlikely(err)) {
-			write_unlock_bh(&policy->lock);
-			dst_free(dst);
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-			goto error;
-		}
-
-		dst->next = policy->bundles;
-		policy->bundles = dst;
-		dst_hold(dst);
-		write_unlock_bh(&policy->lock);
+	} else if (num_xfrms > 0) {
+		/* Flow transformed */
+		*dst_p = dst;
+		dst_release(dst_orig);
+	} else {
+		/* Flow passes untransformed */
+		dst_release(dst);
 	}
-	*dst_p = dst;
-	dst_release(dst_orig);
-	xfrm_pols_put(pols, npols);
+ok:
+	xfrm_pols_put(pols, drop_pols);
 	return 0;
 
+nopol:
+	if (!(flags & XFRM_LOOKUP_ICMP))
+		goto ok;
+	err = -ENOENT;
 error:
-	xfrm_pols_put(pols, npols);
+	dst_release(dst);
 dropdst:
 	dst_release(dst_orig);
 	*dst_p = NULL;
+	xfrm_pols_put(pols, drop_pols);
 	return err;
-
-nopol:
-	err = -ENOENT;
-	if (flags & XFRM_LOOKUP_ICMP)
-		goto dropdst;
-	return 0;
 }
 EXPORT_SYMBOL(__xfrm_lookup);
 
@@ -1952,9 +2030,16 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		}
 	}
 
-	if (!pol)
-		pol = flow_cache_lookup(net, &fl, family, fl_dir,
-					xfrm_policy_lookup);
+	if (!pol) {
+		struct flow_cache_object *flo;
+
+		flo = flow_cache_lookup(net, &fl, family, fl_dir,
+					xfrm_policy_lookup, NULL);
+		if (IS_ERR_OR_NULL(flo))
+			pol = ERR_CAST(flo);
+		else
+			pol = container_of(flo, struct xfrm_policy, flo);
+	}
 
 	if (IS_ERR(pol)) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
@@ -2138,71 +2223,24 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
 	return dst;
 }
 
-static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
-{
-	struct dst_entry *dst, **dstp;
-
-	write_lock(&pol->lock);
-	dstp = &pol->bundles;
-	while ((dst=*dstp) != NULL) {
-		if (func(dst)) {
-			*dstp = dst->next;
-			dst->next = *gc_list_p;
-			*gc_list_p = dst;
-		} else {
-			dstp = &dst->next;
-		}
-	}
-	write_unlock(&pol->lock);
-}
-
-static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *))
+static void __xfrm_garbage_collect(struct net *net)
 {
-	struct dst_entry *gc_list = NULL;
-	int dir;
+	struct dst_entry *head, *next;
 
-	read_lock_bh(&xfrm_policy_lock);
-	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
-		struct xfrm_policy *pol;
-		struct hlist_node *entry;
-		struct hlist_head *table;
-		int i;
+	flow_cache_flush();
 
-		hlist_for_each_entry(pol, entry,
-				     &net->xfrm.policy_inexact[dir], bydst)
-			prune_one_bundle(pol, func, &gc_list);
+	spin_lock_bh(&xfrm_policy_sk_bundle_lock);
+	head = xfrm_policy_sk_bundles;
+	xfrm_policy_sk_bundles = NULL;
+	spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
 
-		table = net->xfrm.policy_bydst[dir].table;
-		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
-			hlist_for_each_entry(pol, entry, table + i, bydst)
-				prune_one_bundle(pol, func, &gc_list);
-		}
-	}
-	read_unlock_bh(&xfrm_policy_lock);
-
-	while (gc_list) {
-		struct dst_entry *dst = gc_list;
-		gc_list = dst->next;
-		dst_free(dst);
+	while (head) {
+		next = head->next;
+		dst_free(head);
+		head = next;
 	}
 }
 
-static int unused_bundle(struct dst_entry *dst)
-{
-	return !atomic_read(&dst->__refcnt);
-}
-
-static void __xfrm_garbage_collect(struct net *net)
-{
-	xfrm_prune_bundles(net, unused_bundle);
-}
-
-static int xfrm_flush_bundles(struct net *net)
-{
-	xfrm_prune_bundles(net, stale_bundle);
-	return 0;
-}
-
 static void xfrm_init_pmtu(struct dst_entry *dst)
 {
 	do {
@@ -2260,7 +2298,9 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
 			return 0;
 		if (dst->xfrm->km.state != XFRM_STATE_VALID)
 			return 0;
-		if (xdst->genid != dst->xfrm->genid)
+		if (xdst->xfrm_genid != dst->xfrm->genid)
+			return 0;
+		if (xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
 			return 0;
 
 		if (strict && fl &&
@@ -2425,7 +2465,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
 
 	switch (event) {
 	case NETDEV_DOWN:
-		xfrm_flush_bundles(dev_net(dev));
+		__xfrm_garbage_collect(dev_net(dev));
 	}
 	return NOTIFY_DONE;
 }
@@ -2531,7 +2571,6 @@ static void xfrm_policy_fini(struct net *net)
 	audit_info.sessionid = -1;
 	audit_info.secid = 0;
 	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
-	flush_work(&xfrm_policy_gc_work);
 
 	WARN_ON(!list_empty(&net->xfrm.policy_all));
 
@@ -2757,7 +2796,6 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
 			       struct xfrm_migrate *m, int num_migrate)
 {
 	struct xfrm_migrate *mp;
-	struct dst_entry *dst;
 	int i, j, n = 0;
 
 	write_lock_bh(&pol->lock);
@@ -2782,10 +2820,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
 			       sizeof(pol->xfrm_vec[i].saddr));
 			pol->xfrm_vec[i].encap_family = mp->new_family;
 			/* flush bundles */
-			while ((dst = pol->bundles) != NULL) {
-				pol->bundles = dst->next;
-				dst_free(dst);
-			}
+			atomic_inc(&pol->genid);
 		}
 	}
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index add77ecb8ac4..5208b12fbfb4 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -38,7 +38,6 @@
 static DEFINE_SPINLOCK(xfrm_state_lock);
 
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
-static unsigned int xfrm_state_genid;
 
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
@@ -924,8 +923,6 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 	struct net *net = xs_net(x);
 	unsigned int h;
 
-	x->genid = ++xfrm_state_genid;
-
 	list_add(&x->km.all, &net->xfrm.state_all);
 
 	h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
@@ -971,7 +968,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
 		    (mark & x->mark.m) == x->mark.v &&
 		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
 		    !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
-			x->genid = xfrm_state_genid;
+			x->genid++;
 	}
 }
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 6106b72826d3..a267fbdda525 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1741,6 +1741,10 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err)
 		return err;
 
+	err = verify_policy_dir(p->dir);
+	if (err)
+		return err;
+
 	if (p->index)
 		xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, 0, &err);
 	else {
@@ -1766,13 +1770,9 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (xp == NULL)
 		return -ENOENT;
 
-	read_lock(&xp->lock);
-	if (xp->walk.dead) {
-		read_unlock(&xp->lock);
+	if (unlikely(xp->walk.dead))
 		goto out;
-	}
 
-	read_unlock(&xp->lock);
 	err = 0;
 	if (up->hard) {
 		uid_t loginuid = NETLINK_CB(skb).loginuid;