Merge with master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6.git

author: David Woodhouse <dwmw2@shinybook.infradead.org> 2005-06-02 11:39:11 -0400
committer: David Woodhouse <dwmw2@shinybook.infradead.org> 2005-06-02 11:39:11 -0400
commit: 1c3f45ab2f7f879ea482501c83899505c31f7539 (patch)
tree: 672465b3b9b3e2e26a8caf74ed64aa6885c52c13 /net
parent: 4bcff1b37e7c3aed914d1ce5b45994adc7dbf455 (diff)
parent: e0d6d71440a3a35c6fc2dde09f8e8d4d7bd44dda (diff)
31 files changed, 310 insertions, 270 deletions
diff --git a/net/802/tr.c b/net/802/tr.c
index 85293ccf7efc..a755e880f4ba 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -47,12 +47,12 @@ static void rif_check_expire(unsigned long dummy);
 *      Each RIF entry we learn is kept this way
 */
 
-struct rif_cache_s {    
+struct rif_cache {
        unsigned char addr[TR_ALEN];
        int iface;
-        __u16 rcf;
+        __be16 rcf;
-        __u16 rseg[8];
+        __be16 rseg[8];
-        struct rif_cache_s *next;
+        struct rif_cache *next;
        unsigned long last_used;
        unsigned char local_ring;
 };
@@ -64,7 +64,7 @@ struct rif_cache_s {
 *      up a lot.
 */
 
-static struct rif_cache_s *rif_table[RIF_TABLE_SIZE];
+static struct rif_cache *rif_table[RIF_TABLE_SIZE];
 static DEFINE_SPINLOCK(rif_lock);
@@ -249,7 +249,7 @@ void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,struct net_device *
 {
        int slack;
        unsigned int hash;
-        struct rif_cache_s *entry;
+        struct rif_cache *entry;
        unsigned char *olddata;
        static const unsigned char mcast_func_addr[] 
                = {0xC0,0x00,0x00,0x04,0x00,0x00};
@@ -337,7 +337,7 @@ printk("source routing for %02X:%02X:%02X:%02X:%02X:%02X\n",trh->daddr[0],
 static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev)
 {
        unsigned int hash, rii_p = 0;
-        struct rif_cache_s *entry;
+        struct rif_cache *entry;
        spin_lock_bh(&rif_lock);
@@ -373,7 +373,7 @@ printk("adding rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n",
                 *      FIXME: We ought to keep some kind of cache size
                 *      limiting and adjust the timers to suit.
                 */
-                entry=kmalloc(sizeof(struct rif_cache_s),GFP_ATOMIC);
+                entry=kmalloc(sizeof(struct rif_cache),GFP_ATOMIC);
                if(!entry) 
                {
@@ -435,7 +435,7 @@ static void rif_check_expire(unsigned long dummy)
        spin_lock_bh(&rif_lock);
        
        for(i =0; i < RIF_TABLE_SIZE; i++) {
-                struct rif_cache_s *entry, **pentry;
+                struct rif_cache *entry, **pentry;
                
                pentry = rif_table+i;
                while((entry=*pentry) != NULL) {
@@ -467,10 +467,10 @@ static void rif_check_expire(unsigned long dummy)
 
 #ifdef CONFIG_PROC_FS
-static struct rif_cache_s *rif_get_idx(loff_t pos)
+static struct rif_cache *rif_get_idx(loff_t pos)
 {
        int i;
-        struct rif_cache_s *entry;
+        struct rif_cache *entry;
        loff_t off = 0;
        for(i = 0; i < RIF_TABLE_SIZE; i++) 
@@ -493,7 +493,7 @@ static void *rif_seq_start(struct seq_file *seq, loff_t *pos)
 static void *rif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
        int i;
-        struct rif_cache_s *ent = v;
+        struct rif_cache *ent = v;
        ++*pos;
@@ -522,7 +522,7 @@ static void rif_seq_stop(struct seq_file *seq, void *v)
 static int rif_seq_show(struct seq_file *seq, void *v)
 {
        int j, rcf_len, segment, brdgnmb;
-        struct rif_cache_s *entry = v;
+        struct rif_cache *entry = v;
        if (v == SEQ_START_TOKEN)
                seq_puts(seq,
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index d9b72fde433c..f564ee99782d 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -21,10 +21,7 @@
 static struct net_device_stats *br_dev_get_stats(struct net_device *dev)
 {
-        struct net_bridge *br;
+        struct net_bridge *br = netdev_priv(dev);
-        br = dev->priv;
        return &br->statistics;
 }
@@ -54,9 +51,11 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 static int br_dev_open(struct net_device *dev)
 {
-        netif_start_queue(dev);
+        struct net_bridge *br = netdev_priv(dev);
-        br_stp_enable_bridge(dev->priv);
+        br_features_recompute(br);
+        netif_start_queue(dev);
+        br_stp_enable_bridge(br);
        return 0;
 }
@@ -67,7 +66,7 @@ static void br_dev_set_multicast_list(struct net_device *dev)
 static int br_dev_stop(struct net_device *dev)
 {
-        br_stp_disable_bridge(dev->priv);
+        br_stp_disable_bridge(netdev_priv(dev));
        netif_stop_queue(dev);
@@ -76,7 +75,7 @@ static int br_dev_stop(struct net_device *dev)
 static int br_change_mtu(struct net_device *dev, int new_mtu)
 {
-        if ((new_mtu < 68) || new_mtu > br_min_mtu(dev->priv))
+        if (new_mtu < 68 || new_mtu > br_min_mtu(netdev_priv(dev)))
                return -EINVAL;
        dev->mtu = new_mtu;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 69872bf3b87e..91bb895375f4 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -314,6 +314,28 @@ int br_min_mtu(const struct net_bridge *br)
        return mtu;
 }
+/*
+ * Recompute the bridge device's feature flags from its ports' features.
+ *
+ * Starts from SG | FRAGLIST | HIGHDMA | TSO and keeps only the bits that
+ * every port on br->port_list also has set.  NETIF_F_IP_CSUM is kept only
+ * if every port has at least one of IP_CSUM, NO_CSUM or HW_CSUM set.
+ * NETIF_F_LLTX is always set in the result written to br->dev->features.
+ */
+void br_features_recompute(struct net_bridge *br)
+{
+        struct net_bridge_port *p;
+        unsigned long features, checksum;
+        features = NETIF_F_SG | NETIF_F_FRAGLIST 
+                | NETIF_F_HIGHDMA | NETIF_F_TSO;
+        checksum = NETIF_F_IP_CSUM;     /* least common subset */
+        list_for_each_entry(p, &br->port_list, list) {
+                if (!(p->dev->features 
+                      & (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)))
+                        checksum = 0;
+                features &= p->dev->features;
+        }
+        br->dev->features = features | checksum | NETIF_F_LLTX;
+}
 /* called with RTNL */
 int br_add_if(struct net_bridge *br, struct net_device *dev)
 {
@@ -368,6 +390,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
        spin_lock_bh(&br->lock);
        br_stp_recalculate_bridge_id(br);
+        br_features_recompute(br);
        spin_unlock_bh(&br->lock);
        return 0;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 2b1cce46cab4..8f5f2e730992 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -26,7 +26,7 @@ static int br_pass_frame_up_finish(struct sk_buff *skb)
 #ifdef CONFIG_NETFILTER_DEBUG
        skb->nf_debug = 0;
 #endif
-        netif_rx(skb);
+        netif_receive_skb(skb);
        return 0;
 }
@@ -54,6 +54,9 @@ int br_handle_frame_finish(struct sk_buff *skb)
        struct net_bridge_fdb_entry *dst;
        int passedup = 0;
+        /* insert into forwarding database after filtering to avoid spoofing */
+        br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
        if (br->dev->flags & IFF_PROMISC) {
                struct sk_buff *skb2;
@@ -108,8 +111,7 @@ int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
        if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
                goto err;
-        if (p->state == BR_STATE_LEARNING ||
+        if (p->state == BR_STATE_LEARNING)
-            p->state == BR_STATE_FORWARDING)
                br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
        if (p->br->stp_enabled &&
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index f8fb49e34764..917311c6828b 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -65,6 +65,15 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
                }
                break;
+        case NETDEV_FEAT_CHANGE:
+                if (br->dev->flags & IFF_UP) 
+                        br_features_recompute(br);
+                /* could do recursive feature change notification
+                 * but who would care?? 
+                 */
+                break;
        case NETDEV_DOWN:
                if (br->dev->flags & IFF_UP)
                        br_stp_disable_port(p);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 54d63f1372a0..bdf95a74d8cd 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -174,6 +174,7 @@ extern int br_add_if(struct net_bridge *br,
 extern int br_del_if(struct net_bridge *br,
              struct net_device *dev);
 extern int br_min_mtu(const struct net_bridge *br);
+extern void br_features_recompute(struct net_bridge *br);
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index b91a875aca01..d071f1c9ad0b 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -140,6 +140,9 @@ int br_stp_handle_bpdu(struct sk_buff *skb)
        struct net_bridge *br = p->br;
        unsigned char *buf;
+        /* insert into forwarding database after filtering to avoid spoofing */
+        br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
        /* need at least the 802 and STP headers */
        if (!pskb_may_pull(skb, sizeof(header)+1) ||
            memcmp(skb->data, header, sizeof(header)))
diff --git a/net/core/dev.c b/net/core/dev.c
index d4d9e2680adb..f15a3ffff635 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -761,6 +761,18 @@ int dev_change_name(struct net_device *dev, char *newname)
 }
 /**
+ *      netdev_features_change - device changes features
+ *      @dev: device to cause notification
+ *
+ *      Called to indicate a device has changed features.
+ *      Runs the netdev_chain notifier chain with the
+ *      NETDEV_FEAT_CHANGE event, passing @dev as the argument.
+ */
+void netdev_features_change(struct net_device *dev)
+{
+        notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
+}
+EXPORT_SYMBOL(netdev_features_change);
+/**
 *      netdev_state_change - device changes state
 *      @dev: device to cause notification
 *
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f05fde97c43d..8ec484894d68 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -29,7 +29,7 @@ u32 ethtool_op_get_link(struct net_device *dev)
 u32 ethtool_op_get_tx_csum(struct net_device *dev)
 {
-        return (dev->features & NETIF_F_IP_CSUM) != 0;
+        return (dev->features & (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM)) != 0;
 }
 int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
@@ -42,6 +42,15 @@ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
        return 0;
 }
+int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
+{       /* Toggle NETIF_F_HW_CSUM in dev->features: set it when data is
+         * non-zero, clear it when data is zero.  Always returns 0.
+         */
+        if (data)
+                dev->features |= NETIF_F_HW_CSUM;
+        else
+                dev->features &= ~NETIF_F_HW_CSUM;
+        return 0;
+}
 u32 ethtool_op_get_sg(struct net_device *dev)
 {
        return (dev->features & NETIF_F_SG) != 0;
@@ -682,6 +691,7 @@ int dev_ethtool(struct ifreq *ifr)
        void __user *useraddr = ifr->ifr_data;
        u32 ethcmd;
        int rc;
+        unsigned long old_features;
        /*
         * XXX: This can be pushed down into the ethtool_* handlers that
@@ -703,6 +713,8 @@ int dev_ethtool(struct ifreq *ifr)
                if ((rc = dev->ethtool_ops->begin(dev)) < 0)
                        return rc;
+        old_features = dev->features;
        switch (ethcmd) {
        case ETHTOOL_GSET:
                rc = ethtool_get_settings(dev, useraddr);
@@ -712,7 +724,6 @@ int dev_ethtool(struct ifreq *ifr)
                break;
        case ETHTOOL_GDRVINFO:
                rc = ethtool_get_drvinfo(dev, useraddr);
                break;
        case ETHTOOL_GREGS:
                rc = ethtool_get_regs(dev, useraddr);
@@ -801,6 +812,10 @@ int dev_ethtool(struct ifreq *ifr)
        
        if(dev->ethtool_ops->complete)
                dev->ethtool_ops->complete(dev);
+        if (old_features != dev->features)
+                netdev_features_change(dev);
        return rc;
 ioctl:
@@ -817,3 +832,4 @@ EXPORT_SYMBOL(ethtool_op_get_tx_csum);
 EXPORT_SYMBOL(ethtool_op_set_sg);
 EXPORT_SYMBOL(ethtool_op_set_tso);
 EXPORT_SYMBOL(ethtool_op_set_tx_csum);
+EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 060f703659e8..910eb4c05a47 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -21,6 +21,7 @@
 #define to_net_dev(class) container_of(class, struct net_device, class_dev)
 static const char fmt_hex[] = "%#x\n";
+static const char fmt_long_hex[] = "%#lx\n";
 static const char fmt_dec[] = "%d\n";
 static const char fmt_ulong[] = "%lu\n";
@@ -91,7 +92,7 @@ static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL)		\
 NETDEVICE_ATTR(addr_len, fmt_dec);
 NETDEVICE_ATTR(iflink, fmt_dec);
 NETDEVICE_ATTR(ifindex, fmt_dec);
-NETDEVICE_ATTR(features, fmt_hex);
+NETDEVICE_ATTR(features, fmt_long_hex);
 NETDEVICE_ATTR(type, fmt_dec);
 /* use same locking rules as GIFHWADDR ioctl's */
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 3cc96730c4ed..478a30179a52 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -233,11 +233,14 @@ int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b)
 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy)
 {
+        struct in_ifaddr *promote = NULL;
        struct in_ifaddr *ifa1 = *ifap;
        ASSERT_RTNL();
-        /* 1. Deleting primary ifaddr forces deletion all secondaries */
+        /* 1. Deleting primary ifaddr forces deletion all secondaries 
+         * unless alias promotion is set
+         **/
        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
                struct in_ifaddr *ifa;
@@ -251,11 +254,16 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                                continue;
                        }
-                        *ifap1 = ifa->ifa_next;
+                        if (!IN_DEV_PROMOTE_SECONDARIES(in_dev)) {
+                                *ifap1 = ifa->ifa_next;
-                        rtmsg_ifa(RTM_DELADDR, ifa);
+                                rtmsg_ifa(RTM_DELADDR, ifa);
-                        notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa);
+                                notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa);
-                        inet_free_ifa(ifa);
+                                inet_free_ifa(ifa);
+                        } else {
+                                promote = ifa;
+                                break;
+                        }
                }
        }
@@ -281,6 +289,13 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                if (!in_dev->ifa_list)
                        inetdev_destroy(in_dev);
        }
+        if (promote && IN_DEV_PROMOTE_SECONDARIES(in_dev)) {
+                /* not sure if we should send a delete notify first? */
+                promote->ifa_flags &= ~IFA_F_SECONDARY;
+                rtmsg_ifa(RTM_NEWADDR, promote);
+                notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote);
+        }
 }
 static int inet_insert_ifa(struct in_ifaddr *ifa)
@@ -1384,6 +1399,15 @@ static struct devinet_sysctl_table {
                        .proc_handler   = &ipv4_doint_and_flush,
                        .strategy       = &ipv4_doint_and_flush_strategy,
                },
+                {
+                        .ctl_name       = NET_IPV4_CONF_PROMOTE_SECONDARIES,
+                        .procname       = "promote_secondaries",
+                        .data           = &ipv4_devconf.promote_secondaries,
+                        .maxlen         = sizeof(int),
+                        .mode           = 0644,
+                        .proc_handler   = &ipv4_doint_and_flush,
+                        .strategy       = &ipv4_doint_and_flush_strategy,
+                },
        },
        .devinet_dev = {
                {
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 053a883247ba..eae84cc39d3f 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -478,7 +478,7 @@ static int __init esp4_init(void)
 {
        struct xfrm_decap_state decap;
-        if (sizeof(struct esp_decap_data)  <
+        if (sizeof(struct esp_decap_data)  >
            sizeof(decap.decap_data)) {
                extern void decap_data_too_small(void);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index daebd93fd8a0..760dc8238d65 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -490,6 +490,14 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path;
+                        BUG_ON(frag->sk);
+                        if (skb->sk) {
+                                sock_hold(skb->sk);
+                                frag->sk = skb->sk;
+                                frag->destructor = sock_wfree;
+                                skb->truesize -= frag->truesize;
+                        }
                }
                /* Everything is OK. Generate! */
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index faa6176bbeb1..de21da00057f 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -508,7 +508,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
                        rc = NF_ACCEPT;
                /* do not touch skb anymore */
                atomic_inc(&cp->in_pkts);
-                __ip_vs_conn_put(cp);
                goto out;
        }
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c
index 9349686131fc..cf2e6bcf7973 100644
--- a/net/ipv4/multipath_drr.c
+++ b/net/ipv4/multipath_drr.c
@@ -57,7 +57,6 @@ struct multipath_device {
 static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES];
 static DEFINE_SPINLOCK(state_lock);
-static struct rtable *last_selection = NULL;
 static int inline __multipath_findslot(void)
 {
@@ -111,11 +110,6 @@ struct notifier_block drr_dev_notifier = {
        .notifier_call  = drr_dev_event,
 };
-static void drr_remove(struct rtable *rt)
-{
-        if (last_selection == rt)
-                last_selection = NULL;
-}
 static void drr_safe_inc(atomic_t *usecount)
 {
@@ -144,14 +138,6 @@ static void drr_select_route(const struct flowi *flp,
        int devidx = -1;
        int cur_min_devidx = -1;
-        /* if necessary and possible utilize the old alternative */
-        if ((flp->flags & FLOWI_FLAG_MULTIPATHOLDROUTE) != 0 &&
-            last_selection != NULL) {
-                result = last_selection;
-                *rp = result;
-                return;
-        }
        /* 1. make sure all alt. nexthops have the same GC related data */
        /* 2. determine the new candidate to be returned */
        result = NULL;
@@ -229,12 +215,10 @@ static void drr_select_route(const struct flowi *flp,
        }
        *rp = result;
-        last_selection = result;
 }
 static struct ip_mp_alg_ops drr_ops = {
        .mp_alg_select_route    =       drr_select_route,
-        .mp_alg_remove          =       drr_remove,
 };
 static int __init drr_init(void)
@@ -244,7 +228,7 @@ static int __init drr_init(void)
        if (err)
                return err;
-        err = multipath_alg_register(&drr_ops, IP_MP_ALG_RR);
+        err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR);
        if (err)
                goto fail;
diff --git a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c
index 554a82568160..061b6b253982 100644
--- a/net/ipv4/multipath_rr.c
+++ b/net/ipv4/multipath_rr.c
@@ -47,29 +47,12 @@
 #include <net/checksum.h>
 #include <net/ip_mp_alg.h>
-#define MULTIPATH_MAX_CANDIDATES 40
-static struct rtable* last_used = NULL;
-static void rr_remove(struct rtable *rt)
-{
-        if (last_used == rt)
-                last_used = NULL;
-}
 static void rr_select_route(const struct flowi *flp,
                            struct rtable *first, struct rtable **rp)
 {
        struct rtable *nh, *result, *min_use_cand = NULL;
        int min_use = -1;
-        /* if necessary and possible utilize the old alternative */
-        if ((flp->flags & FLOWI_FLAG_MULTIPATHOLDROUTE) != 0 &&
-            last_used != NULL) {
-                result = last_used;
-                goto out;
-        }
        /* 1. make sure all alt. nexthops have the same GC related data
         * 2. determine the new candidate to be returned
         */
@@ -90,15 +73,12 @@ static void rr_select_route(const struct flowi *flp,
        if (!result)
                result = first;
-out:
-        last_used = result;
        result->u.dst.__use++;
        *rp = result;
 }
 static struct ip_mp_alg_ops rr_ops = {
        .mp_alg_select_route    =       rr_select_route,
-        .mp_alg_remove          =       rr_remove,
 };
 static int __init rr_init(void)
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 28d9425d5c39..09e824622977 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -940,37 +940,25 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
 struct sk_buff *
 ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
 {
-        struct sock *sk = skb->sk;
 #ifdef CONFIG_NETFILTER_DEBUG
        unsigned int olddebug = skb->nf_debug;
 #endif
-        if (sk) {
+        skb_orphan(skb);
-                sock_hold(sk);
-                skb_orphan(skb);
-        }
        local_bh_disable(); 
        skb = ip_defrag(skb, user);
        local_bh_enable();
-        if (!skb) {
+        if (skb) {
-                if (sk)
+                ip_send_check(skb->nh.iph);
-                        sock_put(sk);
+                skb->nfcache |= NFC_ALTERED;
-                return skb;
-        }
-        if (sk) {
-                skb_set_owner_w(skb, sk);
-                sock_put(sk);
-        }
-        ip_send_check(skb->nh.iph);
-        skb->nfcache |= NFC_ALTERED;
 #ifdef CONFIG_NETFILTER_DEBUG
-        /* Packet path as if nothing had happened. */
+                /* Packet path as if nothing had happened. */
-        skb->nf_debug = olddebug;
+                skb->nf_debug = olddebug;
 #endif
+        }
        return skb;
 }
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index e5746b674413..eda1fba431a4 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -3,6 +3,7 @@
 * communicating with userspace via netlink.
 *
 * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
+ * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
@@ -17,6 +18,7 @@
 * 2005-01-10: Added /proc counter for dropped packets; fixed so
 *             packets aren't delivered to user space if they're going 
 *             to be dropped. 
+ * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte)
 *
 */
 #include <linux/module.h>
@@ -71,7 +73,15 @@ static DECLARE_MUTEX(ipqnl_sem);
 static void
 ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
 {
+        /* The TCP input path (and probably other code) assumes it is
+         * called from softirq context, not from a syscall, which is how
+         * ipq_issue_verdict() is reached.  The TCP input path can deadlock
+         * against locks taken from the timer softirq, for example.  We
+         * therefore emulate softirq context with local_bh_disable(). */
+        local_bh_disable();
        nf_reinject(entry->skb, entry->info, verdict);
+        local_bh_enable();
        kfree(entry);
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 79835a67a274..5bad504630a3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4355,16 +4355,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                                        goto no_ack;
                        }
-                        if (eaten) {
+                        __tcp_ack_snd_check(sk, 0);
-                                if (tcp_in_quickack_mode(tp)) {
-                                        tcp_send_ack(sk);
-                                } else {
-                                        tcp_send_delayed_ack(sk);
-                                }
-                        } else {
-                                __tcp_ack_snd_check(sk, 0);
-                        }
 no_ack:
                        if (eaten)
                                __kfree_skb(skb);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4a6952e3fee9..7c24e64b443f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -738,7 +738,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
                        unsigned long amount;
                        amount = 0;
-                        spin_lock_irq(&sk->sk_receive_queue.lock);
+                        spin_lock_bh(&sk->sk_receive_queue.lock);
                        skb = skb_peek(&sk->sk_receive_queue);
                        if (skb != NULL) {
                                /*
@@ -748,7 +748,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
                                 */
                                amount = skb->len - sizeof(struct udphdr);
                        }
-                        spin_unlock_irq(&sk->sk_receive_queue.lock);
+                        spin_unlock_bh(&sk->sk_receive_queue.lock);
                        return put_user(amount, (int __user *)arg);
                }
@@ -848,12 +848,12 @@ csum_copy_err:
        /* Clear queue. */
        if (flags&MSG_PEEK) {
                int clear = 0;
-                spin_lock_irq(&sk->sk_receive_queue.lock);
+                spin_lock_bh(&sk->sk_receive_queue.lock);
                if (skb == skb_peek(&sk->sk_receive_queue)) {
                        __skb_unlink(skb, &sk->sk_receive_queue);
                        clear = 1;
                }
-                spin_unlock_irq(&sk->sk_receive_queue.lock);
+                spin_unlock_bh(&sk->sk_receive_queue.lock);
                if (clear)
                        kfree_skb(skb);
        }
@@ -1334,7 +1334,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
                struct sk_buff_head *rcvq = &sk->sk_receive_queue;
                struct sk_buff *skb;
-                spin_lock_irq(&rcvq->lock);
+                spin_lock_bh(&rcvq->lock);
                while ((skb = skb_peek(rcvq)) != NULL) {
                        if (udp_checksum_complete(skb)) {
                                UDP_INC_STATS_BH(UDP_MIB_INERRORS);
@@ -1345,7 +1345,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
                                break;
                        }
                }
-                spin_unlock_irq(&rcvq->lock);
+                spin_unlock_bh(&rcvq->lock);
                /* nothing to see, move along */
                if (skb == NULL)
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index a93f6dc51979..0e5f7499debb 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -535,10 +535,12 @@ release:
                if (err)
                        goto done;
-                /* Do not check for fault */
+                if (!freq.flr_label) {
-                if (!freq.flr_label)
+                        if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
-                        copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
+                                         &fl->label, sizeof(fl->label))) {
-                                     &fl->label, sizeof(fl->label));
+                                /* Intentionally ignore fault. */
+                        }
+                }
                sfl1->fl = fl;
                sfl1->next = np->ipv6_fl_list;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0f0711417c9d..b78a53586804 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -552,13 +552,17 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
                            skb_headroom(frag) < hlen)
                            goto slow_path;
-                        /* Correct socket ownership. */
-                        if (frag->sk == NULL)
-                                goto slow_path;
                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path;
+                        BUG_ON(frag->sk);
+                        if (skb->sk) {
+                                sock_hold(skb->sk);
+                                frag->sk = skb->sk;
+                                frag->destructor = sock_wfree;
+                                skb->truesize -= frag->truesize;
+                        }
                }
                err = 0;
@@ -1116,12 +1120,10 @@ int ip6_push_pending_frames(struct sock *sk)
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
-#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
                skb->truesize += tmp_skb->truesize;
                __sock_put(tmp_skb->sk);
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
-#endif
        }
        ipv6_addr_copy(final_dst, &fl->fl6_dst);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 601a148f60f3..6b9867717d11 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -84,6 +84,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
                mtu = IPV6_MIN_MTU;
        if (skb->len > mtu) {
+                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
                ret = -EMSGSIZE;
        }
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 4429b1a1fe5f..cf1d91e74c82 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -113,6 +113,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
                xdst = (struct xfrm_dst *)dst1;
                xdst->route = &rt->u.dst;
+                if (rt->rt6i_node)
+                        xdst->route_cookie = rt->rt6i_node->fn_sernum;
                dst1->next = dst_prev;
                dst_prev = dst1;
@@ -137,6 +139,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
        dst_prev->child = &rt->u.dst;
        dst->path = &rt->u.dst;
+        if (rt->rt6i_node)
+                ((struct xfrm_dst *)dst)->path_cookie = rt->rt6i_node->fn_sernum;
        *dst_p = dst;
        dst = dst_prev;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 733bf52cef3e..e41ce458c2a9 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -735,11 +735,15 @@ static inline int do_one_broadcast(struct sock *sk,
        sock_hold(sk);
        if (p->skb2 == NULL) {
-                if (atomic_read(&p->skb->users) != 1) {
+                if (skb_shared(p->skb)) {
                        p->skb2 = skb_clone(p->skb, p->allocation);
                } else {
-                        p->skb2 = p->skb;
+                        p->skb2 = skb_get(p->skb);
-                        atomic_inc(&p->skb->users);
+                        /*
+                         * skb ownership may have been set when
+                         * delivered to a previous socket.
+                         */
+                        skb_orphan(p->skb2);
                }
        }
        if (p->skb2 == NULL) {
@@ -785,11 +789,12 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
        sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
                do_one_broadcast(sk, &info);
+        kfree_skb(skb);
        netlink_unlock_table();
        if (info.skb2)
                kfree_skb(info.skb2);
-        kfree_skb(skb);
        if (info.delivered) {
                if (info.congested && (allocation & __GFP_WAIT))
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 8a3db9d95bab..d8bd2a569c7c 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -18,7 +18,7 @@
 #include <asm/byteorder.h>
-#if 1 /* control */
+#if 0 /* control */
 #define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
 #else
 #define DPRINTK(format,args...)
@@ -73,8 +73,13 @@ static int dsmark_graft(struct Qdisc *sch,unsigned long arg,
        DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",sch,p,new,
            old);
-        if (!new)
-                new = &noop_qdisc;
+        if (new == NULL) {
+                new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+                if (new == NULL)
+                        new = &noop_qdisc;
+        }
        sch_tree_lock(sch);
        *old = xchg(&p->q,new);
        if (*old)
@@ -163,14 +168,15 @@ static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker)
                return;
        for (i = 0; i < p->indices; i++) {
                if (p->mask[i] == 0xff && !p->value[i])
-                        continue;
+                        goto ignore;
                if (walker->count >= walker->skip) {
                        if (walker->fn(sch, i+1, walker) < 0) {
                                walker->stop = 1;
                                break;
                        }
                }
-                walker->count++;
+ignore:         
+                walker->count++;
        }
 }
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index e0c9fbe73b15..bb9bf8d5003c 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -53,7 +53,6 @@
 struct netem_sched_data {
        struct Qdisc    *qdisc;
-        struct sk_buff_head delayed;
        struct timer_list timer;
        u32 latency;
@@ -63,11 +62,12 @@ struct netem_sched_data {
        u32 gap;
        u32 jitter;
        u32 duplicate;
+        u32 reorder;
        struct crndstate {
                unsigned long last;
                unsigned long rho;
-        } delay_cor, loss_cor, dup_cor;
+        } delay_cor, loss_cor, dup_cor, reorder_cor;
        struct disttable {
                u32  size;
@@ -137,122 +137,68 @@ static long tabledist(unsigned long mu, long sigma,
        return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
 }
-/* Put skb in the private delayed queue. */
-static int netem_delay(struct Qdisc *sch, struct sk_buff *skb)
-{
-        struct netem_sched_data *q = qdisc_priv(sch);
-        psched_tdiff_t td;
-        psched_time_t now;
-        
-        PSCHED_GET_TIME(now);
-        td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist);
-        
-        /* Always queue at tail to keep packets in order */
-        if (likely(q->delayed.qlen < q->limit)) {
-                struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
-        
-                PSCHED_TADD2(now, td, cb->time_to_send);
-                pr_debug("netem_delay: skb=%p now=%llu tosend=%llu\n", skb, 
-                         now, cb->time_to_send);
-        
-                __skb_queue_tail(&q->delayed, skb);
-                return NET_XMIT_SUCCESS;
-        }
-        pr_debug("netem_delay: queue over limit %d\n", q->limit);
-        sch->qstats.overlimits++;
-        kfree_skb(skb);
-        return NET_XMIT_DROP;
-}
 /*
- *  Move a packet that is ready to send from the delay holding
+ * Insert one skb into qdisc.
- *  list to the underlying qdisc.
+ * Note: parent depends on return value to account for queue length.
+ *      NET_XMIT_DROP: queue length didn't change.
+ *      NET_XMIT_SUCCESS: one skb was queued.
 */
-static int netem_run(struct Qdisc *sch)
-{
-        struct netem_sched_data *q = qdisc_priv(sch);
-        struct sk_buff *skb;
-        psched_time_t now;
-        PSCHED_GET_TIME(now);
-        skb = skb_peek(&q->delayed);
-        if (skb) {
-                const struct netem_skb_cb *cb
-                        = (const struct netem_skb_cb *)skb->cb;
-                long delay 
-                        = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
-                pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);
-                /* if more time remaining? */
-                if (delay > 0) {
-                        mod_timer(&q->timer, jiffies + delay);
-                        return 1;
-                }
-                __skb_unlink(skb, &q->delayed);
-                
-                if (q->qdisc->enqueue(skb, q->qdisc)) {
-                        sch->q.qlen--;
-                        sch->qstats.drops++;
-                } 
-        }
-        return 0;
-}
 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
        struct netem_sched_data *q = qdisc_priv(sch);
+        struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
+        struct sk_buff *skb2;
        int ret;
+        int count = 1;
        pr_debug("netem_enqueue skb=%p\n", skb);
+        /* Random duplication */
+        if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
+                ++count;
        /* Random packet drop 0 => none, ~0 => all */
-        if (q->loss && q->loss >= get_crandom(&q->loss_cor)) {
+        if (q->loss && q->loss >= get_crandom(&q->loss_cor))
-                pr_debug("netem_enqueue: random loss\n");
+                --count;
+        if (count == 0) {
                sch->qstats.drops++;
                kfree_skb(skb);
-                return 0;       /* lie about loss so TCP doesn't know */
+                return NET_XMIT_DROP;
        }
-        /* Random duplication */
+        /*
-        if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) {
+         * If we need to duplicate packet, then re-insert at top of the
-                struct sk_buff *skb2;
+         * qdisc tree, since parent queuer expects that only one
+         * skb will be queued.
-                skb2 = skb_clone(skb, GFP_ATOMIC);
+         */
-                if (skb2 && netem_delay(sch, skb2) == NET_XMIT_SUCCESS) {
+        if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
-                        struct Qdisc *qp;
+                struct Qdisc *rootq = sch->dev->qdisc;
+                u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
-                        /* Since one packet can generate two packets in the
+                q->duplicate = 0;
-                         * queue, the parent's qlen accounting gets confused,
-                         * so fix it.
+                rootq->enqueue(skb2, rootq);
-                         */
+                q->duplicate = dupsave;
-                        qp = qdisc_lookup(sch->dev, TC_H_MAJ(sch->parent));
-                        if (qp)
-                                qp->q.qlen++;
-                        sch->q.qlen++;
-                        sch->bstats.bytes += skb2->len;
-                        sch->bstats.packets++;
-                } else
-                        sch->qstats.drops++;
        }
-        /* If doing simple delay then gap == 0 so all packets
+        if (q->gap == 0                 /* not doing reordering */
-         * go into the delayed holding queue
+            || q->counter < q->gap      /* inside last reordering gap */
-         * otherwise if doing out of order only "1 out of gap"
+            || q->reorder < get_crandom(&q->reorder_cor)) {
-         * packets will be delayed.
+                psched_time_t now;
-         */
+                PSCHED_GET_TIME(now);
-        if (q->counter < q->gap) {
+                PSCHED_TADD2(now, tabledist(q->latency, q->jitter, 
+                                            &q->delay_cor, q->delay_dist),
+                             cb->time_to_send);
                ++q->counter;
                ret = q->qdisc->enqueue(skb, q->qdisc);
        } else {
+                /* 
+                 * Do re-ordering by putting one out of N packets at the front
+                 * of the queue.
+                 */
+                PSCHED_GET_TIME(cb->time_to_send);
                q->counter = 0;
-                ret = netem_delay(sch, skb);
+                ret = q->qdisc->ops->requeue(skb, q->qdisc);
-                netem_run(sch);
        }
        if (likely(ret == NET_XMIT_SUCCESS)) {
@@ -296,22 +242,33 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 {
        struct netem_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
-        int pending;
-        pending = netem_run(sch);
        skb = q->qdisc->dequeue(q->qdisc);
        if (skb) {
-                pr_debug("netem_dequeue: return skb=%p\n", skb);
+                const struct netem_skb_cb *cb
-                sch->q.qlen--;
+                        = (const struct netem_skb_cb *)skb->cb;
-                sch->flags &= ~TCQ_F_THROTTLED;
+                psched_time_t now;
-        }
+                long delay;
-        else if (pending) {
-                pr_debug("netem_dequeue: throttling\n");
+                /* if more time remaining? */
+                PSCHED_GET_TIME(now);
+                delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
+                pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);
+                if (delay <= 0) {
+                        pr_debug("netem_dequeue: return skb=%p\n", skb);
+                        sch->q.qlen--;
+                        sch->flags &= ~TCQ_F_THROTTLED;
+                        return skb;
+                }
+                mod_timer(&q->timer, jiffies + delay);
                sch->flags |= TCQ_F_THROTTLED;
-        } 
-        return skb;
+                if (q->qdisc->ops->requeue(skb, q->qdisc) != 0)
+                        sch->qstats.drops++;
+        }
+        return NULL;
 }
 static void netem_watchdog(unsigned long arg)
@@ -328,8 +285,6 @@ static void netem_reset(struct Qdisc *sch)
        struct netem_sched_data *q = qdisc_priv(sch);
        qdisc_reset(q->qdisc);
-        skb_queue_purge(&q->delayed);
        sch->q.qlen = 0;
        sch->flags &= ~TCQ_F_THROTTLED;
        del_timer_sync(&q->timer);
@@ -397,6 +352,19 @@ static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
        return 0;
 }
+static int get_reorder(struct Qdisc *sch, const struct rtattr *attr)
+{
+        struct netem_sched_data *q = qdisc_priv(sch);
+        const struct tc_netem_reorder *r = RTA_DATA(attr);
+        if (RTA_PAYLOAD(attr) != sizeof(*r))
+                return -EINVAL;
+        q->reorder = r->probability;
+        init_crandom(&q->reorder_cor, r->correlation);
+        return 0;
+}
 static int netem_change(struct Qdisc *sch, struct rtattr *opt)
 {
        struct netem_sched_data *q = qdisc_priv(sch);
@@ -417,9 +385,15 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
        q->jitter = qopt->jitter;
        q->limit = qopt->limit;
        q->gap = qopt->gap;
+        q->counter = 0;
        q->loss = qopt->loss;
        q->duplicate = qopt->duplicate;
+        /* for compatibility with earlier versions.
+         * if gap is set, need to assume 100% probability
+         */
+        q->reorder = ~0;
        /* Handle nested options after initial queue options.
         * Should have put all options in nested format but too late now.
         */ 
@@ -441,6 +415,11 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
                        if (ret)
                                return ret;
                }
+                if (tb[TCA_NETEM_REORDER-1]) {
+                        ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]);
+                        if (ret)
+                                return ret;
+                }
        }
@@ -455,11 +434,9 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt)
        if (!opt)
                return -EINVAL;
-        skb_queue_head_init(&q->delayed);
        init_timer(&q->timer);
        q->timer.function = netem_watchdog;
        q->timer.data = (unsigned long) sch;
-        q->counter = 0;
        q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
        if (!q->qdisc) {
@@ -491,6 +468,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
        struct rtattr *rta = (struct rtattr *) b;
        struct tc_netem_qopt qopt;
        struct tc_netem_corr cor;
+        struct tc_netem_reorder reorder;
        qopt.latency = q->latency;
        qopt.jitter = q->jitter;
@@ -504,6 +482,11 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
        cor.loss_corr = q->loss_cor.rho;
        cor.dup_corr = q->dup_cor.rho;
        RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
+        reorder.probability = q->reorder;
+        reorder.correlation = q->reorder_cor.rho;
+        RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
        rta->rta_len = skb->tail - b;
        return skb->len;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c478fc8db776..c420eba4876b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -770,33 +770,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
                if (err)
                        goto out_mknod_parent;
-                /*
-                 * Yucky last component or no last component at all?
+                dentry = lookup_create(&nd, 0);
-                 * (foo/., foo/.., /////)
-                 */
-                err = -EEXIST;
-                if (nd.last_type != LAST_NORM)
-                        goto out_mknod;
-                /*
-                 * Lock the directory.
-                 */
-                down(&nd.dentry->d_inode->i_sem);
-                /*
-                 * Do the final lookup.
-                 */
-                dentry = lookup_hash(&nd.last, nd.dentry);
                err = PTR_ERR(dentry);
                if (IS_ERR(dentry))
                        goto out_mknod_unlock;
-                err = -ENOENT;
-                /*
-                 * Special case - lookup gave negative, but... we had foo/bar/
-                 * From the vfs_mknod() POV we just have a negative dentry -
-                 * all is fine. Let's be bastards - you had / on the end, you've
-                 * been asking for (non-existent) directory. -ENOENT for you.
-                 */
-                if (nd.last.name[nd.last.len] && !dentry->d_inode)
-                        goto out_mknod_dput;
                /*
                 * All right, let's create it.
                 */
@@ -845,7 +824,6 @@ out_mknod_dput:
        dput(dentry);
 out_mknod_unlock:
        up(&nd.dentry->d_inode->i_sem);
-out_mknod:
        path_release(&nd);
 out_mknod_parent:
        if (err==-EEXIST)
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 080aae243ce0..2f4531fcaca2 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -698,7 +698,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
                                return -ENOMEM;
                        if (skb1->sk)
-                                skb_set_owner_w(skb, skb1->sk);
+                                skb_set_owner_w(skb2, skb1->sk);
                        /* Looking around. Are we still alive?
                         * OK, link new skb, drop old one */
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 55ed979db144..d07f5ce31824 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1136,7 +1136,7 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
        struct xfrm_dst *last;
        u32 mtu;
-        if (!dst_check(dst->path, 0) ||
+        if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
            (dst->dev && !netif_running(dst->dev)))
                return 0;
@@ -1156,7 +1156,7 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
                        xdst->child_mtu_cached = mtu;
                }
-                if (!dst_check(xdst->route, 0))
+                if (!dst_check(xdst->route, xdst->route_cookie))
                        return 0;
                mtu = dst_mtu(xdst->route);
                if (xdst->route_mtu_cached != mtu) {
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 5ddda2c98af9..97509011c274 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -34,14 +34,21 @@ static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
 {
        struct rtattr *rt = xfrma[type - 1];
        struct xfrm_algo *algp;
+        int len;
        if (!rt)
                return 0;
-        if ((rt->rta_len - sizeof(*rt)) < sizeof(*algp))
+        len = (rt->rta_len - sizeof(*rt)) - sizeof(*algp);
+        if (len < 0)
                return -EINVAL;
        algp = RTA_DATA(rt);
+        len -= (algp->alg_key_len + 7U) / 8; 
+        if (len < 0)
+                return -EINVAL;
        switch (type) {
        case XFRMA_ALG_AUTH:
                if (!algp->alg_key_len &&
@@ -162,6 +169,7 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
        struct rtattr *rta = u_arg;
        struct xfrm_algo *p, *ualg;
        struct xfrm_algo_desc *algo;
+        int len;
        if (!rta)
                return 0;
@@ -173,11 +181,12 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
                return -ENOSYS;
        *props = algo->desc.sadb_alg_id;
-        p = kmalloc(sizeof(*ualg) + ualg->alg_key_len, GFP_KERNEL);
+        len = sizeof(*ualg) + (ualg->alg_key_len + 7U) / 8;
+        p = kmalloc(len, GFP_KERNEL);
        if (!p)
                return -ENOMEM;
-        memcpy(p, ualg, sizeof(*ualg) + ualg->alg_key_len);
+        memcpy(p, ualg, len);
        *algpp = p;
        return 0;
 }
author	David Woodhouse <dwmw2@shinybook.infradead.org>	2005-06-02 11:39:11 -0400
committer	David Woodhouse <dwmw2@shinybook.infradead.org>	2005-06-02 11:39:11 -0400
commit	1c3f45ab2f7f879ea482501c83899505c31f7539 (patch)
tree	672465b3b9b3e2e26a8caf74ed64aa6885c52c13 /net
parent	4bcff1b37e7c3aed914d1ce5b45994adc7dbf455 (diff)
parent	e0d6d71440a3a35c6fc2dde09f8e8d4d7bd44dda (diff)