aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorDavid Woodhouse <dwmw2@shinybook.infradead.org>2005-06-02 11:39:11 -0400
committerDavid Woodhouse <dwmw2@shinybook.infradead.org>2005-06-02 11:39:11 -0400
commit1c3f45ab2f7f879ea482501c83899505c31f7539 (patch)
tree672465b3b9b3e2e26a8caf74ed64aa6885c52c13 /net
parent4bcff1b37e7c3aed914d1ce5b45994adc7dbf455 (diff)
parente0d6d71440a3a35c6fc2dde09f8e8d4d7bd44dda (diff)
Merge with master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Diffstat (limited to 'net')
-rw-r--r--net/802/tr.c26
-rw-r--r--net/bridge/br_device.c15
-rw-r--r--net/bridge/br_if.c23
-rw-r--r--net/bridge/br_input.c8
-rw-r--r--net/bridge/br_notify.c9
-rw-r--r--net/bridge/br_private.h1
-rw-r--r--net/bridge/br_stp_bpdu.c3
-rw-r--r--net/core/dev.c12
-rw-r--r--net/core/ethtool.c20
-rw-r--r--net/core/net-sysfs.c3
-rw-r--r--net/ipv4/devinet.c34
-rw-r--r--net/ipv4/esp4.c2
-rw-r--r--net/ipv4/ip_output.c8
-rw-r--r--net/ipv4/ipvs/ip_vs_xmit.c1
-rw-r--r--net/ipv4/multipath_drr.c18
-rw-r--r--net/ipv4/multipath_rr.c20
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c28
-rw-r--r--net/ipv4/netfilter/ip_queue.c10
-rw-r--r--net/ipv4/tcp_input.c11
-rw-r--r--net/ipv4/udp.c12
-rw-r--r--net/ipv6/ip6_flowlabel.c10
-rw-r--r--net/ipv6/ip6_output.c14
-rw-r--r--net/ipv6/xfrm6_output.c1
-rw-r--r--net/ipv6/xfrm6_policy.c4
-rw-r--r--net/netlink/af_netlink.c13
-rw-r--r--net/sched/sch_dsmark.c16
-rw-r--r--net/sched/sch_netem.c209
-rw-r--r--net/unix/af_unix.c28
-rw-r--r--net/xfrm/xfrm_algo.c2
-rw-r--r--net/xfrm/xfrm_policy.c4
-rw-r--r--net/xfrm/xfrm_user.c15
31 files changed, 310 insertions, 270 deletions
diff --git a/net/802/tr.c b/net/802/tr.c
index 85293ccf7efc..a755e880f4ba 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -47,12 +47,12 @@ static void rif_check_expire(unsigned long dummy);
47 * Each RIF entry we learn is kept this way 47 * Each RIF entry we learn is kept this way
48 */ 48 */
49 49
50struct rif_cache_s { 50struct rif_cache {
51 unsigned char addr[TR_ALEN]; 51 unsigned char addr[TR_ALEN];
52 int iface; 52 int iface;
53 __u16 rcf; 53 __be16 rcf;
54 __u16 rseg[8]; 54 __be16 rseg[8];
55 struct rif_cache_s *next; 55 struct rif_cache *next;
56 unsigned long last_used; 56 unsigned long last_used;
57 unsigned char local_ring; 57 unsigned char local_ring;
58}; 58};
@@ -64,7 +64,7 @@ struct rif_cache_s {
64 * up a lot. 64 * up a lot.
65 */ 65 */
66 66
67static struct rif_cache_s *rif_table[RIF_TABLE_SIZE]; 67static struct rif_cache *rif_table[RIF_TABLE_SIZE];
68 68
69static DEFINE_SPINLOCK(rif_lock); 69static DEFINE_SPINLOCK(rif_lock);
70 70
@@ -249,7 +249,7 @@ void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,struct net_device *
249{ 249{
250 int slack; 250 int slack;
251 unsigned int hash; 251 unsigned int hash;
252 struct rif_cache_s *entry; 252 struct rif_cache *entry;
253 unsigned char *olddata; 253 unsigned char *olddata;
254 static const unsigned char mcast_func_addr[] 254 static const unsigned char mcast_func_addr[]
255 = {0xC0,0x00,0x00,0x04,0x00,0x00}; 255 = {0xC0,0x00,0x00,0x04,0x00,0x00};
@@ -337,7 +337,7 @@ printk("source routing for %02X:%02X:%02X:%02X:%02X:%02X\n",trh->daddr[0],
337static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev) 337static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev)
338{ 338{
339 unsigned int hash, rii_p = 0; 339 unsigned int hash, rii_p = 0;
340 struct rif_cache_s *entry; 340 struct rif_cache *entry;
341 341
342 342
343 spin_lock_bh(&rif_lock); 343 spin_lock_bh(&rif_lock);
@@ -373,7 +373,7 @@ printk("adding rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n",
373 * FIXME: We ought to keep some kind of cache size 373 * FIXME: We ought to keep some kind of cache size
374 * limiting and adjust the timers to suit. 374 * limiting and adjust the timers to suit.
375 */ 375 */
376 entry=kmalloc(sizeof(struct rif_cache_s),GFP_ATOMIC); 376 entry=kmalloc(sizeof(struct rif_cache),GFP_ATOMIC);
377 377
378 if(!entry) 378 if(!entry)
379 { 379 {
@@ -435,7 +435,7 @@ static void rif_check_expire(unsigned long dummy)
435 spin_lock_bh(&rif_lock); 435 spin_lock_bh(&rif_lock);
436 436
437 for(i =0; i < RIF_TABLE_SIZE; i++) { 437 for(i =0; i < RIF_TABLE_SIZE; i++) {
438 struct rif_cache_s *entry, **pentry; 438 struct rif_cache *entry, **pentry;
439 439
440 pentry = rif_table+i; 440 pentry = rif_table+i;
441 while((entry=*pentry) != NULL) { 441 while((entry=*pentry) != NULL) {
@@ -467,10 +467,10 @@ static void rif_check_expire(unsigned long dummy)
467 467
468#ifdef CONFIG_PROC_FS 468#ifdef CONFIG_PROC_FS
469 469
470static struct rif_cache_s *rif_get_idx(loff_t pos) 470static struct rif_cache *rif_get_idx(loff_t pos)
471{ 471{
472 int i; 472 int i;
473 struct rif_cache_s *entry; 473 struct rif_cache *entry;
474 loff_t off = 0; 474 loff_t off = 0;
475 475
476 for(i = 0; i < RIF_TABLE_SIZE; i++) 476 for(i = 0; i < RIF_TABLE_SIZE; i++)
@@ -493,7 +493,7 @@ static void *rif_seq_start(struct seq_file *seq, loff_t *pos)
493static void *rif_seq_next(struct seq_file *seq, void *v, loff_t *pos) 493static void *rif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
494{ 494{
495 int i; 495 int i;
496 struct rif_cache_s *ent = v; 496 struct rif_cache *ent = v;
497 497
498 ++*pos; 498 ++*pos;
499 499
@@ -522,7 +522,7 @@ static void rif_seq_stop(struct seq_file *seq, void *v)
522static int rif_seq_show(struct seq_file *seq, void *v) 522static int rif_seq_show(struct seq_file *seq, void *v)
523{ 523{
524 int j, rcf_len, segment, brdgnmb; 524 int j, rcf_len, segment, brdgnmb;
525 struct rif_cache_s *entry = v; 525 struct rif_cache *entry = v;
526 526
527 if (v == SEQ_START_TOKEN) 527 if (v == SEQ_START_TOKEN)
528 seq_puts(seq, 528 seq_puts(seq,
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index d9b72fde433c..f564ee99782d 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -21,10 +21,7 @@
21 21
22static struct net_device_stats *br_dev_get_stats(struct net_device *dev) 22static struct net_device_stats *br_dev_get_stats(struct net_device *dev)
23{ 23{
24 struct net_bridge *br; 24 struct net_bridge *br = netdev_priv(dev);
25
26 br = dev->priv;
27
28 return &br->statistics; 25 return &br->statistics;
29} 26}
30 27
@@ -54,9 +51,11 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
54 51
55static int br_dev_open(struct net_device *dev) 52static int br_dev_open(struct net_device *dev)
56{ 53{
57 netif_start_queue(dev); 54 struct net_bridge *br = netdev_priv(dev);
58 55
59 br_stp_enable_bridge(dev->priv); 56 br_features_recompute(br);
57 netif_start_queue(dev);
58 br_stp_enable_bridge(br);
60 59
61 return 0; 60 return 0;
62} 61}
@@ -67,7 +66,7 @@ static void br_dev_set_multicast_list(struct net_device *dev)
67 66
68static int br_dev_stop(struct net_device *dev) 67static int br_dev_stop(struct net_device *dev)
69{ 68{
70 br_stp_disable_bridge(dev->priv); 69 br_stp_disable_bridge(netdev_priv(dev));
71 70
72 netif_stop_queue(dev); 71 netif_stop_queue(dev);
73 72
@@ -76,7 +75,7 @@ static int br_dev_stop(struct net_device *dev)
76 75
77static int br_change_mtu(struct net_device *dev, int new_mtu) 76static int br_change_mtu(struct net_device *dev, int new_mtu)
78{ 77{
79 if ((new_mtu < 68) || new_mtu > br_min_mtu(dev->priv)) 78 if (new_mtu < 68 || new_mtu > br_min_mtu(netdev_priv(dev)))
80 return -EINVAL; 79 return -EINVAL;
81 80
82 dev->mtu = new_mtu; 81 dev->mtu = new_mtu;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 69872bf3b87e..91bb895375f4 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -314,6 +314,28 @@ int br_min_mtu(const struct net_bridge *br)
314 return mtu; 314 return mtu;
315} 315}
316 316
317/*
318 * Recomputes features using slave's features
319 */
320void br_features_recompute(struct net_bridge *br)
321{
322 struct net_bridge_port *p;
323 unsigned long features, checksum;
324
325 features = NETIF_F_SG | NETIF_F_FRAGLIST
326 | NETIF_F_HIGHDMA | NETIF_F_TSO;
327 checksum = NETIF_F_IP_CSUM; /* least common subset */
328
329 list_for_each_entry(p, &br->port_list, list) {
330 if (!(p->dev->features
331 & (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)))
332 checksum = 0;
333 features &= p->dev->features;
334 }
335
336 br->dev->features = features | checksum | NETIF_F_LLTX;
337}
338
317/* called with RTNL */ 339/* called with RTNL */
318int br_add_if(struct net_bridge *br, struct net_device *dev) 340int br_add_if(struct net_bridge *br, struct net_device *dev)
319{ 341{
@@ -368,6 +390,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
368 390
369 spin_lock_bh(&br->lock); 391 spin_lock_bh(&br->lock);
370 br_stp_recalculate_bridge_id(br); 392 br_stp_recalculate_bridge_id(br);
393 br_features_recompute(br);
371 spin_unlock_bh(&br->lock); 394 spin_unlock_bh(&br->lock);
372 395
373 return 0; 396 return 0;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 2b1cce46cab4..8f5f2e730992 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -26,7 +26,7 @@ static int br_pass_frame_up_finish(struct sk_buff *skb)
26#ifdef CONFIG_NETFILTER_DEBUG 26#ifdef CONFIG_NETFILTER_DEBUG
27 skb->nf_debug = 0; 27 skb->nf_debug = 0;
28#endif 28#endif
29 netif_rx(skb); 29 netif_receive_skb(skb);
30 30
31 return 0; 31 return 0;
32} 32}
@@ -54,6 +54,9 @@ int br_handle_frame_finish(struct sk_buff *skb)
54 struct net_bridge_fdb_entry *dst; 54 struct net_bridge_fdb_entry *dst;
55 int passedup = 0; 55 int passedup = 0;
56 56
57 /* insert into forwarding database after filtering to avoid spoofing */
58 br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
59
57 if (br->dev->flags & IFF_PROMISC) { 60 if (br->dev->flags & IFF_PROMISC) {
58 struct sk_buff *skb2; 61 struct sk_buff *skb2;
59 62
@@ -108,8 +111,7 @@ int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
108 if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) 111 if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
109 goto err; 112 goto err;
110 113
111 if (p->state == BR_STATE_LEARNING || 114 if (p->state == BR_STATE_LEARNING)
112 p->state == BR_STATE_FORWARDING)
113 br_fdb_update(p->br, p, eth_hdr(skb)->h_source); 115 br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
114 116
115 if (p->br->stp_enabled && 117 if (p->br->stp_enabled &&
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index f8fb49e34764..917311c6828b 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -65,6 +65,15 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
65 } 65 }
66 break; 66 break;
67 67
68 case NETDEV_FEAT_CHANGE:
69 if (br->dev->flags & IFF_UP)
70 br_features_recompute(br);
71
72 /* could do recursive feature change notification
73 * but who would care??
74 */
75 break;
76
68 case NETDEV_DOWN: 77 case NETDEV_DOWN:
69 if (br->dev->flags & IFF_UP) 78 if (br->dev->flags & IFF_UP)
70 br_stp_disable_port(p); 79 br_stp_disable_port(p);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 54d63f1372a0..bdf95a74d8cd 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -174,6 +174,7 @@ extern int br_add_if(struct net_bridge *br,
174extern int br_del_if(struct net_bridge *br, 174extern int br_del_if(struct net_bridge *br,
175 struct net_device *dev); 175 struct net_device *dev);
176extern int br_min_mtu(const struct net_bridge *br); 176extern int br_min_mtu(const struct net_bridge *br);
177extern void br_features_recompute(struct net_bridge *br);
177 178
178/* br_input.c */ 179/* br_input.c */
179extern int br_handle_frame_finish(struct sk_buff *skb); 180extern int br_handle_frame_finish(struct sk_buff *skb);
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index b91a875aca01..d071f1c9ad0b 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -140,6 +140,9 @@ int br_stp_handle_bpdu(struct sk_buff *skb)
140 struct net_bridge *br = p->br; 140 struct net_bridge *br = p->br;
141 unsigned char *buf; 141 unsigned char *buf;
142 142
143 /* insert into forwarding database after filtering to avoid spoofing */
144 br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
145
143 /* need at least the 802 and STP headers */ 146 /* need at least the 802 and STP headers */
144 if (!pskb_may_pull(skb, sizeof(header)+1) || 147 if (!pskb_may_pull(skb, sizeof(header)+1) ||
145 memcmp(skb->data, header, sizeof(header))) 148 memcmp(skb->data, header, sizeof(header)))
diff --git a/net/core/dev.c b/net/core/dev.c
index d4d9e2680adb..f15a3ffff635 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -761,6 +761,18 @@ int dev_change_name(struct net_device *dev, char *newname)
761} 761}
762 762
763/** 763/**
764 * netdev_features_change - device changes features
765 * @dev: device to cause notification
766 *
767 * Called to indicate a device has changed features.
768 */
769void netdev_features_change(struct net_device *dev)
770{
771 notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
772}
773EXPORT_SYMBOL(netdev_features_change);
774
775/**
764 * netdev_state_change - device changes state 776 * netdev_state_change - device changes state
765 * @dev: device to cause notification 777 * @dev: device to cause notification
766 * 778 *
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f05fde97c43d..8ec484894d68 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -29,7 +29,7 @@ u32 ethtool_op_get_link(struct net_device *dev)
29 29
30u32 ethtool_op_get_tx_csum(struct net_device *dev) 30u32 ethtool_op_get_tx_csum(struct net_device *dev)
31{ 31{
32 return (dev->features & NETIF_F_IP_CSUM) != 0; 32 return (dev->features & (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM)) != 0;
33} 33}
34 34
35int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) 35int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
@@ -42,6 +42,15 @@ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
42 return 0; 42 return 0;
43} 43}
44 44
45int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
46{
47 if (data)
48 dev->features |= NETIF_F_HW_CSUM;
49 else
50 dev->features &= ~NETIF_F_HW_CSUM;
51
52 return 0;
53}
45u32 ethtool_op_get_sg(struct net_device *dev) 54u32 ethtool_op_get_sg(struct net_device *dev)
46{ 55{
47 return (dev->features & NETIF_F_SG) != 0; 56 return (dev->features & NETIF_F_SG) != 0;
@@ -682,6 +691,7 @@ int dev_ethtool(struct ifreq *ifr)
682 void __user *useraddr = ifr->ifr_data; 691 void __user *useraddr = ifr->ifr_data;
683 u32 ethcmd; 692 u32 ethcmd;
684 int rc; 693 int rc;
694 unsigned long old_features;
685 695
686 /* 696 /*
687 * XXX: This can be pushed down into the ethtool_* handlers that 697 * XXX: This can be pushed down into the ethtool_* handlers that
@@ -703,6 +713,8 @@ int dev_ethtool(struct ifreq *ifr)
703 if ((rc = dev->ethtool_ops->begin(dev)) < 0) 713 if ((rc = dev->ethtool_ops->begin(dev)) < 0)
704 return rc; 714 return rc;
705 715
716 old_features = dev->features;
717
706 switch (ethcmd) { 718 switch (ethcmd) {
707 case ETHTOOL_GSET: 719 case ETHTOOL_GSET:
708 rc = ethtool_get_settings(dev, useraddr); 720 rc = ethtool_get_settings(dev, useraddr);
@@ -712,7 +724,6 @@ int dev_ethtool(struct ifreq *ifr)
712 break; 724 break;
713 case ETHTOOL_GDRVINFO: 725 case ETHTOOL_GDRVINFO:
714 rc = ethtool_get_drvinfo(dev, useraddr); 726 rc = ethtool_get_drvinfo(dev, useraddr);
715
716 break; 727 break;
717 case ETHTOOL_GREGS: 728 case ETHTOOL_GREGS:
718 rc = ethtool_get_regs(dev, useraddr); 729 rc = ethtool_get_regs(dev, useraddr);
@@ -801,6 +812,10 @@ int dev_ethtool(struct ifreq *ifr)
801 812
802 if(dev->ethtool_ops->complete) 813 if(dev->ethtool_ops->complete)
803 dev->ethtool_ops->complete(dev); 814 dev->ethtool_ops->complete(dev);
815
816 if (old_features != dev->features)
817 netdev_features_change(dev);
818
804 return rc; 819 return rc;
805 820
806 ioctl: 821 ioctl:
@@ -817,3 +832,4 @@ EXPORT_SYMBOL(ethtool_op_get_tx_csum);
817EXPORT_SYMBOL(ethtool_op_set_sg); 832EXPORT_SYMBOL(ethtool_op_set_sg);
818EXPORT_SYMBOL(ethtool_op_set_tso); 833EXPORT_SYMBOL(ethtool_op_set_tso);
819EXPORT_SYMBOL(ethtool_op_set_tx_csum); 834EXPORT_SYMBOL(ethtool_op_set_tx_csum);
835EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 060f703659e8..910eb4c05a47 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -21,6 +21,7 @@
21#define to_net_dev(class) container_of(class, struct net_device, class_dev) 21#define to_net_dev(class) container_of(class, struct net_device, class_dev)
22 22
23static const char fmt_hex[] = "%#x\n"; 23static const char fmt_hex[] = "%#x\n";
24static const char fmt_long_hex[] = "%#lx\n";
24static const char fmt_dec[] = "%d\n"; 25static const char fmt_dec[] = "%d\n";
25static const char fmt_ulong[] = "%lu\n"; 26static const char fmt_ulong[] = "%lu\n";
26 27
@@ -91,7 +92,7 @@ static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL) \
91NETDEVICE_ATTR(addr_len, fmt_dec); 92NETDEVICE_ATTR(addr_len, fmt_dec);
92NETDEVICE_ATTR(iflink, fmt_dec); 93NETDEVICE_ATTR(iflink, fmt_dec);
93NETDEVICE_ATTR(ifindex, fmt_dec); 94NETDEVICE_ATTR(ifindex, fmt_dec);
94NETDEVICE_ATTR(features, fmt_hex); 95NETDEVICE_ATTR(features, fmt_long_hex);
95NETDEVICE_ATTR(type, fmt_dec); 96NETDEVICE_ATTR(type, fmt_dec);
96 97
97/* use same locking rules as GIFHWADDR ioctl's */ 98/* use same locking rules as GIFHWADDR ioctl's */
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 3cc96730c4ed..478a30179a52 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -233,11 +233,14 @@ int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b)
233static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 233static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
234 int destroy) 234 int destroy)
235{ 235{
236 struct in_ifaddr *promote = NULL;
236 struct in_ifaddr *ifa1 = *ifap; 237 struct in_ifaddr *ifa1 = *ifap;
237 238
238 ASSERT_RTNL(); 239 ASSERT_RTNL();
239 240
240 /* 1. Deleting primary ifaddr forces deletion all secondaries */ 241 /* 1. Deleting primary ifaddr forces deletion all secondaries
242 * unless alias promotion is set
243 **/
241 244
242 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) { 245 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
243 struct in_ifaddr *ifa; 246 struct in_ifaddr *ifa;
@@ -251,11 +254,16 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
251 continue; 254 continue;
252 } 255 }
253 256
254 *ifap1 = ifa->ifa_next; 257 if (!IN_DEV_PROMOTE_SECONDARIES(in_dev)) {
258 *ifap1 = ifa->ifa_next;
255 259
256 rtmsg_ifa(RTM_DELADDR, ifa); 260 rtmsg_ifa(RTM_DELADDR, ifa);
257 notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); 261 notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa);
258 inet_free_ifa(ifa); 262 inet_free_ifa(ifa);
263 } else {
264 promote = ifa;
265 break;
266 }
259 } 267 }
260 } 268 }
261 269
@@ -281,6 +289,13 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
281 if (!in_dev->ifa_list) 289 if (!in_dev->ifa_list)
282 inetdev_destroy(in_dev); 290 inetdev_destroy(in_dev);
283 } 291 }
292
293 if (promote && IN_DEV_PROMOTE_SECONDARIES(in_dev)) {
294 /* not sure if we should send a delete notify first? */
295 promote->ifa_flags &= ~IFA_F_SECONDARY;
296 rtmsg_ifa(RTM_NEWADDR, promote);
297 notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote);
298 }
284} 299}
285 300
286static int inet_insert_ifa(struct in_ifaddr *ifa) 301static int inet_insert_ifa(struct in_ifaddr *ifa)
@@ -1384,6 +1399,15 @@ static struct devinet_sysctl_table {
1384 .proc_handler = &ipv4_doint_and_flush, 1399 .proc_handler = &ipv4_doint_and_flush,
1385 .strategy = &ipv4_doint_and_flush_strategy, 1400 .strategy = &ipv4_doint_and_flush_strategy,
1386 }, 1401 },
1402 {
1403 .ctl_name = NET_IPV4_CONF_PROMOTE_SECONDARIES,
1404 .procname = "promote_secondaries",
1405 .data = &ipv4_devconf.promote_secondaries,
1406 .maxlen = sizeof(int),
1407 .mode = 0644,
1408 .proc_handler = &ipv4_doint_and_flush,
1409 .strategy = &ipv4_doint_and_flush_strategy,
1410 },
1387 }, 1411 },
1388 .devinet_dev = { 1412 .devinet_dev = {
1389 { 1413 {
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 053a883247ba..eae84cc39d3f 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -478,7 +478,7 @@ static int __init esp4_init(void)
478{ 478{
479 struct xfrm_decap_state decap; 479 struct xfrm_decap_state decap;
480 480
481 if (sizeof(struct esp_decap_data) < 481 if (sizeof(struct esp_decap_data) >
482 sizeof(decap.decap_data)) { 482 sizeof(decap.decap_data)) {
483 extern void decap_data_too_small(void); 483 extern void decap_data_too_small(void);
484 484
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index daebd93fd8a0..760dc8238d65 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -490,6 +490,14 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
490 /* Partially cloned skb? */ 490 /* Partially cloned skb? */
491 if (skb_shared(frag)) 491 if (skb_shared(frag))
492 goto slow_path; 492 goto slow_path;
493
494 BUG_ON(frag->sk);
495 if (skb->sk) {
496 sock_hold(skb->sk);
497 frag->sk = skb->sk;
498 frag->destructor = sock_wfree;
499 skb->truesize -= frag->truesize;
500 }
493 } 501 }
494 502
495 /* Everything is OK. Generate! */ 503 /* Everything is OK. Generate! */
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index faa6176bbeb1..de21da00057f 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -508,7 +508,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
508 rc = NF_ACCEPT; 508 rc = NF_ACCEPT;
509 /* do not touch skb anymore */ 509 /* do not touch skb anymore */
510 atomic_inc(&cp->in_pkts); 510 atomic_inc(&cp->in_pkts);
511 __ip_vs_conn_put(cp);
512 goto out; 511 goto out;
513 } 512 }
514 513
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c
index 9349686131fc..cf2e6bcf7973 100644
--- a/net/ipv4/multipath_drr.c
+++ b/net/ipv4/multipath_drr.c
@@ -57,7 +57,6 @@ struct multipath_device {
57 57
58static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES]; 58static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES];
59static DEFINE_SPINLOCK(state_lock); 59static DEFINE_SPINLOCK(state_lock);
60static struct rtable *last_selection = NULL;
61 60
62static int inline __multipath_findslot(void) 61static int inline __multipath_findslot(void)
63{ 62{
@@ -111,11 +110,6 @@ struct notifier_block drr_dev_notifier = {
111 .notifier_call = drr_dev_event, 110 .notifier_call = drr_dev_event,
112}; 111};
113 112
114static void drr_remove(struct rtable *rt)
115{
116 if (last_selection == rt)
117 last_selection = NULL;
118}
119 113
120static void drr_safe_inc(atomic_t *usecount) 114static void drr_safe_inc(atomic_t *usecount)
121{ 115{
@@ -144,14 +138,6 @@ static void drr_select_route(const struct flowi *flp,
144 int devidx = -1; 138 int devidx = -1;
145 int cur_min_devidx = -1; 139 int cur_min_devidx = -1;
146 140
147 /* if necessary and possible utilize the old alternative */
148 if ((flp->flags & FLOWI_FLAG_MULTIPATHOLDROUTE) != 0 &&
149 last_selection != NULL) {
150 result = last_selection;
151 *rp = result;
152 return;
153 }
154
155 /* 1. make sure all alt. nexthops have the same GC related data */ 141 /* 1. make sure all alt. nexthops have the same GC related data */
156 /* 2. determine the new candidate to be returned */ 142 /* 2. determine the new candidate to be returned */
157 result = NULL; 143 result = NULL;
@@ -229,12 +215,10 @@ static void drr_select_route(const struct flowi *flp,
229 } 215 }
230 216
231 *rp = result; 217 *rp = result;
232 last_selection = result;
233} 218}
234 219
235static struct ip_mp_alg_ops drr_ops = { 220static struct ip_mp_alg_ops drr_ops = {
236 .mp_alg_select_route = drr_select_route, 221 .mp_alg_select_route = drr_select_route,
237 .mp_alg_remove = drr_remove,
238}; 222};
239 223
240static int __init drr_init(void) 224static int __init drr_init(void)
@@ -244,7 +228,7 @@ static int __init drr_init(void)
244 if (err) 228 if (err)
245 return err; 229 return err;
246 230
247 err = multipath_alg_register(&drr_ops, IP_MP_ALG_RR); 231 err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR);
248 if (err) 232 if (err)
249 goto fail; 233 goto fail;
250 234
diff --git a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c
index 554a82568160..061b6b253982 100644
--- a/net/ipv4/multipath_rr.c
+++ b/net/ipv4/multipath_rr.c
@@ -47,29 +47,12 @@
47#include <net/checksum.h> 47#include <net/checksum.h>
48#include <net/ip_mp_alg.h> 48#include <net/ip_mp_alg.h>
49 49
50#define MULTIPATH_MAX_CANDIDATES 40
51
52static struct rtable* last_used = NULL;
53
54static void rr_remove(struct rtable *rt)
55{
56 if (last_used == rt)
57 last_used = NULL;
58}
59
60static void rr_select_route(const struct flowi *flp, 50static void rr_select_route(const struct flowi *flp,
61 struct rtable *first, struct rtable **rp) 51 struct rtable *first, struct rtable **rp)
62{ 52{
63 struct rtable *nh, *result, *min_use_cand = NULL; 53 struct rtable *nh, *result, *min_use_cand = NULL;
64 int min_use = -1; 54 int min_use = -1;
65 55
66 /* if necessary and possible utilize the old alternative */
67 if ((flp->flags & FLOWI_FLAG_MULTIPATHOLDROUTE) != 0 &&
68 last_used != NULL) {
69 result = last_used;
70 goto out;
71 }
72
73 /* 1. make sure all alt. nexthops have the same GC related data 56 /* 1. make sure all alt. nexthops have the same GC related data
74 * 2. determine the new candidate to be returned 57 * 2. determine the new candidate to be returned
75 */ 58 */
@@ -90,15 +73,12 @@ static void rr_select_route(const struct flowi *flp,
90 if (!result) 73 if (!result)
91 result = first; 74 result = first;
92 75
93out:
94 last_used = result;
95 result->u.dst.__use++; 76 result->u.dst.__use++;
96 *rp = result; 77 *rp = result;
97} 78}
98 79
99static struct ip_mp_alg_ops rr_ops = { 80static struct ip_mp_alg_ops rr_ops = {
100 .mp_alg_select_route = rr_select_route, 81 .mp_alg_select_route = rr_select_route,
101 .mp_alg_remove = rr_remove,
102}; 82};
103 83
104static int __init rr_init(void) 84static int __init rr_init(void)
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 28d9425d5c39..09e824622977 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -940,37 +940,25 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
940struct sk_buff * 940struct sk_buff *
941ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) 941ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
942{ 942{
943 struct sock *sk = skb->sk;
944#ifdef CONFIG_NETFILTER_DEBUG 943#ifdef CONFIG_NETFILTER_DEBUG
945 unsigned int olddebug = skb->nf_debug; 944 unsigned int olddebug = skb->nf_debug;
946#endif 945#endif
947 946
948 if (sk) { 947 skb_orphan(skb);
949 sock_hold(sk);
950 skb_orphan(skb);
951 }
952 948
953 local_bh_disable(); 949 local_bh_disable();
954 skb = ip_defrag(skb, user); 950 skb = ip_defrag(skb, user);
955 local_bh_enable(); 951 local_bh_enable();
956 952
957 if (!skb) { 953 if (skb) {
958 if (sk) 954 ip_send_check(skb->nh.iph);
959 sock_put(sk); 955 skb->nfcache |= NFC_ALTERED;
960 return skb;
961 }
962
963 if (sk) {
964 skb_set_owner_w(skb, sk);
965 sock_put(sk);
966 }
967
968 ip_send_check(skb->nh.iph);
969 skb->nfcache |= NFC_ALTERED;
970#ifdef CONFIG_NETFILTER_DEBUG 956#ifdef CONFIG_NETFILTER_DEBUG
971 /* Packet path as if nothing had happened. */ 957 /* Packet path as if nothing had happened. */
972 skb->nf_debug = olddebug; 958 skb->nf_debug = olddebug;
973#endif 959#endif
960 }
961
974 return skb; 962 return skb;
975} 963}
976 964
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index e5746b674413..eda1fba431a4 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -3,6 +3,7 @@
3 * communicating with userspace via netlink. 3 * communicating with userspace via netlink.
4 * 4 *
5 * (C) 2000-2002 James Morris <jmorris@intercode.com.au> 5 * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
6 * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
6 * 7 *
7 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
@@ -17,6 +18,7 @@
17 * 2005-01-10: Added /proc counter for dropped packets; fixed so 18 * 2005-01-10: Added /proc counter for dropped packets; fixed so
18 * packets aren't delivered to user space if they're going 19 * packets aren't delivered to user space if they're going
19 * to be dropped. 20 * to be dropped.
21 * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte)
20 * 22 *
21 */ 23 */
22#include <linux/module.h> 24#include <linux/module.h>
@@ -71,7 +73,15 @@ static DECLARE_MUTEX(ipqnl_sem);
71static void 73static void
72ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict) 74ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
73{ 75{
76 /* The TCP input path (and probably other code) assumes it is called
77 * from softirq context, not from a syscall as ipq_issue_verdict is.
78 * Otherwise it can deadlock against locks taken from, e.g., the timer
79 * softirq. We therefore emulate softirq context via local_bh_disable() */
80
81 local_bh_disable();
74 nf_reinject(entry->skb, entry->info, verdict); 82 nf_reinject(entry->skb, entry->info, verdict);
83 local_bh_enable();
84
75 kfree(entry); 85 kfree(entry);
76} 86}
77 87
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 79835a67a274..5bad504630a3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4355,16 +4355,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
4355 goto no_ack; 4355 goto no_ack;
4356 } 4356 }
4357 4357
4358 if (eaten) { 4358 __tcp_ack_snd_check(sk, 0);
4359 if (tcp_in_quickack_mode(tp)) {
4360 tcp_send_ack(sk);
4361 } else {
4362 tcp_send_delayed_ack(sk);
4363 }
4364 } else {
4365 __tcp_ack_snd_check(sk, 0);
4366 }
4367
4368no_ack: 4359no_ack:
4369 if (eaten) 4360 if (eaten)
4370 __kfree_skb(skb); 4361 __kfree_skb(skb);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4a6952e3fee9..7c24e64b443f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -738,7 +738,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
738 unsigned long amount; 738 unsigned long amount;
739 739
740 amount = 0; 740 amount = 0;
741 spin_lock_irq(&sk->sk_receive_queue.lock); 741 spin_lock_bh(&sk->sk_receive_queue.lock);
742 skb = skb_peek(&sk->sk_receive_queue); 742 skb = skb_peek(&sk->sk_receive_queue);
743 if (skb != NULL) { 743 if (skb != NULL) {
744 /* 744 /*
@@ -748,7 +748,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
748 */ 748 */
749 amount = skb->len - sizeof(struct udphdr); 749 amount = skb->len - sizeof(struct udphdr);
750 } 750 }
751 spin_unlock_irq(&sk->sk_receive_queue.lock); 751 spin_unlock_bh(&sk->sk_receive_queue.lock);
752 return put_user(amount, (int __user *)arg); 752 return put_user(amount, (int __user *)arg);
753 } 753 }
754 754
@@ -848,12 +848,12 @@ csum_copy_err:
848 /* Clear queue. */ 848 /* Clear queue. */
849 if (flags&MSG_PEEK) { 849 if (flags&MSG_PEEK) {
850 int clear = 0; 850 int clear = 0;
851 spin_lock_irq(&sk->sk_receive_queue.lock); 851 spin_lock_bh(&sk->sk_receive_queue.lock);
852 if (skb == skb_peek(&sk->sk_receive_queue)) { 852 if (skb == skb_peek(&sk->sk_receive_queue)) {
853 __skb_unlink(skb, &sk->sk_receive_queue); 853 __skb_unlink(skb, &sk->sk_receive_queue);
854 clear = 1; 854 clear = 1;
855 } 855 }
856 spin_unlock_irq(&sk->sk_receive_queue.lock); 856 spin_unlock_bh(&sk->sk_receive_queue.lock);
857 if (clear) 857 if (clear)
858 kfree_skb(skb); 858 kfree_skb(skb);
859 } 859 }
@@ -1334,7 +1334,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1334 struct sk_buff_head *rcvq = &sk->sk_receive_queue; 1334 struct sk_buff_head *rcvq = &sk->sk_receive_queue;
1335 struct sk_buff *skb; 1335 struct sk_buff *skb;
1336 1336
1337 spin_lock_irq(&rcvq->lock); 1337 spin_lock_bh(&rcvq->lock);
1338 while ((skb = skb_peek(rcvq)) != NULL) { 1338 while ((skb = skb_peek(rcvq)) != NULL) {
1339 if (udp_checksum_complete(skb)) { 1339 if (udp_checksum_complete(skb)) {
1340 UDP_INC_STATS_BH(UDP_MIB_INERRORS); 1340 UDP_INC_STATS_BH(UDP_MIB_INERRORS);
@@ -1345,7 +1345,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1345 break; 1345 break;
1346 } 1346 }
1347 } 1347 }
1348 spin_unlock_irq(&rcvq->lock); 1348 spin_unlock_bh(&rcvq->lock);
1349 1349
1350 /* nothing to see, move along */ 1350 /* nothing to see, move along */
1351 if (skb == NULL) 1351 if (skb == NULL)
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index a93f6dc51979..0e5f7499debb 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -535,10 +535,12 @@ release:
535 if (err) 535 if (err)
536 goto done; 536 goto done;
537 537
538 /* Do not check for fault */ 538 if (!freq.flr_label) {
539 if (!freq.flr_label) 539 if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
540 copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label, 540 &fl->label, sizeof(fl->label))) {
541 &fl->label, sizeof(fl->label)); 541 /* Intentionally ignore fault. */
542 }
543 }
542 544
543 sfl1->fl = fl; 545 sfl1->fl = fl;
544 sfl1->next = np->ipv6_fl_list; 546 sfl1->next = np->ipv6_fl_list;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0f0711417c9d..b78a53586804 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -552,13 +552,17 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
552 skb_headroom(frag) < hlen) 552 skb_headroom(frag) < hlen)
553 goto slow_path; 553 goto slow_path;
554 554
555 /* Correct socket ownership. */
556 if (frag->sk == NULL)
557 goto slow_path;
558
559 /* Partially cloned skb? */ 555 /* Partially cloned skb? */
560 if (skb_shared(frag)) 556 if (skb_shared(frag))
561 goto slow_path; 557 goto slow_path;
558
559 BUG_ON(frag->sk);
560 if (skb->sk) {
561 sock_hold(skb->sk);
562 frag->sk = skb->sk;
563 frag->destructor = sock_wfree;
564 skb->truesize -= frag->truesize;
565 }
562 } 566 }
563 567
564 err = 0; 568 err = 0;
@@ -1116,12 +1120,10 @@ int ip6_push_pending_frames(struct sock *sk)
1116 tail_skb = &(tmp_skb->next); 1120 tail_skb = &(tmp_skb->next);
1117 skb->len += tmp_skb->len; 1121 skb->len += tmp_skb->len;
1118 skb->data_len += tmp_skb->len; 1122 skb->data_len += tmp_skb->len;
1119#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
1120 skb->truesize += tmp_skb->truesize; 1123 skb->truesize += tmp_skb->truesize;
1121 __sock_put(tmp_skb->sk); 1124 __sock_put(tmp_skb->sk);
1122 tmp_skb->destructor = NULL; 1125 tmp_skb->destructor = NULL;
1123 tmp_skb->sk = NULL; 1126 tmp_skb->sk = NULL;
1124#endif
1125 } 1127 }
1126 1128
1127 ipv6_addr_copy(final_dst, &fl->fl6_dst); 1129 ipv6_addr_copy(final_dst, &fl->fl6_dst);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 601a148f60f3..6b9867717d11 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -84,6 +84,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
84 mtu = IPV6_MIN_MTU; 84 mtu = IPV6_MIN_MTU;
85 85
86 if (skb->len > mtu) { 86 if (skb->len > mtu) {
87 skb->dev = dst->dev;
87 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); 88 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
88 ret = -EMSGSIZE; 89 ret = -EMSGSIZE;
89 } 90 }
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 4429b1a1fe5f..cf1d91e74c82 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -113,6 +113,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
113 113
114 xdst = (struct xfrm_dst *)dst1; 114 xdst = (struct xfrm_dst *)dst1;
115 xdst->route = &rt->u.dst; 115 xdst->route = &rt->u.dst;
116 if (rt->rt6i_node)
117 xdst->route_cookie = rt->rt6i_node->fn_sernum;
116 118
117 dst1->next = dst_prev; 119 dst1->next = dst_prev;
118 dst_prev = dst1; 120 dst_prev = dst1;
@@ -137,6 +139,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
137 139
138 dst_prev->child = &rt->u.dst; 140 dst_prev->child = &rt->u.dst;
139 dst->path = &rt->u.dst; 141 dst->path = &rt->u.dst;
142 if (rt->rt6i_node)
143 ((struct xfrm_dst *)dst)->path_cookie = rt->rt6i_node->fn_sernum;
140 144
141 *dst_p = dst; 145 *dst_p = dst;
142 dst = dst_prev; 146 dst = dst_prev;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 733bf52cef3e..e41ce458c2a9 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -735,11 +735,15 @@ static inline int do_one_broadcast(struct sock *sk,
735 735
736 sock_hold(sk); 736 sock_hold(sk);
737 if (p->skb2 == NULL) { 737 if (p->skb2 == NULL) {
738 if (atomic_read(&p->skb->users) != 1) { 738 if (skb_shared(p->skb)) {
739 p->skb2 = skb_clone(p->skb, p->allocation); 739 p->skb2 = skb_clone(p->skb, p->allocation);
740 } else { 740 } else {
741 p->skb2 = p->skb; 741 p->skb2 = skb_get(p->skb);
742 atomic_inc(&p->skb->users); 742 /*
743 * skb ownership may have been set when
744 * delivered to a previous socket.
745 */
746 skb_orphan(p->skb2);
743 } 747 }
744 } 748 }
745 if (p->skb2 == NULL) { 749 if (p->skb2 == NULL) {
@@ -785,11 +789,12 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
785 sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) 789 sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
786 do_one_broadcast(sk, &info); 790 do_one_broadcast(sk, &info);
787 791
792 kfree_skb(skb);
793
788 netlink_unlock_table(); 794 netlink_unlock_table();
789 795
790 if (info.skb2) 796 if (info.skb2)
791 kfree_skb(info.skb2); 797 kfree_skb(info.skb2);
792 kfree_skb(skb);
793 798
794 if (info.delivered) { 799 if (info.delivered) {
795 if (info.congested && (allocation & __GFP_WAIT)) 800 if (info.congested && (allocation & __GFP_WAIT))
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 8a3db9d95bab..d8bd2a569c7c 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -18,7 +18,7 @@
18#include <asm/byteorder.h> 18#include <asm/byteorder.h>
19 19
20 20
21#if 1 /* control */ 21#if 0 /* control */
22#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) 22#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
23#else 23#else
24#define DPRINTK(format,args...) 24#define DPRINTK(format,args...)
@@ -73,8 +73,13 @@ static int dsmark_graft(struct Qdisc *sch,unsigned long arg,
73 73
74 DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",sch,p,new, 74 DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",sch,p,new,
75 old); 75 old);
76 if (!new) 76
77 new = &noop_qdisc; 77 if (new == NULL) {
78 new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
79 if (new == NULL)
80 new = &noop_qdisc;
81 }
82
78 sch_tree_lock(sch); 83 sch_tree_lock(sch);
79 *old = xchg(&p->q,new); 84 *old = xchg(&p->q,new);
80 if (*old) 85 if (*old)
@@ -163,14 +168,15 @@ static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker)
163 return; 168 return;
164 for (i = 0; i < p->indices; i++) { 169 for (i = 0; i < p->indices; i++) {
165 if (p->mask[i] == 0xff && !p->value[i]) 170 if (p->mask[i] == 0xff && !p->value[i])
166 continue; 171 goto ignore;
167 if (walker->count >= walker->skip) { 172 if (walker->count >= walker->skip) {
168 if (walker->fn(sch, i+1, walker) < 0) { 173 if (walker->fn(sch, i+1, walker) < 0) {
169 walker->stop = 1; 174 walker->stop = 1;
170 break; 175 break;
171 } 176 }
172 } 177 }
173 walker->count++; 178ignore:
179 walker->count++;
174 } 180 }
175} 181}
176 182
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index e0c9fbe73b15..bb9bf8d5003c 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -53,7 +53,6 @@
53 53
54struct netem_sched_data { 54struct netem_sched_data {
55 struct Qdisc *qdisc; 55 struct Qdisc *qdisc;
56 struct sk_buff_head delayed;
57 struct timer_list timer; 56 struct timer_list timer;
58 57
59 u32 latency; 58 u32 latency;
@@ -63,11 +62,12 @@ struct netem_sched_data {
63 u32 gap; 62 u32 gap;
64 u32 jitter; 63 u32 jitter;
65 u32 duplicate; 64 u32 duplicate;
65 u32 reorder;
66 66
67 struct crndstate { 67 struct crndstate {
68 unsigned long last; 68 unsigned long last;
69 unsigned long rho; 69 unsigned long rho;
70 } delay_cor, loss_cor, dup_cor; 70 } delay_cor, loss_cor, dup_cor, reorder_cor;
71 71
72 struct disttable { 72 struct disttable {
73 u32 size; 73 u32 size;
@@ -137,122 +137,68 @@ static long tabledist(unsigned long mu, long sigma,
137 return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; 137 return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
138} 138}
139 139
140/* Put skb in the private delayed queue. */
141static int netem_delay(struct Qdisc *sch, struct sk_buff *skb)
142{
143 struct netem_sched_data *q = qdisc_priv(sch);
144 psched_tdiff_t td;
145 psched_time_t now;
146
147 PSCHED_GET_TIME(now);
148 td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist);
149
150 /* Always queue at tail to keep packets in order */
151 if (likely(q->delayed.qlen < q->limit)) {
152 struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
153
154 PSCHED_TADD2(now, td, cb->time_to_send);
155
156 pr_debug("netem_delay: skb=%p now=%llu tosend=%llu\n", skb,
157 now, cb->time_to_send);
158
159 __skb_queue_tail(&q->delayed, skb);
160 return NET_XMIT_SUCCESS;
161 }
162
163 pr_debug("netem_delay: queue over limit %d\n", q->limit);
164 sch->qstats.overlimits++;
165 kfree_skb(skb);
166 return NET_XMIT_DROP;
167}
168
169/* 140/*
170 * Move a packet that is ready to send from the delay holding 141 * Insert one skb into qdisc.
171 * list to the underlying qdisc. 142 * Note: parent depends on return value to account for queue length.
143 * NET_XMIT_DROP: queue length didn't change.
144 * NET_XMIT_SUCCESS: one skb was queued.
172 */ 145 */
173static int netem_run(struct Qdisc *sch)
174{
175 struct netem_sched_data *q = qdisc_priv(sch);
176 struct sk_buff *skb;
177 psched_time_t now;
178
179 PSCHED_GET_TIME(now);
180
181 skb = skb_peek(&q->delayed);
182 if (skb) {
183 const struct netem_skb_cb *cb
184 = (const struct netem_skb_cb *)skb->cb;
185 long delay
186 = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
187 pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);
188
189 /* if more time remaining? */
190 if (delay > 0) {
191 mod_timer(&q->timer, jiffies + delay);
192 return 1;
193 }
194
195 __skb_unlink(skb, &q->delayed);
196
197 if (q->qdisc->enqueue(skb, q->qdisc)) {
198 sch->q.qlen--;
199 sch->qstats.drops++;
200 }
201 }
202
203 return 0;
204}
205
206static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) 146static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
207{ 147{
208 struct netem_sched_data *q = qdisc_priv(sch); 148 struct netem_sched_data *q = qdisc_priv(sch);
149 struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
150 struct sk_buff *skb2;
209 int ret; 151 int ret;
152 int count = 1;
210 153
211 pr_debug("netem_enqueue skb=%p\n", skb); 154 pr_debug("netem_enqueue skb=%p\n", skb);
212 155
156 /* Random duplication */
157 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
158 ++count;
159
213 /* Random packet drop 0 => none, ~0 => all */ 160 /* Random packet drop 0 => none, ~0 => all */
214 if (q->loss && q->loss >= get_crandom(&q->loss_cor)) { 161 if (q->loss && q->loss >= get_crandom(&q->loss_cor))
215 pr_debug("netem_enqueue: random loss\n"); 162 --count;
163
164 if (count == 0) {
216 sch->qstats.drops++; 165 sch->qstats.drops++;
217 kfree_skb(skb); 166 kfree_skb(skb);
218 return 0; /* lie about loss so TCP doesn't know */ 167 return NET_XMIT_DROP;
219 } 168 }
220 169
221 /* Random duplication */ 170 /*
222 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) { 171 * If we need to duplicate packet, then re-insert at top of the
223 struct sk_buff *skb2; 172 * qdisc tree, since parent queuer expects that only one
224 173 * skb will be queued.
225 skb2 = skb_clone(skb, GFP_ATOMIC); 174 */
226 if (skb2 && netem_delay(sch, skb2) == NET_XMIT_SUCCESS) { 175 if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
227 struct Qdisc *qp; 176 struct Qdisc *rootq = sch->dev->qdisc;
228 177 u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
229 /* Since one packet can generate two packets in the 178 q->duplicate = 0;
230 * queue, the parent's qlen accounting gets confused, 179
231 * so fix it. 180 rootq->enqueue(skb2, rootq);
232 */ 181 q->duplicate = dupsave;
233 qp = qdisc_lookup(sch->dev, TC_H_MAJ(sch->parent));
234 if (qp)
235 qp->q.qlen++;
236
237 sch->q.qlen++;
238 sch->bstats.bytes += skb2->len;
239 sch->bstats.packets++;
240 } else
241 sch->qstats.drops++;
242 } 182 }
243 183
244 /* If doing simple delay then gap == 0 so all packets 184 if (q->gap == 0 /* not doing reordering */
245 * go into the delayed holding queue 185 || q->counter < q->gap /* inside last reordering gap */
246 * otherwise if doing out of order only "1 out of gap" 186 || q->reorder < get_crandom(&q->reorder_cor)) {
247 * packets will be delayed. 187 psched_time_t now;
248 */ 188 PSCHED_GET_TIME(now);
249 if (q->counter < q->gap) { 189 PSCHED_TADD2(now, tabledist(q->latency, q->jitter,
190 &q->delay_cor, q->delay_dist),
191 cb->time_to_send);
250 ++q->counter; 192 ++q->counter;
251 ret = q->qdisc->enqueue(skb, q->qdisc); 193 ret = q->qdisc->enqueue(skb, q->qdisc);
252 } else { 194 } else {
195 /*
196 * Do re-ordering by putting one out of N packets at the front
197 * of the queue.
198 */
199 PSCHED_GET_TIME(cb->time_to_send);
253 q->counter = 0; 200 q->counter = 0;
254 ret = netem_delay(sch, skb); 201 ret = q->qdisc->ops->requeue(skb, q->qdisc);
255 netem_run(sch);
256 } 202 }
257 203
258 if (likely(ret == NET_XMIT_SUCCESS)) { 204 if (likely(ret == NET_XMIT_SUCCESS)) {
@@ -296,22 +242,33 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
296{ 242{
297 struct netem_sched_data *q = qdisc_priv(sch); 243 struct netem_sched_data *q = qdisc_priv(sch);
298 struct sk_buff *skb; 244 struct sk_buff *skb;
299 int pending;
300
301 pending = netem_run(sch);
302 245
303 skb = q->qdisc->dequeue(q->qdisc); 246 skb = q->qdisc->dequeue(q->qdisc);
304 if (skb) { 247 if (skb) {
305 pr_debug("netem_dequeue: return skb=%p\n", skb); 248 const struct netem_skb_cb *cb
306 sch->q.qlen--; 249 = (const struct netem_skb_cb *)skb->cb;
307 sch->flags &= ~TCQ_F_THROTTLED; 250 psched_time_t now;
308 } 251 long delay;
309 else if (pending) { 252
310 pr_debug("netem_dequeue: throttling\n"); 253 /* if more time remaining? */
254 PSCHED_GET_TIME(now);
255 delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
256 pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);
257 if (delay <= 0) {
258 pr_debug("netem_dequeue: return skb=%p\n", skb);
259 sch->q.qlen--;
260 sch->flags &= ~TCQ_F_THROTTLED;
261 return skb;
262 }
263
264 mod_timer(&q->timer, jiffies + delay);
311 sch->flags |= TCQ_F_THROTTLED; 265 sch->flags |= TCQ_F_THROTTLED;
312 }
313 266
314 return skb; 267 if (q->qdisc->ops->requeue(skb, q->qdisc) != 0)
268 sch->qstats.drops++;
269 }
270
271 return NULL;
315} 272}
316 273
317static void netem_watchdog(unsigned long arg) 274static void netem_watchdog(unsigned long arg)
@@ -328,8 +285,6 @@ static void netem_reset(struct Qdisc *sch)
328 struct netem_sched_data *q = qdisc_priv(sch); 285 struct netem_sched_data *q = qdisc_priv(sch);
329 286
330 qdisc_reset(q->qdisc); 287 qdisc_reset(q->qdisc);
331 skb_queue_purge(&q->delayed);
332
333 sch->q.qlen = 0; 288 sch->q.qlen = 0;
334 sch->flags &= ~TCQ_F_THROTTLED; 289 sch->flags &= ~TCQ_F_THROTTLED;
335 del_timer_sync(&q->timer); 290 del_timer_sync(&q->timer);
@@ -397,6 +352,19 @@ static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
397 return 0; 352 return 0;
398} 353}
399 354
355static int get_reorder(struct Qdisc *sch, const struct rtattr *attr)
356{
357 struct netem_sched_data *q = qdisc_priv(sch);
358 const struct tc_netem_reorder *r = RTA_DATA(attr);
359
360 if (RTA_PAYLOAD(attr) != sizeof(*r))
361 return -EINVAL;
362
363 q->reorder = r->probability;
364 init_crandom(&q->reorder_cor, r->correlation);
365 return 0;
366}
367
400static int netem_change(struct Qdisc *sch, struct rtattr *opt) 368static int netem_change(struct Qdisc *sch, struct rtattr *opt)
401{ 369{
402 struct netem_sched_data *q = qdisc_priv(sch); 370 struct netem_sched_data *q = qdisc_priv(sch);
@@ -417,9 +385,15 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
417 q->jitter = qopt->jitter; 385 q->jitter = qopt->jitter;
418 q->limit = qopt->limit; 386 q->limit = qopt->limit;
419 q->gap = qopt->gap; 387 q->gap = qopt->gap;
388 q->counter = 0;
420 q->loss = qopt->loss; 389 q->loss = qopt->loss;
421 q->duplicate = qopt->duplicate; 390 q->duplicate = qopt->duplicate;
422 391
392 /* for compatiablity with earlier versions.
393 * if gap is set, need to assume 100% probablity
394 */
395 q->reorder = ~0;
396
423 /* Handle nested options after initial queue options. 397 /* Handle nested options after initial queue options.
424 * Should have put all options in nested format but too late now. 398 * Should have put all options in nested format but too late now.
425 */ 399 */
@@ -441,6 +415,11 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
441 if (ret) 415 if (ret)
442 return ret; 416 return ret;
443 } 417 }
418 if (tb[TCA_NETEM_REORDER-1]) {
419 ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]);
420 if (ret)
421 return ret;
422 }
444 } 423 }
445 424
446 425
@@ -455,11 +434,9 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt)
455 if (!opt) 434 if (!opt)
456 return -EINVAL; 435 return -EINVAL;
457 436
458 skb_queue_head_init(&q->delayed);
459 init_timer(&q->timer); 437 init_timer(&q->timer);
460 q->timer.function = netem_watchdog; 438 q->timer.function = netem_watchdog;
461 q->timer.data = (unsigned long) sch; 439 q->timer.data = (unsigned long) sch;
462 q->counter = 0;
463 440
464 q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); 441 q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
465 if (!q->qdisc) { 442 if (!q->qdisc) {
@@ -491,6 +468,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
491 struct rtattr *rta = (struct rtattr *) b; 468 struct rtattr *rta = (struct rtattr *) b;
492 struct tc_netem_qopt qopt; 469 struct tc_netem_qopt qopt;
493 struct tc_netem_corr cor; 470 struct tc_netem_corr cor;
471 struct tc_netem_reorder reorder;
494 472
495 qopt.latency = q->latency; 473 qopt.latency = q->latency;
496 qopt.jitter = q->jitter; 474 qopt.jitter = q->jitter;
@@ -504,6 +482,11 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
504 cor.loss_corr = q->loss_cor.rho; 482 cor.loss_corr = q->loss_cor.rho;
505 cor.dup_corr = q->dup_cor.rho; 483 cor.dup_corr = q->dup_cor.rho;
506 RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor); 484 RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
485
486 reorder.probability = q->reorder;
487 reorder.correlation = q->reorder_cor.rho;
488 RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
489
507 rta->rta_len = skb->tail - b; 490 rta->rta_len = skb->tail - b;
508 491
509 return skb->len; 492 return skb->len;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c478fc8db776..c420eba4876b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -770,33 +770,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
770 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); 770 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
771 if (err) 771 if (err)
772 goto out_mknod_parent; 772 goto out_mknod_parent;
773 /* 773
774 * Yucky last component or no last component at all? 774 dentry = lookup_create(&nd, 0);
775 * (foo/., foo/.., /////)
776 */
777 err = -EEXIST;
778 if (nd.last_type != LAST_NORM)
779 goto out_mknod;
780 /*
781 * Lock the directory.
782 */
783 down(&nd.dentry->d_inode->i_sem);
784 /*
785 * Do the final lookup.
786 */
787 dentry = lookup_hash(&nd.last, nd.dentry);
788 err = PTR_ERR(dentry); 775 err = PTR_ERR(dentry);
789 if (IS_ERR(dentry)) 776 if (IS_ERR(dentry))
790 goto out_mknod_unlock; 777 goto out_mknod_unlock;
791 err = -ENOENT; 778
792 /*
793 * Special case - lookup gave negative, but... we had foo/bar/
794 * From the vfs_mknod() POV we just have a negative dentry -
795 * all is fine. Let's be bastards - you had / on the end, you've
796 * been asking for (non-existent) directory. -ENOENT for you.
797 */
798 if (nd.last.name[nd.last.len] && !dentry->d_inode)
799 goto out_mknod_dput;
800 /* 779 /*
801 * All right, let's create it. 780 * All right, let's create it.
802 */ 781 */
@@ -845,7 +824,6 @@ out_mknod_dput:
845 dput(dentry); 824 dput(dentry);
846out_mknod_unlock: 825out_mknod_unlock:
847 up(&nd.dentry->d_inode->i_sem); 826 up(&nd.dentry->d_inode->i_sem);
848out_mknod:
849 path_release(&nd); 827 path_release(&nd);
850out_mknod_parent: 828out_mknod_parent:
851 if (err==-EEXIST) 829 if (err==-EEXIST)
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 080aae243ce0..2f4531fcaca2 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -698,7 +698,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
698 return -ENOMEM; 698 return -ENOMEM;
699 699
700 if (skb1->sk) 700 if (skb1->sk)
701 skb_set_owner_w(skb, skb1->sk); 701 skb_set_owner_w(skb2, skb1->sk);
702 702
703 /* Looking around. Are we still alive? 703 /* Looking around. Are we still alive?
704 * OK, link new skb, drop old one */ 704 * OK, link new skb, drop old one */
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 55ed979db144..d07f5ce31824 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1136,7 +1136,7 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
1136 struct xfrm_dst *last; 1136 struct xfrm_dst *last;
1137 u32 mtu; 1137 u32 mtu;
1138 1138
1139 if (!dst_check(dst->path, 0) || 1139 if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
1140 (dst->dev && !netif_running(dst->dev))) 1140 (dst->dev && !netif_running(dst->dev)))
1141 return 0; 1141 return 0;
1142 1142
@@ -1156,7 +1156,7 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
1156 xdst->child_mtu_cached = mtu; 1156 xdst->child_mtu_cached = mtu;
1157 } 1157 }
1158 1158
1159 if (!dst_check(xdst->route, 0)) 1159 if (!dst_check(xdst->route, xdst->route_cookie))
1160 return 0; 1160 return 0;
1161 mtu = dst_mtu(xdst->route); 1161 mtu = dst_mtu(xdst->route);
1162 if (xdst->route_mtu_cached != mtu) { 1162 if (xdst->route_mtu_cached != mtu) {
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 5ddda2c98af9..97509011c274 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -34,14 +34,21 @@ static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
34{ 34{
35 struct rtattr *rt = xfrma[type - 1]; 35 struct rtattr *rt = xfrma[type - 1];
36 struct xfrm_algo *algp; 36 struct xfrm_algo *algp;
37 int len;
37 38
38 if (!rt) 39 if (!rt)
39 return 0; 40 return 0;
40 41
41 if ((rt->rta_len - sizeof(*rt)) < sizeof(*algp)) 42 len = (rt->rta_len - sizeof(*rt)) - sizeof(*algp);
43 if (len < 0)
42 return -EINVAL; 44 return -EINVAL;
43 45
44 algp = RTA_DATA(rt); 46 algp = RTA_DATA(rt);
47
48 len -= (algp->alg_key_len + 7U) / 8;
49 if (len < 0)
50 return -EINVAL;
51
45 switch (type) { 52 switch (type) {
46 case XFRMA_ALG_AUTH: 53 case XFRMA_ALG_AUTH:
47 if (!algp->alg_key_len && 54 if (!algp->alg_key_len &&
@@ -162,6 +169,7 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
162 struct rtattr *rta = u_arg; 169 struct rtattr *rta = u_arg;
163 struct xfrm_algo *p, *ualg; 170 struct xfrm_algo *p, *ualg;
164 struct xfrm_algo_desc *algo; 171 struct xfrm_algo_desc *algo;
172 int len;
165 173
166 if (!rta) 174 if (!rta)
167 return 0; 175 return 0;
@@ -173,11 +181,12 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
173 return -ENOSYS; 181 return -ENOSYS;
174 *props = algo->desc.sadb_alg_id; 182 *props = algo->desc.sadb_alg_id;
175 183
176 p = kmalloc(sizeof(*ualg) + ualg->alg_key_len, GFP_KERNEL); 184 len = sizeof(*ualg) + (ualg->alg_key_len + 7U) / 8;
185 p = kmalloc(len, GFP_KERNEL);
177 if (!p) 186 if (!p)
178 return -ENOMEM; 187 return -ENOMEM;
179 188
180 memcpy(p, ualg, sizeof(*ualg) + ualg->alg_key_len); 189 memcpy(p, ualg, len);
181 *algpp = p; 190 *algpp = p;
182 return 0; 191 return 0;
183} 192}