diff options
Diffstat (limited to 'net')
35 files changed, 311 insertions, 457 deletions
diff --git a/net/802/tr.c b/net/802/tr.c index 85293ccf7efc..a755e880f4ba 100644 --- a/net/802/tr.c +++ b/net/802/tr.c | |||
@@ -47,12 +47,12 @@ static void rif_check_expire(unsigned long dummy); | |||
47 | * Each RIF entry we learn is kept this way | 47 | * Each RIF entry we learn is kept this way |
48 | */ | 48 | */ |
49 | 49 | ||
50 | struct rif_cache_s { | 50 | struct rif_cache { |
51 | unsigned char addr[TR_ALEN]; | 51 | unsigned char addr[TR_ALEN]; |
52 | int iface; | 52 | int iface; |
53 | __u16 rcf; | 53 | __be16 rcf; |
54 | __u16 rseg[8]; | 54 | __be16 rseg[8]; |
55 | struct rif_cache_s *next; | 55 | struct rif_cache *next; |
56 | unsigned long last_used; | 56 | unsigned long last_used; |
57 | unsigned char local_ring; | 57 | unsigned char local_ring; |
58 | }; | 58 | }; |
@@ -64,7 +64,7 @@ struct rif_cache_s { | |||
64 | * up a lot. | 64 | * up a lot. |
65 | */ | 65 | */ |
66 | 66 | ||
67 | static struct rif_cache_s *rif_table[RIF_TABLE_SIZE]; | 67 | static struct rif_cache *rif_table[RIF_TABLE_SIZE]; |
68 | 68 | ||
69 | static DEFINE_SPINLOCK(rif_lock); | 69 | static DEFINE_SPINLOCK(rif_lock); |
70 | 70 | ||
@@ -249,7 +249,7 @@ void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,struct net_device * | |||
249 | { | 249 | { |
250 | int slack; | 250 | int slack; |
251 | unsigned int hash; | 251 | unsigned int hash; |
252 | struct rif_cache_s *entry; | 252 | struct rif_cache *entry; |
253 | unsigned char *olddata; | 253 | unsigned char *olddata; |
254 | static const unsigned char mcast_func_addr[] | 254 | static const unsigned char mcast_func_addr[] |
255 | = {0xC0,0x00,0x00,0x04,0x00,0x00}; | 255 | = {0xC0,0x00,0x00,0x04,0x00,0x00}; |
@@ -337,7 +337,7 @@ printk("source routing for %02X:%02X:%02X:%02X:%02X:%02X\n",trh->daddr[0], | |||
337 | static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev) | 337 | static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev) |
338 | { | 338 | { |
339 | unsigned int hash, rii_p = 0; | 339 | unsigned int hash, rii_p = 0; |
340 | struct rif_cache_s *entry; | 340 | struct rif_cache *entry; |
341 | 341 | ||
342 | 342 | ||
343 | spin_lock_bh(&rif_lock); | 343 | spin_lock_bh(&rif_lock); |
@@ -373,7 +373,7 @@ printk("adding rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n", | |||
373 | * FIXME: We ought to keep some kind of cache size | 373 | * FIXME: We ought to keep some kind of cache size |
374 | * limiting and adjust the timers to suit. | 374 | * limiting and adjust the timers to suit. |
375 | */ | 375 | */ |
376 | entry=kmalloc(sizeof(struct rif_cache_s),GFP_ATOMIC); | 376 | entry=kmalloc(sizeof(struct rif_cache),GFP_ATOMIC); |
377 | 377 | ||
378 | if(!entry) | 378 | if(!entry) |
379 | { | 379 | { |
@@ -435,7 +435,7 @@ static void rif_check_expire(unsigned long dummy) | |||
435 | spin_lock_bh(&rif_lock); | 435 | spin_lock_bh(&rif_lock); |
436 | 436 | ||
437 | for(i =0; i < RIF_TABLE_SIZE; i++) { | 437 | for(i =0; i < RIF_TABLE_SIZE; i++) { |
438 | struct rif_cache_s *entry, **pentry; | 438 | struct rif_cache *entry, **pentry; |
439 | 439 | ||
440 | pentry = rif_table+i; | 440 | pentry = rif_table+i; |
441 | while((entry=*pentry) != NULL) { | 441 | while((entry=*pentry) != NULL) { |
@@ -467,10 +467,10 @@ static void rif_check_expire(unsigned long dummy) | |||
467 | 467 | ||
468 | #ifdef CONFIG_PROC_FS | 468 | #ifdef CONFIG_PROC_FS |
469 | 469 | ||
470 | static struct rif_cache_s *rif_get_idx(loff_t pos) | 470 | static struct rif_cache *rif_get_idx(loff_t pos) |
471 | { | 471 | { |
472 | int i; | 472 | int i; |
473 | struct rif_cache_s *entry; | 473 | struct rif_cache *entry; |
474 | loff_t off = 0; | 474 | loff_t off = 0; |
475 | 475 | ||
476 | for(i = 0; i < RIF_TABLE_SIZE; i++) | 476 | for(i = 0; i < RIF_TABLE_SIZE; i++) |
@@ -493,7 +493,7 @@ static void *rif_seq_start(struct seq_file *seq, loff_t *pos) | |||
493 | static void *rif_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 493 | static void *rif_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
494 | { | 494 | { |
495 | int i; | 495 | int i; |
496 | struct rif_cache_s *ent = v; | 496 | struct rif_cache *ent = v; |
497 | 497 | ||
498 | ++*pos; | 498 | ++*pos; |
499 | 499 | ||
@@ -522,7 +522,7 @@ static void rif_seq_stop(struct seq_file *seq, void *v) | |||
522 | static int rif_seq_show(struct seq_file *seq, void *v) | 522 | static int rif_seq_show(struct seq_file *seq, void *v) |
523 | { | 523 | { |
524 | int j, rcf_len, segment, brdgnmb; | 524 | int j, rcf_len, segment, brdgnmb; |
525 | struct rif_cache_s *entry = v; | 525 | struct rif_cache *entry = v; |
526 | 526 | ||
527 | if (v == SEQ_START_TOKEN) | 527 | if (v == SEQ_START_TOKEN) |
528 | seq_puts(seq, | 528 | seq_puts(seq, |
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index d9b72fde433c..f564ee99782d 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c | |||
@@ -21,10 +21,7 @@ | |||
21 | 21 | ||
22 | static struct net_device_stats *br_dev_get_stats(struct net_device *dev) | 22 | static struct net_device_stats *br_dev_get_stats(struct net_device *dev) |
23 | { | 23 | { |
24 | struct net_bridge *br; | 24 | struct net_bridge *br = netdev_priv(dev); |
25 | |||
26 | br = dev->priv; | ||
27 | |||
28 | return &br->statistics; | 25 | return &br->statistics; |
29 | } | 26 | } |
30 | 27 | ||
@@ -54,9 +51,11 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev) | |||
54 | 51 | ||
55 | static int br_dev_open(struct net_device *dev) | 52 | static int br_dev_open(struct net_device *dev) |
56 | { | 53 | { |
57 | netif_start_queue(dev); | 54 | struct net_bridge *br = netdev_priv(dev); |
58 | 55 | ||
59 | br_stp_enable_bridge(dev->priv); | 56 | br_features_recompute(br); |
57 | netif_start_queue(dev); | ||
58 | br_stp_enable_bridge(br); | ||
60 | 59 | ||
61 | return 0; | 60 | return 0; |
62 | } | 61 | } |
@@ -67,7 +66,7 @@ static void br_dev_set_multicast_list(struct net_device *dev) | |||
67 | 66 | ||
68 | static int br_dev_stop(struct net_device *dev) | 67 | static int br_dev_stop(struct net_device *dev) |
69 | { | 68 | { |
70 | br_stp_disable_bridge(dev->priv); | 69 | br_stp_disable_bridge(netdev_priv(dev)); |
71 | 70 | ||
72 | netif_stop_queue(dev); | 71 | netif_stop_queue(dev); |
73 | 72 | ||
@@ -76,7 +75,7 @@ static int br_dev_stop(struct net_device *dev) | |||
76 | 75 | ||
77 | static int br_change_mtu(struct net_device *dev, int new_mtu) | 76 | static int br_change_mtu(struct net_device *dev, int new_mtu) |
78 | { | 77 | { |
79 | if ((new_mtu < 68) || new_mtu > br_min_mtu(dev->priv)) | 78 | if (new_mtu < 68 || new_mtu > br_min_mtu(netdev_priv(dev))) |
80 | return -EINVAL; | 79 | return -EINVAL; |
81 | 80 | ||
82 | dev->mtu = new_mtu; | 81 | dev->mtu = new_mtu; |
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 69872bf3b87e..91bb895375f4 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c | |||
@@ -314,6 +314,28 @@ int br_min_mtu(const struct net_bridge *br) | |||
314 | return mtu; | 314 | return mtu; |
315 | } | 315 | } |
316 | 316 | ||
317 | /* | ||
318 | * Recomputes features using slave's features | ||
319 | */ | ||
320 | void br_features_recompute(struct net_bridge *br) | ||
321 | { | ||
322 | struct net_bridge_port *p; | ||
323 | unsigned long features, checksum; | ||
324 | |||
325 | features = NETIF_F_SG | NETIF_F_FRAGLIST | ||
326 | | NETIF_F_HIGHDMA | NETIF_F_TSO; | ||
327 | checksum = NETIF_F_IP_CSUM; /* least commmon subset */ | ||
328 | |||
329 | list_for_each_entry(p, &br->port_list, list) { | ||
330 | if (!(p->dev->features | ||
331 | & (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM))) | ||
332 | checksum = 0; | ||
333 | features &= p->dev->features; | ||
334 | } | ||
335 | |||
336 | br->dev->features = features | checksum | NETIF_F_LLTX; | ||
337 | } | ||
338 | |||
317 | /* called with RTNL */ | 339 | /* called with RTNL */ |
318 | int br_add_if(struct net_bridge *br, struct net_device *dev) | 340 | int br_add_if(struct net_bridge *br, struct net_device *dev) |
319 | { | 341 | { |
@@ -368,6 +390,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) | |||
368 | 390 | ||
369 | spin_lock_bh(&br->lock); | 391 | spin_lock_bh(&br->lock); |
370 | br_stp_recalculate_bridge_id(br); | 392 | br_stp_recalculate_bridge_id(br); |
393 | br_features_recompute(br); | ||
371 | spin_unlock_bh(&br->lock); | 394 | spin_unlock_bh(&br->lock); |
372 | 395 | ||
373 | return 0; | 396 | return 0; |
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 2b1cce46cab4..8f5f2e730992 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c | |||
@@ -26,7 +26,7 @@ static int br_pass_frame_up_finish(struct sk_buff *skb) | |||
26 | #ifdef CONFIG_NETFILTER_DEBUG | 26 | #ifdef CONFIG_NETFILTER_DEBUG |
27 | skb->nf_debug = 0; | 27 | skb->nf_debug = 0; |
28 | #endif | 28 | #endif |
29 | netif_rx(skb); | 29 | netif_receive_skb(skb); |
30 | 30 | ||
31 | return 0; | 31 | return 0; |
32 | } | 32 | } |
@@ -54,6 +54,9 @@ int br_handle_frame_finish(struct sk_buff *skb) | |||
54 | struct net_bridge_fdb_entry *dst; | 54 | struct net_bridge_fdb_entry *dst; |
55 | int passedup = 0; | 55 | int passedup = 0; |
56 | 56 | ||
57 | /* insert into forwarding database after filtering to avoid spoofing */ | ||
58 | br_fdb_update(p->br, p, eth_hdr(skb)->h_source); | ||
59 | |||
57 | if (br->dev->flags & IFF_PROMISC) { | 60 | if (br->dev->flags & IFF_PROMISC) { |
58 | struct sk_buff *skb2; | 61 | struct sk_buff *skb2; |
59 | 62 | ||
@@ -108,8 +111,7 @@ int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb) | |||
108 | if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) | 111 | if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) |
109 | goto err; | 112 | goto err; |
110 | 113 | ||
111 | if (p->state == BR_STATE_LEARNING || | 114 | if (p->state == BR_STATE_LEARNING) |
112 | p->state == BR_STATE_FORWARDING) | ||
113 | br_fdb_update(p->br, p, eth_hdr(skb)->h_source); | 115 | br_fdb_update(p->br, p, eth_hdr(skb)->h_source); |
114 | 116 | ||
115 | if (p->br->stp_enabled && | 117 | if (p->br->stp_enabled && |
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index f8fb49e34764..917311c6828b 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c | |||
@@ -65,6 +65,15 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v | |||
65 | } | 65 | } |
66 | break; | 66 | break; |
67 | 67 | ||
68 | case NETDEV_FEAT_CHANGE: | ||
69 | if (br->dev->flags & IFF_UP) | ||
70 | br_features_recompute(br); | ||
71 | |||
72 | /* could do recursive feature change notification | ||
73 | * but who would care?? | ||
74 | */ | ||
75 | break; | ||
76 | |||
68 | case NETDEV_DOWN: | 77 | case NETDEV_DOWN: |
69 | if (br->dev->flags & IFF_UP) | 78 | if (br->dev->flags & IFF_UP) |
70 | br_stp_disable_port(p); | 79 | br_stp_disable_port(p); |
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 54d63f1372a0..bdf95a74d8cd 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h | |||
@@ -174,6 +174,7 @@ extern int br_add_if(struct net_bridge *br, | |||
174 | extern int br_del_if(struct net_bridge *br, | 174 | extern int br_del_if(struct net_bridge *br, |
175 | struct net_device *dev); | 175 | struct net_device *dev); |
176 | extern int br_min_mtu(const struct net_bridge *br); | 176 | extern int br_min_mtu(const struct net_bridge *br); |
177 | extern void br_features_recompute(struct net_bridge *br); | ||
177 | 178 | ||
178 | /* br_input.c */ | 179 | /* br_input.c */ |
179 | extern int br_handle_frame_finish(struct sk_buff *skb); | 180 | extern int br_handle_frame_finish(struct sk_buff *skb); |
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index b91a875aca01..d071f1c9ad0b 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c | |||
@@ -140,6 +140,9 @@ int br_stp_handle_bpdu(struct sk_buff *skb) | |||
140 | struct net_bridge *br = p->br; | 140 | struct net_bridge *br = p->br; |
141 | unsigned char *buf; | 141 | unsigned char *buf; |
142 | 142 | ||
143 | /* insert into forwarding database after filtering to avoid spoofing */ | ||
144 | br_fdb_update(p->br, p, eth_hdr(skb)->h_source); | ||
145 | |||
143 | /* need at least the 802 and STP headers */ | 146 | /* need at least the 802 and STP headers */ |
144 | if (!pskb_may_pull(skb, sizeof(header)+1) || | 147 | if (!pskb_may_pull(skb, sizeof(header)+1) || |
145 | memcmp(skb->data, header, sizeof(header))) | 148 | memcmp(skb->data, header, sizeof(header))) |
diff --git a/net/core/dev.c b/net/core/dev.c index d4d9e2680adb..f15a3ffff635 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -761,6 +761,18 @@ int dev_change_name(struct net_device *dev, char *newname) | |||
761 | } | 761 | } |
762 | 762 | ||
763 | /** | 763 | /** |
764 | * netdev_features_change - device changes fatures | ||
765 | * @dev: device to cause notification | ||
766 | * | ||
767 | * Called to indicate a device has changed features. | ||
768 | */ | ||
769 | void netdev_features_change(struct net_device *dev) | ||
770 | { | ||
771 | notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev); | ||
772 | } | ||
773 | EXPORT_SYMBOL(netdev_features_change); | ||
774 | |||
775 | /** | ||
764 | * netdev_state_change - device changes state | 776 | * netdev_state_change - device changes state |
765 | * @dev: device to cause notification | 777 | * @dev: device to cause notification |
766 | * | 778 | * |
diff --git a/net/core/ethtool.c b/net/core/ethtool.c index f05fde97c43d..8ec484894d68 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c | |||
@@ -29,7 +29,7 @@ u32 ethtool_op_get_link(struct net_device *dev) | |||
29 | 29 | ||
30 | u32 ethtool_op_get_tx_csum(struct net_device *dev) | 30 | u32 ethtool_op_get_tx_csum(struct net_device *dev) |
31 | { | 31 | { |
32 | return (dev->features & NETIF_F_IP_CSUM) != 0; | 32 | return (dev->features & (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM)) != 0; |
33 | } | 33 | } |
34 | 34 | ||
35 | int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) | 35 | int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) |
@@ -42,6 +42,15 @@ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) | |||
42 | return 0; | 42 | return 0; |
43 | } | 43 | } |
44 | 44 | ||
45 | int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) | ||
46 | { | ||
47 | if (data) | ||
48 | dev->features |= NETIF_F_HW_CSUM; | ||
49 | else | ||
50 | dev->features &= ~NETIF_F_HW_CSUM; | ||
51 | |||
52 | return 0; | ||
53 | } | ||
45 | u32 ethtool_op_get_sg(struct net_device *dev) | 54 | u32 ethtool_op_get_sg(struct net_device *dev) |
46 | { | 55 | { |
47 | return (dev->features & NETIF_F_SG) != 0; | 56 | return (dev->features & NETIF_F_SG) != 0; |
@@ -682,6 +691,7 @@ int dev_ethtool(struct ifreq *ifr) | |||
682 | void __user *useraddr = ifr->ifr_data; | 691 | void __user *useraddr = ifr->ifr_data; |
683 | u32 ethcmd; | 692 | u32 ethcmd; |
684 | int rc; | 693 | int rc; |
694 | unsigned long old_features; | ||
685 | 695 | ||
686 | /* | 696 | /* |
687 | * XXX: This can be pushed down into the ethtool_* handlers that | 697 | * XXX: This can be pushed down into the ethtool_* handlers that |
@@ -703,6 +713,8 @@ int dev_ethtool(struct ifreq *ifr) | |||
703 | if ((rc = dev->ethtool_ops->begin(dev)) < 0) | 713 | if ((rc = dev->ethtool_ops->begin(dev)) < 0) |
704 | return rc; | 714 | return rc; |
705 | 715 | ||
716 | old_features = dev->features; | ||
717 | |||
706 | switch (ethcmd) { | 718 | switch (ethcmd) { |
707 | case ETHTOOL_GSET: | 719 | case ETHTOOL_GSET: |
708 | rc = ethtool_get_settings(dev, useraddr); | 720 | rc = ethtool_get_settings(dev, useraddr); |
@@ -712,7 +724,6 @@ int dev_ethtool(struct ifreq *ifr) | |||
712 | break; | 724 | break; |
713 | case ETHTOOL_GDRVINFO: | 725 | case ETHTOOL_GDRVINFO: |
714 | rc = ethtool_get_drvinfo(dev, useraddr); | 726 | rc = ethtool_get_drvinfo(dev, useraddr); |
715 | |||
716 | break; | 727 | break; |
717 | case ETHTOOL_GREGS: | 728 | case ETHTOOL_GREGS: |
718 | rc = ethtool_get_regs(dev, useraddr); | 729 | rc = ethtool_get_regs(dev, useraddr); |
@@ -801,6 +812,10 @@ int dev_ethtool(struct ifreq *ifr) | |||
801 | 812 | ||
802 | if(dev->ethtool_ops->complete) | 813 | if(dev->ethtool_ops->complete) |
803 | dev->ethtool_ops->complete(dev); | 814 | dev->ethtool_ops->complete(dev); |
815 | |||
816 | if (old_features != dev->features) | ||
817 | netdev_features_change(dev); | ||
818 | |||
804 | return rc; | 819 | return rc; |
805 | 820 | ||
806 | ioctl: | 821 | ioctl: |
@@ -817,3 +832,4 @@ EXPORT_SYMBOL(ethtool_op_get_tx_csum); | |||
817 | EXPORT_SYMBOL(ethtool_op_set_sg); | 832 | EXPORT_SYMBOL(ethtool_op_set_sg); |
818 | EXPORT_SYMBOL(ethtool_op_set_tso); | 833 | EXPORT_SYMBOL(ethtool_op_set_tso); |
819 | EXPORT_SYMBOL(ethtool_op_set_tx_csum); | 834 | EXPORT_SYMBOL(ethtool_op_set_tx_csum); |
835 | EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); | ||
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 060f703659e8..910eb4c05a47 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #define to_net_dev(class) container_of(class, struct net_device, class_dev) | 21 | #define to_net_dev(class) container_of(class, struct net_device, class_dev) |
22 | 22 | ||
23 | static const char fmt_hex[] = "%#x\n"; | 23 | static const char fmt_hex[] = "%#x\n"; |
24 | static const char fmt_long_hex[] = "%#lx\n"; | ||
24 | static const char fmt_dec[] = "%d\n"; | 25 | static const char fmt_dec[] = "%d\n"; |
25 | static const char fmt_ulong[] = "%lu\n"; | 26 | static const char fmt_ulong[] = "%lu\n"; |
26 | 27 | ||
@@ -91,7 +92,7 @@ static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL) \ | |||
91 | NETDEVICE_ATTR(addr_len, fmt_dec); | 92 | NETDEVICE_ATTR(addr_len, fmt_dec); |
92 | NETDEVICE_ATTR(iflink, fmt_dec); | 93 | NETDEVICE_ATTR(iflink, fmt_dec); |
93 | NETDEVICE_ATTR(ifindex, fmt_dec); | 94 | NETDEVICE_ATTR(ifindex, fmt_dec); |
94 | NETDEVICE_ATTR(features, fmt_hex); | 95 | NETDEVICE_ATTR(features, fmt_long_hex); |
95 | NETDEVICE_ATTR(type, fmt_dec); | 96 | NETDEVICE_ATTR(type, fmt_dec); |
96 | 97 | ||
97 | /* use same locking rules as GIFHWADDR ioctl's */ | 98 | /* use same locking rules as GIFHWADDR ioctl's */ |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 3cc96730c4ed..478a30179a52 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -233,11 +233,14 @@ int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b) | |||
233 | static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, | 233 | static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, |
234 | int destroy) | 234 | int destroy) |
235 | { | 235 | { |
236 | struct in_ifaddr *promote = NULL; | ||
236 | struct in_ifaddr *ifa1 = *ifap; | 237 | struct in_ifaddr *ifa1 = *ifap; |
237 | 238 | ||
238 | ASSERT_RTNL(); | 239 | ASSERT_RTNL(); |
239 | 240 | ||
240 | /* 1. Deleting primary ifaddr forces deletion all secondaries */ | 241 | /* 1. Deleting primary ifaddr forces deletion all secondaries |
242 | * unless alias promotion is set | ||
243 | **/ | ||
241 | 244 | ||
242 | if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) { | 245 | if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) { |
243 | struct in_ifaddr *ifa; | 246 | struct in_ifaddr *ifa; |
@@ -251,11 +254,16 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, | |||
251 | continue; | 254 | continue; |
252 | } | 255 | } |
253 | 256 | ||
254 | *ifap1 = ifa->ifa_next; | 257 | if (!IN_DEV_PROMOTE_SECONDARIES(in_dev)) { |
258 | *ifap1 = ifa->ifa_next; | ||
255 | 259 | ||
256 | rtmsg_ifa(RTM_DELADDR, ifa); | 260 | rtmsg_ifa(RTM_DELADDR, ifa); |
257 | notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); | 261 | notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); |
258 | inet_free_ifa(ifa); | 262 | inet_free_ifa(ifa); |
263 | } else { | ||
264 | promote = ifa; | ||
265 | break; | ||
266 | } | ||
259 | } | 267 | } |
260 | } | 268 | } |
261 | 269 | ||
@@ -281,6 +289,13 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, | |||
281 | if (!in_dev->ifa_list) | 289 | if (!in_dev->ifa_list) |
282 | inetdev_destroy(in_dev); | 290 | inetdev_destroy(in_dev); |
283 | } | 291 | } |
292 | |||
293 | if (promote && IN_DEV_PROMOTE_SECONDARIES(in_dev)) { | ||
294 | /* not sure if we should send a delete notify first? */ | ||
295 | promote->ifa_flags &= ~IFA_F_SECONDARY; | ||
296 | rtmsg_ifa(RTM_NEWADDR, promote); | ||
297 | notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote); | ||
298 | } | ||
284 | } | 299 | } |
285 | 300 | ||
286 | static int inet_insert_ifa(struct in_ifaddr *ifa) | 301 | static int inet_insert_ifa(struct in_ifaddr *ifa) |
@@ -1384,6 +1399,15 @@ static struct devinet_sysctl_table { | |||
1384 | .proc_handler = &ipv4_doint_and_flush, | 1399 | .proc_handler = &ipv4_doint_and_flush, |
1385 | .strategy = &ipv4_doint_and_flush_strategy, | 1400 | .strategy = &ipv4_doint_and_flush_strategy, |
1386 | }, | 1401 | }, |
1402 | { | ||
1403 | .ctl_name = NET_IPV4_CONF_PROMOTE_SECONDARIES, | ||
1404 | .procname = "promote_secondaries", | ||
1405 | .data = &ipv4_devconf.promote_secondaries, | ||
1406 | .maxlen = sizeof(int), | ||
1407 | .mode = 0644, | ||
1408 | .proc_handler = &ipv4_doint_and_flush, | ||
1409 | .strategy = &ipv4_doint_and_flush_strategy, | ||
1410 | }, | ||
1387 | }, | 1411 | }, |
1388 | .devinet_dev = { | 1412 | .devinet_dev = { |
1389 | { | 1413 | { |
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 053a883247ba..eae84cc39d3f 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c | |||
@@ -478,7 +478,7 @@ static int __init esp4_init(void) | |||
478 | { | 478 | { |
479 | struct xfrm_decap_state decap; | 479 | struct xfrm_decap_state decap; |
480 | 480 | ||
481 | if (sizeof(struct esp_decap_data) < | 481 | if (sizeof(struct esp_decap_data) > |
482 | sizeof(decap.decap_data)) { | 482 | sizeof(decap.decap_data)) { |
483 | extern void decap_data_too_small(void); | 483 | extern void decap_data_too_small(void); |
484 | 484 | ||
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index daebd93fd8a0..760dc8238d65 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -490,6 +490,14 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) | |||
490 | /* Partially cloned skb? */ | 490 | /* Partially cloned skb? */ |
491 | if (skb_shared(frag)) | 491 | if (skb_shared(frag)) |
492 | goto slow_path; | 492 | goto slow_path; |
493 | |||
494 | BUG_ON(frag->sk); | ||
495 | if (skb->sk) { | ||
496 | sock_hold(skb->sk); | ||
497 | frag->sk = skb->sk; | ||
498 | frag->destructor = sock_wfree; | ||
499 | skb->truesize -= frag->truesize; | ||
500 | } | ||
493 | } | 501 | } |
494 | 502 | ||
495 | /* Everything is OK. Generate! */ | 503 | /* Everything is OK. Generate! */ |
diff --git a/net/ipv4/ipvs/Makefile b/net/ipv4/ipvs/Makefile index a788461a40c9..30e85de9ffff 100644 --- a/net/ipv4/ipvs/Makefile +++ b/net/ipv4/ipvs/Makefile | |||
@@ -11,7 +11,7 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o | |||
11 | 11 | ||
12 | ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ | 12 | ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ |
13 | ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ | 13 | ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ |
14 | ip_vs_est.o ip_vs_proto.o ip_vs_proto_icmp.o \ | 14 | ip_vs_est.o ip_vs_proto.o \ |
15 | $(ip_vs_proto-objs-y) | 15 | $(ip_vs_proto-objs-y) |
16 | 16 | ||
17 | 17 | ||
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c index 253c46252bd5..867d4e9c6594 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/ipv4/ipvs/ip_vs_proto.c | |||
@@ -216,9 +216,6 @@ int ip_vs_protocol_init(void) | |||
216 | #ifdef CONFIG_IP_VS_PROTO_UDP | 216 | #ifdef CONFIG_IP_VS_PROTO_UDP |
217 | REGISTER_PROTOCOL(&ip_vs_protocol_udp); | 217 | REGISTER_PROTOCOL(&ip_vs_protocol_udp); |
218 | #endif | 218 | #endif |
219 | #ifdef CONFIG_IP_VS_PROTO_ICMP | ||
220 | REGISTER_PROTOCOL(&ip_vs_protocol_icmp); | ||
221 | #endif | ||
222 | #ifdef CONFIG_IP_VS_PROTO_AH | 219 | #ifdef CONFIG_IP_VS_PROTO_AH |
223 | REGISTER_PROTOCOL(&ip_vs_protocol_ah); | 220 | REGISTER_PROTOCOL(&ip_vs_protocol_ah); |
224 | #endif | 221 | #endif |
diff --git a/net/ipv4/ipvs/ip_vs_proto_icmp.c b/net/ipv4/ipvs/ip_vs_proto_icmp.c deleted file mode 100644 index 191e94aa1c1f..000000000000 --- a/net/ipv4/ipvs/ip_vs_proto_icmp.c +++ /dev/null | |||
@@ -1,182 +0,0 @@ | |||
1 | /* | ||
2 | * ip_vs_proto_icmp.c: ICMP load balancing support for IP Virtual Server | ||
3 | * | ||
4 | * Authors: Julian Anastasov <ja@ssi.bg>, March 2002 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * version 2 as published by the Free Software Foundation; | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/icmp.h> | ||
15 | #include <linux/netfilter.h> | ||
16 | #include <linux/netfilter_ipv4.h> | ||
17 | |||
18 | #include <net/ip_vs.h> | ||
19 | |||
20 | |||
21 | static int icmp_timeouts[1] = { 1*60*HZ }; | ||
22 | |||
23 | static char * icmp_state_name_table[1] = { "ICMP" }; | ||
24 | |||
25 | static struct ip_vs_conn * | ||
26 | icmp_conn_in_get(const struct sk_buff *skb, | ||
27 | struct ip_vs_protocol *pp, | ||
28 | const struct iphdr *iph, | ||
29 | unsigned int proto_off, | ||
30 | int inverse) | ||
31 | { | ||
32 | #if 0 | ||
33 | struct ip_vs_conn *cp; | ||
34 | |||
35 | if (likely(!inverse)) { | ||
36 | cp = ip_vs_conn_in_get(iph->protocol, | ||
37 | iph->saddr, 0, | ||
38 | iph->daddr, 0); | ||
39 | } else { | ||
40 | cp = ip_vs_conn_in_get(iph->protocol, | ||
41 | iph->daddr, 0, | ||
42 | iph->saddr, 0); | ||
43 | } | ||
44 | |||
45 | return cp; | ||
46 | |||
47 | #else | ||
48 | return NULL; | ||
49 | #endif | ||
50 | } | ||
51 | |||
52 | static struct ip_vs_conn * | ||
53 | icmp_conn_out_get(const struct sk_buff *skb, | ||
54 | struct ip_vs_protocol *pp, | ||
55 | const struct iphdr *iph, | ||
56 | unsigned int proto_off, | ||
57 | int inverse) | ||
58 | { | ||
59 | #if 0 | ||
60 | struct ip_vs_conn *cp; | ||
61 | |||
62 | if (likely(!inverse)) { | ||
63 | cp = ip_vs_conn_out_get(iph->protocol, | ||
64 | iph->saddr, 0, | ||
65 | iph->daddr, 0); | ||
66 | } else { | ||
67 | cp = ip_vs_conn_out_get(IPPROTO_UDP, | ||
68 | iph->daddr, 0, | ||
69 | iph->saddr, 0); | ||
70 | } | ||
71 | |||
72 | return cp; | ||
73 | #else | ||
74 | return NULL; | ||
75 | #endif | ||
76 | } | ||
77 | |||
78 | static int | ||
79 | icmp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
80 | int *verdict, struct ip_vs_conn **cpp) | ||
81 | { | ||
82 | *verdict = NF_ACCEPT; | ||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | static int | ||
87 | icmp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) | ||
88 | { | ||
89 | if (!(skb->nh.iph->frag_off & __constant_htons(IP_OFFSET))) { | ||
90 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) { | ||
91 | if (ip_vs_checksum_complete(skb, skb->nh.iph->ihl * 4)) { | ||
92 | IP_VS_DBG_RL_PKT(0, pp, skb, 0, "Failed checksum for"); | ||
93 | return 0; | ||
94 | } | ||
95 | } | ||
96 | } | ||
97 | return 1; | ||
98 | } | ||
99 | |||
100 | static void | ||
101 | icmp_debug_packet(struct ip_vs_protocol *pp, | ||
102 | const struct sk_buff *skb, | ||
103 | int offset, | ||
104 | const char *msg) | ||
105 | { | ||
106 | char buf[256]; | ||
107 | struct iphdr _iph, *ih; | ||
108 | |||
109 | ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); | ||
110 | if (ih == NULL) | ||
111 | sprintf(buf, "%s TRUNCATED", pp->name); | ||
112 | else if (ih->frag_off & __constant_htons(IP_OFFSET)) | ||
113 | sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag", | ||
114 | pp->name, NIPQUAD(ih->saddr), | ||
115 | NIPQUAD(ih->daddr)); | ||
116 | else { | ||
117 | struct icmphdr _icmph, *ic; | ||
118 | |||
119 | ic = skb_header_pointer(skb, offset + ih->ihl*4, | ||
120 | sizeof(_icmph), &_icmph); | ||
121 | if (ic == NULL) | ||
122 | sprintf(buf, "%s TRUNCATED to %u bytes\n", | ||
123 | pp->name, skb->len - offset); | ||
124 | else | ||
125 | sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u T:%d C:%d", | ||
126 | pp->name, NIPQUAD(ih->saddr), | ||
127 | NIPQUAD(ih->daddr), | ||
128 | ic->type, ic->code); | ||
129 | } | ||
130 | printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); | ||
131 | } | ||
132 | |||
133 | static int | ||
134 | icmp_state_transition(struct ip_vs_conn *cp, int direction, | ||
135 | const struct sk_buff *skb, | ||
136 | struct ip_vs_protocol *pp) | ||
137 | { | ||
138 | cp->timeout = pp->timeout_table[IP_VS_ICMP_S_NORMAL]; | ||
139 | return 1; | ||
140 | } | ||
141 | |||
142 | static int | ||
143 | icmp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) | ||
144 | { | ||
145 | int num; | ||
146 | char **names; | ||
147 | |||
148 | num = IP_VS_ICMP_S_LAST; | ||
149 | names = icmp_state_name_table; | ||
150 | return ip_vs_set_state_timeout(pp->timeout_table, num, names, sname, to); | ||
151 | } | ||
152 | |||
153 | |||
154 | static void icmp_init(struct ip_vs_protocol *pp) | ||
155 | { | ||
156 | pp->timeout_table = icmp_timeouts; | ||
157 | } | ||
158 | |||
159 | static void icmp_exit(struct ip_vs_protocol *pp) | ||
160 | { | ||
161 | } | ||
162 | |||
163 | struct ip_vs_protocol ip_vs_protocol_icmp = { | ||
164 | .name = "ICMP", | ||
165 | .protocol = IPPROTO_ICMP, | ||
166 | .dont_defrag = 0, | ||
167 | .init = icmp_init, | ||
168 | .exit = icmp_exit, | ||
169 | .conn_schedule = icmp_conn_schedule, | ||
170 | .conn_in_get = icmp_conn_in_get, | ||
171 | .conn_out_get = icmp_conn_out_get, | ||
172 | .snat_handler = NULL, | ||
173 | .dnat_handler = NULL, | ||
174 | .csum_check = icmp_csum_check, | ||
175 | .state_transition = icmp_state_transition, | ||
176 | .register_app = NULL, | ||
177 | .unregister_app = NULL, | ||
178 | .app_conn_bind = NULL, | ||
179 | .debug_packet = icmp_debug_packet, | ||
180 | .timeout_change = NULL, | ||
181 | .set_state_timeout = icmp_set_state_timeout, | ||
182 | }; | ||
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index faa6176bbeb1..de21da00057f 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c | |||
@@ -508,7 +508,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
508 | rc = NF_ACCEPT; | 508 | rc = NF_ACCEPT; |
509 | /* do not touch skb anymore */ | 509 | /* do not touch skb anymore */ |
510 | atomic_inc(&cp->in_pkts); | 510 | atomic_inc(&cp->in_pkts); |
511 | __ip_vs_conn_put(cp); | ||
512 | goto out; | 511 | goto out; |
513 | } | 512 | } |
514 | 513 | ||
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c index 9349686131fc..cf2e6bcf7973 100644 --- a/net/ipv4/multipath_drr.c +++ b/net/ipv4/multipath_drr.c | |||
@@ -57,7 +57,6 @@ struct multipath_device { | |||
57 | 57 | ||
58 | static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES]; | 58 | static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES]; |
59 | static DEFINE_SPINLOCK(state_lock); | 59 | static DEFINE_SPINLOCK(state_lock); |
60 | static struct rtable *last_selection = NULL; | ||
61 | 60 | ||
62 | static int inline __multipath_findslot(void) | 61 | static int inline __multipath_findslot(void) |
63 | { | 62 | { |
@@ -111,11 +110,6 @@ struct notifier_block drr_dev_notifier = { | |||
111 | .notifier_call = drr_dev_event, | 110 | .notifier_call = drr_dev_event, |
112 | }; | 111 | }; |
113 | 112 | ||
114 | static void drr_remove(struct rtable *rt) | ||
115 | { | ||
116 | if (last_selection == rt) | ||
117 | last_selection = NULL; | ||
118 | } | ||
119 | 113 | ||
120 | static void drr_safe_inc(atomic_t *usecount) | 114 | static void drr_safe_inc(atomic_t *usecount) |
121 | { | 115 | { |
@@ -144,14 +138,6 @@ static void drr_select_route(const struct flowi *flp, | |||
144 | int devidx = -1; | 138 | int devidx = -1; |
145 | int cur_min_devidx = -1; | 139 | int cur_min_devidx = -1; |
146 | 140 | ||
147 | /* if necessary and possible utilize the old alternative */ | ||
148 | if ((flp->flags & FLOWI_FLAG_MULTIPATHOLDROUTE) != 0 && | ||
149 | last_selection != NULL) { | ||
150 | result = last_selection; | ||
151 | *rp = result; | ||
152 | return; | ||
153 | } | ||
154 | |||
155 | /* 1. make sure all alt. nexthops have the same GC related data */ | 141 | /* 1. make sure all alt. nexthops have the same GC related data */ |
156 | /* 2. determine the new candidate to be returned */ | 142 | /* 2. determine the new candidate to be returned */ |
157 | result = NULL; | 143 | result = NULL; |
@@ -229,12 +215,10 @@ static void drr_select_route(const struct flowi *flp, | |||
229 | } | 215 | } |
230 | 216 | ||
231 | *rp = result; | 217 | *rp = result; |
232 | last_selection = result; | ||
233 | } | 218 | } |
234 | 219 | ||
235 | static struct ip_mp_alg_ops drr_ops = { | 220 | static struct ip_mp_alg_ops drr_ops = { |
236 | .mp_alg_select_route = drr_select_route, | 221 | .mp_alg_select_route = drr_select_route, |
237 | .mp_alg_remove = drr_remove, | ||
238 | }; | 222 | }; |
239 | 223 | ||
240 | static int __init drr_init(void) | 224 | static int __init drr_init(void) |
@@ -244,7 +228,7 @@ static int __init drr_init(void) | |||
244 | if (err) | 228 | if (err) |
245 | return err; | 229 | return err; |
246 | 230 | ||
247 | err = multipath_alg_register(&drr_ops, IP_MP_ALG_RR); | 231 | err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR); |
248 | if (err) | 232 | if (err) |
249 | goto fail; | 233 | goto fail; |
250 | 234 | ||
diff --git a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c index 554a82568160..061b6b253982 100644 --- a/net/ipv4/multipath_rr.c +++ b/net/ipv4/multipath_rr.c | |||
@@ -47,29 +47,12 @@ | |||
47 | #include <net/checksum.h> | 47 | #include <net/checksum.h> |
48 | #include <net/ip_mp_alg.h> | 48 | #include <net/ip_mp_alg.h> |
49 | 49 | ||
50 | #define MULTIPATH_MAX_CANDIDATES 40 | ||
51 | |||
52 | static struct rtable* last_used = NULL; | ||
53 | |||
54 | static void rr_remove(struct rtable *rt) | ||
55 | { | ||
56 | if (last_used == rt) | ||
57 | last_used = NULL; | ||
58 | } | ||
59 | |||
60 | static void rr_select_route(const struct flowi *flp, | 50 | static void rr_select_route(const struct flowi *flp, |
61 | struct rtable *first, struct rtable **rp) | 51 | struct rtable *first, struct rtable **rp) |
62 | { | 52 | { |
63 | struct rtable *nh, *result, *min_use_cand = NULL; | 53 | struct rtable *nh, *result, *min_use_cand = NULL; |
64 | int min_use = -1; | 54 | int min_use = -1; |
65 | 55 | ||
66 | /* if necessary and possible utilize the old alternative */ | ||
67 | if ((flp->flags & FLOWI_FLAG_MULTIPATHOLDROUTE) != 0 && | ||
68 | last_used != NULL) { | ||
69 | result = last_used; | ||
70 | goto out; | ||
71 | } | ||
72 | |||
73 | /* 1. make sure all alt. nexthops have the same GC related data | 56 | /* 1. make sure all alt. nexthops have the same GC related data |
74 | * 2. determine the new candidate to be returned | 57 | * 2. determine the new candidate to be returned |
75 | */ | 58 | */ |
@@ -90,15 +73,12 @@ static void rr_select_route(const struct flowi *flp, | |||
90 | if (!result) | 73 | if (!result) |
91 | result = first; | 74 | result = first; |
92 | 75 | ||
93 | out: | ||
94 | last_used = result; | ||
95 | result->u.dst.__use++; | 76 | result->u.dst.__use++; |
96 | *rp = result; | 77 | *rp = result; |
97 | } | 78 | } |
98 | 79 | ||
99 | static struct ip_mp_alg_ops rr_ops = { | 80 | static struct ip_mp_alg_ops rr_ops = { |
100 | .mp_alg_select_route = rr_select_route, | 81 | .mp_alg_select_route = rr_select_route, |
101 | .mp_alg_remove = rr_remove, | ||
102 | }; | 82 | }; |
103 | 83 | ||
104 | static int __init rr_init(void) | 84 | static int __init rr_init(void) |
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 28d9425d5c39..09e824622977 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c | |||
@@ -940,37 +940,25 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct, | |||
940 | struct sk_buff * | 940 | struct sk_buff * |
941 | ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) | 941 | ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) |
942 | { | 942 | { |
943 | struct sock *sk = skb->sk; | ||
944 | #ifdef CONFIG_NETFILTER_DEBUG | 943 | #ifdef CONFIG_NETFILTER_DEBUG |
945 | unsigned int olddebug = skb->nf_debug; | 944 | unsigned int olddebug = skb->nf_debug; |
946 | #endif | 945 | #endif |
947 | 946 | ||
948 | if (sk) { | 947 | skb_orphan(skb); |
949 | sock_hold(sk); | ||
950 | skb_orphan(skb); | ||
951 | } | ||
952 | 948 | ||
953 | local_bh_disable(); | 949 | local_bh_disable(); |
954 | skb = ip_defrag(skb, user); | 950 | skb = ip_defrag(skb, user); |
955 | local_bh_enable(); | 951 | local_bh_enable(); |
956 | 952 | ||
957 | if (!skb) { | 953 | if (skb) { |
958 | if (sk) | 954 | ip_send_check(skb->nh.iph); |
959 | sock_put(sk); | 955 | skb->nfcache |= NFC_ALTERED; |
960 | return skb; | ||
961 | } | ||
962 | |||
963 | if (sk) { | ||
964 | skb_set_owner_w(skb, sk); | ||
965 | sock_put(sk); | ||
966 | } | ||
967 | |||
968 | ip_send_check(skb->nh.iph); | ||
969 | skb->nfcache |= NFC_ALTERED; | ||
970 | #ifdef CONFIG_NETFILTER_DEBUG | 956 | #ifdef CONFIG_NETFILTER_DEBUG |
971 | /* Packet path as if nothing had happened. */ | 957 | /* Packet path as if nothing had happened. */ |
972 | skb->nf_debug = olddebug; | 958 | skb->nf_debug = olddebug; |
973 | #endif | 959 | #endif |
960 | } | ||
961 | |||
974 | return skb; | 962 | return skb; |
975 | } | 963 | } |
976 | 964 | ||
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index e5746b674413..eda1fba431a4 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c | |||
@@ -3,6 +3,7 @@ | |||
3 | * communicating with userspace via netlink. | 3 | * communicating with userspace via netlink. |
4 | * | 4 | * |
5 | * (C) 2000-2002 James Morris <jmorris@intercode.com.au> | 5 | * (C) 2000-2002 James Morris <jmorris@intercode.com.au> |
6 | * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org> | ||
6 | * | 7 | * |
7 | * This program is free software; you can redistribute it and/or modify | 8 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License version 2 as | 9 | * it under the terms of the GNU General Public License version 2 as |
@@ -17,6 +18,7 @@ | |||
17 | * 2005-01-10: Added /proc counter for dropped packets; fixed so | 18 | * 2005-01-10: Added /proc counter for dropped packets; fixed so |
18 | * packets aren't delivered to user space if they're going | 19 | * packets aren't delivered to user space if they're going |
19 | * to be dropped. | 20 | * to be dropped. |
21 | * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte) | ||
20 | * | 22 | * |
21 | */ | 23 | */ |
22 | #include <linux/module.h> | 24 | #include <linux/module.h> |
@@ -71,7 +73,15 @@ static DECLARE_MUTEX(ipqnl_sem); | |||
71 | static void | 73 | static void |
72 | ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict) | 74 | ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict) |
73 | { | 75 | { |
76 | /* TCP input path (and probably other bits) assume to be called | ||
77 | * from softirq context, not from syscall, like ipq_issue_verdict is | ||
78 | * called. TCP input path deadlocks with locks taken from timer | ||
79 | * softirq, e.g. We therefore emulate this by local_bh_disable() */ | ||
80 | |||
81 | local_bh_disable(); | ||
74 | nf_reinject(entry->skb, entry->info, verdict); | 82 | nf_reinject(entry->skb, entry->info, verdict); |
83 | local_bh_enable(); | ||
84 | |||
75 | kfree(entry); | 85 | kfree(entry); |
76 | } | 86 | } |
77 | 87 | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 79835a67a274..5bad504630a3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -4355,16 +4355,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
4355 | goto no_ack; | 4355 | goto no_ack; |
4356 | } | 4356 | } |
4357 | 4357 | ||
4358 | if (eaten) { | 4358 | __tcp_ack_snd_check(sk, 0); |
4359 | if (tcp_in_quickack_mode(tp)) { | ||
4360 | tcp_send_ack(sk); | ||
4361 | } else { | ||
4362 | tcp_send_delayed_ack(sk); | ||
4363 | } | ||
4364 | } else { | ||
4365 | __tcp_ack_snd_check(sk, 0); | ||
4366 | } | ||
4367 | |||
4368 | no_ack: | 4359 | no_ack: |
4369 | if (eaten) | 4360 | if (eaten) |
4370 | __kfree_skb(skb); | 4361 | __kfree_skb(skb); |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4a6952e3fee9..7c24e64b443f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -738,7 +738,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
738 | unsigned long amount; | 738 | unsigned long amount; |
739 | 739 | ||
740 | amount = 0; | 740 | amount = 0; |
741 | spin_lock_irq(&sk->sk_receive_queue.lock); | 741 | spin_lock_bh(&sk->sk_receive_queue.lock); |
742 | skb = skb_peek(&sk->sk_receive_queue); | 742 | skb = skb_peek(&sk->sk_receive_queue); |
743 | if (skb != NULL) { | 743 | if (skb != NULL) { |
744 | /* | 744 | /* |
@@ -748,7 +748,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
748 | */ | 748 | */ |
749 | amount = skb->len - sizeof(struct udphdr); | 749 | amount = skb->len - sizeof(struct udphdr); |
750 | } | 750 | } |
751 | spin_unlock_irq(&sk->sk_receive_queue.lock); | 751 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
752 | return put_user(amount, (int __user *)arg); | 752 | return put_user(amount, (int __user *)arg); |
753 | } | 753 | } |
754 | 754 | ||
@@ -848,12 +848,12 @@ csum_copy_err: | |||
848 | /* Clear queue. */ | 848 | /* Clear queue. */ |
849 | if (flags&MSG_PEEK) { | 849 | if (flags&MSG_PEEK) { |
850 | int clear = 0; | 850 | int clear = 0; |
851 | spin_lock_irq(&sk->sk_receive_queue.lock); | 851 | spin_lock_bh(&sk->sk_receive_queue.lock); |
852 | if (skb == skb_peek(&sk->sk_receive_queue)) { | 852 | if (skb == skb_peek(&sk->sk_receive_queue)) { |
853 | __skb_unlink(skb, &sk->sk_receive_queue); | 853 | __skb_unlink(skb, &sk->sk_receive_queue); |
854 | clear = 1; | 854 | clear = 1; |
855 | } | 855 | } |
856 | spin_unlock_irq(&sk->sk_receive_queue.lock); | 856 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
857 | if (clear) | 857 | if (clear) |
858 | kfree_skb(skb); | 858 | kfree_skb(skb); |
859 | } | 859 | } |
@@ -1334,7 +1334,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
1334 | struct sk_buff_head *rcvq = &sk->sk_receive_queue; | 1334 | struct sk_buff_head *rcvq = &sk->sk_receive_queue; |
1335 | struct sk_buff *skb; | 1335 | struct sk_buff *skb; |
1336 | 1336 | ||
1337 | spin_lock_irq(&rcvq->lock); | 1337 | spin_lock_bh(&rcvq->lock); |
1338 | while ((skb = skb_peek(rcvq)) != NULL) { | 1338 | while ((skb = skb_peek(rcvq)) != NULL) { |
1339 | if (udp_checksum_complete(skb)) { | 1339 | if (udp_checksum_complete(skb)) { |
1340 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); | 1340 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); |
@@ -1345,7 +1345,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
1345 | break; | 1345 | break; |
1346 | } | 1346 | } |
1347 | } | 1347 | } |
1348 | spin_unlock_irq(&rcvq->lock); | 1348 | spin_unlock_bh(&rcvq->lock); |
1349 | 1349 | ||
1350 | /* nothing to see, move along */ | 1350 | /* nothing to see, move along */ |
1351 | if (skb == NULL) | 1351 | if (skb == NULL) |
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index a93f6dc51979..0e5f7499debb 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c | |||
@@ -535,10 +535,12 @@ release: | |||
535 | if (err) | 535 | if (err) |
536 | goto done; | 536 | goto done; |
537 | 537 | ||
538 | /* Do not check for fault */ | 538 | if (!freq.flr_label) { |
539 | if (!freq.flr_label) | 539 | if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label, |
540 | copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label, | 540 | &fl->label, sizeof(fl->label))) { |
541 | &fl->label, sizeof(fl->label)); | 541 | /* Intentionally ignore fault. */ |
542 | } | ||
543 | } | ||
542 | 544 | ||
543 | sfl1->fl = fl; | 545 | sfl1->fl = fl; |
544 | sfl1->next = np->ipv6_fl_list; | 546 | sfl1->next = np->ipv6_fl_list; |
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0f0711417c9d..b78a53586804 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c | |||
@@ -552,13 +552,17 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
552 | skb_headroom(frag) < hlen) | 552 | skb_headroom(frag) < hlen) |
553 | goto slow_path; | 553 | goto slow_path; |
554 | 554 | ||
555 | /* Correct socket ownership. */ | ||
556 | if (frag->sk == NULL) | ||
557 | goto slow_path; | ||
558 | |||
559 | /* Partially cloned skb? */ | 555 | /* Partially cloned skb? */ |
560 | if (skb_shared(frag)) | 556 | if (skb_shared(frag)) |
561 | goto slow_path; | 557 | goto slow_path; |
558 | |||
559 | BUG_ON(frag->sk); | ||
560 | if (skb->sk) { | ||
561 | sock_hold(skb->sk); | ||
562 | frag->sk = skb->sk; | ||
563 | frag->destructor = sock_wfree; | ||
564 | skb->truesize -= frag->truesize; | ||
565 | } | ||
562 | } | 566 | } |
563 | 567 | ||
564 | err = 0; | 568 | err = 0; |
@@ -1116,12 +1120,10 @@ int ip6_push_pending_frames(struct sock *sk) | |||
1116 | tail_skb = &(tmp_skb->next); | 1120 | tail_skb = &(tmp_skb->next); |
1117 | skb->len += tmp_skb->len; | 1121 | skb->len += tmp_skb->len; |
1118 | skb->data_len += tmp_skb->len; | 1122 | skb->data_len += tmp_skb->len; |
1119 | #if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */ | ||
1120 | skb->truesize += tmp_skb->truesize; | 1123 | skb->truesize += tmp_skb->truesize; |
1121 | __sock_put(tmp_skb->sk); | 1124 | __sock_put(tmp_skb->sk); |
1122 | tmp_skb->destructor = NULL; | 1125 | tmp_skb->destructor = NULL; |
1123 | tmp_skb->sk = NULL; | 1126 | tmp_skb->sk = NULL; |
1124 | #endif | ||
1125 | } | 1127 | } |
1126 | 1128 | ||
1127 | ipv6_addr_copy(final_dst, &fl->fl6_dst); | 1129 | ipv6_addr_copy(final_dst, &fl->fl6_dst); |
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index 2f4c91ddc9a3..5ade5a5d1990 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c | |||
@@ -37,5 +37,4 @@ EXPORT_SYMBOL(in6_dev_finish_destroy); | |||
37 | EXPORT_SYMBOL(xfrm6_rcv); | 37 | EXPORT_SYMBOL(xfrm6_rcv); |
38 | #endif | 38 | #endif |
39 | EXPORT_SYMBOL(rt6_lookup); | 39 | EXPORT_SYMBOL(rt6_lookup); |
40 | EXPORT_SYMBOL(fl6_sock_lookup); | ||
41 | EXPORT_SYMBOL(ipv6_push_nfrag_opts); | 40 | EXPORT_SYMBOL(ipv6_push_nfrag_opts); |
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 601a148f60f3..6b9867717d11 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c | |||
@@ -84,6 +84,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) | |||
84 | mtu = IPV6_MIN_MTU; | 84 | mtu = IPV6_MIN_MTU; |
85 | 85 | ||
86 | if (skb->len > mtu) { | 86 | if (skb->len > mtu) { |
87 | skb->dev = dst->dev; | ||
87 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); | 88 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); |
88 | ret = -EMSGSIZE; | 89 | ret = -EMSGSIZE; |
89 | } | 90 | } |
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 4429b1a1fe5f..cf1d91e74c82 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c | |||
@@ -113,6 +113,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int | |||
113 | 113 | ||
114 | xdst = (struct xfrm_dst *)dst1; | 114 | xdst = (struct xfrm_dst *)dst1; |
115 | xdst->route = &rt->u.dst; | 115 | xdst->route = &rt->u.dst; |
116 | if (rt->rt6i_node) | ||
117 | xdst->route_cookie = rt->rt6i_node->fn_sernum; | ||
116 | 118 | ||
117 | dst1->next = dst_prev; | 119 | dst1->next = dst_prev; |
118 | dst_prev = dst1; | 120 | dst_prev = dst1; |
@@ -137,6 +139,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int | |||
137 | 139 | ||
138 | dst_prev->child = &rt->u.dst; | 140 | dst_prev->child = &rt->u.dst; |
139 | dst->path = &rt->u.dst; | 141 | dst->path = &rt->u.dst; |
142 | if (rt->rt6i_node) | ||
143 | ((struct xfrm_dst *)dst)->path_cookie = rt->rt6i_node->fn_sernum; | ||
140 | 144 | ||
141 | *dst_p = dst; | 145 | *dst_p = dst; |
142 | dst = dst_prev; | 146 | dst = dst_prev; |
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 733bf52cef3e..e41ce458c2a9 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c | |||
@@ -735,11 +735,15 @@ static inline int do_one_broadcast(struct sock *sk, | |||
735 | 735 | ||
736 | sock_hold(sk); | 736 | sock_hold(sk); |
737 | if (p->skb2 == NULL) { | 737 | if (p->skb2 == NULL) { |
738 | if (atomic_read(&p->skb->users) != 1) { | 738 | if (skb_shared(p->skb)) { |
739 | p->skb2 = skb_clone(p->skb, p->allocation); | 739 | p->skb2 = skb_clone(p->skb, p->allocation); |
740 | } else { | 740 | } else { |
741 | p->skb2 = p->skb; | 741 | p->skb2 = skb_get(p->skb); |
742 | atomic_inc(&p->skb->users); | 742 | /* |
743 | * skb ownership may have been set when | ||
744 | * delivered to a previous socket. | ||
745 | */ | ||
746 | skb_orphan(p->skb2); | ||
743 | } | 747 | } |
744 | } | 748 | } |
745 | if (p->skb2 == NULL) { | 749 | if (p->skb2 == NULL) { |
@@ -785,11 +789,12 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, | |||
785 | sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) | 789 | sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) |
786 | do_one_broadcast(sk, &info); | 790 | do_one_broadcast(sk, &info); |
787 | 791 | ||
792 | kfree_skb(skb); | ||
793 | |||
788 | netlink_unlock_table(); | 794 | netlink_unlock_table(); |
789 | 795 | ||
790 | if (info.skb2) | 796 | if (info.skb2) |
791 | kfree_skb(info.skb2); | 797 | kfree_skb(info.skb2); |
792 | kfree_skb(skb); | ||
793 | 798 | ||
794 | if (info.delivered) { | 799 | if (info.delivered) { |
795 | if (info.congested && (allocation & __GFP_WAIT)) | 800 | if (info.congested && (allocation & __GFP_WAIT)) |
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 8a3db9d95bab..d8bd2a569c7c 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c | |||
@@ -18,7 +18,7 @@ | |||
18 | #include <asm/byteorder.h> | 18 | #include <asm/byteorder.h> |
19 | 19 | ||
20 | 20 | ||
21 | #if 1 /* control */ | 21 | #if 0 /* control */ |
22 | #define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) | 22 | #define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) |
23 | #else | 23 | #else |
24 | #define DPRINTK(format,args...) | 24 | #define DPRINTK(format,args...) |
@@ -73,8 +73,13 @@ static int dsmark_graft(struct Qdisc *sch,unsigned long arg, | |||
73 | 73 | ||
74 | DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",sch,p,new, | 74 | DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",sch,p,new, |
75 | old); | 75 | old); |
76 | if (!new) | 76 | |
77 | new = &noop_qdisc; | 77 | if (new == NULL) { |
78 | new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); | ||
79 | if (new == NULL) | ||
80 | new = &noop_qdisc; | ||
81 | } | ||
82 | |||
78 | sch_tree_lock(sch); | 83 | sch_tree_lock(sch); |
79 | *old = xchg(&p->q,new); | 84 | *old = xchg(&p->q,new); |
80 | if (*old) | 85 | if (*old) |
@@ -163,14 +168,15 @@ static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker) | |||
163 | return; | 168 | return; |
164 | for (i = 0; i < p->indices; i++) { | 169 | for (i = 0; i < p->indices; i++) { |
165 | if (p->mask[i] == 0xff && !p->value[i]) | 170 | if (p->mask[i] == 0xff && !p->value[i]) |
166 | continue; | 171 | goto ignore; |
167 | if (walker->count >= walker->skip) { | 172 | if (walker->count >= walker->skip) { |
168 | if (walker->fn(sch, i+1, walker) < 0) { | 173 | if (walker->fn(sch, i+1, walker) < 0) { |
169 | walker->stop = 1; | 174 | walker->stop = 1; |
170 | break; | 175 | break; |
171 | } | 176 | } |
172 | } | 177 | } |
173 | walker->count++; | 178 | ignore: |
179 | walker->count++; | ||
174 | } | 180 | } |
175 | } | 181 | } |
176 | 182 | ||
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index e0c9fbe73b15..bb9bf8d5003c 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c | |||
@@ -53,7 +53,6 @@ | |||
53 | 53 | ||
54 | struct netem_sched_data { | 54 | struct netem_sched_data { |
55 | struct Qdisc *qdisc; | 55 | struct Qdisc *qdisc; |
56 | struct sk_buff_head delayed; | ||
57 | struct timer_list timer; | 56 | struct timer_list timer; |
58 | 57 | ||
59 | u32 latency; | 58 | u32 latency; |
@@ -63,11 +62,12 @@ struct netem_sched_data { | |||
63 | u32 gap; | 62 | u32 gap; |
64 | u32 jitter; | 63 | u32 jitter; |
65 | u32 duplicate; | 64 | u32 duplicate; |
65 | u32 reorder; | ||
66 | 66 | ||
67 | struct crndstate { | 67 | struct crndstate { |
68 | unsigned long last; | 68 | unsigned long last; |
69 | unsigned long rho; | 69 | unsigned long rho; |
70 | } delay_cor, loss_cor, dup_cor; | 70 | } delay_cor, loss_cor, dup_cor, reorder_cor; |
71 | 71 | ||
72 | struct disttable { | 72 | struct disttable { |
73 | u32 size; | 73 | u32 size; |
@@ -137,122 +137,68 @@ static long tabledist(unsigned long mu, long sigma, | |||
137 | return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; | 137 | return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; |
138 | } | 138 | } |
139 | 139 | ||
140 | /* Put skb in the private delayed queue. */ | ||
141 | static int netem_delay(struct Qdisc *sch, struct sk_buff *skb) | ||
142 | { | ||
143 | struct netem_sched_data *q = qdisc_priv(sch); | ||
144 | psched_tdiff_t td; | ||
145 | psched_time_t now; | ||
146 | |||
147 | PSCHED_GET_TIME(now); | ||
148 | td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist); | ||
149 | |||
150 | /* Always queue at tail to keep packets in order */ | ||
151 | if (likely(q->delayed.qlen < q->limit)) { | ||
152 | struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb; | ||
153 | |||
154 | PSCHED_TADD2(now, td, cb->time_to_send); | ||
155 | |||
156 | pr_debug("netem_delay: skb=%p now=%llu tosend=%llu\n", skb, | ||
157 | now, cb->time_to_send); | ||
158 | |||
159 | __skb_queue_tail(&q->delayed, skb); | ||
160 | return NET_XMIT_SUCCESS; | ||
161 | } | ||
162 | |||
163 | pr_debug("netem_delay: queue over limit %d\n", q->limit); | ||
164 | sch->qstats.overlimits++; | ||
165 | kfree_skb(skb); | ||
166 | return NET_XMIT_DROP; | ||
167 | } | ||
168 | |||
169 | /* | 140 | /* |
170 | * Move a packet that is ready to send from the delay holding | 141 | * Insert one skb into qdisc. |
171 | * list to the underlying qdisc. | 142 | * Note: parent depends on return value to account for queue length. |
143 | * NET_XMIT_DROP: queue length didn't change. | ||
144 | * NET_XMIT_SUCCESS: one skb was queued. | ||
172 | */ | 145 | */ |
173 | static int netem_run(struct Qdisc *sch) | ||
174 | { | ||
175 | struct netem_sched_data *q = qdisc_priv(sch); | ||
176 | struct sk_buff *skb; | ||
177 | psched_time_t now; | ||
178 | |||
179 | PSCHED_GET_TIME(now); | ||
180 | |||
181 | skb = skb_peek(&q->delayed); | ||
182 | if (skb) { | ||
183 | const struct netem_skb_cb *cb | ||
184 | = (const struct netem_skb_cb *)skb->cb; | ||
185 | long delay | ||
186 | = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now)); | ||
187 | pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay); | ||
188 | |||
189 | /* if more time remaining? */ | ||
190 | if (delay > 0) { | ||
191 | mod_timer(&q->timer, jiffies + delay); | ||
192 | return 1; | ||
193 | } | ||
194 | |||
195 | __skb_unlink(skb, &q->delayed); | ||
196 | |||
197 | if (q->qdisc->enqueue(skb, q->qdisc)) { | ||
198 | sch->q.qlen--; | ||
199 | sch->qstats.drops++; | ||
200 | } | ||
201 | } | ||
202 | |||
203 | return 0; | ||
204 | } | ||
205 | |||
206 | static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) | 146 | static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
207 | { | 147 | { |
208 | struct netem_sched_data *q = qdisc_priv(sch); | 148 | struct netem_sched_data *q = qdisc_priv(sch); |
149 | struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb; | ||
150 | struct sk_buff *skb2; | ||
209 | int ret; | 151 | int ret; |
152 | int count = 1; | ||
210 | 153 | ||
211 | pr_debug("netem_enqueue skb=%p\n", skb); | 154 | pr_debug("netem_enqueue skb=%p\n", skb); |
212 | 155 | ||
156 | /* Random duplication */ | ||
157 | if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) | ||
158 | ++count; | ||
159 | |||
213 | /* Random packet drop 0 => none, ~0 => all */ | 160 | /* Random packet drop 0 => none, ~0 => all */ |
214 | if (q->loss && q->loss >= get_crandom(&q->loss_cor)) { | 161 | if (q->loss && q->loss >= get_crandom(&q->loss_cor)) |
215 | pr_debug("netem_enqueue: random loss\n"); | 162 | --count; |
163 | |||
164 | if (count == 0) { | ||
216 | sch->qstats.drops++; | 165 | sch->qstats.drops++; |
217 | kfree_skb(skb); | 166 | kfree_skb(skb); |
218 | return 0; /* lie about loss so TCP doesn't know */ | 167 | return NET_XMIT_DROP; |
219 | } | 168 | } |
220 | 169 | ||
221 | /* Random duplication */ | 170 | /* |
222 | if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) { | 171 | * If we need to duplicate packet, then re-insert at top of the |
223 | struct sk_buff *skb2; | 172 | * qdisc tree, since parent queuer expects that only one |
224 | 173 | * skb will be queued. | |
225 | skb2 = skb_clone(skb, GFP_ATOMIC); | 174 | */ |
226 | if (skb2 && netem_delay(sch, skb2) == NET_XMIT_SUCCESS) { | 175 | if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { |
227 | struct Qdisc *qp; | 176 | struct Qdisc *rootq = sch->dev->qdisc; |
228 | 177 | u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ | |
229 | /* Since one packet can generate two packets in the | 178 | q->duplicate = 0; |
230 | * queue, the parent's qlen accounting gets confused, | 179 | |
231 | * so fix it. | 180 | rootq->enqueue(skb2, rootq); |
232 | */ | 181 | q->duplicate = dupsave; |
233 | qp = qdisc_lookup(sch->dev, TC_H_MAJ(sch->parent)); | ||
234 | if (qp) | ||
235 | qp->q.qlen++; | ||
236 | |||
237 | sch->q.qlen++; | ||
238 | sch->bstats.bytes += skb2->len; | ||
239 | sch->bstats.packets++; | ||
240 | } else | ||
241 | sch->qstats.drops++; | ||
242 | } | 182 | } |
243 | 183 | ||
244 | /* If doing simple delay then gap == 0 so all packets | 184 | if (q->gap == 0 /* not doing reordering */ |
245 | * go into the delayed holding queue | 185 | || q->counter < q->gap /* inside last reordering gap */ |
246 | * otherwise if doing out of order only "1 out of gap" | 186 | || q->reorder < get_crandom(&q->reorder_cor)) { |
247 | * packets will be delayed. | 187 | psched_time_t now; |
248 | */ | 188 | PSCHED_GET_TIME(now); |
249 | if (q->counter < q->gap) { | 189 | PSCHED_TADD2(now, tabledist(q->latency, q->jitter, |
190 | &q->delay_cor, q->delay_dist), | ||
191 | cb->time_to_send); | ||
250 | ++q->counter; | 192 | ++q->counter; |
251 | ret = q->qdisc->enqueue(skb, q->qdisc); | 193 | ret = q->qdisc->enqueue(skb, q->qdisc); |
252 | } else { | 194 | } else { |
195 | /* | ||
196 | * Do re-ordering by putting one out of N packets at the front | ||
197 | * of the queue. | ||
198 | */ | ||
199 | PSCHED_GET_TIME(cb->time_to_send); | ||
253 | q->counter = 0; | 200 | q->counter = 0; |
254 | ret = netem_delay(sch, skb); | 201 | ret = q->qdisc->ops->requeue(skb, q->qdisc); |
255 | netem_run(sch); | ||
256 | } | 202 | } |
257 | 203 | ||
258 | if (likely(ret == NET_XMIT_SUCCESS)) { | 204 | if (likely(ret == NET_XMIT_SUCCESS)) { |
@@ -296,22 +242,33 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) | |||
296 | { | 242 | { |
297 | struct netem_sched_data *q = qdisc_priv(sch); | 243 | struct netem_sched_data *q = qdisc_priv(sch); |
298 | struct sk_buff *skb; | 244 | struct sk_buff *skb; |
299 | int pending; | ||
300 | |||
301 | pending = netem_run(sch); | ||
302 | 245 | ||
303 | skb = q->qdisc->dequeue(q->qdisc); | 246 | skb = q->qdisc->dequeue(q->qdisc); |
304 | if (skb) { | 247 | if (skb) { |
305 | pr_debug("netem_dequeue: return skb=%p\n", skb); | 248 | const struct netem_skb_cb *cb |
306 | sch->q.qlen--; | 249 | = (const struct netem_skb_cb *)skb->cb; |
307 | sch->flags &= ~TCQ_F_THROTTLED; | 250 | psched_time_t now; |
308 | } | 251 | long delay; |
309 | else if (pending) { | 252 | |
310 | pr_debug("netem_dequeue: throttling\n"); | 253 | /* if more time remaining? */ |
254 | PSCHED_GET_TIME(now); | ||
255 | delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now)); | ||
256 | pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay); | ||
257 | if (delay <= 0) { | ||
258 | pr_debug("netem_dequeue: return skb=%p\n", skb); | ||
259 | sch->q.qlen--; | ||
260 | sch->flags &= ~TCQ_F_THROTTLED; | ||
261 | return skb; | ||
262 | } | ||
263 | |||
264 | mod_timer(&q->timer, jiffies + delay); | ||
311 | sch->flags |= TCQ_F_THROTTLED; | 265 | sch->flags |= TCQ_F_THROTTLED; |
312 | } | ||
313 | 266 | ||
314 | return skb; | 267 | if (q->qdisc->ops->requeue(skb, q->qdisc) != 0) |
268 | sch->qstats.drops++; | ||
269 | } | ||
270 | |||
271 | return NULL; | ||
315 | } | 272 | } |
316 | 273 | ||
317 | static void netem_watchdog(unsigned long arg) | 274 | static void netem_watchdog(unsigned long arg) |
@@ -328,8 +285,6 @@ static void netem_reset(struct Qdisc *sch) | |||
328 | struct netem_sched_data *q = qdisc_priv(sch); | 285 | struct netem_sched_data *q = qdisc_priv(sch); |
329 | 286 | ||
330 | qdisc_reset(q->qdisc); | 287 | qdisc_reset(q->qdisc); |
331 | skb_queue_purge(&q->delayed); | ||
332 | |||
333 | sch->q.qlen = 0; | 288 | sch->q.qlen = 0; |
334 | sch->flags &= ~TCQ_F_THROTTLED; | 289 | sch->flags &= ~TCQ_F_THROTTLED; |
335 | del_timer_sync(&q->timer); | 290 | del_timer_sync(&q->timer); |
@@ -397,6 +352,19 @@ static int get_correlation(struct Qdisc *sch, const struct rtattr *attr) | |||
397 | return 0; | 352 | return 0; |
398 | } | 353 | } |
399 | 354 | ||
355 | static int get_reorder(struct Qdisc *sch, const struct rtattr *attr) | ||
356 | { | ||
357 | struct netem_sched_data *q = qdisc_priv(sch); | ||
358 | const struct tc_netem_reorder *r = RTA_DATA(attr); | ||
359 | |||
360 | if (RTA_PAYLOAD(attr) != sizeof(*r)) | ||
361 | return -EINVAL; | ||
362 | |||
363 | q->reorder = r->probability; | ||
364 | init_crandom(&q->reorder_cor, r->correlation); | ||
365 | return 0; | ||
366 | } | ||
367 | |||
400 | static int netem_change(struct Qdisc *sch, struct rtattr *opt) | 368 | static int netem_change(struct Qdisc *sch, struct rtattr *opt) |
401 | { | 369 | { |
402 | struct netem_sched_data *q = qdisc_priv(sch); | 370 | struct netem_sched_data *q = qdisc_priv(sch); |
@@ -417,9 +385,15 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt) | |||
417 | q->jitter = qopt->jitter; | 385 | q->jitter = qopt->jitter; |
418 | q->limit = qopt->limit; | 386 | q->limit = qopt->limit; |
419 | q->gap = qopt->gap; | 387 | q->gap = qopt->gap; |
388 | q->counter = 0; | ||
420 | q->loss = qopt->loss; | 389 | q->loss = qopt->loss; |
421 | q->duplicate = qopt->duplicate; | 390 | q->duplicate = qopt->duplicate; |
422 | 391 | ||
392 | /* for compatiablity with earlier versions. | ||
393 | * if gap is set, need to assume 100% probablity | ||
394 | */ | ||
395 | q->reorder = ~0; | ||
396 | |||
423 | /* Handle nested options after initial queue options. | 397 | /* Handle nested options after initial queue options. |
424 | * Should have put all options in nested format but too late now. | 398 | * Should have put all options in nested format but too late now. |
425 | */ | 399 | */ |
@@ -441,6 +415,11 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt) | |||
441 | if (ret) | 415 | if (ret) |
442 | return ret; | 416 | return ret; |
443 | } | 417 | } |
418 | if (tb[TCA_NETEM_REORDER-1]) { | ||
419 | ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]); | ||
420 | if (ret) | ||
421 | return ret; | ||
422 | } | ||
444 | } | 423 | } |
445 | 424 | ||
446 | 425 | ||
@@ -455,11 +434,9 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt) | |||
455 | if (!opt) | 434 | if (!opt) |
456 | return -EINVAL; | 435 | return -EINVAL; |
457 | 436 | ||
458 | skb_queue_head_init(&q->delayed); | ||
459 | init_timer(&q->timer); | 437 | init_timer(&q->timer); |
460 | q->timer.function = netem_watchdog; | 438 | q->timer.function = netem_watchdog; |
461 | q->timer.data = (unsigned long) sch; | 439 | q->timer.data = (unsigned long) sch; |
462 | q->counter = 0; | ||
463 | 440 | ||
464 | q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); | 441 | q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); |
465 | if (!q->qdisc) { | 442 | if (!q->qdisc) { |
@@ -491,6 +468,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) | |||
491 | struct rtattr *rta = (struct rtattr *) b; | 468 | struct rtattr *rta = (struct rtattr *) b; |
492 | struct tc_netem_qopt qopt; | 469 | struct tc_netem_qopt qopt; |
493 | struct tc_netem_corr cor; | 470 | struct tc_netem_corr cor; |
471 | struct tc_netem_reorder reorder; | ||
494 | 472 | ||
495 | qopt.latency = q->latency; | 473 | qopt.latency = q->latency; |
496 | qopt.jitter = q->jitter; | 474 | qopt.jitter = q->jitter; |
@@ -504,6 +482,11 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) | |||
504 | cor.loss_corr = q->loss_cor.rho; | 482 | cor.loss_corr = q->loss_cor.rho; |
505 | cor.dup_corr = q->dup_cor.rho; | 483 | cor.dup_corr = q->dup_cor.rho; |
506 | RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor); | 484 | RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor); |
485 | |||
486 | reorder.probability = q->reorder; | ||
487 | reorder.correlation = q->reorder_cor.rho; | ||
488 | RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder); | ||
489 | |||
507 | rta->rta_len = skb->tail - b; | 490 | rta->rta_len = skb->tail - b; |
508 | 491 | ||
509 | return skb->len; | 492 | return skb->len; |
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c478fc8db776..c420eba4876b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
@@ -770,33 +770,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | |||
770 | err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); | 770 | err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); |
771 | if (err) | 771 | if (err) |
772 | goto out_mknod_parent; | 772 | goto out_mknod_parent; |
773 | /* | 773 | |
774 | * Yucky last component or no last component at all? | 774 | dentry = lookup_create(&nd, 0); |
775 | * (foo/., foo/.., /////) | ||
776 | */ | ||
777 | err = -EEXIST; | ||
778 | if (nd.last_type != LAST_NORM) | ||
779 | goto out_mknod; | ||
780 | /* | ||
781 | * Lock the directory. | ||
782 | */ | ||
783 | down(&nd.dentry->d_inode->i_sem); | ||
784 | /* | ||
785 | * Do the final lookup. | ||
786 | */ | ||
787 | dentry = lookup_hash(&nd.last, nd.dentry); | ||
788 | err = PTR_ERR(dentry); | 775 | err = PTR_ERR(dentry); |
789 | if (IS_ERR(dentry)) | 776 | if (IS_ERR(dentry)) |
790 | goto out_mknod_unlock; | 777 | goto out_mknod_unlock; |
791 | err = -ENOENT; | 778 | |
792 | /* | ||
793 | * Special case - lookup gave negative, but... we had foo/bar/ | ||
794 | * From the vfs_mknod() POV we just have a negative dentry - | ||
795 | * all is fine. Let's be bastards - you had / on the end, you've | ||
796 | * been asking for (non-existent) directory. -ENOENT for you. | ||
797 | */ | ||
798 | if (nd.last.name[nd.last.len] && !dentry->d_inode) | ||
799 | goto out_mknod_dput; | ||
800 | /* | 779 | /* |
801 | * All right, let's create it. | 780 | * All right, let's create it. |
802 | */ | 781 | */ |
@@ -845,7 +824,6 @@ out_mknod_dput: | |||
845 | dput(dentry); | 824 | dput(dentry); |
846 | out_mknod_unlock: | 825 | out_mknod_unlock: |
847 | up(&nd.dentry->d_inode->i_sem); | 826 | up(&nd.dentry->d_inode->i_sem); |
848 | out_mknod: | ||
849 | path_release(&nd); | 827 | path_release(&nd); |
850 | out_mknod_parent: | 828 | out_mknod_parent: |
851 | if (err==-EEXIST) | 829 | if (err==-EEXIST) |
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 080aae243ce0..2f4531fcaca2 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c | |||
@@ -698,7 +698,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) | |||
698 | return -ENOMEM; | 698 | return -ENOMEM; |
699 | 699 | ||
700 | if (skb1->sk) | 700 | if (skb1->sk) |
701 | skb_set_owner_w(skb, skb1->sk); | 701 | skb_set_owner_w(skb2, skb1->sk); |
702 | 702 | ||
703 | /* Looking around. Are we still alive? | 703 | /* Looking around. Are we still alive? |
704 | * OK, link new skb, drop old one */ | 704 | * OK, link new skb, drop old one */ |
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 55ed979db144..d07f5ce31824 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c | |||
@@ -1136,7 +1136,7 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) | |||
1136 | struct xfrm_dst *last; | 1136 | struct xfrm_dst *last; |
1137 | u32 mtu; | 1137 | u32 mtu; |
1138 | 1138 | ||
1139 | if (!dst_check(dst->path, 0) || | 1139 | if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) || |
1140 | (dst->dev && !netif_running(dst->dev))) | 1140 | (dst->dev && !netif_running(dst->dev))) |
1141 | return 0; | 1141 | return 0; |
1142 | 1142 | ||
@@ -1156,7 +1156,7 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) | |||
1156 | xdst->child_mtu_cached = mtu; | 1156 | xdst->child_mtu_cached = mtu; |
1157 | } | 1157 | } |
1158 | 1158 | ||
1159 | if (!dst_check(xdst->route, 0)) | 1159 | if (!dst_check(xdst->route, xdst->route_cookie)) |
1160 | return 0; | 1160 | return 0; |
1161 | mtu = dst_mtu(xdst->route); | 1161 | mtu = dst_mtu(xdst->route); |
1162 | if (xdst->route_mtu_cached != mtu) { | 1162 | if (xdst->route_mtu_cached != mtu) { |
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5ddda2c98af9..97509011c274 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c | |||
@@ -34,14 +34,21 @@ static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) | |||
34 | { | 34 | { |
35 | struct rtattr *rt = xfrma[type - 1]; | 35 | struct rtattr *rt = xfrma[type - 1]; |
36 | struct xfrm_algo *algp; | 36 | struct xfrm_algo *algp; |
37 | int len; | ||
37 | 38 | ||
38 | if (!rt) | 39 | if (!rt) |
39 | return 0; | 40 | return 0; |
40 | 41 | ||
41 | if ((rt->rta_len - sizeof(*rt)) < sizeof(*algp)) | 42 | len = (rt->rta_len - sizeof(*rt)) - sizeof(*algp); |
43 | if (len < 0) | ||
42 | return -EINVAL; | 44 | return -EINVAL; |
43 | 45 | ||
44 | algp = RTA_DATA(rt); | 46 | algp = RTA_DATA(rt); |
47 | |||
48 | len -= (algp->alg_key_len + 7U) / 8; | ||
49 | if (len < 0) | ||
50 | return -EINVAL; | ||
51 | |||
45 | switch (type) { | 52 | switch (type) { |
46 | case XFRMA_ALG_AUTH: | 53 | case XFRMA_ALG_AUTH: |
47 | if (!algp->alg_key_len && | 54 | if (!algp->alg_key_len && |
@@ -162,6 +169,7 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, | |||
162 | struct rtattr *rta = u_arg; | 169 | struct rtattr *rta = u_arg; |
163 | struct xfrm_algo *p, *ualg; | 170 | struct xfrm_algo *p, *ualg; |
164 | struct xfrm_algo_desc *algo; | 171 | struct xfrm_algo_desc *algo; |
172 | int len; | ||
165 | 173 | ||
166 | if (!rta) | 174 | if (!rta) |
167 | return 0; | 175 | return 0; |
@@ -173,11 +181,12 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, | |||
173 | return -ENOSYS; | 181 | return -ENOSYS; |
174 | *props = algo->desc.sadb_alg_id; | 182 | *props = algo->desc.sadb_alg_id; |
175 | 183 | ||
176 | p = kmalloc(sizeof(*ualg) + ualg->alg_key_len, GFP_KERNEL); | 184 | len = sizeof(*ualg) + (ualg->alg_key_len + 7U) / 8; |
185 | p = kmalloc(len, GFP_KERNEL); | ||
177 | if (!p) | 186 | if (!p) |
178 | return -ENOMEM; | 187 | return -ENOMEM; |
179 | 188 | ||
180 | memcpy(p, ualg, sizeof(*ualg) + ualg->alg_key_len); | 189 | memcpy(p, ualg, len); |
181 | *algpp = p; | 190 | *algpp = p; |
182 | return 0; | 191 | return 0; |
183 | } | 192 | } |