Diffstat (limited to 'net')
32 files changed, 1917 insertions, 395 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 7850412f52b7..e47600b4e2e3 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -327,10 +327,6 @@ static void vlan_sync_address(struct net_device *dev, | |||
327 | static void vlan_transfer_features(struct net_device *dev, | 327 | static void vlan_transfer_features(struct net_device *dev, |
328 | struct net_device *vlandev) | 328 | struct net_device *vlandev) |
329 | { | 329 | { |
330 | u32 old_features = vlandev->features; | ||
331 | |||
332 | vlandev->features &= ~dev->vlan_features; | ||
333 | vlandev->features |= dev->features & dev->vlan_features; | ||
334 | vlandev->gso_max_size = dev->gso_max_size; | 330 | vlandev->gso_max_size = dev->gso_max_size; |
335 | 331 | ||
336 | if (dev->features & NETIF_F_HW_VLAN_TX) | 332 | if (dev->features & NETIF_F_HW_VLAN_TX) |
@@ -341,8 +337,8 @@ static void vlan_transfer_features(struct net_device *dev, | |||
341 | #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) | 337 | #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) |
342 | vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid; | 338 | vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid; |
343 | #endif | 339 | #endif |
344 | if (old_features != vlandev->features) | 340 | |
345 | netdev_features_change(vlandev); | 341 | netdev_update_features(vlandev); |
346 | } | 342 | } |
347 | 343 | ||
348 | static void __vlan_device_event(struct net_device *dev, unsigned long event) | 344 | static void __vlan_device_event(struct net_device *dev, unsigned long event) |
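What this hunk buys: instead of hand-copying feature bits from the real device and calling netdev_features_change() only on a detected delta, vlan_transfer_features() now defers to netdev_update_features(), which recomputes the VLAN device's features through its new ndo_fix_features hook (added in vlan_dev.c below) and emits the notification itself only when something changed. A minimal sketch of what that helper does, simplified from net/core/dev.c of this era (the real one also masks against hw_features and user-requested bits):

static void netdev_update_features_sketch(struct net_device *dev)
{
	u32 features = dev->features;

	/* let the device drop bits it cannot currently honour */
	if (dev->netdev_ops->ndo_fix_features)
		features = dev->netdev_ops->ndo_fix_features(dev, features);

	if (features == dev->features)
		return;				/* no change, no event */

	dev->features = features;
	netdev_features_change(dev);		/* NETDEV_FEAT_CHANGE */
}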
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index e34ea9e5e28b..b84a46b30c0c 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -704,8 +704,8 @@ static int vlan_dev_init(struct net_device *dev) | |||
704 | (1<<__LINK_STATE_DORMANT))) | | 704 | (1<<__LINK_STATE_DORMANT))) | |
705 | (1<<__LINK_STATE_PRESENT); | 705 | (1<<__LINK_STATE_PRESENT); |
706 | 706 | ||
707 | dev->features |= real_dev->features & real_dev->vlan_features; | 707 | dev->hw_features = real_dev->vlan_features & NETIF_F_ALL_TX_OFFLOADS; |
708 | dev->features |= NETIF_F_LLTX; | 708 | dev->features |= real_dev->vlan_features | NETIF_F_LLTX; |
709 | dev->gso_max_size = real_dev->gso_max_size; | 709 | dev->gso_max_size = real_dev->gso_max_size; |
710 | 710 | ||
711 | /* ipv6 shared card related stuff */ | 711 | /* ipv6 shared card related stuff */ |
@@ -759,6 +759,17 @@ static void vlan_dev_uninit(struct net_device *dev) | |||
759 | } | 759 | } |
760 | } | 760 | } |
761 | 761 | ||
762 | static u32 vlan_dev_fix_features(struct net_device *dev, u32 features) | ||
763 | { | ||
764 | struct net_device *real_dev = vlan_dev_info(dev)->real_dev; | ||
765 | |||
766 | features &= (real_dev->features | NETIF_F_LLTX); | ||
767 | if (dev_ethtool_get_rx_csum(real_dev)) | ||
768 | features |= NETIF_F_RXCSUM; | ||
769 | |||
770 | return features; | ||
771 | } | ||
772 | |||
762 | static int vlan_ethtool_get_settings(struct net_device *dev, | 773 | static int vlan_ethtool_get_settings(struct net_device *dev, |
763 | struct ethtool_cmd *cmd) | 774 | struct ethtool_cmd *cmd) |
764 | { | 775 | { |
@@ -774,18 +785,6 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev, | |||
774 | strcpy(info->fw_version, "N/A"); | 785 | strcpy(info->fw_version, "N/A"); |
775 | } | 786 | } |
776 | 787 | ||
777 | static u32 vlan_ethtool_get_rx_csum(struct net_device *dev) | ||
778 | { | ||
779 | const struct vlan_dev_info *vlan = vlan_dev_info(dev); | ||
780 | return dev_ethtool_get_rx_csum(vlan->real_dev); | ||
781 | } | ||
782 | |||
783 | static u32 vlan_ethtool_get_flags(struct net_device *dev) | ||
784 | { | ||
785 | const struct vlan_dev_info *vlan = vlan_dev_info(dev); | ||
786 | return dev_ethtool_get_flags(vlan->real_dev); | ||
787 | } | ||
788 | |||
789 | static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) | 788 | static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) |
790 | { | 789 | { |
791 | 790 | ||
@@ -823,32 +822,10 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st | |||
823 | return stats; | 822 | return stats; |
824 | } | 823 | } |
825 | 824 | ||
826 | static int vlan_ethtool_set_tso(struct net_device *dev, u32 data) | ||
827 | { | ||
828 | if (data) { | ||
829 | struct net_device *real_dev = vlan_dev_info(dev)->real_dev; | ||
830 | |||
831 | /* Underlying device must support TSO for VLAN-tagged packets | ||
832 | * and must have TSO enabled now. | ||
833 | */ | ||
834 | if (!(real_dev->vlan_features & NETIF_F_TSO)) | ||
835 | return -EOPNOTSUPP; | ||
836 | if (!(real_dev->features & NETIF_F_TSO)) | ||
837 | return -EINVAL; | ||
838 | dev->features |= NETIF_F_TSO; | ||
839 | } else { | ||
840 | dev->features &= ~NETIF_F_TSO; | ||
841 | } | ||
842 | return 0; | ||
843 | } | ||
844 | |||
845 | static const struct ethtool_ops vlan_ethtool_ops = { | 825 | static const struct ethtool_ops vlan_ethtool_ops = { |
846 | .get_settings = vlan_ethtool_get_settings, | 826 | .get_settings = vlan_ethtool_get_settings, |
847 | .get_drvinfo = vlan_ethtool_get_drvinfo, | 827 | .get_drvinfo = vlan_ethtool_get_drvinfo, |
848 | .get_link = ethtool_op_get_link, | 828 | .get_link = ethtool_op_get_link, |
849 | .get_rx_csum = vlan_ethtool_get_rx_csum, | ||
850 | .get_flags = vlan_ethtool_get_flags, | ||
851 | .set_tso = vlan_ethtool_set_tso, | ||
852 | }; | 829 | }; |
853 | 830 | ||
854 | static const struct net_device_ops vlan_netdev_ops = { | 831 | static const struct net_device_ops vlan_netdev_ops = { |
@@ -874,6 +851,7 @@ static const struct net_device_ops vlan_netdev_ops = { | |||
874 | .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, | 851 | .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, |
875 | .ndo_fcoe_ddp_target = vlan_dev_fcoe_ddp_target, | 852 | .ndo_fcoe_ddp_target = vlan_dev_fcoe_ddp_target, |
876 | #endif | 853 | #endif |
854 | .ndo_fix_features = vlan_dev_fix_features, | ||
877 | }; | 855 | }; |
878 | 856 | ||
879 | void vlan_setup(struct net_device *dev) | 857 | void vlan_setup(struct net_device *dev) |
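Two things happen in vlan_dev_init() and the new hook: dev->hw_features advertises the TX offloads user space may toggle (ethtool -K), and vlan_dev_fix_features() clamps any requested set to what the real device currently provides, plus NETIF_F_LLTX, which a VLAN device always supports. The clamp is the generic pattern for any stacked device; an illustrative hook, where get_lower_dev() is a hypothetical accessor standing in for vlan_dev_info(dev)->real_dev:

static u32 stacked_dev_fix_features(struct net_device *dev, u32 features)
{
	struct net_device *lower = get_lower_dev(dev);	/* hypothetical */

	/* never advertise more than the lower device can deliver right
	 * now; if it loses TSO, this device follows on the next
	 * netdev_update_features() pass */
	return features & (lower->features | NETIF_F_LLTX);
}

With that in place, a feature change on the real device propagates as vlan_transfer_features() -> netdev_update_features(vlandev) -> vlan_dev_fix_features(), which is also what lets the three ethtool ops above be deleted.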
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 84bbb82599b2..f20c4fd915a8 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -104,3 +104,4 @@ module_init(br_init) | |||
104 | module_exit(br_deinit) | 104 | module_exit(br_deinit) |
105 | MODULE_LICENSE("GPL"); | 105 | MODULE_LICENSE("GPL"); |
106 | MODULE_VERSION(BR_VERSION); | 106 | MODULE_VERSION(BR_VERSION); |
107 | MODULE_ALIAS_RTNL_LINK("bridge"); | ||
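The new alias is what lets the module be demand-loaded when a bridge is created over rtnetlink instead of brctl. Per include/net/rtnetlink.h, the macro is a thin wrapper keyed on the link kind:

/* MODULE_ALIAS_RTNL_LINK("bridge") expands to
 * MODULE_ALIAS("rtnl-link-bridge"); rtnl_newlink() issues
 * request_module("rtnl-link-%s", kind) for an unknown kind, so
 *	ip link add name br0 type bridge
 * can modprobe the bridge module on demand.
 */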
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 21e5901186ea..45cfd54b06d3 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -74,6 +74,17 @@ out: | |||
74 | return NETDEV_TX_OK; | 74 | return NETDEV_TX_OK; |
75 | } | 75 | } |
76 | 76 | ||
77 | static int br_dev_init(struct net_device *dev) | ||
78 | { | ||
79 | struct net_bridge *br = netdev_priv(dev); | ||
80 | |||
81 | br->stats = alloc_percpu(struct br_cpu_netstats); | ||
82 | if (!br->stats) | ||
83 | return -ENOMEM; | ||
84 | |||
85 | return 0; | ||
86 | } | ||
87 | |||
77 | static int br_dev_open(struct net_device *dev) | 88 | static int br_dev_open(struct net_device *dev) |
78 | { | 89 | { |
79 | struct net_bridge *br = netdev_priv(dev); | 90 | struct net_bridge *br = netdev_priv(dev); |
@@ -334,6 +345,7 @@ static const struct ethtool_ops br_ethtool_ops = { | |||
334 | static const struct net_device_ops br_netdev_ops = { | 345 | static const struct net_device_ops br_netdev_ops = { |
335 | .ndo_open = br_dev_open, | 346 | .ndo_open = br_dev_open, |
336 | .ndo_stop = br_dev_stop, | 347 | .ndo_stop = br_dev_stop, |
348 | .ndo_init = br_dev_init, | ||
337 | .ndo_start_xmit = br_dev_xmit, | 349 | .ndo_start_xmit = br_dev_xmit, |
338 | .ndo_get_stats64 = br_get_stats64, | 350 | .ndo_get_stats64 = br_get_stats64, |
339 | .ndo_set_mac_address = br_set_mac_address, | 351 | .ndo_set_mac_address = br_set_mac_address, |
@@ -357,18 +369,47 @@ static void br_dev_free(struct net_device *dev) | |||
357 | free_netdev(dev); | 369 | free_netdev(dev); |
358 | } | 370 | } |
359 | 371 | ||
372 | static struct device_type br_type = { | ||
373 | .name = "bridge", | ||
374 | }; | ||
375 | |||
360 | void br_dev_setup(struct net_device *dev) | 376 | void br_dev_setup(struct net_device *dev) |
361 | { | 377 | { |
378 | struct net_bridge *br = netdev_priv(dev); | ||
379 | |||
362 | random_ether_addr(dev->dev_addr); | 380 | random_ether_addr(dev->dev_addr); |
363 | ether_setup(dev); | 381 | ether_setup(dev); |
364 | 382 | ||
365 | dev->netdev_ops = &br_netdev_ops; | 383 | dev->netdev_ops = &br_netdev_ops; |
366 | dev->destructor = br_dev_free; | 384 | dev->destructor = br_dev_free; |
367 | SET_ETHTOOL_OPS(dev, &br_ethtool_ops); | 385 | SET_ETHTOOL_OPS(dev, &br_ethtool_ops); |
386 | SET_NETDEV_DEVTYPE(dev, &br_type); | ||
368 | dev->tx_queue_len = 0; | 387 | dev->tx_queue_len = 0; |
369 | dev->priv_flags = IFF_EBRIDGE; | 388 | dev->priv_flags = IFF_EBRIDGE; |
370 | 389 | ||
371 | dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | | 390 | dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | |
372 | NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX | | 391 | NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX | |
373 | NETIF_F_NETNS_LOCAL | NETIF_F_GSO | NETIF_F_HW_VLAN_TX; | 392 | NETIF_F_NETNS_LOCAL | NETIF_F_GSO | NETIF_F_HW_VLAN_TX; |
393 | |||
394 | br->dev = dev; | ||
395 | spin_lock_init(&br->lock); | ||
396 | INIT_LIST_HEAD(&br->port_list); | ||
397 | spin_lock_init(&br->hash_lock); | ||
398 | |||
399 | br->bridge_id.prio[0] = 0x80; | ||
400 | br->bridge_id.prio[1] = 0x00; | ||
401 | |||
402 | memcpy(br->group_addr, br_group_address, ETH_ALEN); | ||
403 | |||
404 | br->feature_mask = dev->features; | ||
405 | br->stp_enabled = BR_NO_STP; | ||
406 | br->designated_root = br->bridge_id; | ||
407 | br->bridge_max_age = br->max_age = 20 * HZ; | ||
408 | br->bridge_hello_time = br->hello_time = 2 * HZ; | ||
409 | br->bridge_forward_delay = br->forward_delay = 15 * HZ; | ||
410 | br->ageing_time = 300 * HZ; | ||
411 | |||
412 | br_netfilter_rtable_init(br); | ||
413 | br_stp_timer_init(br); | ||
414 | br_multicast_init(br); | ||
374 | } | 415 | } |
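The shape of this change: everything that can be set up unconditionally moves into br_dev_setup(), which the ioctl and netlink creation paths now share, while the only allocation that can fail, the per-cpu stats, moves into the new ndo_init hook, which register_netdevice() invokes and whose error code it propagates. The resulting creation sequence, sketched for both paths:

/* alloc_netdev(sizeof(struct net_bridge), name, br_dev_setup);
 *	-> br_dev_setup() fills static state (STP defaults, timers, ...)
 * register_netdevice(dev);
 *	-> ndo_init == br_dev_init(): alloc_percpu() the stats;
 *	   -ENOMEM aborts registration cleanly
 *	-> NETDEV_REGISTER notifier: br_sysfs_addbr() (see br_notify.c)
 */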
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index cc4d3c5ab1c6..e0dfbc151dd7 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -28,6 +28,7 @@ | |||
28 | static struct kmem_cache *br_fdb_cache __read_mostly; | 28 | static struct kmem_cache *br_fdb_cache __read_mostly; |
29 | static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, | 29 | static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, |
30 | const unsigned char *addr); | 30 | const unsigned char *addr); |
31 | static void fdb_notify(const struct net_bridge_fdb_entry *, int); | ||
31 | 32 | ||
32 | static u32 fdb_salt __read_mostly; | 33 | static u32 fdb_salt __read_mostly; |
33 | 34 | ||
@@ -62,7 +63,7 @@ static inline int has_expired(const struct net_bridge *br, | |||
62 | const struct net_bridge_fdb_entry *fdb) | 63 | const struct net_bridge_fdb_entry *fdb) |
63 | { | 64 | { |
64 | return !fdb->is_static && | 65 | return !fdb->is_static && |
65 | time_before_eq(fdb->ageing_timer + hold_time(br), jiffies); | 66 | time_before_eq(fdb->updated + hold_time(br), jiffies); |
66 | } | 67 | } |
67 | 68 | ||
68 | static inline int br_mac_hash(const unsigned char *mac) | 69 | static inline int br_mac_hash(const unsigned char *mac) |
@@ -81,6 +82,7 @@ static void fdb_rcu_free(struct rcu_head *head) | |||
81 | 82 | ||
82 | static inline void fdb_delete(struct net_bridge_fdb_entry *f) | 83 | static inline void fdb_delete(struct net_bridge_fdb_entry *f) |
83 | { | 84 | { |
85 | fdb_notify(f, RTM_DELNEIGH); | ||
84 | hlist_del_rcu(&f->hlist); | 86 | hlist_del_rcu(&f->hlist); |
85 | call_rcu(&f->rcu, fdb_rcu_free); | 87 | call_rcu(&f->rcu, fdb_rcu_free); |
86 | } | 88 | } |
@@ -140,7 +142,7 @@ void br_fdb_cleanup(unsigned long _data) | |||
140 | unsigned long this_timer; | 142 | unsigned long this_timer; |
141 | if (f->is_static) | 143 | if (f->is_static) |
142 | continue; | 144 | continue; |
143 | this_timer = f->ageing_timer + delay; | 145 | this_timer = f->updated + delay; |
144 | if (time_before_eq(this_timer, jiffies)) | 146 | if (time_before_eq(this_timer, jiffies)) |
145 | fdb_delete(f); | 147 | fdb_delete(f); |
146 | else if (time_before(this_timer, next_timer)) | 148 | else if (time_before(this_timer, next_timer)) |
@@ -293,7 +295,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, | |||
293 | 295 | ||
294 | fe->is_local = f->is_local; | 296 | fe->is_local = f->is_local; |
295 | if (!f->is_static) | 297 | if (!f->is_static) |
296 | fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->ageing_timer); | 298 | fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->updated); |
297 | ++fe; | 299 | ++fe; |
298 | ++num; | 300 | ++num; |
299 | } | 301 | } |
@@ -305,8 +307,21 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, | |||
305 | return num; | 307 | return num; |
306 | } | 308 | } |
307 | 309 | ||
308 | static inline struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, | 310 | static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, |
309 | const unsigned char *addr) | 311 | const unsigned char *addr) |
312 | { | ||
313 | struct hlist_node *h; | ||
314 | struct net_bridge_fdb_entry *fdb; | ||
315 | |||
316 | hlist_for_each_entry(fdb, h, head, hlist) { | ||
317 | if (!compare_ether_addr(fdb->addr.addr, addr)) | ||
318 | return fdb; | ||
319 | } | ||
320 | return NULL; | ||
321 | } | ||
322 | |||
323 | static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, | ||
324 | const unsigned char *addr) | ||
310 | { | 325 | { |
311 | struct hlist_node *h; | 326 | struct hlist_node *h; |
312 | struct net_bridge_fdb_entry *fdb; | 327 | struct net_bridge_fdb_entry *fdb; |
@@ -320,8 +335,7 @@ static inline struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, | |||
320 | 335 | ||
321 | static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, | 336 | static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, |
322 | struct net_bridge_port *source, | 337 | struct net_bridge_port *source, |
323 | const unsigned char *addr, | 338 | const unsigned char *addr) |
324 | int is_local) | ||
325 | { | 339 | { |
326 | struct net_bridge_fdb_entry *fdb; | 340 | struct net_bridge_fdb_entry *fdb; |
327 | 341 | ||
@@ -329,11 +343,11 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, | |||
329 | if (fdb) { | 343 | if (fdb) { |
330 | memcpy(fdb->addr.addr, addr, ETH_ALEN); | 344 | memcpy(fdb->addr.addr, addr, ETH_ALEN); |
331 | fdb->dst = source; | 345 | fdb->dst = source; |
332 | fdb->is_local = is_local; | 346 | fdb->is_local = 0; |
333 | fdb->is_static = is_local; | 347 | fdb->is_static = 0; |
334 | fdb->ageing_timer = jiffies; | 348 | fdb->updated = fdb->used = jiffies; |
335 | |||
336 | hlist_add_head_rcu(&fdb->hlist, head); | 349 | hlist_add_head_rcu(&fdb->hlist, head); |
350 | fdb_notify(fdb, RTM_NEWNEIGH); | ||
337 | } | 351 | } |
338 | return fdb; | 352 | return fdb; |
339 | } | 353 | } |
@@ -360,12 +374,15 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, | |||
360 | fdb_delete(fdb); | 374 | fdb_delete(fdb); |
361 | } | 375 | } |
362 | 376 | ||
363 | if (!fdb_create(head, source, addr, 1)) | 377 | fdb = fdb_create(head, source, addr); |
378 | if (!fdb) | ||
364 | return -ENOMEM; | 379 | return -ENOMEM; |
365 | 380 | ||
381 | fdb->is_local = fdb->is_static = 1; | ||
366 | return 0; | 382 | return 0; |
367 | } | 383 | } |
368 | 384 | ||
385 | /* Add entry for local address of interface */ | ||
369 | int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, | 386 | int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, |
370 | const unsigned char *addr) | 387 | const unsigned char *addr) |
371 | { | 388 | { |
@@ -392,7 +409,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, | |||
392 | source->state == BR_STATE_FORWARDING)) | 409 | source->state == BR_STATE_FORWARDING)) |
393 | return; | 410 | return; |
394 | 411 | ||
395 | fdb = fdb_find(head, addr); | 412 | fdb = fdb_find_rcu(head, addr); |
396 | if (likely(fdb)) { | 413 | if (likely(fdb)) { |
397 | /* attempt to update an entry for a local interface */ | 414 | /* attempt to update an entry for a local interface */ |
398 | if (unlikely(fdb->is_local)) { | 415 | if (unlikely(fdb->is_local)) { |
@@ -403,15 +420,277 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, | |||
403 | } else { | 420 | } else { |
404 | /* fastpath: update of existing entry */ | 421 | /* fastpath: update of existing entry */ |
405 | fdb->dst = source; | 422 | fdb->dst = source; |
406 | fdb->ageing_timer = jiffies; | 423 | fdb->updated = jiffies; |
407 | } | 424 | } |
408 | } else { | 425 | } else { |
409 | spin_lock(&br->hash_lock); | 426 | spin_lock(&br->hash_lock); |
410 | if (!fdb_find(head, addr)) | 427 | if (likely(!fdb_find(head, addr))) |
411 | fdb_create(head, source, addr, 0); | 428 | fdb_create(head, source, addr); |
429 | |||
412 | /* else we lose race and someone else inserts | 430 | /* else we lose race and someone else inserts |
413 | * it first, don't bother updating | 431 | * it first, don't bother updating |
414 | */ | 432 | */ |
415 | spin_unlock(&br->hash_lock); | 433 | spin_unlock(&br->hash_lock); |
416 | } | 434 | } |
417 | } | 435 | } |
436 | |||
437 | static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb) | ||
438 | { | ||
439 | if (fdb->is_local) | ||
440 | return NUD_PERMANENT; | ||
441 | else if (fdb->is_static) | ||
442 | return NUD_NOARP; | ||
443 | else if (has_expired(fdb->dst->br, fdb)) | ||
444 | return NUD_STALE; | ||
445 | else | ||
446 | return NUD_REACHABLE; | ||
447 | } | ||
448 | |||
449 | static int fdb_fill_info(struct sk_buff *skb, | ||
450 | const struct net_bridge_fdb_entry *fdb, | ||
451 | u32 pid, u32 seq, int type, unsigned int flags) | ||
452 | { | ||
453 | unsigned long now = jiffies; | ||
454 | struct nda_cacheinfo ci; | ||
455 | struct nlmsghdr *nlh; | ||
456 | struct ndmsg *ndm; | ||
457 | |||
458 | nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); | ||
459 | if (nlh == NULL) | ||
460 | return -EMSGSIZE; | ||
461 | |||
462 | |||
463 | ndm = nlmsg_data(nlh); | ||
464 | ndm->ndm_family = AF_BRIDGE; | ||
465 | ndm->ndm_pad1 = 0; | ||
466 | ndm->ndm_pad2 = 0; | ||
467 | ndm->ndm_flags = 0; | ||
468 | ndm->ndm_type = 0; | ||
469 | ndm->ndm_ifindex = fdb->dst->dev->ifindex; | ||
470 | ndm->ndm_state = fdb_to_nud(fdb); | ||
471 | |||
472 | NLA_PUT(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr); | ||
473 | |||
474 | ci.ndm_used = jiffies_to_clock_t(now - fdb->used); | ||
475 | ci.ndm_confirmed = 0; | ||
476 | ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated); | ||
477 | ci.ndm_refcnt = 0; | ||
478 | NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci); | ||
479 | |||
480 | return nlmsg_end(skb, nlh); | ||
481 | |||
482 | nla_put_failure: | ||
483 | nlmsg_cancel(skb, nlh); | ||
484 | return -EMSGSIZE; | ||
485 | } | ||
486 | |||
487 | static inline size_t fdb_nlmsg_size(void) | ||
488 | { | ||
489 | return NLMSG_ALIGN(sizeof(struct ndmsg)) | ||
490 | + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ | ||
491 | + nla_total_size(sizeof(struct nda_cacheinfo)); | ||
492 | } | ||
493 | |||
494 | static void fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) | ||
495 | { | ||
496 | struct net *net = dev_net(fdb->dst->dev); | ||
497 | struct sk_buff *skb; | ||
498 | int err = -ENOBUFS; | ||
499 | |||
500 | skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC); | ||
501 | if (skb == NULL) | ||
502 | goto errout; | ||
503 | |||
504 | err = fdb_fill_info(skb, fdb, 0, 0, type, 0); | ||
505 | if (err < 0) { | ||
506 | /* -EMSGSIZE implies BUG in fdb_nlmsg_size() */ | ||
507 | WARN_ON(err == -EMSGSIZE); | ||
508 | kfree_skb(skb); | ||
509 | goto errout; | ||
510 | } | ||
511 | rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); | ||
512 | return; | ||
513 | errout: | ||
514 | if (err < 0) | ||
515 | rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); | ||
516 | } | ||
517 | |||
518 | /* Dump information about entries, in response to GETNEIGH */ | ||
519 | int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) | ||
520 | { | ||
521 | struct net *net = sock_net(skb->sk); | ||
522 | struct net_device *dev; | ||
523 | int idx = 0; | ||
524 | |||
525 | rcu_read_lock(); | ||
526 | for_each_netdev_rcu(net, dev) { | ||
527 | struct net_bridge *br = netdev_priv(dev); | ||
528 | int i; | ||
529 | |||
530 | if (!(dev->priv_flags & IFF_EBRIDGE)) | ||
531 | continue; | ||
532 | |||
533 | for (i = 0; i < BR_HASH_SIZE; i++) { | ||
534 | struct hlist_node *h; | ||
535 | struct net_bridge_fdb_entry *f; | ||
536 | |||
537 | hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) { | ||
538 | if (idx < cb->args[0]) | ||
539 | goto skip; | ||
540 | |||
541 | if (fdb_fill_info(skb, f, | ||
542 | NETLINK_CB(cb->skb).pid, | ||
543 | cb->nlh->nlmsg_seq, | ||
544 | RTM_NEWNEIGH, | ||
545 | NLM_F_MULTI) < 0) | ||
546 | break; | ||
547 | skip: | ||
548 | ++idx; | ||
549 | } | ||
550 | } | ||
551 | } | ||
552 | rcu_read_unlock(); | ||
553 | |||
554 | cb->args[0] = idx; | ||
555 | |||
556 | return skb->len; | ||
557 | } | ||
558 | |||
559 | /* Create new static fdb entry */ | ||
560 | static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, | ||
561 | __u16 state) | ||
562 | { | ||
563 | struct net_bridge *br = source->br; | ||
564 | struct hlist_head *head = &br->hash[br_mac_hash(addr)]; | ||
565 | struct net_bridge_fdb_entry *fdb; | ||
566 | |||
567 | fdb = fdb_find(head, addr); | ||
568 | if (fdb) | ||
569 | return -EEXIST; | ||
570 | |||
571 | fdb = fdb_create(head, source, addr); | ||
572 | if (!fdb) | ||
573 | return -ENOMEM; | ||
574 | |||
575 | if (state & NUD_PERMANENT) | ||
576 | fdb->is_local = fdb->is_static = 1; | ||
577 | else if (state & NUD_NOARP) | ||
578 | fdb->is_static = 1; | ||
579 | return 0; | ||
580 | } | ||
581 | |||
582 | /* Add new permanent fdb entry with RTM_NEWNEIGH */ | ||
583 | int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | ||
584 | { | ||
585 | struct net *net = sock_net(skb->sk); | ||
586 | struct ndmsg *ndm; | ||
587 | struct nlattr *tb[NDA_MAX+1]; | ||
588 | struct net_device *dev; | ||
589 | struct net_bridge_port *p; | ||
590 | const __u8 *addr; | ||
591 | int err; | ||
592 | |||
593 | ASSERT_RTNL(); | ||
594 | err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); | ||
595 | if (err < 0) | ||
596 | return err; | ||
597 | |||
598 | ndm = nlmsg_data(nlh); | ||
599 | if (ndm->ndm_ifindex == 0) { | ||
600 | pr_info("bridge: RTM_NEWNEIGH with invalid ifindex\n"); | ||
601 | return -EINVAL; | ||
602 | } | ||
603 | |||
604 | dev = __dev_get_by_index(net, ndm->ndm_ifindex); | ||
605 | if (dev == NULL) { | ||
606 | pr_info("bridge: RTM_NEWNEIGH with unknown ifindex\n"); | ||
607 | return -ENODEV; | ||
608 | } | ||
609 | |||
610 | if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { | ||
611 | pr_info("bridge: RTM_NEWNEIGH with invalid address\n"); | ||
612 | return -EINVAL; | ||
613 | } | ||
614 | |||
615 | addr = nla_data(tb[NDA_LLADDR]); | ||
616 | if (!is_valid_ether_addr(addr)) { | ||
617 | pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n"); | ||
618 | return -EINVAL; | ||
619 | } | ||
620 | |||
621 | p = br_port_get_rtnl(dev); | ||
622 | if (p == NULL) { | ||
623 | pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", | ||
624 | dev->name); | ||
625 | return -EINVAL; | ||
626 | } | ||
627 | |||
628 | spin_lock_bh(&p->br->hash_lock); | ||
629 | err = fdb_add_entry(p, addr, ndm->ndm_state); | ||
630 | spin_unlock_bh(&p->br->hash_lock); | ||
631 | |||
632 | return err; | ||
633 | } | ||
634 | |||
635 | static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr) | ||
636 | { | ||
637 | struct net_bridge *br = p->br; | ||
638 | struct hlist_head *head = &br->hash[br_mac_hash(addr)]; | ||
639 | struct net_bridge_fdb_entry *fdb; | ||
640 | |||
641 | fdb = fdb_find(head, addr); | ||
642 | if (!fdb) | ||
643 | return -ENOENT; | ||
644 | |||
645 | fdb_delete(fdb); | ||
646 | return 0; | ||
647 | } | ||
648 | |||
649 | /* Remove neighbor entry with RTM_DELNEIGH */ | ||
650 | int br_fdb_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | ||
651 | { | ||
652 | struct net *net = sock_net(skb->sk); | ||
653 | struct ndmsg *ndm; | ||
654 | struct net_bridge_port *p; | ||
655 | struct nlattr *llattr; | ||
656 | const __u8 *addr; | ||
657 | struct net_device *dev; | ||
658 | int err; | ||
659 | |||
660 | ASSERT_RTNL(); | ||
661 | if (nlmsg_len(nlh) < sizeof(*ndm)) | ||
662 | return -EINVAL; | ||
663 | |||
664 | ndm = nlmsg_data(nlh); | ||
665 | if (ndm->ndm_ifindex == 0) { | ||
666 | pr_info("bridge: RTM_DELNEIGH with invalid ifindex\n"); | ||
667 | return -EINVAL; | ||
668 | } | ||
669 | |||
670 | dev = __dev_get_by_index(net, ndm->ndm_ifindex); | ||
671 | if (dev == NULL) { | ||
672 | pr_info("bridge: RTM_DELNEIGH with unknown ifindex\n"); | ||
673 | return -ENODEV; | ||
674 | } | ||
675 | |||
676 | llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR); | ||
677 | if (llattr == NULL || nla_len(llattr) != ETH_ALEN) { | ||
678 | pr_info("bridge: RTM_DELNEIGH with invalid address\n"); | ||
679 | return -EINVAL; | ||
680 | } | ||
681 | |||
682 | addr = nla_data(llattr); | ||
683 | |||
684 | p = br_port_get_rtnl(dev); | ||
685 | if (p == NULL) { | ||
686 | pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", | ||
687 | dev->name); | ||
688 | return -EINVAL; | ||
689 | } | ||
690 | |||
691 | spin_lock_bh(&p->br->hash_lock); | ||
692 | err = fdb_delete_by_addr(p, addr); | ||
693 | spin_unlock_bh(&p->br->hash_lock); | ||
694 | |||
695 | return err; | ||
696 | } | ||
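With the RTM_{NEW,DEL,GET}NEIGH handlers registered for PF_BRIDGE (see br_netlink.c below), the forwarding database becomes visible and editable from user space; the old ageing_timer field is split into updated (refreshed on learning) and used (refreshed on the forwarding fast path in br_input.c), which feed ndm_updated and ndm_used in the dump. How fdb_add_entry() interprets the requested state, plus an illustrative invocation assuming a bridge-aware iproute2:

/* ndm_state mapping in fdb_add_entry():
 *	NUD_PERMANENT  -> is_local = is_static = 1  (local address)
 *	NUD_NOARP      -> is_static = 1             (static, never ages)
 *	anything else  -> ordinary entry, subject to ageing
 *
 * e.g.	bridge fdb add 00:11:22:33:44:55 dev eth0
 * sends RTM_NEWNEIGH with ndm_ifindex naming the bridge *port*
 * (eth0), not the bridge itself; br_fdb_add() rejects non-port
 * devices with -EINVAL.
 */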
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 718b60366dfe..7f5379c593d9 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -175,56 +175,6 @@ static void del_br(struct net_bridge *br, struct list_head *head) | |||
175 | unregister_netdevice_queue(br->dev, head); | 175 | unregister_netdevice_queue(br->dev, head); |
176 | } | 176 | } |
177 | 177 | ||
178 | static struct net_device *new_bridge_dev(struct net *net, const char *name) | ||
179 | { | ||
180 | struct net_bridge *br; | ||
181 | struct net_device *dev; | ||
182 | |||
183 | dev = alloc_netdev(sizeof(struct net_bridge), name, | ||
184 | br_dev_setup); | ||
185 | |||
186 | if (!dev) | ||
187 | return NULL; | ||
188 | dev_net_set(dev, net); | ||
189 | |||
190 | br = netdev_priv(dev); | ||
191 | br->dev = dev; | ||
192 | |||
193 | br->stats = alloc_percpu(struct br_cpu_netstats); | ||
194 | if (!br->stats) { | ||
195 | free_netdev(dev); | ||
196 | return NULL; | ||
197 | } | ||
198 | |||
199 | spin_lock_init(&br->lock); | ||
200 | INIT_LIST_HEAD(&br->port_list); | ||
201 | spin_lock_init(&br->hash_lock); | ||
202 | |||
203 | br->bridge_id.prio[0] = 0x80; | ||
204 | br->bridge_id.prio[1] = 0x00; | ||
205 | |||
206 | memcpy(br->group_addr, br_group_address, ETH_ALEN); | ||
207 | |||
208 | br->feature_mask = dev->features; | ||
209 | br->stp_enabled = BR_NO_STP; | ||
210 | br->designated_root = br->bridge_id; | ||
211 | br->root_path_cost = 0; | ||
212 | br->root_port = 0; | ||
213 | br->bridge_max_age = br->max_age = 20 * HZ; | ||
214 | br->bridge_hello_time = br->hello_time = 2 * HZ; | ||
215 | br->bridge_forward_delay = br->forward_delay = 15 * HZ; | ||
216 | br->topology_change = 0; | ||
217 | br->topology_change_detected = 0; | ||
218 | br->ageing_time = 300 * HZ; | ||
219 | |||
220 | br_netfilter_rtable_init(br); | ||
221 | |||
222 | br_stp_timer_init(br); | ||
223 | br_multicast_init(br); | ||
224 | |||
225 | return dev; | ||
226 | } | ||
227 | |||
228 | /* find an available port number */ | 178 | /* find an available port number */ |
229 | static int find_portno(struct net_bridge *br) | 179 | static int find_portno(struct net_bridge *br) |
230 | { | 180 | { |
@@ -277,42 +227,19 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, | |||
277 | return p; | 227 | return p; |
278 | } | 228 | } |
279 | 229 | ||
280 | static struct device_type br_type = { | ||
281 | .name = "bridge", | ||
282 | }; | ||
283 | |||
284 | int br_add_bridge(struct net *net, const char *name) | 230 | int br_add_bridge(struct net *net, const char *name) |
285 | { | 231 | { |
286 | struct net_device *dev; | 232 | struct net_device *dev; |
287 | int ret; | ||
288 | 233 | ||
289 | dev = new_bridge_dev(net, name); | 234 | dev = alloc_netdev(sizeof(struct net_bridge), name, |
235 | br_dev_setup); | ||
236 | |||
290 | if (!dev) | 237 | if (!dev) |
291 | return -ENOMEM; | 238 | return -ENOMEM; |
292 | 239 | ||
293 | rtnl_lock(); | 240 | dev_net_set(dev, net); |
294 | if (strchr(dev->name, '%')) { | ||
295 | ret = dev_alloc_name(dev, dev->name); | ||
296 | if (ret < 0) | ||
297 | goto out_free; | ||
298 | } | ||
299 | |||
300 | SET_NETDEV_DEVTYPE(dev, &br_type); | ||
301 | |||
302 | ret = register_netdevice(dev); | ||
303 | if (ret) | ||
304 | goto out_free; | ||
305 | |||
306 | ret = br_sysfs_addbr(dev); | ||
307 | if (ret) | ||
308 | unregister_netdevice(dev); | ||
309 | out: | ||
310 | rtnl_unlock(); | ||
311 | return ret; | ||
312 | 241 | ||
313 | out_free: | 242 | return register_netdev(dev); |
314 | free_netdev(dev); | ||
315 | goto out; | ||
316 | } | 243 | } |
317 | 244 | ||
318 | int br_del_bridge(struct net *net, const char *name) | 245 | int br_del_bridge(struct net *net, const char *name) |
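br_add_bridge() shrinks to an allocation plus register_netdev(), which already performs the sequence being deleted; roughly, from net/core/dev.c of this era:

int register_netdev(struct net_device *dev)
{
	int err;

	rtnl_lock();
	/* a '%d' in the name means "pick a free index for me" */
	if (strchr(dev->name, '%')) {
		err = dev_alloc_name(dev, dev->name);
		if (err < 0)
			goto out;
	}
	err = register_netdevice(dev);
out:
	rtnl_unlock();
	return err;
}

The br_sysfs_addbr() call that used to follow registration here now runs from the NETDEV_REGISTER notifier in br_notify.c, so bridges created via netlink get their sysfs directory too.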
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index e2160792e1bc..785932d7ad32 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -98,9 +98,10 @@ int br_handle_frame_finish(struct sk_buff *skb) | |||
98 | } | 98 | } |
99 | 99 | ||
100 | if (skb) { | 100 | if (skb) { |
101 | if (dst) | 101 | if (dst) { |
102 | dst->used = jiffies; | ||
102 | br_forward(dst->dst, skb, skb2); | 103 | br_forward(dst->dst, skb, skb2); |
103 | else | 104 | } else |
104 | br_flood_forward(br, skb, skb2); | 105 | br_flood_forward(br, skb, skb2); |
105 | } | 106 | } |
106 | 107 | ||
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 3d9fca0e3370..7222fe1d5460 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -181,40 +181,19 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) | |||
181 | if (!capable(CAP_NET_ADMIN)) | 181 | if (!capable(CAP_NET_ADMIN)) |
182 | return -EPERM; | 182 | return -EPERM; |
183 | 183 | ||
184 | spin_lock_bh(&br->lock); | 184 | return br_set_forward_delay(br, args[1]); |
185 | br->bridge_forward_delay = clock_t_to_jiffies(args[1]); | ||
186 | if (br_is_root_bridge(br)) | ||
187 | br->forward_delay = br->bridge_forward_delay; | ||
188 | spin_unlock_bh(&br->lock); | ||
189 | return 0; | ||
190 | 185 | ||
191 | case BRCTL_SET_BRIDGE_HELLO_TIME: | 186 | case BRCTL_SET_BRIDGE_HELLO_TIME: |
192 | { | ||
193 | unsigned long t = clock_t_to_jiffies(args[1]); | ||
194 | if (!capable(CAP_NET_ADMIN)) | 187 | if (!capable(CAP_NET_ADMIN)) |
195 | return -EPERM; | 188 | return -EPERM; |
196 | 189 | ||
197 | if (t < HZ) | 190 | return br_set_hello_time(br, args[1]); |
198 | return -EINVAL; | ||
199 | |||
200 | spin_lock_bh(&br->lock); | ||
201 | br->bridge_hello_time = t; | ||
202 | if (br_is_root_bridge(br)) | ||
203 | br->hello_time = br->bridge_hello_time; | ||
204 | spin_unlock_bh(&br->lock); | ||
205 | return 0; | ||
206 | } | ||
207 | 191 | ||
208 | case BRCTL_SET_BRIDGE_MAX_AGE: | 192 | case BRCTL_SET_BRIDGE_MAX_AGE: |
209 | if (!capable(CAP_NET_ADMIN)) | 193 | if (!capable(CAP_NET_ADMIN)) |
210 | return -EPERM; | 194 | return -EPERM; |
211 | 195 | ||
212 | spin_lock_bh(&br->lock); | 196 | return br_set_max_age(br, args[1]); |
213 | br->bridge_max_age = clock_t_to_jiffies(args[1]); | ||
214 | if (br_is_root_bridge(br)) | ||
215 | br->max_age = br->bridge_max_age; | ||
216 | spin_unlock_bh(&br->lock); | ||
217 | return 0; | ||
218 | 197 | ||
219 | case BRCTL_SET_AGEING_TIME: | 198 | case BRCTL_SET_AGEING_TIME: |
220 | if (!capable(CAP_NET_ADMIN)) | 199 | if (!capable(CAP_NET_ADMIN)) |
@@ -275,19 +254,16 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) | |||
275 | case BRCTL_SET_PORT_PRIORITY: | 254 | case BRCTL_SET_PORT_PRIORITY: |
276 | { | 255 | { |
277 | struct net_bridge_port *p; | 256 | struct net_bridge_port *p; |
278 | int ret = 0; | 257 | int ret; |
279 | 258 | ||
280 | if (!capable(CAP_NET_ADMIN)) | 259 | if (!capable(CAP_NET_ADMIN)) |
281 | return -EPERM; | 260 | return -EPERM; |
282 | 261 | ||
283 | if (args[2] >= (1<<(16-BR_PORT_BITS))) | ||
284 | return -ERANGE; | ||
285 | |||
286 | spin_lock_bh(&br->lock); | 262 | spin_lock_bh(&br->lock); |
287 | if ((p = br_get_port(br, args[1])) == NULL) | 263 | if ((p = br_get_port(br, args[1])) == NULL) |
288 | ret = -EINVAL; | 264 | ret = -EINVAL; |
289 | else | 265 | else |
290 | br_stp_set_port_priority(p, args[2]); | 266 | ret = br_stp_set_port_priority(p, args[2]); |
291 | spin_unlock_bh(&br->lock); | 267 | spin_unlock_bh(&br->lock); |
292 | return ret; | 268 | return ret; |
293 | } | 269 | } |
@@ -295,15 +271,17 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) | |||
295 | case BRCTL_SET_PATH_COST: | 271 | case BRCTL_SET_PATH_COST: |
296 | { | 272 | { |
297 | struct net_bridge_port *p; | 273 | struct net_bridge_port *p; |
298 | int ret = 0; | 274 | int ret; |
299 | 275 | ||
300 | if (!capable(CAP_NET_ADMIN)) | 276 | if (!capable(CAP_NET_ADMIN)) |
301 | return -EPERM; | 277 | return -EPERM; |
302 | 278 | ||
279 | spin_lock_bh(&br->lock); | ||
303 | if ((p = br_get_port(br, args[1])) == NULL) | 280 | if ((p = br_get_port(br, args[1])) == NULL) |
304 | ret = -EINVAL; | 281 | ret = -EINVAL; |
305 | else | 282 | else |
306 | br_stp_set_path_cost(p, args[2]); | 283 | ret = br_stp_set_path_cost(p, args[2]); |
284 | spin_unlock_bh(&br->lock); | ||
307 | 285 | ||
308 | return ret; | 286 | return ret; |
309 | } | 287 | } |
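Beyond folding the timer ioctls into the shared br_set_*() helpers, note the locking fix in the last hunk: BRCTL_SET_PATH_COST used to look up the port and write the cost with no br->lock held, even though both br_stp_set_*() helpers are documented as "called under bridge lock". Both set-port ioctls now follow one shape:

spin_lock_bh(&br->lock);
if ((p = br_get_port(br, args[1])) == NULL)
	ret = -EINVAL;
else
	ret = br_stp_set_path_cost(p, args[2]);	/* or ..._set_port_priority() */
spin_unlock_bh(&br->lock);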
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index f8bf4c7f842c..134a2ff6b98b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -12,9 +12,11 @@ | |||
12 | 12 | ||
13 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
15 | #include <linux/etherdevice.h> | ||
15 | #include <net/rtnetlink.h> | 16 | #include <net/rtnetlink.h> |
16 | #include <net/net_namespace.h> | 17 | #include <net/net_namespace.h> |
17 | #include <net/sock.h> | 18 | #include <net/sock.h> |
19 | |||
18 | #include "br_private.h" | 20 | #include "br_private.h" |
19 | 21 | ||
20 | static inline size_t br_nlmsg_size(void) | 22 | static inline size_t br_nlmsg_size(void) |
@@ -188,20 +190,61 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
188 | return 0; | 190 | return 0; |
189 | } | 191 | } |
190 | 192 | ||
193 | static int br_validate(struct nlattr *tb[], struct nlattr *data[]) | ||
194 | { | ||
195 | if (tb[IFLA_ADDRESS]) { | ||
196 | if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) | ||
197 | return -EINVAL; | ||
198 | if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) | ||
199 | return -EADDRNOTAVAIL; | ||
200 | } | ||
201 | |||
202 | return 0; | ||
203 | } | ||
204 | |||
205 | static struct rtnl_link_ops br_link_ops __read_mostly = { | ||
206 | .kind = "bridge", | ||
207 | .priv_size = sizeof(struct net_bridge), | ||
208 | .setup = br_dev_setup, | ||
209 | .validate = br_validate, | ||
210 | }; | ||
191 | 211 | ||
192 | int __init br_netlink_init(void) | 212 | int __init br_netlink_init(void) |
193 | { | 213 | { |
194 | if (__rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, br_dump_ifinfo)) | 214 | int err; |
195 | return -ENOBUFS; | ||
196 | 215 | ||
197 | /* Only the first call to __rtnl_register can fail */ | 216 | err = rtnl_link_register(&br_link_ops); |
198 | __rtnl_register(PF_BRIDGE, RTM_SETLINK, br_rtm_setlink, NULL); | 217 | if (err < 0) |
218 | goto err1; | ||
219 | |||
220 | err = __rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, br_dump_ifinfo); | ||
221 | if (err) | ||
222 | goto err2; | ||
223 | err = __rtnl_register(PF_BRIDGE, RTM_SETLINK, br_rtm_setlink, NULL); | ||
224 | if (err) | ||
225 | goto err3; | ||
226 | err = __rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, br_fdb_add, NULL); | ||
227 | if (err) | ||
228 | goto err3; | ||
229 | err = __rtnl_register(PF_BRIDGE, RTM_DELNEIGH, br_fdb_delete, NULL); | ||
230 | if (err) | ||
231 | goto err3; | ||
232 | err = __rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, br_fdb_dump); | ||
233 | if (err) | ||
234 | goto err3; | ||
199 | 235 | ||
200 | return 0; | 236 | return 0; |
237 | |||
238 | err3: | ||
239 | rtnl_unregister_all(PF_BRIDGE); | ||
240 | err2: | ||
241 | rtnl_link_unregister(&br_link_ops); | ||
242 | err1: | ||
243 | return err; | ||
201 | } | 244 | } |
202 | 245 | ||
203 | void __exit br_netlink_fini(void) | 246 | void __exit br_netlink_fini(void) |
204 | { | 247 | { |
248 | rtnl_link_unregister(&br_link_ops); | ||
205 | rtnl_unregister_all(PF_BRIDGE); | 249 | rtnl_unregister_all(PF_BRIDGE); |
206 | } | 250 | } |
207 | |||
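With br_link_ops registered, bridge devices can be created and destroyed over rtnetlink; note the unwinding, where any later __rtnl_register() failure funnels through rtnl_unregister_all(PF_BRIDGE) so partially registered handlers are cleaned up. Illustrative usage, assuming an iproute2 recent enough to pass the link kind through (ports are still enslaved with brctl addif at this point):

/*	ip link add name br0 type bridge
 *	ip link del dev br0
 */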
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 7d337c9b6082..7a03bb975375 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -36,6 +36,12 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v | |||
36 | struct net_bridge *br; | 36 | struct net_bridge *br; |
37 | int err; | 37 | int err; |
38 | 38 | ||
39 | /* registration of the bridge completed, add sysfs entries */ ||
40 | if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) { ||
41 | br_sysfs_addbr(dev); | ||
42 | return NOTIFY_DONE; | ||
43 | } | ||
44 | |||
39 | /* not a port of a bridge */ | 45 | /* not a port of a bridge */ |
40 | p = br_port_get_rtnl(dev); | 46 | p = br_port_get_rtnl(dev); |
41 | if (!p) | 47 | if (!p) |
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 387013d33745..e2a40343aa09 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -64,7 +64,8 @@ struct net_bridge_fdb_entry | |||
64 | struct net_bridge_port *dst; | 64 | struct net_bridge_port *dst; |
65 | 65 | ||
66 | struct rcu_head rcu; | 66 | struct rcu_head rcu; |
67 | unsigned long ageing_timer; | 67 | unsigned long updated; |
68 | unsigned long used; | ||
68 | mac_addr addr; | 69 | mac_addr addr; |
69 | unsigned char is_local; | 70 | unsigned char is_local; |
70 | unsigned char is_static; | 71 | unsigned char is_static; |
@@ -353,6 +354,9 @@ extern int br_fdb_insert(struct net_bridge *br, | |||
353 | extern void br_fdb_update(struct net_bridge *br, | 354 | extern void br_fdb_update(struct net_bridge *br, |
354 | struct net_bridge_port *source, | 355 | struct net_bridge_port *source, |
355 | const unsigned char *addr); | 356 | const unsigned char *addr); |
357 | extern int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb); | ||
358 | extern int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); | ||
359 | extern int br_fdb_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); | ||
356 | 360 | ||
357 | /* br_forward.c */ | 361 | /* br_forward.c */ |
358 | extern void br_deliver(const struct net_bridge_port *to, | 362 | extern void br_deliver(const struct net_bridge_port *to, |
@@ -491,6 +495,11 @@ extern struct net_bridge_port *br_get_port(struct net_bridge *br, | |||
491 | extern void br_init_port(struct net_bridge_port *p); | 495 | extern void br_init_port(struct net_bridge_port *p); |
492 | extern void br_become_designated_port(struct net_bridge_port *p); | 496 | extern void br_become_designated_port(struct net_bridge_port *p); |
493 | 497 | ||
498 | extern int br_set_forward_delay(struct net_bridge *br, unsigned long x); | ||
499 | extern int br_set_hello_time(struct net_bridge *br, unsigned long x); | ||
500 | extern int br_set_max_age(struct net_bridge *br, unsigned long x); | ||
501 | |||
502 | |||
494 | /* br_stp_if.c */ | 503 | /* br_stp_if.c */ |
495 | extern void br_stp_enable_bridge(struct net_bridge *br); | 504 | extern void br_stp_enable_bridge(struct net_bridge *br); |
496 | extern void br_stp_disable_bridge(struct net_bridge *br); | 505 | extern void br_stp_disable_bridge(struct net_bridge *br); |
@@ -501,10 +510,10 @@ extern bool br_stp_recalculate_bridge_id(struct net_bridge *br); | |||
501 | extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a); | 510 | extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a); |
502 | extern void br_stp_set_bridge_priority(struct net_bridge *br, | 511 | extern void br_stp_set_bridge_priority(struct net_bridge *br, |
503 | u16 newprio); | 512 | u16 newprio); |
504 | extern void br_stp_set_port_priority(struct net_bridge_port *p, | 513 | extern int br_stp_set_port_priority(struct net_bridge_port *p, |
505 | u8 newprio); | 514 | unsigned long newprio); |
506 | extern void br_stp_set_path_cost(struct net_bridge_port *p, | 515 | extern int br_stp_set_path_cost(struct net_bridge_port *p, |
507 | u32 path_cost); | 516 | unsigned long path_cost); |
508 | extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id); | 517 | extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id); |
509 | 518 | ||
510 | /* br_stp_bpdu.c */ | 519 | /* br_stp_bpdu.c */ |
diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h
index 8b650f7fbfa0..642ef47a867e 100644
--- a/net/bridge/br_private_stp.h
+++ b/net/bridge/br_private_stp.h
@@ -16,6 +16,19 @@ | |||
16 | #define BPDU_TYPE_CONFIG 0 | 16 | #define BPDU_TYPE_CONFIG 0 |
17 | #define BPDU_TYPE_TCN 0x80 | 17 | #define BPDU_TYPE_TCN 0x80 |
18 | 18 | ||
19 | /* IEEE 802.1D-1998 timer values */ | ||
20 | #define BR_MIN_HELLO_TIME (1*HZ) | ||
21 | #define BR_MAX_HELLO_TIME (10*HZ) | ||
22 | |||
23 | #define BR_MIN_FORWARD_DELAY (2*HZ) | ||
24 | #define BR_MAX_FORWARD_DELAY (30*HZ) | ||
25 | |||
26 | #define BR_MIN_MAX_AGE (6*HZ) | ||
27 | #define BR_MAX_MAX_AGE (40*HZ) | ||
28 | |||
29 | #define BR_MIN_PATH_COST 1 | ||
30 | #define BR_MAX_PATH_COST 65535 | ||
31 | |||
19 | struct br_config_bpdu | 32 | struct br_config_bpdu |
20 | { | 33 | { |
21 | unsigned topology_change:1; | 34 | unsigned topology_change:1; |
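These are the IEEE 802.1D-1998 permitted ranges, in jiffies; values arrive from user space in USER_HZ clock_t units and are converted with clock_t_to_jiffies() before the comparison (see br_stp.c below). A worked example, assuming USER_HZ == 100 and a brctl that writes centisecond ticks:

/* brctl sethello br0 2   -> arrives as 200 clock_t ticks
 * clock_t_to_jiffies(200) == 2*HZ, inside [1*HZ, 10*HZ]: accepted.
 * A 12 s hello (1200 ticks -> 12*HZ) used to pass the old "t < HZ"
 * check in br_ioctl.c; it now trips BR_MAX_HELLO_TIME and returns
 * -ERANGE.
 */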
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 7370d14f634d..bb4383e84de9 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -484,3 +484,51 @@ void br_received_tcn_bpdu(struct net_bridge_port *p) | |||
484 | br_topology_change_acknowledge(p); | 484 | br_topology_change_acknowledge(p); |
485 | } | 485 | } |
486 | } | 486 | } |
487 | |||
488 | /* Change bridge STP parameter */ | ||
489 | int br_set_hello_time(struct net_bridge *br, unsigned long val) | ||
490 | { | ||
491 | unsigned long t = clock_t_to_jiffies(val); | ||
492 | |||
493 | if (t < BR_MIN_HELLO_TIME || t > BR_MAX_HELLO_TIME) | ||
494 | return -ERANGE; | ||
495 | |||
496 | spin_lock_bh(&br->lock); | ||
497 | br->bridge_hello_time = t; | ||
498 | if (br_is_root_bridge(br)) | ||
499 | br->hello_time = br->bridge_hello_time; | ||
500 | spin_unlock_bh(&br->lock); | ||
501 | return 0; | ||
502 | } | ||
503 | |||
504 | int br_set_max_age(struct net_bridge *br, unsigned long val) | ||
505 | { | ||
506 | unsigned long t = clock_t_to_jiffies(val); | ||
507 | |||
508 | if (t < BR_MIN_MAX_AGE || t > BR_MAX_MAX_AGE) | ||
509 | return -ERANGE; | ||
510 | |||
511 | spin_lock_bh(&br->lock); | ||
512 | br->bridge_max_age = t; | ||
513 | if (br_is_root_bridge(br)) | ||
514 | br->max_age = br->bridge_max_age; | ||
515 | spin_unlock_bh(&br->lock); | ||
516 | return 0; | ||
517 | |||
518 | } | ||
519 | |||
520 | int br_set_forward_delay(struct net_bridge *br, unsigned long val) | ||
521 | { | ||
522 | unsigned long t = clock_t_to_jiffies(val); | ||
523 | |||
524 | if (br->stp_enabled != BR_NO_STP && | ||
525 | (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY)) | ||
526 | return -ERANGE; | ||
527 | |||
528 | spin_lock_bh(&br->lock); | ||
529 | br->bridge_forward_delay = t; | ||
530 | if (br_is_root_bridge(br)) | ||
531 | br->forward_delay = br->bridge_forward_delay; | ||
532 | spin_unlock_bh(&br->lock); | ||
533 | return 0; | ||
534 | } | ||
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 9b61d09de9b9..6f615b8192f4 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -20,7 +20,7 @@ | |||
20 | 20 | ||
21 | 21 | ||
22 | /* Port id is composed of priority and port number. | 22 | /* Port id is composed of priority and port number. |
23 | * NB: least significant bits of priority are dropped to | 23 | * NB: some bits of priority are dropped to |
24 | * make room for more ports. | 24 | * make room for more ports. |
25 | */ | 25 | */ |
26 | static inline port_id br_make_port_id(__u8 priority, __u16 port_no) | 26 | static inline port_id br_make_port_id(__u8 priority, __u16 port_no) |
@@ -29,6 +29,8 @@ static inline port_id br_make_port_id(__u8 priority, __u16 port_no) | |||
29 | | (port_no & ((1<<BR_PORT_BITS)-1)); | 29 | | (port_no & ((1<<BR_PORT_BITS)-1)); |
30 | } | 30 | } |
31 | 31 | ||
32 | #define BR_MAX_PORT_PRIORITY ((u16)~0 >> BR_PORT_BITS) | ||
33 | |||
32 | /* called under bridge lock */ | 34 | /* called under bridge lock */ |
33 | void br_init_port(struct net_bridge_port *p) | 35 | void br_init_port(struct net_bridge_port *p) |
34 | { | 36 | { |
@@ -255,10 +257,14 @@ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio) | |||
255 | } | 257 | } |
256 | 258 | ||
257 | /* called under bridge lock */ | 259 | /* called under bridge lock */ |
258 | void br_stp_set_port_priority(struct net_bridge_port *p, u8 newprio) | 260 | int br_stp_set_port_priority(struct net_bridge_port *p, unsigned long newprio) |
259 | { | 261 | { |
260 | port_id new_port_id = br_make_port_id(newprio, p->port_no); | 262 | port_id new_port_id; |
263 | |||
264 | if (newprio > BR_MAX_PORT_PRIORITY) | ||
265 | return -ERANGE; | ||
261 | 266 | ||
267 | new_port_id = br_make_port_id(newprio, p->port_no); | ||
262 | if (br_is_designated_port(p)) | 268 | if (br_is_designated_port(p)) |
263 | p->designated_port = new_port_id; | 269 | p->designated_port = new_port_id; |
264 | 270 | ||
@@ -269,14 +275,21 @@ void br_stp_set_port_priority(struct net_bridge_port *p, u8 newprio) | |||
269 | br_become_designated_port(p); | 275 | br_become_designated_port(p); |
270 | br_port_state_selection(p->br); | 276 | br_port_state_selection(p->br); |
271 | } | 277 | } |
278 | |||
279 | return 0; | ||
272 | } | 280 | } |
273 | 281 | ||
274 | /* called under bridge lock */ | 282 | /* called under bridge lock */ |
275 | void br_stp_set_path_cost(struct net_bridge_port *p, u32 path_cost) | 283 | int br_stp_set_path_cost(struct net_bridge_port *p, unsigned long path_cost) |
276 | { | 284 | { |
285 | if (path_cost < BR_MIN_PATH_COST || | ||
286 | path_cost > BR_MAX_PATH_COST) | ||
287 | return -ERANGE; | ||
288 | |||
277 | p->path_cost = path_cost; | 289 | p->path_cost = path_cost; |
278 | br_configuration_update(p->br); | 290 | br_configuration_update(p->br); |
279 | br_port_state_selection(p->br); | 291 | br_port_state_selection(p->br); |
292 | return 0; | ||
280 | } | 293 | } |
281 | 294 | ||
282 | ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id) | 295 | ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id) |
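Both setters now validate their argument and report an error themselves, so the range checks disappear from the individual ioctl and sysfs callers. Working through the new priority bound, assuming BR_PORT_BITS == 10 as in the mainline tree:

/* BR_MAX_PORT_PRIORITY = (u16)~0 >> 10 = 0xffff >> 10 = 63
 *
 * port id packing, from br_make_port_id():
 *	port_id = (priority << 10) | (port_no & 0x3ff);
 * e.g. priority 32 on port 5: (32 << 10) | 5 = 0x8005.
 * Priority 64 would need a 17th bit, hence -ERANGE above 63.
 */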
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 5c1e5559ebba..68b893ea8c3a 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -43,9 +43,7 @@ static ssize_t store_bridge_parm(struct device *d, | |||
43 | if (endp == buf) | 43 | if (endp == buf) |
44 | return -EINVAL; | 44 | return -EINVAL; |
45 | 45 | ||
46 | spin_lock_bh(&br->lock); | ||
47 | err = (*set)(br, val); | 46 | err = (*set)(br, val); |
48 | spin_unlock_bh(&br->lock); | ||
49 | return err ? err : len; | 47 | return err ? err : len; |
50 | } | 48 | } |
51 | 49 | ||
@@ -57,20 +55,11 @@ static ssize_t show_forward_delay(struct device *d, | |||
57 | return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); | 55 | return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); |
58 | } | 56 | } |
59 | 57 | ||
60 | static int set_forward_delay(struct net_bridge *br, unsigned long val) | ||
61 | { | ||
62 | unsigned long delay = clock_t_to_jiffies(val); | ||
63 | br->forward_delay = delay; | ||
64 | if (br_is_root_bridge(br)) | ||
65 | br->bridge_forward_delay = delay; | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | static ssize_t store_forward_delay(struct device *d, | 58 | static ssize_t store_forward_delay(struct device *d, |
70 | struct device_attribute *attr, | 59 | struct device_attribute *attr, |
71 | const char *buf, size_t len) | 60 | const char *buf, size_t len) |
72 | { | 61 | { |
73 | return store_bridge_parm(d, buf, len, set_forward_delay); | 62 | return store_bridge_parm(d, buf, len, br_set_forward_delay); |
74 | } | 63 | } |
75 | static DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR, | 64 | static DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR, |
76 | show_forward_delay, store_forward_delay); | 65 | show_forward_delay, store_forward_delay); |
@@ -82,24 +71,11 @@ static ssize_t show_hello_time(struct device *d, struct device_attribute *attr, | |||
82 | jiffies_to_clock_t(to_bridge(d)->hello_time)); | 71 | jiffies_to_clock_t(to_bridge(d)->hello_time)); |
83 | } | 72 | } |
84 | 73 | ||
85 | static int set_hello_time(struct net_bridge *br, unsigned long val) | ||
86 | { | ||
87 | unsigned long t = clock_t_to_jiffies(val); | ||
88 | |||
89 | if (t < HZ) | ||
90 | return -EINVAL; | ||
91 | |||
92 | br->hello_time = t; | ||
93 | if (br_is_root_bridge(br)) | ||
94 | br->bridge_hello_time = t; | ||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | static ssize_t store_hello_time(struct device *d, | 74 | static ssize_t store_hello_time(struct device *d, |
99 | struct device_attribute *attr, const char *buf, | 75 | struct device_attribute *attr, const char *buf, |
100 | size_t len) | 76 | size_t len) |
101 | { | 77 | { |
102 | return store_bridge_parm(d, buf, len, set_hello_time); | 78 | return store_bridge_parm(d, buf, len, br_set_hello_time); |
103 | } | 79 | } |
104 | static DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time, | 80 | static DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time, |
105 | store_hello_time); | 81 | store_hello_time); |
@@ -111,19 +87,10 @@ static ssize_t show_max_age(struct device *d, struct device_attribute *attr, | |||
111 | jiffies_to_clock_t(to_bridge(d)->max_age)); | 87 | jiffies_to_clock_t(to_bridge(d)->max_age)); |
112 | } | 88 | } |
113 | 89 | ||
114 | static int set_max_age(struct net_bridge *br, unsigned long val) | ||
115 | { | ||
116 | unsigned long t = clock_t_to_jiffies(val); | ||
117 | br->max_age = t; | ||
118 | if (br_is_root_bridge(br)) | ||
119 | br->bridge_max_age = t; | ||
120 | return 0; | ||
121 | } | ||
122 | |||
123 | static ssize_t store_max_age(struct device *d, struct device_attribute *attr, | 90 | static ssize_t store_max_age(struct device *d, struct device_attribute *attr, |
124 | const char *buf, size_t len) | 91 | const char *buf, size_t len) |
125 | { | 92 | { |
126 | return store_bridge_parm(d, buf, len, set_max_age); | 93 | return store_bridge_parm(d, buf, len, br_set_max_age); |
127 | } | 94 | } |
128 | static DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, store_max_age); | 95 | static DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, store_max_age); |
129 | 96 | ||
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index fd5799c9bc8d..6229b62749e8 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -23,7 +23,7 @@ | |||
23 | struct brport_attribute { | 23 | struct brport_attribute { |
24 | struct attribute attr; | 24 | struct attribute attr; |
25 | ssize_t (*show)(struct net_bridge_port *, char *); | 25 | ssize_t (*show)(struct net_bridge_port *, char *); |
26 | ssize_t (*store)(struct net_bridge_port *, unsigned long); | 26 | int (*store)(struct net_bridge_port *, unsigned long); |
27 | }; | 27 | }; |
28 | 28 | ||
29 | #define BRPORT_ATTR(_name,_mode,_show,_store) \ | 29 | #define BRPORT_ATTR(_name,_mode,_show,_store) \ |
@@ -38,27 +38,17 @@ static ssize_t show_path_cost(struct net_bridge_port *p, char *buf) | |||
38 | { | 38 | { |
39 | return sprintf(buf, "%d\n", p->path_cost); | 39 | return sprintf(buf, "%d\n", p->path_cost); |
40 | } | 40 | } |
41 | static ssize_t store_path_cost(struct net_bridge_port *p, unsigned long v) | 41 | |
42 | { | ||
43 | br_stp_set_path_cost(p, v); | ||
44 | return 0; | ||
45 | } | ||
46 | static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR, | 42 | static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR, |
47 | show_path_cost, store_path_cost); | 43 | show_path_cost, br_stp_set_path_cost); |
48 | 44 | ||
49 | static ssize_t show_priority(struct net_bridge_port *p, char *buf) | 45 | static ssize_t show_priority(struct net_bridge_port *p, char *buf) |
50 | { | 46 | { |
51 | return sprintf(buf, "%d\n", p->priority); | 47 | return sprintf(buf, "%d\n", p->priority); |
52 | } | 48 | } |
53 | static ssize_t store_priority(struct net_bridge_port *p, unsigned long v) | 49 | |
54 | { | ||
55 | if (v >= (1<<(16-BR_PORT_BITS))) | ||
56 | return -ERANGE; | ||
57 | br_stp_set_port_priority(p, v); | ||
58 | return 0; | ||
59 | } | ||
60 | static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR, | 50 | static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR, |
61 | show_priority, store_priority); | 51 | show_priority, br_stp_set_port_priority); |
62 | 52 | ||
63 | static ssize_t show_designated_root(struct net_bridge_port *p, char *buf) | 53 | static ssize_t show_designated_root(struct net_bridge_port *p, char *buf) |
64 | { | 54 | { |
@@ -136,7 +126,7 @@ static ssize_t show_hold_timer(struct net_bridge_port *p, | |||
136 | } | 126 | } |
137 | static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL); | 127 | static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL); |
138 | 128 | ||
139 | static ssize_t store_flush(struct net_bridge_port *p, unsigned long v) | 129 | static int store_flush(struct net_bridge_port *p, unsigned long v) |
140 | { | 130 | { |
141 | br_fdb_delete_by_port(p->br, p, 0); // Don't delete local entry | 131 | br_fdb_delete_by_port(p->br, p, 0); // Don't delete local entry |
142 | return 0; | 132 | return 0; |
@@ -148,7 +138,7 @@ static ssize_t show_hairpin_mode(struct net_bridge_port *p, char *buf) | |||
148 | int hairpin_mode = (p->flags & BR_HAIRPIN_MODE) ? 1 : 0; | 138 | int hairpin_mode = (p->flags & BR_HAIRPIN_MODE) ? 1 : 0; |
149 | return sprintf(buf, "%d\n", hairpin_mode); | 139 | return sprintf(buf, "%d\n", hairpin_mode); |
150 | } | 140 | } |
151 | static ssize_t store_hairpin_mode(struct net_bridge_port *p, unsigned long v) | 141 | static int store_hairpin_mode(struct net_bridge_port *p, unsigned long v) |
152 | { | 142 | { |
153 | if (v) | 143 | if (v) |
154 | p->flags |= BR_HAIRPIN_MODE; | 144 | p->flags |= BR_HAIRPIN_MODE; |
@@ -165,7 +155,7 @@ static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) | |||
165 | return sprintf(buf, "%d\n", p->multicast_router); | 155 | return sprintf(buf, "%d\n", p->multicast_router); |
166 | } | 156 | } |
167 | 157 | ||
168 | static ssize_t store_multicast_router(struct net_bridge_port *p, | 158 | static int store_multicast_router(struct net_bridge_port *p, |
169 | unsigned long v) | 159 | unsigned long v) |
170 | { | 160 | { |
171 | return br_multicast_set_port_router(p, v); | 161 | return br_multicast_set_port_router(p, v); |
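Switching ->store from ssize_t to int is what allows br_stp_set_path_cost() and br_stp_set_port_priority() to be wired in directly as store callbacks. The common brport sysfs glue (brport_store(), unchanged by this patch) takes br->lock and translates the 0/-errno result back into a byte count, roughly:

spin_lock_bh(&p->br->lock);
ret = brport_attr->store(p, val);	/* e.g. br_stp_set_port_priority */
spin_unlock_bh(&p->br->lock);
if (ret == 0)
	ret = count;			/* success: bytes consumed */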
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 733d66f1b05a..a8dcaa49675a 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -85,7 +85,7 @@ static struct kmem_cache *rcv_cache __read_mostly; | |||
85 | 85 | ||
86 | /* table of registered CAN protocols */ | 86 | /* table of registered CAN protocols */ |
87 | static struct can_proto *proto_tab[CAN_NPROTO] __read_mostly; | 87 | static struct can_proto *proto_tab[CAN_NPROTO] __read_mostly; |
88 | static DEFINE_SPINLOCK(proto_tab_lock); | 88 | static DEFINE_MUTEX(proto_tab_lock); |
89 | 89 | ||
90 | struct timer_list can_stattimer; /* timer for statistics update */ | 90 | struct timer_list can_stattimer; /* timer for statistics update */ |
91 | struct s_stats can_stats; /* packet statistics */ | 91 | struct s_stats can_stats; /* packet statistics */ |
@@ -115,6 +115,19 @@ static void can_sock_destruct(struct sock *sk) | |||
115 | skb_queue_purge(&sk->sk_receive_queue); | 115 | skb_queue_purge(&sk->sk_receive_queue); |
116 | } | 116 | } |
117 | 117 | ||
118 | static struct can_proto *can_try_module_get(int protocol) | ||
119 | { | ||
120 | struct can_proto *cp; | ||
121 | |||
122 | rcu_read_lock(); | ||
123 | cp = rcu_dereference(proto_tab[protocol]); | ||
124 | if (cp && !try_module_get(cp->prot->owner)) | ||
125 | cp = NULL; | ||
126 | rcu_read_unlock(); | ||
127 | |||
128 | return cp; | ||
129 | } | ||
130 | |||
118 | static int can_create(struct net *net, struct socket *sock, int protocol, | 131 | static int can_create(struct net *net, struct socket *sock, int protocol, |
119 | int kern) | 132 | int kern) |
120 | { | 133 | { |
@@ -130,9 +143,12 @@ static int can_create(struct net *net, struct socket *sock, int protocol, | |||
130 | if (!net_eq(net, &init_net)) | 143 | if (!net_eq(net, &init_net)) |
131 | return -EAFNOSUPPORT; | 144 | return -EAFNOSUPPORT; |
132 | 145 | ||
146 | cp = can_try_module_get(protocol); | ||
147 | |||
133 | #ifdef CONFIG_MODULES | 148 | #ifdef CONFIG_MODULES |
134 | /* try to load protocol module kernel is modular */ | 149 | if (!cp) { |
135 | if (!proto_tab[protocol]) { | 150 | /* try to load protocol module if kernel is modular */ |
151 | |||
136 | err = request_module("can-proto-%d", protocol); | 152 | err = request_module("can-proto-%d", protocol); |
137 | 153 | ||
138 | /* | 154 | /* |
@@ -143,22 +159,18 @@ static int can_create(struct net *net, struct socket *sock, int protocol, | |||
143 | if (err && printk_ratelimit()) | 159 | if (err && printk_ratelimit()) |
144 | printk(KERN_ERR "can: request_module " | 160 | printk(KERN_ERR "can: request_module " |
145 | "(can-proto-%d) failed.\n", protocol); | 161 | "(can-proto-%d) failed.\n", protocol); |
162 | |||
163 | cp = can_try_module_get(protocol); | ||
146 | } | 164 | } |
147 | #endif | 165 | #endif |
148 | 166 | ||
149 | spin_lock(&proto_tab_lock); | ||
150 | cp = proto_tab[protocol]; | ||
151 | if (cp && !try_module_get(cp->prot->owner)) | ||
152 | cp = NULL; | ||
153 | spin_unlock(&proto_tab_lock); | ||
154 | |||
155 | /* check for available protocol and correct usage */ | 167 | /* check for available protocol and correct usage */ |
156 | 168 | ||
157 | if (!cp) | 169 | if (!cp) |
158 | return -EPROTONOSUPPORT; | 170 | return -EPROTONOSUPPORT; |
159 | 171 | ||
160 | if (cp->type != sock->type) { | 172 | if (cp->type != sock->type) { |
161 | err = -EPROTONOSUPPORT; | 173 | err = -EPROTOTYPE; |
162 | goto errout; | 174 | goto errout; |
163 | } | 175 | } |
164 | 176 | ||
@@ -694,15 +706,16 @@ int can_proto_register(struct can_proto *cp) | |||
694 | if (err < 0) | 706 | if (err < 0) |
695 | return err; | 707 | return err; |
696 | 708 | ||
697 | spin_lock(&proto_tab_lock); | 709 | mutex_lock(&proto_tab_lock); |
710 | |||
698 | if (proto_tab[proto]) { | 711 | if (proto_tab[proto]) { |
699 | printk(KERN_ERR "can: protocol %d already registered\n", | 712 | printk(KERN_ERR "can: protocol %d already registered\n", |
700 | proto); | 713 | proto); |
701 | err = -EBUSY; | 714 | err = -EBUSY; |
702 | } else | 715 | } else |
703 | proto_tab[proto] = cp; | 716 | rcu_assign_pointer(proto_tab[proto], cp); |
704 | 717 | ||
705 | spin_unlock(&proto_tab_lock); | 718 | mutex_unlock(&proto_tab_lock); |
706 | 719 | ||
707 | if (err < 0) | 720 | if (err < 0) |
708 | proto_unregister(cp->prot); | 721 | proto_unregister(cp->prot); |
@@ -719,13 +732,12 @@ void can_proto_unregister(struct can_proto *cp) | |||
719 | { | 732 | { |
720 | int proto = cp->protocol; | 733 | int proto = cp->protocol; |
721 | 734 | ||
722 | spin_lock(&proto_tab_lock); | 735 | mutex_lock(&proto_tab_lock); |
723 | if (!proto_tab[proto]) { | 736 | BUG_ON(proto_tab[proto] != cp); |
724 | printk(KERN_ERR "BUG: can: protocol %d is not registered\n", | 737 | rcu_assign_pointer(proto_tab[proto], NULL); |
725 | proto); | 738 | mutex_unlock(&proto_tab_lock); |
726 | } | 739 | |
727 | proto_tab[proto] = NULL; | 740 | synchronize_rcu(); |
728 | spin_unlock(&proto_tab_lock); | ||
729 | 741 | ||
730 | proto_unregister(cp->prot); | 742 | proto_unregister(cp->prot); |
731 | } | 743 | } |
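The af_can.c conversion above is a standard RCU publish/unpublish pattern: lookups run under rcu_read_lock() only, while register/unregister serialize on a mutex, publish with rcu_assign_pointer(), and wait out readers with synchronize_rcu() before the protocol module may disappear. A minimal generic sketch of the same pattern, detached from the CAN specifics:

	struct item {
		struct module *owner;
	};

	static struct item __rcu *slot;
	static DEFINE_MUTEX(slot_lock);

	static struct item *reader_get(void)
	{
		struct item *it;

		rcu_read_lock();
		it = rcu_dereference(slot);	/* safe inside the read section */
		if (it && !try_module_get(it->owner))
			it = NULL;		/* owner is already unloading */
		rcu_read_unlock();

		return it;
	}

	static void writer_unpublish(void)
	{
		mutex_lock(&slot_lock);
		rcu_assign_pointer(slot, NULL);	/* stop new lookups */
		mutex_unlock(&slot_lock);

		synchronize_rcu();		/* wait for in-flight readers */
	}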
diff --git a/net/core/dev.c b/net/core/dev.c index 956d3b006e8b..95897ff3a76f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -5236,7 +5236,7 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) | |||
5236 | } | 5236 | } |
5237 | EXPORT_SYMBOL(netdev_fix_features); | 5237 | EXPORT_SYMBOL(netdev_fix_features); |
5238 | 5238 | ||
5239 | void netdev_update_features(struct net_device *dev) | 5239 | int __netdev_update_features(struct net_device *dev) |
5240 | { | 5240 | { |
5241 | u32 features; | 5241 | u32 features; |
5242 | int err = 0; | 5242 | int err = 0; |
@@ -5250,7 +5250,7 @@ void netdev_update_features(struct net_device *dev) | |||
5250 | features = netdev_fix_features(dev, features); | 5250 | features = netdev_fix_features(dev, features); |
5251 | 5251 | ||
5252 | if (dev->features == features) | 5252 | if (dev->features == features) |
5253 | return; | 5253 | return 0; |
5254 | 5254 | ||
5255 | netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n", | 5255 | netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n", |
5256 | dev->features, features); | 5256 | dev->features, features); |
@@ -5258,12 +5258,23 @@ void netdev_update_features(struct net_device *dev) | |||
5258 | if (dev->netdev_ops->ndo_set_features) | 5258 | if (dev->netdev_ops->ndo_set_features) |
5259 | err = dev->netdev_ops->ndo_set_features(dev, features); | 5259 | err = dev->netdev_ops->ndo_set_features(dev, features); |
5260 | 5260 | ||
5261 | if (!err) | 5261 | if (unlikely(err < 0)) { |
5262 | dev->features = features; | ||
5263 | else if (err < 0) | ||
5264 | netdev_err(dev, | 5262 | netdev_err(dev, |
5265 | "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", | 5263 | "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", |
5266 | err, features, dev->features); | 5264 | err, features, dev->features); |
5265 | return -1; | ||
5266 | } | ||
5267 | |||
5268 | if (!err) | ||
5269 | dev->features = features; | ||
5270 | |||
5271 | return 1; | ||
5272 | } | ||
5273 | |||
5274 | void netdev_update_features(struct net_device *dev) | ||
5275 | { | ||
5276 | if (__netdev_update_features(dev)) | ||
5277 | netdev_features_change(dev); | ||
5267 | } | 5278 | } |
5268 | EXPORT_SYMBOL(netdev_update_features); | 5279 | EXPORT_SYMBOL(netdev_update_features); |
5269 | 5280 | ||
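The refactor above gives __netdev_update_features() a three-way result: -1 when ndo_set_features() failed, 0 when nothing changed, and 1 when dev->features was committed. The netdev_update_features() wrapper fires the netdev_features_change() notifier on any non-zero result, while register_netdevice() calls the __ variant directly so no notifier goes out before the device is announced. A hedged driver-side sketch of the ndo_set_features() contract (example_hw_set_rx_csum is an invented helper, not a real API):

	/* Sketch only: return 0 on success, negative errno on failure;
	 * the core commits dev->features = features only on success. */
	static int example_set_features(struct net_device *dev, u32 features)
	{
		u32 changed = dev->features ^ features;

		if (changed & NETIF_F_RXCSUM)
			example_hw_set_rx_csum(dev,
					       !!(features & NETIF_F_RXCSUM));

		return 0;
	}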
@@ -5414,6 +5425,14 @@ int register_netdevice(struct net_device *dev) | |||
5414 | dev->features &= ~NETIF_F_GSO; | 5425 | dev->features &= ~NETIF_F_GSO; |
5415 | } | 5426 | } |
5416 | 5427 | ||
5428 | /* Turn on no-cache copy if HW is doing checksum */ | ||
5429 | dev->hw_features |= NETIF_F_NOCACHE_COPY; | ||
5430 | if ((dev->features & NETIF_F_ALL_CSUM) && | ||
5431 | !(dev->features & NETIF_F_NO_CSUM)) { | ||
5432 | dev->wanted_features |= NETIF_F_NOCACHE_COPY; | ||
5433 | dev->features |= NETIF_F_NOCACHE_COPY; | ||
5434 | } | ||
5435 | |||
5417 | /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, | 5436 | /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, |
5418 | * vlan_dev_init() will do the dev->features check, so these features | 5437 | * vlan_dev_init() will do the dev->features check, so these features |
5419 | * are enabled only if supported by underlying device. | 5438 | * are enabled only if supported by underlying device. |
@@ -5430,7 +5449,7 @@ int register_netdevice(struct net_device *dev) | |||
5430 | goto err_uninit; | 5449 | goto err_uninit; |
5431 | dev->reg_state = NETREG_REGISTERED; | 5450 | dev->reg_state = NETREG_REGISTERED; |
5432 | 5451 | ||
5433 | netdev_update_features(dev); | 5452 | __netdev_update_features(dev); |
5434 | 5453 | ||
5435 | /* | 5454 | /* |
5436 | * Default initial state at registry is that the | 5455 | * Default initial state at registry is that the |
@@ -6171,6 +6190,10 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask) | |||
6171 | } | 6190 | } |
6172 | } | 6191 | } |
6173 | 6192 | ||
6193 | /* If one device can't do no-cache copy, don't do it for all */ | ||
6194 | if (!(one & NETIF_F_NOCACHE_COPY)) | ||
6195 | all &= ~NETIF_F_NOCACHE_COPY; | ||
6196 | |||
6174 | one |= NETIF_F_ALL_CSUM; | 6197 | one |= NETIF_F_ALL_CSUM; |
6175 | 6198 | ||
6176 | one |= all & NETIF_F_ONE_FOR_ALL; | 6199 | one |= all & NETIF_F_ONE_FOR_ALL; |
diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 74ead9eca126..704e176ad3a9 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c | |||
@@ -21,6 +21,8 @@ | |||
21 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
22 | #include <linux/vmalloc.h> | 22 | #include <linux/vmalloc.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/rtnetlink.h> | ||
25 | #include <linux/sched.h> | ||
24 | 26 | ||
25 | /* | 27 | /* |
26 | * Some useful ethtool_ops methods that're device independent. | 28 | * Some useful ethtool_ops methods that're device independent. |
@@ -317,7 +319,7 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr) | |||
317 | 319 | ||
318 | dev->wanted_features &= ~features[0].valid; | 320 | dev->wanted_features &= ~features[0].valid; |
319 | dev->wanted_features |= features[0].valid & features[0].requested; | 321 | dev->wanted_features |= features[0].valid & features[0].requested; |
320 | netdev_update_features(dev); | 322 | __netdev_update_features(dev); |
321 | 323 | ||
322 | if ((dev->wanted_features ^ dev->features) & features[0].valid) | 324 | if ((dev->wanted_features ^ dev->features) & features[0].valid) |
323 | ret |= ETHTOOL_F_WISH; | 325 | ret |= ETHTOOL_F_WISH; |
@@ -359,7 +361,7 @@ static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GS | |||
359 | /* NETIF_F_NTUPLE */ "rx-ntuple-filter", | 361 | /* NETIF_F_NTUPLE */ "rx-ntuple-filter", |
360 | /* NETIF_F_RXHASH */ "rx-hashing", | 362 | /* NETIF_F_RXHASH */ "rx-hashing", |
361 | /* NETIF_F_RXCSUM */ "rx-checksum", | 363 | /* NETIF_F_RXCSUM */ "rx-checksum", |
362 | "", | 364 | /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy" |
363 | "", | 365 | "", |
364 | }; | 366 | }; |
365 | 367 | ||
@@ -499,7 +501,7 @@ static int ethtool_set_one_feature(struct net_device *dev, | |||
499 | else | 501 | else |
500 | dev->wanted_features &= ~mask; | 502 | dev->wanted_features &= ~mask; |
501 | 503 | ||
502 | netdev_update_features(dev); | 504 | __netdev_update_features(dev); |
503 | return 0; | 505 | return 0; |
504 | } | 506 | } |
505 | 507 | ||
@@ -551,7 +553,7 @@ int __ethtool_set_flags(struct net_device *dev, u32 data) | |||
551 | dev->wanted_features = | 553 | dev->wanted_features = |
552 | (dev->wanted_features & ~changed) | data; | 554 | (dev->wanted_features & ~changed) | data; |
553 | 555 | ||
554 | netdev_update_features(dev); | 556 | __netdev_update_features(dev); |
555 | 557 | ||
556 | return 0; | 558 | return 0; |
557 | } | 559 | } |
@@ -908,6 +910,9 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, | |||
908 | struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL; | 910 | struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL; |
909 | int ret; | 911 | int ret; |
910 | 912 | ||
913 | if (!ops->set_rx_ntuple) | ||
914 | return -EOPNOTSUPP; | ||
915 | |||
911 | if (!(dev->features & NETIF_F_NTUPLE)) | 916 | if (!(dev->features & NETIF_F_NTUPLE)) |
912 | return -EINVAL; | 917 | return -EINVAL; |
913 | 918 | ||
@@ -1618,14 +1623,63 @@ out: | |||
1618 | static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) | 1623 | static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) |
1619 | { | 1624 | { |
1620 | struct ethtool_value id; | 1625 | struct ethtool_value id; |
1626 | static bool busy; | ||
1627 | int rc; | ||
1621 | 1628 | ||
1622 | if (!dev->ethtool_ops->phys_id) | 1629 | if (!dev->ethtool_ops->set_phys_id && !dev->ethtool_ops->phys_id) |
1623 | return -EOPNOTSUPP; | 1630 | return -EOPNOTSUPP; |
1624 | 1631 | ||
1632 | if (busy) | ||
1633 | return -EBUSY; | ||
1634 | |||
1625 | if (copy_from_user(&id, useraddr, sizeof(id))) | 1635 | if (copy_from_user(&id, useraddr, sizeof(id))) |
1626 | return -EFAULT; | 1636 | return -EFAULT; |
1627 | 1637 | ||
1628 | return dev->ethtool_ops->phys_id(dev, id.data); | 1638 | if (!dev->ethtool_ops->set_phys_id) |
1639 | /* Do it the old way */ | ||
1640 | return dev->ethtool_ops->phys_id(dev, id.data); | ||
1641 | |||
1642 | rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); | ||
1643 | if (rc && rc != -EINVAL) | ||
1644 | return rc; | ||
1645 | |||
1646 | /* Drop the RTNL lock while waiting, but prevent reentry or | ||
1647 | * removal of the device. | ||
1648 | */ | ||
1649 | busy = true; | ||
1650 | dev_hold(dev); | ||
1651 | rtnl_unlock(); | ||
1652 | |||
1653 | if (rc == 0) { | ||
1654 | /* Driver will handle this itself */ | ||
1655 | schedule_timeout_interruptible( | ||
1656 | id.data ? (id.data * HZ) : MAX_SCHEDULE_TIMEOUT); | ||
1657 | } else { | ||
1658 | /* Driver expects to be called periodically */ | ||
1659 | do { | ||
1660 | rtnl_lock(); | ||
1661 | rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ON); | ||
1662 | rtnl_unlock(); | ||
1663 | if (rc) | ||
1664 | break; | ||
1665 | schedule_timeout_interruptible(HZ / 2); | ||
1666 | |||
1667 | rtnl_lock(); | ||
1668 | rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_OFF); | ||
1669 | rtnl_unlock(); | ||
1670 | if (rc) | ||
1671 | break; | ||
1672 | schedule_timeout_interruptible(HZ / 2); | ||
1673 | } while (!signal_pending(current) && | ||
1674 | (id.data == 0 || --id.data != 0)); | ||
1675 | } | ||
1676 | |||
1677 | rtnl_lock(); | ||
1678 | dev_put(dev); | ||
1679 | busy = false; | ||
1680 | |||
1681 | (void)dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); | ||
1682 | return rc; | ||
1629 | } | 1683 | } |
1630 | 1684 | ||
1631 | static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) | 1685 | static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) |
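Under the reworked ethtool_phys_id() above, the driver only switches the LED while the core owns the blink loop: returning 0 from ETHTOOL_ID_ACTIVE means the driver blinks on its own until ETHTOOL_ID_INACTIVE, and returning -EINVAL asks the core to toggle ETHTOOL_ID_ON/ETHTOOL_ID_OFF at roughly 2 Hz. A hedged sketch of a driver-side set_phys_id (the example_led* calls are invented stand-ins for device register writes):

	static int example_set_phys_id(struct net_device *dev,
				       enum ethtool_phys_id_state state)
	{
		switch (state) {
		case ETHTOOL_ID_ACTIVE:
			return -EINVAL;		/* let the core do the toggling */
		case ETHTOOL_ID_ON:
			example_led(dev, true);
			break;
		case ETHTOOL_ID_OFF:
			example_led(dev, false);
			break;
		case ETHTOOL_ID_INACTIVE:
			example_led_restore(dev); /* normal LED behaviour */
			break;
		}
		return 0;
	}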
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 451088330bbb..22524716fe70 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include <net/arp.h> | 44 | #include <net/arp.h> |
45 | #include <net/ip_fib.h> | 45 | #include <net/ip_fib.h> |
46 | #include <net/rtnetlink.h> | 46 | #include <net/rtnetlink.h> |
47 | #include <net/xfrm.h> | ||
47 | 48 | ||
48 | #ifndef CONFIG_IP_MULTIPLE_TABLES | 49 | #ifndef CONFIG_IP_MULTIPLE_TABLES |
49 | 50 | ||
@@ -188,9 +189,9 @@ EXPORT_SYMBOL(inet_dev_addr_type); | |||
188 | * - check, that packet arrived from expected physical interface. | 189 | * - check, that packet arrived from expected physical interface. |
189 | * called with rcu_read_lock() | 190 | * called with rcu_read_lock() |
190 | */ | 191 | */ |
191 | int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | 192 | int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, |
192 | struct net_device *dev, __be32 *spec_dst, | 193 | int oif, struct net_device *dev, __be32 *spec_dst, |
193 | u32 *itag, u32 mark) | 194 | u32 *itag) |
194 | { | 195 | { |
195 | struct in_device *in_dev; | 196 | struct in_device *in_dev; |
196 | struct flowi4 fl4; | 197 | struct flowi4 fl4; |
@@ -202,7 +203,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
202 | 203 | ||
203 | fl4.flowi4_oif = 0; | 204 | fl4.flowi4_oif = 0; |
204 | fl4.flowi4_iif = oif; | 205 | fl4.flowi4_iif = oif; |
205 | fl4.flowi4_mark = mark; | ||
206 | fl4.daddr = src; | 206 | fl4.daddr = src; |
207 | fl4.saddr = dst; | 207 | fl4.saddr = dst; |
208 | fl4.flowi4_tos = tos; | 208 | fl4.flowi4_tos = tos; |
@@ -212,10 +212,12 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
212 | in_dev = __in_dev_get_rcu(dev); | 212 | in_dev = __in_dev_get_rcu(dev); |
213 | if (in_dev) { | 213 | if (in_dev) { |
214 | no_addr = in_dev->ifa_list == NULL; | 214 | no_addr = in_dev->ifa_list == NULL; |
215 | rpf = IN_DEV_RPFILTER(in_dev); | 215 | |
216 | /* Ignore rp_filter for packets protected by IPsec. */ | ||
217 | rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev); | ||
218 | |||
216 | accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); | 219 | accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); |
217 | if (mark && !IN_DEV_SRC_VMARK(in_dev)) | 220 | fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; |
218 | fl4.flowi4_mark = 0; | ||
219 | } | 221 | } |
220 | 222 | ||
221 | if (in_dev == NULL) | 223 | if (in_dev == NULL) |
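The fib_frontend change threads the skb into fib_validate_source() so the reverse-path filter can be bypassed for IPsec-protected packets and the flow mark can be read from the skb itself. As a sketch (not the patch's code), the new rp_filter decision reduces to:

	/* Assumes secpath_exists() is non-zero only after successful
	 * IPsec input processing. */
	static int should_rp_filter(struct sk_buff *skb,
				    struct in_device *in_dev)
	{
		if (secpath_exists(skb))	/* IPsec-authenticated */
			return 0;
		return IN_DEV_RPFILTER(in_dev);
	}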
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index e9013d6c1f51..bde80c450b52 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -126,7 +126,7 @@ struct tnode { | |||
126 | struct work_struct work; | 126 | struct work_struct work; |
127 | struct tnode *tnode_free; | 127 | struct tnode *tnode_free; |
128 | }; | 128 | }; |
129 | struct rt_trie_node *child[0]; | 129 | struct rt_trie_node __rcu *child[0]; |
130 | }; | 130 | }; |
131 | 131 | ||
132 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 132 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
@@ -151,7 +151,7 @@ struct trie_stat { | |||
151 | }; | 151 | }; |
152 | 152 | ||
153 | struct trie { | 153 | struct trie { |
154 | struct rt_trie_node *trie; | 154 | struct rt_trie_node __rcu *trie; |
155 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 155 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
156 | struct trie_use_stats stats; | 156 | struct trie_use_stats stats; |
157 | #endif | 157 | #endif |
@@ -177,16 +177,29 @@ static const int sync_pages = 128; | |||
177 | static struct kmem_cache *fn_alias_kmem __read_mostly; | 177 | static struct kmem_cache *fn_alias_kmem __read_mostly; |
178 | static struct kmem_cache *trie_leaf_kmem __read_mostly; | 178 | static struct kmem_cache *trie_leaf_kmem __read_mostly; |
179 | 179 | ||
180 | static inline struct tnode *node_parent(struct rt_trie_node *node) | 180 | /* |
181 | * caller must hold RTNL | ||
182 | */ | ||
183 | static inline struct tnode *node_parent(const struct rt_trie_node *node) | ||
181 | { | 184 | { |
182 | return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); | 185 | unsigned long parent; |
186 | |||
187 | parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held()); | ||
188 | |||
189 | return (struct tnode *)(parent & ~NODE_TYPE_MASK); | ||
183 | } | 190 | } |
184 | 191 | ||
185 | static inline struct tnode *node_parent_rcu(struct rt_trie_node *node) | 192 | /* |
193 | * caller must hold RCU read lock or RTNL | ||
194 | */ | ||
195 | static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) | ||
186 | { | 196 | { |
187 | struct tnode *ret = node_parent(node); | 197 | unsigned long parent; |
198 | |||
199 | parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() || | ||
200 | lockdep_rtnl_is_held()); | ||
188 | 201 | ||
189 | return rcu_dereference_rtnl(ret); | 202 | return (struct tnode *)(parent & ~NODE_TYPE_MASK); |
190 | } | 203 | } |
191 | 204 | ||
192 | /* Same as rcu_assign_pointer | 205 | /* Same as rcu_assign_pointer |
@@ -198,18 +211,24 @@ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) | |||
198 | node->parent = (unsigned long)ptr | NODE_TYPE(node); | 211 | node->parent = (unsigned long)ptr | NODE_TYPE(node); |
199 | } | 212 | } |
200 | 213 | ||
201 | static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i) | 214 | /* |
215 | * caller must hold RTNL | ||
216 | */ | ||
217 | static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i) | ||
202 | { | 218 | { |
203 | BUG_ON(i >= 1U << tn->bits); | 219 | BUG_ON(i >= 1U << tn->bits); |
204 | 220 | ||
205 | return tn->child[i]; | 221 | return rtnl_dereference(tn->child[i]); |
206 | } | 222 | } |
207 | 223 | ||
208 | static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) | 224 | /* |
225 | * caller must hold RCU read lock or RTNL | ||
226 | */ | ||
227 | static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) | ||
209 | { | 228 | { |
210 | struct rt_trie_node *ret = tnode_get_child(tn, i); | 229 | BUG_ON(i >= 1U << tn->bits); |
211 | 230 | ||
212 | return rcu_dereference_rtnl(ret); | 231 | return rcu_dereference_rtnl(tn->child[i]); |
213 | } | 232 | } |
214 | 233 | ||
215 | static inline int tnode_child_length(const struct tnode *tn) | 234 | static inline int tnode_child_length(const struct tnode *tn) |
@@ -487,7 +506,7 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, | |||
487 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, | 506 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, |
488 | int wasfull) | 507 | int wasfull) |
489 | { | 508 | { |
490 | struct rt_trie_node *chi = tn->child[i]; | 509 | struct rt_trie_node *chi = rtnl_dereference(tn->child[i]); |
491 | int isfull; | 510 | int isfull; |
492 | 511 | ||
493 | BUG_ON(i >= 1<<tn->bits); | 512 | BUG_ON(i >= 1<<tn->bits); |
@@ -665,7 +684,7 @@ one_child: | |||
665 | for (i = 0; i < tnode_child_length(tn); i++) { | 684 | for (i = 0; i < tnode_child_length(tn); i++) { |
666 | struct rt_trie_node *n; | 685 | struct rt_trie_node *n; |
667 | 686 | ||
668 | n = tn->child[i]; | 687 | n = rtnl_dereference(tn->child[i]); |
669 | if (!n) | 688 | if (!n) |
670 | continue; | 689 | continue; |
671 | 690 | ||
@@ -679,6 +698,20 @@ one_child: | |||
679 | return (struct rt_trie_node *) tn; | 698 | return (struct rt_trie_node *) tn; |
680 | } | 699 | } |
681 | 700 | ||
701 | |||
702 | static void tnode_clean_free(struct tnode *tn) | ||
703 | { | ||
704 | int i; | ||
705 | struct tnode *tofree; | ||
706 | |||
707 | for (i = 0; i < tnode_child_length(tn); i++) { | ||
708 | tofree = (struct tnode *)rtnl_dereference(tn->child[i]); | ||
709 | if (tofree) | ||
710 | tnode_free(tofree); | ||
711 | } | ||
712 | tnode_free(tn); | ||
713 | } | ||
714 | |||
682 | static struct tnode *inflate(struct trie *t, struct tnode *tn) | 715 | static struct tnode *inflate(struct trie *t, struct tnode *tn) |
683 | { | 716 | { |
684 | struct tnode *oldtnode = tn; | 717 | struct tnode *oldtnode = tn; |
@@ -755,8 +788,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
755 | inode = (struct tnode *) node; | 788 | inode = (struct tnode *) node; |
756 | 789 | ||
757 | if (inode->bits == 1) { | 790 | if (inode->bits == 1) { |
758 | put_child(t, tn, 2*i, inode->child[0]); | 791 | put_child(t, tn, 2*i, rtnl_dereference(inode->child[0])); |
759 | put_child(t, tn, 2*i+1, inode->child[1]); | 792 | put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1])); |
760 | 793 | ||
761 | tnode_free_safe(inode); | 794 | tnode_free_safe(inode); |
762 | continue; | 795 | continue; |
@@ -797,8 +830,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
797 | 830 | ||
798 | size = tnode_child_length(left); | 831 | size = tnode_child_length(left); |
799 | for (j = 0; j < size; j++) { | 832 | for (j = 0; j < size; j++) { |
800 | put_child(t, left, j, inode->child[j]); | 833 | put_child(t, left, j, rtnl_dereference(inode->child[j])); |
801 | put_child(t, right, j, inode->child[j + size]); | 834 | put_child(t, right, j, rtnl_dereference(inode->child[j + size])); |
802 | } | 835 | } |
803 | put_child(t, tn, 2*i, resize(t, left)); | 836 | put_child(t, tn, 2*i, resize(t, left)); |
804 | put_child(t, tn, 2*i+1, resize(t, right)); | 837 | put_child(t, tn, 2*i+1, resize(t, right)); |
@@ -808,18 +841,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
808 | tnode_free_safe(oldtnode); | 841 | tnode_free_safe(oldtnode); |
809 | return tn; | 842 | return tn; |
810 | nomem: | 843 | nomem: |
811 | { | 844 | tnode_clean_free(tn); |
812 | int size = tnode_child_length(tn); | 845 | return ERR_PTR(-ENOMEM); |
813 | int j; | ||
814 | |||
815 | for (j = 0; j < size; j++) | ||
816 | if (tn->child[j]) | ||
817 | tnode_free((struct tnode *)tn->child[j]); | ||
818 | |||
819 | tnode_free(tn); | ||
820 | |||
821 | return ERR_PTR(-ENOMEM); | ||
822 | } | ||
823 | } | 846 | } |
824 | 847 | ||
825 | static struct tnode *halve(struct trie *t, struct tnode *tn) | 848 | static struct tnode *halve(struct trie *t, struct tnode *tn) |
@@ -890,18 +913,8 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) | |||
890 | tnode_free_safe(oldtnode); | 913 | tnode_free_safe(oldtnode); |
891 | return tn; | 914 | return tn; |
892 | nomem: | 915 | nomem: |
893 | { | 916 | tnode_clean_free(tn); |
894 | int size = tnode_child_length(tn); | 917 | return ERR_PTR(-ENOMEM); |
895 | int j; | ||
896 | |||
897 | for (j = 0; j < size; j++) | ||
898 | if (tn->child[j]) | ||
899 | tnode_free((struct tnode *)tn->child[j]); | ||
900 | |||
901 | tnode_free(tn); | ||
902 | |||
903 | return ERR_PTR(-ENOMEM); | ||
904 | } | ||
905 | } | 918 | } |
906 | 919 | ||
907 | /* readside must use rcu_read_lock currently dump routines | 920 | /* readside must use rcu_read_lock currently dump routines |
@@ -1033,7 +1046,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1033 | t_key cindex; | 1046 | t_key cindex; |
1034 | 1047 | ||
1035 | pos = 0; | 1048 | pos = 0; |
1036 | n = t->trie; | 1049 | n = rtnl_dereference(t->trie); |
1037 | 1050 | ||
1038 | /* If we point to NULL, stop. Either the tree is empty and we should | 1051 | /* If we point to NULL, stop. Either the tree is empty and we should |
1039 | * just put a new leaf in if, or we have reached an empty child slot, | 1052 | * just put a new leaf in if, or we have reached an empty child slot, |
@@ -1756,7 +1769,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) | |||
1756 | continue; | 1769 | continue; |
1757 | 1770 | ||
1758 | if (IS_LEAF(c)) { | 1771 | if (IS_LEAF(c)) { |
1759 | prefetch(p->child[idx]); | 1772 | prefetch(rcu_dereference_rtnl(p->child[idx])); |
1760 | return (struct leaf *) c; | 1773 | return (struct leaf *) c; |
1761 | } | 1774 | } |
1762 | 1775 | ||
@@ -2272,7 +2285,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2272 | 2285 | ||
2273 | /* walk rest of this hash chain */ | 2286 | /* walk rest of this hash chain */ |
2274 | h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); | 2287 | h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); |
2275 | while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) { | 2288 | while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) { |
2276 | tb = hlist_entry(tb_node, struct fib_table, tb_hlist); | 2289 | tb = hlist_entry(tb_node, struct fib_table, tb_hlist); |
2277 | n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); | 2290 | n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); |
2278 | if (n) | 2291 | if (n) |
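The fib_trie hunks follow the sparse __rcu discipline: each read of an annotated pointer goes through a checked accessor, rtnl_dereference() on the update side (RTNL held) and rcu_dereference_rtnl() where either RTNL or an RCU read section suffices. A generic sketch of the accessor pairing, assuming a field declared with __rcu (names invented for illustration):

	struct foo {
		int val;
	};

	struct obj {
		struct foo __rcu *ptr;
	};

	static struct foo *example_lookup(struct obj *o)
	{
		/* reader: legal under rcu_read_lock() or RTNL */
		return rcu_dereference_rtnl(o->ptr);
	}

	static void example_replace(struct obj *o, struct foo *newf)
	{
		struct foo *old;

		ASSERT_RTNL();			  /* updater runs under RTNL */
		old = rtnl_dereference(o->ptr);	  /* checked updater-side read */
		rcu_assign_pointer(o->ptr, newf); /* publish */
		if (old) {
			synchronize_rcu();	  /* let readers drain */
			kfree(old);
		}
	}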
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 6c0b7f4a3d7d..f784608a4c45 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -356,20 +356,14 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, | |||
356 | struct rtable *rt; | 356 | struct rtable *rt; |
357 | const struct inet_request_sock *ireq = inet_rsk(req); | 357 | const struct inet_request_sock *ireq = inet_rsk(req); |
358 | struct ip_options *opt = inet_rsk(req)->opt; | 358 | struct ip_options *opt = inet_rsk(req)->opt; |
359 | struct flowi4 fl4 = { | ||
360 | .flowi4_oif = sk->sk_bound_dev_if, | ||
361 | .flowi4_mark = sk->sk_mark, | ||
362 | .daddr = ((opt && opt->srr) ? | ||
363 | opt->faddr : ireq->rmt_addr), | ||
364 | .saddr = ireq->loc_addr, | ||
365 | .flowi4_tos = RT_CONN_FLAGS(sk), | ||
366 | .flowi4_proto = sk->sk_protocol, | ||
367 | .flowi4_flags = inet_sk_flowi_flags(sk), | ||
368 | .fl4_sport = inet_sk(sk)->inet_sport, | ||
369 | .fl4_dport = ireq->rmt_port, | ||
370 | }; | ||
371 | struct net *net = sock_net(sk); | 359 | struct net *net = sock_net(sk); |
360 | struct flowi4 fl4; | ||
372 | 361 | ||
362 | flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, | ||
363 | RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, | ||
364 | sk->sk_protocol, inet_sk_flowi_flags(sk), | ||
365 | (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, | ||
366 | ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); | ||
373 | security_req_classify_flow(req, flowi4_to_flowi(&fl4)); | 367 | security_req_classify_flow(req, flowi4_to_flowi(&fl4)); |
374 | rt = ip_route_output_flow(net, &fl4, sk); | 368 | rt = ip_route_output_flow(net, &fl4, sk); |
375 | if (IS_ERR(rt)) | 369 | if (IS_ERR(rt)) |
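This and the following hunks collapse the open-coded on-stack flowi4 initializers into flowi4_init_output(); the call sites fix the parameter order as (fl4, oif, mark, tos, scope, proto, flags, daddr, saddr, dport, sport). A sketch of what the helper presumably looks like (it lives in include/net/flow.h and is not shown in this diff):

	static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
					      __u32 mark, __u8 tos, __u8 scope,
					      __u8 proto, __u8 flags,
					      __be32 daddr, __be32 saddr,
					      __be16 dport, __be16 sport)
	{
		fl4->flowi4_oif = oif;
		fl4->flowi4_iif = 0;
		fl4->flowi4_mark = mark;
		fl4->flowi4_tos = tos;
		fl4->flowi4_scope = scope;
		fl4->flowi4_proto = proto;
		fl4->flowi4_flags = flags;
		fl4->daddr = daddr;
		fl4->saddr = saddr;
		fl4->fl4_dport = dport;
		fl4->fl4_sport = sport;
	}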
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 459c011b1d4a..bdad3d60aa82 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -1474,16 +1474,14 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1474 | } | 1474 | } |
1475 | 1475 | ||
1476 | { | 1476 | { |
1477 | struct flowi4 fl4 = { | 1477 | struct flowi4 fl4; |
1478 | .flowi4_oif = arg->bound_dev_if, | 1478 | |
1479 | .daddr = daddr, | 1479 | flowi4_init_output(&fl4, arg->bound_dev_if, 0, |
1480 | .saddr = rt->rt_spec_dst, | 1480 | RT_TOS(ip_hdr(skb)->tos), |
1481 | .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), | 1481 | RT_SCOPE_UNIVERSE, sk->sk_protocol, |
1482 | .fl4_sport = tcp_hdr(skb)->dest, | 1482 | ip_reply_arg_flowi_flags(arg), |
1483 | .fl4_dport = tcp_hdr(skb)->source, | 1483 | daddr, rt->rt_spec_dst, |
1484 | .flowi4_proto = sk->sk_protocol, | 1484 | tcp_hdr(skb)->source, tcp_hdr(skb)->dest); |
1485 | .flowi4_flags = ip_reply_arg_flowi_flags(arg), | ||
1486 | }; | ||
1487 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); | 1485 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); |
1488 | rt = ip_route_output_key(sock_net(sk), &fl4); | 1486 | rt = ip_route_output_key(sock_net(sk), &fl4); |
1489 | if (IS_ERR(rt)) | 1487 | if (IS_ERR(rt)) |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bceaec42c37d..2b50cc2da90a 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -548,17 +548,13 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
548 | } | 548 | } |
549 | 549 | ||
550 | { | 550 | { |
551 | struct flowi4 fl4 = { | 551 | struct flowi4 fl4; |
552 | .flowi4_oif = ipc.oif, | 552 | |
553 | .flowi4_mark = sk->sk_mark, | 553 | flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, |
554 | .daddr = daddr, | 554 | RT_SCOPE_UNIVERSE, |
555 | .saddr = saddr, | 555 | inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, |
556 | .flowi4_tos = tos, | 556 | FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0); |
557 | .flowi4_proto = (inet->hdrincl ? | 557 | |
558 | IPPROTO_RAW : | ||
559 | sk->sk_protocol), | ||
560 | .flowi4_flags = FLOWI_FLAG_CAN_SLEEP, | ||
561 | }; | ||
562 | if (!inet->hdrincl) { | 558 | if (!inet->hdrincl) { |
563 | err = raw_probe_proto_opt(&fl4, msg); | 559 | err = raw_probe_proto_opt(&fl4, msg); |
564 | if (err) | 560 | if (err) |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c1acf69858fd..0e7430c327a7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -1871,8 +1871,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1871 | goto e_inval; | 1871 | goto e_inval; |
1872 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); | 1872 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); |
1873 | } else { | 1873 | } else { |
1874 | err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, | 1874 | err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, |
1875 | &itag, 0); | 1875 | &itag); |
1876 | if (err < 0) | 1876 | if (err < 0) |
1877 | goto e_err; | 1877 | goto e_err; |
1878 | } | 1878 | } |
@@ -1981,8 +1981,8 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1981 | } | 1981 | } |
1982 | 1982 | ||
1983 | 1983 | ||
1984 | err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), | 1984 | err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), |
1985 | in_dev->dev, &spec_dst, &itag, skb->mark); | 1985 | in_dev->dev, &spec_dst, &itag); |
1986 | if (err < 0) { | 1986 | if (err < 0) { |
1987 | ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, | 1987 | ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, |
1988 | saddr); | 1988 | saddr); |
@@ -2150,9 +2150,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2150 | goto brd_input; | 2150 | goto brd_input; |
2151 | 2151 | ||
2152 | if (res.type == RTN_LOCAL) { | 2152 | if (res.type == RTN_LOCAL) { |
2153 | err = fib_validate_source(saddr, daddr, tos, | 2153 | err = fib_validate_source(skb, saddr, daddr, tos, |
2154 | net->loopback_dev->ifindex, | 2154 | net->loopback_dev->ifindex, |
2155 | dev, &spec_dst, &itag, skb->mark); | 2155 | dev, &spec_dst, &itag); |
2156 | if (err < 0) | 2156 | if (err < 0) |
2157 | goto martian_source_keep_err; | 2157 | goto martian_source_keep_err; |
2158 | if (err) | 2158 | if (err) |
@@ -2176,8 +2176,8 @@ brd_input: | |||
2176 | if (ipv4_is_zeronet(saddr)) | 2176 | if (ipv4_is_zeronet(saddr)) |
2177 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); | 2177 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); |
2178 | else { | 2178 | else { |
2179 | err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, | 2179 | err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, |
2180 | &itag, skb->mark); | 2180 | &itag); |
2181 | if (err < 0) | 2181 | if (err < 0) |
2182 | goto martian_source_keep_err; | 2182 | goto martian_source_keep_err; |
2183 | if (err) | 2183 | if (err) |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 8b44c6d2a79b..71e029691908 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -345,17 +345,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
345 | * no easy way to do this. | 345 | * no easy way to do this. |
346 | */ | 346 | */ |
347 | { | 347 | { |
348 | struct flowi4 fl4 = { | 348 | struct flowi4 fl4; |
349 | .flowi4_mark = sk->sk_mark, | 349 | |
350 | .daddr = ((opt && opt->srr) ? | 350 | flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), |
351 | opt->faddr : ireq->rmt_addr), | 351 | RT_SCOPE_UNIVERSE, IPPROTO_TCP, |
352 | .saddr = ireq->loc_addr, | 352 | inet_sk_flowi_flags(sk), |
353 | .flowi4_tos = RT_CONN_FLAGS(sk), | 353 | (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, |
354 | .flowi4_proto = IPPROTO_TCP, | 354 | ireq->loc_addr, th->source, th->dest); |
355 | .flowi4_flags = inet_sk_flowi_flags(sk), | ||
356 | .fl4_sport = th->dest, | ||
357 | .fl4_dport = th->source, | ||
358 | }; | ||
359 | security_req_classify_flow(req, flowi4_to_flowi(&fl4)); | 355 | security_req_classify_flow(req, flowi4_to_flowi(&fl4)); |
360 | rt = ip_route_output_key(sock_net(sk), &fl4); | 356 | rt = ip_route_output_key(sock_net(sk), &fl4); |
361 | if (IS_ERR(rt)) { | 357 | if (IS_ERR(rt)) { |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b22d45010545..054a59d21eb0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -999,7 +999,8 @@ new_segment: | |||
999 | /* We have some space in skb head. Superb! */ | 999 | /* We have some space in skb head. Superb! */ |
1000 | if (copy > skb_tailroom(skb)) | 1000 | if (copy > skb_tailroom(skb)) |
1001 | copy = skb_tailroom(skb); | 1001 | copy = skb_tailroom(skb); |
1002 | if ((err = skb_add_data(skb, from, copy)) != 0) | 1002 | err = skb_add_data_nocache(sk, skb, from, copy); |
1003 | if (err) | ||
1003 | goto do_fault; | 1004 | goto do_fault; |
1004 | } else { | 1005 | } else { |
1005 | int merge = 0; | 1006 | int merge = 0; |
@@ -1042,8 +1043,8 @@ new_segment: | |||
1042 | 1043 | ||
1043 | /* Time to copy data. We are close to | 1044 | /* Time to copy data. We are close to |
1044 | * the end! */ | 1045 | * the end! */ |
1045 | err = skb_copy_to_page(sk, from, skb, page, | 1046 | err = skb_copy_to_page_nocache(sk, from, skb, |
1046 | off, copy); | 1047 | page, off, copy); |
1047 | if (err) { | 1048 | if (err) { |
1048 | /* If this page was new, give it to the | 1049 | /* If this page was new, give it to the |
1049 | * socket so it does not get leaked. | 1050 | * socket so it does not get leaked. |
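skb_add_data_nocache() and skb_copy_to_page_nocache() are the socket-layer consumers of NETIF_F_NOCACHE_COPY: when the route's device advertises the flag and no software checksum is needed, the copy from userspace can bypass the CPU cache, since the payload will next be touched by DMA rather than the CPU. A hedged sketch of the core decision (simplified; the real helpers also fold in the checksumming copy path):

	static int example_copy_nocache(struct sock *sk, char *to,
					char __user *from, int copy)
	{
		if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) {
			/* cache-bypassing copy straight toward DMA */
			if (!access_ok(VERIFY_READ, from, copy) ||
			    __copy_from_user_nocache(to, from, copy))
				return -EFAULT;
		} else if (copy_from_user(to, from, copy)) {
			return -EFAULT;
		}
		return 0;
	}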
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f87a8eb76f3b..a15c8fb653af 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -909,20 +909,14 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
909 | rt = (struct rtable *)sk_dst_check(sk, 0); | 909 | rt = (struct rtable *)sk_dst_check(sk, 0); |
910 | 910 | ||
911 | if (rt == NULL) { | 911 | if (rt == NULL) { |
912 | struct flowi4 fl4 = { | 912 | struct flowi4 fl4; |
913 | .flowi4_oif = ipc.oif, | ||
914 | .flowi4_mark = sk->sk_mark, | ||
915 | .daddr = faddr, | ||
916 | .saddr = saddr, | ||
917 | .flowi4_tos = tos, | ||
918 | .flowi4_proto = sk->sk_protocol, | ||
919 | .flowi4_flags = (inet_sk_flowi_flags(sk) | | ||
920 | FLOWI_FLAG_CAN_SLEEP), | ||
921 | .fl4_sport = inet->inet_sport, | ||
922 | .fl4_dport = dport, | ||
923 | }; | ||
924 | struct net *net = sock_net(sk); | 913 | struct net *net = sock_net(sk); |
925 | 914 | ||
915 | flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, | ||
916 | RT_SCOPE_UNIVERSE, sk->sk_protocol, | ||
917 | inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, | ||
918 | faddr, saddr, dport, inet->inet_sport); | ||
919 | |||
926 | security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); | 920 | security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); |
927 | rt = ip_route_output_flow(net, &fl4, sk); | 921 | rt = ip_route_output_flow(net, &fl4, sk); |
928 | if (IS_ERR(rt)) { | 922 | if (IS_ERR(rt)) { |
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index a7a5583d4f68..aeaa2110b699 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig | |||
@@ -239,6 +239,17 @@ config NET_SCH_CHOKE | |||
239 | To compile this code as a module, choose M here: the | 239 | To compile this code as a module, choose M here: the |
240 | module will be called sch_choke. | 240 | module will be called sch_choke. |
241 | 241 | ||
242 | config NET_SCH_QFQ | ||
243 | tristate "Quick Fair Queueing scheduler (QFQ)" | ||
244 | help | ||
245 | Say Y here if you want to use the Quick Fair Queueing Scheduler (QFQ) | ||
246 | packet scheduling algorithm. | ||
247 | |||
248 | To compile this driver as a module, choose M here: the module | ||
249 | will be called sch_qfq. | ||
250 | |||
251 | If unsure, say N. | ||
252 | |||
242 | config NET_SCH_INGRESS | 253 | config NET_SCH_INGRESS |
243 | tristate "Ingress Qdisc" | 254 | tristate "Ingress Qdisc" |
244 | depends on NET_CLS_ACT | 255 | depends on NET_CLS_ACT |
diff --git a/net/sched/Makefile b/net/sched/Makefile index 2e77b8dba22e..dc5889c0a15a 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile | |||
@@ -35,6 +35,7 @@ obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o | |||
35 | obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o | 35 | obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o |
36 | obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o | 36 | obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o |
37 | obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o | 37 | obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o |
38 | obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o | ||
38 | 39 | ||
39 | obj-$(CONFIG_NET_CLS_U32) += cls_u32.o | 40 | obj-$(CONFIG_NET_CLS_U32) += cls_u32.o |
40 | obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o | 41 | obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o |
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c new file mode 100644 index 000000000000..103343408593 --- /dev/null +++ b/net/sched/sch_qfq.c | |||
@@ -0,0 +1,1137 @@ | |||
1 | /* | ||
2 | * net/sched/sch_qfq.c Quick Fair Queueing Scheduler. | ||
3 | * | ||
4 | * Copyright (c) 2009 Fabio Checconi, Luigi Rizzo, and Paolo Valente. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * version 2 as published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/module.h> | ||
12 | #include <linux/init.h> | ||
13 | #include <linux/bitops.h> | ||
14 | #include <linux/errno.h> | ||
15 | #include <linux/netdevice.h> | ||
16 | #include <linux/pkt_sched.h> | ||
17 | #include <net/sch_generic.h> | ||
18 | #include <net/pkt_sched.h> | ||
19 | #include <net/pkt_cls.h> | ||
20 | |||
21 | |||
22 | /* Quick Fair Queueing | ||
23 | =================== | ||
24 | |||
25 | Sources: | ||
26 | |||
27 | Fabio Checconi, Luigi Rizzo, and Paolo Valente: "QFQ: Efficient | ||
28 | Packet Scheduling with Tight Bandwidth Distribution Guarantees." | ||
29 | |||
30 | See also: | ||
31 | http://retis.sssup.it/~fabio/linux/qfq/ | ||
32 | */ | ||
33 | |||
34 | /* | ||
35 | |||
36 | Virtual time computations. | ||
37 | |||
38 | S, F and V are all computed in fixed point arithmetic with | ||
39 | FRAC_BITS fractional bits. | ||
40 | |||
41 | QFQ_MAX_INDEX is the maximum index allowed for a group. We need | ||
42 | one bit per index. | ||
43 | QFQ_MAX_WSHIFT is the maximum power of two supported as a weight. | ||
44 | |||
45 | The layout of the bits is as below: | ||
46 | |||
47 | [ MTU_SHIFT ][ FRAC_BITS ] | ||
48 | [ MAX_INDEX ][ MIN_SLOT_SHIFT ] | ||
49 | ^.__grp->index = 0 | ||
50 | *.__grp->slot_shift | ||
51 | |||
52 | where MIN_SLOT_SHIFT is derived by difference from the others. | ||
53 | |||
54 | The max group index corresponds to Lmax/w_min, where | ||
55 | Lmax=1<<MTU_SHIFT, w_min = 1 . | ||
56 | From this, and knowing how many groups (MAX_INDEX) we want, | ||
57 | we can derive the shift corresponding to each group. | ||
58 | |||
59 | Because we often need to compute | ||
60 | F = S + len/w_i and V = V + len/wsum | ||
61 | instead of storing w_i, we store the value | ||
62 | inv_w = (1<<FRAC_BITS)/w_i | ||
63 | so we can do F = S + len * inv_w * wsum. | ||
64 | We use W_TOT in the formulas so we can easily move between | ||
65 | static and adaptive weight sum. | ||
66 | |||
67 | The per-scheduler-instance data contain all the data structures | ||
68 | for the scheduler: bitmaps and bucket lists. | ||
69 | |||
70 | */ | ||
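The last formula in the comment above is easy to misread; restated, the timestamp updates as they appear later in this file are:

	/* with inv_w = 2^FRAC_BITS / w_i and IWSUM = 2^FRAC_BITS / QFQ_MAX_WSUM */
	cl->F = cl->S + (u64)len * cl->inv_w;	/* len / w_i, in fixed point */
	q->V += (u64)len * IWSUM;		/* len / wsum, in fixed point */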
71 | |||
72 | /* | ||
73 | * Maximum number of consecutive slots occupied by backlogged classes | ||
74 | * inside a group. | ||
75 | */ | ||
76 | #define QFQ_MAX_SLOTS 32 | ||
77 | |||
78 | /* | ||
79 | * Shifts used for class<->group mapping. We allow class weights that are | ||
80 | * in the range [1, 2^MAX_WSHIFT], and we try to map each class i to the | ||
81 | * group with the smallest index that can support the L_i / r_i configured | ||
82 | * for the class. | ||
83 | * | ||
84 | * grp->index is the index of the group; and grp->slot_shift | ||
85 | * is the shift for the corresponding (scaled) sigma_i. | ||
86 | */ | ||
87 | #define QFQ_MAX_INDEX 19 | ||
88 | #define QFQ_MAX_WSHIFT 16 | ||
89 | |||
90 | #define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT) | ||
91 | #define QFQ_MAX_WSUM (2*QFQ_MAX_WEIGHT) | ||
92 | |||
93 | #define FRAC_BITS 30 /* fixed point arithmetic */ | ||
94 | #define ONE_FP (1UL << FRAC_BITS) | ||
95 | #define IWSUM (ONE_FP/QFQ_MAX_WSUM) | ||
96 | |||
97 | #define QFQ_MTU_SHIFT 11 | ||
98 | #define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX) | ||
99 | |||
100 | /* | ||
101 | * Possible group states. These values are used as indexes for the bitmaps | ||
102 | * array of struct qfq_queue. | ||
103 | */ | ||
104 | enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE }; | ||
105 | |||
106 | struct qfq_group; | ||
107 | |||
108 | struct qfq_class { | ||
109 | struct Qdisc_class_common common; | ||
110 | |||
111 | unsigned int refcnt; | ||
112 | unsigned int filter_cnt; | ||
113 | |||
114 | struct gnet_stats_basic_packed bstats; | ||
115 | struct gnet_stats_queue qstats; | ||
116 | struct gnet_stats_rate_est rate_est; | ||
117 | struct Qdisc *qdisc; | ||
118 | |||
119 | struct hlist_node next; /* Link for the slot list. */ | ||
120 | u64 S, F; /* flow timestamps (exact) */ | ||
121 | |||
122 | /* group we belong to. In principle we would need the index, | ||
123 | * which is log_2(lmax/weight), but we never reference it | ||
124 | * directly, only the group. | ||
125 | */ | ||
126 | struct qfq_group *grp; | ||
127 | |||
128 | /* these are copied from the flowset. */ | ||
129 | u32 inv_w; /* ONE_FP/weight */ | ||
130 | u32 lmax; /* Max packet size for this flow. */ | ||
131 | }; | ||
132 | |||
133 | struct qfq_group { | ||
134 | u64 S, F; /* group timestamps (approx). */ | ||
135 | unsigned int slot_shift; /* Slot shift. */ | ||
136 | unsigned int index; /* Group index. */ | ||
137 | unsigned int front; /* Index of the front slot. */ | ||
138 | unsigned long full_slots; /* non-empty slots */ | ||
139 | |||
140 | /* Array of RR lists of active classes. */ | ||
141 | struct hlist_head slots[QFQ_MAX_SLOTS]; | ||
142 | }; | ||
143 | |||
144 | struct qfq_sched { | ||
145 | struct tcf_proto *filter_list; | ||
146 | struct Qdisc_class_hash clhash; | ||
147 | |||
148 | u64 V; /* Precise virtual time. */ | ||
149 | u32 wsum; /* weight sum */ | ||
150 | |||
151 | unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */ | ||
152 | struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */ | ||
153 | }; | ||
154 | |||
155 | static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) | ||
156 | { | ||
157 | struct qfq_sched *q = qdisc_priv(sch); | ||
158 | struct Qdisc_class_common *clc; | ||
159 | |||
160 | clc = qdisc_class_find(&q->clhash, classid); | ||
161 | if (clc == NULL) | ||
162 | return NULL; | ||
163 | return container_of(clc, struct qfq_class, common); | ||
164 | } | ||
165 | |||
166 | static void qfq_purge_queue(struct qfq_class *cl) | ||
167 | { | ||
168 | unsigned int len = cl->qdisc->q.qlen; | ||
169 | |||
170 | qdisc_reset(cl->qdisc); | ||
171 | qdisc_tree_decrease_qlen(cl->qdisc, len); | ||
172 | } | ||
173 | |||
174 | static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { | ||
175 | [TCA_QFQ_WEIGHT] = { .type = NLA_U32 }, | ||
176 | [TCA_QFQ_LMAX] = { .type = NLA_U32 }, | ||
177 | }; | ||
178 | |||
179 | /* | ||
180 | * Calculate a flow index, given its weight and maximum packet length. | ||
181 | * index = log_2(maxlen/weight) but we need to apply the scaling. | ||
182 | * This is used only once at flow creation. | ||
183 | */ | ||
184 | static int qfq_calc_index(u32 inv_w, unsigned int maxlen) | ||
185 | { | ||
186 | u64 slot_size = (u64)maxlen * inv_w; | ||
187 | unsigned long size_map; | ||
188 | int index = 0; | ||
189 | |||
190 | size_map = slot_size >> QFQ_MIN_SLOT_SHIFT; | ||
191 | if (!size_map) | ||
192 | goto out; | ||
193 | |||
194 | index = __fls(size_map) + 1; /* basically a log_2 */ | ||
195 | index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1))); | ||
196 | |||
197 | if (index < 0) | ||
198 | index = 0; | ||
199 | out: | ||
200 | pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n", | ||
201 | (unsigned long) ONE_FP/inv_w, maxlen, index); | ||
202 | |||
203 | return index; | ||
204 | } | ||
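As a worked instance of the index computation above, take the extreme flow (weight 1, maximum-size packets):

	/*
	 * weight = 1	     =>  inv_w = ONE_FP = 1 << 30
	 * maxlen = 1 << 11	 (QFQ_MTU_SHIFT)
	 * slot_size = maxlen * inv_w = 1 << 41
	 * QFQ_MIN_SLOT_SHIFT = 30 + 11 - 19 = 22
	 * size_map = slot_size >> 22 = 1 << 19  =>  index = __fls + 1 = 20
	 * slot_size - (1ULL << (20 + 22 - 1)) == 0, so index drops to 19:
	 * an exact power of two lands in the last group, QFQ_MAX_INDEX = 19.
	 */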
205 | |||
206 | static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, | ||
207 | struct nlattr **tca, unsigned long *arg) | ||
208 | { | ||
209 | struct qfq_sched *q = qdisc_priv(sch); | ||
210 | struct qfq_class *cl = (struct qfq_class *)*arg; | ||
211 | struct nlattr *tb[TCA_QFQ_MAX + 1]; | ||
212 | u32 weight, lmax, inv_w; | ||
213 | int i, err; | ||
214 | |||
215 | if (tca[TCA_OPTIONS] == NULL) { | ||
216 | pr_notice("qfq: no options\n"); | ||
217 | return -EINVAL; | ||
218 | } | ||
219 | |||
220 | err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy); | ||
221 | if (err < 0) | ||
222 | return err; | ||
223 | |||
224 | if (tb[TCA_QFQ_WEIGHT]) { | ||
225 | weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]); | ||
226 | if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) { | ||
227 | pr_notice("qfq: invalid weight %u\n", weight); | ||
228 | return -EINVAL; | ||
229 | } | ||
230 | } else | ||
231 | weight = 1; | ||
232 | |||
233 | inv_w = ONE_FP / weight; | ||
234 | weight = ONE_FP / inv_w; | ||
235 | if (q->wsum + weight > QFQ_MAX_WSUM) { | ||
236 | pr_notice("qfq: total weight out of range (%u + %u)\n", | ||
237 | weight, q->wsum); | ||
238 | return -EINVAL; | ||
239 | } | ||
240 | |||
241 | if (tb[TCA_QFQ_LMAX]) { | ||
242 | lmax = nla_get_u32(tb[TCA_QFQ_LMAX]); | ||
243 | if (!lmax || lmax > (1UL << QFQ_MTU_SHIFT)) { | ||
244 | pr_notice("qfq: invalid max length %u\n", lmax); | ||
245 | return -EINVAL; | ||
246 | } | ||
247 | } else | ||
248 | lmax = 1UL << QFQ_MTU_SHIFT; | ||
249 | |||
250 | if (cl != NULL) { | ||
251 | if (tca[TCA_RATE]) { | ||
252 | err = gen_replace_estimator(&cl->bstats, &cl->rate_est, | ||
253 | qdisc_root_sleeping_lock(sch), | ||
254 | tca[TCA_RATE]); | ||
255 | if (err) | ||
256 | return err; | ||
257 | } | ||
258 | |||
259 | sch_tree_lock(sch); | ||
260 | if (tb[TCA_QFQ_WEIGHT]) { | ||
261 | q->wsum += weight - ONE_FP / cl->inv_w; | ||
262 | cl->inv_w = inv_w; | ||
263 | } | ||
264 | sch_tree_unlock(sch); | ||
265 | |||
266 | return 0; | ||
267 | } | ||
268 | |||
269 | cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL); | ||
270 | if (cl == NULL) | ||
271 | return -ENOBUFS; | ||
272 | |||
273 | cl->refcnt = 1; | ||
274 | cl->common.classid = classid; | ||
275 | cl->lmax = lmax; | ||
276 | cl->inv_w = inv_w; | ||
277 | i = qfq_calc_index(cl->inv_w, cl->lmax); | ||
278 | |||
279 | cl->grp = &q->groups[i]; | ||
280 | q->wsum += weight; | ||
281 | |||
282 | cl->qdisc = qdisc_create_dflt(sch->dev_queue, | ||
283 | &pfifo_qdisc_ops, classid); | ||
284 | if (cl->qdisc == NULL) | ||
285 | cl->qdisc = &noop_qdisc; | ||
286 | |||
287 | if (tca[TCA_RATE]) { | ||
288 | err = gen_new_estimator(&cl->bstats, &cl->rate_est, | ||
289 | qdisc_root_sleeping_lock(sch), | ||
290 | tca[TCA_RATE]); | ||
291 | if (err) { | ||
292 | qdisc_destroy(cl->qdisc); | ||
293 | kfree(cl); | ||
294 | return err; | ||
295 | } | ||
296 | } | ||
297 | |||
298 | sch_tree_lock(sch); | ||
299 | qdisc_class_hash_insert(&q->clhash, &cl->common); | ||
300 | sch_tree_unlock(sch); | ||
301 | |||
302 | qdisc_class_hash_grow(sch, &q->clhash); | ||
303 | |||
304 | *arg = (unsigned long)cl; | ||
305 | return 0; | ||
306 | } | ||
307 | |||
308 | static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) | ||
309 | { | ||
310 | struct qfq_sched *q = qdisc_priv(sch); | ||
311 | |||
312 | if (cl->inv_w) { | ||
313 | q->wsum -= ONE_FP / cl->inv_w; | ||
314 | cl->inv_w = 0; | ||
315 | } | ||
316 | |||
317 | gen_kill_estimator(&cl->bstats, &cl->rate_est); | ||
318 | qdisc_destroy(cl->qdisc); | ||
319 | kfree(cl); | ||
320 | } | ||
321 | |||
322 | static int qfq_delete_class(struct Qdisc *sch, unsigned long arg) | ||
323 | { | ||
324 | struct qfq_sched *q = qdisc_priv(sch); | ||
325 | struct qfq_class *cl = (struct qfq_class *)arg; | ||
326 | |||
327 | if (cl->filter_cnt > 0) | ||
328 | return -EBUSY; | ||
329 | |||
330 | sch_tree_lock(sch); | ||
331 | |||
332 | qfq_purge_queue(cl); | ||
333 | qdisc_class_hash_remove(&q->clhash, &cl->common); | ||
334 | |||
335 | BUG_ON(--cl->refcnt == 0); | ||
336 | /* | ||
337 | * This shouldn't happen: we "hold" one cops->get() when called | ||
338 | * from tc_ctl_tclass; the destroy method is done from cops->put(). | ||
339 | */ | ||
340 | |||
341 | sch_tree_unlock(sch); | ||
342 | return 0; | ||
343 | } | ||
344 | |||
345 | static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid) | ||
346 | { | ||
347 | struct qfq_class *cl = qfq_find_class(sch, classid); | ||
348 | |||
349 | if (cl != NULL) | ||
350 | cl->refcnt++; | ||
351 | |||
352 | return (unsigned long)cl; | ||
353 | } | ||
354 | |||
355 | static void qfq_put_class(struct Qdisc *sch, unsigned long arg) | ||
356 | { | ||
357 | struct qfq_class *cl = (struct qfq_class *)arg; | ||
358 | |||
359 | if (--cl->refcnt == 0) | ||
360 | qfq_destroy_class(sch, cl); | ||
361 | } | ||
362 | |||
363 | static struct tcf_proto **qfq_tcf_chain(struct Qdisc *sch, unsigned long cl) | ||
364 | { | ||
365 | struct qfq_sched *q = qdisc_priv(sch); | ||
366 | |||
367 | if (cl) | ||
368 | return NULL; | ||
369 | |||
370 | return &q->filter_list; | ||
371 | } | ||
372 | |||
373 | static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent, | ||
374 | u32 classid) | ||
375 | { | ||
376 | struct qfq_class *cl = qfq_find_class(sch, classid); | ||
377 | |||
378 | if (cl != NULL) | ||
379 | cl->filter_cnt++; | ||
380 | |||
381 | return (unsigned long)cl; | ||
382 | } | ||
383 | |||
384 | static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg) | ||
385 | { | ||
386 | struct qfq_class *cl = (struct qfq_class *)arg; | ||
387 | |||
388 | cl->filter_cnt--; | ||
389 | } | ||
390 | |||
391 | static int qfq_graft_class(struct Qdisc *sch, unsigned long arg, | ||
392 | struct Qdisc *new, struct Qdisc **old) | ||
393 | { | ||
394 | struct qfq_class *cl = (struct qfq_class *)arg; | ||
395 | |||
396 | if (new == NULL) { | ||
397 | new = qdisc_create_dflt(sch->dev_queue, | ||
398 | &pfifo_qdisc_ops, cl->common.classid); | ||
399 | if (new == NULL) | ||
400 | new = &noop_qdisc; | ||
401 | } | ||
402 | |||
403 | sch_tree_lock(sch); | ||
404 | qfq_purge_queue(cl); | ||
405 | *old = cl->qdisc; | ||
406 | cl->qdisc = new; | ||
407 | sch_tree_unlock(sch); | ||
408 | return 0; | ||
409 | } | ||
410 | |||
411 | static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg) | ||
412 | { | ||
413 | struct qfq_class *cl = (struct qfq_class *)arg; | ||
414 | |||
415 | return cl->qdisc; | ||
416 | } | ||
417 | |||
418 | static int qfq_dump_class(struct Qdisc *sch, unsigned long arg, | ||
419 | struct sk_buff *skb, struct tcmsg *tcm) | ||
420 | { | ||
421 | struct qfq_class *cl = (struct qfq_class *)arg; | ||
422 | struct nlattr *nest; | ||
423 | |||
424 | tcm->tcm_parent = TC_H_ROOT; | ||
425 | tcm->tcm_handle = cl->common.classid; | ||
426 | tcm->tcm_info = cl->qdisc->handle; | ||
427 | |||
428 | nest = nla_nest_start(skb, TCA_OPTIONS); | ||
429 | if (nest == NULL) | ||
430 | goto nla_put_failure; | ||
431 | NLA_PUT_U32(skb, TCA_QFQ_WEIGHT, ONE_FP/cl->inv_w); | ||
432 | NLA_PUT_U32(skb, TCA_QFQ_LMAX, cl->lmax); | ||
433 | return nla_nest_end(skb, nest); | ||
434 | |||
435 | nla_put_failure: | ||
436 | nla_nest_cancel(skb, nest); | ||
437 | return -EMSGSIZE; | ||
438 | } | ||
439 | |||
440 | static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, | ||
441 | struct gnet_dump *d) | ||
442 | { | ||
443 | struct qfq_class *cl = (struct qfq_class *)arg; | ||
444 | struct tc_qfq_stats xstats; | ||
445 | |||
446 | memset(&xstats, 0, sizeof(xstats)); | ||
447 | cl->qdisc->qstats.qlen = cl->qdisc->q.qlen; | ||
448 | |||
449 | xstats.weight = ONE_FP/cl->inv_w; | ||
450 | xstats.lmax = cl->lmax; | ||
451 | |||
452 | if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || | ||
453 | gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || | ||
454 | gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0) | ||
455 | return -1; | ||
456 | |||
457 | return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); | ||
458 | } | ||
459 | |||
460 | static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) | ||
461 | { | ||
462 | struct qfq_sched *q = qdisc_priv(sch); | ||
463 | struct qfq_class *cl; | ||
464 | struct hlist_node *n; | ||
465 | unsigned int i; | ||
466 | |||
467 | if (arg->stop) | ||
468 | return; | ||
469 | |||
470 | for (i = 0; i < q->clhash.hashsize; i++) { | ||
471 | hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { | ||
472 | if (arg->count < arg->skip) { | ||
473 | arg->count++; | ||
474 | continue; | ||
475 | } | ||
476 | if (arg->fn(sch, (unsigned long)cl, arg) < 0) { | ||
477 | arg->stop = 1; | ||
478 | return; | ||
479 | } | ||
480 | arg->count++; | ||
481 | } | ||
482 | } | ||
483 | } | ||
484 | |||
485 | static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, | ||
486 | int *qerr) | ||
487 | { | ||
488 | struct qfq_sched *q = qdisc_priv(sch); | ||
489 | struct qfq_class *cl; | ||
490 | struct tcf_result res; | ||
491 | int result; | ||
492 | |||
493 | if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) { | ||
494 | pr_debug("qfq_classify: found %d\n", skb->priority); | ||
495 | cl = qfq_find_class(sch, skb->priority); | ||
496 | if (cl != NULL) | ||
497 | return cl; | ||
498 | } | ||
499 | |||
500 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; | ||
501 | result = tc_classify(skb, q->filter_list, &res); | ||
502 | if (result >= 0) { | ||
503 | #ifdef CONFIG_NET_CLS_ACT | ||
504 | switch (result) { | ||
505 | case TC_ACT_QUEUED: | ||
506 | case TC_ACT_STOLEN: | ||
507 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; | ||
508 | case TC_ACT_SHOT: | ||
509 | return NULL; | ||
510 | } | ||
511 | #endif | ||
512 | cl = (struct qfq_class *)res.class; | ||
513 | if (cl == NULL) | ||
514 | cl = qfq_find_class(sch, res.classid); | ||
515 | return cl; | ||
516 | } | ||
517 | |||
518 | return NULL; | ||
519 | } | ||
520 | |||
521 | /* Generic comparison function, handling wraparound. */ | ||
522 | static inline int qfq_gt(u64 a, u64 b) | ||
523 | { | ||
524 | return (s64)(a - b) > 0; | ||
525 | } | ||
526 | |||
527 | /* Round a precise timestamp to its slotted value. */ | ||
528 | static inline u64 qfq_round_down(u64 ts, unsigned int shift) | ||
529 | { | ||
530 | return ts & ~((1ULL << shift) - 1); | ||
531 | } | ||
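
Both helpers are pure arithmetic and easy to exercise outside the kernel. A user-space sketch with arbitrary test values, showing why the signed cast matters near the 64-bit wrap point:

    #include <stdio.h>
    #include <stdint.h>

    static int qfq_gt(uint64_t a, uint64_t b)
    {
        return (int64_t)(a - b) > 0;
    }

    static uint64_t qfq_round_down(uint64_t ts, unsigned int shift)
    {
        return ts & ~((1ULL << shift) - 1);
    }

    int main(void)
    {
        /* 'a' is logically 8 units ahead of 'b' across the 64-bit wrap,
         * so plain '>' gets the order wrong while qfq_gt() does not */
        uint64_t a = 5, b = UINT64_MAX - 2;

        printf("a > b: %d, qfq_gt(a, b): %d\n", a > b, qfq_gt(a, b));
        printf("round_down(0x1234, 8) = %#llx\n",      /* 0x1200 */
               (unsigned long long)qfq_round_down(0x1234, 8));
        return 0;
    }
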
532 | |||
533 | /* Return a pointer to the group with the lowest index set in the bitmap. */ | ||
534 | static inline struct qfq_group *qfq_ffs(struct qfq_sched *q, | ||
535 | unsigned long bitmap) | ||
536 | { | ||
537 | int index = __ffs(bitmap); | ||
538 | return &q->groups[index]; | ||
539 | } | ||
540 | /* Calculate a mask to mimic what ffs_from() would do. */ | ||
541 | static inline unsigned long mask_from(unsigned long bitmap, int from) | ||
542 | { | ||
543 | return bitmap & ~((1UL << from) - 1); | ||
544 | } | ||
545 | |||
546 | /* | ||
547 | * The state computation relies on ER=0, IR=1, EB=2, IB=3 | ||
548 | * First compute eligibility by comparing grp->S with q->V, | ||
549 | * then check whether someone is blocking us and, if so, add EB. | ||
550 | */ | ||
551 | static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp) | ||
552 | { | ||
553 | /* if S > V we are not eligible */ | ||
554 | unsigned int state = qfq_gt(grp->S, q->V); | ||
555 | unsigned long mask = mask_from(q->bitmaps[ER], grp->index); | ||
556 | struct qfq_group *next; | ||
557 | |||
558 | if (mask) { | ||
559 | next = qfq_ffs(q, mask); | ||
560 | if (qfq_gt(grp->F, next->F)) | ||
561 | state |= EB; | ||
562 | } | ||
563 | |||
564 | return state; | ||
565 | } | ||
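
The ER/IR/EB/IB encoding is deliberate: bit 0 marks ineligible, bit 1 marks blocked, so the group-set moves elsewhere in this file always pair states that differ in exactly one bit. A user-space sketch of how the state composes, mirroring the computation above:

    #include <stdio.h>

    enum { ER = 0, IR = 1, EB = 2, IB = 3 };  /* as in the code above */

    int main(void)
    {
        const char *name[] = { "ER", "IR", "EB", "IB" };
        int ineligible, blocked;

        for (ineligible = 0; ineligible <= 1; ineligible++)
            for (blocked = 0; blocked <= 1; blocked++) {
                /* mirrors qfq_calc_state(): start from the
                 * eligibility bit, then OR in EB if blocked */
                int state = ineligible | (blocked ? EB : 0);

                printf("ineligible=%d blocked=%d -> %s\n",
                       ineligible, blocked, name[state]);
            }
        return 0;
    }
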
566 | |||
567 | |||
568 | /* | ||
569 | * In principle | ||
570 | * q->bitmaps[dst] |= q->bitmaps[src] & mask; | ||
571 | * q->bitmaps[src] &= ~mask; | ||
572 | * but we should make sure that src != dst | ||
573 | */ | ||
574 | static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask, | ||
575 | int src, int dst) | ||
576 | { | ||
577 | q->bitmaps[dst] |= q->bitmaps[src] & mask; | ||
578 | q->bitmaps[src] &= ~mask; | ||
579 | } | ||
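
A toy run of the two-line move with hand-picked bitmap values (any values work, as long as src != dst):

    #include <stdio.h>

    enum { ER = 0, IR = 1, EB = 2, IB = 3 };

    int main(void)
    {
        unsigned long bitmaps[4] = { 0 };
        unsigned long mask = 0x02;          /* move group 1 only */

        bitmaps[IR] = 0x06;                 /* groups 1 and 2 are in IR */

        bitmaps[ER] |= bitmaps[IR] & mask;  /* qfq_move_groups(q, mask, IR, ER) */
        bitmaps[IR] &= ~mask;

        printf("ER=%#lx IR=%#lx\n", bitmaps[ER], bitmaps[IR]);  /* 0x2 0x4 */
        return 0;
    }
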
580 | |||
581 | static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F) | ||
582 | { | ||
583 | unsigned long mask = mask_from(q->bitmaps[ER], index + 1); | ||
584 | struct qfq_group *next; | ||
585 | |||
586 | if (mask) { | ||
587 | next = qfq_ffs(q, mask); | ||
588 | if (!qfq_gt(next->F, old_F)) | ||
589 | return; | ||
590 | } | ||
591 | |||
592 | mask = (1UL << index) - 1; | ||
593 | qfq_move_groups(q, mask, EB, ER); | ||
594 | qfq_move_groups(q, mask, IB, IR); | ||
595 | } | ||
596 | |||
597 | /* | ||
598 | * perhaps | ||
599 | * | ||
600 | old_V ^= q->V; | ||
601 | old_V >>= QFQ_MIN_SLOT_SHIFT; | ||
602 | if (old_V) { | ||
603 | ... | ||
604 | } | ||
605 | * | ||
606 | */ | ||
607 | static void qfq_make_eligible(struct qfq_sched *q, u64 old_V) | ||
608 | { | ||
609 | unsigned long vslot = q->V >> QFQ_MIN_SLOT_SHIFT; | ||
610 | unsigned long old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT; | ||
611 | |||
612 | if (vslot != old_vslot) { | ||
613 | unsigned long mask = (1UL << fls(vslot ^ old_vslot)) - 1; | ||
614 | qfq_move_groups(q, mask, IR, ER); | ||
615 | qfq_move_groups(q, mask, IB, EB); | ||
616 | } | ||
617 | } | ||
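
A worked instance of the mask arithmetic: fls() is the kernel helper returning the 1-based position of the most significant set bit, emulated here so the sketch runs in user space:

    #include <stdio.h>

    /* user-space stand-in for the kernel's fls(): 1-based position of
     * the last (most significant) set bit, 0 for an all-zero word */
    static int fls_emul(unsigned long x)
    {
        int r = 0;

        while (x) {
            x >>= 1;
            r++;
        }
        return r;
    }

    int main(void)
    {
        unsigned long old_vslot = 5;    /* 0b0101 */
        unsigned long vslot     = 9;    /* 0b1001 */
        unsigned long mask;

        /* xor = 0b1100, fls = 4: the highest changed bit is bit 3, so
         * groups 0..3 have all crossed a slot boundary */
        mask = (1UL << fls_emul(vslot ^ old_vslot)) - 1;
        printf("mask = %#lx\n", mask);  /* 0xf */
        return 0;
    }
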
618 | |||
619 | |||
620 | /* | ||
621 | * XXX we should make sure that slot stays below 32 (QFQ_MAX_SLOTS); | ||
622 | * this is guaranteed by the input values. | ||
623 | * roundedS is always cl->S rounded down to grp->slot_shift bits. | ||
624 | */ | ||
625 | static void qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl, | ||
626 | u64 roundedS) | ||
627 | { | ||
628 | u64 slot = (roundedS - grp->S) >> grp->slot_shift; | ||
629 | unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS; | ||
630 | |||
631 | hlist_add_head(&cl->next, &grp->slots[i]); | ||
632 | __set_bit(slot, &grp->full_slots); | ||
633 | } | ||
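
The bucket index is plain circular-buffer arithmetic. A user-space rendition with an assumed slot_shift and made-up timestamps:

    #include <stdio.h>
    #include <stdint.h>

    #define QFQ_MAX_SLOTS 32        /* as in sch_qfq.c */

    int main(void)
    {
        unsigned int slot_shift = 20;   /* per-group shift, assumed */
        unsigned int front = 30;        /* current front bucket */
        uint64_t grp_S    = 7ULL << slot_shift;
        uint64_t roundedS = 10ULL << slot_shift;  /* class start, rounded */

        /* the class lands 3 slots after the group start; the circular
         * index wraps past QFQ_MAX_SLOTS */
        uint64_t slot = (roundedS - grp_S) >> slot_shift;
        unsigned int i = (front + slot) % QFQ_MAX_SLOTS;

        printf("slot = %llu, bucket index = %u\n",
               (unsigned long long)slot, i);  /* slot = 3, index = 1 */
        return 0;
    }
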
634 | |||
635 | /* Maybe introduce hlist_first_entry?? */ | ||
636 | static struct qfq_class *qfq_slot_head(struct qfq_group *grp) | ||
637 | { | ||
638 | return hlist_entry(grp->slots[grp->front].first, | ||
639 | struct qfq_class, next); | ||
640 | } | ||
641 | |||
642 | /* | ||
643 | * Remove the head entry from the group's front slot. | ||
644 | */ | ||
645 | static void qfq_front_slot_remove(struct qfq_group *grp) | ||
646 | { | ||
647 | struct qfq_class *cl = qfq_slot_head(grp); | ||
648 | |||
649 | BUG_ON(!cl); | ||
650 | hlist_del(&cl->next); | ||
651 | if (hlist_empty(&grp->slots[grp->front])) | ||
652 | __clear_bit(0, &grp->full_slots); | ||
653 | } | ||
654 | |||
655 | /* | ||
656 | * Return the head class of the first non-empty bucket in a group. | ||
657 | * As a side effect, rotate the bucket list so that the first | ||
658 | * non-empty bucket ends up at position 0 in full_slots. | ||
659 | */ | ||
660 | static struct qfq_class *qfq_slot_scan(struct qfq_group *grp) | ||
661 | { | ||
662 | unsigned int i; | ||
663 | |||
664 | pr_debug("qfq slot_scan: grp %u full %#lx\n", | ||
665 | grp->index, grp->full_slots); | ||
666 | |||
667 | if (grp->full_slots == 0) | ||
668 | return NULL; | ||
669 | |||
670 | i = __ffs(grp->full_slots); /* zero based */ | ||
671 | if (i > 0) { | ||
672 | grp->front = (grp->front + i) % QFQ_MAX_SLOTS; | ||
673 | grp->full_slots >>= i; | ||
674 | } | ||
675 | |||
676 | return qfq_slot_head(grp); | ||
677 | } | ||
678 | |||
679 | /* | ||
680 | * adjust the bucket list. When the start time of a group decreases, | ||
681 | * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to | ||
682 | * move the objects. The mask of occupied slots must be shifted | ||
683 | * because we use ffs() to find the first non-empty slot. | ||
684 | * This covers decreases in the group's start time, but what about | ||
685 | * increases of the start time? | ||
686 | * Here too we should make sure that i stays below 32 (QFQ_MAX_SLOTS). | ||
687 | */ | ||
688 | static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS) | ||
689 | { | ||
690 | unsigned int i = (grp->S - roundedS) >> grp->slot_shift; | ||
691 | |||
692 | grp->full_slots <<= i; | ||
693 | grp->front = (grp->front - i) % QFQ_MAX_SLOTS; | ||
694 | } | ||
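
grp->front is unsigned, so front - i can wrap around; the result is still correct because QFQ_MAX_SLOTS is a power of two and unsigned arithmetic wraps modulo 2^32. A quick check of that shortcut:

    #include <stdio.h>

    #define QFQ_MAX_SLOTS 32        /* power of two, as in sch_qfq.c */

    int main(void)
    {
        unsigned int front = 3, i = 5;

        /* 3 - 5 wraps to 0xfffffffe; taking it modulo 32 yields 30,
         * which is exactly (3 - 5) mod 32 -- this only works because
         * 32 divides 2^32 */
        printf("new front = %u\n", (front - i) % QFQ_MAX_SLOTS);  /* 30 */
        return 0;
    }
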
695 | |||
696 | static void qfq_update_eligible(struct qfq_sched *q, u64 old_V) | ||
697 | { | ||
698 | struct qfq_group *grp; | ||
699 | unsigned long ineligible; | ||
700 | |||
701 | ineligible = q->bitmaps[IR] | q->bitmaps[IB]; | ||
702 | if (ineligible) { | ||
703 | if (!q->bitmaps[ER]) { | ||
704 | grp = qfq_ffs(q, ineligible); | ||
705 | if (qfq_gt(grp->S, q->V)) | ||
706 | q->V = grp->S; | ||
707 | } | ||
708 | qfq_make_eligible(q, old_V); | ||
709 | } | ||
710 | } | ||
711 | |||
712 | /* Return the length of the next packet in the queue (0 if the queue is empty). */ | ||
713 | static unsigned int qdisc_peek_len(struct Qdisc *sch) | ||
714 | { | ||
715 | struct sk_buff *skb; | ||
716 | |||
717 | skb = sch->ops->peek(sch); | ||
718 | return skb ? qdisc_pkt_len(skb) : 0; | ||
719 | } | ||
720 | |||
721 | /* | ||
722 | * Update the class; return true if the group also needs to be updated. | ||
723 | */ | ||
724 | static bool qfq_update_class(struct qfq_group *grp, struct qfq_class *cl) | ||
725 | { | ||
726 | unsigned int len = qdisc_peek_len(cl->qdisc); | ||
727 | |||
728 | cl->S = cl->F; | ||
729 | if (!len) | ||
730 | qfq_front_slot_remove(grp); /* queue is empty */ | ||
731 | else { | ||
732 | u64 roundedS; | ||
733 | |||
734 | cl->F = cl->S + (u64)len * cl->inv_w; | ||
735 | roundedS = qfq_round_down(cl->S, grp->slot_shift); | ||
736 | if (roundedS == grp->S) | ||
737 | return false; | ||
738 | |||
739 | qfq_front_slot_remove(grp); | ||
740 | qfq_slot_insert(grp, cl, roundedS); | ||
741 | } | ||
742 | |||
743 | return true; | ||
744 | } | ||
745 | |||
746 | static struct sk_buff *qfq_dequeue(struct Qdisc *sch) | ||
747 | { | ||
748 | struct qfq_sched *q = qdisc_priv(sch); | ||
749 | struct qfq_group *grp; | ||
750 | struct qfq_class *cl; | ||
751 | struct sk_buff *skb; | ||
752 | unsigned int len; | ||
753 | u64 old_V; | ||
754 | |||
755 | if (!q->bitmaps[ER]) | ||
756 | return NULL; | ||
757 | |||
758 | grp = qfq_ffs(q, q->bitmaps[ER]); | ||
759 | |||
760 | cl = qfq_slot_head(grp); | ||
761 | skb = qdisc_dequeue_peeked(cl->qdisc); | ||
762 | if (!skb) { | ||
763 | WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n"); | ||
764 | return NULL; | ||
765 | } | ||
766 | |||
767 | sch->q.qlen--; | ||
768 | qdisc_bstats_update(sch, skb); | ||
769 | |||
770 | old_V = q->V; | ||
771 | len = qdisc_pkt_len(skb); | ||
772 | q->V += (u64)len * IWSUM; | ||
773 | pr_debug("qfq dequeue: len %u F %lld now %lld\n", | ||
774 | len, (unsigned long long) cl->F, (unsigned long long) q->V); | ||
775 | |||
776 | if (qfq_update_class(grp, cl)) { | ||
777 | u64 old_F = grp->F; | ||
778 | |||
779 | cl = qfq_slot_scan(grp); | ||
780 | if (!cl) | ||
781 | __clear_bit(grp->index, &q->bitmaps[ER]); | ||
782 | else { | ||
783 | u64 roundedS = qfq_round_down(cl->S, grp->slot_shift); | ||
784 | unsigned int s; | ||
785 | |||
786 | if (grp->S == roundedS) | ||
787 | goto skip_unblock; | ||
788 | grp->S = roundedS; | ||
789 | grp->F = roundedS + (2ULL << grp->slot_shift); | ||
790 | __clear_bit(grp->index, &q->bitmaps[ER]); | ||
791 | s = qfq_calc_state(q, grp); | ||
792 | __set_bit(grp->index, &q->bitmaps[s]); | ||
793 | } | ||
794 | |||
795 | qfq_unblock_groups(q, grp->index, old_F); | ||
796 | } | ||
797 | |||
798 | skip_unblock: | ||
799 | qfq_update_eligible(q, old_V); | ||
800 | |||
801 | return skb; | ||
802 | } | ||
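
The V update is the classic virtual-time advance: every dequeued byte moves V by 1/wsum in fixed point. In this file IWSUM is a compile-time reciprocal based on the maximum allowed weight sum; the sketch below instead takes the weight sum to be the lone class's weight, so V and the class finish time move in lockstep:

    #include <stdio.h>
    #include <stdint.h>

    #define FRAC_BITS 30                  /* assumed, as before */
    #define ONE_FP    (1ULL << FRAC_BITS)

    int main(void)
    {
        /* one backlogged class of weight 2, and wsum taken to be 2;
         * iwsum stands in for the kernel's constant IWSUM */
        uint64_t inv_w = ONE_FP / 2;
        uint64_t iwsum = ONE_FP / 2;
        uint64_t S = 0, V = 0;
        unsigned int len = 1000;

        uint64_t F = S + (uint64_t)len * inv_w;   /* class finish time */
        V += (uint64_t)len * iwsum;               /* system virtual time */

        /* with a single class at full weight, V tracks F exactly */
        printf("F = %llu, V = %llu\n",
               (unsigned long long)F, (unsigned long long)V);
        return 0;
    }
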
803 | |||
804 | /* | ||
805 | * Assign a reasonable start time for a new flow k in group i. | ||
806 | * Admissible values for \hat{F} are multiples of \sigma_i | ||
807 | * no greater than V+\sigma_i. Larger values mean that | ||
808 | * we had a wraparound, so we consider the timestamp to be stale. | ||
809 | * | ||
810 | * If F is not stale and F >= V then we set S = F. | ||
811 | * Otherwise we should assign S = V, but this may violate | ||
812 | * the ordering in ER. So, if we have groups in ER, set S to | ||
813 | * the F_j of the first group j which would be blocking us. | ||
814 | * We are guaranteed not to move S backward because | ||
815 | * otherwise our group i would still be blocked. | ||
816 | */ | ||
817 | static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) | ||
818 | { | ||
819 | unsigned long mask; | ||
820 | u64 limit, roundedF; | ||
821 | int slot_shift = cl->grp->slot_shift; | ||
822 | |||
823 | roundedF = qfq_round_down(cl->F, slot_shift); | ||
824 | limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift); | ||
825 | |||
826 | if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) { | ||
827 | /* timestamp was stale */ | ||
828 | mask = mask_from(q->bitmaps[ER], cl->grp->index); | ||
829 | if (mask) { | ||
830 | struct qfq_group *next = qfq_ffs(q, mask); | ||
831 | if (qfq_gt(roundedF, next->F)) { | ||
832 | cl->S = next->F; | ||
833 | return; | ||
834 | } | ||
835 | } | ||
836 | cl->S = q->V; | ||
837 | } else /* timestamp is not stale */ | ||
838 | cl->S = cl->F; | ||
839 | } | ||
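
The admissible window for a non-stale F is (V, V + one slot]: anything outside it is taken to be a leftover from before a wraparound. A user-space check of both branches, with an arbitrary shift and timestamps:

    #include <stdio.h>
    #include <stdint.h>

    static int qfq_gt(uint64_t a, uint64_t b)
    {
        return (int64_t)(a - b) > 0;
    }

    static uint64_t qfq_round_down(uint64_t ts, unsigned int shift)
    {
        return ts & ~((1ULL << shift) - 1);
    }

    static int is_stale(uint64_t F, uint64_t V, unsigned int shift)
    {
        uint64_t limit = qfq_round_down(V, shift) + (1ULL << shift);

        /* mirrors the condition tested in qfq_update_start() */
        return !qfq_gt(F, V) || qfq_gt(qfq_round_down(F, shift), limit);
    }

    int main(void)
    {
        unsigned int shift = 20;        /* per-group slot_shift, assumed */
        uint64_t V = 5ULL << shift;

        printf("F just ahead of V: stale? %d\n",
               is_stale(V + (1ULL << (shift - 1)), V, shift));  /* 0 */
        printf("F far ahead of V:  stale? %d\n",
               is_stale(V + (5ULL << shift), V, shift));        /* 1 */
        return 0;
    }
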
840 | |||
841 | static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) | ||
842 | { | ||
843 | struct qfq_sched *q = qdisc_priv(sch); | ||
844 | struct qfq_group *grp; | ||
845 | struct qfq_class *cl; | ||
846 | int err; | ||
847 | u64 roundedS; | ||
848 | int s; | ||
849 | |||
850 | cl = qfq_classify(skb, sch, &err); | ||
851 | if (cl == NULL) { | ||
852 | if (err & __NET_XMIT_BYPASS) | ||
853 | sch->qstats.drops++; | ||
854 | kfree_skb(skb); | ||
855 | return err; | ||
856 | } | ||
857 | pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid); | ||
858 | |||
859 | err = qdisc_enqueue(skb, cl->qdisc); | ||
860 | if (unlikely(err != NET_XMIT_SUCCESS)) { | ||
861 | pr_debug("qfq_enqueue: enqueue failed %d\n", err); | ||
862 | if (net_xmit_drop_count(err)) { | ||
863 | cl->qstats.drops++; | ||
864 | sch->qstats.drops++; | ||
865 | } | ||
866 | return err; | ||
867 | } | ||
868 | |||
869 | bstats_update(&cl->bstats, skb); | ||
870 | ++sch->q.qlen; | ||
871 | |||
872 | /* If the new skb is not the head of the queue, we are done here. */ | ||
873 | if (cl->qdisc->q.qlen != 1) | ||
874 | return err; | ||
875 | |||
876 | /* If we reach this point, the class queue was idle */ | ||
877 | grp = cl->grp; | ||
878 | qfq_update_start(q, cl); | ||
879 | |||
880 | /* compute new finish time and rounded start. */ | ||
881 | cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w; | ||
882 | roundedS = qfq_round_down(cl->S, grp->slot_shift); | ||
883 | |||
884 | /* | ||
885 | * insert cl in the correct bucket. | ||
886 | * If cl->S >= grp->S we don't need to adjust the | ||
887 | * bucket list and simply go to the insertion phase. | ||
888 | * Otherwise grp->S is decreasing, we must make room | ||
889 | * in the bucket list, and also recompute the group state. | ||
890 | * Finally, if there were no flows in this group and nobody | ||
891 | * was in ER make sure to adjust V. | ||
892 | */ | ||
893 | if (grp->full_slots) { | ||
894 | if (!qfq_gt(grp->S, cl->S)) | ||
895 | goto skip_update; | ||
896 | |||
897 | /* create a slot for this cl->S */ | ||
898 | qfq_slot_rotate(grp, roundedS); | ||
899 | /* the group was surely ineligible; remove it from IR and IB */ | ||
900 | __clear_bit(grp->index, &q->bitmaps[IR]); | ||
901 | __clear_bit(grp->index, &q->bitmaps[IB]); | ||
902 | } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V)) | ||
903 | q->V = roundedS; | ||
904 | |||
905 | grp->S = roundedS; | ||
906 | grp->F = roundedS + (2ULL << grp->slot_shift); | ||
907 | s = qfq_calc_state(q, grp); | ||
908 | __set_bit(grp->index, &q->bitmaps[s]); | ||
909 | |||
910 | pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n", | ||
911 | s, q->bitmaps[s], | ||
912 | (unsigned long long) cl->S, | ||
913 | (unsigned long long) cl->F, | ||
914 | (unsigned long long) q->V); | ||
915 | |||
916 | skip_update: | ||
917 | qfq_slot_insert(grp, cl, roundedS); | ||
918 | |||
919 | return err; | ||
920 | } | ||
921 | |||
922 | |||
923 | static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp, | ||
924 | struct qfq_class *cl) | ||
925 | { | ||
926 | unsigned int i, offset; | ||
927 | u64 roundedS; | ||
928 | |||
929 | roundedS = qfq_round_down(cl->S, grp->slot_shift); | ||
930 | offset = (roundedS - grp->S) >> grp->slot_shift; | ||
931 | i = (grp->front + offset) % QFQ_MAX_SLOTS; | ||
932 | |||
933 | hlist_del(&cl->next); | ||
934 | if (hlist_empty(&grp->slots[i])) | ||
935 | __clear_bit(offset, &grp->full_slots); | ||
936 | } | ||
937 | |||
938 | /* | ||
939 | * Called to forcibly deactivate a class queue. | ||
940 | * If the queue is not in the front bucket, or if the front | ||
941 | * bucket still holds other queues, we can simply remove it | ||
942 | * with no other side effects. | ||
943 | * Otherwise we must propagate the event up. | ||
944 | */ | ||
945 | static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl) | ||
946 | { | ||
947 | struct qfq_group *grp = cl->grp; | ||
948 | unsigned long mask; | ||
949 | u64 roundedS; | ||
950 | int s; | ||
951 | |||
952 | cl->F = cl->S; | ||
953 | qfq_slot_remove(q, grp, cl); | ||
954 | |||
955 | if (!grp->full_slots) { | ||
956 | __clear_bit(grp->index, &q->bitmaps[IR]); | ||
957 | __clear_bit(grp->index, &q->bitmaps[EB]); | ||
958 | __clear_bit(grp->index, &q->bitmaps[IB]); | ||
959 | |||
960 | if (test_bit(grp->index, &q->bitmaps[ER]) && | ||
961 | !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) { | ||
962 | mask = q->bitmaps[ER] & ((1UL << grp->index) - 1); | ||
963 | if (mask) | ||
964 | mask = ~((1UL << __fls(mask)) - 1); | ||
965 | else | ||
966 | mask = ~0UL; | ||
967 | qfq_move_groups(q, mask, EB, ER); | ||
968 | qfq_move_groups(q, mask, IB, IR); | ||
969 | } | ||
970 | __clear_bit(grp->index, &q->bitmaps[ER]); | ||
971 | } else if (hlist_empty(&grp->slots[grp->front])) { | ||
972 | cl = qfq_slot_scan(grp); | ||
973 | roundedS = qfq_round_down(cl->S, grp->slot_shift); | ||
974 | if (grp->S != roundedS) { | ||
975 | __clear_bit(grp->index, &q->bitmaps[ER]); | ||
976 | __clear_bit(grp->index, &q->bitmaps[IR]); | ||
977 | __clear_bit(grp->index, &q->bitmaps[EB]); | ||
978 | __clear_bit(grp->index, &q->bitmaps[IB]); | ||
979 | grp->S = roundedS; | ||
980 | grp->F = roundedS + (2ULL << grp->slot_shift); | ||
981 | s = qfq_calc_state(q, grp); | ||
982 | __set_bit(grp->index, &q->bitmaps[s]); | ||
983 | } | ||
984 | } | ||
985 | |||
986 | qfq_update_eligible(q, q->V); | ||
987 | } | ||
988 | |||
989 | static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) | ||
990 | { | ||
991 | struct qfq_sched *q = qdisc_priv(sch); | ||
992 | struct qfq_class *cl = (struct qfq_class *)arg; | ||
993 | |||
994 | if (cl->qdisc->q.qlen == 0) | ||
995 | qfq_deactivate_class(q, cl); | ||
996 | } | ||
997 | |||
998 | static unsigned int qfq_drop(struct Qdisc *sch) | ||
999 | { | ||
1000 | struct qfq_sched *q = qdisc_priv(sch); | ||
1001 | struct qfq_group *grp; | ||
1002 | unsigned int i, j, len; | ||
1003 | |||
1004 | for (i = 0; i <= QFQ_MAX_INDEX; i++) { | ||
1005 | grp = &q->groups[i]; | ||
1006 | for (j = 0; j < QFQ_MAX_SLOTS; j++) { | ||
1007 | struct qfq_class *cl; | ||
1008 | struct hlist_node *n; | ||
1009 | |||
1010 | hlist_for_each_entry(cl, n, &grp->slots[j], next) { | ||
1011 | |||
1012 | if (!cl->qdisc->ops->drop) | ||
1013 | continue; | ||
1014 | |||
1015 | len = cl->qdisc->ops->drop(cl->qdisc); | ||
1016 | if (len > 0) { | ||
1017 | sch->q.qlen--; | ||
1018 | if (!cl->qdisc->q.qlen) | ||
1019 | qfq_deactivate_class(q, cl); | ||
1020 | |||
1021 | return len; | ||
1022 | } | ||
1023 | } | ||
1024 | } | ||
1025 | } | ||
1026 | |||
1027 | return 0; | ||
1028 | } | ||
1029 | |||
1030 | static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt) | ||
1031 | { | ||
1032 | struct qfq_sched *q = qdisc_priv(sch); | ||
1033 | struct qfq_group *grp; | ||
1034 | int i, j, err; | ||
1035 | |||
1036 | err = qdisc_class_hash_init(&q->clhash); | ||
1037 | if (err < 0) | ||
1038 | return err; | ||
1039 | |||
1040 | for (i = 0; i <= QFQ_MAX_INDEX; i++) { | ||
1041 | grp = &q->groups[i]; | ||
1042 | grp->index = i; | ||
1043 | grp->slot_shift = QFQ_MTU_SHIFT + FRAC_BITS | ||
1044 | - (QFQ_MAX_INDEX - i); | ||
1045 | for (j = 0; j < QFQ_MAX_SLOTS; j++) | ||
1046 | INIT_HLIST_HEAD(&grp->slots[j]); | ||
1047 | } | ||
1048 | |||
1049 | return 0; | ||
1050 | } | ||
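
The slot shifts grow by one per group, so each group's slots span twice the virtual time of the previous group's, matching classes with twice the lmax/weight ratio. A sketch of the resulting table; the constant values here are assumptions for illustration, not quotes from the file:

    #include <stdio.h>

    /* illustrative stand-ins for the #defines earlier in sch_qfq.c */
    #define QFQ_MTU_SHIFT 11
    #define FRAC_BITS     30
    #define QFQ_MAX_INDEX 19

    int main(void)
    {
        int i;

        /* group i gets slot_shift = QFQ_MTU_SHIFT + FRAC_BITS -
         * (QFQ_MAX_INDEX - i), i.e. one more bit per group */
        for (i = 0; i <= QFQ_MAX_INDEX; i++)
            printf("group %2d: slot_shift = %d\n",
                   i, QFQ_MTU_SHIFT + FRAC_BITS - (QFQ_MAX_INDEX - i));
        return 0;
    }
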
1051 | |||
1052 | static void qfq_reset_qdisc(struct Qdisc *sch) | ||
1053 | { | ||
1054 | struct qfq_sched *q = qdisc_priv(sch); | ||
1055 | struct qfq_group *grp; | ||
1056 | struct qfq_class *cl; | ||
1057 | struct hlist_node *n, *tmp; | ||
1058 | unsigned int i, j; | ||
1059 | |||
1060 | for (i = 0; i <= QFQ_MAX_INDEX; i++) { | ||
1061 | grp = &q->groups[i]; | ||
1062 | for (j = 0; j < QFQ_MAX_SLOTS; j++) { | ||
1063 | hlist_for_each_entry_safe(cl, n, tmp, | ||
1064 | &grp->slots[j], next) { | ||
1065 | qfq_deactivate_class(q, cl); | ||
1066 | } | ||
1067 | } | ||
1068 | } | ||
1069 | |||
1070 | for (i = 0; i < q->clhash.hashsize; i++) { | ||
1071 | hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) | ||
1072 | qdisc_reset(cl->qdisc); | ||
1073 | } | ||
1074 | sch->q.qlen = 0; | ||
1075 | } | ||
1076 | |||
1077 | static void qfq_destroy_qdisc(struct Qdisc *sch) | ||
1078 | { | ||
1079 | struct qfq_sched *q = qdisc_priv(sch); | ||
1080 | struct qfq_class *cl; | ||
1081 | struct hlist_node *n, *next; | ||
1082 | unsigned int i; | ||
1083 | |||
1084 | tcf_destroy_chain(&q->filter_list); | ||
1085 | |||
1086 | for (i = 0; i < q->clhash.hashsize; i++) { | ||
1087 | hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], | ||
1088 | common.hnode) { | ||
1089 | qfq_destroy_class(sch, cl); | ||
1090 | } | ||
1091 | } | ||
1092 | qdisc_class_hash_destroy(&q->clhash); | ||
1093 | } | ||
1094 | |||
1095 | static const struct Qdisc_class_ops qfq_class_ops = { | ||
1096 | .change = qfq_change_class, | ||
1097 | .delete = qfq_delete_class, | ||
1098 | .get = qfq_get_class, | ||
1099 | .put = qfq_put_class, | ||
1100 | .tcf_chain = qfq_tcf_chain, | ||
1101 | .bind_tcf = qfq_bind_tcf, | ||
1102 | .unbind_tcf = qfq_unbind_tcf, | ||
1103 | .graft = qfq_graft_class, | ||
1104 | .leaf = qfq_class_leaf, | ||
1105 | .qlen_notify = qfq_qlen_notify, | ||
1106 | .dump = qfq_dump_class, | ||
1107 | .dump_stats = qfq_dump_class_stats, | ||
1108 | .walk = qfq_walk, | ||
1109 | }; | ||
1110 | |||
1111 | static struct Qdisc_ops qfq_qdisc_ops __read_mostly = { | ||
1112 | .cl_ops = &qfq_class_ops, | ||
1113 | .id = "qfq", | ||
1114 | .priv_size = sizeof(struct qfq_sched), | ||
1115 | .enqueue = qfq_enqueue, | ||
1116 | .dequeue = qfq_dequeue, | ||
1117 | .peek = qdisc_peek_dequeued, | ||
1118 | .drop = qfq_drop, | ||
1119 | .init = qfq_init_qdisc, | ||
1120 | .reset = qfq_reset_qdisc, | ||
1121 | .destroy = qfq_destroy_qdisc, | ||
1122 | .owner = THIS_MODULE, | ||
1123 | }; | ||
1124 | |||
1125 | static int __init qfq_init(void) | ||
1126 | { | ||
1127 | return register_qdisc(&qfq_qdisc_ops); | ||
1128 | } | ||
1129 | |||
1130 | static void __exit qfq_exit(void) | ||
1131 | { | ||
1132 | unregister_qdisc(&qfq_qdisc_ops); | ||
1133 | } | ||
1134 | |||
1135 | module_init(qfq_init); | ||
1136 | module_exit(qfq_exit); | ||
1137 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/socket.c b/net/socket.c index 310d16b1b3c9..d25f5a9d6fa2 100644 --- a/net/socket.c +++ b/net/socket.c | |||
@@ -2643,13 +2643,13 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) | |||
2643 | return -EFAULT; | 2643 | return -EFAULT; |
2644 | 2644 | ||
2645 | if (convert_in) { | 2645 | if (convert_in) { |
2646 | /* We expect there to be holes between fs.m_u and | 2646 | /* We expect there to be holes between fs.m_ext and |
2647 | * fs.ring_cookie and at the end of fs, but nowhere else. | 2647 | * fs.ring_cookie and at the end of fs, but nowhere else. |
2648 | */ | 2648 | */ |
2649 | BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_u) + | 2649 | BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) + |
2650 | sizeof(compat_rxnfc->fs.m_u) != | 2650 | sizeof(compat_rxnfc->fs.m_ext) != |
2651 | offsetof(struct ethtool_rxnfc, fs.m_u) + | 2651 | offsetof(struct ethtool_rxnfc, fs.m_ext) + |
2652 | sizeof(rxnfc->fs.m_u)); | 2652 | sizeof(rxnfc->fs.m_ext)); |
2653 | BUILD_BUG_ON( | 2653 | BUILD_BUG_ON( |
2654 | offsetof(struct compat_ethtool_rxnfc, fs.location) - | 2654 | offsetof(struct compat_ethtool_rxnfc, fs.location) - |
2655 | offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != | 2655 | offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != |
@@ -2657,7 +2657,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) | |||
2657 | offsetof(struct ethtool_rxnfc, fs.ring_cookie)); | 2657 | offsetof(struct ethtool_rxnfc, fs.ring_cookie)); |
2658 | 2658 | ||
2659 | if (copy_in_user(rxnfc, compat_rxnfc, | 2659 | if (copy_in_user(rxnfc, compat_rxnfc, |
2660 | (void *)(&rxnfc->fs.m_u + 1) - | 2660 | (void *)(&rxnfc->fs.m_ext + 1) - |
2661 | (void *)rxnfc) || | 2661 | (void *)rxnfc) || |
2662 | copy_in_user(&rxnfc->fs.ring_cookie, | 2662 | copy_in_user(&rxnfc->fs.ring_cookie, |
2663 | &compat_rxnfc->fs.ring_cookie, | 2663 | &compat_rxnfc->fs.ring_cookie, |
@@ -2674,7 +2674,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) | |||
2674 | 2674 | ||
2675 | if (convert_out) { | 2675 | if (convert_out) { |
2676 | if (copy_in_user(compat_rxnfc, rxnfc, | 2676 | if (copy_in_user(compat_rxnfc, rxnfc, |
2677 | (const void *)(&rxnfc->fs.m_u + 1) - | 2677 | (const void *)(&rxnfc->fs.m_ext + 1) - |
2678 | (const void *)rxnfc) || | 2678 | (const void *)rxnfc) || |
2679 | copy_in_user(&compat_rxnfc->fs.ring_cookie, | 2679 | copy_in_user(&compat_rxnfc->fs.ring_cookie, |
2680 | &rxnfc->fs.ring_cookie, | 2680 | &rxnfc->fs.ring_cookie, |