Diffstat (limited to 'net')
 -rw-r--r--  net/8021q/vlan.c                |    8
 -rw-r--r--  net/8021q/vlan_dev.c            |   50
 -rw-r--r--  net/bridge/br.c                 |    1
 -rw-r--r--  net/bridge/br_device.c          |   41
 -rw-r--r--  net/bridge/br_fdb.c             |  311
 -rw-r--r--  net/bridge/br_if.c              |   83
 -rw-r--r--  net/bridge/br_input.c           |    5
 -rw-r--r--  net/bridge/br_ioctl.c           |   40
 -rw-r--r--  net/bridge/br_netlink.c         |   53
 -rw-r--r--  net/bridge/br_notify.c          |    6
 -rw-r--r--  net/bridge/br_private.h         |   19
 -rw-r--r--  net/bridge/br_private_stp.h     |   13
 -rw-r--r--  net/bridge/br_stp.c             |   48
 -rw-r--r--  net/bridge/br_stp_if.c          |   21
 -rw-r--r--  net/bridge/br_sysfs_br.c        |   39
 -rw-r--r--  net/bridge/br_sysfs_if.c        |   26
 -rw-r--r--  net/can/af_can.c                |   52
 -rw-r--r--  net/core/dev.c                  |   35
 -rw-r--r--  net/core/ethtool.c              |   66
 -rw-r--r--  net/ipv4/fib_frontend.c         |   16
 -rw-r--r--  net/ipv4/fib_trie.c             |  103
 -rw-r--r--  net/ipv4/inet_connection_sock.c |   18
 -rw-r--r--  net/ipv4/ip_output.c            |   18
 -rw-r--r--  net/ipv4/raw.c                  |   18
 -rw-r--r--  net/ipv4/route.c                |   16
 -rw-r--r--  net/ipv4/syncookies.c           |   18
 -rw-r--r--  net/ipv4/tcp.c                  |    7
 -rw-r--r--  net/ipv4/udp.c                  |   18
 -rw-r--r--  net/sched/Kconfig               |   11
 -rw-r--r--  net/sched/Makefile              |    1
 -rw-r--r--  net/sched/sch_qfq.c             | 1137
 -rw-r--r--  net/socket.c                    |   14
32 files changed, 1917 insertions(+), 395 deletions(-)
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 7850412f52b7..e47600b4e2e3 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -327,10 +327,6 @@ static void vlan_sync_address(struct net_device *dev,
 static void vlan_transfer_features(struct net_device *dev,
                                    struct net_device *vlandev)
 {
-        u32 old_features = vlandev->features;
-
-        vlandev->features &= ~dev->vlan_features;
-        vlandev->features |= dev->features & dev->vlan_features;
         vlandev->gso_max_size = dev->gso_max_size;
 
         if (dev->features & NETIF_F_HW_VLAN_TX)
@@ -341,8 +337,8 @@ static void vlan_transfer_features(struct net_device *dev,
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
         vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid;
 #endif
-        if (old_features != vlandev->features)
-                netdev_features_change(vlandev);
+
+        netdev_update_features(vlandev);
 }
 
 static void __vlan_device_event(struct net_device *dev, unsigned long event)
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index e34ea9e5e28b..b84a46b30c0c 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -704,8 +704,8 @@ static int vlan_dev_init(struct net_device *dev)
                                (1<<__LINK_STATE_DORMANT))) |
                    (1<<__LINK_STATE_PRESENT);
 
-        dev->features |= real_dev->features & real_dev->vlan_features;
-        dev->features |= NETIF_F_LLTX;
+        dev->hw_features = real_dev->vlan_features & NETIF_F_ALL_TX_OFFLOADS;
+        dev->features |= real_dev->vlan_features | NETIF_F_LLTX;
         dev->gso_max_size = real_dev->gso_max_size;
 
         /* ipv6 shared card related stuff */
@@ -759,6 +759,17 @@ static void vlan_dev_uninit(struct net_device *dev)
         }
 }
 
+static u32 vlan_dev_fix_features(struct net_device *dev, u32 features)
+{
+        struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+
+        features &= (real_dev->features | NETIF_F_LLTX);
+        if (dev_ethtool_get_rx_csum(real_dev))
+                features |= NETIF_F_RXCSUM;
+
+        return features;
+}
+
 static int vlan_ethtool_get_settings(struct net_device *dev,
                                      struct ethtool_cmd *cmd)
 {
@@ -774,18 +785,6 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev,
         strcpy(info->fw_version, "N/A");
 }
 
-static u32 vlan_ethtool_get_rx_csum(struct net_device *dev)
-{
-        const struct vlan_dev_info *vlan = vlan_dev_info(dev);
-        return dev_ethtool_get_rx_csum(vlan->real_dev);
-}
-
-static u32 vlan_ethtool_get_flags(struct net_device *dev)
-{
-        const struct vlan_dev_info *vlan = vlan_dev_info(dev);
-        return dev_ethtool_get_flags(vlan->real_dev);
-}
-
 static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
 
@@ -823,32 +822,10 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st
         return stats;
 }
 
-static int vlan_ethtool_set_tso(struct net_device *dev, u32 data)
-{
-        if (data) {
-                struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
-
-                /* Underlying device must support TSO for VLAN-tagged packets
-                 * and must have TSO enabled now.
-                 */
-                if (!(real_dev->vlan_features & NETIF_F_TSO))
-                        return -EOPNOTSUPP;
-                if (!(real_dev->features & NETIF_F_TSO))
-                        return -EINVAL;
-                dev->features |= NETIF_F_TSO;
-        } else {
-                dev->features &= ~NETIF_F_TSO;
-        }
-        return 0;
-}
-
 static const struct ethtool_ops vlan_ethtool_ops = {
         .get_settings           = vlan_ethtool_get_settings,
         .get_drvinfo            = vlan_ethtool_get_drvinfo,
         .get_link               = ethtool_op_get_link,
-        .get_rx_csum            = vlan_ethtool_get_rx_csum,
-        .get_flags              = vlan_ethtool_get_flags,
-        .set_tso                = vlan_ethtool_set_tso,
 };
 
 static const struct net_device_ops vlan_netdev_ops = {
@@ -874,6 +851,7 @@ static const struct net_device_ops vlan_netdev_ops = {
         .ndo_fcoe_get_wwn       = vlan_dev_fcoe_get_wwn,
         .ndo_fcoe_ddp_target    = vlan_dev_fcoe_ddp_target,
 #endif
+        .ndo_fix_features       = vlan_dev_fix_features,
 };
 
 void vlan_setup(struct net_device *dev)
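
The vlan hunks above replace ad-hoc feature copying with the ndo_fix_features hook, which the core invokes whenever a device's features are recomputed. A simplified sketch of that calling convention (illustrative only, condensed from the __netdev_update_features() change later in this series):

    static void example_update_features(struct net_device *dev)
    {
            /* start from what userspace wants, limited to hw_features */
            u32 features = (dev->features & ~dev->hw_features) |
                           (dev->wanted_features & dev->hw_features);

            /* driver adjustment hook, e.g. vlan_dev_fix_features() */
            if (dev->netdev_ops->ndo_fix_features)
                    features = dev->netdev_ops->ndo_fix_features(dev, features);

            if (features != dev->features &&
                dev->netdev_ops->ndo_set_features)
                    dev->netdev_ops->ndo_set_features(dev, features);
    }

This is why the per-device get_rx_csum/set_tso ethtool hooks can be deleted: the constraint "vlan offloads cannot exceed the real device's" now lives in one place.
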
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 84bbb82599b2..f20c4fd915a8 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -104,3 +104,4 @@ module_init(br_init)
 module_exit(br_deinit)
 MODULE_LICENSE("GPL");
 MODULE_VERSION(BR_VERSION);
+MODULE_ALIAS_RTNL_LINK("bridge");
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 21e5901186ea..45cfd54b06d3 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -74,6 +74,17 @@ out:
         return NETDEV_TX_OK;
 }
 
+static int br_dev_init(struct net_device *dev)
+{
+        struct net_bridge *br = netdev_priv(dev);
+
+        br->stats = alloc_percpu(struct br_cpu_netstats);
+        if (!br->stats)
+                return -ENOMEM;
+
+        return 0;
+}
+
 static int br_dev_open(struct net_device *dev)
 {
         struct net_bridge *br = netdev_priv(dev);
@@ -334,6 +345,7 @@ static const struct ethtool_ops br_ethtool_ops = {
 static const struct net_device_ops br_netdev_ops = {
         .ndo_open                = br_dev_open,
         .ndo_stop                = br_dev_stop,
+        .ndo_init                = br_dev_init,
         .ndo_start_xmit          = br_dev_xmit,
         .ndo_get_stats64         = br_get_stats64,
         .ndo_set_mac_address     = br_set_mac_address,
@@ -357,18 +369,47 @@ static void br_dev_free(struct net_device *dev)
         free_netdev(dev);
 }
 
+static struct device_type br_type = {
+        .name   = "bridge",
+};
+
 void br_dev_setup(struct net_device *dev)
 {
+        struct net_bridge *br = netdev_priv(dev);
+
         random_ether_addr(dev->dev_addr);
         ether_setup(dev);
 
         dev->netdev_ops = &br_netdev_ops;
         dev->destructor = br_dev_free;
         SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
+        SET_NETDEV_DEVTYPE(dev, &br_type);
         dev->tx_queue_len = 0;
         dev->priv_flags = IFF_EBRIDGE;
 
         dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
                         NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX |
                         NETIF_F_NETNS_LOCAL | NETIF_F_GSO | NETIF_F_HW_VLAN_TX;
+
+        br->dev = dev;
+        spin_lock_init(&br->lock);
+        INIT_LIST_HEAD(&br->port_list);
+        spin_lock_init(&br->hash_lock);
+
+        br->bridge_id.prio[0] = 0x80;
+        br->bridge_id.prio[1] = 0x00;
+
+        memcpy(br->group_addr, br_group_address, ETH_ALEN);
+
+        br->feature_mask = dev->features;
+        br->stp_enabled = BR_NO_STP;
+        br->designated_root = br->bridge_id;
+        br->bridge_max_age = br->max_age = 20 * HZ;
+        br->bridge_hello_time = br->hello_time = 2 * HZ;
+        br->bridge_forward_delay = br->forward_delay = 15 * HZ;
+        br->ageing_time = 300 * HZ;
+
+        br_netfilter_rtable_init(br);
+        br_stp_timer_init(br);
+        br_multicast_init(br);
 }
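
Moving the per-bridge allocation into br_dev_init() matters because register_netdevice() runs ndo_init for every creation path, the brctl ioctl and the new netlink one alike. Roughly (a simplified, illustrative sketch, not the full register_netdevice() sequence):

    static int example_register(struct net_device *dev)
    {
            int err = 0;

            /* br_dev_init() allocates br->stats here */
            if (dev->netdev_ops->ndo_init)
                    err = dev->netdev_ops->ndo_init(dev);
            if (err)
                    return err;

            /* ... name allocation, feature fixup, then the
             * NETDEV_REGISTER notifier, where br_notify.c below
             * now adds the bridge sysfs entries ...
             */
            return 0;
    }
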
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index cc4d3c5ab1c6..e0dfbc151dd7 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -28,6 +28,7 @@
 static struct kmem_cache *br_fdb_cache __read_mostly;
 static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
                       const unsigned char *addr);
+static void fdb_notify(const struct net_bridge_fdb_entry *, int);
 
 static u32 fdb_salt __read_mostly;
 
@@ -62,7 +63,7 @@ static inline int has_expired(const struct net_bridge *br,
                               const struct net_bridge_fdb_entry *fdb)
 {
         return !fdb->is_static &&
-                time_before_eq(fdb->ageing_timer + hold_time(br), jiffies);
+                time_before_eq(fdb->updated + hold_time(br), jiffies);
 }
 
 static inline int br_mac_hash(const unsigned char *mac)
@@ -81,6 +82,7 @@ static void fdb_rcu_free(struct rcu_head *head)
 
 static inline void fdb_delete(struct net_bridge_fdb_entry *f)
 {
+        fdb_notify(f, RTM_DELNEIGH);
         hlist_del_rcu(&f->hlist);
         call_rcu(&f->rcu, fdb_rcu_free);
 }
@@ -140,7 +142,7 @@ void br_fdb_cleanup(unsigned long _data)
                         unsigned long this_timer;
                         if (f->is_static)
                                 continue;
-                        this_timer = f->ageing_timer + delay;
+                        this_timer = f->updated + delay;
                         if (time_before_eq(this_timer, jiffies))
                                 fdb_delete(f);
                         else if (time_before(this_timer, next_timer))
@@ -293,7 +295,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
 
                         fe->is_local = f->is_local;
                         if (!f->is_static)
-                                fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->ageing_timer);
+                                fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->updated);
                         ++fe;
                         ++num;
                 }
@@ -305,8 +307,21 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
         return num;
 }
 
-static inline struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head,
-                                                    const unsigned char *addr)
+static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head,
+                                             const unsigned char *addr)
+{
+        struct hlist_node *h;
+        struct net_bridge_fdb_entry *fdb;
+
+        hlist_for_each_entry(fdb, h, head, hlist) {
+                if (!compare_ether_addr(fdb->addr.addr, addr))
+                        return fdb;
+        }
+        return NULL;
+}
+
+static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head,
+                                                 const unsigned char *addr)
 {
         struct hlist_node *h;
         struct net_bridge_fdb_entry *fdb;
@@ -320,8 +335,7 @@ static inline struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head,
 
 static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
                                                struct net_bridge_port *source,
-                                               const unsigned char *addr,
-                                               int is_local)
+                                               const unsigned char *addr)
 {
         struct net_bridge_fdb_entry *fdb;
 
@@ -329,11 +343,11 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
         if (fdb) {
                 memcpy(fdb->addr.addr, addr, ETH_ALEN);
                 fdb->dst = source;
-                fdb->is_local = is_local;
-                fdb->is_static = is_local;
-                fdb->ageing_timer = jiffies;
-
+                fdb->is_local = 0;
+                fdb->is_static = 0;
+                fdb->updated = fdb->used = jiffies;
                 hlist_add_head_rcu(&fdb->hlist, head);
+                fdb_notify(fdb, RTM_NEWNEIGH);
         }
         return fdb;
 }
@@ -360,12 +374,15 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
                 fdb_delete(fdb);
         }
 
-        if (!fdb_create(head, source, addr, 1))
+        fdb = fdb_create(head, source, addr);
+        if (!fdb)
                 return -ENOMEM;
 
+        fdb->is_local = fdb->is_static = 1;
         return 0;
 }
 
+/* Add entry for local address of interface */
 int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
                   const unsigned char *addr)
 {
@@ -392,7 +409,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
                     source->state == BR_STATE_FORWARDING))
                 return;
 
-        fdb = fdb_find(head, addr);
+        fdb = fdb_find_rcu(head, addr);
         if (likely(fdb)) {
                 /* attempt to update an entry for a local interface */
                 if (unlikely(fdb->is_local)) {
@@ -403,15 +420,277 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
                 } else {
                         /* fastpath: update of existing entry */
                         fdb->dst = source;
-                        fdb->ageing_timer = jiffies;
+                        fdb->updated = jiffies;
                 }
         } else {
                 spin_lock(&br->hash_lock);
-                if (!fdb_find(head, addr))
-                        fdb_create(head, source, addr, 0);
+                if (likely(!fdb_find(head, addr)))
+                        fdb_create(head, source, addr);
+
                 /* else we lose race and someone else inserts
                  * it first, don't bother updating
                  */
                 spin_unlock(&br->hash_lock);
         }
 }
+
+static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb)
+{
+        if (fdb->is_local)
+                return NUD_PERMANENT;
+        else if (fdb->is_static)
+                return NUD_NOARP;
+        else if (has_expired(fdb->dst->br, fdb))
+                return NUD_STALE;
+        else
+                return NUD_REACHABLE;
+}
+
+static int fdb_fill_info(struct sk_buff *skb,
+                         const struct net_bridge_fdb_entry *fdb,
+                         u32 pid, u32 seq, int type, unsigned int flags)
+{
+        unsigned long now = jiffies;
+        struct nda_cacheinfo ci;
+        struct nlmsghdr *nlh;
+        struct ndmsg *ndm;
+
+        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
+        if (nlh == NULL)
+                return -EMSGSIZE;
+
+
+        ndm = nlmsg_data(nlh);
+        ndm->ndm_family = AF_BRIDGE;
+        ndm->ndm_pad1 = 0;
+        ndm->ndm_pad2 = 0;
+        ndm->ndm_flags = 0;
+        ndm->ndm_type = 0;
+        ndm->ndm_ifindex = fdb->dst->dev->ifindex;
+        ndm->ndm_state = fdb_to_nud(fdb);
+
+        NLA_PUT(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr);
+
+        ci.ndm_used = jiffies_to_clock_t(now - fdb->used);
+        ci.ndm_confirmed = 0;
+        ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated);
+        ci.ndm_refcnt = 0;
+        NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
+
+        return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+        nlmsg_cancel(skb, nlh);
+        return -EMSGSIZE;
+}
+
+static inline size_t fdb_nlmsg_size(void)
+{
+        return NLMSG_ALIGN(sizeof(struct ndmsg))
+                + nla_total_size(ETH_ALEN) /* NDA_LLADDR */
+                + nla_total_size(sizeof(struct nda_cacheinfo));
+}
+
+static void fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
+{
+        struct net *net = dev_net(fdb->dst->dev);
+        struct sk_buff *skb;
+        int err = -ENOBUFS;
+
+        skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC);
+        if (skb == NULL)
+                goto errout;
+
+        err = fdb_fill_info(skb, fdb, 0, 0, type, 0);
+        if (err < 0) {
+                /* -EMSGSIZE implies BUG in fdb_nlmsg_size() */
+                WARN_ON(err == -EMSGSIZE);
+                kfree_skb(skb);
+                goto errout;
+        }
+        rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+        return;
+errout:
+        if (err < 0)
+                rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+}
+
+/* Dump information about entries, in response to GETNEIGH */
+int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+        struct net *net = sock_net(skb->sk);
+        struct net_device *dev;
+        int idx = 0;
+
+        rcu_read_lock();
+        for_each_netdev_rcu(net, dev) {
+                struct net_bridge *br = netdev_priv(dev);
+                int i;
+
+                if (!(dev->priv_flags & IFF_EBRIDGE))
+                        continue;
+
+                for (i = 0; i < BR_HASH_SIZE; i++) {
+                        struct hlist_node *h;
+                        struct net_bridge_fdb_entry *f;
+
+                        hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) {
+                                if (idx < cb->args[0])
+                                        goto skip;
+
+                                if (fdb_fill_info(skb, f,
+                                                  NETLINK_CB(cb->skb).pid,
+                                                  cb->nlh->nlmsg_seq,
+                                                  RTM_NEWNEIGH,
+                                                  NLM_F_MULTI) < 0)
+                                        break;
+skip:
+                                ++idx;
+                        }
+                }
+        }
+        rcu_read_unlock();
+
+        cb->args[0] = idx;
+
+        return skb->len;
+}
+
+/* Create new static fdb entry */
+static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
+                         __u16 state)
+{
+        struct net_bridge *br = source->br;
+        struct hlist_head *head = &br->hash[br_mac_hash(addr)];
+        struct net_bridge_fdb_entry *fdb;
+
+        fdb = fdb_find(head, addr);
+        if (fdb)
+                return -EEXIST;
+
+        fdb = fdb_create(head, source, addr);
+        if (!fdb)
+                return -ENOMEM;
+
+        if (state & NUD_PERMANENT)
+                fdb->is_local = fdb->is_static = 1;
+        else if (state & NUD_NOARP)
+                fdb->is_static = 1;
+        return 0;
+}
+
+/* Add new permanent fdb entry with RTM_NEWNEIGH */
+int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+        struct net *net = sock_net(skb->sk);
+        struct ndmsg *ndm;
+        struct nlattr *tb[NDA_MAX+1];
+        struct net_device *dev;
+        struct net_bridge_port *p;
+        const __u8 *addr;
+        int err;
+
+        ASSERT_RTNL();
+        err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+        if (err < 0)
+                return err;
+
+        ndm = nlmsg_data(nlh);
+        if (ndm->ndm_ifindex == 0) {
+                pr_info("bridge: RTM_NEWNEIGH with invalid ifindex\n");
+                return -EINVAL;
+        }
+
+        dev = __dev_get_by_index(net, ndm->ndm_ifindex);
+        if (dev == NULL) {
+                pr_info("bridge: RTM_NEWNEIGH with unknown ifindex\n");
+                return -ENODEV;
+        }
+
+        if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
+                pr_info("bridge: RTM_NEWNEIGH with invalid address\n");
+                return -EINVAL;
+        }
+
+        addr = nla_data(tb[NDA_LLADDR]);
+        if (!is_valid_ether_addr(addr)) {
+                pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n");
+                return -EINVAL;
+        }
+
+        p = br_port_get_rtnl(dev);
+        if (p == NULL) {
+                pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n",
+                        dev->name);
+                return -EINVAL;
+        }
+
+        spin_lock_bh(&p->br->hash_lock);
+        err = fdb_add_entry(p, addr, ndm->ndm_state);
+        spin_unlock_bh(&p->br->hash_lock);
+
+        return err;
+}
+
+static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr)
+{
+        struct net_bridge *br = p->br;
+        struct hlist_head *head = &br->hash[br_mac_hash(addr)];
+        struct net_bridge_fdb_entry *fdb;
+
+        fdb = fdb_find(head, addr);
+        if (!fdb)
+                return -ENOENT;
+
+        fdb_delete(fdb);
+        return 0;
+}
+
+/* Remove neighbor entry with RTM_DELNEIGH */
+int br_fdb_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+        struct net *net = sock_net(skb->sk);
+        struct ndmsg *ndm;
+        struct net_bridge_port *p;
+        struct nlattr *llattr;
+        const __u8 *addr;
+        struct net_device *dev;
+        int err;
+
+        ASSERT_RTNL();
+        if (nlmsg_len(nlh) < sizeof(*ndm))
+                return -EINVAL;
+
+        ndm = nlmsg_data(nlh);
+        if (ndm->ndm_ifindex == 0) {
+                pr_info("bridge: RTM_DELNEIGH with invalid ifindex\n");
+                return -EINVAL;
+        }
+
+        dev = __dev_get_by_index(net, ndm->ndm_ifindex);
+        if (dev == NULL) {
+                pr_info("bridge: RTM_DELNEIGH with unknown ifindex\n");
+                return -ENODEV;
+        }
+
+        llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR);
+        if (llattr == NULL || nla_len(llattr) != ETH_ALEN) {
+                pr_info("bridge: RTM_DELNEIGH with invalid address\n");
+                return -EINVAL;
+        }
+
+        addr = nla_data(llattr);
+
+        p = br_port_get_rtnl(dev);
+        if (p == NULL) {
+                pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n",
+                        dev->name);
+                return -EINVAL;
+        }
+
+        spin_lock_bh(&p->br->hash_lock);
+        err = fdb_delete_by_addr(p, addr);
+        spin_unlock_bh(&p->br->hash_lock);
+
+        return err;
+}
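
With br_fdb_dump(), br_fdb_add() and br_fdb_delete() wired up to RTM_GETNEIGH/RTM_NEWNEIGH/RTM_DELNEIGH (see the br_netlink.c hunks below), the forwarding database becomes reachable over ordinary rtnetlink. A minimal userspace dump request could look like the following sketch (illustrative only, error handling and reply parsing trimmed):

    #include <linux/rtnetlink.h>
    #include <linux/neighbour.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    /* Ask the kernel to dump AF_BRIDGE neighbour (fdb) entries. */
    static int request_fdb_dump(int fd)
    {
            struct {
                    struct nlmsghdr nlh;
                    struct ndmsg ndm;
            } req;

            memset(&req, 0, sizeof(req));
            req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
            req.nlh.nlmsg_type = RTM_GETNEIGH;
            req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
            req.ndm.ndm_family = AF_BRIDGE;   /* routes to br_fdb_dump() */

            return send(fd, &req, req.nlh.nlmsg_len, 0);
    }

    int main(void)
    {
            int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

            if (fd < 0 || request_fdb_dump(fd) < 0)
                    return 1;
            /* recv() loop parsing nlmsghdr/ndmsg/NDA_* attributes omitted */
            close(fd);
            return 0;
    }

Each returned entry carries the MAC in NDA_LLADDR, the port ifindex in ndm_ifindex, and the fdb_to_nud() mapping in ndm_state (NUD_PERMANENT for local, NUD_NOARP for static, NUD_STALE/NUD_REACHABLE for learned entries).
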
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 718b60366dfe..7f5379c593d9 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -175,56 +175,6 @@ static void del_br(struct net_bridge *br, struct list_head *head)
         unregister_netdevice_queue(br->dev, head);
 }
 
-static struct net_device *new_bridge_dev(struct net *net, const char *name)
-{
-        struct net_bridge *br;
-        struct net_device *dev;
-
-        dev = alloc_netdev(sizeof(struct net_bridge), name,
-                           br_dev_setup);
-
-        if (!dev)
-                return NULL;
-        dev_net_set(dev, net);
-
-        br = netdev_priv(dev);
-        br->dev = dev;
-
-        br->stats = alloc_percpu(struct br_cpu_netstats);
-        if (!br->stats) {
-                free_netdev(dev);
-                return NULL;
-        }
-
-        spin_lock_init(&br->lock);
-        INIT_LIST_HEAD(&br->port_list);
-        spin_lock_init(&br->hash_lock);
-
-        br->bridge_id.prio[0] = 0x80;
-        br->bridge_id.prio[1] = 0x00;
-
-        memcpy(br->group_addr, br_group_address, ETH_ALEN);
-
-        br->feature_mask = dev->features;
-        br->stp_enabled = BR_NO_STP;
-        br->designated_root = br->bridge_id;
-        br->root_path_cost = 0;
-        br->root_port = 0;
-        br->bridge_max_age = br->max_age = 20 * HZ;
-        br->bridge_hello_time = br->hello_time = 2 * HZ;
-        br->bridge_forward_delay = br->forward_delay = 15 * HZ;
-        br->topology_change = 0;
-        br->topology_change_detected = 0;
-        br->ageing_time = 300 * HZ;
-
-        br_netfilter_rtable_init(br);
-
-        br_stp_timer_init(br);
-        br_multicast_init(br);
-
-        return dev;
-}
-
 /* find an available port number */
 static int find_portno(struct net_bridge *br)
 {
@@ -277,42 +227,19 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
         return p;
 }
 
-static struct device_type br_type = {
-        .name = "bridge",
-};
-
 int br_add_bridge(struct net *net, const char *name)
 {
         struct net_device *dev;
-        int ret;
 
-        dev = new_bridge_dev(net, name);
+        dev = alloc_netdev(sizeof(struct net_bridge), name,
+                           br_dev_setup);
+
         if (!dev)
                 return -ENOMEM;
 
-        rtnl_lock();
-        if (strchr(dev->name, '%')) {
-                ret = dev_alloc_name(dev, dev->name);
-                if (ret < 0)
-                        goto out_free;
-        }
-
-        SET_NETDEV_DEVTYPE(dev, &br_type);
-
-        ret = register_netdevice(dev);
-        if (ret)
-                goto out_free;
-
-        ret = br_sysfs_addbr(dev);
-        if (ret)
-                unregister_netdevice(dev);
- out:
-        rtnl_unlock();
-        return ret;
+        dev_net_set(dev, net);
 
-out_free:
-        free_netdev(dev);
-        goto out;
+        return register_netdev(dev);
 }
 
 int br_del_bridge(struct net *net, const char *name)
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index e2160792e1bc..785932d7ad32 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -98,9 +98,10 @@ int br_handle_frame_finish(struct sk_buff *skb)
         }
 
         if (skb) {
-                if (dst)
+                if (dst) {
+                        dst->used = jiffies;
                         br_forward(dst->dst, skb, skb2);
-                else
+                } else
                         br_flood_forward(br, skb, skb2);
         }
 
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 3d9fca0e3370..7222fe1d5460 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -181,40 +181,19 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
                 if (!capable(CAP_NET_ADMIN))
                         return -EPERM;
 
-                spin_lock_bh(&br->lock);
-                br->bridge_forward_delay = clock_t_to_jiffies(args[1]);
-                if (br_is_root_bridge(br))
-                        br->forward_delay = br->bridge_forward_delay;
-                spin_unlock_bh(&br->lock);
-                return 0;
+                return br_set_forward_delay(br, args[1]);
 
         case BRCTL_SET_BRIDGE_HELLO_TIME:
-        {
-                unsigned long t = clock_t_to_jiffies(args[1]);
                 if (!capable(CAP_NET_ADMIN))
                         return -EPERM;
 
-                if (t < HZ)
-                        return -EINVAL;
-
-                spin_lock_bh(&br->lock);
-                br->bridge_hello_time = t;
-                if (br_is_root_bridge(br))
-                        br->hello_time = br->bridge_hello_time;
-                spin_unlock_bh(&br->lock);
-                return 0;
-        }
+                return br_set_hello_time(br, args[1]);
 
         case BRCTL_SET_BRIDGE_MAX_AGE:
                 if (!capable(CAP_NET_ADMIN))
                         return -EPERM;
 
-                spin_lock_bh(&br->lock);
-                br->bridge_max_age = clock_t_to_jiffies(args[1]);
-                if (br_is_root_bridge(br))
-                        br->max_age = br->bridge_max_age;
-                spin_unlock_bh(&br->lock);
-                return 0;
+                return br_set_max_age(br, args[1]);
 
         case BRCTL_SET_AGEING_TIME:
                 if (!capable(CAP_NET_ADMIN))
@@ -275,19 +254,16 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
         case BRCTL_SET_PORT_PRIORITY:
         {
                 struct net_bridge_port *p;
-                int ret = 0;
+                int ret;
 
                 if (!capable(CAP_NET_ADMIN))
                         return -EPERM;
 
-                if (args[2] >= (1<<(16-BR_PORT_BITS)))
-                        return -ERANGE;
-
                 spin_lock_bh(&br->lock);
                 if ((p = br_get_port(br, args[1])) == NULL)
                         ret = -EINVAL;
                 else
-                        br_stp_set_port_priority(p, args[2]);
+                        ret = br_stp_set_port_priority(p, args[2]);
                 spin_unlock_bh(&br->lock);
                 return ret;
         }
@@ -295,15 +271,17 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
         case BRCTL_SET_PATH_COST:
         {
                 struct net_bridge_port *p;
-                int ret = 0;
+                int ret;
 
                 if (!capable(CAP_NET_ADMIN))
                         return -EPERM;
 
+                spin_lock_bh(&br->lock);
                 if ((p = br_get_port(br, args[1])) == NULL)
                         ret = -EINVAL;
                 else
-                        br_stp_set_path_cost(p, args[2]);
+                        ret = br_stp_set_path_cost(p, args[2]);
+                spin_unlock_bh(&br->lock);
 
                 return ret;
         }
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index f8bf4c7f842c..134a2ff6b98b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -12,9 +12,11 @@
 
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/etherdevice.h>
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
+
 #include "br_private.h"
 
 static inline size_t br_nlmsg_size(void)
@@ -188,20 +190,61 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
         return 0;
 }
 
+static int br_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+        if (tb[IFLA_ADDRESS]) {
+                if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+                        return -EINVAL;
+                if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+                        return -EADDRNOTAVAIL;
+        }
+
+        return 0;
+}
+
+static struct rtnl_link_ops br_link_ops __read_mostly = {
+        .kind           = "bridge",
+        .priv_size      = sizeof(struct net_bridge),
+        .setup          = br_dev_setup,
+        .validate       = br_validate,
+};
 
 int __init br_netlink_init(void)
 {
-        if (__rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, br_dump_ifinfo))
-                return -ENOBUFS;
+        int err;
 
-        /* Only the first call to __rtnl_register can fail */
-        __rtnl_register(PF_BRIDGE, RTM_SETLINK, br_rtm_setlink, NULL);
+        err = rtnl_link_register(&br_link_ops);
+        if (err < 0)
+                goto err1;
+
+        err = __rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, br_dump_ifinfo);
+        if (err)
+                goto err2;
+        err = __rtnl_register(PF_BRIDGE, RTM_SETLINK, br_rtm_setlink, NULL);
+        if (err)
+                goto err3;
+        err = __rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, br_fdb_add, NULL);
+        if (err)
+                goto err3;
+        err = __rtnl_register(PF_BRIDGE, RTM_DELNEIGH, br_fdb_delete, NULL);
+        if (err)
+                goto err3;
+        err = __rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, br_fdb_dump);
+        if (err)
+                goto err3;
 
         return 0;
+
+err3:
+        rtnl_unregister_all(PF_BRIDGE);
+err2:
+        rtnl_link_unregister(&br_link_ops);
+err1:
+        return err;
 }
 
 void __exit br_netlink_fini(void)
 {
+        rtnl_link_unregister(&br_link_ops);
         rtnl_unregister_all(PF_BRIDGE);
 }
-
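
Registering br_link_ops is what makes bridges creatable through RTM_NEWLINK (e.g. "ip link add dev br0 type bridge") instead of only through the brctl ioctls: IFLA_INFO_KIND is matched against .kind, .setup runs br_dev_setup(), and .validate runs br_validate() on the attributes. Any virtual device follows the same template; a generic sketch (names marked hypothetical are not from this patch):

    static struct rtnl_link_ops example_link_ops __read_mostly = {
            .kind           = "example",    /* matched against IFLA_INFO_KIND */
            .priv_size      = sizeof(struct example_priv),  /* hypothetical */
            .setup          = example_dev_setup,            /* hypothetical */
            .validate       = example_validate,             /* hypothetical */
    };

    static int __init example_module_init(void)
    {
            return rtnl_link_register(&example_link_ops);
    }

    static void __exit example_module_exit(void)
    {
            rtnl_link_unregister(&example_link_ops);
    }

The MODULE_ALIAS_RTNL_LINK("bridge") added in br.c lets the kernel autoload the module when userspace first asks for that kind.
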
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 7d337c9b6082..7a03bb975375 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -36,6 +36,12 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
         struct net_bridge *br;
         int err;
 
+        /* register of bridge completed, add sysfs entries */
+        if ((dev->priv_flags && IFF_EBRIDGE) && event == NETDEV_REGISTER) {
+                br_sysfs_addbr(dev);
+                return NOTIFY_DONE;
+        }
+
         /* not a port of a bridge */
         p = br_port_get_rtnl(dev);
         if (!p)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 387013d33745..e2a40343aa09 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -64,7 +64,8 @@ struct net_bridge_fdb_entry
         struct net_bridge_port          *dst;
 
         struct rcu_head                 rcu;
-        unsigned long                   ageing_timer;
+        unsigned long                   updated;
+        unsigned long                   used;
         mac_addr                        addr;
         unsigned char                   is_local;
         unsigned char                   is_static;
@@ -353,6 +354,9 @@ extern int br_fdb_insert(struct net_bridge *br,
 extern void br_fdb_update(struct net_bridge *br,
                           struct net_bridge_port *source,
                           const unsigned char *addr);
+extern int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb);
+extern int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
+extern int br_fdb_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
 
 /* br_forward.c */
 extern void br_deliver(const struct net_bridge_port *to,
@@ -491,6 +495,11 @@ extern struct net_bridge_port *br_get_port(struct net_bridge *br,
 extern void br_init_port(struct net_bridge_port *p);
 extern void br_become_designated_port(struct net_bridge_port *p);
 
+extern int br_set_forward_delay(struct net_bridge *br, unsigned long x);
+extern int br_set_hello_time(struct net_bridge *br, unsigned long x);
+extern int br_set_max_age(struct net_bridge *br, unsigned long x);
+
+
 /* br_stp_if.c */
 extern void br_stp_enable_bridge(struct net_bridge *br);
 extern void br_stp_disable_bridge(struct net_bridge *br);
@@ -501,10 +510,10 @@ extern bool br_stp_recalculate_bridge_id(struct net_bridge *br);
 extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a);
 extern void br_stp_set_bridge_priority(struct net_bridge *br,
                                        u16 newprio);
-extern void br_stp_set_port_priority(struct net_bridge_port *p,
-                                     u8 newprio);
-extern void br_stp_set_path_cost(struct net_bridge_port *p,
-                                 u32 path_cost);
+extern int br_stp_set_port_priority(struct net_bridge_port *p,
+                                    unsigned long newprio);
+extern int br_stp_set_path_cost(struct net_bridge_port *p,
+                                unsigned long path_cost);
 extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id);
 
 /* br_stp_bpdu.c */
diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h
index 8b650f7fbfa0..642ef47a867e 100644
--- a/net/bridge/br_private_stp.h
+++ b/net/bridge/br_private_stp.h
@@ -16,6 +16,19 @@
 #define BPDU_TYPE_CONFIG        0
 #define BPDU_TYPE_TCN           0x80
 
+/* IEEE 802.1D-1998 timer values */
+#define BR_MIN_HELLO_TIME       (1*HZ)
+#define BR_MAX_HELLO_TIME       (10*HZ)
+
+#define BR_MIN_FORWARD_DELAY    (2*HZ)
+#define BR_MAX_FORWARD_DELAY    (30*HZ)
+
+#define BR_MIN_MAX_AGE          (6*HZ)
+#define BR_MAX_MAX_AGE          (40*HZ)
+
+#define BR_MIN_PATH_COST        1
+#define BR_MAX_PATH_COST        65535
+
 struct br_config_bpdu
 {
         unsigned        topology_change:1;
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 7370d14f634d..bb4383e84de9 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -484,3 +484,51 @@ void br_received_tcn_bpdu(struct net_bridge_port *p)
                         br_topology_change_acknowledge(p);
         }
 }
+
+/* Change bridge STP parameter */
+int br_set_hello_time(struct net_bridge *br, unsigned long val)
+{
+        unsigned long t = clock_t_to_jiffies(val);
+
+        if (t < BR_MIN_HELLO_TIME || t > BR_MAX_HELLO_TIME)
+                return -ERANGE;
+
+        spin_lock_bh(&br->lock);
+        br->bridge_hello_time = t;
+        if (br_is_root_bridge(br))
+                br->hello_time = br->bridge_hello_time;
+        spin_unlock_bh(&br->lock);
+        return 0;
+}
+
+int br_set_max_age(struct net_bridge *br, unsigned long val)
+{
+        unsigned long t = clock_t_to_jiffies(val);
+
+        if (t < BR_MIN_MAX_AGE || t > BR_MAX_MAX_AGE)
+                return -ERANGE;
+
+        spin_lock_bh(&br->lock);
+        br->bridge_max_age = t;
+        if (br_is_root_bridge(br))
+                br->max_age = br->bridge_max_age;
+        spin_unlock_bh(&br->lock);
+        return 0;
+
+}
+
+int br_set_forward_delay(struct net_bridge *br, unsigned long val)
+{
+        unsigned long t = clock_t_to_jiffies(val);
+
+        if (br->stp_enabled != BR_NO_STP &&
+            (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY))
+                return -ERANGE;
+
+        spin_lock_bh(&br->lock);
+        br->bridge_forward_delay = t;
+        if (br_is_root_bridge(br))
+                br->forward_delay = br->bridge_forward_delay;
+        spin_unlock_bh(&br->lock);
+        return 0;
+}
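
These helpers give the ioctl and sysfs paths one range-checked implementation of the STP timer updates; note they take br->lock themselves, which is why store_bridge_parm() in br_sysfs_br.c below drops its own locking. Values arrive in USER_HZ clock ticks and are converted before the check. A worked example of the arithmetic (illustrative, assuming USER_HZ == 100):

    /* echo 200 > /sys/class/net/br0/bridge/hello_time
     *   val = 200 user ticks            ->  2 seconds
     *   t   = clock_t_to_jiffies(200)   ->  2 * HZ jiffies
     *   BR_MIN_HELLO_TIME (1*HZ) <= t <= BR_MAX_HELLO_TIME (10*HZ): accepted
     *
     * A write of 50 (0.5 s) would give t = HZ/2 < BR_MIN_HELLO_TIME
     * and the helper returns -ERANGE instead of silently storing it.
     */
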
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 9b61d09de9b9..6f615b8192f4 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -20,7 +20,7 @@
 
 
 /* Port id is composed of priority and port number.
- * NB: least significant bits of priority are dropped to
+ * NB: some bits of priority are dropped to
  * make room for more ports.
  */
 static inline port_id br_make_port_id(__u8 priority, __u16 port_no)
@@ -29,6 +29,8 @@ static inline port_id br_make_port_id(__u8 priority, __u16 port_no)
                 | (port_no & ((1<<BR_PORT_BITS)-1));
 }
 
+#define BR_MAX_PORT_PRIORITY ((u16)~0 >> BR_PORT_BITS)
+
 /* called under bridge lock */
 void br_init_port(struct net_bridge_port *p)
 {
@@ -255,10 +257,14 @@ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio)
 }
 
 /* called under bridge lock */
-void br_stp_set_port_priority(struct net_bridge_port *p, u8 newprio)
+int br_stp_set_port_priority(struct net_bridge_port *p, unsigned long newprio)
 {
-        port_id new_port_id = br_make_port_id(newprio, p->port_no);
+        port_id new_port_id;
+
+        if (newprio > BR_MAX_PORT_PRIORITY)
+                return -ERANGE;
 
+        new_port_id = br_make_port_id(newprio, p->port_no);
         if (br_is_designated_port(p))
                 p->designated_port = new_port_id;
 
@@ -269,14 +275,21 @@ void br_stp_set_port_priority(struct net_bridge_port *p, u8 newprio)
                 br_become_designated_port(p);
                 br_port_state_selection(p->br);
         }
+
+        return 0;
 }
 
 /* called under bridge lock */
-void br_stp_set_path_cost(struct net_bridge_port *p, u32 path_cost)
+int br_stp_set_path_cost(struct net_bridge_port *p, unsigned long path_cost)
 {
+        if (path_cost < BR_MIN_PATH_COST ||
+            path_cost > BR_MAX_PATH_COST)
+                return -ERANGE;
+
         p->path_cost = path_cost;
         br_configuration_update(p->br);
         br_port_state_selection(p->br);
+        return 0;
 }
 
 ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id)
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 5c1e5559ebba..68b893ea8c3a 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -43,9 +43,7 @@ static ssize_t store_bridge_parm(struct device *d,
         if (endp == buf)
                 return -EINVAL;
 
-        spin_lock_bh(&br->lock);
         err = (*set)(br, val);
-        spin_unlock_bh(&br->lock);
         return err ? err : len;
 }
 
@@ -57,20 +55,11 @@ static ssize_t show_forward_delay(struct device *d,
         return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay));
 }
 
-static int set_forward_delay(struct net_bridge *br, unsigned long val)
-{
-        unsigned long delay = clock_t_to_jiffies(val);
-        br->forward_delay = delay;
-        if (br_is_root_bridge(br))
-                br->bridge_forward_delay = delay;
-        return 0;
-}
-
 static ssize_t store_forward_delay(struct device *d,
                                    struct device_attribute *attr,
                                    const char *buf, size_t len)
 {
-        return store_bridge_parm(d, buf, len, set_forward_delay);
+        return store_bridge_parm(d, buf, len, br_set_forward_delay);
 }
 static DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR,
                    show_forward_delay, store_forward_delay);
@@ -82,24 +71,11 @@ static ssize_t show_hello_time(struct device *d, struct device_attribute *attr,
                        jiffies_to_clock_t(to_bridge(d)->hello_time));
 }
 
-static int set_hello_time(struct net_bridge *br, unsigned long val)
-{
-        unsigned long t = clock_t_to_jiffies(val);
-
-        if (t < HZ)
-                return -EINVAL;
-
-        br->hello_time = t;
-        if (br_is_root_bridge(br))
-                br->bridge_hello_time = t;
-        return 0;
-}
-
 static ssize_t store_hello_time(struct device *d,
                                 struct device_attribute *attr, const char *buf,
                                 size_t len)
 {
-        return store_bridge_parm(d, buf, len, set_hello_time);
+        return store_bridge_parm(d, buf, len, br_set_hello_time);
 }
 static DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time,
                    store_hello_time);
@@ -111,19 +87,10 @@ static ssize_t show_max_age(struct device *d, struct device_attribute *attr,
                        jiffies_to_clock_t(to_bridge(d)->max_age));
 }
 
-static int set_max_age(struct net_bridge *br, unsigned long val)
-{
-        unsigned long t = clock_t_to_jiffies(val);
-        br->max_age = t;
-        if (br_is_root_bridge(br))
-                br->bridge_max_age = t;
-        return 0;
-}
-
 static ssize_t store_max_age(struct device *d, struct device_attribute *attr,
                              const char *buf, size_t len)
 {
-        return store_bridge_parm(d, buf, len, set_max_age);
+        return store_bridge_parm(d, buf, len, br_set_max_age);
 }
 static DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, store_max_age);
 
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index fd5799c9bc8d..6229b62749e8 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -23,7 +23,7 @@
 struct brport_attribute {
         struct attribute        attr;
         ssize_t (*show)(struct net_bridge_port *, char *);
-        ssize_t (*store)(struct net_bridge_port *, unsigned long);
+        int (*store)(struct net_bridge_port *, unsigned long);
 };
 
 #define BRPORT_ATTR(_name,_mode,_show,_store) \
@@ -38,27 +38,17 @@ static ssize_t show_path_cost(struct net_bridge_port *p, char *buf)
 {
         return sprintf(buf, "%d\n", p->path_cost);
 }
-static ssize_t store_path_cost(struct net_bridge_port *p, unsigned long v)
-{
-        br_stp_set_path_cost(p, v);
-        return 0;
-}
+
 static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR,
-                   show_path_cost, store_path_cost);
+                   show_path_cost, br_stp_set_path_cost);
 
 static ssize_t show_priority(struct net_bridge_port *p, char *buf)
 {
         return sprintf(buf, "%d\n", p->priority);
 }
-static ssize_t store_priority(struct net_bridge_port *p, unsigned long v)
-{
-        if (v >= (1<<(16-BR_PORT_BITS)))
-                return -ERANGE;
-        br_stp_set_port_priority(p, v);
-        return 0;
-}
+
 static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR,
-                   show_priority, store_priority);
+                   show_priority, br_stp_set_port_priority);
 
 static ssize_t show_designated_root(struct net_bridge_port *p, char *buf)
 {
@@ -136,7 +126,7 @@ static ssize_t show_hold_timer(struct net_bridge_port *p,
 }
 static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL);
 
-static ssize_t store_flush(struct net_bridge_port *p, unsigned long v)
+static int store_flush(struct net_bridge_port *p, unsigned long v)
 {
         br_fdb_delete_by_port(p->br, p, 0); // Don't delete local entry
         return 0;
@@ -148,7 +138,7 @@ static ssize_t show_hairpin_mode(struct net_bridge_port *p, char *buf)
         int hairpin_mode = (p->flags & BR_HAIRPIN_MODE) ? 1 : 0;
         return sprintf(buf, "%d\n", hairpin_mode);
 }
-static ssize_t store_hairpin_mode(struct net_bridge_port *p, unsigned long v)
+static int store_hairpin_mode(struct net_bridge_port *p, unsigned long v)
 {
         if (v)
                 p->flags |= BR_HAIRPIN_MODE;
@@ -165,7 +155,7 @@ static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
         return sprintf(buf, "%d\n", p->multicast_router);
 }
 
-static ssize_t store_multicast_router(struct net_bridge_port *p,
-                                      unsigned long v)
+static int store_multicast_router(struct net_bridge_port *p,
+                                  unsigned long v)
 {
         return br_multicast_set_port_router(p, v);
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 733d66f1b05a..a8dcaa49675a 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -85,7 +85,7 @@ static struct kmem_cache *rcv_cache __read_mostly;
 
 /* table of registered CAN protocols */
 static struct can_proto *proto_tab[CAN_NPROTO] __read_mostly;
-static DEFINE_SPINLOCK(proto_tab_lock);
+static DEFINE_MUTEX(proto_tab_lock);
 
 struct timer_list can_stattimer;   /* timer for statistics update */
 struct s_stats    can_stats;       /* packet statistics */
@@ -115,6 +115,19 @@ static void can_sock_destruct(struct sock *sk)
         skb_queue_purge(&sk->sk_receive_queue);
 }
 
+static struct can_proto *can_try_module_get(int protocol)
+{
+        struct can_proto *cp;
+
+        rcu_read_lock();
+        cp = rcu_dereference(proto_tab[protocol]);
+        if (cp && !try_module_get(cp->prot->owner))
+                cp = NULL;
+        rcu_read_unlock();
+
+        return cp;
+}
+
 static int can_create(struct net *net, struct socket *sock, int protocol,
                       int kern)
 {
@@ -130,9 +143,12 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
         if (!net_eq(net, &init_net))
                 return -EAFNOSUPPORT;
 
+        cp = can_try_module_get(protocol);
+
 #ifdef CONFIG_MODULES
-        /* try to load protocol module kernel is modular */
-        if (!proto_tab[protocol]) {
+        if (!cp) {
+                /* try to load protocol module if kernel is modular */
+
                 err = request_module("can-proto-%d", protocol);
 
                 /*
@@ -143,22 +159,18 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
                 if (err && printk_ratelimit())
                         printk(KERN_ERR "can: request_module "
                                "(can-proto-%d) failed.\n", protocol);
+
+                cp = can_try_module_get(protocol);
         }
 #endif
 
-        spin_lock(&proto_tab_lock);
-        cp = proto_tab[protocol];
-        if (cp && !try_module_get(cp->prot->owner))
-                cp = NULL;
-        spin_unlock(&proto_tab_lock);
-
         /* check for available protocol and correct usage */
 
         if (!cp)
                 return -EPROTONOSUPPORT;
 
         if (cp->type != sock->type) {
-                err = -EPROTONOSUPPORT;
+                err = -EPROTOTYPE;
                 goto errout;
         }
 
@@ -694,15 +706,16 @@ int can_proto_register(struct can_proto *cp)
         if (err < 0)
                 return err;
 
-        spin_lock(&proto_tab_lock);
+        mutex_lock(&proto_tab_lock);
+
         if (proto_tab[proto]) {
                 printk(KERN_ERR "can: protocol %d already registered\n",
                        proto);
                 err = -EBUSY;
         } else
-                proto_tab[proto] = cp;
+                rcu_assign_pointer(proto_tab[proto], cp);
 
-        spin_unlock(&proto_tab_lock);
+        mutex_unlock(&proto_tab_lock);
 
         if (err < 0)
                 proto_unregister(cp->prot);
@@ -719,13 +732,12 @@ void can_proto_unregister(struct can_proto *cp)
 {
         int proto = cp->protocol;
 
-        spin_lock(&proto_tab_lock);
-        if (!proto_tab[proto]) {
-                printk(KERN_ERR "BUG: can: protocol %d is not registered\n",
-                       proto);
-        }
-        proto_tab[proto] = NULL;
-        spin_unlock(&proto_tab_lock);
+        mutex_lock(&proto_tab_lock);
+        BUG_ON(proto_tab[proto] != cp);
+        rcu_assign_pointer(proto_tab[proto], NULL);
+        mutex_unlock(&proto_tab_lock);
+
+        synchronize_rcu();
 
         proto_unregister(cp->prot);
 }
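
The af_can.c conversion is the classic RCU publish/unpublish pattern: writers serialize on a mutex and publish with rcu_assign_pointer(), readers pin the entry inside an RCU read-side section (here via try_module_get()), and unregister waits in synchronize_rcu() before the protocol can go away. A generic sketch of the same pattern, with illustrative names that are not from af_can.c:

    static DEFINE_MUTEX(tab_lock);           /* serializes writers only */
    static struct item __rcu *tab[NITEMS];   /* 'struct item' is hypothetical */

    static struct item *item_get(int i)
    {
            struct item *it;

            rcu_read_lock();
            it = rcu_dereference(tab[i]);
            if (it && !try_get_ref(it))      /* hypothetical refcount helper */
                    it = NULL;
            rcu_read_unlock();
            return it;
    }

    static void item_unpublish(int i)
    {
            mutex_lock(&tab_lock);
            rcu_assign_pointer(tab[i], NULL);
            mutex_unlock(&tab_lock);

            synchronize_rcu();               /* all earlier readers are done */
            /* now safe to free the old entry */
    }

This removes the spinlock from the socket-creation fast path entirely; only register/unregister still contend on the mutex.
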
diff --git a/net/core/dev.c b/net/core/dev.c
index 956d3b006e8b..95897ff3a76f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5236,7 +5236,7 @@ u32 netdev_fix_features(struct net_device *dev, u32 features)
 }
 EXPORT_SYMBOL(netdev_fix_features);
 
-void netdev_update_features(struct net_device *dev)
+int __netdev_update_features(struct net_device *dev)
 {
         u32 features;
         int err = 0;
@@ -5250,7 +5250,7 @@ void netdev_update_features(struct net_device *dev)
         features = netdev_fix_features(dev, features);
 
         if (dev->features == features)
-                return;
+                return 0;
 
         netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n",
                 dev->features, features);
@@ -5258,12 +5258,23 @@ void netdev_update_features(struct net_device *dev)
         if (dev->netdev_ops->ndo_set_features)
                 err = dev->netdev_ops->ndo_set_features(dev, features);
 
-        if (!err)
-                dev->features = features;
-        else if (err < 0)
+        if (unlikely(err < 0)) {
                 netdev_err(dev,
                         "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n",
                         err, features, dev->features);
+                return -1;
+        }
+
+        if (!err)
+                dev->features = features;
+
+        return 1;
+}
+
+void netdev_update_features(struct net_device *dev)
+{
+        if (__netdev_update_features(dev))
+                netdev_features_change(dev);
 }
 EXPORT_SYMBOL(netdev_update_features);
 
@@ -5414,6 +5425,14 @@ int register_netdevice(struct net_device *dev)
                 dev->features &= ~NETIF_F_GSO;
         }
 
+        /* Turn on no cache copy if HW is doing checksum */
+        dev->hw_features |= NETIF_F_NOCACHE_COPY;
+        if ((dev->features & NETIF_F_ALL_CSUM) &&
+            !(dev->features & NETIF_F_NO_CSUM)) {
+                dev->wanted_features |= NETIF_F_NOCACHE_COPY;
+                dev->features |= NETIF_F_NOCACHE_COPY;
+        }
+
         /* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
          * vlan_dev_init() will do the dev->features check, so these features
          * are enabled only if supported by underlying device.
@@ -5430,7 +5449,7 @@ int register_netdevice(struct net_device *dev)
                 goto err_uninit;
         dev->reg_state = NETREG_REGISTERED;
 
-        netdev_update_features(dev);
+        __netdev_update_features(dev);
 
         /*
          *      Default initial state at registry is that the
@@ -6171,6 +6190,10 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask)
                 }
         }
 
+        /* If device can't no cache copy, don't do for all */
+        if (!(one & NETIF_F_NOCACHE_COPY))
+                all &= ~NETIF_F_NOCACHE_COPY;
+
         one |= NETIF_F_ALL_CSUM;
 
         one |= all & NETIF_F_ONE_FOR_ALL;
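
Splitting __netdev_update_features() out lets callers that manage their own notification, register_netdevice() here and the ethtool paths in the next file, recompute features without emitting a redundant NETDEV_FEAT_CHANGE event. From a driver's point of view the contract is unchanged; a minimal hook pair might look like this illustrative template (the hardware constraint and example_hw_set_rx_csum() are assumptions, not from this commit):

    static u32 example_fix_features(struct net_device *dev, u32 features)
    {
            /* assumed constraint: TSO requires SG on this hardware */
            if (!(features & NETIF_F_SG))
                    features &= ~NETIF_F_TSO;
            return features;
    }

    static int example_set_features(struct net_device *dev, u32 features)
    {
            u32 changed = dev->features ^ features;

            if (changed & NETIF_F_RXCSUM)
                    example_hw_set_rx_csum(dev,        /* hypothetical */
                                           !!(features & NETIF_F_RXCSUM));
            return 0;   /* on success the core commits 'features' */
    }
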
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 74ead9eca126..704e176ad3a9 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -21,6 +21,8 @@
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/rtnetlink.h>
+#include <linux/sched.h>
 
 /*
  * Some useful ethtool_ops methods that're device independent.
@@ -317,7 +319,7 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
 
         dev->wanted_features &= ~features[0].valid;
         dev->wanted_features |= features[0].valid & features[0].requested;
-        netdev_update_features(dev);
+        __netdev_update_features(dev);
 
         if ((dev->wanted_features ^ dev->features) & features[0].valid)
                 ret |= ETHTOOL_F_WISH;
@@ -359,7 +361,7 @@ static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GS
359 /* NETIF_F_NTUPLE */ "rx-ntuple-filter", 361 /* NETIF_F_NTUPLE */ "rx-ntuple-filter",
360 /* NETIF_F_RXHASH */ "rx-hashing", 362 /* NETIF_F_RXHASH */ "rx-hashing",
361 /* NETIF_F_RXCSUM */ "rx-checksum", 363 /* NETIF_F_RXCSUM */ "rx-checksum",
362 "", 364 /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy"
363 "", 365 "",
364}; 366};
365 367
@@ -499,7 +501,7 @@ static int ethtool_set_one_feature(struct net_device *dev,
499 else 501 else
500 dev->wanted_features &= ~mask; 502 dev->wanted_features &= ~mask;
501 503
502 netdev_update_features(dev); 504 __netdev_update_features(dev);
503 return 0; 505 return 0;
504 } 506 }
505 507
@@ -551,7 +553,7 @@ int __ethtool_set_flags(struct net_device *dev, u32 data)
551 dev->wanted_features = 553 dev->wanted_features =
552 (dev->wanted_features & ~changed) | data; 554 (dev->wanted_features & ~changed) | data;
553 555
554 netdev_update_features(dev); 556 __netdev_update_features(dev);
555 557
556 return 0; 558 return 0;
557} 559}
@@ -908,6 +910,9 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
908 struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL; 910 struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL;
909 int ret; 911 int ret;
910 912
913 if (!ops->set_rx_ntuple)
914 return -EOPNOTSUPP;
915
911 if (!(dev->features & NETIF_F_NTUPLE)) 916 if (!(dev->features & NETIF_F_NTUPLE))
912 return -EINVAL; 917 return -EINVAL;
913 918
@@ -1618,14 +1623,63 @@ out:
1618static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) 1623static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
1619{ 1624{
1620 struct ethtool_value id; 1625 struct ethtool_value id;
1626 static bool busy;
1627 int rc;
1621 1628
1622 if (!dev->ethtool_ops->phys_id) 1629 if (!dev->ethtool_ops->set_phys_id && !dev->ethtool_ops->phys_id)
1623 return -EOPNOTSUPP; 1630 return -EOPNOTSUPP;
1624 1631
1632 if (busy)
1633 return -EBUSY;
1634
1625 if (copy_from_user(&id, useraddr, sizeof(id))) 1635 if (copy_from_user(&id, useraddr, sizeof(id)))
1626 return -EFAULT; 1636 return -EFAULT;
1627 1637
1628 return dev->ethtool_ops->phys_id(dev, id.data); 1638 if (!dev->ethtool_ops->set_phys_id)
1639 /* Do it the old way */
1640 return dev->ethtool_ops->phys_id(dev, id.data);
1641
1642 rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE);
1643 if (rc && rc != -EINVAL)
1644 return rc;
1645
1646 /* Drop the RTNL lock while waiting, but prevent reentry or
1647 * removal of the device.
1648 */
1649 busy = true;
1650 dev_hold(dev);
1651 rtnl_unlock();
1652
1653 if (rc == 0) {
1654 /* Driver will handle this itself */
1655 schedule_timeout_interruptible(
1656 id.data ? id.data : MAX_SCHEDULE_TIMEOUT);
1657 } else {
1658 /* Driver expects to be called periodically */
1659 do {
1660 rtnl_lock();
1661 rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ON);
1662 rtnl_unlock();
1663 if (rc)
1664 break;
1665 schedule_timeout_interruptible(HZ / 2);
1666
1667 rtnl_lock();
1668 rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_OFF);
1669 rtnl_unlock();
1670 if (rc)
1671 break;
1672 schedule_timeout_interruptible(HZ / 2);
1673 } while (!signal_pending(current) &&
1674 (id.data == 0 || --id.data != 0));
1675 }
1676
1677 rtnl_lock();
1678 dev_put(dev);
1679 busy = false;
1680
1681 (void)dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE);
1682 return rc;
1629} 1683}
1630 1684
1631static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) 1685static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
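The rewritten ethtool_phys_id() drives a new four-state callback while keeping the legacy phys_id() path. A hypothetical driver-side counterpart (foo_* names and the foo_priv helpers are placeholders; the state enum is presumably named ethtool_phys_id_state), matching the conventions of the loop above — returning -EINVAL from ETHTOOL_ID_ACTIVE asks the core to toggle the LED every HZ/2, returning 0 means the driver blinks autonomously:

	static int foo_set_phys_id(struct net_device *dev,
				   enum ethtool_phys_id_state state)
	{
		struct foo_priv *priv = netdev_priv(dev);

		switch (state) {
		case ETHTOOL_ID_ACTIVE:
			foo_save_led_state(priv);
			return -EINVAL;		/* "call me periodically" */
		case ETHTOOL_ID_ON:
			foo_set_led(priv, true);
			break;
		case ETHTOOL_ID_OFF:
			foo_set_led(priv, false);
			break;
		case ETHTOOL_ID_INACTIVE:
			foo_restore_led_state(priv);
			break;
		}
		return 0;
	}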
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 451088330bbb..22524716fe70 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -44,6 +44,7 @@
44#include <net/arp.h> 44#include <net/arp.h>
45#include <net/ip_fib.h> 45#include <net/ip_fib.h>
46#include <net/rtnetlink.h> 46#include <net/rtnetlink.h>
47#include <net/xfrm.h>
47 48
48#ifndef CONFIG_IP_MULTIPLE_TABLES 49#ifndef CONFIG_IP_MULTIPLE_TABLES
49 50
@@ -188,9 +189,9 @@ EXPORT_SYMBOL(inet_dev_addr_type);
188 * - check, that packet arrived from expected physical interface. 189 * - check, that packet arrived from expected physical interface.
189 * called with rcu_read_lock() 190 * called with rcu_read_lock()
190 */ 191 */
191int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, 192int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos,
192 struct net_device *dev, __be32 *spec_dst, 193 int oif, struct net_device *dev, __be32 *spec_dst,
193 u32 *itag, u32 mark) 194 u32 *itag)
194{ 195{
195 struct in_device *in_dev; 196 struct in_device *in_dev;
196 struct flowi4 fl4; 197 struct flowi4 fl4;
@@ -202,7 +203,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
202 203
203 fl4.flowi4_oif = 0; 204 fl4.flowi4_oif = 0;
204 fl4.flowi4_iif = oif; 205 fl4.flowi4_iif = oif;
205 fl4.flowi4_mark = mark;
206 fl4.daddr = src; 206 fl4.daddr = src;
207 fl4.saddr = dst; 207 fl4.saddr = dst;
208 fl4.flowi4_tos = tos; 208 fl4.flowi4_tos = tos;
@@ -212,10 +212,12 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
212 in_dev = __in_dev_get_rcu(dev); 212 in_dev = __in_dev_get_rcu(dev);
213 if (in_dev) { 213 if (in_dev) {
214 no_addr = in_dev->ifa_list == NULL; 214 no_addr = in_dev->ifa_list == NULL;
215 rpf = IN_DEV_RPFILTER(in_dev); 215
216 /* Ignore rp_filter for packets protected by IPsec. */
217 rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev);
218
216 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); 219 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
217 if (mark && !IN_DEV_SRC_VMARK(in_dev)) 220 fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
218 fl4.flowi4_mark = 0;
219 } 221 }
220 222
221 if (in_dev == NULL) 223 if (in_dev == NULL)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e9013d6c1f51..bde80c450b52 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -126,7 +126,7 @@ struct tnode {
126 struct work_struct work; 126 struct work_struct work;
127 struct tnode *tnode_free; 127 struct tnode *tnode_free;
128 }; 128 };
129 struct rt_trie_node *child[0]; 129 struct rt_trie_node __rcu *child[0];
130}; 130};
131 131
132#ifdef CONFIG_IP_FIB_TRIE_STATS 132#ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -151,7 +151,7 @@ struct trie_stat {
151}; 151};
152 152
153struct trie { 153struct trie {
154 struct rt_trie_node *trie; 154 struct rt_trie_node __rcu *trie;
155#ifdef CONFIG_IP_FIB_TRIE_STATS 155#ifdef CONFIG_IP_FIB_TRIE_STATS
156 struct trie_use_stats stats; 156 struct trie_use_stats stats;
157#endif 157#endif
@@ -177,16 +177,29 @@ static const int sync_pages = 128;
177static struct kmem_cache *fn_alias_kmem __read_mostly; 177static struct kmem_cache *fn_alias_kmem __read_mostly;
178static struct kmem_cache *trie_leaf_kmem __read_mostly; 178static struct kmem_cache *trie_leaf_kmem __read_mostly;
179 179
180static inline struct tnode *node_parent(struct rt_trie_node *node) 180/*
181 * caller must hold RTNL
182 */
183static inline struct tnode *node_parent(const struct rt_trie_node *node)
181{ 184{
182 return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); 185 unsigned long parent;
186
187 parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held());
188
189 return (struct tnode *)(parent & ~NODE_TYPE_MASK);
183} 190}
184 191
185static inline struct tnode *node_parent_rcu(struct rt_trie_node *node) 192/*
193 * caller must hold RCU read lock or RTNL
194 */
195static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node)
186{ 196{
187 struct tnode *ret = node_parent(node); 197 unsigned long parent;
198
199 parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() ||
200 lockdep_rtnl_is_held());
188 201
189 return rcu_dereference_rtnl(ret); 202 return (struct tnode *)(parent & ~NODE_TYPE_MASK);
190} 203}
191 204
192/* Same as rcu_assign_pointer 205/* Same as rcu_assign_pointer
@@ -198,18 +211,24 @@ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
198 node->parent = (unsigned long)ptr | NODE_TYPE(node); 211 node->parent = (unsigned long)ptr | NODE_TYPE(node);
199} 212}
200 213
201static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i) 214/*
215 * caller must hold RTNL
216 */
217static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i)
202{ 218{
203 BUG_ON(i >= 1U << tn->bits); 219 BUG_ON(i >= 1U << tn->bits);
204 220
205 return tn->child[i]; 221 return rtnl_dereference(tn->child[i]);
206} 222}
207 223
208static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) 224/*
225 * caller must hold RCU read lock or RTNL
226 */
227static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i)
209{ 228{
210 struct rt_trie_node *ret = tnode_get_child(tn, i); 229 BUG_ON(i >= 1U << tn->bits);
211 230
212 return rcu_dereference_rtnl(ret); 231 return rcu_dereference_rtnl(tn->child[i]);
213} 232}
214 233
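These helpers codify the usual sparse-checked RCU pattern: pointers written under RTNL and traversed under RCU get the __rcu annotation, and every load states which protection it relies on. A generic sketch of the pattern (foo/bar are placeholders, not fib_trie types):

	struct foo {
		struct bar __rcu *ptr;
	};

	/* writer, runs under RTNL */
	static void foo_set(struct foo *f, struct bar *new)
	{
		rcu_assign_pointer(f->ptr, new);
	}

	/* control-path reader: RTNL held, no rcu_read_lock() required */
	static struct bar *foo_get_rtnl(struct foo *f)
	{
		return rtnl_dereference(f->ptr);
	}

	/* reader that may hold either the RCU read lock or RTNL */
	static struct bar *foo_get(struct foo *f)
	{
		return rcu_dereference_rtnl(f->ptr);
	}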
215static inline int tnode_child_length(const struct tnode *tn) 234static inline int tnode_child_length(const struct tnode *tn)
@@ -487,7 +506,7 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i,
487static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, 506static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
488 int wasfull) 507 int wasfull)
489{ 508{
490 struct rt_trie_node *chi = tn->child[i]; 509 struct rt_trie_node *chi = rtnl_dereference(tn->child[i]);
491 int isfull; 510 int isfull;
492 511
493 BUG_ON(i >= 1<<tn->bits); 512 BUG_ON(i >= 1<<tn->bits);
@@ -665,7 +684,7 @@ one_child:
665 for (i = 0; i < tnode_child_length(tn); i++) { 684 for (i = 0; i < tnode_child_length(tn); i++) {
666 struct rt_trie_node *n; 685 struct rt_trie_node *n;
667 686
668 n = tn->child[i]; 687 n = rtnl_dereference(tn->child[i]);
669 if (!n) 688 if (!n)
670 continue; 689 continue;
671 690
@@ -679,6 +698,20 @@ one_child:
679 return (struct rt_trie_node *) tn; 698 return (struct rt_trie_node *) tn;
680} 699}
681 700
701
702static void tnode_clean_free(struct tnode *tn)
703{
704 int i;
705 struct tnode *tofree;
706
707 for (i = 0; i < tnode_child_length(tn); i++) {
708 tofree = (struct tnode *)rtnl_dereference(tn->child[i]);
709 if (tofree)
710 tnode_free(tofree);
711 }
712 tnode_free(tn);
713}
714
682static struct tnode *inflate(struct trie *t, struct tnode *tn) 715static struct tnode *inflate(struct trie *t, struct tnode *tn)
683{ 716{
684 struct tnode *oldtnode = tn; 717 struct tnode *oldtnode = tn;
@@ -755,8 +788,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
755 inode = (struct tnode *) node; 788 inode = (struct tnode *) node;
756 789
757 if (inode->bits == 1) { 790 if (inode->bits == 1) {
758 put_child(t, tn, 2*i, inode->child[0]); 791 put_child(t, tn, 2*i, rtnl_dereference(inode->child[0]));
759 put_child(t, tn, 2*i+1, inode->child[1]); 792 put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1]));
760 793
761 tnode_free_safe(inode); 794 tnode_free_safe(inode);
762 continue; 795 continue;
@@ -797,8 +830,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
797 830
798 size = tnode_child_length(left); 831 size = tnode_child_length(left);
799 for (j = 0; j < size; j++) { 832 for (j = 0; j < size; j++) {
800 put_child(t, left, j, inode->child[j]); 833 put_child(t, left, j, rtnl_dereference(inode->child[j]));
801 put_child(t, right, j, inode->child[j + size]); 834 put_child(t, right, j, rtnl_dereference(inode->child[j + size]));
802 } 835 }
803 put_child(t, tn, 2*i, resize(t, left)); 836 put_child(t, tn, 2*i, resize(t, left));
804 put_child(t, tn, 2*i+1, resize(t, right)); 837 put_child(t, tn, 2*i+1, resize(t, right));
@@ -808,18 +841,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
808 tnode_free_safe(oldtnode); 841 tnode_free_safe(oldtnode);
809 return tn; 842 return tn;
810nomem: 843nomem:
811 { 844 tnode_clean_free(tn);
812 int size = tnode_child_length(tn); 845 return ERR_PTR(-ENOMEM);
813 int j;
814
815 for (j = 0; j < size; j++)
816 if (tn->child[j])
817 tnode_free((struct tnode *)tn->child[j]);
818
819 tnode_free(tn);
820
821 return ERR_PTR(-ENOMEM);
822 }
823} 846}
824 847
825static struct tnode *halve(struct trie *t, struct tnode *tn) 848static struct tnode *halve(struct trie *t, struct tnode *tn)
@@ -890,18 +913,8 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
890 tnode_free_safe(oldtnode); 913 tnode_free_safe(oldtnode);
891 return tn; 914 return tn;
892nomem: 915nomem:
893 { 916 tnode_clean_free(tn);
894 int size = tnode_child_length(tn); 917 return ERR_PTR(-ENOMEM);
895 int j;
896
897 for (j = 0; j < size; j++)
898 if (tn->child[j])
899 tnode_free((struct tnode *)tn->child[j]);
900
901 tnode_free(tn);
902
903 return ERR_PTR(-ENOMEM);
904 }
905} 918}
906 919
907/* readside must use rcu_read_lock currently dump routines 920/* readside must use rcu_read_lock currently dump routines
@@ -1033,7 +1046,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1033 t_key cindex; 1046 t_key cindex;
1034 1047
1035 pos = 0; 1048 pos = 0;
1036 n = t->trie; 1049 n = rtnl_dereference(t->trie);
1037 1050
1038 /* If we point to NULL, stop. Either the tree is empty and we should 1051 /* If we point to NULL, stop. Either the tree is empty and we should
1039 * just put a new leaf in if, or we have reached an empty child slot, 1052 * just put a new leaf in if, or we have reached an empty child slot,
@@ -1756,7 +1769,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c)
1756 continue; 1769 continue;
1757 1770
1758 if (IS_LEAF(c)) { 1771 if (IS_LEAF(c)) {
1759 prefetch(p->child[idx]); 1772 prefetch(rcu_dereference_rtnl(p->child[idx]));
1760 return (struct leaf *) c; 1773 return (struct leaf *) c;
1761 } 1774 }
1762 1775
@@ -2272,7 +2285,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2272 2285
2273 /* walk rest of this hash chain */ 2286 /* walk rest of this hash chain */
2274 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); 2287 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
2275 while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) { 2288 while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
2276 tb = hlist_entry(tb_node, struct fib_table, tb_hlist); 2289 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2277 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 2290 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2278 if (n) 2291 if (n)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 6c0b7f4a3d7d..f784608a4c45 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -356,20 +356,14 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
356 struct rtable *rt; 356 struct rtable *rt;
357 const struct inet_request_sock *ireq = inet_rsk(req); 357 const struct inet_request_sock *ireq = inet_rsk(req);
358 struct ip_options *opt = inet_rsk(req)->opt; 358 struct ip_options *opt = inet_rsk(req)->opt;
359 struct flowi4 fl4 = {
360 .flowi4_oif = sk->sk_bound_dev_if,
361 .flowi4_mark = sk->sk_mark,
362 .daddr = ((opt && opt->srr) ?
363 opt->faddr : ireq->rmt_addr),
364 .saddr = ireq->loc_addr,
365 .flowi4_tos = RT_CONN_FLAGS(sk),
366 .flowi4_proto = sk->sk_protocol,
367 .flowi4_flags = inet_sk_flowi_flags(sk),
368 .fl4_sport = inet_sk(sk)->inet_sport,
369 .fl4_dport = ireq->rmt_port,
370 };
371 struct net *net = sock_net(sk); 359 struct net *net = sock_net(sk);
360 struct flowi4 fl4;
372 361
362 flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
363 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
364 sk->sk_protocol, inet_sk_flowi_flags(sk),
365 (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
366 ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
373 security_req_classify_flow(req, flowi4_to_flowi(&fl4)); 367 security_req_classify_flow(req, flowi4_to_flowi(&fl4));
374 rt = ip_route_output_flow(net, &fl4, sk); 368 rt = ip_route_output_flow(net, &fl4, sk);
375 if (IS_ERR(rt)) 369 if (IS_ERR(rt))
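This and the following conversions replace designated-initializer flowi4 setup with the flowi4_init_output() helper. Its definition is not part of these hunks; judging from the call sites it presumably looks roughly like the sketch below (field names inferred from struct flowi4 uses elsewhere in this diff, remaining fields zeroed):

	static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
					      __u32 mark, __u8 tos, __u8 scope,
					      __u8 proto, __u8 flags,
					      __be32 daddr, __be32 saddr,
					      __be16 dport, __be16 sport)
	{
		fl4->flowi4_oif = oif;
		fl4->flowi4_mark = mark;
		fl4->flowi4_tos = tos;
		fl4->flowi4_scope = scope;
		fl4->flowi4_proto = proto;
		fl4->flowi4_flags = flags;
		fl4->daddr = daddr;
		fl4->saddr = saddr;
		fl4->fl4_dport = dport;
		fl4->fl4_sport = sport;
	}

Note the argument order: destination address before source, and destination port before source port — easy to get backwards when reading the converted call sites.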
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 459c011b1d4a..bdad3d60aa82 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1474,16 +1474,14 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1474 } 1474 }
1475 1475
1476 { 1476 {
1477 struct flowi4 fl4 = { 1477 struct flowi4 fl4;
1478 .flowi4_oif = arg->bound_dev_if, 1478
1479 .daddr = daddr, 1479 flowi4_init_output(&fl4, arg->bound_dev_if, 0,
1480 .saddr = rt->rt_spec_dst, 1480 RT_TOS(ip_hdr(skb)->tos),
1481 .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), 1481 RT_SCOPE_UNIVERSE, sk->sk_protocol,
1482 .fl4_sport = tcp_hdr(skb)->dest, 1482 ip_reply_arg_flowi_flags(arg),
1483 .fl4_dport = tcp_hdr(skb)->source, 1483 daddr, rt->rt_spec_dst,
1484 .flowi4_proto = sk->sk_protocol, 1484 tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
1485 .flowi4_flags = ip_reply_arg_flowi_flags(arg),
1486 };
1487 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); 1485 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
1488 rt = ip_route_output_key(sock_net(sk), &fl4); 1486 rt = ip_route_output_key(sock_net(sk), &fl4);
1489 if (IS_ERR(rt)) 1487 if (IS_ERR(rt))
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bceaec42c37d..2b50cc2da90a 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -548,17 +548,13 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
548 } 548 }
549 549
550 { 550 {
551 struct flowi4 fl4 = { 551 struct flowi4 fl4;
552 .flowi4_oif = ipc.oif, 552
553 .flowi4_mark = sk->sk_mark, 553 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
554 .daddr = daddr, 554 RT_SCOPE_UNIVERSE,
555 .saddr = saddr, 555 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
556 .flowi4_tos = tos, 556 FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0);
557 .flowi4_proto = (inet->hdrincl ? 557
558 IPPROTO_RAW :
559 sk->sk_protocol),
560 .flowi4_flags = FLOWI_FLAG_CAN_SLEEP,
561 };
562 if (!inet->hdrincl) { 558 if (!inet->hdrincl) {
563 err = raw_probe_proto_opt(&fl4, msg); 559 err = raw_probe_proto_opt(&fl4, msg);
564 if (err) 560 if (err)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c1acf69858fd..0e7430c327a7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1871,8 +1871,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1871 goto e_inval; 1871 goto e_inval;
1872 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); 1872 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1873 } else { 1873 } else {
1874 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, 1874 err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
1875 &itag, 0); 1875 &itag);
1876 if (err < 0) 1876 if (err < 0)
1877 goto e_err; 1877 goto e_err;
1878 } 1878 }
@@ -1981,8 +1981,8 @@ static int __mkroute_input(struct sk_buff *skb,
1981 } 1981 }
1982 1982
1983 1983
1984 err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), 1984 err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
1985 in_dev->dev, &spec_dst, &itag, skb->mark); 1985 in_dev->dev, &spec_dst, &itag);
1986 if (err < 0) { 1986 if (err < 0) {
1987 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, 1987 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
1988 saddr); 1988 saddr);
@@ -2150,9 +2150,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2150 goto brd_input; 2150 goto brd_input;
2151 2151
2152 if (res.type == RTN_LOCAL) { 2152 if (res.type == RTN_LOCAL) {
2153 err = fib_validate_source(saddr, daddr, tos, 2153 err = fib_validate_source(skb, saddr, daddr, tos,
2154 net->loopback_dev->ifindex, 2154 net->loopback_dev->ifindex,
2155 dev, &spec_dst, &itag, skb->mark); 2155 dev, &spec_dst, &itag);
2156 if (err < 0) 2156 if (err < 0)
2157 goto martian_source_keep_err; 2157 goto martian_source_keep_err;
2158 if (err) 2158 if (err)
@@ -2176,8 +2176,8 @@ brd_input:
2176 if (ipv4_is_zeronet(saddr)) 2176 if (ipv4_is_zeronet(saddr))
2177 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); 2177 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
2178 else { 2178 else {
2179 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, 2179 err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
2180 &itag, skb->mark); 2180 &itag);
2181 if (err < 0) 2181 if (err < 0)
2182 goto martian_source_keep_err; 2182 goto martian_source_keep_err;
2183 if (err) 2183 if (err)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 8b44c6d2a79b..71e029691908 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -345,17 +345,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
345 * no easy way to do this. 345 * no easy way to do this.
346 */ 346 */
347 { 347 {
348 struct flowi4 fl4 = { 348 struct flowi4 fl4;
349 .flowi4_mark = sk->sk_mark, 349
350 .daddr = ((opt && opt->srr) ? 350 flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk),
351 opt->faddr : ireq->rmt_addr), 351 RT_SCOPE_UNIVERSE, IPPROTO_TCP,
352 .saddr = ireq->loc_addr, 352 inet_sk_flowi_flags(sk),
353 .flowi4_tos = RT_CONN_FLAGS(sk), 353 (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
354 .flowi4_proto = IPPROTO_TCP, 354 ireq->loc_addr, th->source, th->dest);
355 .flowi4_flags = inet_sk_flowi_flags(sk),
356 .fl4_sport = th->dest,
357 .fl4_dport = th->source,
358 };
359 security_req_classify_flow(req, flowi4_to_flowi(&fl4)); 355 security_req_classify_flow(req, flowi4_to_flowi(&fl4));
360 rt = ip_route_output_key(sock_net(sk), &fl4); 356 rt = ip_route_output_key(sock_net(sk), &fl4);
361 if (IS_ERR(rt)) { 357 if (IS_ERR(rt)) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b22d45010545..054a59d21eb0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -999,7 +999,8 @@ new_segment:
999 /* We have some space in skb head. Superb! */ 999 /* We have some space in skb head. Superb! */
1000 if (copy > skb_tailroom(skb)) 1000 if (copy > skb_tailroom(skb))
1001 copy = skb_tailroom(skb); 1001 copy = skb_tailroom(skb);
1002 if ((err = skb_add_data(skb, from, copy)) != 0) 1002 err = skb_add_data_nocache(sk, skb, from, copy);
1003 if (err)
1003 goto do_fault; 1004 goto do_fault;
1004 } else { 1005 } else {
1005 int merge = 0; 1006 int merge = 0;
@@ -1042,8 +1043,8 @@ new_segment:
1042 1043
1043 /* Time to copy data. We are close to 1044 /* Time to copy data. We are close to
1044 * the end! */ 1045 * the end! */
1045 err = skb_copy_to_page(sk, from, skb, page, 1046 err = skb_copy_to_page_nocache(sk, from, skb,
1046 off, copy); 1047 page, off, copy);
1047 if (err) { 1048 if (err) {
1048 /* If this page was new, give it to the 1049 /* If this page was new, give it to the
1049 * socket so it does not get leaked. 1050 * socket so it does not get leaked.
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f87a8eb76f3b..a15c8fb653af 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -909,20 +909,14 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
909 rt = (struct rtable *)sk_dst_check(sk, 0); 909 rt = (struct rtable *)sk_dst_check(sk, 0);
910 910
911 if (rt == NULL) { 911 if (rt == NULL) {
912 struct flowi4 fl4 = { 912 struct flowi4 fl4;
913 .flowi4_oif = ipc.oif,
914 .flowi4_mark = sk->sk_mark,
915 .daddr = faddr,
916 .saddr = saddr,
917 .flowi4_tos = tos,
918 .flowi4_proto = sk->sk_protocol,
919 .flowi4_flags = (inet_sk_flowi_flags(sk) |
920 FLOWI_FLAG_CAN_SLEEP),
921 .fl4_sport = inet->inet_sport,
922 .fl4_dport = dport,
923 };
924 struct net *net = sock_net(sk); 913 struct net *net = sock_net(sk);
925 914
915 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
916 RT_SCOPE_UNIVERSE, sk->sk_protocol,
917 inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP,
918 faddr, saddr, dport, inet->inet_sport);
919
926 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); 920 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
927 rt = ip_route_output_flow(net, &fl4, sk); 921 rt = ip_route_output_flow(net, &fl4, sk);
928 if (IS_ERR(rt)) { 922 if (IS_ERR(rt)) {
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index a7a5583d4f68..aeaa2110b699 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -239,6 +239,17 @@ config NET_SCH_CHOKE
239 To compile this code as a module, choose M here: the 239 To compile this code as a module, choose M here: the
240 module will be called sch_choke. 240 module will be called sch_choke.
241 241
242config NET_SCH_QFQ
243 tristate "Quick Fair Queueing scheduler (QFQ)"
244 help
245 Say Y here if you want to use the Quick Fair Queueing Scheduler (QFQ)
246 packet scheduling algorithm.
247
248 To compile this driver as a module, choose M here: the module
249 will be called sch_qfq.
250
251 If unsure, say N.
252
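Once iproute2 grows matching support, configuration would presumably follow the usual pattern: attach the root qdisc with something like "tc qdisc add dev eth0 root handle 1: qfq", then create one class per flow, e.g. "tc class add dev eth0 parent 1: classid 1:1 qfq weight 10" (syntax illustrative; the weight and the maximum packet length map to the TCA_QFQ_WEIGHT and TCA_QFQ_LMAX attributes parsed in sch_qfq.c below).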
242config NET_SCH_INGRESS 253config NET_SCH_INGRESS
243 tristate "Ingress Qdisc" 254 tristate "Ingress Qdisc"
244 depends on NET_CLS_ACT 255 depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 2e77b8dba22e..dc5889c0a15a 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -35,6 +35,7 @@ obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
35obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o 35obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
36obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o 36obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o
37obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o 37obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
38obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
38 39
39obj-$(CONFIG_NET_CLS_U32) += cls_u32.o 40obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
40obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o 41obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
new file mode 100644
index 000000000000..103343408593
--- /dev/null
+++ b/net/sched/sch_qfq.c
@@ -0,0 +1,1137 @@
1/*
2 * net/sched/sch_qfq.c Quick Fair Queueing Scheduler.
3 *
4 * Copyright (c) 2009 Fabio Checconi, Luigi Rizzo, and Paolo Valente.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/bitops.h>
14#include <linux/errno.h>
15#include <linux/netdevice.h>
16#include <linux/pkt_sched.h>
17#include <net/sch_generic.h>
18#include <net/pkt_sched.h>
19#include <net/pkt_cls.h>
20
21
22/* Quick Fair Queueing
23 ===================
24
25 Sources:
26
27 Fabio Checconi, Luigi Rizzo, and Paolo Valente: "QFQ: Efficient
28 Packet Scheduling with Tight Bandwidth Distribution Guarantees."
29
30 See also:
31 http://retis.sssup.it/~fabio/linux/qfq/
32 */
33
34/*
35
36 Virtual time computations.
37
38 S, F and V are all computed in fixed point arithmetic with
39 FRAC_BITS decimal bits.
40
41 QFQ_MAX_INDEX is the maximum index allowed for a group. We need
42 one bit per index.
43 QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
44
45 The layout of the bits is as below:
46
47 [ MTU_SHIFT ][ FRAC_BITS ]
48 [ MAX_INDEX ][ MIN_SLOT_SHIFT ]
49 ^.__grp->index = 0
50 *.__grp->slot_shift
51
52 where MIN_SLOT_SHIFT is derived by difference from the others.
53
54 The max group index corresponds to Lmax/w_min, where
55 Lmax=1<<MTU_SHIFT, w_min = 1 .
56 From this, and knowing how many groups (MAX_INDEX) we want,
57 we can derive the shift corresponding to each group.
58
59 Because we often need to compute
60 F = S + len/w_i and V = V + len/wsum
61 instead of storing w_i store the value
62 inv_w = (1<<FRAC_BITS)/w_i
 63 so we can do F = S + len * inv_w (and V = V + len * IWSUM, with IWSUM = (1<<FRAC_BITS)/wsum).
64 We use W_TOT in the formulas so we can easily move between
65 static and adaptive weight sum.
66
67 The per-scheduler-instance data contain all the data structures
68 for the scheduler: bitmaps and bucket lists.
69
70 */
71
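A concrete instance of the fixed-point scheme just described (numbers chosen for illustration, not taken from the code):

	static void qfq_fixedpoint_example(void)
	{
		/* FRAC_BITS = 30, so ONE_FP = 1 << 30; a class of weight 4
		 * stores inv_w = ONE_FP / 4 = 1 << 28. */
		u64 S = 0;
		u32 inv_w = (1UL << 30) / 4;
		u64 F = S + (u64)1500 * inv_w;
		/* F - S == 1500 << 28 == 375ULL << 30: serving a 1500-byte
		 * packet advances the finish time by len/w = 375 units of
		 * 30-bit fixed-point virtual time. */
	}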
72/*
73 * Maximum number of consecutive slots occupied by backlogged classes
74 * inside a group.
75 */
76#define QFQ_MAX_SLOTS 32
77
78/*
79 * Shifts used for class<->group mapping. We allow class weights that are
80 * in the range [1, 2^MAX_WSHIFT], and we try to map each class i to the
81 * group with the smallest index that can support the L_i / r_i configured
82 * for the class.
83 *
84 * grp->index is the index of the group; and grp->slot_shift
85 * is the shift for the corresponding (scaled) sigma_i.
86 */
87#define QFQ_MAX_INDEX 19
88#define QFQ_MAX_WSHIFT 16
89
90#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
91#define QFQ_MAX_WSUM (2*QFQ_MAX_WEIGHT)
92
93#define FRAC_BITS 30 /* fixed point arithmetic */
94#define ONE_FP (1UL << FRAC_BITS)
95#define IWSUM (ONE_FP/QFQ_MAX_WSUM)
96
97#define QFQ_MTU_SHIFT 11
98#define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
99
100/*
101 * Possible group states. These values are used as indexes for the bitmaps
102 * array of struct qfq_queue.
103 */
104enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
105
106struct qfq_group;
107
108struct qfq_class {
109 struct Qdisc_class_common common;
110
111 unsigned int refcnt;
112 unsigned int filter_cnt;
113
114 struct gnet_stats_basic_packed bstats;
115 struct gnet_stats_queue qstats;
116 struct gnet_stats_rate_est rate_est;
117 struct Qdisc *qdisc;
118
119 struct hlist_node next; /* Link for the slot list. */
120 u64 S, F; /* flow timestamps (exact) */
121
122 /* group we belong to. In principle we would need the index,
123 * which is log_2(lmax/weight), but we never reference it
124 * directly, only the group.
125 */
126 struct qfq_group *grp;
127
128 /* these are copied from the flowset. */
129 u32 inv_w; /* ONE_FP/weight */
130 u32 lmax; /* Max packet size for this flow. */
131};
132
133struct qfq_group {
134 u64 S, F; /* group timestamps (approx). */
135 unsigned int slot_shift; /* Slot shift. */
136 unsigned int index; /* Group index. */
137 unsigned int front; /* Index of the front slot. */
138 unsigned long full_slots; /* non-empty slots */
139
140 /* Array of RR lists of active classes. */
141 struct hlist_head slots[QFQ_MAX_SLOTS];
142};
143
144struct qfq_sched {
145 struct tcf_proto *filter_list;
146 struct Qdisc_class_hash clhash;
147
148 u64 V; /* Precise virtual time. */
149 u32 wsum; /* weight sum */
150
151 unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */
152 struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */
153};
154
155static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
156{
157 struct qfq_sched *q = qdisc_priv(sch);
158 struct Qdisc_class_common *clc;
159
160 clc = qdisc_class_find(&q->clhash, classid);
161 if (clc == NULL)
162 return NULL;
163 return container_of(clc, struct qfq_class, common);
164}
165
166static void qfq_purge_queue(struct qfq_class *cl)
167{
168 unsigned int len = cl->qdisc->q.qlen;
169
170 qdisc_reset(cl->qdisc);
171 qdisc_tree_decrease_qlen(cl->qdisc, len);
172}
173
174static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
175 [TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
176 [TCA_QFQ_LMAX] = { .type = NLA_U32 },
177};
178
179/*
180 * Calculate a flow index, given its weight and maximum packet length.
181 * index = log_2(maxlen/weight) but we need to apply the scaling.
182 * This is used only once at flow creation.
183 */
184static int qfq_calc_index(u32 inv_w, unsigned int maxlen)
185{
186 u64 slot_size = (u64)maxlen * inv_w;
187 unsigned long size_map;
188 int index = 0;
189
190 size_map = slot_size >> QFQ_MIN_SLOT_SHIFT;
191 if (!size_map)
192 goto out;
193
194 index = __fls(size_map) + 1; /* basically a log_2 */
195 index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
196
197 if (index < 0)
198 index = 0;
199out:
200 pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n",
201 (unsigned long) ONE_FP/inv_w, maxlen, index);
202
203 return index;
204}
205
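A worked example of the index computation (illustrative values):

	/* weight = 1, maxlen = 2048:
	 *   inv_w     = ONE_FP = 1 << 30
	 *   slot_size = 2048 * 2^30 = 2^41
	 *   QFQ_MIN_SLOT_SHIFT = FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX
	 *                      = 30 + 11 - 19 = 22
	 *   size_map  = 2^41 >> 22 = 2^19, so index = __fls(size_map) + 1 = 20;
	 *   slot_size == 1ULL << (20 + 22 - 1), so the exact-power-of-two
	 *   correction subtracts one, giving index 19 == QFQ_MAX_INDEX:
	 *   the minimum-weight, maximum-length class lands in the last group. */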
206static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
207 struct nlattr **tca, unsigned long *arg)
208{
209 struct qfq_sched *q = qdisc_priv(sch);
210 struct qfq_class *cl = (struct qfq_class *)*arg;
211 struct nlattr *tb[TCA_QFQ_MAX + 1];
212 u32 weight, lmax, inv_w;
213 int i, err;
214
215 if (tca[TCA_OPTIONS] == NULL) {
216 pr_notice("qfq: no options\n");
217 return -EINVAL;
218 }
219
220 err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy);
221 if (err < 0)
222 return err;
223
224 if (tb[TCA_QFQ_WEIGHT]) {
225 weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
226 if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) {
227 pr_notice("qfq: invalid weight %u\n", weight);
228 return -EINVAL;
229 }
230 } else
231 weight = 1;
232
233 inv_w = ONE_FP / weight;
234 weight = ONE_FP / inv_w;
235 if (q->wsum + weight > QFQ_MAX_WSUM) {
236 pr_notice("qfq: total weight out of range (%u + %u)\n",
237 weight, q->wsum);
238 return -EINVAL;
239 }
240
241 if (tb[TCA_QFQ_LMAX]) {
242 lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
243 if (!lmax || lmax > (1UL << QFQ_MTU_SHIFT)) {
244 pr_notice("qfq: invalid max length %u\n", lmax);
245 return -EINVAL;
246 }
247 } else
248 lmax = 1UL << QFQ_MTU_SHIFT;
249
250 if (cl != NULL) {
251 if (tca[TCA_RATE]) {
252 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
253 qdisc_root_sleeping_lock(sch),
254 tca[TCA_RATE]);
255 if (err)
256 return err;
257 }
258
259 sch_tree_lock(sch);
260 if (tb[TCA_QFQ_WEIGHT]) {
 261 q->wsum += weight - ONE_FP / cl->inv_w;
262 cl->inv_w = inv_w;
263 }
264 sch_tree_unlock(sch);
265
266 return 0;
267 }
268
269 cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL);
270 if (cl == NULL)
271 return -ENOBUFS;
272
273 cl->refcnt = 1;
274 cl->common.classid = classid;
275 cl->lmax = lmax;
276 cl->inv_w = inv_w;
277 i = qfq_calc_index(cl->inv_w, cl->lmax);
278
279 cl->grp = &q->groups[i];
280 q->wsum += weight;
281
282 cl->qdisc = qdisc_create_dflt(sch->dev_queue,
283 &pfifo_qdisc_ops, classid);
284 if (cl->qdisc == NULL)
285 cl->qdisc = &noop_qdisc;
286
287 if (tca[TCA_RATE]) {
288 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
289 qdisc_root_sleeping_lock(sch),
290 tca[TCA_RATE]);
291 if (err) {
292 qdisc_destroy(cl->qdisc);
293 kfree(cl);
294 return err;
295 }
296 }
297
298 sch_tree_lock(sch);
299 qdisc_class_hash_insert(&q->clhash, &cl->common);
300 sch_tree_unlock(sch);
301
302 qdisc_class_hash_grow(sch, &q->clhash);
303
304 *arg = (unsigned long)cl;
305 return 0;
306}
307
308static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
309{
310 struct qfq_sched *q = qdisc_priv(sch);
311
312 if (cl->inv_w) {
313 q->wsum -= ONE_FP / cl->inv_w;
314 cl->inv_w = 0;
315 }
316
317 gen_kill_estimator(&cl->bstats, &cl->rate_est);
318 qdisc_destroy(cl->qdisc);
319 kfree(cl);
320}
321
322static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
323{
324 struct qfq_sched *q = qdisc_priv(sch);
325 struct qfq_class *cl = (struct qfq_class *)arg;
326
327 if (cl->filter_cnt > 0)
328 return -EBUSY;
329
330 sch_tree_lock(sch);
331
332 qfq_purge_queue(cl);
333 qdisc_class_hash_remove(&q->clhash, &cl->common);
334
335 BUG_ON(--cl->refcnt == 0);
336 /*
337 * This shouldn't happen: we "hold" one cops->get() when called
338 * from tc_ctl_tclass; the destroy method is done from cops->put().
339 */
340
341 sch_tree_unlock(sch);
342 return 0;
343}
344
345static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid)
346{
347 struct qfq_class *cl = qfq_find_class(sch, classid);
348
349 if (cl != NULL)
350 cl->refcnt++;
351
352 return (unsigned long)cl;
353}
354
355static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
356{
357 struct qfq_class *cl = (struct qfq_class *)arg;
358
359 if (--cl->refcnt == 0)
360 qfq_destroy_class(sch, cl);
361}
362
363static struct tcf_proto **qfq_tcf_chain(struct Qdisc *sch, unsigned long cl)
364{
365 struct qfq_sched *q = qdisc_priv(sch);
366
367 if (cl)
368 return NULL;
369
370 return &q->filter_list;
371}
372
373static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
374 u32 classid)
375{
376 struct qfq_class *cl = qfq_find_class(sch, classid);
377
378 if (cl != NULL)
379 cl->filter_cnt++;
380
381 return (unsigned long)cl;
382}
383
384static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
385{
386 struct qfq_class *cl = (struct qfq_class *)arg;
387
388 cl->filter_cnt--;
389}
390
391static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
392 struct Qdisc *new, struct Qdisc **old)
393{
394 struct qfq_class *cl = (struct qfq_class *)arg;
395
396 if (new == NULL) {
397 new = qdisc_create_dflt(sch->dev_queue,
398 &pfifo_qdisc_ops, cl->common.classid);
399 if (new == NULL)
400 new = &noop_qdisc;
401 }
402
403 sch_tree_lock(sch);
404 qfq_purge_queue(cl);
405 *old = cl->qdisc;
406 cl->qdisc = new;
407 sch_tree_unlock(sch);
408 return 0;
409}
410
411static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg)
412{
413 struct qfq_class *cl = (struct qfq_class *)arg;
414
415 return cl->qdisc;
416}
417
418static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
419 struct sk_buff *skb, struct tcmsg *tcm)
420{
421 struct qfq_class *cl = (struct qfq_class *)arg;
422 struct nlattr *nest;
423
424 tcm->tcm_parent = TC_H_ROOT;
425 tcm->tcm_handle = cl->common.classid;
426 tcm->tcm_info = cl->qdisc->handle;
427
428 nest = nla_nest_start(skb, TCA_OPTIONS);
429 if (nest == NULL)
430 goto nla_put_failure;
431 NLA_PUT_U32(skb, TCA_QFQ_WEIGHT, ONE_FP/cl->inv_w);
432 NLA_PUT_U32(skb, TCA_QFQ_LMAX, cl->lmax);
433 return nla_nest_end(skb, nest);
434
435nla_put_failure:
436 nla_nest_cancel(skb, nest);
437 return -EMSGSIZE;
438}
439
440static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
441 struct gnet_dump *d)
442{
443 struct qfq_class *cl = (struct qfq_class *)arg;
444 struct tc_qfq_stats xstats;
445
446 memset(&xstats, 0, sizeof(xstats));
447 cl->qdisc->qstats.qlen = cl->qdisc->q.qlen;
448
449 xstats.weight = ONE_FP/cl->inv_w;
450 xstats.lmax = cl->lmax;
451
452 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
453 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
454 gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
455 return -1;
456
457 return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
458}
459
460static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
461{
462 struct qfq_sched *q = qdisc_priv(sch);
463 struct qfq_class *cl;
464 struct hlist_node *n;
465 unsigned int i;
466
467 if (arg->stop)
468 return;
469
470 for (i = 0; i < q->clhash.hashsize; i++) {
471 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
472 if (arg->count < arg->skip) {
473 arg->count++;
474 continue;
475 }
476 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
477 arg->stop = 1;
478 return;
479 }
480 arg->count++;
481 }
482 }
483}
484
485static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
486 int *qerr)
487{
488 struct qfq_sched *q = qdisc_priv(sch);
489 struct qfq_class *cl;
490 struct tcf_result res;
491 int result;
492
493 if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
494 pr_debug("qfq_classify: found %d\n", skb->priority);
495 cl = qfq_find_class(sch, skb->priority);
496 if (cl != NULL)
497 return cl;
498 }
499
500 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
501 result = tc_classify(skb, q->filter_list, &res);
502 if (result >= 0) {
503#ifdef CONFIG_NET_CLS_ACT
504 switch (result) {
505 case TC_ACT_QUEUED:
506 case TC_ACT_STOLEN:
507 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
508 case TC_ACT_SHOT:
509 return NULL;
510 }
511#endif
512 cl = (struct qfq_class *)res.class;
513 if (cl == NULL)
514 cl = qfq_find_class(sch, res.classid);
515 return cl;
516 }
517
518 return NULL;
519}
520
521/* Generic comparison function, handling wraparound. */
522static inline int qfq_gt(u64 a, u64 b)
523{
524 return (s64)(a - b) > 0;
525}
526
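For example, just past a 64-bit wraparound, qfq_gt(2, ~0ULL) is true: 2 - 0xFFFFFFFFFFFFFFFFULL wraps to 3, and (s64)3 > 0, so a timestamp slightly beyond the wrap point still compares as later, where a plain unsigned comparison would give the opposite answer.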
527/* Round a precise timestamp to its slotted value. */
528static inline u64 qfq_round_down(u64 ts, unsigned int shift)
529{
530 return ts & ~((1ULL << shift) - 1);
531}
532
533/* return the pointer to the group with lowest index in the bitmap */
534static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
535 unsigned long bitmap)
536{
537 int index = __ffs(bitmap);
538 return &q->groups[index];
539}
540/* Calculate a mask to mimic what would be ffs_from(). */
541static inline unsigned long mask_from(unsigned long bitmap, int from)
542{
543 return bitmap & ~((1UL << from) - 1);
544}
545
546/*
547 * The state computation relies on ER=0, IR=1, EB=2, IB=3
548 * First compute eligibility comparing grp->S, q->V,
549 * then check if someone is blocking us and possibly add EB
550 */
551static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp)
552{
553 /* if S > V we are not eligible */
554 unsigned int state = qfq_gt(grp->S, q->V);
555 unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
556 struct qfq_group *next;
557
558 if (mask) {
559 next = qfq_ffs(q, mask);
560 if (qfq_gt(grp->F, next->F))
561 state |= EB;
562 }
563
564 return state;
565}
566
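Unpacking the encoding the comment above relies on: with ER=0, IR=1, EB=2, IB=3, bit 0 means "ineligible" (grp->S > q->V) and bit 1 means "blocked" (the first ER group at index >= grp->index has an earlier finish time than grp->F). So an ineligible group that is also blocked gets state 1 | EB == 3 == IB, and clearing just bit 0 when it becomes eligible moves it from IB to EB — which is exactly what the qfq_move_groups() bitmap shuffles below implement.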
567
568/*
569 * In principle
570 * q->bitmaps[dst] |= q->bitmaps[src] & mask;
571 * q->bitmaps[src] &= ~mask;
572 * but we should make sure that src != dst
573 */
574static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask,
575 int src, int dst)
576{
577 q->bitmaps[dst] |= q->bitmaps[src] & mask;
578 q->bitmaps[src] &= ~mask;
579}
580
581static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F)
582{
583 unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
584 struct qfq_group *next;
585
586 if (mask) {
587 next = qfq_ffs(q, mask);
588 if (!qfq_gt(next->F, old_F))
589 return;
590 }
591
592 mask = (1UL << index) - 1;
593 qfq_move_groups(q, mask, EB, ER);
594 qfq_move_groups(q, mask, IB, IR);
595}
596
597/*
598 * perhaps
599 *
600 old_V ^= q->V;
601 old_V >>= QFQ_MIN_SLOT_SHIFT;
602 if (old_V) {
603 ...
604 }
605 *
606 */
607static void qfq_make_eligible(struct qfq_sched *q, u64 old_V)
608{
609 unsigned long vslot = q->V >> QFQ_MIN_SLOT_SHIFT;
610 unsigned long old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT;
611
612 if (vslot != old_vslot) {
613 unsigned long mask = (1UL << fls(vslot ^ old_vslot)) - 1;
614 qfq_move_groups(q, mask, IR, ER);
615 qfq_move_groups(q, mask, IB, EB);
616 }
617}
618
619
620/*
621 * XXX we should make sure that slot becomes less than 32.
622 * This is guaranteed by the input values.
623 * roundedS is always cl->S rounded on grp->slot_shift bits.
624 */
625static void qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl,
626 u64 roundedS)
627{
628 u64 slot = (roundedS - grp->S) >> grp->slot_shift;
629 unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS;
630
631 hlist_add_head(&cl->next, &grp->slots[i]);
632 __set_bit(slot, &grp->full_slots);
633}
634
635/* Maybe introduce hlist_first_entry?? */
636static struct qfq_class *qfq_slot_head(struct qfq_group *grp)
637{
638 return hlist_entry(grp->slots[grp->front].first,
639 struct qfq_class, next);
640}
641
642/*
643 * remove the entry from the slot
644 */
645static void qfq_front_slot_remove(struct qfq_group *grp)
646{
647 struct qfq_class *cl = qfq_slot_head(grp);
648
649 BUG_ON(!cl);
650 hlist_del(&cl->next);
651 if (hlist_empty(&grp->slots[grp->front]))
652 __clear_bit(0, &grp->full_slots);
653}
654
655/*
656 * Returns the first full queue in a group. As a side effect,
657 * adjust the bucket list so the first non-empty bucket is at
658 * position 0 in full_slots.
659 */
660static struct qfq_class *qfq_slot_scan(struct qfq_group *grp)
661{
662 unsigned int i;
663
664 pr_debug("qfq slot_scan: grp %u full %#lx\n",
665 grp->index, grp->full_slots);
666
667 if (grp->full_slots == 0)
668 return NULL;
669
670 i = __ffs(grp->full_slots); /* zero based */
671 if (i > 0) {
672 grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
673 grp->full_slots >>= i;
674 }
675
676 return qfq_slot_head(grp);
677}
678
679/*
680 * adjust the bucket list. When the start time of a group decreases,
681 * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to
682 * move the objects. The mask of occupied slots must be shifted
683 * because we use ffs() to find the first non-empty slot.
684 * This covers decreases in the group's start time, but what about
685 * increases of the start time ?
686 * Here too we should make sure that i is less than 32
687 */
688static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS)
689{
690 unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
691
692 grp->full_slots <<= i;
693 grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
694}
695
696static void qfq_update_eligible(struct qfq_sched *q, u64 old_V)
697{
698 struct qfq_group *grp;
699 unsigned long ineligible;
700
701 ineligible = q->bitmaps[IR] | q->bitmaps[IB];
702 if (ineligible) {
703 if (!q->bitmaps[ER]) {
704 grp = qfq_ffs(q, ineligible);
705 if (qfq_gt(grp->S, q->V))
706 q->V = grp->S;
707 }
708 qfq_make_eligible(q, old_V);
709 }
710}
711
712/* What is length of next packet in queue (0 if queue is empty) */
713static unsigned int qdisc_peek_len(struct Qdisc *sch)
714{
715 struct sk_buff *skb;
716
717 skb = sch->ops->peek(sch);
718 return skb ? qdisc_pkt_len(skb) : 0;
719}
720
721/*
722 * Updates the class, returns true if also the group needs to be updated.
723 */
724static bool qfq_update_class(struct qfq_group *grp, struct qfq_class *cl)
725{
726 unsigned int len = qdisc_peek_len(cl->qdisc);
727
728 cl->S = cl->F;
729 if (!len)
730 qfq_front_slot_remove(grp); /* queue is empty */
731 else {
732 u64 roundedS;
733
734 cl->F = cl->S + (u64)len * cl->inv_w;
735 roundedS = qfq_round_down(cl->S, grp->slot_shift);
736 if (roundedS == grp->S)
737 return false;
738
739 qfq_front_slot_remove(grp);
740 qfq_slot_insert(grp, cl, roundedS);
741 }
742
743 return true;
744}
745
746static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
747{
748 struct qfq_sched *q = qdisc_priv(sch);
749 struct qfq_group *grp;
750 struct qfq_class *cl;
751 struct sk_buff *skb;
752 unsigned int len;
753 u64 old_V;
754
755 if (!q->bitmaps[ER])
756 return NULL;
757
758 grp = qfq_ffs(q, q->bitmaps[ER]);
759
760 cl = qfq_slot_head(grp);
761 skb = qdisc_dequeue_peeked(cl->qdisc);
762 if (!skb) {
763 WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
764 return NULL;
765 }
766
767 sch->q.qlen--;
768 qdisc_bstats_update(sch, skb);
769
770 old_V = q->V;
771 len = qdisc_pkt_len(skb);
772 q->V += (u64)len * IWSUM;
773 pr_debug("qfq dequeue: len %u F %lld now %lld\n",
774 len, (unsigned long long) cl->F, (unsigned long long) q->V);
775
776 if (qfq_update_class(grp, cl)) {
777 u64 old_F = grp->F;
778
779 cl = qfq_slot_scan(grp);
780 if (!cl)
781 __clear_bit(grp->index, &q->bitmaps[ER]);
782 else {
783 u64 roundedS = qfq_round_down(cl->S, grp->slot_shift);
784 unsigned int s;
785
786 if (grp->S == roundedS)
787 goto skip_unblock;
788 grp->S = roundedS;
789 grp->F = roundedS + (2ULL << grp->slot_shift);
790 __clear_bit(grp->index, &q->bitmaps[ER]);
791 s = qfq_calc_state(q, grp);
792 __set_bit(grp->index, &q->bitmaps[s]);
793 }
794
795 qfq_unblock_groups(q, grp->index, old_F);
796 }
797
798skip_unblock:
799 qfq_update_eligible(q, old_V);
800
801 return skb;
802}
803
804/*
805 * Assign a reasonable start time for a new flow k in group i.
806 * Admissible values for \hat(F) are multiples of \sigma_i
807 * no greater than V+\sigma_i . Larger values mean that
808 * we had a wraparound so we consider the timestamp to be stale.
809 *
810 * If F is not stale and F >= V then we set S = F.
811 * Otherwise we should assign S = V, but this may violate
812 * the ordering in ER. So, if we have groups in ER, set S to
813 * the F_j of the first group j which would be blocking us.
814 * We are guaranteed not to move S backward because
815 * otherwise our group i would still be blocked.
816 */
817static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
818{
819 unsigned long mask;
820 uint32_t limit, roundedF;
821 int slot_shift = cl->grp->slot_shift;
822
823 roundedF = qfq_round_down(cl->F, slot_shift);
824 limit = qfq_round_down(q->V, slot_shift) + (1UL << slot_shift);
825
826 if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) {
827 /* timestamp was stale */
828 mask = mask_from(q->bitmaps[ER], cl->grp->index);
829 if (mask) {
830 struct qfq_group *next = qfq_ffs(q, mask);
831 if (qfq_gt(roundedF, next->F)) {
832 cl->S = next->F;
833 return;
834 }
835 }
836 cl->S = q->V;
837 } else /* timestamp is not stale */
838 cl->S = cl->F;
839}
840
841static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
842{
843 struct qfq_sched *q = qdisc_priv(sch);
844 struct qfq_group *grp;
845 struct qfq_class *cl;
846 int err;
847 u64 roundedS;
848 int s;
849
850 cl = qfq_classify(skb, sch, &err);
851 if (cl == NULL) {
852 if (err & __NET_XMIT_BYPASS)
853 sch->qstats.drops++;
854 kfree_skb(skb);
855 return err;
856 }
857 pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
858
859 err = qdisc_enqueue(skb, cl->qdisc);
860 if (unlikely(err != NET_XMIT_SUCCESS)) {
861 pr_debug("qfq_enqueue: enqueue failed %d\n", err);
862 if (net_xmit_drop_count(err)) {
863 cl->qstats.drops++;
864 sch->qstats.drops++;
865 }
866 return err;
867 }
868
869 bstats_update(&cl->bstats, skb);
870 ++sch->q.qlen;
871
872 /* If the new skb is not the head of queue, then done here. */
873 if (cl->qdisc->q.qlen != 1)
874 return err;
875
 876 /* If we reach this point, queue q was idle */
877 grp = cl->grp;
878 qfq_update_start(q, cl);
879
880 /* compute new finish time and rounded start. */
881 cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w;
882 roundedS = qfq_round_down(cl->S, grp->slot_shift);
883
884 /*
885 * insert cl in the correct bucket.
886 * If cl->S >= grp->S we don't need to adjust the
887 * bucket list and simply go to the insertion phase.
888 * Otherwise grp->S is decreasing, we must make room
889 * in the bucket list, and also recompute the group state.
890 * Finally, if there were no flows in this group and nobody
891 * was in ER make sure to adjust V.
892 */
893 if (grp->full_slots) {
894 if (!qfq_gt(grp->S, cl->S))
895 goto skip_update;
896
897 /* create a slot for this cl->S */
898 qfq_slot_rotate(grp, roundedS);
899 /* group was surely ineligible, remove */
900 __clear_bit(grp->index, &q->bitmaps[IR]);
901 __clear_bit(grp->index, &q->bitmaps[IB]);
902 } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V))
903 q->V = roundedS;
904
905 grp->S = roundedS;
906 grp->F = roundedS + (2ULL << grp->slot_shift);
907 s = qfq_calc_state(q, grp);
908 __set_bit(grp->index, &q->bitmaps[s]);
909
910 pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n",
911 s, q->bitmaps[s],
912 (unsigned long long) cl->S,
913 (unsigned long long) cl->F,
914 (unsigned long long) q->V);
915
916skip_update:
917 qfq_slot_insert(grp, cl, roundedS);
918
919 return err;
920}
921
922
923static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
924 struct qfq_class *cl)
925{
926 unsigned int i, offset;
927 u64 roundedS;
928
929 roundedS = qfq_round_down(cl->S, grp->slot_shift);
930 offset = (roundedS - grp->S) >> grp->slot_shift;
931 i = (grp->front + offset) % QFQ_MAX_SLOTS;
932
933 hlist_del(&cl->next);
934 if (hlist_empty(&grp->slots[i]))
935 __clear_bit(offset, &grp->full_slots);
936}
937
938/*
939 * called to forcibly destroy a queue.
940 * If the queue is not in the front bucket, or if it has
941 * other queues in the front bucket, we can simply remove
942 * the queue with no other side effects.
943 * Otherwise we must propagate the event up.
944 */
945static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
946{
947 struct qfq_group *grp = cl->grp;
948 unsigned long mask;
949 u64 roundedS;
950 int s;
951
952 cl->F = cl->S;
953 qfq_slot_remove(q, grp, cl);
954
955 if (!grp->full_slots) {
956 __clear_bit(grp->index, &q->bitmaps[IR]);
957 __clear_bit(grp->index, &q->bitmaps[EB]);
958 __clear_bit(grp->index, &q->bitmaps[IB]);
959
960 if (test_bit(grp->index, &q->bitmaps[ER]) &&
961 !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
962 mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
963 if (mask)
964 mask = ~((1UL << __fls(mask)) - 1);
965 else
966 mask = ~0UL;
967 qfq_move_groups(q, mask, EB, ER);
968 qfq_move_groups(q, mask, IB, IR);
969 }
970 __clear_bit(grp->index, &q->bitmaps[ER]);
971 } else if (hlist_empty(&grp->slots[grp->front])) {
972 cl = qfq_slot_scan(grp);
973 roundedS = qfq_round_down(cl->S, grp->slot_shift);
974 if (grp->S != roundedS) {
975 __clear_bit(grp->index, &q->bitmaps[ER]);
976 __clear_bit(grp->index, &q->bitmaps[IR]);
977 __clear_bit(grp->index, &q->bitmaps[EB]);
978 __clear_bit(grp->index, &q->bitmaps[IB]);
979 grp->S = roundedS;
980 grp->F = roundedS + (2ULL << grp->slot_shift);
981 s = qfq_calc_state(q, grp);
982 __set_bit(grp->index, &q->bitmaps[s]);
983 }
984 }
985
986 qfq_update_eligible(q, q->V);
987}
988
989static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
990{
991 struct qfq_sched *q = qdisc_priv(sch);
992 struct qfq_class *cl = (struct qfq_class *)arg;
993
994 if (cl->qdisc->q.qlen == 0)
995 qfq_deactivate_class(q, cl);
996}
997
998static unsigned int qfq_drop(struct Qdisc *sch)
999{
1000 struct qfq_sched *q = qdisc_priv(sch);
1001 struct qfq_group *grp;
1002 unsigned int i, j, len;
1003
1004 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1005 grp = &q->groups[i];
1006 for (j = 0; j < QFQ_MAX_SLOTS; j++) {
1007 struct qfq_class *cl;
1008 struct hlist_node *n;
1009
1010 hlist_for_each_entry(cl, n, &grp->slots[j], next) {
1011
1012 if (!cl->qdisc->ops->drop)
1013 continue;
1014
1015 len = cl->qdisc->ops->drop(cl->qdisc);
1016 if (len > 0) {
1017 sch->q.qlen--;
1018 if (!cl->qdisc->q.qlen)
1019 qfq_deactivate_class(q, cl);
1020
1021 return len;
1022 }
1023 }
1024 }
1025 }
1026
1027 return 0;
1028}
1029
1030static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1031{
1032 struct qfq_sched *q = qdisc_priv(sch);
1033 struct qfq_group *grp;
1034 int i, j, err;
1035
1036 err = qdisc_class_hash_init(&q->clhash);
1037 if (err < 0)
1038 return err;
1039
1040 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1041 grp = &q->groups[i];
1042 grp->index = i;
1043 grp->slot_shift = QFQ_MTU_SHIFT + FRAC_BITS
1044 - (QFQ_MAX_INDEX - i);
1045 for (j = 0; j < QFQ_MAX_SLOTS; j++)
1046 INIT_HLIST_HEAD(&grp->slots[j]);
1047 }
1048
1049 return 0;
1050}
1051
1052static void qfq_reset_qdisc(struct Qdisc *sch)
1053{
1054 struct qfq_sched *q = qdisc_priv(sch);
1055 struct qfq_group *grp;
1056 struct qfq_class *cl;
1057 struct hlist_node *n, *tmp;
1058 unsigned int i, j;
1059
1060 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1061 grp = &q->groups[i];
1062 for (j = 0; j < QFQ_MAX_SLOTS; j++) {
1063 hlist_for_each_entry_safe(cl, n, tmp,
1064 &grp->slots[j], next) {
1065 qfq_deactivate_class(q, cl);
1066 }
1067 }
1068 }
1069
1070 for (i = 0; i < q->clhash.hashsize; i++) {
1071 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
1072 qdisc_reset(cl->qdisc);
1073 }
1074 sch->q.qlen = 0;
1075}
1076
1077static void qfq_destroy_qdisc(struct Qdisc *sch)
1078{
1079 struct qfq_sched *q = qdisc_priv(sch);
1080 struct qfq_class *cl;
1081 struct hlist_node *n, *next;
1082 unsigned int i;
1083
1084 tcf_destroy_chain(&q->filter_list);
1085
1086 for (i = 0; i < q->clhash.hashsize; i++) {
1087 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
1088 common.hnode) {
1089 qfq_destroy_class(sch, cl);
1090 }
1091 }
1092 qdisc_class_hash_destroy(&q->clhash);
1093}
1094
1095static const struct Qdisc_class_ops qfq_class_ops = {
1096 .change = qfq_change_class,
1097 .delete = qfq_delete_class,
1098 .get = qfq_get_class,
1099 .put = qfq_put_class,
1100 .tcf_chain = qfq_tcf_chain,
1101 .bind_tcf = qfq_bind_tcf,
1102 .unbind_tcf = qfq_unbind_tcf,
1103 .graft = qfq_graft_class,
1104 .leaf = qfq_class_leaf,
1105 .qlen_notify = qfq_qlen_notify,
1106 .dump = qfq_dump_class,
1107 .dump_stats = qfq_dump_class_stats,
1108 .walk = qfq_walk,
1109};
1110
1111static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
1112 .cl_ops = &qfq_class_ops,
1113 .id = "qfq",
1114 .priv_size = sizeof(struct qfq_sched),
1115 .enqueue = qfq_enqueue,
1116 .dequeue = qfq_dequeue,
1117 .peek = qdisc_peek_dequeued,
1118 .drop = qfq_drop,
1119 .init = qfq_init_qdisc,
1120 .reset = qfq_reset_qdisc,
1121 .destroy = qfq_destroy_qdisc,
1122 .owner = THIS_MODULE,
1123};
1124
1125static int __init qfq_init(void)
1126{
1127 return register_qdisc(&qfq_qdisc_ops);
1128}
1129
1130static void __exit qfq_exit(void)
1131{
1132 unregister_qdisc(&qfq_qdisc_ops);
1133}
1134
1135module_init(qfq_init);
1136module_exit(qfq_exit);
1137MODULE_LICENSE("GPL");
diff --git a/net/socket.c b/net/socket.c
index 310d16b1b3c9..d25f5a9d6fa2 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2643,13 +2643,13 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
2643 return -EFAULT; 2643 return -EFAULT;
2644 2644
2645 if (convert_in) { 2645 if (convert_in) {
2646 /* We expect there to be holes between fs.m_u and 2646 /* We expect there to be holes between fs.m_ext and
2647 * fs.ring_cookie and at the end of fs, but nowhere else. 2647 * fs.ring_cookie and at the end of fs, but nowhere else.
2648 */ 2648 */
2649 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_u) + 2649 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2650 sizeof(compat_rxnfc->fs.m_u) != 2650 sizeof(compat_rxnfc->fs.m_ext) !=
2651 offsetof(struct ethtool_rxnfc, fs.m_u) + 2651 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2652 sizeof(rxnfc->fs.m_u)); 2652 sizeof(rxnfc->fs.m_ext));
2653 BUILD_BUG_ON( 2653 BUILD_BUG_ON(
2654 offsetof(struct compat_ethtool_rxnfc, fs.location) - 2654 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2655 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != 2655 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
@@ -2657,7 +2657,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
2657 offsetof(struct ethtool_rxnfc, fs.ring_cookie)); 2657 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2658 2658
2659 if (copy_in_user(rxnfc, compat_rxnfc, 2659 if (copy_in_user(rxnfc, compat_rxnfc,
2660 (void *)(&rxnfc->fs.m_u + 1) - 2660 (void *)(&rxnfc->fs.m_ext + 1) -
2661 (void *)rxnfc) || 2661 (void *)rxnfc) ||
2662 copy_in_user(&rxnfc->fs.ring_cookie, 2662 copy_in_user(&rxnfc->fs.ring_cookie,
2663 &compat_rxnfc->fs.ring_cookie, 2663 &compat_rxnfc->fs.ring_cookie,
@@ -2674,7 +2674,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
2674 2674
2675 if (convert_out) { 2675 if (convert_out) {
2676 if (copy_in_user(compat_rxnfc, rxnfc, 2676 if (copy_in_user(compat_rxnfc, rxnfc,
2677 (const void *)(&rxnfc->fs.m_u + 1) - 2677 (const void *)(&rxnfc->fs.m_ext + 1) -
2678 (const void *)rxnfc) || 2678 (const void *)rxnfc) ||
2679 copy_in_user(&compat_rxnfc->fs.ring_cookie, 2679 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2680 &rxnfc->fs.ring_cookie, 2680 &rxnfc->fs.ring_cookie,