aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/dev.c
diff options
context:
space:
mode:
author Veaceslav Falico <vfalico@redhat.com> 2013-08-28 17:25:05 -0400
committer David S. Miller <davem@davemloft.net> 2013-08-29 16:19:42 -0400
commit 5d261913ca3daf6c2d21d38924235667b3d07c40 (patch)
tree f15af2dea3316393cea047c637f434aa04db7726 /net/core/dev.c
parent aa9d85605f5ab070b64842b3eba797cf81698ae1 (diff)
net: add lower_dev_list to net_device and make a full mesh
This patch adds lower_dev_list list_head to net_device, which is the same as upper_dev_list, only for lower devices, and begins to use it in the same way as the upper list.

It also changes the way the whole adjacent device lists work - now they contain *all* of upper/lower devices, not only the first level. The first level devices are distinguished by the bool neighbour field in netdev_adjacent, also added by this patch.

There are cases when a device can be added several times to the adjacent list, the simplest would be:

     /---- eth0.10 ---\
eth0-                  --- bond0
     \---- eth0.20 ---/

where both bond0 and eth0 'see' each other in the adjacent lists two times. To avoid duplication of netdev_adjacent structures ref_nr is being kept as the number of times the device was added to the list.

The 'full view' is achieved by adding, on link creation, all of the upper_dev's upper_dev_list devices as upper devices to all of the lower_dev's lower_dev_list devices (and to the lower_dev itself), and vice versa. On unlink they are removed using the same logic.

I've tested it with thousands of vlans/bonds/bridges, everything works ok and no observable lags even on a huge number of interfaces.

Memory footprint for 128 devices interconnected with each other via both upper and lower (which is impossible, but for the comparison) lists would be:

128*128*2*sizeof(netdev_adjacent) = 1.5MB

but in the real world we usually have at most several devices with slaves and a lot of vlans, so the footprint will be much lower.

CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
CC: Cong Wang <amwang@redhat.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- net/core/dev.c 285
1 files changed, 258 insertions, 27 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 5072e2c1a072..2aa914eee057 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4369,7 +4369,16 @@ softnet_break:
4369 4369
4370struct netdev_adjacent { 4370struct netdev_adjacent {
4371 struct net_device *dev; 4371 struct net_device *dev;
4372
4373 /* upper master flag, there can only be one master device per list */
4372 bool master; 4374 bool master;
4375
4376 /* indicates that this dev is our first-level lower/upper device */
4377 bool neighbour;
4378
4379 /* counter for the number of times this device was added to us */
4380 u16 ref_nr;
4381
4373 struct list_head list; 4382 struct list_head list;
4374 struct rcu_head rcu; 4383 struct rcu_head rcu;
4375 struct list_head search_list; 4384 struct list_head search_list;
@@ -4408,18 +4417,34 @@ static bool __netdev_search_upper_dev(struct net_device *dev,
4408 return ret; 4417 return ret;
4409} 4418}
4410 4419
4411static struct netdev_adjacent *__netdev_find_upper(struct net_device *dev, 4420static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
4412 struct net_device *upper_dev) 4421 struct net_device *adj_dev,
4422 bool upper)
4413{ 4423{
4414 struct netdev_adjacent *upper; 4424 struct netdev_adjacent *adj;
4425 struct list_head *dev_list;
4415 4426
4416 list_for_each_entry(upper, &dev->upper_dev_list, list) { 4427 dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list;
4417 if (upper->dev == upper_dev) 4428
4418 return upper; 4429 list_for_each_entry(adj, dev_list, list) {
4430 if (adj->dev == adj_dev)
4431 return adj;
4419 } 4432 }
4420 return NULL; 4433 return NULL;
4421} 4434}
4422 4435
4436static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev,
4437 struct net_device *udev)
4438{
4439 return __netdev_find_adj(dev, udev, true);
4440}
4441
4442static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev,
4443 struct net_device *ldev)
4444{
4445 return __netdev_find_adj(dev, ldev, false);
4446}
4447
4423/** 4448/**
4424 * netdev_has_upper_dev - Check if device is linked to an upper device 4449 * netdev_has_upper_dev - Check if device is linked to an upper device
4425 * @dev: device 4450 * @dev: device
@@ -4496,10 +4521,149 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
4496} 4521}
4497EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); 4522EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
4498 4523
4524static int __netdev_adjacent_dev_insert(struct net_device *dev,
4525 struct net_device *adj_dev,
4526 bool neighbour, bool master,
4527 bool upper)
4528{
4529 struct netdev_adjacent *adj;
4530
4531 adj = __netdev_find_adj(dev, adj_dev, upper);
4532
4533 if (adj) {
4534 BUG_ON(neighbour);
4535 adj->ref_nr++;
4536 return 0;
4537 }
4538
4539 adj = kmalloc(sizeof(*adj), GFP_KERNEL);
4540 if (!adj)
4541 return -ENOMEM;
4542
4543 adj->dev = adj_dev;
4544 adj->master = master;
4545 adj->neighbour = neighbour;
4546 adj->ref_nr = 1;
4547 INIT_LIST_HEAD(&adj->search_list);
4548
4549 dev_hold(adj_dev);
4550 pr_debug("dev_hold for %s, because of %s link added from %s to %s\n",
4551 adj_dev->name, upper ? "upper" : "lower", dev->name,
4552 adj_dev->name);
4553
4554 if (!upper) {
4555 list_add_tail_rcu(&adj->list, &dev->lower_dev_list);
4556 return 0;
4557 }
4558
4559 /* Ensure that master upper link is always the first item in list. */
4560 if (master)
4561 list_add_rcu(&adj->list, &dev->upper_dev_list);
4562 else
4563 list_add_tail_rcu(&adj->list, &dev->upper_dev_list);
4564
4565 return 0;
4566}
4567
4568static inline int __netdev_upper_dev_insert(struct net_device *dev,
4569 struct net_device *udev,
4570 bool master, bool neighbour)
4571{
4572 return __netdev_adjacent_dev_insert(dev, udev, neighbour, master,
4573 true);
4574}
4575
4576static inline int __netdev_lower_dev_insert(struct net_device *dev,
4577 struct net_device *ldev,
4578 bool neighbour)
4579{
4580 return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false,
4581 false);
4582}
4583
4584void __netdev_adjacent_dev_remove(struct net_device *dev,
4585 struct net_device *adj_dev, bool upper)
4586{
4587 struct netdev_adjacent *adj;
4588
4589 if (upper)
4590 adj = __netdev_find_upper(dev, adj_dev);
4591 else
4592 adj = __netdev_find_lower(dev, adj_dev);
4593
4594 if (!adj)
4595 BUG();
4596
4597 if (adj->ref_nr > 1) {
4598 adj->ref_nr--;
4599 return;
4600 }
4601
4602 list_del_rcu(&adj->list);
4603 pr_debug("dev_put for %s, because of %s link removed from %s to %s\n",
4604 adj_dev->name, upper ? "upper" : "lower", dev->name,
4605 adj_dev->name);
4606 dev_put(adj_dev);
4607 kfree_rcu(adj, rcu);
4608}
4609
4610static inline void __netdev_upper_dev_remove(struct net_device *dev,
4611 struct net_device *udev)
4612{
4613 return __netdev_adjacent_dev_remove(dev, udev, true);
4614}
4615
4616static inline void __netdev_lower_dev_remove(struct net_device *dev,
4617 struct net_device *ldev)
4618{
4619 return __netdev_adjacent_dev_remove(dev, ldev, false);
4620}
4621
4622int __netdev_adjacent_dev_insert_link(struct net_device *dev,
4623 struct net_device *upper_dev,
4624 bool master, bool neighbour)
4625{
4626 int ret;
4627
4628 ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour);
4629 if (ret)
4630 return ret;
4631
4632 ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour);
4633 if (ret) {
4634 __netdev_upper_dev_remove(dev, upper_dev);
4635 return ret;
4636 }
4637
4638 return 0;
4639}
4640
4641static inline int __netdev_adjacent_dev_link(struct net_device *dev,
4642 struct net_device *udev)
4643{
4644 return __netdev_adjacent_dev_insert_link(dev, udev, false, false);
4645}
4646
4647static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
4648 struct net_device *udev,
4649 bool master)
4650{
4651 return __netdev_adjacent_dev_insert_link(dev, udev, master, true);
4652}
4653
4654void __netdev_adjacent_dev_unlink(struct net_device *dev,
4655 struct net_device *upper_dev)
4656{
4657 __netdev_upper_dev_remove(dev, upper_dev);
4658 __netdev_lower_dev_remove(upper_dev, dev);
4659}
4660
4661
4499static int __netdev_upper_dev_link(struct net_device *dev, 4662static int __netdev_upper_dev_link(struct net_device *dev,
4500 struct net_device *upper_dev, bool master) 4663 struct net_device *upper_dev, bool master)
4501{ 4664{
4502 struct netdev_adjacent *upper; 4665 struct netdev_adjacent *i, *j, *to_i, *to_j;
4666 int ret = 0;
4503 4667
4504 ASSERT_RTNL(); 4668 ASSERT_RTNL();
4505 4669
@@ -4516,22 +4680,76 @@ static int __netdev_upper_dev_link(struct net_device *dev,
4516 if (master && netdev_master_upper_dev_get(dev)) 4680 if (master && netdev_master_upper_dev_get(dev))
4517 return -EBUSY; 4681 return -EBUSY;
4518 4682
4519 upper = kmalloc(sizeof(*upper), GFP_KERNEL); 4683 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master);
4520 if (!upper) 4684 if (ret)
4521 return -ENOMEM; 4685 return ret;
4522 4686
4523 upper->dev = upper_dev; 4687 /* Now that we linked these devs, make all the upper_dev's
4524 upper->master = master; 4688 * upper_dev_list visible to every dev's lower_dev_list and vice
4525 INIT_LIST_HEAD(&upper->search_list); 4689 * versa, and don't forget the devices itself. All of these
4690 * links are non-neighbours.
4691 */
4692 list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
4693 list_for_each_entry(j, &dev->lower_dev_list, list) {
4694 ret = __netdev_adjacent_dev_link(i->dev, j->dev);
4695 if (ret)
4696 goto rollback_mesh;
4697 }
4698 }
4699
4700 /* add dev to every upper_dev's upper device */
4701 list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
4702 ret = __netdev_adjacent_dev_link(dev, i->dev);
4703 if (ret)
4704 goto rollback_upper_mesh;
4705 }
4706
4707 /* add upper_dev to every dev's lower device */
4708 list_for_each_entry(i, &dev->lower_dev_list, list) {
4709 ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
4710 if (ret)
4711 goto rollback_lower_mesh;
4712 }
4526 4713
4527 /* Ensure that master upper link is always the first item in list. */
4528 if (master)
4529 list_add_rcu(&upper->list, &dev->upper_dev_list);
4530 else
4531 list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
4532 dev_hold(upper_dev);
4533 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); 4714 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
4534 return 0; 4715 return 0;
4716
4717rollback_lower_mesh:
4718 to_i = i;
4719 list_for_each_entry(i, &dev->lower_dev_list, list) {
4720 if (i == to_i)
4721 break;
4722 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
4723 }
4724
4725 i = NULL;
4726
4727rollback_upper_mesh:
4728 to_i = i;
4729 list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
4730 if (i == to_i)
4731 break;
4732 __netdev_adjacent_dev_unlink(dev, i->dev);
4733 }
4734
4735 i = j = NULL;
4736
4737rollback_mesh:
4738 to_i = i;
4739 to_j = j;
4740 list_for_each_entry(i, &dev->lower_dev_list, list) {
4741 list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
4742 if (i == to_i && j == to_j)
4743 break;
4744 __netdev_adjacent_dev_unlink(i->dev, j->dev);
4745 }
4746 if (i == to_i)
4747 break;
4748 }
4749
4750 __netdev_adjacent_dev_unlink(dev, upper_dev);
4751
4752 return ret;
4535} 4753}
4536 4754
4537/** 4755/**
@@ -4580,16 +4798,28 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link);
4580void netdev_upper_dev_unlink(struct net_device *dev, 4798void netdev_upper_dev_unlink(struct net_device *dev,
4581 struct net_device *upper_dev) 4799 struct net_device *upper_dev)
4582{ 4800{
4583 struct netdev_adjacent *upper; 4801 struct netdev_adjacent *i, *j;
4584
4585 ASSERT_RTNL(); 4802 ASSERT_RTNL();
4586 4803
4587 upper = __netdev_find_upper(dev, upper_dev); 4804 __netdev_adjacent_dev_unlink(dev, upper_dev);
4588 if (!upper) 4805
4589 return; 4806 /* Here is the tricky part. We must remove all dev's lower
4590 list_del_rcu(&upper->list); 4807 * devices from all upper_dev's upper devices and vice
4591 dev_put(upper_dev); 4808 * versa, to maintain the graph relationship.
4592 kfree_rcu(upper, rcu); 4809 */
4810 list_for_each_entry(i, &dev->lower_dev_list, list)
4811 list_for_each_entry(j, &upper_dev->upper_dev_list, list)
4812 __netdev_adjacent_dev_unlink(i->dev, j->dev);
4813
4814 /* remove also the devices itself from lower/upper device
4815 * list
4816 */
4817 list_for_each_entry(i, &dev->lower_dev_list, list)
4818 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
4819
4820 list_for_each_entry(i, &upper_dev->upper_dev_list, list)
4821 __netdev_adjacent_dev_unlink(dev, i->dev);
4822
4593 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); 4823 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
4594} 4824}
4595EXPORT_SYMBOL(netdev_upper_dev_unlink); 4825EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -5850,6 +6080,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
5850 INIT_LIST_HEAD(&dev->unreg_list); 6080 INIT_LIST_HEAD(&dev->unreg_list);
5851 INIT_LIST_HEAD(&dev->link_watch_list); 6081 INIT_LIST_HEAD(&dev->link_watch_list);
5852 INIT_LIST_HEAD(&dev->upper_dev_list); 6082 INIT_LIST_HEAD(&dev->upper_dev_list);
6083 INIT_LIST_HEAD(&dev->lower_dev_list);
5853 dev->priv_flags = IFF_XMIT_DST_RELEASE; 6084 dev->priv_flags = IFF_XMIT_DST_RELEASE;
5854 setup(dev); 6085 setup(dev);
5855 6086