diff options
author | Veaceslav Falico <vfalico@redhat.com> | 2013-08-28 17:25:05 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-08-29 16:19:42 -0400 |
commit | 5d261913ca3daf6c2d21d38924235667b3d07c40 (patch) | |
tree | f15af2dea3316393cea047c637f434aa04db7726 /net/core/dev.c | |
parent | aa9d85605f5ab070b64842b3eba797cf81698ae1 (diff) |
net: add lower_dev_list to net_device and make a full mesh
This patch adds lower_dev_list list_head to net_device, which is the same
as upper_dev_list, only for lower devices, and begins to use it in the same
way as the upper list.
It also changes the way the whole adjacent device lists work - now they
contain *all* of upper/lower devices, not only the first level. The first
level devices are distinguished by the bool neighbour field in
netdev_adjacent, also added by this patch.
There are cases when a device can be added several times to the adjacent
list, the simplest would be:
      /---- eth0.10 ---\
eth0-                   --- bond0
      \---- eth0.20 ---/
where both bond0 and eth0 'see' each other in the adjacent lists two times.
To avoid duplication of netdev_adjacent structures ref_nr is being kept as
the number of times the device was added to the list.
The 'full view' is achieved by adding, on link creation, all of the
upper_dev's upper_dev_list devices as upper devices to all of the
lower_dev's lower_dev_list devices (and to the lower_dev itself), and vice
versa. On unlink they are removed using the same logic.
I've tested it with thousands of vlans/bonds/bridges; everything works ok, with
no observable lag even on a huge number of interfaces.
Memory footprint for 128 devices interconnected with each other via both
upper and lower (which is impossible, but for the comparison) lists would be:
128*128*2*sizeof(netdev_adjacent) = 1.5MB
but in the real world we usually have at most several devices with slaves
and a lot of vlans, so the footprint will be much lower.
CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Jiri Pirko <jiri@resnulli.us>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
CC: Cong Wang <amwang@redhat.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 285 |
1 files changed, 258 insertions, 27 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 5072e2c1a072..2aa914eee057 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -4369,7 +4369,16 @@ softnet_break: | |||
4369 | 4369 | ||
4370 | struct netdev_adjacent { | 4370 | struct netdev_adjacent { |
4371 | struct net_device *dev; | 4371 | struct net_device *dev; |
4372 | |||
4373 | /* upper master flag, there can only be one master device per list */ | ||
4372 | bool master; | 4374 | bool master; |
4375 | |||
4376 | /* indicates that this dev is our first-level lower/upper device */ | ||
4377 | bool neighbour; | ||
4378 | |||
4379 | /* counter for the number of times this device was added to us */ | ||
4380 | u16 ref_nr; | ||
4381 | |||
4373 | struct list_head list; | 4382 | struct list_head list; |
4374 | struct rcu_head rcu; | 4383 | struct rcu_head rcu; |
4375 | struct list_head search_list; | 4384 | struct list_head search_list; |
@@ -4408,18 +4417,34 @@ static bool __netdev_search_upper_dev(struct net_device *dev, | |||
4408 | return ret; | 4417 | return ret; |
4409 | } | 4418 | } |
4410 | 4419 | ||
4411 | static struct netdev_adjacent *__netdev_find_upper(struct net_device *dev, | 4420 | static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, |
4412 | struct net_device *upper_dev) | 4421 | struct net_device *adj_dev, |
4422 | bool upper) | ||
4413 | { | 4423 | { |
4414 | struct netdev_adjacent *upper; | 4424 | struct netdev_adjacent *adj; |
4425 | struct list_head *dev_list; | ||
4415 | 4426 | ||
4416 | list_for_each_entry(upper, &dev->upper_dev_list, list) { | 4427 | dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list; |
4417 | if (upper->dev == upper_dev) | 4428 | |
4418 | return upper; | 4429 | list_for_each_entry(adj, dev_list, list) { |
4430 | if (adj->dev == adj_dev) | ||
4431 | return adj; | ||
4419 | } | 4432 | } |
4420 | return NULL; | 4433 | return NULL; |
4421 | } | 4434 | } |
4422 | 4435 | ||
4436 | static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev, | ||
4437 | struct net_device *udev) | ||
4438 | { | ||
4439 | return __netdev_find_adj(dev, udev, true); | ||
4440 | } | ||
4441 | |||
4442 | static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev, | ||
4443 | struct net_device *ldev) | ||
4444 | { | ||
4445 | return __netdev_find_adj(dev, ldev, false); | ||
4446 | } | ||
4447 | |||
4423 | /** | 4448 | /** |
4424 | * netdev_has_upper_dev - Check if device is linked to an upper device | 4449 | * netdev_has_upper_dev - Check if device is linked to an upper device |
4425 | * @dev: device | 4450 | * @dev: device |
@@ -4496,10 +4521,149 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) | |||
4496 | } | 4521 | } |
4497 | EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); | 4522 | EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); |
4498 | 4523 | ||
4524 | static int __netdev_adjacent_dev_insert(struct net_device *dev, | ||
4525 | struct net_device *adj_dev, | ||
4526 | bool neighbour, bool master, | ||
4527 | bool upper) | ||
4528 | { | ||
4529 | struct netdev_adjacent *adj; | ||
4530 | |||
4531 | adj = __netdev_find_adj(dev, adj_dev, upper); | ||
4532 | |||
4533 | if (adj) { | ||
4534 | BUG_ON(neighbour); | ||
4535 | adj->ref_nr++; | ||
4536 | return 0; | ||
4537 | } | ||
4538 | |||
4539 | adj = kmalloc(sizeof(*adj), GFP_KERNEL); | ||
4540 | if (!adj) | ||
4541 | return -ENOMEM; | ||
4542 | |||
4543 | adj->dev = adj_dev; | ||
4544 | adj->master = master; | ||
4545 | adj->neighbour = neighbour; | ||
4546 | adj->ref_nr = 1; | ||
4547 | INIT_LIST_HEAD(&adj->search_list); | ||
4548 | |||
4549 | dev_hold(adj_dev); | ||
4550 | pr_debug("dev_hold for %s, because of %s link added from %s to %s\n", | ||
4551 | adj_dev->name, upper ? "upper" : "lower", dev->name, | ||
4552 | adj_dev->name); | ||
4553 | |||
4554 | if (!upper) { | ||
4555 | list_add_tail_rcu(&adj->list, &dev->lower_dev_list); | ||
4556 | return 0; | ||
4557 | } | ||
4558 | |||
4559 | /* Ensure that master upper link is always the first item in list. */ | ||
4560 | if (master) | ||
4561 | list_add_rcu(&adj->list, &dev->upper_dev_list); | ||
4562 | else | ||
4563 | list_add_tail_rcu(&adj->list, &dev->upper_dev_list); | ||
4564 | |||
4565 | return 0; | ||
4566 | } | ||
4567 | |||
4568 | static inline int __netdev_upper_dev_insert(struct net_device *dev, | ||
4569 | struct net_device *udev, | ||
4570 | bool master, bool neighbour) | ||
4571 | { | ||
4572 | return __netdev_adjacent_dev_insert(dev, udev, neighbour, master, | ||
4573 | true); | ||
4574 | } | ||
4575 | |||
4576 | static inline int __netdev_lower_dev_insert(struct net_device *dev, | ||
4577 | struct net_device *ldev, | ||
4578 | bool neighbour) | ||
4579 | { | ||
4580 | return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false, | ||
4581 | false); | ||
4582 | } | ||
4583 | |||
4584 | void __netdev_adjacent_dev_remove(struct net_device *dev, | ||
4585 | struct net_device *adj_dev, bool upper) | ||
4586 | { | ||
4587 | struct netdev_adjacent *adj; | ||
4588 | |||
4589 | if (upper) | ||
4590 | adj = __netdev_find_upper(dev, adj_dev); | ||
4591 | else | ||
4592 | adj = __netdev_find_lower(dev, adj_dev); | ||
4593 | |||
4594 | if (!adj) | ||
4595 | BUG(); | ||
4596 | |||
4597 | if (adj->ref_nr > 1) { | ||
4598 | adj->ref_nr--; | ||
4599 | return; | ||
4600 | } | ||
4601 | |||
4602 | list_del_rcu(&adj->list); | ||
4603 | pr_debug("dev_put for %s, because of %s link removed from %s to %s\n", | ||
4604 | adj_dev->name, upper ? "upper" : "lower", dev->name, | ||
4605 | adj_dev->name); | ||
4606 | dev_put(adj_dev); | ||
4607 | kfree_rcu(adj, rcu); | ||
4608 | } | ||
4609 | |||
4610 | static inline void __netdev_upper_dev_remove(struct net_device *dev, | ||
4611 | struct net_device *udev) | ||
4612 | { | ||
4613 | return __netdev_adjacent_dev_remove(dev, udev, true); | ||
4614 | } | ||
4615 | |||
4616 | static inline void __netdev_lower_dev_remove(struct net_device *dev, | ||
4617 | struct net_device *ldev) | ||
4618 | { | ||
4619 | return __netdev_adjacent_dev_remove(dev, ldev, false); | ||
4620 | } | ||
4621 | |||
4622 | int __netdev_adjacent_dev_insert_link(struct net_device *dev, | ||
4623 | struct net_device *upper_dev, | ||
4624 | bool master, bool neighbour) | ||
4625 | { | ||
4626 | int ret; | ||
4627 | |||
4628 | ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour); | ||
4629 | if (ret) | ||
4630 | return ret; | ||
4631 | |||
4632 | ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour); | ||
4633 | if (ret) { | ||
4634 | __netdev_upper_dev_remove(dev, upper_dev); | ||
4635 | return ret; | ||
4636 | } | ||
4637 | |||
4638 | return 0; | ||
4639 | } | ||
4640 | |||
4641 | static inline int __netdev_adjacent_dev_link(struct net_device *dev, | ||
4642 | struct net_device *udev) | ||
4643 | { | ||
4644 | return __netdev_adjacent_dev_insert_link(dev, udev, false, false); | ||
4645 | } | ||
4646 | |||
4647 | static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, | ||
4648 | struct net_device *udev, | ||
4649 | bool master) | ||
4650 | { | ||
4651 | return __netdev_adjacent_dev_insert_link(dev, udev, master, true); | ||
4652 | } | ||
4653 | |||
4654 | void __netdev_adjacent_dev_unlink(struct net_device *dev, | ||
4655 | struct net_device *upper_dev) | ||
4656 | { | ||
4657 | __netdev_upper_dev_remove(dev, upper_dev); | ||
4658 | __netdev_lower_dev_remove(upper_dev, dev); | ||
4659 | } | ||
4660 | |||
4661 | |||
4499 | static int __netdev_upper_dev_link(struct net_device *dev, | 4662 | static int __netdev_upper_dev_link(struct net_device *dev, |
4500 | struct net_device *upper_dev, bool master) | 4663 | struct net_device *upper_dev, bool master) |
4501 | { | 4664 | { |
4502 | struct netdev_adjacent *upper; | 4665 | struct netdev_adjacent *i, *j, *to_i, *to_j; |
4666 | int ret = 0; | ||
4503 | 4667 | ||
4504 | ASSERT_RTNL(); | 4668 | ASSERT_RTNL(); |
4505 | 4669 | ||
@@ -4516,22 +4680,76 @@ static int __netdev_upper_dev_link(struct net_device *dev, | |||
4516 | if (master && netdev_master_upper_dev_get(dev)) | 4680 | if (master && netdev_master_upper_dev_get(dev)) |
4517 | return -EBUSY; | 4681 | return -EBUSY; |
4518 | 4682 | ||
4519 | upper = kmalloc(sizeof(*upper), GFP_KERNEL); | 4683 | ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master); |
4520 | if (!upper) | 4684 | if (ret) |
4521 | return -ENOMEM; | 4685 | return ret; |
4522 | 4686 | ||
4523 | upper->dev = upper_dev; | 4687 | /* Now that we linked these devs, make all the upper_dev's |
4524 | upper->master = master; | 4688 | * upper_dev_list visible to every dev's lower_dev_list and vice |
4525 | INIT_LIST_HEAD(&upper->search_list); | 4689 | * versa, and don't forget the devices itself. All of these |
4690 | * links are non-neighbours. | ||
4691 | */ | ||
4692 | list_for_each_entry(i, &upper_dev->upper_dev_list, list) { | ||
4693 | list_for_each_entry(j, &dev->lower_dev_list, list) { | ||
4694 | ret = __netdev_adjacent_dev_link(i->dev, j->dev); | ||
4695 | if (ret) | ||
4696 | goto rollback_mesh; | ||
4697 | } | ||
4698 | } | ||
4699 | |||
4700 | /* add dev to every upper_dev's upper device */ | ||
4701 | list_for_each_entry(i, &upper_dev->upper_dev_list, list) { | ||
4702 | ret = __netdev_adjacent_dev_link(dev, i->dev); | ||
4703 | if (ret) | ||
4704 | goto rollback_upper_mesh; | ||
4705 | } | ||
4706 | |||
4707 | /* add upper_dev to every dev's lower device */ | ||
4708 | list_for_each_entry(i, &dev->lower_dev_list, list) { | ||
4709 | ret = __netdev_adjacent_dev_link(i->dev, upper_dev); | ||
4710 | if (ret) | ||
4711 | goto rollback_lower_mesh; | ||
4712 | } | ||
4526 | 4713 | ||
4527 | /* Ensure that master upper link is always the first item in list. */ | ||
4528 | if (master) | ||
4529 | list_add_rcu(&upper->list, &dev->upper_dev_list); | ||
4530 | else | ||
4531 | list_add_tail_rcu(&upper->list, &dev->upper_dev_list); | ||
4532 | dev_hold(upper_dev); | ||
4533 | call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); | 4714 | call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); |
4534 | return 0; | 4715 | return 0; |
4716 | |||
4717 | rollback_lower_mesh: | ||
4718 | to_i = i; | ||
4719 | list_for_each_entry(i, &dev->lower_dev_list, list) { | ||
4720 | if (i == to_i) | ||
4721 | break; | ||
4722 | __netdev_adjacent_dev_unlink(i->dev, upper_dev); | ||
4723 | } | ||
4724 | |||
4725 | i = NULL; | ||
4726 | |||
4727 | rollback_upper_mesh: | ||
4728 | to_i = i; | ||
4729 | list_for_each_entry(i, &upper_dev->upper_dev_list, list) { | ||
4730 | if (i == to_i) | ||
4731 | break; | ||
4732 | __netdev_adjacent_dev_unlink(dev, i->dev); | ||
4733 | } | ||
4734 | |||
4735 | i = j = NULL; | ||
4736 | |||
4737 | rollback_mesh: | ||
4738 | to_i = i; | ||
4739 | to_j = j; | ||
4740 | list_for_each_entry(i, &dev->lower_dev_list, list) { | ||
4741 | list_for_each_entry(j, &upper_dev->upper_dev_list, list) { | ||
4742 | if (i == to_i && j == to_j) | ||
4743 | break; | ||
4744 | __netdev_adjacent_dev_unlink(i->dev, j->dev); | ||
4745 | } | ||
4746 | if (i == to_i) | ||
4747 | break; | ||
4748 | } | ||
4749 | |||
4750 | __netdev_adjacent_dev_unlink(dev, upper_dev); | ||
4751 | |||
4752 | return ret; | ||
4535 | } | 4753 | } |
4536 | 4754 | ||
4537 | /** | 4755 | /** |
@@ -4580,16 +4798,28 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link); | |||
4580 | void netdev_upper_dev_unlink(struct net_device *dev, | 4798 | void netdev_upper_dev_unlink(struct net_device *dev, |
4581 | struct net_device *upper_dev) | 4799 | struct net_device *upper_dev) |
4582 | { | 4800 | { |
4583 | struct netdev_adjacent *upper; | 4801 | struct netdev_adjacent *i, *j; |
4584 | |||
4585 | ASSERT_RTNL(); | 4802 | ASSERT_RTNL(); |
4586 | 4803 | ||
4587 | upper = __netdev_find_upper(dev, upper_dev); | 4804 | __netdev_adjacent_dev_unlink(dev, upper_dev); |
4588 | if (!upper) | 4805 | |
4589 | return; | 4806 | /* Here is the tricky part. We must remove all dev's lower |
4590 | list_del_rcu(&upper->list); | 4807 | * devices from all upper_dev's upper devices and vice |
4591 | dev_put(upper_dev); | 4808 | * versa, to maintain the graph relationship. |
4592 | kfree_rcu(upper, rcu); | 4809 | */ |
4810 | list_for_each_entry(i, &dev->lower_dev_list, list) | ||
4811 | list_for_each_entry(j, &upper_dev->upper_dev_list, list) | ||
4812 | __netdev_adjacent_dev_unlink(i->dev, j->dev); | ||
4813 | |||
4814 | /* remove also the devices itself from lower/upper device | ||
4815 | * list | ||
4816 | */ | ||
4817 | list_for_each_entry(i, &dev->lower_dev_list, list) | ||
4818 | __netdev_adjacent_dev_unlink(i->dev, upper_dev); | ||
4819 | |||
4820 | list_for_each_entry(i, &upper_dev->upper_dev_list, list) | ||
4821 | __netdev_adjacent_dev_unlink(dev, i->dev); | ||
4822 | |||
4593 | call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); | 4823 | call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); |
4594 | } | 4824 | } |
4595 | EXPORT_SYMBOL(netdev_upper_dev_unlink); | 4825 | EXPORT_SYMBOL(netdev_upper_dev_unlink); |
@@ -5850,6 +6080,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, | |||
5850 | INIT_LIST_HEAD(&dev->unreg_list); | 6080 | INIT_LIST_HEAD(&dev->unreg_list); |
5851 | INIT_LIST_HEAD(&dev->link_watch_list); | 6081 | INIT_LIST_HEAD(&dev->link_watch_list); |
5852 | INIT_LIST_HEAD(&dev->upper_dev_list); | 6082 | INIT_LIST_HEAD(&dev->upper_dev_list); |
6083 | INIT_LIST_HEAD(&dev->lower_dev_list); | ||
5853 | dev->priv_flags = IFF_XMIT_DST_RELEASE; | 6084 | dev->priv_flags = IFF_XMIT_DST_RELEASE; |
5854 | setup(dev); | 6085 | setup(dev); |
5855 | 6086 | ||