diff options
author | Nicolas Dichtel <nicolas.dichtel@6wind.com> | 2012-10-21 23:42:09 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-10-23 02:38:32 -0400 |
commit | 51ebd3181572af8d5076808dab2682d800f6da5d (patch) | |
tree | 41bce53f5ed82791ea5975f6ce708f54729659b4 /net/ipv6/ip6_fib.c | |
parent | d94ce9b283736a876b2e6dec665c68e5e8b5d55e (diff) |
ipv6: add support of equal cost multipath (ECMP)
Each nexthop is added as a single route in the routing table. All routes
that have the same metric/weight and destination but not the same gateway
are considered ECMP routes. They are linked together through a list called
rt6i_siblings.
ECMP routes can be added in one shot, with the RTA_MULTIPATH attribute, or one
after the other (in both cases, the flag NLM_F_EXCL should not be set).
The patch is based on previous work from
Luc Saillard <luc.saillard@6wind.com>.
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6/ip6_fib.c')
-rw-r--r-- | net/ipv6/ip6_fib.c | 57 |
1 files changed, 57 insertions, 0 deletions
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 24995a93ef8c..710cafd2e1a9 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c | |||
@@ -672,6 +672,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, | |||
672 | iter->rt6i_idev == rt->rt6i_idev && | 672 | iter->rt6i_idev == rt->rt6i_idev && |
673 | ipv6_addr_equal(&iter->rt6i_gateway, | 673 | ipv6_addr_equal(&iter->rt6i_gateway, |
674 | &rt->rt6i_gateway)) { | 674 | &rt->rt6i_gateway)) { |
675 | if (rt->rt6i_nsiblings) | ||
676 | rt->rt6i_nsiblings = 0; | ||
675 | if (!(iter->rt6i_flags & RTF_EXPIRES)) | 677 | if (!(iter->rt6i_flags & RTF_EXPIRES)) |
676 | return -EEXIST; | 678 | return -EEXIST; |
677 | if (!(rt->rt6i_flags & RTF_EXPIRES)) | 679 | if (!(rt->rt6i_flags & RTF_EXPIRES)) |
@@ -680,6 +682,21 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, | |||
680 | rt6_set_expires(iter, rt->dst.expires); | 682 | rt6_set_expires(iter, rt->dst.expires); |
681 | return -EEXIST; | 683 | return -EEXIST; |
682 | } | 684 | } |
685 | /* If we have the same destination and the same metric, | ||
686 | * but not the same gateway, then the route we try to | ||
687 | * add is sibling to this route, increment our counter | ||
688 | * of siblings, and later we will add our route to the | ||
689 | * list. | ||
690 | * Only static routes (which don't have flag | ||
691 | * RTF_EXPIRES) are used for ECMPv6. | ||
692 | * | ||
693 | * To avoid long lists, we only add siblings if the | ||
694 | * route has a gateway. | ||
695 | */ | ||
696 | if (rt->rt6i_flags & RTF_GATEWAY && | ||
697 | !(rt->rt6i_flags & RTF_EXPIRES) && | ||
698 | !(iter->rt6i_flags & RTF_EXPIRES)) | ||
699 | rt->rt6i_nsiblings++; | ||
683 | } | 700 | } |
684 | 701 | ||
685 | if (iter->rt6i_metric > rt->rt6i_metric) | 702 | if (iter->rt6i_metric > rt->rt6i_metric) |
@@ -692,6 +709,35 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, | |||
692 | if (ins == &fn->leaf) | 709 | if (ins == &fn->leaf) |
693 | fn->rr_ptr = NULL; | 710 | fn->rr_ptr = NULL; |
694 | 711 | ||
712 | /* Link this route to others same route. */ | ||
713 | if (rt->rt6i_nsiblings) { | ||
714 | unsigned int rt6i_nsiblings; | ||
715 | struct rt6_info *sibling, *temp_sibling; | ||
716 | |||
717 | /* Find the first route that has the same metric */ | ||
718 | sibling = fn->leaf; | ||
719 | while (sibling) { | ||
720 | if (sibling->rt6i_metric == rt->rt6i_metric) { | ||
721 | list_add_tail(&rt->rt6i_siblings, | ||
722 | &sibling->rt6i_siblings); | ||
723 | break; | ||
724 | } | ||
725 | sibling = sibling->dst.rt6_next; | ||
726 | } | ||
727 | /* For each sibling in the list, increment the counter of | ||
728 | * siblings. BUG() if the counters do not match: the list of siblings | ||
729 | * is broken! | ||
730 | */ | ||
731 | rt6i_nsiblings = 0; | ||
732 | list_for_each_entry_safe(sibling, temp_sibling, | ||
733 | &rt->rt6i_siblings, rt6i_siblings) { | ||
734 | sibling->rt6i_nsiblings++; | ||
735 | BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings); | ||
736 | rt6i_nsiblings++; | ||
737 | } | ||
738 | BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings); | ||
739 | } | ||
740 | |||
695 | /* | 741 | /* |
696 | * insert node | 742 | * insert node |
697 | */ | 743 | */ |
@@ -1193,6 +1239,17 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, | |||
1193 | if (fn->rr_ptr == rt) | 1239 | if (fn->rr_ptr == rt) |
1194 | fn->rr_ptr = NULL; | 1240 | fn->rr_ptr = NULL; |
1195 | 1241 | ||
1242 | /* Remove this entry from other siblings */ | ||
1243 | if (rt->rt6i_nsiblings) { | ||
1244 | struct rt6_info *sibling, *next_sibling; | ||
1245 | |||
1246 | list_for_each_entry_safe(sibling, next_sibling, | ||
1247 | &rt->rt6i_siblings, rt6i_siblings) | ||
1248 | sibling->rt6i_nsiblings--; | ||
1249 | rt->rt6i_nsiblings = 0; | ||
1250 | list_del_init(&rt->rt6i_siblings); | ||
1251 | } | ||
1252 | |||
1196 | /* Adjust walkers */ | 1253 | /* Adjust walkers */ |
1197 | read_lock(&fib6_walker_lock); | 1254 | read_lock(&fib6_walker_lock); |
1198 | FOR_WALKERS(w) { | 1255 | FOR_WALKERS(w) { |