diff options
author | Michal Kubeček <mkubecek@suse.cz> | 2015-05-18 14:54:00 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-05-20 12:02:26 -0400 |
commit | 27596472473a02cfef2908a6bcda7e55264ba6b7 (patch) | |
tree | 2151136cf03e0dadcff04de695f10f91633acf4f /net | |
parent | 35f1b4e96b9258a3668872b1139c51e5a23eb876 (diff) |
ipv6: fix ECMP route replacement
When replacing an IPv6 multipath route with "ip route replace", i.e.
NLM_F_CREATE | NLM_F_REPLACE, fib6_add_rt2node() replaces only the first
matching route without fixing its siblings, resulting in a corrupted
sibling linked list; removing one of the siblings can then end in an
infinite loop.
The IPv6 ECMP implementation is a bit different from the IPv4 one, so route
replacement cannot work in exactly the same way. This should be a
reasonable approximation:
1. If the new route is ECMP-able and there is already a matching ECMP-able
one, replace it and all its siblings (if any).
2. If the new route is ECMP-able and no matching ECMP-able route exists,
replace the first matching non-ECMP-able route (if any) or just add the new one.
3. If the new route is not ECMP-able, replace the first matching
non-ECMP-able route (if any) or add the new route.
We also need to remove the NLM_F_REPLACE flag after replacing the old
route(s) with the first nexthop of an ECMP route so that each subsequent
nexthop does not replace the previous one.
Fixes: 51ebd3181572 ("ipv6: add support of equal cost multipath (ECMP)")
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv6/ip6_fib.c | 39 | ||||
-rw-r--r-- | net/ipv6/route.c | 11 |
2 files changed, 44 insertions, 6 deletions
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 96dbffff5a24..bde57b113009 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c | |||
@@ -693,6 +693,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, | |||
693 | { | 693 | { |
694 | struct rt6_info *iter = NULL; | 694 | struct rt6_info *iter = NULL; |
695 | struct rt6_info **ins; | 695 | struct rt6_info **ins; |
696 | struct rt6_info **fallback_ins = NULL; | ||
696 | int replace = (info->nlh && | 697 | int replace = (info->nlh && |
697 | (info->nlh->nlmsg_flags & NLM_F_REPLACE)); | 698 | (info->nlh->nlmsg_flags & NLM_F_REPLACE)); |
698 | int add = (!info->nlh || | 699 | int add = (!info->nlh || |
@@ -716,8 +717,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, | |||
716 | (info->nlh->nlmsg_flags & NLM_F_EXCL)) | 717 | (info->nlh->nlmsg_flags & NLM_F_EXCL)) |
717 | return -EEXIST; | 718 | return -EEXIST; |
718 | if (replace) { | 719 | if (replace) { |
719 | found++; | 720 | if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) { |
720 | break; | 721 | found++; |
722 | break; | ||
723 | } | ||
724 | if (rt_can_ecmp) | ||
725 | fallback_ins = fallback_ins ?: ins; | ||
726 | goto next_iter; | ||
721 | } | 727 | } |
722 | 728 | ||
723 | if (iter->dst.dev == rt->dst.dev && | 729 | if (iter->dst.dev == rt->dst.dev && |
@@ -753,9 +759,17 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, | |||
753 | if (iter->rt6i_metric > rt->rt6i_metric) | 759 | if (iter->rt6i_metric > rt->rt6i_metric) |
754 | break; | 760 | break; |
755 | 761 | ||
762 | next_iter: | ||
756 | ins = &iter->dst.rt6_next; | 763 | ins = &iter->dst.rt6_next; |
757 | } | 764 | } |
758 | 765 | ||
766 | if (fallback_ins && !found) { | ||
767 | /* No ECMP-able route found, replace first non-ECMP one */ | ||
768 | ins = fallback_ins; | ||
769 | iter = *ins; | ||
770 | found++; | ||
771 | } | ||
772 | |||
759 | /* Reset round-robin state, if necessary */ | 773 | /* Reset round-robin state, if necessary */ |
760 | if (ins == &fn->leaf) | 774 | if (ins == &fn->leaf) |
761 | fn->rr_ptr = NULL; | 775 | fn->rr_ptr = NULL; |
@@ -815,6 +829,8 @@ add: | |||
815 | } | 829 | } |
816 | 830 | ||
817 | } else { | 831 | } else { |
832 | int nsiblings; | ||
833 | |||
818 | if (!found) { | 834 | if (!found) { |
819 | if (add) | 835 | if (add) |
820 | goto add; | 836 | goto add; |
@@ -835,8 +851,27 @@ add: | |||
835 | info->nl_net->ipv6.rt6_stats->fib_route_nodes++; | 851 | info->nl_net->ipv6.rt6_stats->fib_route_nodes++; |
836 | fn->fn_flags |= RTN_RTINFO; | 852 | fn->fn_flags |= RTN_RTINFO; |
837 | } | 853 | } |
854 | nsiblings = iter->rt6i_nsiblings; | ||
838 | fib6_purge_rt(iter, fn, info->nl_net); | 855 | fib6_purge_rt(iter, fn, info->nl_net); |
839 | rt6_release(iter); | 856 | rt6_release(iter); |
857 | |||
858 | if (nsiblings) { | ||
859 | /* Replacing an ECMP route, remove all siblings */ | ||
860 | ins = &rt->dst.rt6_next; | ||
861 | iter = *ins; | ||
862 | while (iter) { | ||
863 | if (rt6_qualify_for_ecmp(iter)) { | ||
864 | *ins = iter->dst.rt6_next; | ||
865 | fib6_purge_rt(iter, fn, info->nl_net); | ||
866 | rt6_release(iter); | ||
867 | nsiblings--; | ||
868 | } else { | ||
869 | ins = &iter->dst.rt6_next; | ||
870 | } | ||
871 | iter = *ins; | ||
872 | } | ||
873 | WARN_ON(nsiblings != 0); | ||
874 | } | ||
840 | } | 875 | } |
841 | 876 | ||
842 | return 0; | 877 | return 0; |
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3821a3517478..c73ae5039e46 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -2541,11 +2541,14 @@ beginning: | |||
2541 | } | 2541 | } |
2542 | } | 2542 | } |
2543 | /* Because each route is added like a single route we remove | 2543 | /* Because each route is added like a single route we remove |
2544 | * this flag after the first nexthop (if there is a collision, | 2544 | * these flags after the first nexthop: if there is a collision, |
2545 | * we have already fail to add the first nexthop: | 2545 | * we have already failed to add the first nexthop: |
2546 | * fib6_add_rt2node() has reject it). | 2546 | * fib6_add_rt2node() has rejected it; when replacing, old |
2547 | * nexthops have been replaced by first new, the rest should | ||
2548 | * be added to it. | ||
2547 | */ | 2549 | */ |
2548 | cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL; | 2550 | cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | |
2551 | NLM_F_REPLACE); | ||
2549 | rtnh = rtnh_next(rtnh, &remaining); | 2552 | rtnh = rtnh_next(rtnh, &remaining); |
2550 | } | 2553 | } |
2551 | 2554 | ||