aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorMichal Kubeček <mkubecek@suse.cz>2015-05-18 14:54:00 -0400
committerDavid S. Miller <davem@davemloft.net>2015-05-20 12:02:26 -0400
commit27596472473a02cfef2908a6bcda7e55264ba6b7 (patch)
tree2151136cf03e0dadcff04de695f10f91633acf4f /net
parent35f1b4e96b9258a3668872b1139c51e5a23eb876 (diff)
ipv6: fix ECMP route replacement
When replacing an IPv6 multipath route with "ip route replace", i.e. NLM_F_CREATE | NLM_F_REPLACE, fib6_add_rt2node() replaces only first matching route without fixing its siblings, resulting in corrupted siblings linked list; removing one of the siblings can then end in an infinite loop. IPv6 ECMP implementation is a bit different from IPv4 so that route replacement cannot work in exactly the same way. This should be a reasonable approximation: 1. If the new route is ECMP-able and there is a matching ECMP-able one already, replace it and all its siblings (if any). 2. If the new route is ECMP-able and no matching ECMP-able route exists, replace first matching non-ECMP-able (if any) or just add the new one. 3. If the new route is not ECMP-able, replace first matching non-ECMP-able route (if any) or add the new route. We also need to remove the NLM_F_REPLACE flag after replacing old route(s) by first nexthop of an ECMP route so that each subsequent nexthop does not replace previous one. Fixes: 51ebd3181572 ("ipv6: add support of equal cost multipath (ECMP)") Signed-off-by: Michal Kubecek <mkubecek@suse.cz> Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv6/ip6_fib.c39
-rw-r--r--net/ipv6/route.c11
2 files changed, 44 insertions, 6 deletions
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 96dbffff5a24..bde57b113009 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -693,6 +693,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
693{ 693{
694 struct rt6_info *iter = NULL; 694 struct rt6_info *iter = NULL;
695 struct rt6_info **ins; 695 struct rt6_info **ins;
696 struct rt6_info **fallback_ins = NULL;
696 int replace = (info->nlh && 697 int replace = (info->nlh &&
697 (info->nlh->nlmsg_flags & NLM_F_REPLACE)); 698 (info->nlh->nlmsg_flags & NLM_F_REPLACE));
698 int add = (!info->nlh || 699 int add = (!info->nlh ||
@@ -716,8 +717,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
716 (info->nlh->nlmsg_flags & NLM_F_EXCL)) 717 (info->nlh->nlmsg_flags & NLM_F_EXCL))
717 return -EEXIST; 718 return -EEXIST;
718 if (replace) { 719 if (replace) {
719 found++; 720 if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
720 break; 721 found++;
722 break;
723 }
724 if (rt_can_ecmp)
725 fallback_ins = fallback_ins ?: ins;
726 goto next_iter;
721 } 727 }
722 728
723 if (iter->dst.dev == rt->dst.dev && 729 if (iter->dst.dev == rt->dst.dev &&
@@ -753,9 +759,17 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
753 if (iter->rt6i_metric > rt->rt6i_metric) 759 if (iter->rt6i_metric > rt->rt6i_metric)
754 break; 760 break;
755 761
762next_iter:
756 ins = &iter->dst.rt6_next; 763 ins = &iter->dst.rt6_next;
757 } 764 }
758 765
766 if (fallback_ins && !found) {
767 /* No ECMP-able route found, replace first non-ECMP one */
768 ins = fallback_ins;
769 iter = *ins;
770 found++;
771 }
772
759 /* Reset round-robin state, if necessary */ 773 /* Reset round-robin state, if necessary */
760 if (ins == &fn->leaf) 774 if (ins == &fn->leaf)
761 fn->rr_ptr = NULL; 775 fn->rr_ptr = NULL;
@@ -815,6 +829,8 @@ add:
815 } 829 }
816 830
817 } else { 831 } else {
832 int nsiblings;
833
818 if (!found) { 834 if (!found) {
819 if (add) 835 if (add)
820 goto add; 836 goto add;
@@ -835,8 +851,27 @@ add:
835 info->nl_net->ipv6.rt6_stats->fib_route_nodes++; 851 info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
836 fn->fn_flags |= RTN_RTINFO; 852 fn->fn_flags |= RTN_RTINFO;
837 } 853 }
854 nsiblings = iter->rt6i_nsiblings;
838 fib6_purge_rt(iter, fn, info->nl_net); 855 fib6_purge_rt(iter, fn, info->nl_net);
839 rt6_release(iter); 856 rt6_release(iter);
857
858 if (nsiblings) {
859 /* Replacing an ECMP route, remove all siblings */
860 ins = &rt->dst.rt6_next;
861 iter = *ins;
862 while (iter) {
863 if (rt6_qualify_for_ecmp(iter)) {
864 *ins = iter->dst.rt6_next;
865 fib6_purge_rt(iter, fn, info->nl_net);
866 rt6_release(iter);
867 nsiblings--;
868 } else {
869 ins = &iter->dst.rt6_next;
870 }
871 iter = *ins;
872 }
873 WARN_ON(nsiblings != 0);
874 }
840 } 875 }
841 876
842 return 0; 877 return 0;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3821a3517478..c73ae5039e46 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2541,11 +2541,14 @@ beginning:
2541 } 2541 }
2542 } 2542 }
2543 /* Because each route is added like a single route we remove 2543 /* Because each route is added like a single route we remove
2544 * this flag after the first nexthop (if there is a collision, 2544 * these flags after the first nexthop: if there is a collision,
2545 * we have already fail to add the first nexthop: 2545 * we have already failed to add the first nexthop:
2546 * fib6_add_rt2node() has reject it). 2546 * fib6_add_rt2node() has rejected it; when replacing, old
2547 * nexthops have been replaced by first new, the rest should
2548 * be added to it.
2547 */ 2549 */
2548 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL; 2550 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2551 NLM_F_REPLACE);
2549 rtnh = rtnh_next(rtnh, &remaining); 2552 rtnh = rtnh_next(rtnh, &remaining);
2550 } 2553 }
2551 2554