summaryrefslogtreecommitdiffstats
path: root/net/ipv6/route.c
diff options
context:
space:
mode:
authorRoopa Prabhu <roopa@cumulusnetworks.com>2015-09-08 13:53:04 -0400
committerDavid S. Miller <davem@davemloft.net>2015-09-09 17:07:50 -0400
commit6b9ea5a64ed5eeb3f68f2e6fcce0ed1179801d1e (patch)
tree35cbd65c3e162c5484bb0c0ef7befa5b7cc5911d /net/ipv6/route.c
parent592867bfabe2fcb449393ba7eb0de4f972a08c63 (diff)
ipv6: fix multipath route replace error recovery
Problem: The ecmp route replace support for ipv6 in the kernel deletes the existing ecmp route too early, i.e. when it installs the first nexthop. If there is an error in installing the subsequent nexthops, it's too late to recover the already deleted existing route, leaving the fib in an inconsistent state. This patch reduces the possibility of this by doing the following: a) Changes the existing multipath route add code to a two-stage process: build rt6_infos + insert them. ip6_route_add rt6_info creation code is moved into ip6_route_info_create. b) This ensures that most errors are caught during building rt6_infos and we fail early. c) Separates multipath add and del code, because add needs the special two-stage mode in a) and delete essentially does not care. d) In any event, if the code fails during inserting a route again, a warning is printed (this should be unlikely). Before the patch: $ip -6 route show 3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024 3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024 3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024 /* Try replacing the route with a duplicate nexthop */ $ip -6 route change 3000:1000:1000:1000::2/128 nexthop via fe80::202:ff:fe00:b dev swp49s0 nexthop via fe80::202:ff:fe00:d dev swp49s1 nexthop via fe80::202:ff:fe00:d dev swp49s1 RTNETLINK answers: File exists $ip -6 route show /* previously added ecmp route 3000:1000:1000:1000::2 disappears from * kernel */ After the patch: $ip -6 route show 3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024 3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024 3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024 /* Try replacing the route with a duplicate nexthop */ $ip -6 route change 3000:1000:1000:1000::2/128 nexthop via fe80::202:ff:fe00:b dev swp49s0 nexthop via fe80::202:ff:fe00:d dev swp49s1 nexthop via fe80::202:ff:fe00:d dev swp49s1 RTNETLINK answers: 
File exists $ip -6 route show 3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024 3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024 3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024 Fixes: 27596472473a ("ipv6: fix ECMP route replacement") Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com> Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r--net/ipv6/route.c201
1 files changed, 175 insertions, 26 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f45cac6f8356..34539d3b843f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1748,7 +1748,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
1748 return -EINVAL; 1748 return -EINVAL;
1749} 1749}
1750 1750
1751int ip6_route_add(struct fib6_config *cfg) 1751int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
1752{ 1752{
1753 int err; 1753 int err;
1754 struct net *net = cfg->fc_nlinfo.nl_net; 1754 struct net *net = cfg->fc_nlinfo.nl_net;
@@ -1756,7 +1756,6 @@ int ip6_route_add(struct fib6_config *cfg)
1756 struct net_device *dev = NULL; 1756 struct net_device *dev = NULL;
1757 struct inet6_dev *idev = NULL; 1757 struct inet6_dev *idev = NULL;
1758 struct fib6_table *table; 1758 struct fib6_table *table;
1759 struct mx6_config mxc = { .mx = NULL, };
1760 int addr_type; 1759 int addr_type;
1761 1760
1762 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) 1761 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
@@ -1981,6 +1980,32 @@ install_route:
1981 1980
1982 cfg->fc_nlinfo.nl_net = dev_net(dev); 1981 cfg->fc_nlinfo.nl_net = dev_net(dev);
1983 1982
1983 *rt_ret = rt;
1984
1985 return 0;
1986out:
1987 if (dev)
1988 dev_put(dev);
1989 if (idev)
1990 in6_dev_put(idev);
1991 if (rt)
1992 dst_free(&rt->dst);
1993
1994 *rt_ret = NULL;
1995
1996 return err;
1997}
1998
1999int ip6_route_add(struct fib6_config *cfg)
2000{
2001 struct mx6_config mxc = { .mx = NULL, };
2002 struct rt6_info *rt = NULL;
2003 int err;
2004
2005 err = ip6_route_info_create(cfg, &rt);
2006 if (err)
2007 goto out;
2008
1984 err = ip6_convert_metrics(&mxc, cfg); 2009 err = ip6_convert_metrics(&mxc, cfg);
1985 if (err) 2010 if (err)
1986 goto out; 2011 goto out;
@@ -1988,14 +2013,12 @@ install_route:
1988 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc); 2013 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1989 2014
1990 kfree(mxc.mx); 2015 kfree(mxc.mx);
2016
1991 return err; 2017 return err;
1992out: 2018out:
1993 if (dev)
1994 dev_put(dev);
1995 if (idev)
1996 in6_dev_put(idev);
1997 if (rt) 2019 if (rt)
1998 dst_free(&rt->dst); 2020 dst_free(&rt->dst);
2021
1999 return err; 2022 return err;
2000} 2023}
2001 2024
@@ -2776,19 +2799,78 @@ errout:
2776 return err; 2799 return err;
2777} 2800}
2778 2801
2779static int ip6_route_multipath(struct fib6_config *cfg, int add) 2802struct rt6_nh {
2803 struct rt6_info *rt6_info;
2804 struct fib6_config r_cfg;
2805 struct mx6_config mxc;
2806 struct list_head next;
2807};
2808
2809static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2810{
2811 struct rt6_nh *nh;
2812
2813 list_for_each_entry(nh, rt6_nh_list, next) {
2814 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2815 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2816 nh->r_cfg.fc_ifindex);
2817 }
2818}
2819
2820static int ip6_route_info_append(struct list_head *rt6_nh_list,
2821 struct rt6_info *rt, struct fib6_config *r_cfg)
2822{
2823 struct rt6_nh *nh;
2824 struct rt6_info *rtnh;
2825 int err = -EEXIST;
2826
2827 list_for_each_entry(nh, rt6_nh_list, next) {
2828 /* check if rt6_info already exists */
2829 rtnh = nh->rt6_info;
2830
2831 if (rtnh->dst.dev == rt->dst.dev &&
2832 rtnh->rt6i_idev == rt->rt6i_idev &&
2833 ipv6_addr_equal(&rtnh->rt6i_gateway,
2834 &rt->rt6i_gateway))
2835 return err;
2836 }
2837
2838 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2839 if (!nh)
2840 return -ENOMEM;
2841 nh->rt6_info = rt;
2842 err = ip6_convert_metrics(&nh->mxc, r_cfg);
2843 if (err) {
2844 kfree(nh);
2845 return err;
2846 }
2847 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2848 list_add_tail(&nh->next, rt6_nh_list);
2849
2850 return 0;
2851}
2852
2853static int ip6_route_multipath_add(struct fib6_config *cfg)
2780{ 2854{
2781 struct fib6_config r_cfg; 2855 struct fib6_config r_cfg;
2782 struct rtnexthop *rtnh; 2856 struct rtnexthop *rtnh;
2857 struct rt6_info *rt;
2858 struct rt6_nh *err_nh;
2859 struct rt6_nh *nh, *nh_safe;
2783 int remaining; 2860 int remaining;
2784 int attrlen; 2861 int attrlen;
2785 int err = 0, last_err = 0; 2862 int err = 1;
2863 int nhn = 0;
2864 int replace = (cfg->fc_nlinfo.nlh &&
2865 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2866 LIST_HEAD(rt6_nh_list);
2786 2867
2787 remaining = cfg->fc_mp_len; 2868 remaining = cfg->fc_mp_len;
2788beginning:
2789 rtnh = (struct rtnexthop *)cfg->fc_mp; 2869 rtnh = (struct rtnexthop *)cfg->fc_mp;
2790 2870
2791 /* Parse a Multipath Entry */ 2871 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2872 * rt6_info structs per nexthop
2873 */
2792 while (rtnh_ok(rtnh, remaining)) { 2874 while (rtnh_ok(rtnh, remaining)) {
2793 memcpy(&r_cfg, cfg, sizeof(*cfg)); 2875 memcpy(&r_cfg, cfg, sizeof(*cfg));
2794 if (rtnh->rtnh_ifindex) 2876 if (rtnh->rtnh_ifindex)
@@ -2808,22 +2890,32 @@ beginning:
2808 if (nla) 2890 if (nla)
2809 r_cfg.fc_encap_type = nla_get_u16(nla); 2891 r_cfg.fc_encap_type = nla_get_u16(nla);
2810 } 2892 }
2811 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg); 2893
2894 err = ip6_route_info_create(&r_cfg, &rt);
2895 if (err)
2896 goto cleanup;
2897
2898 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2812 if (err) { 2899 if (err) {
2813 last_err = err; 2900 dst_free(&rt->dst);
2814 /* If we are trying to remove a route, do not stop the 2901 goto cleanup;
2815 * loop when ip6_route_del() fails (because next hop is 2902 }
2816 * already gone), we should try to remove all next hops. 2903
2817 */ 2904 rtnh = rtnh_next(rtnh, &remaining);
2818 if (add) { 2905 }
2819 /* If add fails, we should try to delete all 2906
2820 * next hops that have been already added. 2907 err_nh = NULL;
2821 */ 2908 list_for_each_entry(nh, &rt6_nh_list, next) {
2822 add = 0; 2909 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2823 remaining = cfg->fc_mp_len - remaining; 2910 /* nh->rt6_info is used or freed at this point, reset to NULL*/
2824 goto beginning; 2911 nh->rt6_info = NULL;
2825 } 2912 if (err) {
2913 if (replace && nhn)
2914 ip6_print_replace_route_err(&rt6_nh_list);
2915 err_nh = nh;
2916 goto add_errout;
2826 } 2917 }
2918
2827 /* Because each route is added like a single route we remove 2919 /* Because each route is added like a single route we remove
2828 * these flags after the first nexthop: if there is a collision, 2920 * these flags after the first nexthop: if there is a collision,
2829 * we have already failed to add the first nexthop: 2921 * we have already failed to add the first nexthop:
@@ -2833,6 +2925,63 @@ beginning:
2833 */ 2925 */
2834 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | 2926 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2835 NLM_F_REPLACE); 2927 NLM_F_REPLACE);
2928 nhn++;
2929 }
2930
2931 goto cleanup;
2932
2933add_errout:
2934 /* Delete routes that were already added */
2935 list_for_each_entry(nh, &rt6_nh_list, next) {
2936 if (err_nh == nh)
2937 break;
2938 ip6_route_del(&nh->r_cfg);
2939 }
2940
2941cleanup:
2942 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2943 if (nh->rt6_info)
2944 dst_free(&nh->rt6_info->dst);
2945 if (nh->mxc.mx)
2946 kfree(nh->mxc.mx);
2947 list_del(&nh->next);
2948 kfree(nh);
2949 }
2950
2951 return err;
2952}
2953
2954static int ip6_route_multipath_del(struct fib6_config *cfg)
2955{
2956 struct fib6_config r_cfg;
2957 struct rtnexthop *rtnh;
2958 int remaining;
2959 int attrlen;
2960 int err = 1, last_err = 0;
2961
2962 remaining = cfg->fc_mp_len;
2963 rtnh = (struct rtnexthop *)cfg->fc_mp;
2964
2965 /* Parse a Multipath Entry */
2966 while (rtnh_ok(rtnh, remaining)) {
2967 memcpy(&r_cfg, cfg, sizeof(*cfg));
2968 if (rtnh->rtnh_ifindex)
2969 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2970
2971 attrlen = rtnh_attrlen(rtnh);
2972 if (attrlen > 0) {
2973 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2974
2975 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2976 if (nla) {
2977 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2978 r_cfg.fc_flags |= RTF_GATEWAY;
2979 }
2980 }
2981 err = ip6_route_del(&r_cfg);
2982 if (err)
2983 last_err = err;
2984
2836 rtnh = rtnh_next(rtnh, &remaining); 2985 rtnh = rtnh_next(rtnh, &remaining);
2837 } 2986 }
2838 2987
@@ -2849,7 +2998,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2849 return err; 2998 return err;
2850 2999
2851 if (cfg.fc_mp) 3000 if (cfg.fc_mp)
2852 return ip6_route_multipath(&cfg, 0); 3001 return ip6_route_multipath_del(&cfg);
2853 else 3002 else
2854 return ip6_route_del(&cfg); 3003 return ip6_route_del(&cfg);
2855} 3004}
@@ -2864,7 +3013,7 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2864 return err; 3013 return err;
2865 3014
2866 if (cfg.fc_mp) 3015 if (cfg.fc_mp)
2867 return ip6_route_multipath(&cfg, 1); 3016 return ip6_route_multipath_add(&cfg);
2868 else 3017 else
2869 return ip6_route_add(&cfg); 3018 return ip6_route_add(&cfg);
2870} 3019}