aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net>, 2018-01-07 21:29:41 -0500
committer: David S. Miller <davem@davemloft.net>, 2018-01-07 21:29:41 -0500
commit: f66faae2f80a45feafc04ce63ef744ac4b6e8c05 (patch)
tree: c7e921c94f3e0522bd99a79d156f02f39fd07e49
parent: 7f0b800048b562d716372466ea8d9de648c422dd (diff)
parent: 82e45b6fd29246f36ff8064e74d412c11feaab23 (diff)
Merge branch 'ipv6-ipv4-nexthop-align'
Ido Schimmel says:

====================
ipv6: Align nexthop behaviour with IPv4

This set tries to eliminate some differences between IPv4's and IPv6's treatment of nexthops. These differences are most likely a side effect of IPv6's data structures (specifically 'rt6_info') that incorporate both the route and the nexthop and the late addition of ECMP support in commit 51ebd3181572 ("ipv6: add support of equal cost multipath (ECMP)").

IPv4 and IPv6 do not react the same to certain netdev events. For example, upon carrier change affected IPv4 nexthops are marked using the RTNH_F_LINKDOWN flag and the nexthop group is rebalanced accordingly. IPv6 on the other hand, does nothing which forces us to perform a carrier check during route lookup and dump. This makes it difficult to introduce features such as non-equal-cost multipath that are built on top of this set [1].

In addition, when a netdev is put administratively down IPv4 nexthops are marked using the RTNH_F_DEAD flag, whereas IPv6 simply flushes all the routes using these nexthops. To be consistent with IPv4, multipath routes should only be flushed when all nexthops in the group are considered dead.

The first 12 patches introduce non-functional changes that store the RTNH_F_DEAD and RTNH_F_LINKDOWN flags in IPv6 routes based on netdev events, in a similar fashion to IPv4. This allows us to remove the carrier check performed during route lookup and dump.

The next three patches make sure we only flush a multipath route when all of its nexthops are dead.

Last three patches add test cases for IPv4/IPv6 FIB. These verify that both address families react similarly to netdev events.

Finally, this series also serves as a good first step towards David Ahern's goal of treating nexthops as standalone objects [2], as it makes the code more in line with IPv4 where the nexthop and the nexthop group are separate objects from the route itself.

1. https://github.com/idosch/linux/tree/ipv6-nexthops
2. http://vger.kernel.org/netconf2017_files/nexthop-objects.pdf

Changes since RFC (feedback from David Ahern):
* Remove redundant declaration of rt6_ifdown() in patch 4 and adjust comment referencing it accordingly
* Drop patch to flush multipath routes upon NETDEV_UNREGISTER. Reword cover letter accordingly
* Use a temporary variable to make code more readable in patch 15
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/ip6_fib.h4
-rw-r--r--include/net/ip6_route.h4
-rw-r--r--net/ipv6/addrconf.c9
-rw-r--r--net/ipv6/ip6_fib.c28
-rw-r--r--net/ipv6/route.c185
-rw-r--r--tools/testing/selftests/net/Makefile1
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh429
7 files changed, 618 insertions, 42 deletions
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 44d96a91e745..ddf53dd1e948 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -173,7 +173,8 @@ struct rt6_info {
173 unsigned short rt6i_nfheader_len; 173 unsigned short rt6i_nfheader_len;
174 u8 rt6i_protocol; 174 u8 rt6i_protocol;
175 u8 exception_bucket_flushed:1, 175 u8 exception_bucket_flushed:1,
176 unused:7; 176 should_flush:1,
177 unused:6;
177}; 178};
178 179
179#define for_each_fib6_node_rt_rcu(fn) \ 180#define for_each_fib6_node_rt_rcu(fn) \
@@ -404,6 +405,7 @@ unsigned int fib6_tables_seq_read(struct net *net);
404int fib6_tables_dump(struct net *net, struct notifier_block *nb); 405int fib6_tables_dump(struct net *net, struct notifier_block *nb);
405 406
406void fib6_update_sernum(struct rt6_info *rt); 407void fib6_update_sernum(struct rt6_info *rt);
408void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt);
407 409
408#ifdef CONFIG_IPV6_MULTIPLE_TABLES 410#ifdef CONFIG_IPV6_MULTIPLE_TABLES
409int fib6_rules_init(void); 411int fib6_rules_init(void);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 18e442ea93d8..34cd3b0c6ded 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -165,10 +165,12 @@ struct rt6_rtnl_dump_arg {
165}; 165};
166 166
167int rt6_dump_route(struct rt6_info *rt, void *p_arg); 167int rt6_dump_route(struct rt6_info *rt, void *p_arg);
168void rt6_ifdown(struct net *net, struct net_device *dev);
169void rt6_mtu_change(struct net_device *dev, unsigned int mtu); 168void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
170void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); 169void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
171void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); 170void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
171void rt6_sync_up(struct net_device *dev, unsigned int nh_flags);
172void rt6_disable_ip(struct net_device *dev, unsigned long event);
173void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
172 174
173static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) 175static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb)
174{ 176{
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ed06b1190f05..2435f7ab070b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3438,6 +3438,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
3438 } else if (event == NETDEV_CHANGE) { 3438 } else if (event == NETDEV_CHANGE) {
3439 if (!addrconf_link_ready(dev)) { 3439 if (!addrconf_link_ready(dev)) {
3440 /* device is still not ready. */ 3440 /* device is still not ready. */
3441 rt6_sync_down_dev(dev, event);
3441 break; 3442 break;
3442 } 3443 }
3443 3444
@@ -3449,6 +3450,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
3449 * multicast snooping switches 3450 * multicast snooping switches
3450 */ 3451 */
3451 ipv6_mc_up(idev); 3452 ipv6_mc_up(idev);
3453 rt6_sync_up(dev, RTNH_F_LINKDOWN);
3452 break; 3454 break;
3453 } 3455 }
3454 idev->if_flags |= IF_READY; 3456 idev->if_flags |= IF_READY;
@@ -3484,6 +3486,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
3484 if (run_pending) 3486 if (run_pending)
3485 addrconf_dad_run(idev); 3487 addrconf_dad_run(idev);
3486 3488
3489 /* Device has an address by now */
3490 rt6_sync_up(dev, RTNH_F_DEAD);
3491
3487 /* 3492 /*
3488 * If the MTU changed during the interface down, 3493 * If the MTU changed during the interface down,
3489 * when the interface up, the changed MTU must be 3494 * when the interface up, the changed MTU must be
@@ -3577,6 +3582,7 @@ static bool addr_is_local(const struct in6_addr *addr)
3577 3582
3578static int addrconf_ifdown(struct net_device *dev, int how) 3583static int addrconf_ifdown(struct net_device *dev, int how)
3579{ 3584{
3585 unsigned long event = how ? NETDEV_UNREGISTER : NETDEV_DOWN;
3580 struct net *net = dev_net(dev); 3586 struct net *net = dev_net(dev);
3581 struct inet6_dev *idev; 3587 struct inet6_dev *idev;
3582 struct inet6_ifaddr *ifa, *tmp; 3588 struct inet6_ifaddr *ifa, *tmp;
@@ -3586,8 +3592,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
3586 3592
3587 ASSERT_RTNL(); 3593 ASSERT_RTNL();
3588 3594
3589 rt6_ifdown(net, dev); 3595 rt6_disable_ip(dev, event);
3590 neigh_ifdown(&nd_tbl, dev);
3591 3596
3592 idev = __in6_dev_get(dev); 3597 idev = __in6_dev_get(dev);
3593 if (!idev) 3598 if (!idev)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index a64d559fa513..edda5ad3b405 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -107,16 +107,13 @@ enum {
107 107
108void fib6_update_sernum(struct rt6_info *rt) 108void fib6_update_sernum(struct rt6_info *rt)
109{ 109{
110 struct fib6_table *table = rt->rt6i_table;
111 struct net *net = dev_net(rt->dst.dev); 110 struct net *net = dev_net(rt->dst.dev);
112 struct fib6_node *fn; 111 struct fib6_node *fn;
113 112
114 spin_lock_bh(&table->tb6_lock);
115 fn = rcu_dereference_protected(rt->rt6i_node, 113 fn = rcu_dereference_protected(rt->rt6i_node,
116 lockdep_is_held(&table->tb6_lock)); 114 lockdep_is_held(&rt->rt6i_table->tb6_lock));
117 if (fn) 115 if (fn)
118 fn->fn_sernum = fib6_new_sernum(net); 116 fn->fn_sernum = fib6_new_sernum(net);
119 spin_unlock_bh(&table->tb6_lock);
120} 117}
121 118
122/* 119/*
@@ -1102,8 +1099,8 @@ void fib6_force_start_gc(struct net *net)
1102 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); 1099 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
1103} 1100}
1104 1101
1105static void fib6_update_sernum_upto_root(struct rt6_info *rt, 1102static void __fib6_update_sernum_upto_root(struct rt6_info *rt,
1106 int sernum) 1103 int sernum)
1107{ 1104{
1108 struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, 1105 struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
1109 lockdep_is_held(&rt->rt6i_table->tb6_lock)); 1106 lockdep_is_held(&rt->rt6i_table->tb6_lock));
@@ -1117,6 +1114,11 @@ static void fib6_update_sernum_upto_root(struct rt6_info *rt,
1117 } 1114 }
1118} 1115}
1119 1116
/*
 * Allocate a fresh serial number for @net and hand it, together with @rt,
 * to __fib6_update_sernum_upto_root().
 */
void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt)
{
	int sernum = fib6_new_sernum(net);

	__fib6_update_sernum_upto_root(rt, sernum);
}
1121
1120/* 1122/*
1121 * Add routing information to the routing tree. 1123 * Add routing information to the routing tree.
1122 * <destination addr>/<source addr> 1124 * <destination addr>/<source addr>
@@ -1230,7 +1232,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
1230 1232
1231 err = fib6_add_rt2node(fn, rt, info, mxc, extack); 1233 err = fib6_add_rt2node(fn, rt, info, mxc, extack);
1232 if (!err) { 1234 if (!err) {
1233 fib6_update_sernum_upto_root(rt, sernum); 1235 __fib6_update_sernum_upto_root(rt, sernum);
1234 fib6_start_gc(info->nl_net, rt); 1236 fib6_start_gc(info->nl_net, rt);
1235 } 1237 }
1236 1238
@@ -1887,7 +1889,7 @@ static int fib6_clean_node(struct fib6_walker *w)
1887 1889
1888 for_each_fib6_walker_rt(w) { 1890 for_each_fib6_walker_rt(w) {
1889 res = c->func(rt, c->arg); 1891 res = c->func(rt, c->arg);
1890 if (res < 0) { 1892 if (res == -1) {
1891 w->leaf = rt; 1893 w->leaf = rt;
1892 res = fib6_del(rt, &info); 1894 res = fib6_del(rt, &info);
1893 if (res) { 1895 if (res) {
@@ -1900,6 +1902,12 @@ static int fib6_clean_node(struct fib6_walker *w)
1900 continue; 1902 continue;
1901 } 1903 }
1902 return 0; 1904 return 0;
1905 } else if (res == -2) {
1906 if (WARN_ON(!rt->rt6i_nsiblings))
1907 continue;
1908 rt = list_last_entry(&rt->rt6i_siblings,
1909 struct rt6_info, rt6i_siblings);
1910 continue;
1903 } 1911 }
1904 WARN_ON(res != 0); 1912 WARN_ON(res != 0);
1905 } 1913 }
@@ -1911,7 +1919,8 @@ static int fib6_clean_node(struct fib6_walker *w)
1911 * Convenient frontend to tree walker. 1919 * Convenient frontend to tree walker.
1912 * 1920 *
1913 * func is called on each route. 1921 * func is called on each route.
1914 * It may return -1 -> delete this route. 1922 * It may return -2 -> skip multipath route.
1923 * -1 -> delete this route.
1915 * 0 -> continue walking 1924 * 0 -> continue walking
1916 */ 1925 */
1917 1926
@@ -2103,7 +2112,6 @@ static void fib6_net_exit(struct net *net)
2103{ 2112{
2104 unsigned int i; 2113 unsigned int i;
2105 2114
2106 rt6_ifdown(net, NULL);
2107 del_timer_sync(&net->ipv6.ip6_fib_timer); 2115 del_timer_sync(&net->ipv6.ip6_fib_timer);
2108 2116
2109 for (i = 0; i < FIB6_TABLE_HASHSZ; i++) { 2117 for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 2490280b3394..1054b059747f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -474,7 +474,9 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
474 if (route_choosen == 0) { 474 if (route_choosen == 0) {
475 struct inet6_dev *idev = sibling->rt6i_idev; 475 struct inet6_dev *idev = sibling->rt6i_idev;
476 476
477 if (!netif_carrier_ok(sibling->dst.dev) && 477 if (sibling->rt6i_nh_flags & RTNH_F_DEAD)
478 break;
479 if (sibling->rt6i_nh_flags & RTNH_F_LINKDOWN &&
478 idev->cnf.ignore_routes_with_linkdown) 480 idev->cnf.ignore_routes_with_linkdown)
479 break; 481 break;
480 if (rt6_score_route(sibling, oif, strict) < 0) 482 if (rt6_score_route(sibling, oif, strict) < 0)
@@ -499,12 +501,15 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
499 struct rt6_info *local = NULL; 501 struct rt6_info *local = NULL;
500 struct rt6_info *sprt; 502 struct rt6_info *sprt;
501 503
502 if (!oif && ipv6_addr_any(saddr)) 504 if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD))
503 goto out; 505 return rt;
504 506
505 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) { 507 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
506 struct net_device *dev = sprt->dst.dev; 508 struct net_device *dev = sprt->dst.dev;
507 509
510 if (sprt->rt6i_nh_flags & RTNH_F_DEAD)
511 continue;
512
508 if (oif) { 513 if (oif) {
509 if (dev->ifindex == oif) 514 if (dev->ifindex == oif)
510 return sprt; 515 return sprt;
@@ -533,8 +538,8 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
533 if (flags & RT6_LOOKUP_F_IFACE) 538 if (flags & RT6_LOOKUP_F_IFACE)
534 return net->ipv6.ip6_null_entry; 539 return net->ipv6.ip6_null_entry;
535 } 540 }
536out: 541
537 return rt; 542 return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
538} 543}
539 544
540#ifdef CONFIG_IPV6_ROUTER_PREF 545#ifdef CONFIG_IPV6_ROUTER_PREF
@@ -679,10 +684,12 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
679 int m; 684 int m;
680 bool match_do_rr = false; 685 bool match_do_rr = false;
681 struct inet6_dev *idev = rt->rt6i_idev; 686 struct inet6_dev *idev = rt->rt6i_idev;
682 struct net_device *dev = rt->dst.dev;
683 687
684 if (dev && !netif_carrier_ok(dev) && 688 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
685 idev->cnf.ignore_routes_with_linkdown && 689 goto out;
690
691 if (idev->cnf.ignore_routes_with_linkdown &&
692 rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
686 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) 693 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
687 goto out; 694 goto out;
688 695
@@ -1346,7 +1353,9 @@ out:
1346 1353
1347 /* Update fn->fn_sernum to invalidate all cached dst */ 1354 /* Update fn->fn_sernum to invalidate all cached dst */
1348 if (!err) { 1355 if (!err) {
1356 spin_lock_bh(&ort->rt6i_table->tb6_lock);
1349 fib6_update_sernum(ort); 1357 fib6_update_sernum(ort);
1358 spin_unlock_bh(&ort->rt6i_table->tb6_lock);
1350 fib6_force_start_gc(net); 1359 fib6_force_start_gc(net);
1351 } 1360 }
1352 1361
@@ -2154,6 +2163,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
2154 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 2163 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2155restart: 2164restart:
2156 for_each_fib6_node_rt_rcu(fn) { 2165 for_each_fib6_node_rt_rcu(fn) {
2166 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
2167 continue;
2157 if (rt6_check_expired(rt)) 2168 if (rt6_check_expired(rt))
2158 continue; 2169 continue;
2159 if (rt->dst.error) 2170 if (rt->dst.error)
@@ -2344,7 +2355,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2344 rt->rt6i_idev = idev; 2355 rt->rt6i_idev = idev;
2345 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0); 2356 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
2346 2357
2347 /* Add this dst into uncached_list so that rt6_ifdown() can 2358 /* Add this dst into uncached_list so that rt6_disable_ip() can
2348 * do proper release of the net_device 2359 * do proper release of the net_device
2349 */ 2360 */
2350 rt6_uncached_list_add(rt); 2361 rt6_uncached_list_add(rt);
@@ -2746,6 +2757,9 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2746 rt->rt6i_flags = cfg->fc_flags; 2757 rt->rt6i_flags = cfg->fc_flags;
2747 2758
2748install_route: 2759install_route:
2760 if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
2761 !netif_carrier_ok(dev))
2762 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
2749 rt->dst.dev = dev; 2763 rt->dst.dev = dev;
2750 rt->rt6i_idev = idev; 2764 rt->rt6i_idev = idev;
2751 rt->rt6i_table = table; 2765 rt->rt6i_table = table;
@@ -3459,37 +3473,149 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3459 fib6_clean_all(net, fib6_clean_tohost, gateway); 3473 fib6_clean_all(net, fib6_clean_tohost, gateway);
3460} 3474}
3461 3475
3462struct arg_dev_net { 3476struct arg_netdev_event {
3463 struct net_device *dev; 3477 const struct net_device *dev;
3464 struct net *net; 3478 union {
3479 unsigned int nh_flags;
3480 unsigned long event;
3481 };
3465}; 3482};
3466 3483
3484static int fib6_ifup(struct rt6_info *rt, void *p_arg)
3485{
3486 const struct arg_netdev_event *arg = p_arg;
3487 const struct net *net = dev_net(arg->dev);
3488
3489 if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
3490 rt->rt6i_nh_flags &= ~arg->nh_flags;
3491 fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
3492 }
3493
3494 return 0;
3495}
3496
3497void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3498{
3499 struct arg_netdev_event arg = {
3500 .dev = dev,
3501 .nh_flags = nh_flags,
3502 };
3503
3504 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3505 arg.nh_flags |= RTNH_F_LINKDOWN;
3506
3507 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3508}
3509
3510static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
3511 const struct net_device *dev)
3512{
3513 struct rt6_info *iter;
3514
3515 if (rt->dst.dev == dev)
3516 return true;
3517 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3518 if (iter->dst.dev == dev)
3519 return true;
3520
3521 return false;
3522}
3523
3524static void rt6_multipath_flush(struct rt6_info *rt)
3525{
3526 struct rt6_info *iter;
3527
3528 rt->should_flush = 1;
3529 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3530 iter->should_flush = 1;
3531}
3532
3533static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
3534 const struct net_device *down_dev)
3535{
3536 struct rt6_info *iter;
3537 unsigned int dead = 0;
3538
3539 if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD)
3540 dead++;
3541 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3542 if (iter->dst.dev == down_dev ||
3543 iter->rt6i_nh_flags & RTNH_F_DEAD)
3544 dead++;
3545
3546 return dead;
3547}
3548
3549static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
3550 const struct net_device *dev,
3551 unsigned int nh_flags)
3552{
3553 struct rt6_info *iter;
3554
3555 if (rt->dst.dev == dev)
3556 rt->rt6i_nh_flags |= nh_flags;
3557 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3558 if (iter->dst.dev == dev)
3559 iter->rt6i_nh_flags |= nh_flags;
3560}
3561
3467/* called with write lock held for table with rt */ 3562/* called with write lock held for table with rt */
3468static int fib6_ifdown(struct rt6_info *rt, void *arg) 3563static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
3469{ 3564{
3470 const struct arg_dev_net *adn = arg; 3565 const struct arg_netdev_event *arg = p_arg;
3471 const struct net_device *dev = adn->dev; 3566 const struct net_device *dev = arg->dev;
3567 const struct net *net = dev_net(dev);
3472 3568
3473 if ((rt->dst.dev == dev || !dev) && 3569 if (rt == net->ipv6.ip6_null_entry)
3474 rt != adn->net->ipv6.ip6_null_entry && 3570 return 0;
3475 (rt->rt6i_nsiblings == 0 || 3571
3476 (dev && netdev_unregistering(dev)) || 3572 switch (arg->event) {
3477 !rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) 3573 case NETDEV_UNREGISTER:
3478 return -1; 3574 return rt->dst.dev == dev ? -1 : 0;
3575 case NETDEV_DOWN:
3576 if (rt->should_flush)
3577 return -1;
3578 if (!rt->rt6i_nsiblings)
3579 return rt->dst.dev == dev ? -1 : 0;
3580 if (rt6_multipath_uses_dev(rt, dev)) {
3581 unsigned int count;
3582
3583 count = rt6_multipath_dead_count(rt, dev);
3584 if (rt->rt6i_nsiblings + 1 == count) {
3585 rt6_multipath_flush(rt);
3586 return -1;
3587 }
3588 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3589 RTNH_F_LINKDOWN);
3590 fib6_update_sernum(rt);
3591 }
3592 return -2;
3593 case NETDEV_CHANGE:
3594 if (rt->dst.dev != dev ||
3595 rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
3596 break;
3597 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
3598 break;
3599 }
3479 3600
3480 return 0; 3601 return 0;
3481} 3602}
3482 3603
3483void rt6_ifdown(struct net *net, struct net_device *dev) 3604void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
3484{ 3605{
3485 struct arg_dev_net adn = { 3606 struct arg_netdev_event arg = {
3486 .dev = dev, 3607 .dev = dev,
3487 .net = net, 3608 .event = event,
3488 }; 3609 };
3489 3610
3490 fib6_clean_all(net, fib6_ifdown, &adn); 3611 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
3491 if (dev) 3612}
3492 rt6_uncached_list_flush_dev(net, dev); 3613
3614void rt6_disable_ip(struct net_device *dev, unsigned long event)
3615{
3616 rt6_sync_down_dev(dev, event);
3617 rt6_uncached_list_flush_dev(dev_net(dev), dev);
3618 neigh_ifdown(&nd_tbl, dev);
3493} 3619}
3494 3620
3495struct rt6_mtu_change_arg { 3621struct rt6_mtu_change_arg {
@@ -3992,7 +4118,10 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
3992static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, 4118static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
3993 unsigned int *flags, bool skip_oif) 4119 unsigned int *flags, bool skip_oif)
3994{ 4120{
3995 if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) { 4121 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
4122 *flags |= RTNH_F_DEAD;
4123
4124 if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) {
3996 *flags |= RTNH_F_LINKDOWN; 4125 *flags |= RTNH_F_LINKDOWN;
3997 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown) 4126 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3998 *flags |= RTNH_F_DEAD; 4127 *flags |= RTNH_F_DEAD;
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 500c74db746c..d7c30d366935 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,6 +5,7 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
5CFLAGS += -I../../../../usr/include/ 5CFLAGS += -I../../../../usr/include/
6 6
7TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh 7TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
8TEST_PROGS += fib_tests.sh
8TEST_GEN_FILES = socket 9TEST_GEN_FILES = socket
9TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy 10TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
10TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa 11TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
new file mode 100755
index 000000000000..a9154eefb2e2
--- /dev/null
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -0,0 +1,429 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# This test is for checking IPv4 and IPv6 FIB behavior in response to
5# different events.
6
7ret=0
8
# Record the first failure: store status $1 in the global 'ret' only while
# 'ret' is still 0, so a later status never overwrites an earlier one.
check_err()
{
	[ $ret -eq 0 ] && ret=$1
	# Keep the helper's own exit status at 0 whichever branch was taken.
	return 0
}
15
# Inverse check: the preceding command was expected to fail, so a status ($1)
# of 0 marks the test as failed by setting the global 'ret' to 1.
check_fail()
{
	[ $1 -eq 0 ] && ret=1
	# Keep the helper's own exit status at 0 whichever branch was taken.
	return 0
}
22
# Create the network namespace named by $1 and bring up its loopback device.
# The name is quoted on every use so it is passed to 'ip' as a single word
# even if it contains whitespace or glob characters.
netns_create()
{
	local testns=$1

	ip netns add "$testns"
	ip netns exec "$testns" ip link set dev lo up
}
30
# Unicast routes and netdev unregistration: the IPv4 and IPv6 connected
# routes of dummy0 must resolve while the device exists and must stop
# resolving once the device has been deleted.
fib_unreg_unicast_test()
{
	local ns_ip="ip netns exec testns ip"

	ret=0

	netns_create "testns"

	$ns_ip link add dummy0 type dummy
	$ns_ip link set dev dummy0 up

	$ns_ip address add 198.51.100.1/24 dev dummy0
	$ns_ip -6 address add 2001:db8:1::1/64 dev dummy0

	# Both lookups are expected to succeed before the device is removed.
	$ns_ip route get fibmatch 198.51.100.2 > /dev/null 2>&1
	check_err $?
	$ns_ip -6 route get fibmatch 2001:db8:1::2 > /dev/null 2>&1
	check_err $?

	$ns_ip link del dev dummy0
	check_err $?

	# Both lookups are expected to fail after the device is removed.
	$ns_ip route get fibmatch 198.51.100.2 > /dev/null 2>&1
	check_fail $?
	$ns_ip -6 route get fibmatch 2001:db8:1::2 > /dev/null 2>&1
	check_fail $?

	ip netns del testns

	if [ $ret -eq 0 ]; then
		echo "PASS: unicast route test"
		return
	fi
	echo "FAIL: unicast route test"
	return 1
}
64
# Multipath routes and netdev unregistration: install a two-nexthop route
# (dummy0 + dummy1) for each address family, delete dummy0 and check how
# each family reacts.
fib_unreg_multipath_test()
{
	local ns_ip="ip netns exec testns ip"
	local dev

	ret=0

	netns_create "testns"

	for dev in dummy0 dummy1; do
		$ns_ip link add $dev type dummy
		$ns_ip link set dev $dev up
	done

	$ns_ip address add 198.51.100.1/24 dev dummy0
	$ns_ip -6 address add 2001:db8:1::1/64 dev dummy0

	$ns_ip address add 192.0.2.1/24 dev dummy1
	$ns_ip -6 address add 2001:db8:2::1/64 dev dummy1

	$ns_ip route add 203.0.113.0/24 \
		nexthop via 198.51.100.2 dev dummy0 \
		nexthop via 192.0.2.2 dev dummy1
	$ns_ip -6 route add 2001:db8:3::/64 \
		nexthop via 2001:db8:1::2 dev dummy0 \
		nexthop via 2001:db8:2::2 dev dummy1

	$ns_ip route get fibmatch 203.0.113.1 > /dev/null 2>&1
	check_err $?
	$ns_ip -6 route get fibmatch 2001:db8:3::1 > /dev/null 2>&1
	check_err $?

	$ns_ip link del dev dummy0
	check_err $?

	# The IPv4 lookup is expected to fail once dummy0 is gone.
	$ns_ip route get fibmatch 203.0.113.1 > /dev/null 2>&1
	check_fail $?
	# In IPv6 we do not flush the entire multipath route.
	$ns_ip -6 route get fibmatch 2001:db8:3::1 > /dev/null 2>&1
	check_err $?

	$ns_ip link del dev dummy1

	ip netns del testns

	if [ $ret -eq 0 ]; then
		echo "PASS: multipath route test"
		return
	fi
	echo "FAIL: multipath route test"
	return 1
}
114
# Run the netdev unregister tests.
# Returns 1 if any sub-test reported failure, 0 otherwise, so a failure in
# the first sub-test is not hidden by a later one passing.
fib_unreg_test()
{
	local rc=0

	echo "Running netdev unregister tests"

	fib_unreg_unicast_test || rc=1
	fib_unreg_multipath_test || rc=1

	return $rc
}
122
# Unicast routes and administrative down: the IPv4 and IPv6 connected routes
# of dummy0 must resolve while the device is up and must stop resolving once
# it has been set down.
fib_down_unicast_test()
{
	local ns_ip="ip netns exec testns ip"

	ret=0

	netns_create "testns"

	$ns_ip link add dummy0 type dummy
	$ns_ip link set dev dummy0 up

	$ns_ip address add 198.51.100.1/24 dev dummy0
	$ns_ip -6 address add 2001:db8:1::1/64 dev dummy0

	# Both lookups are expected to succeed while the device is up.
	$ns_ip route get fibmatch 198.51.100.2 > /dev/null 2>&1
	check_err $?
	$ns_ip -6 route get fibmatch 2001:db8:1::2 > /dev/null 2>&1
	check_err $?

	$ns_ip link set dev dummy0 down
	check_err $?

	# Both lookups are expected to fail once the device is down.
	$ns_ip route get fibmatch 198.51.100.2 > /dev/null 2>&1
	check_fail $?
	$ns_ip -6 route get fibmatch 2001:db8:1::2 > /dev/null 2>&1
	check_fail $?

	$ns_ip link del dev dummy0

	ip netns del testns

	if [ $ret -eq 0 ]; then
		echo "PASS: unicast route test"
		return
	fi
	echo "FAIL: unicast route test"
	return 1
}
158
# Shared checks for the multipath down test.
#   $1 - device that has been set down
#   $2 - device that is still up
# Lookups restricted to the down device must fail, lookups restricted to the
# up device must succeed, and only the down device's nexthop may be reported
# as "dead linkdown".
fib_down_multipath_test_do()
{
	local dead_if=$1
	local live_if=$2
	local ns_ip="ip netns exec testns ip"

	$ns_ip route get fibmatch 203.0.113.1 oif $dead_if > /dev/null 2>&1
	check_fail $?
	$ns_ip -6 route get fibmatch 2001:db8:3::1 oif $dead_if > /dev/null 2>&1
	check_fail $?

	$ns_ip route get fibmatch 203.0.113.1 oif $live_if > /dev/null 2>&1
	check_err $?
	$ns_ip -6 route get fibmatch 2001:db8:3::1 oif $live_if > /dev/null 2>&1
	check_err $?

	$ns_ip route get fibmatch 203.0.113.1 | grep $dead_if | grep -q "dead linkdown"
	check_err $?
	$ns_ip -6 route get fibmatch 2001:db8:3::1 | grep $dead_if | grep -q "dead linkdown"
	check_err $?

	$ns_ip route get fibmatch 203.0.113.1 | grep $live_if | grep -q "dead linkdown"
	check_fail $?
	$ns_ip -6 route get fibmatch 2001:db8:3::1 | grep $live_if | grep -q "dead linkdown"
	check_fail $?
}
192
# Multipath routes and administrative down: install a two-nexthop route
# (dummy0 + dummy1) for each address family, take each device down in turn
# and verify the per-nexthop state, then take both down and verify that the
# route no longer resolves.
fib_down_multipath_test()
{
	local ns_ip="ip netns exec testns ip"
	local dev

	ret=0

	netns_create "testns"

	for dev in dummy0 dummy1; do
		$ns_ip link add $dev type dummy
		$ns_ip link set dev $dev up
	done

	$ns_ip address add 198.51.100.1/24 dev dummy0
	$ns_ip -6 address add 2001:db8:1::1/64 dev dummy0

	$ns_ip address add 192.0.2.1/24 dev dummy1
	$ns_ip -6 address add 2001:db8:2::1/64 dev dummy1

	$ns_ip route add 203.0.113.0/24 \
		nexthop via 198.51.100.2 dev dummy0 \
		nexthop via 192.0.2.2 dev dummy1
	$ns_ip -6 route add 2001:db8:3::/64 \
		nexthop via 2001:db8:1::2 dev dummy0 \
		nexthop via 2001:db8:2::2 dev dummy1

	$ns_ip route get fibmatch 203.0.113.1 > /dev/null 2>&1
	check_err $?
	$ns_ip -6 route get fibmatch 2001:db8:3::1 > /dev/null 2>&1
	check_err $?

	# Only dummy0 down.
	$ns_ip link set dev dummy0 down
	check_err $?

	fib_down_multipath_test_do "dummy0" "dummy1"

	# Swap: dummy0 back up, dummy1 down.
	$ns_ip link set dev dummy0 up
	check_err $?
	$ns_ip link set dev dummy1 down
	check_err $?

	fib_down_multipath_test_do "dummy1" "dummy0"

	# Both devices down: the lookups are expected to fail.
	$ns_ip link set dev dummy0 down
	check_err $?

	$ns_ip route get fibmatch 203.0.113.1 > /dev/null 2>&1
	check_fail $?
	$ns_ip -6 route get fibmatch 2001:db8:3::1 > /dev/null 2>&1
	check_fail $?

	$ns_ip link del dev dummy1
	$ns_ip link del dev dummy0

	ip netns del testns

	if [ $ret -eq 0 ]; then
		echo "PASS: multipath route test"
		return
	fi
	echo "FAIL: multipath route test"
	return 1
}
254
# Run the netdev down tests.
# Returns 1 if any sub-test reported failure, 0 otherwise, so a failure in
# the first sub-test is not hidden by a later one passing.
fib_down_test()
{
	local rc=0

	echo "Running netdev down tests"

	fib_down_unicast_test || rc=1
	fib_down_multipath_test || rc=1

	return $rc
}
262
# Local routes and carrier changes: routes to the device's own addresses
# must keep resolving and must never be reported as "linkdown", whether the
# carrier is on, off, or the address was added while the carrier was off.
fib_carrier_local_test()
{
	local ns_ip="ip netns exec testns ip"

	ret=0

	# Local routes should not be affected when carrier changes.
	netns_create "testns"

	$ns_ip link add dummy0 type dummy
	$ns_ip link set dev dummy0 up

	$ns_ip link set dev dummy0 carrier on

	$ns_ip address add 198.51.100.1/24 dev dummy0
	$ns_ip -6 address add 2001:db8:1::1/64 dev dummy0

	# Carrier on.
	$ns_ip route get fibmatch 198.51.100.1 > /dev/null 2>&1
	check_err $?
	$ns_ip -6 route get fibmatch 2001:db8:1::1 > /dev/null 2>&1
	check_err $?

	$ns_ip route get fibmatch 198.51.100.1 | grep -q "linkdown"
	check_fail $?
	$ns_ip -6 route get fibmatch 2001:db8:1::1 | grep -q "linkdown"
	check_fail $?

	# Carrier off, same addresses.
	$ns_ip link set dev dummy0 carrier off

	$ns_ip route get fibmatch 198.51.100.1 > /dev/null 2>&1
	check_err $?
	$ns_ip -6 route get fibmatch 2001:db8:1::1 > /dev/null 2>&1
	check_err $?

	$ns_ip route get fibmatch 198.51.100.1 | grep -q "linkdown"
	check_fail $?
	$ns_ip -6 route get fibmatch 2001:db8:1::1 | grep -q "linkdown"
	check_fail $?

	# Addresses added while the carrier is off.
	$ns_ip address add 192.0.2.1/24 dev dummy0
	$ns_ip -6 address add 2001:db8:2::1/64 dev dummy0

	$ns_ip route get fibmatch 192.0.2.1 > /dev/null 2>&1
	check_err $?
	$ns_ip -6 route get fibmatch 2001:db8:2::1 > /dev/null 2>&1
	check_err $?

	$ns_ip route get fibmatch 192.0.2.1 | grep -q "linkdown"
	check_fail $?
	$ns_ip -6 route get fibmatch 2001:db8:2::1 | grep -q "linkdown"
	check_fail $?

	$ns_ip link del dev dummy0

	ip netns del testns

	if [ $ret -eq 0 ]; then
		echo "PASS: local route carrier test"
		return
	fi
	echo "FAIL: local route carrier test"
	return 1
}
329
# Unicast routes and carrier changes: connected routes must keep resolving
# across a carrier loss, must not be reported as "linkdown" while the carrier
# is on, and must be reported as "linkdown" once it is off - including routes
# created by addresses added while the carrier is off.
fib_carrier_unicast_test()
{
	local ns_ip="ip netns exec testns ip"

	ret=0

	netns_create "testns"

	$ns_ip link add dummy0 type dummy
	$ns_ip link set dev dummy0 up

	$ns_ip link set dev dummy0 carrier on

	$ns_ip address add 198.51.100.1/24 dev dummy0
	$ns_ip -6 address add 2001:db8:1::1/64 dev dummy0

	# Carrier on: routes resolve and are not marked linkdown.
	$ns_ip route get fibmatch 198.51.100.2 > /dev/null 2>&1
	check_err $?
	$ns_ip -6 route get fibmatch 2001:db8:1::2 > /dev/null 2>&1
	check_err $?

	$ns_ip route get fibmatch 198.51.100.2 | grep -q "linkdown"
	check_fail $?
	$ns_ip -6 route get fibmatch 2001:db8:1::2 | grep -q "linkdown"
	check_fail $?

	# Carrier off: routes still resolve but are marked linkdown.
	$ns_ip link set dev dummy0 carrier off

	$ns_ip route get fibmatch 198.51.100.2 > /dev/null 2>&1
	check_err $?
	$ns_ip -6 route get fibmatch 2001:db8:1::2 > /dev/null 2>&1
	check_err $?

	$ns_ip route get fibmatch 198.51.100.2 | grep -q "linkdown"
	check_err $?
	$ns_ip -6 route get fibmatch 2001:db8:1::2 | grep -q "linkdown"
	check_err $?

	# Addresses added while the carrier is off.
	$ns_ip address add 192.0.2.1/24 dev dummy0
	$ns_ip -6 address add 2001:db8:2::1/64 dev dummy0

	$ns_ip route get fibmatch 192.0.2.2 > /dev/null 2>&1
	check_err $?
	$ns_ip -6 route get fibmatch 2001:db8:2::2 > /dev/null 2>&1
	check_err $?

	$ns_ip route get fibmatch 192.0.2.2 | grep -q "linkdown"
	check_err $?
	$ns_ip -6 route get fibmatch 2001:db8:2::2 | grep -q "linkdown"
	check_err $?

	$ns_ip link del dev dummy0

	ip netns del testns

	if [ $ret -eq 0 ]; then
		echo "PASS: unicast route carrier test"
		return
	fi
	echo "FAIL: unicast route carrier test"
	return 1
}
395
# Run the netdev carrier change tests.
# Returns 1 if any sub-test reported failure, 0 otherwise, so a failure in
# the first sub-test is not hidden by a later one passing.
fib_carrier_test()
{
	local rc=0

	echo "Running netdev carrier change tests"

	fib_carrier_local_test || rc=1
	fib_carrier_unicast_test || rc=1

	return $rc
}
403
# Run every test group and report the combined result.
#
# Each individual test resets the global 'ret' to 0 when it starts, so after
# the last group 'ret' only describes the final test. Collect the groups'
# return statuses instead and store the aggregate back into 'ret', which the
# script uses as its exit status.
# Returns 1 if any group returned non-zero, 0 otherwise.
fib_test()
{
	local rc=0

	fib_unreg_test || rc=1
	fib_down_test || rc=1
	fib_carrier_test || rc=1

	ret=$rc
	return $rc
}
410
# Kselftest reserves exit code 4 for "skipped"; exiting 0 on a skip would be
# reported as a pass.
ksft_skip=4

# The tests create network namespaces and devices, which requires root.
if [ "$(id -u)" -ne 0 ]; then
	echo "SKIP: Need root privileges"
	exit $ksft_skip
fi

if [ ! -x "$(command -v ip)" ]; then
	echo "SKIP: Could not run test without ip tool"
	exit $ksft_skip
fi

# Every check relies on 'ip route get fibmatch'.
if ! ip route help 2>&1 | grep -q fibmatch; then
	echo "SKIP: iproute2 too old, missing fibmatch"
	exit $ksft_skip
fi

fib_test

exit $ret