aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ido Schimmel <idosch@mellanox.com>, 2018-01-09 09:40:25 -0500
committer: David S. Miller <davem@davemloft.net>, 2018-01-10 15:14:44 -0500
commit: d7dedee184e775f77d321cfa1c660a7680cf6588 (patch)
tree: 7d59d50b661562ccf2cfad936be59b37f7f69b99
parent: e2b3b35eb9896f26c98b9a2c047d9111638059a2 (diff)
ipv6: Calculate hash thresholds for IPv6 nexthops
Before we convert IPv6 to use hash-threshold instead of modulo-N, we first need each nexthop to store its region boundary in the hash function's output space. The boundary is calculated by dividing the output space equally between the active nexthops, that is, nexthops that are neither dead nor linkdown. The boundaries are rebalanced whenever a nexthop is added to or removed from a multipath route and whenever a nexthop becomes active or inactive.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/ip6_fib.h | 1
-rw-r--r-- include/net/ip6_route.h | 7
-rw-r--r-- net/ipv6/ip6_fib.c | 8
-rw-r--r-- net/ipv6/route.c | 96
4 files changed, 106 insertions, 6 deletions
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index ddf53dd1e948..97cd05d87780 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -149,6 +149,7 @@ struct rt6_info {
149 */ 149 */
150 struct list_head rt6i_siblings; 150 struct list_head rt6i_siblings;
151 unsigned int rt6i_nsiblings; 151 unsigned int rt6i_nsiblings;
152 atomic_t rt6i_nh_upper_bound;
152 153
153 atomic_t rt6i_ref; 154 atomic_t rt6i_ref;
154 155
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 34cd3b0c6ded..27d23a65f3cd 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -66,6 +66,12 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
66 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); 66 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
67} 67}
68 68
69static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
70{
71 return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
72 RTF_GATEWAY;
73}
74
69void ip6_route_input(struct sk_buff *skb); 75void ip6_route_input(struct sk_buff *skb);
70struct dst_entry *ip6_route_input_lookup(struct net *net, 76struct dst_entry *ip6_route_input_lookup(struct net *net,
71 struct net_device *dev, 77 struct net_device *dev,
@@ -171,6 +177,7 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
171void rt6_sync_up(struct net_device *dev, unsigned int nh_flags); 177void rt6_sync_up(struct net_device *dev, unsigned int nh_flags);
172void rt6_disable_ip(struct net_device *dev, unsigned long event); 178void rt6_disable_ip(struct net_device *dev, unsigned long event);
173void rt6_sync_down_dev(struct net_device *dev, unsigned long event); 179void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
180void rt6_multipath_rebalance(struct rt6_info *rt);
174 181
175static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) 182static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb)
176{ 183{
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index b5f19703fca6..e31118f417b4 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -796,12 +796,6 @@ insert_above:
796 return ln; 796 return ln;
797} 797}
798 798
799static bool rt6_qualify_for_ecmp(struct rt6_info *rt)
800{
801 return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
802 RTF_GATEWAY;
803}
804
805static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc) 799static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc)
806{ 800{
807 int i; 801 int i;
@@ -991,6 +985,7 @@ next_iter:
991 rt6i_nsiblings++; 985 rt6i_nsiblings++;
992 } 986 }
993 BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings); 987 BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
988 rt6_multipath_rebalance(temp_sibling);
994 } 989 }
995 990
996 /* 991 /*
@@ -1672,6 +1667,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
1672 sibling->rt6i_nsiblings--; 1667 sibling->rt6i_nsiblings--;
1673 rt->rt6i_nsiblings = 0; 1668 rt->rt6i_nsiblings = 0;
1674 list_del_init(&rt->rt6i_siblings); 1669 list_del_init(&rt->rt6i_siblings);
1670 rt6_multipath_rebalance(next_sibling);
1675 } 1671 }
1676 1672
1677 /* Adjust walkers */ 1673 /* Adjust walkers */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1054b059747f..ced2c9bed10b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3481,6 +3481,99 @@ struct arg_netdev_event {
3481 }; 3481 };
3482}; 3482};
3483 3483
3484static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
3485{
3486 struct rt6_info *iter;
3487 struct fib6_node *fn;
3488
3489 fn = rcu_dereference_protected(rt->rt6i_node,
3490 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3491 iter = rcu_dereference_protected(fn->leaf,
3492 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3493 while (iter) {
3494 if (iter->rt6i_metric == rt->rt6i_metric &&
3495 rt6_qualify_for_ecmp(iter))
3496 return iter;
3497 iter = rcu_dereference_protected(iter->rt6_next,
3498 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3499 }
3500
3501 return NULL;
3502}
3503
3504static bool rt6_is_dead(const struct rt6_info *rt)
3505{
3506 if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
3507 (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
3508 rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3509 return true;
3510
3511 return false;
3512}
3513
3514static int rt6_multipath_total_weight(const struct rt6_info *rt)
3515{
3516 struct rt6_info *iter;
3517 int total = 0;
3518
3519 if (!rt6_is_dead(rt))
3520 total++;
3521
3522 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
3523 if (!rt6_is_dead(iter))
3524 total++;
3525 }
3526
3527 return total;
3528}
3529
3530static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
3531{
3532 int upper_bound = -1;
3533
3534 if (!rt6_is_dead(rt)) {
3535 (*weight)++;
3536 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3537 total) - 1;
3538 }
3539 atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
3540}
3541
3542static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
3543{
3544 struct rt6_info *iter;
3545 int weight = 0;
3546
3547 rt6_upper_bound_set(rt, &weight, total);
3548
3549 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3550 rt6_upper_bound_set(iter, &weight, total);
3551}
3552
3553void rt6_multipath_rebalance(struct rt6_info *rt)
3554{
3555 struct rt6_info *first;
3556 int total;
3557
3558 /* In case the entire multipath route was marked for flushing,
3559 * then there is no need to rebalance upon the removal of every
3560 * sibling route.
3561 */
3562 if (!rt->rt6i_nsiblings || rt->should_flush)
3563 return;
3564
3565 /* During lookup routes are evaluated in order, so we need to
3566 * make sure upper bounds are assigned from the first sibling
3567 * onwards.
3568 */
3569 first = rt6_multipath_first_sibling(rt);
3570 if (WARN_ON_ONCE(!first))
3571 return;
3572
3573 total = rt6_multipath_total_weight(first);
3574 rt6_multipath_upper_bound_set(first, total);
3575}
3576
3484static int fib6_ifup(struct rt6_info *rt, void *p_arg) 3577static int fib6_ifup(struct rt6_info *rt, void *p_arg)
3485{ 3578{
3486 const struct arg_netdev_event *arg = p_arg; 3579 const struct arg_netdev_event *arg = p_arg;
@@ -3489,6 +3582,7 @@ static int fib6_ifup(struct rt6_info *rt, void *p_arg)
3489 if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) { 3582 if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
3490 rt->rt6i_nh_flags &= ~arg->nh_flags; 3583 rt->rt6i_nh_flags &= ~arg->nh_flags;
3491 fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt); 3584 fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
3585 rt6_multipath_rebalance(rt);
3492 } 3586 }
3493 3587
3494 return 0; 3588 return 0;
@@ -3588,6 +3682,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
3588 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD | 3682 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3589 RTNH_F_LINKDOWN); 3683 RTNH_F_LINKDOWN);
3590 fib6_update_sernum(rt); 3684 fib6_update_sernum(rt);
3685 rt6_multipath_rebalance(rt);
3591 } 3686 }
3592 return -2; 3687 return -2;
3593 case NETDEV_CHANGE: 3688 case NETDEV_CHANGE:
@@ -3595,6 +3690,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
3595 rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) 3690 rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
3596 break; 3691 break;
3597 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; 3692 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
3693 rt6_multipath_rebalance(rt);
3598 break; 3694 break;
3599 } 3695 }
3600 3696