aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
author Wei Wang <weiwan@google.com> 2019-05-16 16:30:54 -0400
committer David S. Miller <davem@davemloft.net> 2019-05-16 17:30:53 -0400
commit 510e2ceda031eed97a7a0f9aad65d271a58b460d (patch)
tree bd45f013d94d901034ec055e63cf83fd8427e89f /net/ipv6
parent 9a6c8bf91b6025b1bb95d4fb454a16019ad22fc4 (diff)
ipv6: fix src addr routing with the exception table
When inserting route cache into the exception table, the key is generated with both src_addr and dest_addr with src addr routing. However, current logic always assumes the src_addr used to generate the key is a /128 host address. This is not true in the following scenarios:
1. When the route is a gateway route or does not have next hop. (rt6_is_gw_or_nonexthop() == false)
2. When calling ip6_rt_cache_alloc(), saddr is passed in as NULL.

This means, when looking for a route cache in the exception table, we have to do the lookup twice: first time with the passed in /128 host address, second time with the src_addr stored in fib6_info.

This solves the pmtu discovery issue reported by Mikael Magnusson where a route cache with a lower mtu info is created for a gateway route with src addr. However, the lookup code is not able to find this route cache.

Fixes: 2b760fcf5cfb ("ipv6: hook up exception table to store dst cache")
Reported-by: Mikael Magnusson <mikael.kernel@lists.m7n.se>
Bisected-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Wei Wang <weiwan@google.com>
Cc: Martin Lau <kafai@fb.com>
Cc: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r-- net/ipv6/route.c 51
1 files changed, 27 insertions, 24 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 27c0cc5d9d30..7a014ca877ed 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -111,8 +111,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
111 int iif, int type, u32 portid, u32 seq, 111 int iif, int type, u32 portid, u32 seq,
112 unsigned int flags); 112 unsigned int flags);
113static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, 113static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
114 struct in6_addr *daddr, 114 const struct in6_addr *daddr,
115 struct in6_addr *saddr); 115 const struct in6_addr *saddr);
116 116
117#ifdef CONFIG_IPV6_ROUTE_INFO 117#ifdef CONFIG_IPV6_ROUTE_INFO
118static struct fib6_info *rt6_add_route_info(struct net *net, 118static struct fib6_info *rt6_add_route_info(struct net *net,
@@ -1573,31 +1573,44 @@ out:
1573 * Caller has to hold rcu_read_lock() 1573 * Caller has to hold rcu_read_lock()
1574 */ 1574 */
1575static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, 1575static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
1576 struct in6_addr *daddr, 1576 const struct in6_addr *daddr,
1577 struct in6_addr *saddr) 1577 const struct in6_addr *saddr)
1578{ 1578{
1579 const struct in6_addr *src_key = NULL;
1579 struct rt6_exception_bucket *bucket; 1580 struct rt6_exception_bucket *bucket;
1580 struct in6_addr *src_key = NULL;
1581 struct rt6_exception *rt6_ex; 1581 struct rt6_exception *rt6_ex;
1582 struct rt6_info *ret = NULL; 1582 struct rt6_info *ret = NULL;
1583 1583
1584 bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
1585
1586#ifdef CONFIG_IPV6_SUBTREES 1584#ifdef CONFIG_IPV6_SUBTREES
1587 /* fib6i_src.plen != 0 indicates f6i is in subtree 1585 /* fib6i_src.plen != 0 indicates f6i is in subtree
1588 * and exception table is indexed by a hash of 1586 * and exception table is indexed by a hash of
1589 * both fib6_dst and fib6_src. 1587 * both fib6_dst and fib6_src.
1590 * Otherwise, the exception table is indexed by 1588 * However, the src addr used to create the hash
1591 * a hash of only fib6_dst. 1589 * might not be exactly the passed in saddr which
1590 * is a /128 addr from the flow.
1591 * So we need to use f6i->fib6_src to redo lookup
1592 * if the passed in saddr does not find anything.
1593 * (See the logic in ip6_rt_cache_alloc() on how
1594 * rt->rt6i_src is updated.)
1592 */ 1595 */
1593 if (res->f6i->fib6_src.plen) 1596 if (res->f6i->fib6_src.plen)
1594 src_key = saddr; 1597 src_key = saddr;
1598find_ex:
1595#endif 1599#endif
1600 bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
1596 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); 1601 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1597 1602
1598 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) 1603 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1599 ret = rt6_ex->rt6i; 1604 ret = rt6_ex->rt6i;
1600 1605
1606#ifdef CONFIG_IPV6_SUBTREES
1607 /* Use fib6_src as src_key and redo lookup */
1608 if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
1609 src_key = &res->f6i->fib6_src.addr;
1610 goto find_ex;
1611 }
1612#endif
1613
1601 return ret; 1614 return ret;
1602} 1615}
1603 1616
@@ -2672,12 +2685,10 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res,
2672 const struct in6_addr *daddr, 2685 const struct in6_addr *daddr,
2673 const struct in6_addr *saddr) 2686 const struct in6_addr *saddr)
2674{ 2687{
2675 struct rt6_exception_bucket *bucket;
2676 const struct fib6_nh *nh = res->nh; 2688 const struct fib6_nh *nh = res->nh;
2677 struct fib6_info *f6i = res->f6i; 2689 struct fib6_info *f6i = res->f6i;
2678 const struct in6_addr *src_key;
2679 struct rt6_exception *rt6_ex;
2680 struct inet6_dev *idev; 2690 struct inet6_dev *idev;
2691 struct rt6_info *rt;
2681 u32 mtu = 0; 2692 u32 mtu = 0;
2682 2693
2683 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) { 2694 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
@@ -2686,18 +2697,10 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res,
2686 goto out; 2697 goto out;
2687 } 2698 }
2688 2699
2689 src_key = NULL; 2700 rt = rt6_find_cached_rt(res, daddr, saddr);
2690#ifdef CONFIG_IPV6_SUBTREES 2701 if (unlikely(rt)) {
2691 if (f6i->fib6_src.plen) 2702 mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
2692 src_key = saddr; 2703 } else {
2693#endif
2694
2695 bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2696 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2697 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2698 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2699
2700 if (likely(!mtu)) {
2701 struct net_device *dev = nh->fib_nh_dev; 2704 struct net_device *dev = nh->fib_nh_dev;
2702 2705
2703 mtu = IPV6_MIN_MTU; 2706 mtu = IPV6_MIN_MTU;