diff options
author | Wei Wang <weiwan@google.com> | 2019-05-16 16:30:54 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-05-16 17:30:53 -0400 |
commit | 510e2ceda031eed97a7a0f9aad65d271a58b460d (patch) | |
tree | bd45f013d94d901034ec055e63cf83fd8427e89f /net/ipv6 | |
parent | 9a6c8bf91b6025b1bb95d4fb454a16019ad22fc4 (diff) |
ipv6: fix src addr routing with the exception table
When inserting a route cache entry into the exception table, the key is
generated from both src_addr and dest_addr when src addr routing is in use.
However, the current logic always assumes the src_addr used to generate the
key is a /128 host address. This is not true in the following scenarios:
1. When the route is a gateway route or does not have next hop.
(rt6_is_gw_or_nonexthop() == false)
2. When calling ip6_rt_cache_alloc(), saddr is passed in as NULL.
This means that, when looking for a route cache entry in the exception table,
we may have to do the lookup twice: the first time with the passed-in /128 host
address, and, if that finds nothing, a second time with the src_addr stored in fib6_info.
This solves the PMTU discovery issue reported by Mikael Magnusson, where
a route cache entry with a lower MTU is created for a gateway route with
src addr, but the lookup code is not able to find that route cache entry.
Fixes: 2b760fcf5cfb ("ipv6: hook up exception table to store dst cache")
Reported-by: Mikael Magnusson <mikael.kernel@lists.m7n.se>
Bisected-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Wei Wang <weiwan@google.com>
Cc: Martin Lau <kafai@fb.com>
Cc: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/route.c | 51 |
1 files changed, 27 insertions, 24 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 27c0cc5d9d30..7a014ca877ed 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -111,8 +111,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, | |||
111 | int iif, int type, u32 portid, u32 seq, | 111 | int iif, int type, u32 portid, u32 seq, |
112 | unsigned int flags); | 112 | unsigned int flags); |
113 | static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, | 113 | static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, |
114 | struct in6_addr *daddr, | 114 | const struct in6_addr *daddr, |
115 | struct in6_addr *saddr); | 115 | const struct in6_addr *saddr); |
116 | 116 | ||
117 | #ifdef CONFIG_IPV6_ROUTE_INFO | 117 | #ifdef CONFIG_IPV6_ROUTE_INFO |
118 | static struct fib6_info *rt6_add_route_info(struct net *net, | 118 | static struct fib6_info *rt6_add_route_info(struct net *net, |
@@ -1573,31 +1573,44 @@ out: | |||
1573 | * Caller has to hold rcu_read_lock() | 1573 | * Caller has to hold rcu_read_lock() |
1574 | */ | 1574 | */ |
1575 | static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, | 1575 | static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, |
1576 | struct in6_addr *daddr, | 1576 | const struct in6_addr *daddr, |
1577 | struct in6_addr *saddr) | 1577 | const struct in6_addr *saddr) |
1578 | { | 1578 | { |
1579 | const struct in6_addr *src_key = NULL; | ||
1579 | struct rt6_exception_bucket *bucket; | 1580 | struct rt6_exception_bucket *bucket; |
1580 | struct in6_addr *src_key = NULL; | ||
1581 | struct rt6_exception *rt6_ex; | 1581 | struct rt6_exception *rt6_ex; |
1582 | struct rt6_info *ret = NULL; | 1582 | struct rt6_info *ret = NULL; |
1583 | 1583 | ||
1584 | bucket = rcu_dereference(res->f6i->rt6i_exception_bucket); | ||
1585 | |||
1586 | #ifdef CONFIG_IPV6_SUBTREES | 1584 | #ifdef CONFIG_IPV6_SUBTREES |
1587 | /* fib6i_src.plen != 0 indicates f6i is in subtree | 1585 | /* fib6i_src.plen != 0 indicates f6i is in subtree |
1588 | * and exception table is indexed by a hash of | 1586 | * and exception table is indexed by a hash of |
1589 | * both fib6_dst and fib6_src. | 1587 | * both fib6_dst and fib6_src. |
1590 | * Otherwise, the exception table is indexed by | 1588 | * However, the src addr used to create the hash |
1591 | * a hash of only fib6_dst. | 1589 | * might not be exactly the passed in saddr which |
1590 | * is a /128 addr from the flow. | ||
1591 | * So we need to use f6i->fib6_src to redo lookup | ||
1592 | * if the passed in saddr does not find anything. | ||
1593 | * (See the logic in ip6_rt_cache_alloc() on how | ||
1594 | * rt->rt6i_src is updated.) | ||
1592 | */ | 1595 | */ |
1593 | if (res->f6i->fib6_src.plen) | 1596 | if (res->f6i->fib6_src.plen) |
1594 | src_key = saddr; | 1597 | src_key = saddr; |
1598 | find_ex: | ||
1595 | #endif | 1599 | #endif |
1600 | bucket = rcu_dereference(res->f6i->rt6i_exception_bucket); | ||
1596 | rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); | 1601 | rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); |
1597 | 1602 | ||
1598 | if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) | 1603 | if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) |
1599 | ret = rt6_ex->rt6i; | 1604 | ret = rt6_ex->rt6i; |
1600 | 1605 | ||
1606 | #ifdef CONFIG_IPV6_SUBTREES | ||
1607 | /* Use fib6_src as src_key and redo lookup */ | ||
1608 | if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) { | ||
1609 | src_key = &res->f6i->fib6_src.addr; | ||
1610 | goto find_ex; | ||
1611 | } | ||
1612 | #endif | ||
1613 | |||
1601 | return ret; | 1614 | return ret; |
1602 | } | 1615 | } |
1603 | 1616 | ||
@@ -2672,12 +2685,10 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res, | |||
2672 | const struct in6_addr *daddr, | 2685 | const struct in6_addr *daddr, |
2673 | const struct in6_addr *saddr) | 2686 | const struct in6_addr *saddr) |
2674 | { | 2687 | { |
2675 | struct rt6_exception_bucket *bucket; | ||
2676 | const struct fib6_nh *nh = res->nh; | 2688 | const struct fib6_nh *nh = res->nh; |
2677 | struct fib6_info *f6i = res->f6i; | 2689 | struct fib6_info *f6i = res->f6i; |
2678 | const struct in6_addr *src_key; | ||
2679 | struct rt6_exception *rt6_ex; | ||
2680 | struct inet6_dev *idev; | 2690 | struct inet6_dev *idev; |
2691 | struct rt6_info *rt; | ||
2681 | u32 mtu = 0; | 2692 | u32 mtu = 0; |
2682 | 2693 | ||
2683 | if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) { | 2694 | if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) { |
@@ -2686,18 +2697,10 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res, | |||
2686 | goto out; | 2697 | goto out; |
2687 | } | 2698 | } |
2688 | 2699 | ||
2689 | src_key = NULL; | 2700 | rt = rt6_find_cached_rt(res, daddr, saddr); |
2690 | #ifdef CONFIG_IPV6_SUBTREES | 2701 | if (unlikely(rt)) { |
2691 | if (f6i->fib6_src.plen) | 2702 | mtu = dst_metric_raw(&rt->dst, RTAX_MTU); |
2692 | src_key = saddr; | 2703 | } else { |
2693 | #endif | ||
2694 | |||
2695 | bucket = rcu_dereference(f6i->rt6i_exception_bucket); | ||
2696 | rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); | ||
2697 | if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) | ||
2698 | mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU); | ||
2699 | |||
2700 | if (likely(!mtu)) { | ||
2701 | struct net_device *dev = nh->fib_nh_dev; | 2704 | struct net_device *dev = nh->fib_nh_dev; |
2702 | 2705 | ||
2703 | mtu = IPV6_MIN_MTU; | 2706 | mtu = IPV6_MIN_MTU; |