aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Wei Wang <weiwan@google.com> 2019-06-20 20:36:37 -0400
committer: David S. Miller <davem@davemloft.net> 2019-06-23 16:24:17 -0400
commit: 0e09edcce7ad9c8120eb8462334e1c9e8f3be09a (patch)
tree: c4e892bb20a039357fd2905f560a3692d37f6645
parent: 8c25c0cb5bb4e63170bb7760179ec294a3827694 (diff)
ipv6: introduce RT6_LOOKUP_F_DST_NOREF flag in ip6_pol_route()
This new flag instructs the route lookup function not to take a refcnt on the dst entry. A caller that does a route lookup with this flag must properly use RCU protection.

ip6_pol_route() is the major route lookup function for both the tx and rx paths. In this function: do not take a refcnt on the dst if the RT6_LOOKUP_F_DST_NOREF flag is set, and directly return the route entry. The caller should be holding the RCU read lock when using this flag, and decides whether to take a refcnt or not.

One note on the dst cache in the uncached_list: as uncached_list does not consume a refcnt, one refcnt is always returned to the caller even if the RT6_LOOKUP_F_DST_NOREF flag is set. An uncached dst is only possible in the output path, so in that call path the caller MUST check whether the dst is in the uncached_list before assuming that no refcnt was taken on the returned dst.

Signed-off-by: Wei Wang <weiwan@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/ip6_route.h | 1
-rw-r--r-- net/ipv6/route.c | 73
2 files changed, 31 insertions, 43 deletions
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 7375a165fd98..82bced2fc1e3 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -36,6 +36,7 @@ struct route_info {
36#define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 36#define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010
37#define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 37#define RT6_LOOKUP_F_SRCPREF_COA 0x00000020
38#define RT6_LOOKUP_F_IGNORE_LINKSTATE 0x00000040 38#define RT6_LOOKUP_F_IGNORE_LINKSTATE 0x00000040
39#define RT6_LOOKUP_F_DST_NOREF 0x00000080
39 40
40/* We do not (yet ?) support IPv6 jumbograms (RFC 2675) 41/* We do not (yet ?) support IPv6 jumbograms (RFC 2675)
41 * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header 42 * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4c5142a30808..5469e0a9c810 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1391,9 +1391,6 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
1391 1391
1392 pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu); 1392 pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
1393 1393
1394 if (pcpu_rt)
1395 ip6_hold_safe(NULL, &pcpu_rt);
1396
1397 return pcpu_rt; 1394 return pcpu_rt;
1398} 1395}
1399 1396
@@ -1403,12 +1400,9 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1403 struct rt6_info *pcpu_rt, *prev, **p; 1400 struct rt6_info *pcpu_rt, *prev, **p;
1404 1401
1405 pcpu_rt = ip6_rt_pcpu_alloc(res); 1402 pcpu_rt = ip6_rt_pcpu_alloc(res);
1406 if (!pcpu_rt) { 1403 if (!pcpu_rt)
1407 dst_hold(&net->ipv6.ip6_null_entry->dst); 1404 return NULL;
1408 return net->ipv6.ip6_null_entry;
1409 }
1410 1405
1411 dst_hold(&pcpu_rt->dst);
1412 p = this_cpu_ptr(res->nh->rt6i_pcpu); 1406 p = this_cpu_ptr(res->nh->rt6i_pcpu);
1413 prev = cmpxchg(p, NULL, pcpu_rt); 1407 prev = cmpxchg(p, NULL, pcpu_rt);
1414 BUG_ON(prev); 1408 BUG_ON(prev);
@@ -2189,9 +2183,12 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
2189 const struct sk_buff *skb, int flags) 2183 const struct sk_buff *skb, int flags)
2190{ 2184{
2191 struct fib6_result res = {}; 2185 struct fib6_result res = {};
2192 struct rt6_info *rt; 2186 struct rt6_info *rt = NULL;
2193 int strict = 0; 2187 int strict = 0;
2194 2188
2189 WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) &&
2190 !rcu_read_lock_held());
2191
2195 strict |= flags & RT6_LOOKUP_F_IFACE; 2192 strict |= flags & RT6_LOOKUP_F_IFACE;
2196 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; 2193 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
2197 if (net->ipv6.devconf_all->forwarding == 0) 2194 if (net->ipv6.devconf_all->forwarding == 0)
@@ -2200,23 +2197,15 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
2200 rcu_read_lock(); 2197 rcu_read_lock();
2201 2198
2202 fib6_table_lookup(net, table, oif, fl6, &res, strict); 2199 fib6_table_lookup(net, table, oif, fl6, &res, strict);
2203 if (res.f6i == net->ipv6.fib6_null_entry) { 2200 if (res.f6i == net->ipv6.fib6_null_entry)
2204 rt = net->ipv6.ip6_null_entry; 2201 goto out;
2205 rcu_read_unlock();
2206 dst_hold(&rt->dst);
2207 return rt;
2208 }
2209 2202
2210 fib6_select_path(net, &res, fl6, oif, false, skb, strict); 2203 fib6_select_path(net, &res, fl6, oif, false, skb, strict);
2211 2204
2212 /*Search through exception table */ 2205 /*Search through exception table */
2213 rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr); 2206 rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
2214 if (rt) { 2207 if (rt) {
2215 if (ip6_hold_safe(net, &rt)) 2208 goto out;
2216 dst_use_noref(&rt->dst, jiffies);
2217
2218 rcu_read_unlock();
2219 return rt;
2220 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && 2209 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
2221 !res.nh->fib_nh_gw_family)) { 2210 !res.nh->fib_nh_gw_family)) {
2222 /* Create a RTF_CACHE clone which will not be 2211 /* Create a RTF_CACHE clone which will not be
@@ -2224,40 +2213,38 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
2224 * the daddr in the skb during the neighbor look-up is different 2213 * the daddr in the skb during the neighbor look-up is different
2225 * from the fl6->daddr used to look-up route here. 2214 * from the fl6->daddr used to look-up route here.
2226 */ 2215 */
2227 struct rt6_info *uncached_rt; 2216 rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
2228 2217
2229 uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL); 2218 if (rt) {
2230 2219 /* 1 refcnt is taken during ip6_rt_cache_alloc().
2231 rcu_read_unlock(); 2220 * As rt6_uncached_list_add() does not consume refcnt,
2232 2221 * this refcnt is always returned to the caller even
2233 if (uncached_rt) { 2222 * if caller sets RT6_LOOKUP_F_DST_NOREF flag.
2234 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
2235 * No need for another dst_hold()
2236 */ 2223 */
2237 rt6_uncached_list_add(uncached_rt); 2224 rt6_uncached_list_add(rt);
2238 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache); 2225 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2239 } else { 2226 rcu_read_unlock();
2240 uncached_rt = net->ipv6.ip6_null_entry;
2241 dst_hold(&uncached_rt->dst);
2242 }
2243 2227
2244 return uncached_rt; 2228 return rt;
2229 }
2245 } else { 2230 } else {
2246 /* Get a percpu copy */ 2231 /* Get a percpu copy */
2247
2248 struct rt6_info *pcpu_rt;
2249
2250 local_bh_disable(); 2232 local_bh_disable();
2251 pcpu_rt = rt6_get_pcpu_route(&res); 2233 rt = rt6_get_pcpu_route(&res);
2252 2234
2253 if (!pcpu_rt) 2235 if (!rt)
2254 pcpu_rt = rt6_make_pcpu_route(net, &res); 2236 rt = rt6_make_pcpu_route(net, &res);
2255 2237
2256 local_bh_enable(); 2238 local_bh_enable();
2257 rcu_read_unlock();
2258
2259 return pcpu_rt;
2260 } 2239 }
2240out:
2241 if (!rt)
2242 rt = net->ipv6.ip6_null_entry;
2243 if (!(flags & RT6_LOOKUP_F_DST_NOREF))
2244 ip6_hold_safe(net, &rt);
2245 rcu_read_unlock();
2246
2247 return rt;
2261} 2248}
2262EXPORT_SYMBOL_GPL(ip6_pol_route); 2249EXPORT_SYMBOL_GPL(ip6_pol_route);
2263 2250