about | summary | refs | log | tree | commit | diff | stats
path: root/net/ipv6/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r-- net/ipv6/route.c | 112
1 file changed, 64 insertions, 48 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4c5142a30808..3975ae8e2440 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1391,9 +1391,6 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
1391 1391
1392 pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu); 1392 pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
1393 1393
1394 if (pcpu_rt)
1395 ip6_hold_safe(NULL, &pcpu_rt);
1396
1397 return pcpu_rt; 1394 return pcpu_rt;
1398} 1395}
1399 1396
@@ -1403,12 +1400,9 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1403 struct rt6_info *pcpu_rt, *prev, **p; 1400 struct rt6_info *pcpu_rt, *prev, **p;
1404 1401
1405 pcpu_rt = ip6_rt_pcpu_alloc(res); 1402 pcpu_rt = ip6_rt_pcpu_alloc(res);
1406 if (!pcpu_rt) { 1403 if (!pcpu_rt)
1407 dst_hold(&net->ipv6.ip6_null_entry->dst); 1404 return NULL;
1408 return net->ipv6.ip6_null_entry;
1409 }
1410 1405
1411 dst_hold(&pcpu_rt->dst);
1412 p = this_cpu_ptr(res->nh->rt6i_pcpu); 1406 p = this_cpu_ptr(res->nh->rt6i_pcpu);
1413 prev = cmpxchg(p, NULL, pcpu_rt); 1407 prev = cmpxchg(p, NULL, pcpu_rt);
1414 BUG_ON(prev); 1408 BUG_ON(prev);
@@ -2189,9 +2183,12 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
2189 const struct sk_buff *skb, int flags) 2183 const struct sk_buff *skb, int flags)
2190{ 2184{
2191 struct fib6_result res = {}; 2185 struct fib6_result res = {};
2192 struct rt6_info *rt; 2186 struct rt6_info *rt = NULL;
2193 int strict = 0; 2187 int strict = 0;
2194 2188
2189 WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) &&
2190 !rcu_read_lock_held());
2191
2195 strict |= flags & RT6_LOOKUP_F_IFACE; 2192 strict |= flags & RT6_LOOKUP_F_IFACE;
2196 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; 2193 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
2197 if (net->ipv6.devconf_all->forwarding == 0) 2194 if (net->ipv6.devconf_all->forwarding == 0)
@@ -2200,23 +2197,15 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
2200 rcu_read_lock(); 2197 rcu_read_lock();
2201 2198
2202 fib6_table_lookup(net, table, oif, fl6, &res, strict); 2199 fib6_table_lookup(net, table, oif, fl6, &res, strict);
2203 if (res.f6i == net->ipv6.fib6_null_entry) { 2200 if (res.f6i == net->ipv6.fib6_null_entry)
2204 rt = net->ipv6.ip6_null_entry; 2201 goto out;
2205 rcu_read_unlock();
2206 dst_hold(&rt->dst);
2207 return rt;
2208 }
2209 2202
2210 fib6_select_path(net, &res, fl6, oif, false, skb, strict); 2203 fib6_select_path(net, &res, fl6, oif, false, skb, strict);
2211 2204
2212 /*Search through exception table */ 2205 /*Search through exception table */
2213 rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr); 2206 rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
2214 if (rt) { 2207 if (rt) {
2215 if (ip6_hold_safe(net, &rt)) 2208 goto out;
2216 dst_use_noref(&rt->dst, jiffies);
2217
2218 rcu_read_unlock();
2219 return rt;
2220 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && 2209 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
2221 !res.nh->fib_nh_gw_family)) { 2210 !res.nh->fib_nh_gw_family)) {
2222 /* Create a RTF_CACHE clone which will not be 2211 /* Create a RTF_CACHE clone which will not be
@@ -2224,40 +2213,38 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
2224 * the daddr in the skb during the neighbor look-up is different 2213 * the daddr in the skb during the neighbor look-up is different
2225 * from the fl6->daddr used to look-up route here. 2214 * from the fl6->daddr used to look-up route here.
2226 */ 2215 */
2227 struct rt6_info *uncached_rt; 2216 rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
2228
2229 uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
2230 2217
2231 rcu_read_unlock(); 2218 if (rt) {
2232 2219 /* 1 refcnt is taken during ip6_rt_cache_alloc().
2233 if (uncached_rt) { 2220 * As rt6_uncached_list_add() does not consume refcnt,
2234 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc() 2221 * this refcnt is always returned to the caller even
2235 * No need for another dst_hold() 2222 * if caller sets RT6_LOOKUP_F_DST_NOREF flag.
2236 */ 2223 */
2237 rt6_uncached_list_add(uncached_rt); 2224 rt6_uncached_list_add(rt);
2238 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache); 2225 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2239 } else { 2226 rcu_read_unlock();
2240 uncached_rt = net->ipv6.ip6_null_entry;
2241 dst_hold(&uncached_rt->dst);
2242 }
2243 2227
2244 return uncached_rt; 2228 return rt;
2229 }
2245 } else { 2230 } else {
2246 /* Get a percpu copy */ 2231 /* Get a percpu copy */
2247
2248 struct rt6_info *pcpu_rt;
2249
2250 local_bh_disable(); 2232 local_bh_disable();
2251 pcpu_rt = rt6_get_pcpu_route(&res); 2233 rt = rt6_get_pcpu_route(&res);
2252 2234
2253 if (!pcpu_rt) 2235 if (!rt)
2254 pcpu_rt = rt6_make_pcpu_route(net, &res); 2236 rt = rt6_make_pcpu_route(net, &res);
2255 2237
2256 local_bh_enable(); 2238 local_bh_enable();
2257 rcu_read_unlock();
2258
2259 return pcpu_rt;
2260 } 2239 }
2240out:
2241 if (!rt)
2242 rt = net->ipv6.ip6_null_entry;
2243 if (!(flags & RT6_LOOKUP_F_DST_NOREF))
2244 ip6_hold_safe(net, &rt);
2245 rcu_read_unlock();
2246
2247 return rt;
2261} 2248}
2262EXPORT_SYMBOL_GPL(ip6_pol_route); 2249EXPORT_SYMBOL_GPL(ip6_pol_route);
2263 2250
@@ -2388,11 +2375,12 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2388 return mhash >> 1; 2375 return mhash >> 1;
2389} 2376}
2390 2377
2378/* Called with rcu held */
2391void ip6_route_input(struct sk_buff *skb) 2379void ip6_route_input(struct sk_buff *skb)
2392{ 2380{
2393 const struct ipv6hdr *iph = ipv6_hdr(skb); 2381 const struct ipv6hdr *iph = ipv6_hdr(skb);
2394 struct net *net = dev_net(skb->dev); 2382 struct net *net = dev_net(skb->dev);
2395 int flags = RT6_LOOKUP_F_HAS_SADDR; 2383 int flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_DST_NOREF;
2396 struct ip_tunnel_info *tun_info; 2384 struct ip_tunnel_info *tun_info;
2397 struct flowi6 fl6 = { 2385 struct flowi6 fl6 = {
2398 .flowi6_iif = skb->dev->ifindex, 2386 .flowi6_iif = skb->dev->ifindex,
@@ -2414,8 +2402,8 @@ void ip6_route_input(struct sk_buff *skb)
2414 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6)) 2402 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2415 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys); 2403 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2416 skb_dst_drop(skb); 2404 skb_dst_drop(skb);
2417 skb_dst_set(skb, 2405 skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev,
2418 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags)); 2406 &fl6, skb, flags));
2419} 2407}
2420 2408
2421static struct rt6_info *ip6_pol_route_output(struct net *net, 2409static struct rt6_info *ip6_pol_route_output(struct net *net,
@@ -2427,8 +2415,9 @@ static struct rt6_info *ip6_pol_route_output(struct net *net,
2427 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags); 2415 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2428} 2416}
2429 2417
2430struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, 2418struct dst_entry *ip6_route_output_flags_noref(struct net *net,
2431 struct flowi6 *fl6, int flags) 2419 const struct sock *sk,
2420 struct flowi6 *fl6, int flags)
2432{ 2421{
2433 bool any_src; 2422 bool any_src;
2434 2423
@@ -2436,6 +2425,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2436 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) { 2425 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
2437 struct dst_entry *dst; 2426 struct dst_entry *dst;
2438 2427
2428 /* This function does not take refcnt on the dst */
2439 dst = l3mdev_link_scope_lookup(net, fl6); 2429 dst = l3mdev_link_scope_lookup(net, fl6);
2440 if (dst) 2430 if (dst)
2441 return dst; 2431 return dst;
@@ -2443,6 +2433,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2443 2433
2444 fl6->flowi6_iif = LOOPBACK_IFINDEX; 2434 fl6->flowi6_iif = LOOPBACK_IFINDEX;
2445 2435
2436 flags |= RT6_LOOKUP_F_DST_NOREF;
2446 any_src = ipv6_addr_any(&fl6->saddr); 2437 any_src = ipv6_addr_any(&fl6->saddr);
2447 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || 2438 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2448 (fl6->flowi6_oif && any_src)) 2439 (fl6->flowi6_oif && any_src))
@@ -2455,6 +2446,28 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2455 2446
2456 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output); 2447 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2457} 2448}
2449EXPORT_SYMBOL_GPL(ip6_route_output_flags_noref);
2450
2451struct dst_entry *ip6_route_output_flags(struct net *net,
2452 const struct sock *sk,
2453 struct flowi6 *fl6,
2454 int flags)
2455{
2456 struct dst_entry *dst;
2457 struct rt6_info *rt6;
2458
2459 rcu_read_lock();
2460 dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
2461 rt6 = (struct rt6_info *)dst;
2462 /* For dst cached in uncached_list, refcnt is already taken. */
2463 if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
2464 dst = &net->ipv6.ip6_null_entry->dst;
2465 dst_hold(dst);
2466 }
2467 rcu_read_unlock();
2468
2469 return dst;
2470}
2458EXPORT_SYMBOL_GPL(ip6_route_output_flags); 2471EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2459 2472
2460struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) 2473struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
@@ -6029,6 +6042,7 @@ static int __net_init ip6_route_net_init(struct net *net)
6029 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 6042 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6030 dst_init_metrics(&net->ipv6.ip6_null_entry->dst, 6043 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
6031 ip6_template_metrics, true); 6044 ip6_template_metrics, true);
6045 INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->rt6i_uncached);
6032 6046
6033#ifdef CONFIG_IPV6_MULTIPLE_TABLES 6047#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6034 net->ipv6.fib6_has_custom_rules = false; 6048 net->ipv6.fib6_has_custom_rules = false;
@@ -6040,6 +6054,7 @@ static int __net_init ip6_route_net_init(struct net *net)
6040 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 6054 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6041 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, 6055 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
6042 ip6_template_metrics, true); 6056 ip6_template_metrics, true);
6057 INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->rt6i_uncached);
6043 6058
6044 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 6059 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
6045 sizeof(*net->ipv6.ip6_blk_hole_entry), 6060 sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -6049,6 +6064,7 @@ static int __net_init ip6_route_net_init(struct net *net)
6049 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 6064 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6050 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, 6065 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
6051 ip6_template_metrics, true); 6066 ip6_template_metrics, true);
6067 INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached);
6052#endif 6068#endif
6053 6069
6054 net->ipv6.sysctl.flush_delay = 0; 6070 net->ipv6.sysctl.flush_delay = 0;