aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2019-06-23 16:24:17 -0400
committerDavid S. Miller <davem@davemloft.net>2019-06-23 16:24:17 -0400
commit7d30a7f6424e88c958c19a02f6f54ab8d25919cd (patch)
tree5649667efad829c536543d0feb5374ad42b93483
parent8c25c0cb5bb4e63170bb7760179ec294a3827694 (diff)
parent7d9e5f422150ed00de744e02a80734d74cc9704d (diff)
Merge branch 'ipv6-avoid-taking-refcnt-on-dst-during-route-lookup'
Wei Wang says: ==================== ipv6: avoid taking refcnt on dst during route lookup IPv6 route lookup code always grabs refcnt on the dst for the caller. But for certain cases, grabbing refcnt is not always necessary if the call path is rcu protected and the caller does not cache the dst. Another issue in the route lookup logic is: When there are multiple custom rules, we have to do the lookup into each table associated to each rule individually. And when we can't find the route in one table, we grab and release refcnt on net->ipv6.ip6_null_entry before going to the next table. This operation is completely redundant, and causes false issue because net->ipv6.ip6_null_entry is a shared object. This patch set introduces a new flag RT6_LOOKUP_F_DST_NOREF for route lookup callers to set, to avoid any manipulation on the dst refcnt. And it converts the major input and output path to use it. The performance gain is noticeable. I ran synflood tests between 2 hosts under the same switch. Both hosts have 20G mlx NIC, and 8 tx/rx queues. Sender sends pure SYN flood with random src IPs and ports using trafgen. Receiver has a simple TCP listener on the target port. Both hosts have multiple custom rules: - For incoming packets, only local table is traversed. - For outgoing packets, 3 tables are traversed to find the route. 
The packet processing rate on the receiver is as follows: - Before the fix: 3.78Mpps - After the fix: 5.50Mpps v2->v3: - Handled fib6_rule_lookup() when CONFIG_IPV6_MULTIPLE_TABLES is not configured in patch 03 (suggested by David Ahern) - Removed the renaming of l3mdev_link_scope_lookup() in patch 05 (suggested by David Ahern) - Moved definition of ip6_route_output_flags() from an inline function in include/net/ip6_route.h to net/ipv6/route.c in order to address kbuild error in patch 05 v1->v2: - Added a helper ip6_rt_put_flags() in patch 3 suggested by David Miller ==================== Reviewed-by: David Ahern <dsahern@gmail.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/vrf.c5
-rw-r--r--include/net/ip6_route.h15
-rw-r--r--net/ipv6/fib6_rules.c12
-rw-r--r--net/ipv6/ip6_fib.c5
-rw-r--r--net/ipv6/route.c112
-rw-r--r--net/l3mdev/l3mdev.c7
6 files changed, 95 insertions, 61 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 11b9525dff27..69ef9cce5858 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1072,12 +1072,14 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
1072#if IS_ENABLED(CONFIG_IPV6) 1072#if IS_ENABLED(CONFIG_IPV6)
1073/* send to link-local or multicast address via interface enslaved to 1073/* send to link-local or multicast address via interface enslaved to
1074 * VRF device. Force lookup to VRF table without changing flow struct 1074 * VRF device. Force lookup to VRF table without changing flow struct
1075 * Note: Caller to this function must hold rcu_read_lock() and no refcnt
1076 * is taken on the dst by this function.
1075 */ 1077 */
1076static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, 1078static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
1077 struct flowi6 *fl6) 1079 struct flowi6 *fl6)
1078{ 1080{
1079 struct net *net = dev_net(dev); 1081 struct net *net = dev_net(dev);
1080 int flags = RT6_LOOKUP_F_IFACE; 1082 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_DST_NOREF;
1081 struct dst_entry *dst = NULL; 1083 struct dst_entry *dst = NULL;
1082 struct rt6_info *rt; 1084 struct rt6_info *rt;
1083 1085
@@ -1087,7 +1089,6 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
1087 */ 1089 */
1088 if (fl6->flowi6_oif == dev->ifindex) { 1090 if (fl6->flowi6_oif == dev->ifindex) {
1089 dst = &net->ipv6.ip6_null_entry->dst; 1091 dst = &net->ipv6.ip6_null_entry->dst;
1090 dst_hold(dst);
1091 return dst; 1092 return dst;
1092 } 1093 }
1093 1094
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 7375a165fd98..89ad7917b98d 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -36,6 +36,7 @@ struct route_info {
36#define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 36#define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010
37#define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 37#define RT6_LOOKUP_F_SRCPREF_COA 0x00000020
38#define RT6_LOOKUP_F_IGNORE_LINKSTATE 0x00000040 38#define RT6_LOOKUP_F_IGNORE_LINKSTATE 0x00000040
39#define RT6_LOOKUP_F_DST_NOREF 0x00000080
39 40
40/* We do not (yet ?) support IPv6 jumbograms (RFC 2675) 41/* We do not (yet ?) support IPv6 jumbograms (RFC 2675)
41 * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header 42 * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header
@@ -83,6 +84,10 @@ struct dst_entry *ip6_route_input_lookup(struct net *net,
83 struct flowi6 *fl6, 84 struct flowi6 *fl6,
84 const struct sk_buff *skb, int flags); 85 const struct sk_buff *skb, int flags);
85 86
87struct dst_entry *ip6_route_output_flags_noref(struct net *net,
88 const struct sock *sk,
89 struct flowi6 *fl6, int flags);
90
86struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, 91struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
87 struct flowi6 *fl6, int flags); 92 struct flowi6 *fl6, int flags);
88 93
@@ -93,6 +98,16 @@ static inline struct dst_entry *ip6_route_output(struct net *net,
93 return ip6_route_output_flags(net, sk, fl6, 0); 98 return ip6_route_output_flags(net, sk, fl6, 0);
94} 99}
95 100
101/* Only conditionally release dst if flags indicates
102 * !RT6_LOOKUP_F_DST_NOREF or dst is in uncached_list.
103 */
104static inline void ip6_rt_put_flags(struct rt6_info *rt, int flags)
105{
106 if (!(flags & RT6_LOOKUP_F_DST_NOREF) ||
107 !list_empty(&rt->rt6i_uncached))
108 ip6_rt_put(rt);
109}
110
96struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, 111struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
97 const struct sk_buff *skb, int flags); 112 const struct sk_buff *skb, int flags);
98struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, 113struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index bcfae13409b5..d22b6c140f23 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -113,14 +113,15 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
113 rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags); 113 rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
114 if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN) 114 if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
115 return &rt->dst; 115 return &rt->dst;
116 ip6_rt_put(rt); 116 ip6_rt_put_flags(rt, flags);
117 rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags); 117 rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
118 if (rt->dst.error != -EAGAIN) 118 if (rt->dst.error != -EAGAIN)
119 return &rt->dst; 119 return &rt->dst;
120 ip6_rt_put(rt); 120 ip6_rt_put_flags(rt, flags);
121 } 121 }
122 122
123 dst_hold(&net->ipv6.ip6_null_entry->dst); 123 if (!(flags & RT6_LOOKUP_F_DST_NOREF))
124 dst_hold(&net->ipv6.ip6_null_entry->dst);
124 return &net->ipv6.ip6_null_entry->dst; 125 return &net->ipv6.ip6_null_entry->dst;
125} 126}
126 127
@@ -237,13 +238,14 @@ static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
237 goto out; 238 goto out;
238 } 239 }
239again: 240again:
240 ip6_rt_put(rt); 241 ip6_rt_put_flags(rt, flags);
241 err = -EAGAIN; 242 err = -EAGAIN;
242 rt = NULL; 243 rt = NULL;
243 goto out; 244 goto out;
244 245
245discard_pkt: 246discard_pkt:
246 dst_hold(&rt->dst); 247 if (!(flags & RT6_LOOKUP_F_DST_NOREF))
248 dst_hold(&rt->dst);
247out: 249out:
248 res->rt6 = rt; 250 res->rt6 = rt;
249 return err; 251 return err;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1d16a01eccf5..5b1c9b5b9247 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -316,9 +316,10 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
316 316
317 rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags); 317 rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
318 if (rt->dst.error == -EAGAIN) { 318 if (rt->dst.error == -EAGAIN) {
319 ip6_rt_put(rt); 319 ip6_rt_put_flags(rt, flags);
320 rt = net->ipv6.ip6_null_entry; 320 rt = net->ipv6.ip6_null_entry;
321 dst_hold(&rt->dst); 321 if (!(flags & RT6_LOOKUP_F_DST_NOREF))
322 dst_hold(&rt->dst);
322 } 323 }
323 324
324 return &rt->dst; 325 return &rt->dst;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4c5142a30808..3975ae8e2440 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1391,9 +1391,6 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
1391 1391
1392 pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu); 1392 pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
1393 1393
1394 if (pcpu_rt)
1395 ip6_hold_safe(NULL, &pcpu_rt);
1396
1397 return pcpu_rt; 1394 return pcpu_rt;
1398} 1395}
1399 1396
@@ -1403,12 +1400,9 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1403 struct rt6_info *pcpu_rt, *prev, **p; 1400 struct rt6_info *pcpu_rt, *prev, **p;
1404 1401
1405 pcpu_rt = ip6_rt_pcpu_alloc(res); 1402 pcpu_rt = ip6_rt_pcpu_alloc(res);
1406 if (!pcpu_rt) { 1403 if (!pcpu_rt)
1407 dst_hold(&net->ipv6.ip6_null_entry->dst); 1404 return NULL;
1408 return net->ipv6.ip6_null_entry;
1409 }
1410 1405
1411 dst_hold(&pcpu_rt->dst);
1412 p = this_cpu_ptr(res->nh->rt6i_pcpu); 1406 p = this_cpu_ptr(res->nh->rt6i_pcpu);
1413 prev = cmpxchg(p, NULL, pcpu_rt); 1407 prev = cmpxchg(p, NULL, pcpu_rt);
1414 BUG_ON(prev); 1408 BUG_ON(prev);
@@ -2189,9 +2183,12 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
2189 const struct sk_buff *skb, int flags) 2183 const struct sk_buff *skb, int flags)
2190{ 2184{
2191 struct fib6_result res = {}; 2185 struct fib6_result res = {};
2192 struct rt6_info *rt; 2186 struct rt6_info *rt = NULL;
2193 int strict = 0; 2187 int strict = 0;
2194 2188
2189 WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) &&
2190 !rcu_read_lock_held());
2191
2195 strict |= flags & RT6_LOOKUP_F_IFACE; 2192 strict |= flags & RT6_LOOKUP_F_IFACE;
2196 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; 2193 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
2197 if (net->ipv6.devconf_all->forwarding == 0) 2194 if (net->ipv6.devconf_all->forwarding == 0)
@@ -2200,23 +2197,15 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
2200 rcu_read_lock(); 2197 rcu_read_lock();
2201 2198
2202 fib6_table_lookup(net, table, oif, fl6, &res, strict); 2199 fib6_table_lookup(net, table, oif, fl6, &res, strict);
2203 if (res.f6i == net->ipv6.fib6_null_entry) { 2200 if (res.f6i == net->ipv6.fib6_null_entry)
2204 rt = net->ipv6.ip6_null_entry; 2201 goto out;
2205 rcu_read_unlock();
2206 dst_hold(&rt->dst);
2207 return rt;
2208 }
2209 2202
2210 fib6_select_path(net, &res, fl6, oif, false, skb, strict); 2203 fib6_select_path(net, &res, fl6, oif, false, skb, strict);
2211 2204
2212 /*Search through exception table */ 2205 /*Search through exception table */
2213 rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr); 2206 rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
2214 if (rt) { 2207 if (rt) {
2215 if (ip6_hold_safe(net, &rt)) 2208 goto out;
2216 dst_use_noref(&rt->dst, jiffies);
2217
2218 rcu_read_unlock();
2219 return rt;
2220 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && 2209 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
2221 !res.nh->fib_nh_gw_family)) { 2210 !res.nh->fib_nh_gw_family)) {
2222 /* Create a RTF_CACHE clone which will not be 2211 /* Create a RTF_CACHE clone which will not be
@@ -2224,40 +2213,38 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
2224 * the daddr in the skb during the neighbor look-up is different 2213 * the daddr in the skb during the neighbor look-up is different
2225 * from the fl6->daddr used to look-up route here. 2214 * from the fl6->daddr used to look-up route here.
2226 */ 2215 */
2227 struct rt6_info *uncached_rt; 2216 rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
2228
2229 uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
2230 2217
2231 rcu_read_unlock(); 2218 if (rt) {
2232 2219 /* 1 refcnt is taken during ip6_rt_cache_alloc().
2233 if (uncached_rt) { 2220 * As rt6_uncached_list_add() does not consume refcnt,
2234 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc() 2221 * this refcnt is always returned to the caller even
2235 * No need for another dst_hold() 2222 * if caller sets RT6_LOOKUP_F_DST_NOREF flag.
2236 */ 2223 */
2237 rt6_uncached_list_add(uncached_rt); 2224 rt6_uncached_list_add(rt);
2238 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache); 2225 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2239 } else { 2226 rcu_read_unlock();
2240 uncached_rt = net->ipv6.ip6_null_entry;
2241 dst_hold(&uncached_rt->dst);
2242 }
2243 2227
2244 return uncached_rt; 2228 return rt;
2229 }
2245 } else { 2230 } else {
2246 /* Get a percpu copy */ 2231 /* Get a percpu copy */
2247
2248 struct rt6_info *pcpu_rt;
2249
2250 local_bh_disable(); 2232 local_bh_disable();
2251 pcpu_rt = rt6_get_pcpu_route(&res); 2233 rt = rt6_get_pcpu_route(&res);
2252 2234
2253 if (!pcpu_rt) 2235 if (!rt)
2254 pcpu_rt = rt6_make_pcpu_route(net, &res); 2236 rt = rt6_make_pcpu_route(net, &res);
2255 2237
2256 local_bh_enable(); 2238 local_bh_enable();
2257 rcu_read_unlock();
2258
2259 return pcpu_rt;
2260 } 2239 }
2240out:
2241 if (!rt)
2242 rt = net->ipv6.ip6_null_entry;
2243 if (!(flags & RT6_LOOKUP_F_DST_NOREF))
2244 ip6_hold_safe(net, &rt);
2245 rcu_read_unlock();
2246
2247 return rt;
2261} 2248}
2262EXPORT_SYMBOL_GPL(ip6_pol_route); 2249EXPORT_SYMBOL_GPL(ip6_pol_route);
2263 2250
@@ -2388,11 +2375,12 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2388 return mhash >> 1; 2375 return mhash >> 1;
2389} 2376}
2390 2377
2378/* Called with rcu held */
2391void ip6_route_input(struct sk_buff *skb) 2379void ip6_route_input(struct sk_buff *skb)
2392{ 2380{
2393 const struct ipv6hdr *iph = ipv6_hdr(skb); 2381 const struct ipv6hdr *iph = ipv6_hdr(skb);
2394 struct net *net = dev_net(skb->dev); 2382 struct net *net = dev_net(skb->dev);
2395 int flags = RT6_LOOKUP_F_HAS_SADDR; 2383 int flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_DST_NOREF;
2396 struct ip_tunnel_info *tun_info; 2384 struct ip_tunnel_info *tun_info;
2397 struct flowi6 fl6 = { 2385 struct flowi6 fl6 = {
2398 .flowi6_iif = skb->dev->ifindex, 2386 .flowi6_iif = skb->dev->ifindex,
@@ -2414,8 +2402,8 @@ void ip6_route_input(struct sk_buff *skb)
2414 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6)) 2402 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2415 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys); 2403 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2416 skb_dst_drop(skb); 2404 skb_dst_drop(skb);
2417 skb_dst_set(skb, 2405 skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev,
2418 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags)); 2406 &fl6, skb, flags));
2419} 2407}
2420 2408
2421static struct rt6_info *ip6_pol_route_output(struct net *net, 2409static struct rt6_info *ip6_pol_route_output(struct net *net,
@@ -2427,8 +2415,9 @@ static struct rt6_info *ip6_pol_route_output(struct net *net,
2427 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags); 2415 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2428} 2416}
2429 2417
2430struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, 2418struct dst_entry *ip6_route_output_flags_noref(struct net *net,
2431 struct flowi6 *fl6, int flags) 2419 const struct sock *sk,
2420 struct flowi6 *fl6, int flags)
2432{ 2421{
2433 bool any_src; 2422 bool any_src;
2434 2423
@@ -2436,6 +2425,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2436 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) { 2425 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
2437 struct dst_entry *dst; 2426 struct dst_entry *dst;
2438 2427
2428 /* This function does not take refcnt on the dst */
2439 dst = l3mdev_link_scope_lookup(net, fl6); 2429 dst = l3mdev_link_scope_lookup(net, fl6);
2440 if (dst) 2430 if (dst)
2441 return dst; 2431 return dst;
@@ -2443,6 +2433,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2443 2433
2444 fl6->flowi6_iif = LOOPBACK_IFINDEX; 2434 fl6->flowi6_iif = LOOPBACK_IFINDEX;
2445 2435
2436 flags |= RT6_LOOKUP_F_DST_NOREF;
2446 any_src = ipv6_addr_any(&fl6->saddr); 2437 any_src = ipv6_addr_any(&fl6->saddr);
2447 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || 2438 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2448 (fl6->flowi6_oif && any_src)) 2439 (fl6->flowi6_oif && any_src))
@@ -2455,6 +2446,28 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2455 2446
2456 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output); 2447 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2457} 2448}
2449EXPORT_SYMBOL_GPL(ip6_route_output_flags_noref);
2450
2451struct dst_entry *ip6_route_output_flags(struct net *net,
2452 const struct sock *sk,
2453 struct flowi6 *fl6,
2454 int flags)
2455{
2456 struct dst_entry *dst;
2457 struct rt6_info *rt6;
2458
2459 rcu_read_lock();
2460 dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
2461 rt6 = (struct rt6_info *)dst;
2462 /* For dst cached in uncached_list, refcnt is already taken. */
2463 if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
2464 dst = &net->ipv6.ip6_null_entry->dst;
2465 dst_hold(dst);
2466 }
2467 rcu_read_unlock();
2468
2469 return dst;
2470}
2458EXPORT_SYMBOL_GPL(ip6_route_output_flags); 2471EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2459 2472
2460struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) 2473struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
@@ -6029,6 +6042,7 @@ static int __net_init ip6_route_net_init(struct net *net)
6029 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 6042 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6030 dst_init_metrics(&net->ipv6.ip6_null_entry->dst, 6043 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
6031 ip6_template_metrics, true); 6044 ip6_template_metrics, true);
6045 INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->rt6i_uncached);
6032 6046
6033#ifdef CONFIG_IPV6_MULTIPLE_TABLES 6047#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6034 net->ipv6.fib6_has_custom_rules = false; 6048 net->ipv6.fib6_has_custom_rules = false;
@@ -6040,6 +6054,7 @@ static int __net_init ip6_route_net_init(struct net *net)
6040 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 6054 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6041 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, 6055 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
6042 ip6_template_metrics, true); 6056 ip6_template_metrics, true);
6057 INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->rt6i_uncached);
6043 6058
6044 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 6059 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
6045 sizeof(*net->ipv6.ip6_blk_hole_entry), 6060 sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -6049,6 +6064,7 @@ static int __net_init ip6_route_net_init(struct net *net)
6049 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 6064 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6050 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, 6065 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
6051 ip6_template_metrics, true); 6066 ip6_template_metrics, true);
6067 INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached);
6052#endif 6068#endif
6053 6069
6054 net->ipv6.sysctl.flush_delay = 0; 6070 net->ipv6.sysctl.flush_delay = 0;
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index cfc9fcb97465..f35899d45a9a 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -118,6 +118,8 @@ EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);
118 * local and multicast addresses 118 * local and multicast addresses
119 * @net: network namespace for device index lookup 119 * @net: network namespace for device index lookup
120 * @fl6: IPv6 flow struct for lookup 120 * @fl6: IPv6 flow struct for lookup
121 * This function does not hold refcnt on the returned dst.
122 * Caller must hold rcu_read_lock().
121 */ 123 */
122 124
123struct dst_entry *l3mdev_link_scope_lookup(struct net *net, 125struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
@@ -126,9 +128,8 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
126 struct dst_entry *dst = NULL; 128 struct dst_entry *dst = NULL;
127 struct net_device *dev; 129 struct net_device *dev;
128 130
131 WARN_ON_ONCE(!rcu_read_lock_held());
129 if (fl6->flowi6_oif) { 132 if (fl6->flowi6_oif) {
130 rcu_read_lock();
131
132 dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); 133 dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
133 if (dev && netif_is_l3_slave(dev)) 134 if (dev && netif_is_l3_slave(dev))
134 dev = netdev_master_upper_dev_get_rcu(dev); 135 dev = netdev_master_upper_dev_get_rcu(dev);
@@ -136,8 +137,6 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
136 if (dev && netif_is_l3_master(dev) && 137 if (dev && netif_is_l3_master(dev) &&
137 dev->l3mdev_ops->l3mdev_link_scope_lookup) 138 dev->l3mdev_ops->l3mdev_link_scope_lookup)
138 dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6); 139 dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6);
139
140 rcu_read_unlock();
141 } 140 }
142 141
143 return dst; 142 return dst;