aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorJulian Anastasov <ja@ssi.bg>2018-05-02 02:41:19 -0400
committerDavid S. Miller <davem@davemloft.net>2018-05-02 22:52:35 -0400
commit94720e3aee6884d8c8beb678001629da60ec6366 (patch)
treeb340c7c1a126376591db0bac4fb7c5ed6074dac0 /net
parente002434e88882d8f8205619b5b8f1d8e373a2724 (diff)
ipv4: fix fnhe usage by non-cached routes
Allow some non-cached routes to use non-expired fnhe: 1. ip_del_fnhe: moved above and now called by find_exception. The 4.5+ commit deed49df7390 expires fnhe only when caching routes. Change that to: 1.1. use fnhe for non-cached local output routes, with the help from (2) 1.2. allow __mkroute_input to detect expired fnhe (outdated fnhe_gw, for example) when do_cache is false, eg. when itag!=0 for unicast destinations. 2. __mkroute_output: keep fi to allow local routes with orig_oif != 0 to use fnhe info even when the new route will not be cached into fnhe. After commit 839da4d98960 ("net: ipv4: set orig_oif based on fib result for local traffic") it means all local routes will be affected because they are not cached. This change is used to solve a PMTU problem with IPVS (and probably Netfilter DNAT) setups that redirect local clients from target local IP (local route to Virtual IP) to new remote IP target, eg. IPVS TUN real server. Loopback has 64K MTU and we need to create fnhe on the local route that will keep the reduced PMTU for the Virtual IP. Without this change fnhe_pmtu is updated from ICMP but never exposed to non-cached local routes. This includes routes with flowi4_oif!=0 for 4.6+ and with flowi4_oif=any for 4.14+). 3. update_or_create_fnhe: make sure fnhe_expires is not 0 for new entries Fixes: 839da4d98960 ("net: ipv4: set orig_oif based on fib result for local traffic") Fixes: d6d5e999e5df ("route: do not cache fib route info on local routes with oif") Fixes: deed49df7390 ("route: check and remove route cache when we get route") Cc: David Ahern <dsahern@gmail.com> Cc: Xin Long <lucien.xin@gmail.com> Signed-off-by: Julian Anastasov <ja@ssi.bg> Acked-by: David Ahern <dsahern@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/route.c118
1 files changed, 53 insertions, 65 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ccb25d80f679..1412a7baf0b9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -709,7 +709,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
709 fnhe->fnhe_gw = gw; 709 fnhe->fnhe_gw = gw;
710 fnhe->fnhe_pmtu = pmtu; 710 fnhe->fnhe_pmtu = pmtu;
711 fnhe->fnhe_mtu_locked = lock; 711 fnhe->fnhe_mtu_locked = lock;
712 fnhe->fnhe_expires = expires; 712 fnhe->fnhe_expires = max(1UL, expires);
713 713
714 /* Exception created; mark the cached routes for the nexthop 714 /* Exception created; mark the cached routes for the nexthop
715 * stale, so anyone caching it rechecks if this exception 715 * stale, so anyone caching it rechecks if this exception
@@ -1297,6 +1297,36 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
1297 return mtu - lwtunnel_headroom(dst->lwtstate, mtu); 1297 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
1298} 1298}
1299 1299
1300static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
1301{
1302 struct fnhe_hash_bucket *hash;
1303 struct fib_nh_exception *fnhe, __rcu **fnhe_p;
1304 u32 hval = fnhe_hashfun(daddr);
1305
1306 spin_lock_bh(&fnhe_lock);
1307
1308 hash = rcu_dereference_protected(nh->nh_exceptions,
1309 lockdep_is_held(&fnhe_lock));
1310 hash += hval;
1311
1312 fnhe_p = &hash->chain;
1313 fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
1314 while (fnhe) {
1315 if (fnhe->fnhe_daddr == daddr) {
1316 rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
1317 fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
1318 fnhe_flush_routes(fnhe);
1319 kfree_rcu(fnhe, rcu);
1320 break;
1321 }
1322 fnhe_p = &fnhe->fnhe_next;
1323 fnhe = rcu_dereference_protected(fnhe->fnhe_next,
1324 lockdep_is_held(&fnhe_lock));
1325 }
1326
1327 spin_unlock_bh(&fnhe_lock);
1328}
1329
1300static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) 1330static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
1301{ 1331{
1302 struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions); 1332 struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
@@ -1310,8 +1340,14 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
1310 1340
1311 for (fnhe = rcu_dereference(hash[hval].chain); fnhe; 1341 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1312 fnhe = rcu_dereference(fnhe->fnhe_next)) { 1342 fnhe = rcu_dereference(fnhe->fnhe_next)) {
1313 if (fnhe->fnhe_daddr == daddr) 1343 if (fnhe->fnhe_daddr == daddr) {
1344 if (fnhe->fnhe_expires &&
1345 time_after(jiffies, fnhe->fnhe_expires)) {
1346 ip_del_fnhe(nh, daddr);
1347 break;
1348 }
1314 return fnhe; 1349 return fnhe;
1350 }
1315 } 1351 }
1316 return NULL; 1352 return NULL;
1317} 1353}
@@ -1636,36 +1672,6 @@ static void ip_handle_martian_source(struct net_device *dev,
1636#endif 1672#endif
1637} 1673}
1638 1674
1639static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
1640{
1641 struct fnhe_hash_bucket *hash;
1642 struct fib_nh_exception *fnhe, __rcu **fnhe_p;
1643 u32 hval = fnhe_hashfun(daddr);
1644
1645 spin_lock_bh(&fnhe_lock);
1646
1647 hash = rcu_dereference_protected(nh->nh_exceptions,
1648 lockdep_is_held(&fnhe_lock));
1649 hash += hval;
1650
1651 fnhe_p = &hash->chain;
1652 fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
1653 while (fnhe) {
1654 if (fnhe->fnhe_daddr == daddr) {
1655 rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
1656 fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
1657 fnhe_flush_routes(fnhe);
1658 kfree_rcu(fnhe, rcu);
1659 break;
1660 }
1661 fnhe_p = &fnhe->fnhe_next;
1662 fnhe = rcu_dereference_protected(fnhe->fnhe_next,
1663 lockdep_is_held(&fnhe_lock));
1664 }
1665
1666 spin_unlock_bh(&fnhe_lock);
1667}
1668
1669/* called in rcu_read_lock() section */ 1675/* called in rcu_read_lock() section */
1670static int __mkroute_input(struct sk_buff *skb, 1676static int __mkroute_input(struct sk_buff *skb,
1671 const struct fib_result *res, 1677 const struct fib_result *res,
@@ -1719,20 +1725,10 @@ static int __mkroute_input(struct sk_buff *skb,
1719 1725
1720 fnhe = find_exception(&FIB_RES_NH(*res), daddr); 1726 fnhe = find_exception(&FIB_RES_NH(*res), daddr);
1721 if (do_cache) { 1727 if (do_cache) {
1722 if (fnhe) { 1728 if (fnhe)
1723 rth = rcu_dereference(fnhe->fnhe_rth_input); 1729 rth = rcu_dereference(fnhe->fnhe_rth_input);
1724 if (rth && rth->dst.expires && 1730 else
1725 time_after(jiffies, rth->dst.expires)) { 1731 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
1726 ip_del_fnhe(&FIB_RES_NH(*res), daddr);
1727 fnhe = NULL;
1728 } else {
1729 goto rt_cache;
1730 }
1731 }
1732
1733 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
1734
1735rt_cache:
1736 if (rt_cache_valid(rth)) { 1732 if (rt_cache_valid(rth)) {
1737 skb_dst_set_noref(skb, &rth->dst); 1733 skb_dst_set_noref(skb, &rth->dst);
1738 goto out; 1734 goto out;
@@ -2216,39 +2212,31 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2216 * the loopback interface and the IP_PKTINFO ipi_ifindex will 2212 * the loopback interface and the IP_PKTINFO ipi_ifindex will
2217 * be set to the loopback interface as well. 2213 * be set to the loopback interface as well.
2218 */ 2214 */
2219 fi = NULL; 2215 do_cache = false;
2220 } 2216 }
2221 2217
2222 fnhe = NULL; 2218 fnhe = NULL;
2223 do_cache &= fi != NULL; 2219 do_cache &= fi != NULL;
2224 if (do_cache) { 2220 if (fi) {
2225 struct rtable __rcu **prth; 2221 struct rtable __rcu **prth;
2226 struct fib_nh *nh = &FIB_RES_NH(*res); 2222 struct fib_nh *nh = &FIB_RES_NH(*res);
2227 2223
2228 fnhe = find_exception(nh, fl4->daddr); 2224 fnhe = find_exception(nh, fl4->daddr);
2225 if (!do_cache)
2226 goto add;
2229 if (fnhe) { 2227 if (fnhe) {
2230 prth = &fnhe->fnhe_rth_output; 2228 prth = &fnhe->fnhe_rth_output;
2231 rth = rcu_dereference(*prth); 2229 } else {
2232 if (rth && rth->dst.expires && 2230 if (unlikely(fl4->flowi4_flags &
2233 time_after(jiffies, rth->dst.expires)) { 2231 FLOWI_FLAG_KNOWN_NH &&
2234 ip_del_fnhe(nh, fl4->daddr); 2232 !(nh->nh_gw &&
2235 fnhe = NULL; 2233 nh->nh_scope == RT_SCOPE_LINK))) {
2236 } else { 2234 do_cache = false;
2237 goto rt_cache; 2235 goto add;
2238 } 2236 }
2237 prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
2239 } 2238 }
2240
2241 if (unlikely(fl4->flowi4_flags &
2242 FLOWI_FLAG_KNOWN_NH &&
2243 !(nh->nh_gw &&
2244 nh->nh_scope == RT_SCOPE_LINK))) {
2245 do_cache = false;
2246 goto add;
2247 }
2248 prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
2249 rth = rcu_dereference(*prth); 2239 rth = rcu_dereference(*prth);
2250
2251rt_cache:
2252 if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst)) 2240 if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
2253 return rth; 2241 return rth;
2254 } 2242 }