diff options
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 96 |
1 files changed, 79 insertions, 17 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 85f184e429c6..60398a9370e7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; | |||
129 | static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; | 129 | static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; |
130 | static int ip_rt_min_advmss __read_mostly = 256; | 130 | static int ip_rt_min_advmss __read_mostly = 256; |
131 | 131 | ||
132 | static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; | ||
132 | /* | 133 | /* |
133 | * Interface to generic destination cache. | 134 | * Interface to generic destination cache. |
134 | */ | 135 | */ |
@@ -755,7 +756,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow | |||
755 | struct fib_nh *nh = &FIB_RES_NH(res); | 756 | struct fib_nh *nh = &FIB_RES_NH(res); |
756 | 757 | ||
757 | update_or_create_fnhe(nh, fl4->daddr, new_gw, | 758 | update_or_create_fnhe(nh, fl4->daddr, new_gw, |
758 | 0, 0); | 759 | 0, jiffies + ip_rt_gc_timeout); |
759 | } | 760 | } |
760 | if (kill_route) | 761 | if (kill_route) |
761 | rt->dst.obsolete = DST_OBSOLETE_KILL; | 762 | rt->dst.obsolete = DST_OBSOLETE_KILL; |
@@ -1437,9 +1438,9 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, | |||
1437 | #endif | 1438 | #endif |
1438 | } | 1439 | } |
1439 | 1440 | ||
1440 | static struct rtable *rt_dst_alloc(struct net_device *dev, | 1441 | struct rtable *rt_dst_alloc(struct net_device *dev, |
1441 | unsigned int flags, u16 type, | 1442 | unsigned int flags, u16 type, |
1442 | bool nopolicy, bool noxfrm, bool will_cache) | 1443 | bool nopolicy, bool noxfrm, bool will_cache) |
1443 | { | 1444 | { |
1444 | struct rtable *rt; | 1445 | struct rtable *rt; |
1445 | 1446 | ||
@@ -1467,6 +1468,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev, | |||
1467 | 1468 | ||
1468 | return rt; | 1469 | return rt; |
1469 | } | 1470 | } |
1471 | EXPORT_SYMBOL(rt_dst_alloc); | ||
1470 | 1472 | ||
1471 | /* called in rcu_read_lock() section */ | 1473 | /* called in rcu_read_lock() section */ |
1472 | static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 1474 | static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
@@ -1556,6 +1558,36 @@ static void ip_handle_martian_source(struct net_device *dev, | |||
1556 | #endif | 1558 | #endif |
1557 | } | 1559 | } |
1558 | 1560 | ||
/*
 * ip_del_fnhe - remove the next-hop exception for @daddr from @nh, if present.
 * @nh:    next hop whose exception table (nh->nh_exceptions) is searched
 * @daddr: destination address identifying the exception entry to delete
 *
 * Selects the bucket via fnhe_hashfun(@daddr), walks that bucket's chain and
 * unlinks the first entry whose fnhe_daddr equals @daddr. The entry's routes
 * are flushed with fnhe_flush_routes() and the entry itself is released with
 * kfree_rcu(). The whole operation runs under fnhe_lock (spin_lock_bh).
 * Nothing happens if no entry matches.
 *
 * NOTE(review): nh->nh_exceptions is used without a NULL check. The callers
 * visible in this patch only get here after find_exception() returned an
 * entry, which presumably implies the table exists -- confirm.
 */
1561 | static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) | |
1562 | { | |
1563 | struct fnhe_hash_bucket *hash; | |
1564 | struct fib_nh_exception *fnhe, __rcu **fnhe_p; | |
1565 | u32 hval = fnhe_hashfun(daddr); | |
1566 | |||
/* Held across the lookup and the unlink below. */
1567 | spin_lock_bh(&fnhe_lock); | |
1568 | |||
1569 | hash = rcu_dereference_protected(nh->nh_exceptions, | |
1570 | lockdep_is_held(&fnhe_lock)); | |
/* Advance from the table base to the bucket selected by the hash value. */
1571 | hash += hval; | |
1572 | |||
/*
 * fnhe_p always points at the link that refers to the current entry
 * (first the bucket head, then the previous entry's fnhe_next), so the
 * matching entry can be spliced out by rewriting *fnhe_p.
 */
1573 | fnhe_p = &hash->chain; | |
1574 | fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); | |
1575 | while (fnhe) { | |
1576 | if (fnhe->fnhe_daddr == daddr) { | |
/* Unlink: make the previous link skip over this entry. */
1577 | rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( | |
1578 | fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); | |
1579 | fnhe_flush_routes(fnhe); | |
/*
 * Freed through kfree_rcu() rather than kfree(); presumably
 * lockless readers walking the chain under RCU may still
 * hold a pointer to this entry -- confirm against readers.
 */
1580 | kfree_rcu(fnhe, rcu); | |
/* Only the first matching entry is removed. */
1581 | break; | |
1582 | } | |
1583 | fnhe_p = &fnhe->fnhe_next; | |
1584 | fnhe = rcu_dereference_protected(fnhe->fnhe_next, | |
1585 | lockdep_is_held(&fnhe_lock)); | |
1586 | } | |
1587 | |||
1588 | spin_unlock_bh(&fnhe_lock); | |
1589 | } | |
1590 | |||
1559 | /* called in rcu_read_lock() section */ | 1591 | /* called in rcu_read_lock() section */ |
1560 | static int __mkroute_input(struct sk_buff *skb, | 1592 | static int __mkroute_input(struct sk_buff *skb, |
1561 | const struct fib_result *res, | 1593 | const struct fib_result *res, |
@@ -1609,11 +1641,20 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1609 | 1641 | ||
1610 | fnhe = find_exception(&FIB_RES_NH(*res), daddr); | 1642 | fnhe = find_exception(&FIB_RES_NH(*res), daddr); |
1611 | if (do_cache) { | 1643 | if (do_cache) { |
1612 | if (fnhe) | 1644 | if (fnhe) { |
1613 | rth = rcu_dereference(fnhe->fnhe_rth_input); | 1645 | rth = rcu_dereference(fnhe->fnhe_rth_input); |
1614 | else | 1646 | if (rth && rth->dst.expires && |
1615 | rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); | 1647 | time_after(jiffies, rth->dst.expires)) { |
1648 | ip_del_fnhe(&FIB_RES_NH(*res), daddr); | ||
1649 | fnhe = NULL; | ||
1650 | } else { | ||
1651 | goto rt_cache; | ||
1652 | } | ||
1653 | } | ||
1654 | |||
1655 | rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); | ||
1616 | 1656 | ||
1657 | rt_cache: | ||
1617 | if (rt_cache_valid(rth)) { | 1658 | if (rt_cache_valid(rth)) { |
1618 | skb_dst_set_noref(skb, &rth->dst); | 1659 | skb_dst_set_noref(skb, &rth->dst); |
1619 | goto out; | 1660 | goto out; |
@@ -2005,6 +2046,18 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
2005 | */ | 2046 | */ |
2006 | if (fi && res->prefixlen < 4) | 2047 | if (fi && res->prefixlen < 4) |
2007 | fi = NULL; | 2048 | fi = NULL; |
2049 | } else if ((type == RTN_LOCAL) && (orig_oif != 0) && | ||
2050 | (orig_oif != dev_out->ifindex)) { | ||
2051 | /* For local routes that require a particular output interface | ||
2052 | * we do not want to cache the result. Caching the result | ||
2053 | * causes incorrect behaviour when there are multiple source | ||
2054 | * addresses on the interface, the end result being that if the | ||
2055 | * intended recipient is waiting on that interface for the | ||
2056 | * packet he won't receive it because it will be delivered on | ||
2057 | * the loopback interface and the IP_PKTINFO ipi_ifindex will | ||
2058 | * be set to the loopback interface as well. | ||
2059 | */ | ||
2060 | fi = NULL; | ||
2008 | } | 2061 | } |
2009 | 2062 | ||
2010 | fnhe = NULL; | 2063 | fnhe = NULL; |
@@ -2014,19 +2067,29 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
2014 | struct fib_nh *nh = &FIB_RES_NH(*res); | 2067 | struct fib_nh *nh = &FIB_RES_NH(*res); |
2015 | 2068 | ||
2016 | fnhe = find_exception(nh, fl4->daddr); | 2069 | fnhe = find_exception(nh, fl4->daddr); |
2017 | if (fnhe) | 2070 | if (fnhe) { |
2018 | prth = &fnhe->fnhe_rth_output; | 2071 | prth = &fnhe->fnhe_rth_output; |
2019 | else { | 2072 | rth = rcu_dereference(*prth); |
2020 | if (unlikely(fl4->flowi4_flags & | 2073 | if (rth && rth->dst.expires && |
2021 | FLOWI_FLAG_KNOWN_NH && | 2074 | time_after(jiffies, rth->dst.expires)) { |
2022 | !(nh->nh_gw && | 2075 | ip_del_fnhe(nh, fl4->daddr); |
2023 | nh->nh_scope == RT_SCOPE_LINK))) { | 2076 | fnhe = NULL; |
2024 | do_cache = false; | 2077 | } else { |
2025 | goto add; | 2078 | goto rt_cache; |
2026 | } | 2079 | } |
2027 | prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); | ||
2028 | } | 2080 | } |
2081 | |||
2082 | if (unlikely(fl4->flowi4_flags & | ||
2083 | FLOWI_FLAG_KNOWN_NH && | ||
2084 | !(nh->nh_gw && | ||
2085 | nh->nh_scope == RT_SCOPE_LINK))) { | ||
2086 | do_cache = false; | ||
2087 | goto add; | ||
2088 | } | ||
2089 | prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); | ||
2029 | rth = rcu_dereference(*prth); | 2090 | rth = rcu_dereference(*prth); |
2091 | |||
2092 | rt_cache: | ||
2030 | if (rt_cache_valid(rth)) { | 2093 | if (rt_cache_valid(rth)) { |
2031 | dst_hold(&rth->dst); | 2094 | dst_hold(&rth->dst); |
2032 | return rth; | 2095 | return rth; |
@@ -2569,7 +2632,6 @@ void ip_rt_multicast_event(struct in_device *in_dev) | |||
2569 | } | 2632 | } |
2570 | 2633 | ||
2571 | #ifdef CONFIG_SYSCTL | 2634 | #ifdef CONFIG_SYSCTL |
2572 | static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; | ||
2573 | static int ip_rt_gc_interval __read_mostly = 60 * HZ; | 2635 | static int ip_rt_gc_interval __read_mostly = 60 * HZ; |
2574 | static int ip_rt_gc_min_interval __read_mostly = HZ / 2; | 2636 | static int ip_rt_gc_min_interval __read_mostly = HZ / 2; |
2575 | static int ip_rt_gc_elasticity __read_mostly = 8; | 2637 | static int ip_rt_gc_elasticity __read_mostly = 8; |