Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--  net/ipv4/route.c  262
1 file changed, 194 insertions(+), 68 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 96be336064fb..e4ab0ac94f92 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -5,8 +5,6 @@
  *
  * ROUTE - implementation of the IP router.
  *
- * Version: $Id: route.c,v 1.103 2002/01/12 07:44:09 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
@@ -134,7 +132,6 @@ static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
 
 static void rt_worker_func(struct work_struct *work);
 static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
-static struct timer_list rt_secret_timer;
 
 /*
  * Interface to generic destination cache.
@@ -253,20 +250,25 @@ static inline void rt_hash_lock_init(void)
 static struct rt_hash_bucket *rt_hash_table __read_mostly;
 static unsigned rt_hash_mask __read_mostly;
 static unsigned int rt_hash_log __read_mostly;
-static atomic_t rt_genid __read_mostly;
 
 static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
 #define RT_CACHE_STAT_INC(field) \
 	(__raw_get_cpu_var(rt_cache_stat).field++)
 
-static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx)
+static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
+				   int genid)
 {
 	return jhash_3words((__force u32)(__be32)(daddr),
 			    (__force u32)(__be32)(saddr),
-			    idx, atomic_read(&rt_genid))
+			    idx, genid)
 		& rt_hash_mask;
 }
 
+static inline int rt_genid(struct net *net)
+{
+	return atomic_read(&net->ipv4.rt_genid);
+}
+
 #ifdef CONFIG_PROC_FS
 struct rt_cache_iter_state {
 	struct seq_net_private p;
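
The two helpers above are the core of the change: rt_hash() now takes the generation count as an explicit argument, and rt_genid() reads it from the per-namespace struct rather than a file-global atomic. Every cached route is stamped with the genid its namespace had when the route was created, so one namespace can invalidate its whole cache without disturbing the others. A standalone sketch of this lazy-invalidation idea in plain C (struct names and fields here are illustrative stand-ins, not the kernel's):

    #include <stdio.h>

    /* Illustrative stand-ins for struct net and struct rtable. */
    struct ns    { int genid; };
    struct entry { int genid; const char *dst; };

    /* An entry is trusted only while its stamp matches its namespace. */
    static int entry_valid(const struct entry *e, const struct ns *ns)
    {
        return e->genid == ns->genid;
    }

    int main(void)
    {
        struct ns net = { .genid = 1 };
        struct entry e = { .genid = net.genid, .dst = "10.0.0.1" };

        printf("valid before flush: %d\n", entry_valid(&e, &net));
        net.genid++;  /* a "flush": every older entry goes stale at once */
        printf("valid after flush:  %d\n", entry_valid(&e, &net));
        return 0;
    }
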
@@ -336,7 +338,7 @@ static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
 	struct rt_cache_iter_state *st = seq->private;
 	if (*pos)
 		return rt_cache_get_idx(seq, *pos - 1);
-	st->genid = atomic_read(&rt_genid);
+	st->genid = rt_genid(seq_file_net(seq));
 	return SEQ_START_TOKEN;
 }
 
@@ -683,6 +685,11 @@ static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
 	return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev);
 }
 
+static inline int rt_is_expired(struct rtable *rth)
+{
+	return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev));
+}
+
 /*
  * Perform a full scan of hash table and free all entries.
  * Can be called by a softirq or a process.
@@ -692,6 +699,7 @@ static void rt_do_flush(int process_context)
 {
 	unsigned int i;
 	struct rtable *rth, *next;
+	struct rtable * tail;
 
 	for (i = 0; i <= rt_hash_mask; i++) {
 		if (process_context && need_resched())
@@ -701,11 +709,39 @@ static void rt_do_flush(int process_context)
 			continue;
 
 		spin_lock_bh(rt_hash_lock_addr(i));
+#ifdef CONFIG_NET_NS
+		{
+		struct rtable ** prev, * p;
+
+		rth = rt_hash_table[i].chain;
+
+		/* defer releasing the head of the list after spin_unlock */
+		for (tail = rth; tail; tail = tail->u.dst.rt_next)
+			if (!rt_is_expired(tail))
+				break;
+		if (rth != tail)
+			rt_hash_table[i].chain = tail;
+
+		/* call rt_free on entries after the tail requiring flush */
+		prev = &rt_hash_table[i].chain;
+		for (p = *prev; p; p = next) {
+			next = p->u.dst.rt_next;
+			if (!rt_is_expired(p)) {
+				prev = &p->u.dst.rt_next;
+			} else {
+				*prev = next;
+				rt_free(p);
+			}
+		}
+		}
+#else
 		rth = rt_hash_table[i].chain;
 		rt_hash_table[i].chain = NULL;
+		tail = NULL;
+#endif
 		spin_unlock_bh(rt_hash_lock_addr(i));
 
-		for (; rth; rth = next) {
+		for (; rth != tail; rth = next) {
 			next = rth->u.dst.rt_next;
 			rt_free(rth);
 		}
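
Under CONFIG_NET_NS the flush can no longer throw away whole chains, since a chain now mixes entries from several namespaces and only the expired ones may go. The new code walks the chain once to find a surviving tail that can stay in place, then makes a pointer-to-pointer pass that unlinks just the expired entries; the detached head segment is freed only after the hash lock is dropped. A minimal userspace sketch of that unlink idiom (all names illustrative):

    #include <stdlib.h>

    struct node { int stale; struct node *next; };

    /* Unlink and free every stale node in one pass; survivors keep
     * their relative order and are never re-linked. */
    static void flush_stale(struct node **chain)
    {
        struct node **prev = chain, *p, *next;

        for (p = *prev; p != NULL; p = next) {
            next = p->next;
            if (!p->stale) {
                prev = &p->next;  /* keep: advance the anchor */
            } else {
                *prev = next;     /* splice out ... */
                free(p);          /* ... and release */
            }
        }
    }
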
@@ -738,7 +774,7 @@ static void rt_check_expire(void)
 			continue;
 		spin_lock_bh(rt_hash_lock_addr(i));
 		while ((rth = *rthp) != NULL) {
-			if (rth->rt_genid != atomic_read(&rt_genid)) {
+			if (rt_is_expired(rth)) {
 				*rthp = rth->u.dst.rt_next;
 				rt_free(rth);
 				continue;
@@ -781,21 +817,21 @@ static void rt_worker_func(struct work_struct *work)
  * many times (2^24) without giving recent rt_genid.
  * Jenkins hash is strong enough that litle changes of rt_genid are OK.
  */
-static void rt_cache_invalidate(void)
+static void rt_cache_invalidate(struct net *net)
 {
 	unsigned char shuffle;
 
 	get_random_bytes(&shuffle, sizeof(shuffle));
-	atomic_add(shuffle + 1U, &rt_genid);
+	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
 }
 
 /*
  * delay < 0 : invalidate cache (fast : entries will be deleted later)
  * delay >= 0 : invalidate & flush cache (can be long)
  */
-void rt_cache_flush(int delay)
+void rt_cache_flush(struct net *net, int delay)
 {
-	rt_cache_invalidate();
+	rt_cache_invalidate(net);
 	if (delay >= 0)
 		rt_do_flush(!in_softirq());
 }
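
Note that rt_cache_invalidate() never touches the entries themselves: it advances the namespace's generation number by a random step in [1, 256] (the "+ 1U" guarantees the value actually changes) and leaves stale entries for lookups and GC to discard as they meet them. The same bump in a hedged, userspace form:

    #include <stdatomic.h>
    #include <stdlib.h>

    /* rand() stands in for get_random_bytes(); the randomness only
     * perturbs future hash placement, correctness needs no more
     * than "the counter changed". */
    static void cache_invalidate(atomic_int *genid)
    {
        unsigned char shuffle = (unsigned char)rand();

        atomic_fetch_add(genid, shuffle + 1U);
    }
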
@@ -803,10 +839,11 @@ void rt_cache_flush(int delay)
 /*
  * We change rt_genid and let gc do the cleanup
  */
-static void rt_secret_rebuild(unsigned long dummy)
+static void rt_secret_rebuild(unsigned long __net)
 {
-	rt_cache_invalidate();
-	mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval);
+	struct net *net = (struct net *)__net;
+	rt_cache_invalidate(net);
+	mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
 }
 
 /*
@@ -882,7 +919,7 @@ static int rt_garbage_collect(struct dst_ops *ops)
 			rthp = &rt_hash_table[k].chain;
 			spin_lock_bh(rt_hash_lock_addr(k));
 			while ((rth = *rthp) != NULL) {
-				if (rth->rt_genid == atomic_read(&rt_genid) &&
+				if (!rt_is_expired(rth) &&
 				    !rt_may_expire(rth, tmo, expire)) {
 					tmo >>= 1;
 					rthp = &rth->u.dst.rt_next;
@@ -964,7 +1001,7 @@ restart:
 
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	while ((rth = *rthp) != NULL) {
-		if (rth->rt_genid != atomic_read(&rt_genid)) {
+		if (rt_is_expired(rth)) {
 			*rthp = rth->u.dst.rt_next;
 			rt_free(rth);
 			continue;
@@ -1140,7 +1177,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	ip_rt_put(rt);
 	while ((aux = *rthp) != NULL) {
-		if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) {
+		if (aux == rt || rt_is_expired(aux)) {
 			*rthp = aux->u.dst.rt_next;
 			rt_free(aux);
 			continue;
@@ -1182,7 +1219,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 
 	for (i = 0; i < 2; i++) {
 		for (k = 0; k < 2; k++) {
-			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
+			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
+						rt_genid(net));
 
 			rthp=&rt_hash_table[hash].chain;
 
@@ -1194,7 +1232,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 			    rth->fl.fl4_src != skeys[i] ||
 			    rth->fl.oif != ikeys[k] ||
 			    rth->fl.iif != 0 ||
-			    rth->rt_genid != atomic_read(&rt_genid) ||
+			    rt_is_expired(rth) ||
 			    !net_eq(dev_net(rth->u.dst.dev), net)) {
 				rthp = &rth->u.dst.rt_next;
 				continue;
@@ -1233,7 +1271,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 			rt->u.dst.neighbour = NULL;
 			rt->u.dst.hh = NULL;
 			rt->u.dst.xfrm = NULL;
-			rt->rt_genid = atomic_read(&rt_genid);
+			rt->rt_genid = rt_genid(net);
 			rt->rt_flags |= RTCF_REDIRECTED;
 
 			/* Gateway is different ... */
@@ -1297,7 +1335,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 	} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
 		   rt->u.dst.expires) {
 		unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
-					rt->fl.oif);
+					rt->fl.oif,
+					rt_genid(dev_net(dst->dev)));
 #if RT_CACHE_DEBUG >= 1
 		printk(KERN_DEBUG "ipv4_negative_advice: redirect to "
 			NIPQUAD_FMT "/%02x dropped\n",
@@ -1390,7 +1429,8 @@ static int ip_error(struct sk_buff *skb)
 		break;
 	case ENETUNREACH:
 		code = ICMP_NET_UNREACH;
-		IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
+		IP_INC_STATS_BH(dev_net(rt->u.dst.dev),
+				IPSTATS_MIB_INNOROUTES);
 		break;
 	case EACCES:
 		code = ICMP_PKT_FILTERED;
@@ -1446,7 +1486,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 
 	for (k = 0; k < 2; k++) {
 		for (i = 0; i < 2; i++) {
-			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
+			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
+						rt_genid(net));
 
 			rcu_read_lock();
 			for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -1461,7 +1502,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 			    rth->fl.iif != 0 ||
 			    dst_metric_locked(&rth->u.dst, RTAX_MTU) ||
 			    !net_eq(dev_net(rth->u.dst.dev), net) ||
-			    rth->rt_genid != atomic_read(&rt_genid))
+			    rt_is_expired(rth))
 				continue;
 
 			if (new_mtu < 68 || new_mtu >= old_mtu) {
@@ -1696,7 +1737,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->fl.oif = 0;
 	rth->rt_gateway = daddr;
 	rth->rt_spec_dst= spec_dst;
-	rth->rt_genid = atomic_read(&rt_genid);
+	rth->rt_genid = rt_genid(dev_net(dev));
 	rth->rt_flags = RTCF_MULTICAST;
 	rth->rt_type = RTN_MULTICAST;
 	if (our) {
@@ -1711,7 +1752,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	RT_CACHE_STAT_INC(in_slow_mc);
 
 	in_dev_put(in_dev);
-	hash = rt_hash(daddr, saddr, dev->ifindex);
+	hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
 	return rt_intern_hash(hash, rth, &skb->rtable);
 
 e_nobufs:
@@ -1837,7 +1878,7 @@ static int __mkroute_input(struct sk_buff *skb,
 
 	rth->u.dst.input = ip_forward;
 	rth->u.dst.output = ip_output;
-	rth->rt_genid = atomic_read(&rt_genid);
+	rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev));
 
 	rt_set_nexthop(rth, res, itag);
 
@@ -1872,7 +1913,8 @@ static int ip_mkroute_input(struct sk_buff *skb,
 		return err;
 
 	/* put it into the cache */
-	hash = rt_hash(daddr, saddr, fl->iif);
+	hash = rt_hash(daddr, saddr, fl->iif,
+		       rt_genid(dev_net(rth->u.dst.dev)));
 	return rt_intern_hash(hash, rth, &skb->rtable);
 }
 
@@ -1998,7 +2040,7 @@ local_input:
 		goto e_nobufs;
 
 	rth->u.dst.output= ip_rt_bug;
-	rth->rt_genid = atomic_read(&rt_genid);
+	rth->rt_genid = rt_genid(net);
 
 	atomic_set(&rth->u.dst.__refcnt, 1);
 	rth->u.dst.flags= DST_HOST;
@@ -2028,7 +2070,7 @@ local_input:
 		rth->rt_flags &= ~RTCF_LOCAL;
 	}
 	rth->rt_type = res.type;
-	hash = rt_hash(daddr, saddr, fl.iif);
+	hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
 	err = rt_intern_hash(hash, rth, &skb->rtable);
 	goto done;
 
@@ -2079,7 +2121,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	net = dev_net(dev);
 	tos &= IPTOS_RT_MASK;
-	hash = rt_hash(daddr, saddr, iif);
+	hash = rt_hash(daddr, saddr, iif, rt_genid(net));
 
 	rcu_read_lock();
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2091,7 +2133,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		     (rth->fl.fl4_tos ^ tos)) == 0 &&
 		    rth->fl.mark == skb->mark &&
 		    net_eq(dev_net(rth->u.dst.dev), net) &&
-		    rth->rt_genid == atomic_read(&rt_genid)) {
+		    !rt_is_expired(rth)) {
 			dst_use(&rth->u.dst, jiffies);
 			RT_CACHE_STAT_INC(in_hit);
 			rcu_read_unlock();
@@ -2219,7 +2261,7 @@ static int __mkroute_output(struct rtable **result,
 	rth->rt_spec_dst= fl->fl4_src;
 
 	rth->u.dst.output=ip_output;
-	rth->rt_genid = atomic_read(&rt_genid);
+	rth->rt_genid = rt_genid(dev_net(dev_out));
 
 	RT_CACHE_STAT_INC(out_slow_tot);
 
@@ -2268,7 +2310,8 @@ static int ip_mkroute_output(struct rtable **rp,
 	int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
 	unsigned hash;
 	if (err == 0) {
-		hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif);
+		hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
+			       rt_genid(dev_net(dev_out)));
 		err = rt_intern_hash(hash, rth, rp);
 	}
 
@@ -2480,7 +2523,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 	unsigned hash;
 	struct rtable *rth;
 
-	hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif);
+	hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));
 
 	rcu_read_lock_bh();
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2493,7 +2536,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
 			    (IPTOS_RT_MASK | RTO_ONLINK)) &&
 		    net_eq(dev_net(rth->u.dst.dev), net) &&
-		    rth->rt_genid == atomic_read(&rt_genid)) {
+		    !rt_is_expired(rth)) {
 			dst_use(&rth->u.dst, jiffies);
 			RT_CACHE_STAT_INC(out_hit);
 			rcu_read_unlock_bh();
@@ -2524,7 +2567,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 };
 
 
-static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp)
+static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp)
 {
 	struct rtable *ort = *rp;
 	struct rtable *rt = (struct rtable *)
@@ -2548,7 +2591,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp)
 		rt->idev = ort->idev;
 		if (rt->idev)
 			in_dev_hold(rt->idev);
-		rt->rt_genid = atomic_read(&rt_genid);
+		rt->rt_genid = rt_genid(net);
 		rt->rt_flags = ort->rt_flags;
 		rt->rt_type = ort->rt_type;
 		rt->rt_dst = ort->rt_dst;
@@ -2584,7 +2627,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
 		err = __xfrm_lookup((struct dst_entry **)rp, flp, sk,
 				    flags ? XFRM_LOOKUP_WAIT : 0);
 		if (err == -EREMOTE)
-			err = ipv4_dst_blackhole(rp, flp);
+			err = ipv4_dst_blackhole(net, rp, flp);
 
 		return err;
 	}
@@ -2803,7 +2846,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		     rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
 			if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
 				continue;
-			if (rt->rt_genid != atomic_read(&rt_genid))
+			if (rt_is_expired(rt))
 				continue;
 			skb->dst = dst_clone(&rt->u.dst);
 			if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
@@ -2827,19 +2870,25 @@ done:
 
 void ip_rt_multicast_event(struct in_device *in_dev)
 {
-	rt_cache_flush(0);
+	rt_cache_flush(dev_net(in_dev->dev), 0);
 }
 
 #ifdef CONFIG_SYSCTL
-static int flush_delay;
-
-static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
+static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
 					struct file *filp, void __user *buffer,
 					size_t *lenp, loff_t *ppos)
 {
 	if (write) {
-		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-		rt_cache_flush(flush_delay);
+		int flush_delay;
+		ctl_table ctl;
+		struct net *net;
+
+		memcpy(&ctl, __ctl, sizeof(ctl));
+		ctl.data = &flush_delay;
+		proc_dointvec(&ctl, write, filp, buffer, lenp, ppos);
+
+		net = (struct net *)__ctl->extra1;
+		rt_cache_flush(net, flush_delay);
 		return 0;
 	}
 
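
With one flush sysctl per namespace, the handler can no longer parse into a shared static int: it copies the ctl_table onto the stack, points .data at a local, and recovers the owning struct net from ->extra1, which sysctl_route_net_init() below stashes there at registration time. A self-contained sketch of that stack-copy trick (the types here are simplified stand-ins, not the ones from <linux/sysctl.h>):

    #include <string.h>

    struct ctl_sketch {
        void *data;    /* where the parsed value gets written */
        void *extra1;  /* per-registration cookie (the net)   */
    };

    struct net_sketch { int flushed_with_delay; };

    /* Each writer parses into its own stack slot, so concurrent
     * writes from different namespaces cannot trample each other. */
    static void handle_flush_write(struct ctl_sketch *shared, int value)
    {
        struct ctl_sketch ctl;
        int flush_delay;

        memcpy(&ctl, shared, sizeof(ctl));
        ctl.data = &flush_delay;
        *(int *)ctl.data = value;  /* proc_dointvec() would parse here */

        ((struct net_sketch *)shared->extra1)->flushed_with_delay =
            flush_delay;
    }
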
@@ -2855,25 +2904,18 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
 					   size_t newlen)
 {
 	int delay;
+	struct net *net;
 	if (newlen != sizeof(int))
 		return -EINVAL;
 	if (get_user(delay, (int __user *)newval))
 		return -EFAULT;
-	rt_cache_flush(delay);
+	net = (struct net *)table->extra1;
+	rt_cache_flush(net, delay);
 	return 0;
 }
 
 ctl_table ipv4_route_table[] = {
 	{
-		.ctl_name	= NET_IPV4_ROUTE_FLUSH,
-		.procname	= "flush",
-		.data		= &flush_delay,
-		.maxlen		= sizeof(int),
-		.mode		= 0200,
-		.proc_handler	= &ipv4_sysctl_rtcache_flush,
-		.strategy	= &ipv4_sysctl_rtcache_flush_strategy,
-	},
-	{
 		.ctl_name	= NET_IPV4_ROUTE_GC_THRESH,
 		.procname	= "gc_thresh",
 		.data		= &ipv4_dst_ops.gc_thresh,
@@ -3011,8 +3053,97 @@ ctl_table ipv4_route_table[] = {
 	},
 	{ .ctl_name = 0 }
 };
+
+static __net_initdata struct ctl_path ipv4_route_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
+	{ .procname = "route", .ctl_name = NET_IPV4_ROUTE, },
+	{ },
+};
+
+
+static struct ctl_table ipv4_route_flush_table[] = {
+	{
+		.ctl_name	= NET_IPV4_ROUTE_FLUSH,
+		.procname	= "flush",
+		.maxlen		= sizeof(int),
+		.mode		= 0200,
+		.proc_handler	= &ipv4_sysctl_rtcache_flush,
+		.strategy	= &ipv4_sysctl_rtcache_flush_strategy,
+	},
+	{ .ctl_name = 0 },
+};
+
+static __net_init int sysctl_route_net_init(struct net *net)
+{
+	struct ctl_table *tbl;
+
+	tbl = ipv4_route_flush_table;
+	if (net != &init_net) {
+		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
+		if (tbl == NULL)
+			goto err_dup;
+	}
+	tbl[0].extra1 = net;
+
+	net->ipv4.route_hdr =
+		register_net_sysctl_table(net, ipv4_route_path, tbl);
+	if (net->ipv4.route_hdr == NULL)
+		goto err_reg;
+	return 0;
+
+err_reg:
+	if (tbl != ipv4_route_flush_table)
+		kfree(tbl);
+err_dup:
+	return -ENOMEM;
+}
+
+static __net_exit void sysctl_route_net_exit(struct net *net)
+{
+	struct ctl_table *tbl;
+
+	tbl = net->ipv4.route_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->ipv4.route_hdr);
+	BUG_ON(tbl == ipv4_route_flush_table);
+	kfree(tbl);
+}
+
+static __net_initdata struct pernet_operations sysctl_route_ops = {
+	.init = sysctl_route_net_init,
+	.exit = sysctl_route_net_exit,
+};
 #endif
 
+
+static __net_init int rt_secret_timer_init(struct net *net)
+{
+	atomic_set(&net->ipv4.rt_genid,
+			(int) ((num_physpages ^ (num_physpages>>8)) ^
+			(jiffies ^ (jiffies >> 7))));
+
+	net->ipv4.rt_secret_timer.function = rt_secret_rebuild;
+	net->ipv4.rt_secret_timer.data = (unsigned long)net;
+	init_timer_deferrable(&net->ipv4.rt_secret_timer);
+
+	net->ipv4.rt_secret_timer.expires =
+		jiffies + net_random() % ip_rt_secret_interval +
+		ip_rt_secret_interval;
+	add_timer(&net->ipv4.rt_secret_timer);
+	return 0;
+}
+
+static __net_exit void rt_secret_timer_exit(struct net *net)
+{
+	del_timer_sync(&net->ipv4.rt_secret_timer);
+}
+
+static __net_initdata struct pernet_operations rt_secret_timer_ops = {
+	.init = rt_secret_timer_init,
+	.exit = rt_secret_timer_exit,
+};
+
+
 #ifdef CONFIG_NET_CLS_ROUTE
 struct ip_rt_acct *ip_rt_acct __read_mostly;
 #endif /* CONFIG_NET_CLS_ROUTE */
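
Everything new in this patch hangs off pernet_operations: the .init hook runs for each namespace as it is created (including init_net during boot) and .exit as it is torn down, which is what gives every namespace its own genid seed, secret-rebuild timer, and flush sysctl. The skeleton of the pattern, for reference (register_pernet_subsys() is the real kernel API; the example ops and field comments are illustrative, not from this file):

    /* Kernel-style sketch, not standalone: per-namespace state is
     * set up in .init and torn down in .exit, instead of once at
     * module init. */
    static __net_init int example_net_init(struct net *net)
    {
        /* allocate and initialize state hanging off 'net' */
        return 0;
    }

    static __net_exit void example_net_exit(struct net *net)
    {
        /* release whatever .init set up for this namespace */
    }

    static struct pernet_operations example_net_ops = {
        .init = example_net_init,
        .exit = example_net_exit,
    };

    /* at subsystem init: register_pernet_subsys(&example_net_ops); */
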
@@ -3031,9 +3162,6 @@ int __init ip_rt_init(void)
 {
 	int rc = 0;
 
-	atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^
-			     (jiffies ^ (jiffies >> 7))));
-
 #ifdef CONFIG_NET_CLS_ROUTE
 	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct));
 	if (!ip_rt_acct)
@@ -3065,19 +3193,14 @@ int __init ip_rt_init(void)
 	devinet_init();
 	ip_fib_init();
 
-	rt_secret_timer.function = rt_secret_rebuild;
-	rt_secret_timer.data = 0;
-	init_timer_deferrable(&rt_secret_timer);
-
 	/* All the timers, started at system startup tend
 	   to synchronize. Perturb it a bit.
 	 */
 	schedule_delayed_work(&expires_work,
 		net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
 
-	rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval +
-		ip_rt_secret_interval;
-	add_timer(&rt_secret_timer);
+	if (register_pernet_subsys(&rt_secret_timer_ops))
+		printk(KERN_ERR "Unable to setup rt_secret_timer\n");
 
 	if (ip_rt_proc_init())
 		printk(KERN_ERR "Unable to create route proc files\n");
@@ -3087,6 +3210,9 @@ int __init ip_rt_init(void)
 #endif
 	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
 
+#ifdef CONFIG_SYSCTL
+	register_pernet_subsys(&sysctl_route_ops);
+#endif
 	return rc;
 }
 