Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--	net/ipv4/route.c	| 262
1 file changed, 194 insertions(+), 68 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 96be336064fb..e4ab0ac94f92 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -5,8 +5,6 @@
  *
  * ROUTE - implementation of the IP router.
  *
- * Version:	$Id: route.c,v 1.103 2002/01/12 07:44:09 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
@@ -134,7 +132,6 @@ static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
 
 static void rt_worker_func(struct work_struct *work);
 static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
-static struct timer_list rt_secret_timer;
 
 /*
  * Interface to generic destination cache.
@@ -253,20 +250,25 @@ static inline void rt_hash_lock_init(void)
 static struct rt_hash_bucket	*rt_hash_table __read_mostly;
 static unsigned			rt_hash_mask __read_mostly;
 static unsigned int		rt_hash_log __read_mostly;
-static atomic_t			rt_genid __read_mostly;
 
 static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
 #define RT_CACHE_STAT_INC(field) \
	(__raw_get_cpu_var(rt_cache_stat).field++)
 
-static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx)
+static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
+				   int genid)
 {
 	return jhash_3words((__force u32)(__be32)(daddr),
 			    (__force u32)(__be32)(saddr),
-			    idx, atomic_read(&rt_genid))
+			    idx, genid)
 		& rt_hash_mask;
 }
 
+static inline int rt_genid(struct net *net)
+{
+	return atomic_read(&net->ipv4.rt_genid);
+}
+
 #ifdef CONFIG_PROC_FS
 struct rt_cache_iter_state {
 	struct seq_net_private p;
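The heart of the patch is in the hunk above: the generation id becomes an explicit parameter of rt_hash() and moves from a file-scope atomic into struct net, giving every namespace its own counter. Because the genid is mixed into the Jenkins hash together with the addresses, bumping it sends subsequent lookups of the same flow to a different hash position, so stale entries are simply never found again and can be reaped lazily. Below is a minimal user-space sketch of that invalidation trick; mix32() is an illustrative stand-in for jhash_3words(), not the kernel function.

#include <stdint.h>
#include <stdio.h>

#define NBUCKETS 16	/* models rt_hash_mask + 1 */

/* stand-in for jhash_3words(): any decent mixer shows the effect */
static unsigned mix32(uint32_t daddr, uint32_t saddr, uint32_t idx, int genid)
{
	uint32_t h = daddr * 2654435761u ^ saddr * 40503u ^
		     idx ^ (uint32_t)genid * 668265263u;
	h ^= h >> 13;
	return h & (NBUCKETS - 1);
}

int main(void)
{
	int genid = 42;
	unsigned before = mix32(0x0a000001, 0x0a000002, 3, genid);

	genid += 17;	/* rt_cache_invalidate(): bump the generation */
	unsigned after = mix32(0x0a000001, 0x0a000002, 3, genid);

	/* same flow, (almost certainly) different bucket: old entries unreachable */
	printf("bucket before flush: %u, after flush: %u\n", before, after);
	return 0;
}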
@@ -336,7 +338,7 @@ static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
 	struct rt_cache_iter_state *st = seq->private;
 	if (*pos)
 		return rt_cache_get_idx(seq, *pos - 1);
-	st->genid = atomic_read(&rt_genid);
+	st->genid = rt_genid(seq_file_net(seq));
 	return SEQ_START_TOKEN;
 }
 
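The /proc iterator change follows directly: rt_cache_seq_start() now snapshots the generation of the namespace owning the seq_file (via seq_file_net()) rather than the old global counter, so one walk shows exactly one generation even if a flush lands midway through. A user-space miniature of that snapshot-and-filter pattern, with a static array standing in for the hash chains:

#include <stdio.h>

struct entry { int genid; const char *name; };

static struct entry table[] = { {1, "stale"}, {2, "fresh"}, {1, "stale2"} };
static int live_genid = 2;	/* models net->ipv4.rt_genid */

int main(void)
{
	int snap = live_genid;	/* st->genid = rt_genid(...) in seq_start */

	for (unsigned i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].genid == snap)	/* show the current generation only */
			printf("%s\n", table[i].name);
	return 0;
}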
@@ -683,6 +685,11 @@ static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
 	return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev);
 }
 
+static inline int rt_is_expired(struct rtable *rth)
+{
+	return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev));
+}
+
 /*
  * Perform a full scan of hash table and free all entries.
  * Can be called by a softirq or a process.
@@ -692,6 +699,7 @@ static void rt_do_flush(int process_context)
 {
 	unsigned int i;
 	struct rtable *rth, *next;
+	struct rtable * tail;
 
 	for (i = 0; i <= rt_hash_mask; i++) {
 		if (process_context && need_resched())
@@ -701,11 +709,39 @@ static void rt_do_flush(int process_context)
 			continue;
 
 		spin_lock_bh(rt_hash_lock_addr(i));
+#ifdef CONFIG_NET_NS
+		{
+		struct rtable ** prev, * p;
+
+		rth = rt_hash_table[i].chain;
+
+		/* defer releasing the head of the list after spin_unlock */
+		for (tail = rth; tail; tail = tail->u.dst.rt_next)
+			if (!rt_is_expired(tail))
+				break;
+		if (rth != tail)
+			rt_hash_table[i].chain = tail;
+
+		/* call rt_free on entries after the tail requiring flush */
+		prev = &rt_hash_table[i].chain;
+		for (p = *prev; p; p = next) {
+			next = p->u.dst.rt_next;
+			if (!rt_is_expired(p)) {
+				prev = &p->u.dst.rt_next;
+			} else {
+				*prev = next;
+				rt_free(p);
+			}
+		}
+		}
+#else
 		rth = rt_hash_table[i].chain;
 		rt_hash_table[i].chain = NULL;
+		tail = NULL;
+#endif
 		spin_unlock_bh(rt_hash_lock_addr(i));
 
-		for (; rth; rth = next) {
+		for (; rth != tail; rth = next) {
 			next = rth->u.dst.rt_next;
 			rt_free(rth);
 		}
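With several namespaces sharing one hash table, rt_do_flush() can no longer just sever a whole chain: only entries whose genid is stale may go. The CONFIG_NET_NS walk above therefore works in two phases: it detaches the leading run of expired entries (freed after the bucket lock drops, bounded by tail) and then unlinks expired entries scattered through the remainder in place. A user-space sketch of the same partition over a singly linked list; expired() stands in for rt_is_expired(), and plain free() replaces the RCU-deferred rt_free().

#include <stdio.h>
#include <stdlib.h>

struct rtable { int genid; struct rtable *next; };

static int cur_genid = 2;
static int expired(const struct rtable *r) { return r->genid != cur_genid; }

static struct rtable *chain;	/* one hash bucket */

static void flush_bucket(void)
{
	struct rtable *rth, *tail, **prev, *p, *next;

	/* --- held under the bucket lock in the kernel --- */
	rth = chain;

	/* phase 1: find the first live entry; the expired prefix
	 * [rth, tail) is detached and freed later, outside the lock */
	for (tail = rth; tail; tail = tail->next)
		if (!expired(tail))
			break;
	chain = tail;

	/* phase 2: unlink expired entries scattered through the rest */
	prev = &chain;
	for (p = *prev; p; p = next) {
		next = p->next;
		if (!expired(p)) {
			prev = &p->next;
		} else {
			*prev = next;
			free(p);
		}
	}
	/* --- lock dropped here --- */

	/* free the detached prefix without holding the lock */
	for (; rth != tail; rth = next) {
		next = rth->next;
		free(rth);
	}
}

int main(void)
{
	int genids[] = { 1, 1, 2, 1, 2 };
	for (int i = 4; i >= 0; i--) {
		struct rtable *r = malloc(sizeof(*r));
		r->genid = genids[i];
		r->next = chain;
		chain = r;
	}
	flush_bucket();
	for (struct rtable *r = chain; r; r = r->next)
		printf("kept genid %d\n", r->genid);
	return 0;
}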
@@ -738,7 +774,7 @@ static void rt_check_expire(void)
 			continue;
 		spin_lock_bh(rt_hash_lock_addr(i));
 		while ((rth = *rthp) != NULL) {
-			if (rth->rt_genid != atomic_read(&rt_genid)) {
+			if (rt_is_expired(rth)) {
 				*rthp = rth->u.dst.rt_next;
 				rt_free(rth);
 				continue;
@@ -781,21 +817,21 @@ static void rt_worker_func(struct work_struct *work)
  * many times (2^24) without giving recent rt_genid.
  * Jenkins hash is strong enough that litle changes of rt_genid are OK.
  */
-static void rt_cache_invalidate(void)
+static void rt_cache_invalidate(struct net *net)
 {
 	unsigned char shuffle;
 
 	get_random_bytes(&shuffle, sizeof(shuffle));
-	atomic_add(shuffle + 1U, &rt_genid);
+	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
 }
 
 /*
  * delay < 0 : invalidate cache (fast : entries will be deleted later)
  * delay >= 0 : invalidate & flush cache (can be long)
  */
-void rt_cache_flush(int delay)
+void rt_cache_flush(struct net *net, int delay)
 {
-	rt_cache_invalidate();
+	rt_cache_invalidate(net);
 	if (delay >= 0)
 		rt_do_flush(!in_softirq());
 }
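A note on the bump itself: get_random_bytes() fills shuffle with a value in 0..255, so adding shuffle + 1U moves the generation by at least 1 (a zero bump would leave stale entries valid) and at most 256, in an unpredictable pattern; per the comment above the function, the 32-bit counter then tolerates on the order of 2^24 flushes before a value could recur. A toy rendition, with rand() standing in for get_random_bytes():

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned genid = 0;

	for (int i = 0; i < 5; i++) {
		unsigned char shuffle = rand() & 0xff;	/* get_random_bytes() stand-in */
		genid += shuffle + 1u;	/* +1: the generation always changes */
		printf("flush %d -> genid %u\n", i, genid);
	}
	return 0;
}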
@@ -803,10 +839,11 @@ void rt_cache_flush(int delay)
 /*
  * We change rt_genid and let gc do the cleanup
  */
-static void rt_secret_rebuild(unsigned long dummy)
+static void rt_secret_rebuild(unsigned long __net)
 {
-	rt_cache_invalidate();
-	mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval);
+	struct net *net = (struct net *)__net;
+	rt_cache_invalidate(net);
+	mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
 }
 
 /*
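struct timer_list of this era has no typed context pointer, so the namespace travels into rt_secret_rebuild() through the timer's data field as an unsigned long and is cast back on entry. A user-space model of that round trip; run_timer() is a hypothetical stand-in for the timer core firing an expired timer, and the code assumes (as the kernel does) that a pointer fits in unsigned long.

#include <stdio.h>

struct net { int rt_genid; };

static void rt_secret_rebuild_model(unsigned long __net)
{
	struct net *net = (struct net *)__net;	/* cast back to the pointer */
	net->rt_genid++;	/* invalidate this namespace only */
}

/* hypothetical stand-in for the timer core invoking a callback */
static void run_timer(void (*fn)(unsigned long), unsigned long data)
{
	fn(data);
}

int main(void)
{
	struct net a = { 0 }, b = { 0 };

	run_timer(rt_secret_rebuild_model, (unsigned long)&a);
	printf("a.rt_genid=%d b.rt_genid=%d\n", a.rt_genid, b.rt_genid);
	return 0;
}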
@@ -882,7 +919,7 @@ static int rt_garbage_collect(struct dst_ops *ops)
 			rthp = &rt_hash_table[k].chain;
 			spin_lock_bh(rt_hash_lock_addr(k));
 			while ((rth = *rthp) != NULL) {
-				if (rth->rt_genid == atomic_read(&rt_genid) &&
+				if (!rt_is_expired(rth) &&
 				    !rt_may_expire(rth, tmo, expire)) {
 					tmo >>= 1;
 					rthp = &rth->u.dst.rt_next;
@@ -964,7 +1001,7 @@ restart:
 
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	while ((rth = *rthp) != NULL) {
-		if (rth->rt_genid != atomic_read(&rt_genid)) {
+		if (rt_is_expired(rth)) {
 			*rthp = rth->u.dst.rt_next;
 			rt_free(rth);
 			continue;
@@ -1140,7 +1177,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	ip_rt_put(rt);
 	while ((aux = *rthp) != NULL) {
-		if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) {
+		if (aux == rt || rt_is_expired(aux)) {
 			*rthp = aux->u.dst.rt_next;
 			rt_free(aux);
 			continue;
@@ -1182,7 +1219,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 
 	for (i = 0; i < 2; i++) {
 		for (k = 0; k < 2; k++) {
-			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
+			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
+						rt_genid(net));
 
 			rthp=&rt_hash_table[hash].chain;
 
@@ -1194,7 +1232,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				    rth->fl.fl4_src != skeys[i] ||
 				    rth->fl.oif != ikeys[k] ||
 				    rth->fl.iif != 0 ||
-				    rth->rt_genid != atomic_read(&rt_genid) ||
+				    rt_is_expired(rth) ||
 				    !net_eq(dev_net(rth->u.dst.dev), net)) {
 					rthp = &rth->u.dst.rt_next;
 					continue;
@@ -1233,7 +1271,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 			rt->u.dst.neighbour	= NULL;
 			rt->u.dst.hh		= NULL;
 			rt->u.dst.xfrm		= NULL;
-			rt->rt_genid		= atomic_read(&rt_genid);
+			rt->rt_genid		= rt_genid(net);
 			rt->rt_flags		|= RTCF_REDIRECTED;
 
 			/* Gateway is different ... */
@@ -1297,7 +1335,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
 			   rt->u.dst.expires) {
 			unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
-						rt->fl.oif);
+						rt->fl.oif,
+						rt_genid(dev_net(dst->dev)));
 #if RT_CACHE_DEBUG >= 1
 			printk(KERN_DEBUG "ipv4_negative_advice: redirect to "
 					  NIPQUAD_FMT "/%02x dropped\n",
@@ -1390,7 +1429,8 @@ static int ip_error(struct sk_buff *skb)
 		break;
 	case ENETUNREACH:
 		code = ICMP_NET_UNREACH;
-		IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
+		IP_INC_STATS_BH(dev_net(rt->u.dst.dev),
+				IPSTATS_MIB_INNOROUTES);
 		break;
 	case EACCES:
 		code = ICMP_PKT_FILTERED;
@@ -1446,7 +1486,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 
 	for (k = 0; k < 2; k++) {
 		for (i = 0; i < 2; i++) {
-			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
+			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
+						rt_genid(net));
 
 			rcu_read_lock();
 			for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -1461,7 +1502,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 				    rth->fl.iif != 0 ||
 				    dst_metric_locked(&rth->u.dst, RTAX_MTU) ||
 				    !net_eq(dev_net(rth->u.dst.dev), net) ||
-				    rth->rt_genid != atomic_read(&rt_genid))
+				    !rt_is_expired(rth))
 					continue;
 
 				if (new_mtu < 68 || new_mtu >= old_mtu) {
@@ -1696,7 +1737,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->fl.oif	= 0;
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
-	rth->rt_genid	= atomic_read(&rt_genid);
+	rth->rt_genid	= rt_genid(dev_net(dev));
 	rth->rt_flags	= RTCF_MULTICAST;
 	rth->rt_type	= RTN_MULTICAST;
 	if (our) {
@@ -1711,7 +1752,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	RT_CACHE_STAT_INC(in_slow_mc);
 
 	in_dev_put(in_dev);
-	hash = rt_hash(daddr, saddr, dev->ifindex);
+	hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
 	return rt_intern_hash(hash, rth, &skb->rtable);
 
 e_nobufs:
@@ -1837,7 +1878,7 @@ static int __mkroute_input(struct sk_buff *skb,
 
 	rth->u.dst.input = ip_forward;
 	rth->u.dst.output = ip_output;
-	rth->rt_genid = atomic_read(&rt_genid);
+	rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev));
 
 	rt_set_nexthop(rth, res, itag);
 
@@ -1872,7 +1913,8 @@ static int ip_mkroute_input(struct sk_buff *skb,
 		return err;
 
 	/* put it into the cache */
-	hash = rt_hash(daddr, saddr, fl->iif);
+	hash = rt_hash(daddr, saddr, fl->iif,
+		       rt_genid(dev_net(rth->u.dst.dev)));
 	return rt_intern_hash(hash, rth, &skb->rtable);
 }
 
@@ -1998,7 +2040,7 @@ local_input:
 		goto e_nobufs;
 
 	rth->u.dst.output= ip_rt_bug;
-	rth->rt_genid = atomic_read(&rt_genid);
+	rth->rt_genid = rt_genid(net);
 
 	atomic_set(&rth->u.dst.__refcnt, 1);
 	rth->u.dst.flags= DST_HOST;
@@ -2028,7 +2070,7 @@ local_input:
 		rth->rt_flags &= ~RTCF_LOCAL;
 	}
 	rth->rt_type	= res.type;
-	hash = rt_hash(daddr, saddr, fl.iif);
+	hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
 	err = rt_intern_hash(hash, rth, &skb->rtable);
 	goto done;
 
@@ -2079,7 +2121,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	net = dev_net(dev);
 	tos &= IPTOS_RT_MASK;
-	hash = rt_hash(daddr, saddr, iif);
+	hash = rt_hash(daddr, saddr, iif, rt_genid(net));
 
 	rcu_read_lock();
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2091,7 +2133,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		     (rth->fl.fl4_tos ^ tos)) == 0 &&
 		    rth->fl.mark == skb->mark &&
 		    net_eq(dev_net(rth->u.dst.dev), net) &&
-		    rth->rt_genid == atomic_read(&rt_genid)) {
+		    !rt_is_expired(rth)) {
 			dst_use(&rth->u.dst, jiffies);
 			RT_CACHE_STAT_INC(in_hit);
 			rcu_read_unlock();
@@ -2219,7 +2261,7 @@ static int __mkroute_output(struct rtable **result,
 	rth->rt_spec_dst= fl->fl4_src;
 
 	rth->u.dst.output=ip_output;
-	rth->rt_genid = atomic_read(&rt_genid);
+	rth->rt_genid = rt_genid(dev_net(dev_out));
 
 	RT_CACHE_STAT_INC(out_slow_tot);
 
@@ -2268,7 +2310,8 @@ static int ip_mkroute_output(struct rtable **rp,
 	int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
 	unsigned hash;
 	if (err == 0) {
-		hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif);
+		hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
+			       rt_genid(dev_net(dev_out)));
 		err = rt_intern_hash(hash, rth, rp);
 	}
 
@@ -2480,7 +2523,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 	unsigned hash;
 	struct rtable *rth;
 
-	hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif);
+	hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));
 
 	rcu_read_lock_bh();
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2493,7 +2536,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
 			    (IPTOS_RT_MASK | RTO_ONLINK)) &&
 		    net_eq(dev_net(rth->u.dst.dev), net) &&
-		    rth->rt_genid == atomic_read(&rt_genid)) {
+		    !rt_is_expired(rth)) {
 			dst_use(&rth->u.dst, jiffies);
 			RT_CACHE_STAT_INC(out_hit);
 			rcu_read_unlock_bh();
@@ -2524,7 +2567,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 };
 
 
-static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp)
+static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp)
 {
 	struct rtable *ort = *rp;
 	struct rtable *rt = (struct rtable *)
@@ -2548,7 +2591,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp)
 		rt->idev = ort->idev;
 		if (rt->idev)
 			in_dev_hold(rt->idev);
-		rt->rt_genid = atomic_read(&rt_genid);
+		rt->rt_genid = rt_genid(net);
 		rt->rt_flags = ort->rt_flags;
 		rt->rt_type = ort->rt_type;
 		rt->rt_dst = ort->rt_dst;
@@ -2584,7 +2627,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
 		err = __xfrm_lookup((struct dst_entry **)rp, flp, sk,
 				    flags ? XFRM_LOOKUP_WAIT : 0);
 		if (err == -EREMOTE)
-			err = ipv4_dst_blackhole(rp, flp);
+			err = ipv4_dst_blackhole(net, rp, flp);
 
 	return err;
 }
@@ -2803,7 +2846,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		     rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
 			if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
 				continue;
-			if (rt->rt_genid != atomic_read(&rt_genid))
+			if (rt_is_expired(rt))
 				continue;
 			skb->dst = dst_clone(&rt->u.dst);
 			if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
@@ -2827,19 +2870,25 @@ done:
 
 void ip_rt_multicast_event(struct in_device *in_dev)
 {
-	rt_cache_flush(0);
+	rt_cache_flush(dev_net(in_dev->dev), 0);
 }
 
 #ifdef CONFIG_SYSCTL
-static int flush_delay;
-
-static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
+static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
 					struct file *filp, void __user *buffer,
 					size_t *lenp, loff_t *ppos)
 {
 	if (write) {
-		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-		rt_cache_flush(flush_delay);
+		int flush_delay;
+		ctl_table ctl;
+		struct net *net;
+
+		memcpy(&ctl, __ctl, sizeof(ctl));
+		ctl.data = &flush_delay;
+		proc_dointvec(&ctl, write, filp, buffer, lenp, ppos);
+
+		net = (struct net *)__ctl->extra1;
+		rt_cache_flush(net, flush_delay);
 		return 0;
 	}
 
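The rewritten handler drops the global flush_delay, which was both racy and meaningless with multiple namespaces: the ctl_table is copied onto the stack, its .data pointer is aimed at a local variable for proc_dointvec() to fill, and the target namespace is recovered from ->extra1, where the per-net registration code added further down stores it. The shape of the trick in a small user-space model, where parse_int() stands in for proc_dointvec() and both struct definitions are simplified stand-ins:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ctl_table_model {
	const char *procname;
	void *data;	/* where the parsed value lands */
	void *extra1;	/* per-registration cookie (the netns here) */
};

struct net_model { int flushes; };

/* stand-in for proc_dointvec(): parse user input into tbl->data */
static void parse_int(struct ctl_table_model *tbl, const char *buf)
{
	*(int *)tbl->data = atoi(buf);
}

static void rtcache_flush_write(struct ctl_table_model *__ctl, const char *buf)
{
	int flush_delay;
	struct ctl_table_model ctl;

	memcpy(&ctl, __ctl, sizeof(ctl));
	ctl.data = &flush_delay;	/* per-call storage, not a global */
	parse_int(&ctl, buf);

	struct net_model *net = __ctl->extra1;
	net->flushes++;
	printf("flushing net %p with delay %d\n", (void *)net, flush_delay);
}

int main(void)
{
	struct net_model net0 = { 0 };
	struct ctl_table_model flush_entry = { "flush", NULL, &net0 };

	rtcache_flush_write(&flush_entry, "5");
	return 0;
}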
@@ -2855,25 +2904,18 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
 						size_t newlen)
 {
 	int delay;
+	struct net *net;
 	if (newlen != sizeof(int))
 		return -EINVAL;
 	if (get_user(delay, (int __user *)newval))
 		return -EFAULT;
-	rt_cache_flush(delay);
+	net = (struct net *)table->extra1;
+	rt_cache_flush(net, delay);
 	return 0;
 }
 
 ctl_table ipv4_route_table[] = {
 	{
-		.ctl_name	= NET_IPV4_ROUTE_FLUSH,
-		.procname	= "flush",
-		.data		= &flush_delay,
-		.maxlen		= sizeof(int),
-		.mode		= 0200,
-		.proc_handler	= &ipv4_sysctl_rtcache_flush,
-		.strategy	= &ipv4_sysctl_rtcache_flush_strategy,
-	},
-	{
 		.ctl_name	= NET_IPV4_ROUTE_GC_THRESH,
 		.procname	= "gc_thresh",
 		.data		= &ipv4_dst_ops.gc_thresh,
@@ -3011,8 +3053,97 @@ ctl_table ipv4_route_table[] = {
 	},
 	{ .ctl_name = 0 }
 };
+
+static __net_initdata struct ctl_path ipv4_route_path[] = {
+	{ .procname = "net", .ctl_name = CTL_NET, },
+	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
+	{ .procname = "route", .ctl_name = NET_IPV4_ROUTE, },
+	{ },
+};
+
+
+static struct ctl_table ipv4_route_flush_table[] = {
+	{
+		.ctl_name	= NET_IPV4_ROUTE_FLUSH,
+		.procname	= "flush",
+		.maxlen		= sizeof(int),
+		.mode		= 0200,
+		.proc_handler	= &ipv4_sysctl_rtcache_flush,
+		.strategy	= &ipv4_sysctl_rtcache_flush_strategy,
+	},
+	{ .ctl_name = 0 },
+};
+
+static __net_init int sysctl_route_net_init(struct net *net)
+{
+	struct ctl_table *tbl;
+
+	tbl = ipv4_route_flush_table;
+	if (net != &init_net) {
+		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
+		if (tbl == NULL)
+			goto err_dup;
+	}
+	tbl[0].extra1 = net;
+
+	net->ipv4.route_hdr =
+		register_net_sysctl_table(net, ipv4_route_path, tbl);
+	if (net->ipv4.route_hdr == NULL)
+		goto err_reg;
+	return 0;
+
+err_reg:
+	if (tbl != ipv4_route_flush_table)
+		kfree(tbl);
+err_dup:
+	return -ENOMEM;
+}
+
+static __net_exit void sysctl_route_net_exit(struct net *net)
+{
+	struct ctl_table *tbl;
+
+	tbl = net->ipv4.route_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->ipv4.route_hdr);
+	BUG_ON(tbl == ipv4_route_flush_table);
+	kfree(tbl);
+}
+
+static __net_initdata struct pernet_operations sysctl_route_ops = {
+	.init = sysctl_route_net_init,
+	.exit = sysctl_route_net_exit,
+};
 #endif
 
+
+static __net_init int rt_secret_timer_init(struct net *net)
+{
+	atomic_set(&net->ipv4.rt_genid,
+			(int) ((num_physpages ^ (num_physpages>>8)) ^
+			(jiffies ^ (jiffies >> 7))));
+
+	net->ipv4.rt_secret_timer.function = rt_secret_rebuild;
+	net->ipv4.rt_secret_timer.data = (unsigned long)net;
+	init_timer_deferrable(&net->ipv4.rt_secret_timer);
+
+	net->ipv4.rt_secret_timer.expires =
+		jiffies + net_random() % ip_rt_secret_interval +
+		ip_rt_secret_interval;
+	add_timer(&net->ipv4.rt_secret_timer);
+	return 0;
+}
+
+static __net_exit void rt_secret_timer_exit(struct net *net)
+{
+	del_timer_sync(&net->ipv4.rt_secret_timer);
+}
+
+static __net_initdata struct pernet_operations rt_secret_timer_ops = {
+	.init = rt_secret_timer_init,
+	.exit = rt_secret_timer_exit,
+};
+
+
 #ifdef CONFIG_NET_CLS_ROUTE
 struct ip_rt_acct *ip_rt_acct __read_mostly;
 #endif /* CONFIG_NET_CLS_ROUTE */
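Everything new in this hunk hangs off the same pernet lifecycle: .init runs for each namespace as it comes up (and for existing ones when register_pernet_subsys() is called), .exit runs at teardown, and the sysctl variant kmemdup()s the template table so every namespace carries its own extra1 back-pointer while init_net keeps the static copy. A compressed user-space model of that pattern; the struct names mirror the kernel's, but strings stand in for the ctl_table payload:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct net { char name[8]; void *route_tbl; };

struct pernet_operations {
	int (*init)(struct net *);
	void (*exit)(struct net *);
};

static char route_flush_template[] = "flush";	/* stands in for ipv4_route_flush_table */

static int sysctl_route_net_init(struct net *net)
{
	/* every namespace except init_net gets a private copy */
	net->route_tbl = strcmp(net->name, "init") ?
		strdup(route_flush_template) : (void *)route_flush_template;
	return net->route_tbl ? 0 : -1;
}

static void sysctl_route_net_exit(struct net *net)
{
	if (net->route_tbl != route_flush_template)
		free(net->route_tbl);	/* only duplicated tables are freed */
}

int main(void)
{
	struct pernet_operations ops = { sysctl_route_net_init, sysctl_route_net_exit };
	struct net init_net = { "init" }, other = { "other" };

	ops.init(&init_net);	/* register_pernet_subsys() runs init for all nets */
	ops.init(&other);
	ops.exit(&other);	/* namespace teardown */
	ops.exit(&init_net);
	return 0;
}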
@@ -3031,9 +3162,6 @@ int __init ip_rt_init(void)
 {
 	int rc = 0;
 
-	atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^
-			     (jiffies ^ (jiffies >> 7))));
-
 #ifdef CONFIG_NET_CLS_ROUTE
 	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct));
 	if (!ip_rt_acct)
@@ -3065,19 +3193,14 @@ int __init ip_rt_init(void)
 	devinet_init();
 	ip_fib_init();
 
-	rt_secret_timer.function = rt_secret_rebuild;
-	rt_secret_timer.data = 0;
-	init_timer_deferrable(&rt_secret_timer);
-
 	/* All the timers, started at system startup tend
 	   to synchronize. Perturb it a bit.
 	 */
 	schedule_delayed_work(&expires_work,
 		net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
 
-	rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval +
-		ip_rt_secret_interval;
-	add_timer(&rt_secret_timer);
+	if (register_pernet_subsys(&rt_secret_timer_ops))
+		printk(KERN_ERR "Unable to setup rt_secret_timer\n");
 
 	if (ip_rt_proc_init())
 		printk(KERN_ERR "Unable to create route proc files\n");
@@ -3087,6 +3210,9 @@ int __init ip_rt_init(void)
 #endif
 	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
 
+#ifdef CONFIG_SYSCTL
+	register_pernet_subsys(&sysctl_route_ops);
+#endif
 	return rc;
 }
 