-rw-r--r--   Documentation/filesystems/proc.txt |   5
-rw-r--r--   include/linux/sysctl.h             |   4
-rw-r--r--   include/net/route.h                |   1
-rw-r--r--   net/ipv4/route.c                   | 209
4 files changed, 92 insertions(+), 127 deletions(-)
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 4413a2d4646f..11fe51c036bf 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1919,11 +1919,6 @@ max_size
 Maximum size of the routing cache. Old entries will be purged once the cache
 reached has this size.
 
-max_delay, min_delay
---------------------
-
-Delays for flushing the routing cache.
-
 redirect_load, redirect_number
 ------------------------------
 
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 89faebfe48b8..bf4ae4e138f7 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -440,8 +440,8 @@ enum
 
 enum {
         NET_IPV4_ROUTE_FLUSH=1,
-        NET_IPV4_ROUTE_MIN_DELAY=2,
-        NET_IPV4_ROUTE_MAX_DELAY=3,
+        NET_IPV4_ROUTE_MIN_DELAY=2, /* obsolete since 2.6.25 */
+        NET_IPV4_ROUTE_MAX_DELAY=3, /* obsolete since 2.6.25 */
         NET_IPV4_ROUTE_GC_THRESH=4,
         NET_IPV4_ROUTE_MAX_SIZE=5,
         NET_IPV4_ROUTE_GC_MIN_INTERVAL=6,
diff --git a/include/net/route.h b/include/net/route.h
index fcc6d5b35863..eadad5901429 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -62,6 +62,7 @@ struct rtable
 
         struct in_device *idev;
 
+        int rt_genid;
         unsigned rt_flags;
         __u16 rt_type;
 
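The one-line change above is the heart of the patch: every struct rtable now records the generation (rt_genid) it was created under, and the route cache is invalidated by bumping a global generation counter instead of walking and flushing the hash table. A minimal sketch of that pattern in plain user-space C (cache_entry, cache_genid and the helpers below are illustrative names, not kernel API):

/* Illustrative user-space model of generation-based cache invalidation. */
#include <stdatomic.h>
#include <stddef.h>

static atomic_int cache_genid;                  /* plays the role of rt_genid */

struct cache_entry {
        struct cache_entry *next;
        int genid;                              /* generation the entry was created under */
        int key, value;
};

/* Lookup honours only entries created under the current generation. */
static struct cache_entry *cache_lookup(struct cache_entry *head, int key)
{
        for (struct cache_entry *e = head; e != NULL; e = e->next)
                if (e->genid == atomic_load(&cache_genid) && e->key == key)
                        return e;
        return NULL;
}

/* Invalidation is O(1): nothing is freed here; stale entries are simply
 * skipped by lookups and reclaimed later by garbage collection.         */
static void cache_invalidate(void)
{
        atomic_fetch_add(&cache_genid, 1);
}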
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 163086b2f058..8842ecb9be48 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -117,8 +117,6 @@
 
 #define RT_GC_TIMEOUT (300*HZ)
 
-static int ip_rt_min_delay = 2 * HZ;
-static int ip_rt_max_delay = 10 * HZ;
 static int ip_rt_max_size;
 static int ip_rt_gc_timeout = RT_GC_TIMEOUT;
 static int ip_rt_gc_interval = 60 * HZ;
@@ -133,12 +131,9 @@ static int ip_rt_mtu_expires = 10 * 60 * HZ;
 static int ip_rt_min_pmtu = 512 + 20 + 20;
 static int ip_rt_min_advmss = 256;
 static int ip_rt_secret_interval = 10 * 60 * HZ;
-static int ip_rt_flush_expected;
-static unsigned long rt_deadline;
 
 #define RTprint(a...) printk(KERN_DEBUG a)
 
-static struct timer_list rt_flush_timer;
 static void rt_worker_func(struct work_struct *work);
 static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
 static struct timer_list rt_secret_timer;
@@ -260,19 +255,16 @@ static inline void rt_hash_lock_init(void)
 static struct rt_hash_bucket *rt_hash_table;
 static unsigned rt_hash_mask;
 static unsigned int rt_hash_log;
-static unsigned int rt_hash_rnd;
+static atomic_t rt_genid;
 
 static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
 #define RT_CACHE_STAT_INC(field) \
         (__raw_get_cpu_var(rt_cache_stat).field++)
 
-static int rt_intern_hash(unsigned hash, struct rtable *rth,
-                                struct rtable **res);
-
 static unsigned int rt_hash_code(u32 daddr, u32 saddr)
 {
-        return (jhash_2words(daddr, saddr, rt_hash_rnd)
-                & rt_hash_mask);
+        return jhash_2words(daddr, saddr, atomic_read(&rt_genid))
+                & rt_hash_mask;
 }
 
 #define rt_hash(daddr, saddr, idx) \
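Note that rt_genid also replaces rt_hash_rnd as the seed of the lookup hash in rt_hash_code(), so a generation bump does double duty: stale entries both fail the genid comparison and stop being hashed into the buckets where new lookups search. A rough stand-alone illustration of seeding the bucket hash with the generation (hash_mix() is only a toy stand-in for the kernel's jhash_2words(), and all names here are hypothetical):

#include <stdint.h>

#define HASH_BITS 8
#define HASH_MASK ((1u << HASH_BITS) - 1)

/* Toy 32-bit mixer; the kernel uses jhash_2words() for this. */
static uint32_t hash_mix(uint32_t a, uint32_t b, uint32_t seed)
{
        uint32_t h = (a * 0x9e3779b1u) ^ (b * 0x85ebca6bu) ^ seed;

        h ^= h >> 16;
        h *= 0x7feb352du;
        h ^= h >> 15;
        return h;
}

/* Same shape as rt_hash_code(): the generation acts as the hash seed,
 * so a different genid yields a different bucket for the same flow.   */
static unsigned bucket_for(uint32_t daddr, uint32_t saddr, uint32_t genid)
{
        return hash_mix(daddr, saddr, genid) & HASH_MASK;
}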
@@ -282,27 +274,28 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr)
 #ifdef CONFIG_PROC_FS
 struct rt_cache_iter_state {
         int bucket;
+        int genid;
 };
 
-static struct rtable *rt_cache_get_first(struct seq_file *seq)
+static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st)
 {
         struct rtable *r = NULL;
-        struct rt_cache_iter_state *st = seq->private;
 
         for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
                 rcu_read_lock_bh();
-                r = rt_hash_table[st->bucket].chain;
-                if (r)
-                        break;
+                r = rcu_dereference(rt_hash_table[st->bucket].chain);
+                while (r) {
+                        if (r->rt_genid == st->genid)
+                                return r;
+                        r = rcu_dereference(r->u.dst.rt_next);
+                }
                 rcu_read_unlock_bh();
         }
-        return rcu_dereference(r);
+        return r;
 }
 
-static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r)
+static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r)
 {
-        struct rt_cache_iter_state *st = seq->private;
-
         r = r->u.dst.rt_next;
         while (!r) {
                 rcu_read_unlock_bh();
@@ -314,29 +307,38 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r)
         return rcu_dereference(r);
 }
 
-static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos)
+static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos)
 {
-        struct rtable *r = rt_cache_get_first(seq);
+        struct rtable *r = rt_cache_get_first(st);
 
         if (r)
-                while (pos && (r = rt_cache_get_next(seq, r)))
+                while (pos && (r = rt_cache_get_next(st, r))) {
+                        if (r->rt_genid != st->genid)
+                                continue;
                         --pos;
+                }
         return pos ? NULL : r;
 }
 
 static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
 {
-        return *pos ? rt_cache_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+        struct rt_cache_iter_state *st = seq->private;
+
+        if (*pos)
+                return rt_cache_get_idx(st, *pos - 1);
+        st->genid = atomic_read(&rt_genid);
+        return SEQ_START_TOKEN;
 }
 
 static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-        struct rtable *r = NULL;
+        struct rtable *r;
+        struct rt_cache_iter_state *st = seq->private;
 
         if (v == SEQ_START_TOKEN)
-                r = rt_cache_get_first(seq);
+                r = rt_cache_get_first(st);
         else
-                r = rt_cache_get_next(seq, v);
+                r = rt_cache_get_next(st, v);
         ++*pos;
         return r;
 }
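The /proc/net/rt_cache iterator snapshots the generation once, in rt_cache_seq_start(), and filters every later step against that snapshot, so a dump that overlaps an invalidation does not mix entries from different generations. The same idea in a small, self-contained sketch (illustrative names, assuming a global cache_genid counter like the one sketched earlier):

#include <stdatomic.h>
#include <stddef.h>

extern atomic_int cache_genid;           /* the global generation counter */

struct entry {
        struct entry *next;
        int genid;
};

struct iter_state {
        int genid;                       /* snapshot taken when the walk starts */
};

/* Mirror of rt_cache_seq_start(): remember the generation exactly once. */
static void iter_start(struct iter_state *st)
{
        st->genid = atomic_load(&cache_genid);
}

/* Mirror of the get_first/get_next filtering: skip other generations. */
static struct entry *iter_skip_stale(const struct iter_state *st, struct entry *e)
{
        while (e != NULL && e->genid != st->genid)
                e = e->next;
        return e;
}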
@@ -709,6 +711,11 @@ static void rt_check_expire(void)
                         continue;
                 spin_lock_bh(rt_hash_lock_addr(i));
                 while ((rth = *rthp) != NULL) {
+                        if (rth->rt_genid != atomic_read(&rt_genid)) {
+                                *rthp = rth->u.dst.rt_next;
+                                rt_free(rth);
+                                continue;
+                        }
                         if (rth->u.dst.expires) {
                                 /* Entry is expired even if it is in use */
                                 if (time_before_eq(jiffies, rth->u.dst.expires)) {
@@ -733,83 +740,45 @@ static void rt_check_expire(void)
 
 /*
  * rt_worker_func() is run in process context.
- * If a whole flush was scheduled, it is done.
- * Else, we call rt_check_expire() to scan part of the hash table
+ * we call rt_check_expire() to scan part of the hash table
  */
 static void rt_worker_func(struct work_struct *work)
 {
-        if (ip_rt_flush_expected) {
-                ip_rt_flush_expected = 0;
-                rt_do_flush(1);
-        } else
-                rt_check_expire();
+        rt_check_expire();
         schedule_delayed_work(&expires_work, ip_rt_gc_interval);
 }
 
-/* This can run from both BH and non-BH contexts, the latter
- * in the case of a forced flush event.
+/*
+ * Pertubation of rt_genid by a small quantity [1..256]
+ * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
+ * many times (2^24) without giving recent rt_genid.
+ * Jenkins hash is strong enough that litle changes of rt_genid are OK.
  */
-static void rt_run_flush(unsigned long process_context)
+static void rt_cache_invalidate(void)
 {
-        rt_deadline = 0;
-
-        get_random_bytes(&rt_hash_rnd, 4);
+        unsigned char shuffle;
 
-        rt_do_flush(process_context);
+        get_random_bytes(&shuffle, sizeof(shuffle));
+        atomic_add(shuffle + 1U, &rt_genid);
 }
 
-static DEFINE_SPINLOCK(rt_flush_lock);
-
+/*
+ * delay < 0 : invalidate cache (fast : entries will be deleted later)
+ * delay >= 0 : invalidate & flush cache (can be long)
+ */
 void rt_cache_flush(int delay)
 {
-        unsigned long now = jiffies;
-        int user_mode = !in_softirq();
-
-        if (delay < 0)
-                delay = ip_rt_min_delay;
-
-        spin_lock_bh(&rt_flush_lock);
-
-        if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) {
-                long tmo = (long)(rt_deadline - now);
-
-                /* If flush timer is already running
-                   and flush request is not immediate (delay > 0):
-
-                   if deadline is not achieved, prolongate timer to "delay",
-                   otherwise fire it at deadline time.
-                 */
-
-                if (user_mode && tmo < ip_rt_max_delay-ip_rt_min_delay)
-                        tmo = 0;
-
-                if (delay > tmo)
-                        delay = tmo;
-        }
-
-        if (delay <= 0) {
-                spin_unlock_bh(&rt_flush_lock);
-                rt_run_flush(user_mode);
-                return;
-        }
-
-        if (rt_deadline == 0)
-                rt_deadline = now + ip_rt_max_delay;
-
-        mod_timer(&rt_flush_timer, now+delay);
-        spin_unlock_bh(&rt_flush_lock);
+        rt_cache_invalidate();
+        if (delay >= 0)
+                rt_do_flush(!in_softirq());
 }
 
 /*
- * We change rt_hash_rnd and ask next rt_worker_func() invocation
- * to perform a flush in process context
+ * We change rt_genid and let gc do the cleanup
  */
 static void rt_secret_rebuild(unsigned long dummy)
 {
-        get_random_bytes(&rt_hash_rnd, 4);
-        ip_rt_flush_expected = 1;
-        cancel_delayed_work(&expires_work);
-        schedule_delayed_work(&expires_work, HZ/10);
+        rt_cache_invalidate();
         mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval);
 }
 
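rt_cache_invalidate() deliberately does not increment by one: it adds a random value in [1..256], so the 32-bit counter must be bumped at least 2^32 / 2^8 = 2^24 times before it can wrap back onto a recently issued generation, and stale entries still sitting in the table cannot suddenly look fresh again. A user-space model of the same arithmetic (illustrative names; the kernel draws its randomness from get_random_bytes()):

#include <stdatomic.h>
#include <stdlib.h>

static atomic_uint cache_genid;   /* stands in for rt_genid; unsigned, so wraparound is well defined */

static void cache_invalidate(void)
{
        unsigned int shuffle = (unsigned int)(rand() & 0xff);   /* kernel uses get_random_bytes() */

        /* "+ 1" guarantees the counter always advances by at least one,
         * and the step of at most 256 means ~2^24 invalidations are needed
         * before the counter can revisit a recently issued value.          */
        atomic_fetch_add(&cache_genid, shuffle + 1u);
}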
@@ -886,7 +855,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
                         rthp = &rt_hash_table[k].chain;
                         spin_lock_bh(rt_hash_lock_addr(k));
                         while ((rth = *rthp) != NULL) {
-                                if (!rt_may_expire(rth, tmo, expire)) {
+                                if (rth->rt_genid == atomic_read(&rt_genid) &&
+                                        !rt_may_expire(rth, tmo, expire)) {
                                         tmo >>= 1;
                                         rthp = &rth->u.dst.rt_next;
                                         continue;
@@ -967,6 +937,11 @@ restart:
 
         spin_lock_bh(rt_hash_lock_addr(hash));
         while ((rth = *rthp) != NULL) {
+                if (rth->rt_genid != atomic_read(&rt_genid)) {
+                        *rthp = rth->u.dst.rt_next;
+                        rt_free(rth);
+                        continue;
+                }
                 if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
                         /* Put it first */
                         *rthp = rth->u.dst.rt_next;
@@ -1132,17 +1107,19 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
 
 static void rt_del(unsigned hash, struct rtable *rt)
 {
-        struct rtable **rthp;
+        struct rtable **rthp, *aux;
 
+        rthp = &rt_hash_table[hash].chain;
         spin_lock_bh(rt_hash_lock_addr(hash));
         ip_rt_put(rt);
-        for (rthp = &rt_hash_table[hash].chain; *rthp;
-             rthp = &(*rthp)->u.dst.rt_next)
-                if (*rthp == rt) {
-                        *rthp = rt->u.dst.rt_next;
-                        rt_free(rt);
-                        break;
+        while ((aux = *rthp) != NULL) {
+                if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) {
+                        *rthp = aux->u.dst.rt_next;
+                        rt_free(aux);
+                        continue;
                 }
+                rthp = &aux->u.dst.rt_next;
+        }
         spin_unlock_bh(rt_hash_lock_addr(hash));
 }
 
@@ -1187,7 +1164,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
                                 if (rth->fl.fl4_dst != daddr ||
                                     rth->fl.fl4_src != skeys[i] ||
                                     rth->fl.oif != ikeys[k] ||
-                                    rth->fl.iif != 0) {
+                                    rth->fl.iif != 0 ||
+                                    rth->rt_genid != atomic_read(&rt_genid)) {
                                         rthp = &rth->u.dst.rt_next;
                                         continue;
                                 }
@@ -1225,7 +1203,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
                         rt->u.dst.neighbour = NULL;
                         rt->u.dst.hh = NULL;
                         rt->u.dst.xfrm = NULL;
-
+                        rt->rt_genid = atomic_read(&rt_genid);
                         rt->rt_flags |= RTCF_REDIRECTED;
 
                         /* Gateway is different ... */
@@ -1446,7 +1424,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
                                     rth->rt_src == iph->saddr &&
                                     rth->fl.iif == 0 &&
                                     !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) &&
-                                    rth->u.dst.dev->nd_net == net) {
+                                    rth->u.dst.dev->nd_net == net &&
+                                    rth->rt_genid == atomic_read(&rt_genid)) {
                                         unsigned short mtu = new_mtu;
 
                                         if (new_mtu < 68 || new_mtu >= old_mtu) {
@@ -1681,8 +1660,9 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
         rth->fl.oif = 0;
         rth->rt_gateway = daddr;
         rth->rt_spec_dst= spec_dst;
-        rth->rt_type = RTN_MULTICAST;
+        rth->rt_genid = atomic_read(&rt_genid);
         rth->rt_flags = RTCF_MULTICAST;
+        rth->rt_type = RTN_MULTICAST;
         if (our) {
                 rth->u.dst.input= ip_local_deliver;
                 rth->rt_flags |= RTCF_LOCAL;
@@ -1821,6 +1801,7 @@ static inline int __mkroute_input(struct sk_buff *skb,
 
         rth->u.dst.input = ip_forward;
         rth->u.dst.output = ip_output;
+        rth->rt_genid = atomic_read(&rt_genid);
 
         rt_set_nexthop(rth, res, itag);
 
@@ -1981,6 +1962,7 @@ local_input:
                 goto e_nobufs;
 
         rth->u.dst.output= ip_rt_bug;
+        rth->rt_genid = atomic_read(&rt_genid);
 
         atomic_set(&rth->u.dst.__refcnt, 1);
         rth->u.dst.flags= DST_HOST;
@@ -2072,7 +2054,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                     rth->fl.oif == 0 &&
                     rth->fl.mark == skb->mark &&
                     rth->fl.fl4_tos == tos &&
-                    rth->u.dst.dev->nd_net == net) {
+                    rth->u.dst.dev->nd_net == net &&
+                    rth->rt_genid == atomic_read(&rt_genid)) {
                         dst_use(&rth->u.dst, jiffies);
                         RT_CACHE_STAT_INC(in_hit);
                         rcu_read_unlock();
@@ -2200,6 +2183,7 @@ static inline int __mkroute_output(struct rtable **result,
         rth->rt_spec_dst= fl->fl4_src;
 
         rth->u.dst.output=ip_output;
+        rth->rt_genid = atomic_read(&rt_genid);
 
         RT_CACHE_STAT_INC(out_slow_tot);
 
@@ -2472,7 +2456,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
                     rth->fl.mark == flp->mark &&
                     !((rth->fl.fl4_tos ^ flp->fl4_tos) &
                             (IPTOS_RT_MASK | RTO_ONLINK)) &&
-                    rth->u.dst.dev->nd_net == net) {
+                    rth->u.dst.dev->nd_net == net &&
+                    rth->rt_genid == atomic_read(&rt_genid)) {
                         dst_use(&rth->u.dst, jiffies);
                         RT_CACHE_STAT_INC(out_hit);
                         rcu_read_unlock_bh();
@@ -2527,6 +2512,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock
                 rt->idev = ort->idev;
                 if (rt->idev)
                         in_dev_hold(rt->idev);
+                rt->rt_genid = atomic_read(&rt_genid);
                 rt->rt_flags = ort->rt_flags;
                 rt->rt_type = ort->rt_type;
                 rt->rt_dst = ort->rt_dst;
@@ -2781,6 +2767,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
                      rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
                         if (idx < s_idx)
                                 continue;
+                        if (rt->rt_genid != atomic_read(&rt_genid))
+                                continue;
                         skb->dst = dst_clone(&rt->u.dst);
                         if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
                                          cb->nlh->nlmsg_seq, RTM_NEWROUTE,
@@ -2850,24 +2838,6 @@ ctl_table ipv4_route_table[] = {
                 .strategy = &ipv4_sysctl_rtcache_flush_strategy,
         },
         {
-                .ctl_name = NET_IPV4_ROUTE_MIN_DELAY,
-                .procname = "min_delay",
-                .data = &ip_rt_min_delay,
-                .maxlen = sizeof(int),
-                .mode = 0644,
-                .proc_handler = &proc_dointvec_jiffies,
-                .strategy = &sysctl_jiffies,
-        },
-        {
-                .ctl_name = NET_IPV4_ROUTE_MAX_DELAY,
-                .procname = "max_delay",
-                .data = &ip_rt_max_delay,
-                .maxlen = sizeof(int),
-                .mode = 0644,
-                .proc_handler = &proc_dointvec_jiffies,
-                .strategy = &sysctl_jiffies,
-        },
-        {
                 .ctl_name = NET_IPV4_ROUTE_GC_THRESH,
                 .procname = "gc_thresh",
                 .data = &ipv4_dst_ops.gc_thresh,
@@ -3025,8 +2995,8 @@ int __init ip_rt_init(void)
 {
         int rc = 0;
 
-        rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^
-                             (jiffies ^ (jiffies >> 7)));
+        atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^
+                             (jiffies ^ (jiffies >> 7))));
 
 #ifdef CONFIG_NET_CLS_ROUTE
         ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct));
@@ -3059,7 +3029,6 @@ int __init ip_rt_init(void)
         devinet_init();
         ip_fib_init();
 
-        setup_timer(&rt_flush_timer, rt_run_flush, 0);
         setup_timer(&rt_secret_timer, rt_secret_rebuild, 0);
 
         /* All the timers, started at system startup tend