Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 211 |
1 file changed, 91 insertions(+), 120 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 896c768e41a2..8842ecb9be48 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -117,8 +117,6 @@
 
 #define RT_GC_TIMEOUT (300*HZ)
 
-static int ip_rt_min_delay = 2 * HZ;
-static int ip_rt_max_delay = 10 * HZ;
 static int ip_rt_max_size;
 static int ip_rt_gc_timeout = RT_GC_TIMEOUT;
 static int ip_rt_gc_interval = 60 * HZ;
@@ -133,12 +131,9 @@ static int ip_rt_mtu_expires = 10 * 60 * HZ;
 static int ip_rt_min_pmtu = 512 + 20 + 20;
 static int ip_rt_min_advmss = 256;
 static int ip_rt_secret_interval = 10 * 60 * HZ;
-static int ip_rt_flush_expected;
-static unsigned long rt_deadline;
 
 #define RTprint(a...) printk(KERN_DEBUG a)
 
-static struct timer_list rt_flush_timer;
 static void rt_worker_func(struct work_struct *work);
 static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
 static struct timer_list rt_secret_timer;
@@ -169,6 +164,7 @@ static struct dst_ops ipv4_dst_ops = {
 	.update_pmtu = ip_rt_update_pmtu,
 	.local_out = ip_local_out,
 	.entry_size = sizeof(struct rtable),
+	.entries = ATOMIC_INIT(0),
 };
 
 #define ECN_OR_COST(class) TC_PRIO_##class
@@ -259,19 +255,16 @@ static inline void rt_hash_lock_init(void)
 static struct rt_hash_bucket *rt_hash_table;
 static unsigned rt_hash_mask;
 static unsigned int rt_hash_log;
-static unsigned int rt_hash_rnd;
+static atomic_t rt_genid;
 
 static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
 #define RT_CACHE_STAT_INC(field) \
 	(__raw_get_cpu_var(rt_cache_stat).field++)
 
-static int rt_intern_hash(unsigned hash, struct rtable *rth,
-			  struct rtable **res);
-
 static unsigned int rt_hash_code(u32 daddr, u32 saddr)
 {
-	return (jhash_2words(daddr, saddr, rt_hash_rnd)
-		& rt_hash_mask);
+	return jhash_2words(daddr, saddr, atomic_read(&rt_genid))
+		& rt_hash_mask;
 }
 
 #define rt_hash(daddr, saddr, idx) \
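The hunk above is the heart of the rework: the hash seed is no longer a standalone rt_hash_rnd but the generation counter itself, so bumping rt_genid simultaneously reseeds the hash and makes every cached entry unreachable by lookup, in O(1). A minimal userspace sketch of the idea, not part of the patch (names are hypothetical and a simple mixer stands in for jhash_2words()):

#include <stdint.h>
#include <stdio.h>

static uint32_t genid;		/* plays the role of rt_genid */

/* simple 32-bit mixer standing in for jhash_2words() */
static uint32_t mix(uint32_t a, uint32_t b, uint32_t seed)
{
	uint32_t h = a * 0x9e3779b1u ^ b * 0x85ebca6bu ^ seed;

	h ^= h >> 16;
	h *= 0x7feb352du;
	h ^= h >> 15;
	return h;
}

#define HASH_MASK 255u		/* 256 buckets, like rt_hash_mask */

static unsigned int hash_code(uint32_t daddr, uint32_t saddr)
{
	return mix(daddr, saddr, genid) & HASH_MASK;
}

int main(void)
{
	uint32_t daddr = 0x0a000001u, saddr = 0x0a000002u;

	printf("bucket before invalidation: %u\n", hash_code(daddr, saddr));
	genid += 42;	/* rt_cache_invalidate() adds a random 1..256 */
	printf("bucket after invalidation:  %u\n", hash_code(daddr, saddr));
	/* old entries now sit in buckets that lookups no longer probe */
	return 0;
}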
@@ -281,27 +274,28 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr)
 #ifdef CONFIG_PROC_FS
 struct rt_cache_iter_state {
 	int bucket;
+	int genid;
 };
 
-static struct rtable *rt_cache_get_first(struct seq_file *seq)
+static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st)
 {
 	struct rtable *r = NULL;
-	struct rt_cache_iter_state *st = seq->private;
 
 	for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
 		rcu_read_lock_bh();
-		r = rt_hash_table[st->bucket].chain;
-		if (r)
-			break;
+		r = rcu_dereference(rt_hash_table[st->bucket].chain);
+		while (r) {
+			if (r->rt_genid == st->genid)
+				return r;
+			r = rcu_dereference(r->u.dst.rt_next);
+		}
 		rcu_read_unlock_bh();
 	}
-	return rcu_dereference(r);
+	return r;
 }
 
-static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r)
+static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r)
 {
-	struct rt_cache_iter_state *st = seq->private;
-
 	r = r->u.dst.rt_next;
 	while (!r) {
 		rcu_read_unlock_bh();
@@ -313,29 +307,38 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r)
 	return rcu_dereference(r);
 }
 
-static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos)
+static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos)
 {
-	struct rtable *r = rt_cache_get_first(seq);
+	struct rtable *r = rt_cache_get_first(st);
 
 	if (r)
-		while (pos && (r = rt_cache_get_next(seq, r)))
+		while (pos && (r = rt_cache_get_next(st, r))) {
+			if (r->rt_genid != st->genid)
+				continue;
 			--pos;
+		}
 	return pos ? NULL : r;
 }
 
 static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	return *pos ? rt_cache_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+	struct rt_cache_iter_state *st = seq->private;
+
+	if (*pos)
+		return rt_cache_get_idx(st, *pos - 1);
+	st->genid = atomic_read(&rt_genid);
+	return SEQ_START_TOKEN;
 }
 
 static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct rtable *r = NULL;
+	struct rtable *r;
+	struct rt_cache_iter_state *st = seq->private;
 
 	if (v == SEQ_START_TOKEN)
-		r = rt_cache_get_first(seq);
+		r = rt_cache_get_first(st);
 	else
-		r = rt_cache_get_next(seq, v);
+		r = rt_cache_get_next(st, v);
 	++*pos;
 	return r;
 }
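The iterator hunks above make /proc dumps generation-aware: rt_cache_seq_start() records rt_genid once, and the walk then shows only entries stamped with that snapshot, so a long dump never mixes entries from before and after a flush. A small standalone sketch of the same filtering idea (hypothetical types, not kernel code):

#include <stdio.h>

struct entry {
	int genid;
	const char *name;
};

struct iter_state {
	int genid;	/* recorded once, as rt_cache_seq_start() now does */
};

int main(void)
{
	struct entry cache[] = {
		{ 1, "stale route" },
		{ 2, "current route A" },
		{ 2, "current route B" },
	};
	struct iter_state st = { .genid = 2 };	/* snapshot at walk start */
	size_t i;

	for (i = 0; i < sizeof(cache) / sizeof(cache[0]); i++) {
		if (cache[i].genid != st.genid)
			continue;	/* other generations are invisible */
		printf("%s\n", cache[i].name);
	}
	return 0;
}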
@@ -708,6 +711,11 @@ static void rt_check_expire(void)
 			continue;
 		spin_lock_bh(rt_hash_lock_addr(i));
 		while ((rth = *rthp) != NULL) {
+			if (rth->rt_genid != atomic_read(&rt_genid)) {
+				*rthp = rth->u.dst.rt_next;
+				rt_free(rth);
+				continue;
+			}
 			if (rth->u.dst.expires) {
 				/* Entry is expired even if it is in use */
 				if (time_before_eq(jiffies, rth->u.dst.expires)) {
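This hunk shows how invalidated entries actually get freed: whenever a hash chain is walked under its lock, entries stamped with a stale rt_genid are unlinked and reclaimed in passing, so a flush costs nothing up front and the garbage disappears incrementally (the same pattern recurs below in rt_intern_hash() and rt_del()). A self-contained userspace sketch of the pattern (hypothetical types; free() stands in for rt_free()):

#include <stdio.h>
#include <stdlib.h>

struct entry {
	int genid;
	struct entry *next;
};

static int cur_genid;		/* plays the role of rt_genid */

/* unlink and free entries stamped with an old generation */
static void scan_chain(struct entry **chain)
{
	struct entry **ep = chain, *e;

	while ((e = *ep) != NULL) {
		if (e->genid != cur_genid) {
			*ep = e->next;	/* unlink in place ... */
			free(e);	/* ... and reclaim, like rt_free() */
			continue;
		}
		ep = &e->next;		/* still valid, keep walking */
	}
}

static struct entry *push(struct entry *head, int genid)
{
	struct entry *e = malloc(sizeof(*e));

	e->genid = genid;
	e->next = head;
	return e;
}

int main(void)
{
	struct entry *chain = NULL, *e;
	int n = 0;

	chain = push(push(push(NULL, 0), 0), 0);	/* three old entries */
	cur_genid = 1;		/* "flush": nothing is freed yet */
	chain = push(chain, 1);	/* one entry in the new generation */
	scan_chain(&chain);	/* stale entries disappear here */
	for (e = chain; e; e = e->next)
		n++;
	printf("entries left after scan: %d\n", n);	/* prints 1 */
	return 0;
}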
@@ -732,83 +740,45 @@ static void rt_check_expire(void)
 
 /*
  * rt_worker_func() is run in process context.
- * If a whole flush was scheduled, it is done.
- * Else, we call rt_check_expire() to scan part of the hash table
+ * We call rt_check_expire() to scan part of the hash table.
  */
 static void rt_worker_func(struct work_struct *work)
 {
-	if (ip_rt_flush_expected) {
-		ip_rt_flush_expected = 0;
-		rt_do_flush(1);
-	} else
-		rt_check_expire();
+	rt_check_expire();
 	schedule_delayed_work(&expires_work, ip_rt_gc_interval);
 }
 
-/* This can run from both BH and non-BH contexts, the latter
- * in the case of a forced flush event.
+/*
+ * Perturbation of rt_genid by a small quantity [1..256].
+ * Using 8 bits of shuffling ensures we can call rt_cache_invalidate()
+ * many times (2^24) without reusing a recent rt_genid.
+ * The Jenkins hash is strong enough that little changes of rt_genid are OK.
  */
-static void rt_run_flush(unsigned long process_context)
+static void rt_cache_invalidate(void)
 {
-	rt_deadline = 0;
-
-	get_random_bytes(&rt_hash_rnd, 4);
-
-	rt_do_flush(process_context);
+	unsigned char shuffle;
+
+	get_random_bytes(&shuffle, sizeof(shuffle));
+	atomic_add(shuffle + 1U, &rt_genid);
 }
 
-static DEFINE_SPINLOCK(rt_flush_lock);
-
+/*
+ * delay < 0  : invalidate cache (fast: entries will be deleted later)
+ * delay >= 0 : invalidate & flush cache (can be long)
+ */
 void rt_cache_flush(int delay)
 {
-	unsigned long now = jiffies;
-	int user_mode = !in_softirq();
-
-	if (delay < 0)
-		delay = ip_rt_min_delay;
-
-	spin_lock_bh(&rt_flush_lock);
-
-	if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) {
-		long tmo = (long)(rt_deadline - now);
-
-		/* If flush timer is already running
-		   and flush request is not immediate (delay > 0):
-
-		   if deadline is not achieved, prolongate timer to "delay",
-		   otherwise fire it at deadline time.
-		 */
-
-		if (user_mode && tmo < ip_rt_max_delay-ip_rt_min_delay)
-			tmo = 0;
-
-		if (delay > tmo)
-			delay = tmo;
-	}
-
-	if (delay <= 0) {
-		spin_unlock_bh(&rt_flush_lock);
-		rt_run_flush(user_mode);
-		return;
-	}
-
-	if (rt_deadline == 0)
-		rt_deadline = now + ip_rt_max_delay;
-
-	mod_timer(&rt_flush_timer, now+delay);
-	spin_unlock_bh(&rt_flush_lock);
+	rt_cache_invalidate();
+	if (delay >= 0)
+		rt_do_flush(!in_softirq());
 }
 
 /*
- * We change rt_hash_rnd and ask next rt_worker_func() invocation
- * to perform a flush in process context
+ * We change rt_genid and let gc do the cleanup.
  */
 static void rt_secret_rebuild(unsigned long dummy)
 {
-	get_random_bytes(&rt_hash_rnd, 4);
-	ip_rt_flush_expected = 1;
-	cancel_delayed_work(&expires_work);
-	schedule_delayed_work(&expires_work, HZ/10);
+	rt_cache_invalidate();
 	mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval);
 }
 
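The new rt_cache_invalidate() replaces the whole timer/deadline machinery with a single atomic add. A small userspace sketch of the arithmetic behind its comment (rand() stands in for get_random_bytes(); not part of the patch): stepping a 32-bit counter by a random 1..256 means it always changes, and even at the maximum step it cannot land on an old value again in fewer than 2^32 / 2^8 = 2^24 invalidations.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static uint32_t genid;

static void cache_invalidate(void)
{
	uint8_t shuffle = (uint8_t)rand();	/* get_random_bytes() stand-in */

	genid += (uint32_t)shuffle + 1u;	/* +1 so genid always moves */
}

int main(void)
{
	/* worst case: every invalidation takes the maximum step of 256 */
	printf("invalidations before genid can recur: %llu\n",
	       (1ull << 32) / 256);		/* 16777216 == 2^24 */

	cache_invalidate();
	printf("genid after one invalidation: %u\n", genid);
	return 0;
}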
@@ -885,7 +855,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
 			rthp = &rt_hash_table[k].chain;
 			spin_lock_bh(rt_hash_lock_addr(k));
 			while ((rth = *rthp) != NULL) {
-				if (!rt_may_expire(rth, tmo, expire)) {
+				if (rth->rt_genid == atomic_read(&rt_genid) &&
+				    !rt_may_expire(rth, tmo, expire)) {
 					tmo >>= 1;
 					rthp = &rth->u.dst.rt_next;
 					continue;
@@ -966,6 +937,11 @@ restart:
 
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	while ((rth = *rthp) != NULL) {
+		if (rth->rt_genid != atomic_read(&rt_genid)) {
+			*rthp = rth->u.dst.rt_next;
+			rt_free(rth);
+			continue;
+		}
 		if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
 			/* Put it first */
 			*rthp = rth->u.dst.rt_next;
@@ -1131,17 +1107,19 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
 
 static void rt_del(unsigned hash, struct rtable *rt)
 {
-	struct rtable **rthp;
+	struct rtable **rthp, *aux;
 
+	rthp = &rt_hash_table[hash].chain;
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	ip_rt_put(rt);
-	for (rthp = &rt_hash_table[hash].chain; *rthp;
-	     rthp = &(*rthp)->u.dst.rt_next)
-		if (*rthp == rt) {
-			*rthp = rt->u.dst.rt_next;
-			rt_free(rt);
-			break;
-		}
+	while ((aux = *rthp) != NULL) {
+		if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) {
+			*rthp = aux->u.dst.rt_next;
+			rt_free(aux);
+			continue;
+		}
+		rthp = &aux->u.dst.rt_next;
+	}
 	spin_unlock_bh(rt_hash_lock_addr(hash));
 }
 
@@ -1186,7 +1164,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 			if (rth->fl.fl4_dst != daddr ||
 			    rth->fl.fl4_src != skeys[i] ||
 			    rth->fl.oif != ikeys[k] ||
-			    rth->fl.iif != 0) {
+			    rth->fl.iif != 0 ||
+			    rth->rt_genid != atomic_read(&rt_genid)) {
 				rthp = &rth->u.dst.rt_next;
 				continue;
 			}
@@ -1224,7 +1203,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 			rt->u.dst.neighbour = NULL;
 			rt->u.dst.hh = NULL;
 			rt->u.dst.xfrm = NULL;
-
+			rt->rt_genid = atomic_read(&rt_genid);
 			rt->rt_flags |= RTCF_REDIRECTED;
 
 			/* Gateway is different ... */
@@ -1445,7 +1424,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 		    rth->rt_src == iph->saddr &&
 		    rth->fl.iif == 0 &&
 		    !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) &&
-		    rth->u.dst.dev->nd_net == net) {
+		    rth->u.dst.dev->nd_net == net &&
+		    rth->rt_genid == atomic_read(&rt_genid)) {
 			unsigned short mtu = new_mtu;
 
 			if (new_mtu < 68 || new_mtu >= old_mtu) {
@@ -1680,8 +1660,9 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->fl.oif = 0;
 	rth->rt_gateway = daddr;
 	rth->rt_spec_dst= spec_dst;
-	rth->rt_type = RTN_MULTICAST;
+	rth->rt_genid = atomic_read(&rt_genid);
 	rth->rt_flags = RTCF_MULTICAST;
+	rth->rt_type = RTN_MULTICAST;
 	if (our) {
 		rth->u.dst.input= ip_local_deliver;
 		rth->rt_flags |= RTCF_LOCAL;
@@ -1820,6 +1801,7 @@ static inline int __mkroute_input(struct sk_buff *skb,
 
 	rth->u.dst.input = ip_forward;
 	rth->u.dst.output = ip_output;
+	rth->rt_genid = atomic_read(&rt_genid);
 
 	rt_set_nexthop(rth, res, itag);
 
@@ -1980,6 +1962,7 @@ local_input:
 		goto e_nobufs;
 
 	rth->u.dst.output= ip_rt_bug;
+	rth->rt_genid = atomic_read(&rt_genid);
 
 	atomic_set(&rth->u.dst.__refcnt, 1);
 	rth->u.dst.flags= DST_HOST;
@@ -2071,7 +2054,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		    rth->fl.oif == 0 &&
 		    rth->fl.mark == skb->mark &&
 		    rth->fl.fl4_tos == tos &&
-		    rth->u.dst.dev->nd_net == net) {
+		    rth->u.dst.dev->nd_net == net &&
+		    rth->rt_genid == atomic_read(&rt_genid)) {
 			dst_use(&rth->u.dst, jiffies);
 			RT_CACHE_STAT_INC(in_hit);
 			rcu_read_unlock();
@@ -2199,6 +2183,7 @@ static inline int __mkroute_output(struct rtable **result,
 	rth->rt_spec_dst= fl->fl4_src;
 
 	rth->u.dst.output=ip_output;
+	rth->rt_genid = atomic_read(&rt_genid);
 
 	RT_CACHE_STAT_INC(out_slow_tot);
 
@@ -2471,7 +2456,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 		    rth->fl.mark == flp->mark &&
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
 			(IPTOS_RT_MASK | RTO_ONLINK)) &&
-		    rth->u.dst.dev->nd_net == net) {
+		    rth->u.dst.dev->nd_net == net &&
+		    rth->rt_genid == atomic_read(&rt_genid)) {
 			dst_use(&rth->u.dst, jiffies);
 			RT_CACHE_STAT_INC(out_hit);
 			rcu_read_unlock_bh();
@@ -2498,6 +2484,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 	.check = ipv4_dst_check,
 	.update_pmtu = ipv4_rt_blackhole_update_pmtu,
 	.entry_size = sizeof(struct rtable),
+	.entries = ATOMIC_INIT(0),
 };
 
 
@@ -2525,6 +2512,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock
 		rt->idev = ort->idev;
 		if (rt->idev)
 			in_dev_hold(rt->idev);
+		rt->rt_genid = atomic_read(&rt_genid);
 		rt->rt_flags = ort->rt_flags;
 		rt->rt_type = ort->rt_type;
 		rt->rt_dst = ort->rt_dst;
@@ -2779,6 +2767,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		     rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
 			if (idx < s_idx)
 				continue;
+			if (rt->rt_genid != atomic_read(&rt_genid))
+				continue;
 			skb->dst = dst_clone(&rt->u.dst);
 			if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
 					 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
@@ -2848,24 +2838,6 @@ ctl_table ipv4_route_table[] = {
 		.strategy = &ipv4_sysctl_rtcache_flush_strategy,
 	},
 	{
-		.ctl_name = NET_IPV4_ROUTE_MIN_DELAY,
-		.procname = "min_delay",
-		.data = &ip_rt_min_delay,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = &proc_dointvec_jiffies,
-		.strategy = &sysctl_jiffies,
-	},
-	{
-		.ctl_name = NET_IPV4_ROUTE_MAX_DELAY,
-		.procname = "max_delay",
-		.data = &ip_rt_max_delay,
-		.maxlen = sizeof(int),
-		.mode = 0644,
-		.proc_handler = &proc_dointvec_jiffies,
-		.strategy = &sysctl_jiffies,
-	},
-	{
 		.ctl_name = NET_IPV4_ROUTE_GC_THRESH,
 		.procname = "gc_thresh",
 		.data = &ipv4_dst_ops.gc_thresh,
@@ -3023,8 +2995,8 @@ int __init ip_rt_init(void)
 {
 	int rc = 0;
 
-	rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^
-			     (jiffies ^ (jiffies >> 7)));
+	atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^
+			     (jiffies ^ (jiffies >> 7))));
 
 #ifdef CONFIG_NET_CLS_ROUTE
 	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct));
@@ -3057,7 +3029,6 @@ int __init ip_rt_init(void)
 	devinet_init();
 	ip_fib_init();
 
-	setup_timer(&rt_flush_timer, rt_run_flush, 0);
 	setup_timer(&rt_secret_timer, rt_secret_rebuild, 0);
 
 	/* All the timers, started at system startup tend