diff options
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 112 |
1 files changed, 110 insertions, 2 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 46af62363b8c..94cdbc55ca7e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -91,6 +91,7 @@ | |||
91 | #include <linux/rcupdate.h> | 91 | #include <linux/rcupdate.h> |
92 | #include <linux/times.h> | 92 | #include <linux/times.h> |
93 | #include <linux/slab.h> | 93 | #include <linux/slab.h> |
94 | #include <linux/prefetch.h> | ||
94 | #include <net/dst.h> | 95 | #include <net/dst.h> |
95 | #include <net/net_namespace.h> | 96 | #include <net/net_namespace.h> |
96 | #include <net/protocol.h> | 97 | #include <net/protocol.h> |
@@ -120,6 +121,7 @@ | |||
120 | 121 | ||
121 | static int ip_rt_max_size; | 122 | static int ip_rt_max_size; |
122 | static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; | 123 | static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; |
124 | static int ip_rt_gc_interval __read_mostly = 60 * HZ; | ||
123 | static int ip_rt_gc_min_interval __read_mostly = HZ / 2; | 125 | static int ip_rt_gc_min_interval __read_mostly = HZ / 2; |
124 | static int ip_rt_redirect_number __read_mostly = 9; | 126 | static int ip_rt_redirect_number __read_mostly = 9; |
125 | static int ip_rt_redirect_load __read_mostly = HZ / 50; | 127 | static int ip_rt_redirect_load __read_mostly = HZ / 50; |
@@ -133,6 +135,9 @@ static int ip_rt_min_advmss __read_mostly = 256; | |||
133 | static int rt_chain_length_max __read_mostly = 20; | 135 | static int rt_chain_length_max __read_mostly = 20; |
134 | static int redirect_genid; | 136 | static int redirect_genid; |
135 | 137 | ||
138 | static struct delayed_work expires_work; | ||
139 | static unsigned long expires_ljiffies; | ||
140 | |||
136 | /* | 141 | /* |
137 | * Interface to generic destination cache. | 142 | * Interface to generic destination cache. |
138 | */ | 143 | */ |
@@ -830,6 +835,97 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) | |||
830 | return ONE; | 835 | return ONE; |
831 | } | 836 | } |
832 | 837 | ||
838 | static void rt_check_expire(void) | ||
839 | { | ||
840 | static unsigned int rover; | ||
841 | unsigned int i = rover, goal; | ||
842 | struct rtable *rth; | ||
843 | struct rtable __rcu **rthp; | ||
844 | unsigned long samples = 0; | ||
845 | unsigned long sum = 0, sum2 = 0; | ||
846 | unsigned long delta; | ||
847 | u64 mult; | ||
848 | |||
849 | delta = jiffies - expires_ljiffies; | ||
850 | expires_ljiffies = jiffies; | ||
851 | mult = ((u64)delta) << rt_hash_log; | ||
852 | if (ip_rt_gc_timeout > 1) | ||
853 | do_div(mult, ip_rt_gc_timeout); | ||
854 | goal = (unsigned int)mult; | ||
855 | if (goal > rt_hash_mask) | ||
856 | goal = rt_hash_mask + 1; | ||
857 | for (; goal > 0; goal--) { | ||
858 | unsigned long tmo = ip_rt_gc_timeout; | ||
859 | unsigned long length; | ||
860 | |||
861 | i = (i + 1) & rt_hash_mask; | ||
862 | rthp = &rt_hash_table[i].chain; | ||
863 | |||
864 | if (need_resched()) | ||
865 | cond_resched(); | ||
866 | |||
867 | samples++; | ||
868 | |||
869 | if (rcu_dereference_raw(*rthp) == NULL) | ||
870 | continue; | ||
871 | length = 0; | ||
872 | spin_lock_bh(rt_hash_lock_addr(i)); | ||
873 | while ((rth = rcu_dereference_protected(*rthp, | ||
874 | lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { | ||
875 | prefetch(rth->dst.rt_next); | ||
876 | if (rt_is_expired(rth)) { | ||
877 | *rthp = rth->dst.rt_next; | ||
878 | rt_free(rth); | ||
879 | continue; | ||
880 | } | ||
881 | if (rth->dst.expires) { | ||
882 | /* Entry is expired even if it is in use */ | ||
883 | if (time_before_eq(jiffies, rth->dst.expires)) { | ||
884 | nofree: | ||
885 | tmo >>= 1; | ||
886 | rthp = &rth->dst.rt_next; | ||
887 | /* | ||
888 | * We only count entries on | ||
889 | * a chain with equal hash inputs once | ||
890 | * so that entries for different QOS | ||
891 | * levels, and other non-hash input | ||
892 | * attributes don't unfairly skew | ||
893 | * the length computation | ||
894 | */ | ||
895 | length += has_noalias(rt_hash_table[i].chain, rth); | ||
896 | continue; | ||
897 | } | ||
898 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) | ||
899 | goto nofree; | ||
900 | |||
901 | /* Cleanup aged off entries. */ | ||
902 | *rthp = rth->dst.rt_next; | ||
903 | rt_free(rth); | ||
904 | } | ||
905 | spin_unlock_bh(rt_hash_lock_addr(i)); | ||
906 | sum += length; | ||
907 | sum2 += length*length; | ||
908 | } | ||
909 | if (samples) { | ||
910 | unsigned long avg = sum / samples; | ||
911 | unsigned long sd = int_sqrt(sum2 / samples - avg*avg); | ||
912 | rt_chain_length_max = max_t(unsigned long, | ||
913 | ip_rt_gc_elasticity, | ||
914 | (avg + 4*sd) >> FRACT_BITS); | ||
915 | } | ||
916 | rover = i; | ||
917 | } | ||
918 | |||
919 | /* | ||
920 | * rt_worker_func() is run in process context. | ||
921 | * we call rt_check_expire() to scan part of the hash table | ||
922 | */ | ||
923 | static void rt_worker_func(struct work_struct *work) | ||
924 | { | ||
925 | rt_check_expire(); | ||
926 | schedule_delayed_work(&expires_work, ip_rt_gc_interval); | ||
927 | } | ||
928 | |||
833 | /* | 929 | /* |
834 | * Perturbation of rt_genid by a small quantity [1..256] | 930 | * Perturbation of rt_genid by a small quantity [1..256] |
835 | * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() | 931 | * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() |
@@ -1271,7 +1367,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) | |||
1271 | { | 1367 | { |
1272 | struct rtable *rt = (struct rtable *) dst; | 1368 | struct rtable *rt = (struct rtable *) dst; |
1273 | 1369 | ||
1274 | if (rt) { | 1370 | if (rt && !(rt->dst.flags & DST_NOPEER)) { |
1275 | if (rt->peer == NULL) | 1371 | if (rt->peer == NULL) |
1276 | rt_bind_peer(rt, rt->rt_dst, 1); | 1372 | rt_bind_peer(rt, rt->rt_dst, 1); |
1277 | 1373 | ||
@@ -1282,7 +1378,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) | |||
1282 | iph->id = htons(inet_getid(rt->peer, more)); | 1378 | iph->id = htons(inet_getid(rt->peer, more)); |
1283 | return; | 1379 | return; |
1284 | } | 1380 | } |
1285 | } else | 1381 | } else if (!rt) |
1286 | printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", | 1382 | printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", |
1287 | __builtin_return_address(0)); | 1383 | __builtin_return_address(0)); |
1288 | 1384 | ||
@@ -3179,6 +3275,13 @@ static ctl_table ipv4_route_table[] = { | |||
3179 | .proc_handler = proc_dointvec_jiffies, | 3275 | .proc_handler = proc_dointvec_jiffies, |
3180 | }, | 3276 | }, |
3181 | { | 3277 | { |
3278 | .procname = "gc_interval", | ||
3279 | .data = &ip_rt_gc_interval, | ||
3280 | .maxlen = sizeof(int), | ||
3281 | .mode = 0644, | ||
3282 | .proc_handler = proc_dointvec_jiffies, | ||
3283 | }, | ||
3284 | { | ||
3182 | .procname = "redirect_load", | 3285 | .procname = "redirect_load", |
3183 | .data = &ip_rt_redirect_load, | 3286 | .data = &ip_rt_redirect_load, |
3184 | .maxlen = sizeof(int), | 3287 | .maxlen = sizeof(int), |
@@ -3388,6 +3491,11 @@ int __init ip_rt_init(void) | |||
3388 | devinet_init(); | 3491 | devinet_init(); |
3389 | ip_fib_init(); | 3492 | ip_fib_init(); |
3390 | 3493 | ||
3494 | INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); | ||
3495 | expires_ljiffies = jiffies; | ||
3496 | schedule_delayed_work(&expires_work, | ||
3497 | net_random() % ip_rt_gc_interval + ip_rt_gc_interval); | ||
3498 | |||
3391 | if (ip_rt_proc_init()) | 3499 | if (ip_rt_proc_init()) |
3392 | printk(KERN_ERR "Unable to create route proc files\n"); | 3500 | printk(KERN_ERR "Unable to create route proc files\n"); |
3393 | #ifdef CONFIG_XFRM | 3501 | #ifdef CONFIG_XFRM |