diff options
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 107 |
1 files changed, 107 insertions, 0 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 46af62363b8c..252c512e8a81 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -120,6 +120,7 @@ | |||
120 | 120 | ||
121 | static int ip_rt_max_size; | 121 | static int ip_rt_max_size; |
122 | static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; | 122 | static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; |
123 | static int ip_rt_gc_interval __read_mostly = 60 * HZ; | ||
123 | static int ip_rt_gc_min_interval __read_mostly = HZ / 2; | 124 | static int ip_rt_gc_min_interval __read_mostly = HZ / 2; |
124 | static int ip_rt_redirect_number __read_mostly = 9; | 125 | static int ip_rt_redirect_number __read_mostly = 9; |
125 | static int ip_rt_redirect_load __read_mostly = HZ / 50; | 126 | static int ip_rt_redirect_load __read_mostly = HZ / 50; |
@@ -133,6 +134,9 @@ static int ip_rt_min_advmss __read_mostly = 256; | |||
133 | static int rt_chain_length_max __read_mostly = 20; | 134 | static int rt_chain_length_max __read_mostly = 20; |
134 | static int redirect_genid; | 135 | static int redirect_genid; |
135 | 136 | ||
137 | static struct delayed_work expires_work; | ||
138 | static unsigned long expires_ljiffies; | ||
139 | |||
136 | /* | 140 | /* |
137 | * Interface to generic destination cache. | 141 | * Interface to generic destination cache. |
138 | */ | 142 | */ |
@@ -830,6 +834,97 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) | |||
830 | return ONE; | 834 | return ONE; |
831 | } | 835 | } |
832 | 836 | ||
837 | static void rt_check_expire(void) | ||
838 | { | ||
839 | static unsigned int rover; | ||
840 | unsigned int i = rover, goal; | ||
841 | struct rtable *rth; | ||
842 | struct rtable __rcu **rthp; | ||
843 | unsigned long samples = 0; | ||
844 | unsigned long sum = 0, sum2 = 0; | ||
845 | unsigned long delta; | ||
846 | u64 mult; | ||
847 | |||
848 | delta = jiffies - expires_ljiffies; | ||
849 | expires_ljiffies = jiffies; | ||
850 | mult = ((u64)delta) << rt_hash_log; | ||
851 | if (ip_rt_gc_timeout > 1) | ||
852 | do_div(mult, ip_rt_gc_timeout); | ||
853 | goal = (unsigned int)mult; | ||
854 | if (goal > rt_hash_mask) | ||
855 | goal = rt_hash_mask + 1; | ||
856 | for (; goal > 0; goal--) { | ||
857 | unsigned long tmo = ip_rt_gc_timeout; | ||
858 | unsigned long length; | ||
859 | |||
860 | i = (i + 1) & rt_hash_mask; | ||
861 | rthp = &rt_hash_table[i].chain; | ||
862 | |||
863 | if (need_resched()) | ||
864 | cond_resched(); | ||
865 | |||
866 | samples++; | ||
867 | |||
868 | if (rcu_dereference_raw(*rthp) == NULL) | ||
869 | continue; | ||
870 | length = 0; | ||
871 | spin_lock_bh(rt_hash_lock_addr(i)); | ||
872 | while ((rth = rcu_dereference_protected(*rthp, | ||
873 | lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { | ||
874 | prefetch(rth->dst.rt_next); | ||
875 | if (rt_is_expired(rth)) { | ||
876 | *rthp = rth->dst.rt_next; | ||
877 | rt_free(rth); | ||
878 | continue; | ||
879 | } | ||
880 | if (rth->dst.expires) { | ||
881 | /* Entry is expired even if it is in use */ | ||
882 | if (time_before_eq(jiffies, rth->dst.expires)) { | ||
883 | nofree: | ||
884 | tmo >>= 1; | ||
885 | rthp = &rth->dst.rt_next; | ||
886 | /* | ||
887 | * We only count entries on | ||
888 | * a chain with equal hash inputs once | ||
889 | * so that entries for different QOS | ||
890 | * levels, and other non-hash input | ||
891 | * attributes don't unfairly skew | ||
892 | * the length computation | ||
893 | */ | ||
894 | length += has_noalias(rt_hash_table[i].chain, rth); | ||
895 | continue; | ||
896 | } | ||
897 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) | ||
898 | goto nofree; | ||
899 | |||
900 | /* Cleanup aged off entries. */ | ||
901 | *rthp = rth->dst.rt_next; | ||
902 | rt_free(rth); | ||
903 | } | ||
904 | spin_unlock_bh(rt_hash_lock_addr(i)); | ||
905 | sum += length; | ||
906 | sum2 += length*length; | ||
907 | } | ||
908 | if (samples) { | ||
909 | unsigned long avg = sum / samples; | ||
910 | unsigned long sd = int_sqrt(sum2 / samples - avg*avg); | ||
911 | rt_chain_length_max = max_t(unsigned long, | ||
912 | ip_rt_gc_elasticity, | ||
913 | (avg + 4*sd) >> FRACT_BITS); | ||
914 | } | ||
915 | rover = i; | ||
916 | } | ||
917 | |||
918 | /* | ||
919 | * rt_worker_func() is run in process context. | ||
920 | * we call rt_check_expire() to scan part of the hash table | ||
921 | */ | ||
922 | static void rt_worker_func(struct work_struct *work) | ||
923 | { | ||
924 | rt_check_expire(); | ||
925 | schedule_delayed_work(&expires_work, ip_rt_gc_interval); | ||
926 | } | ||
927 | |||
833 | /* | 928 | /* |
834 | * Perturbation of rt_genid by a small quantity [1..256] | 929 | * Perturbation of rt_genid by a small quantity [1..256] |
835 | * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() | 930 | * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() |
@@ -3179,6 +3274,13 @@ static ctl_table ipv4_route_table[] = { | |||
3179 | .proc_handler = proc_dointvec_jiffies, | 3274 | .proc_handler = proc_dointvec_jiffies, |
3180 | }, | 3275 | }, |
3181 | { | 3276 | { |
3277 | .procname = "gc_interval", | ||
3278 | .data = &ip_rt_gc_interval, | ||
3279 | .maxlen = sizeof(int), | ||
3280 | .mode = 0644, | ||
3281 | .proc_handler = proc_dointvec_jiffies, | ||
3282 | }, | ||
3283 | { | ||
3182 | .procname = "redirect_load", | 3284 | .procname = "redirect_load", |
3183 | .data = &ip_rt_redirect_load, | 3285 | .data = &ip_rt_redirect_load, |
3184 | .maxlen = sizeof(int), | 3286 | .maxlen = sizeof(int), |
@@ -3388,6 +3490,11 @@ int __init ip_rt_init(void) | |||
3388 | devinet_init(); | 3490 | devinet_init(); |
3389 | ip_fib_init(); | 3491 | ip_fib_init(); |
3390 | 3492 | ||
3493 | INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); | ||
3494 | expires_ljiffies = jiffies; | ||
3495 | schedule_delayed_work(&expires_work, | ||
3496 | net_random() % ip_rt_gc_interval + ip_rt_gc_interval); | ||
3497 | |||
3391 | if (ip_rt_proc_init()) | 3498 | if (ip_rt_proc_init()) |
3392 | printk(KERN_ERR "Unable to create route proc files\n"); | 3499 | printk(KERN_ERR "Unable to create route proc files\n"); |
3393 | #ifdef CONFIG_XFRM | 3500 | #ifdef CONFIG_XFRM |