aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
author Eric Dumazet <eric.dumazet@gmail.com> 2011-12-21 15:47:16 -0500
committer David S. Miller <davem@davemloft.net> 2011-12-21 15:47:16 -0500
commit 9f28a2fc0bd77511f649c0a788c7bf9a5fd04edb (patch)
tree ea154ab2f82d422b1d6ed835c89acdb16b47d4f7 /net/ipv4/route.c
parent b4949b84567f3ae1227d076fc95bbd8efea06506 (diff)
ipv4: reintroduce route cache garbage collector
Commit 2c8cec5c10b (ipv4: Cache learned PMTU information in inetpeer) removed the IP route cache garbage collector a bit too soon, as this gc was responsible for cleaning up expired routes and releasing their neighbour references. As pointed out by Robert Gladewitz, recent kernels can fill and exhaust their neighbour cache. Reintroduce the garbage collection, since we'll have to wait until our neighbour lookups become refcount-less before we can stop depending on it. Reported-by: Robert Gladewitz <gladewitz@gmx.de> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- net/ipv4/route.c 107
1 files changed, 107 insertions, 0 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 46af62363b8c..252c512e8a81 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -120,6 +120,7 @@
120 120
121static int ip_rt_max_size; 121static int ip_rt_max_size;
122static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; 122static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
123static int ip_rt_gc_interval __read_mostly = 60 * HZ;
123static int ip_rt_gc_min_interval __read_mostly = HZ / 2; 124static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
124static int ip_rt_redirect_number __read_mostly = 9; 125static int ip_rt_redirect_number __read_mostly = 9;
125static int ip_rt_redirect_load __read_mostly = HZ / 50; 126static int ip_rt_redirect_load __read_mostly = HZ / 50;
@@ -133,6 +134,9 @@ static int ip_rt_min_advmss __read_mostly = 256;
133static int rt_chain_length_max __read_mostly = 20; 134static int rt_chain_length_max __read_mostly = 20;
134static int redirect_genid; 135static int redirect_genid;
135 136
/* Delayed work item; ip_rt_init() binds it to rt_worker_func() and queues it. */
137static struct delayed_work expires_work;
/*
 * jiffies value recorded by the most recent rt_check_expire() run (set
 * initially in ip_rt_init()); used to compute the time elapsed between scans.
 */
138static unsigned long expires_ljiffies;
139
136/* 140/*
137 * Interface to generic destination cache. 141 * Interface to generic destination cache.
138 */ 142 */
@@ -830,6 +834,97 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
830 return ONE; 834 return ONE;
831} 835}
832 836
/*
 * rt_check_expire() - scan part of the route cache hash table and free
 * entries that are expired or have aged off.
 *
 * The number of buckets to visit (goal) is derived from the jiffies elapsed
 * since the previous call: (delta << rt_hash_log) / ip_rt_gc_timeout, limited
 * to rt_hash_mask + 1. The static rover remembers the last bucket index
 * visited, so successive calls continue from where the previous one stopped.
 *
 * Before returning, rt_chain_length_max is recomputed from the average and
 * the deviation of the chain lengths measured during this scan.
 */
837static void rt_check_expire(void)
838{
839 static unsigned int rover;
840 unsigned int i = rover, goal;
841 struct rtable *rth;
842 struct rtable __rcu **rthp;
843 unsigned long samples = 0;
844 unsigned long sum = 0, sum2 = 0;
845 unsigned long delta;
846 u64 mult;
847
	/* jiffies elapsed since the previous scan; restart the measurement now. */
848 delta = jiffies - expires_ljiffies;
849 expires_ljiffies = jiffies;
850 mult = ((u64)delta) << rt_hash_log;
	/* The division is skipped when ip_rt_gc_timeout <= 1, leaving mult as is. */
851 if (ip_rt_gc_timeout > 1)
852 do_div(mult, ip_rt_gc_timeout);
853 goal = (unsigned int)mult;
	/* Limit a single call to rt_hash_mask + 1 buckets. */
854 if (goal > rt_hash_mask)
855 goal = rt_hash_mask + 1;
856 for (; goal > 0; goal--) {
	/* Per-bucket timeout; halved for every entry kept on the chain (see nofree). */
857 unsigned long tmo = ip_rt_gc_timeout;
858 unsigned long length;
859
	/* Advance before use, so scanning resumes at the bucket after rover. */
860 i = (i + 1) & rt_hash_mask;
861 rthp = &rt_hash_table[i].chain;
862
	/* No bucket lock is held at this point, so rescheduling is allowed here. */
863 if (need_resched())
864 cond_resched();
865
	/* Counted before the empty check below: empty buckets are samples too. */
866 samples++;
867
	/* Peek at the chain head without taking the bucket lock; skip empty buckets. */
868 if (rcu_dereference_raw(*rthp) == NULL)
869 continue;
870 length = 0;
871 spin_lock_bh(rt_hash_lock_addr(i));
	/*
	 * rthp points at the link currently being examined. It only moves
	 * forward when an entry is kept; after an unlink it stays put, so the
	 * next iteration examines the successor of the freed entry.
	 */
872 while ((rth = rcu_dereference_protected(*rthp,
873 lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
874 prefetch(rth->dst.rt_next);
875 if (rt_is_expired(rth)) {
876 *rthp = rth->dst.rt_next;
877 rt_free(rth);
878 continue;
879 }
	/*
	 * Two ways to keep an entry:
	 *  - dst.expires is set and jiffies has not passed it yet, or
	 *  - dst.expires is zero and rt_may_expire() returns false for the
	 *    current (possibly halved) tmo and ip_rt_gc_timeout.
	 * Everything else falls through to the cleanup at the bottom of the loop.
	 */
880 if (rth->dst.expires) {
881 /* Entry is expired even if it is in use */
882 if (time_before_eq(jiffies, rth->dst.expires)) {
	/* Keep path; also entered by the goto in the else-if branch below. */
883nofree:
884 tmo >>= 1;
885 rthp = &rth->dst.rt_next;
886 /*
887 * We only count entries on
888 * a chain with equal hash inputs once
889 * so that entries for different QOS
890 * levels, and other non-hash input
891 * attributes don't unfairly skew
892 * the length computation
893 */
894 length += has_noalias(rt_hash_table[i].chain, rth);
895 continue;
896 }
897 } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
898 goto nofree;
899
900 /* Cleanup aged off entries. */
901 *rthp = rth->dst.rt_next;
902 rt_free(rth);
903 }
904 spin_unlock_bh(rt_hash_lock_addr(i));
	/* Accumulate sum and sum of squares for the statistics computed below. */
905 sum += length;
906 sum2 += length*length;
907 }
	/* samples is zero only when goal was zero, i.e. no bucket was visited. */
908 if (samples) {
909 unsigned long avg = sum / samples;
	/* sd = integer square root of (mean of squares - square of mean). */
910 unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
	/*
	 * New limit is avg + 4*sd shifted down by FRACT_BITS, but never less
	 * than ip_rt_gc_elasticity.
	 * NOTE(review): length is accumulated from has_noalias() return values,
	 * presumably fixed-point with FRACT_BITS fractional bits - confirm
	 * against the ONE / FRACT_BITS definitions.
	 */
911 rt_chain_length_max = max_t(unsigned long,
912 ip_rt_gc_elasticity,
913 (avg + 4*sd) >> FRACT_BITS);
914 }
	/* Remember the last bucket visited for the next call. */
915 rover = i;
916}
917
918/*
919 * rt_worker_func() is run in process context.
920 * we call rt_check_expire() to scan part of the hash table
921 */
922static void rt_worker_func(struct work_struct *work)
923{
	/* 'work' is not used; the handler always acts on the file-scope expires_work. */
924 rt_check_expire();
	/*
	 * Re-arm: queue expires_work again with ip_rt_gc_interval as the delay,
	 * so the scan keeps repeating after every run.
	 * NOTE(review): ip_rt_gc_interval is writable through the gc_interval
	 * sysctl (proc_dointvec_jiffies) and no lower bound is visible here;
	 * confirm what happens if it is set to 0 or a negative value.
	 */
925 schedule_delayed_work(&expires_work, ip_rt_gc_interval);
926}
927
833/* 928/*
834 * Perturbation of rt_genid by a small quantity [1..256] 929 * Perturbation of rt_genid by a small quantity [1..256]
835 * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() 930 * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
@@ -3179,6 +3274,13 @@ static ctl_table ipv4_route_table[] = {
3179 .proc_handler = proc_dointvec_jiffies, 3274 .proc_handler = proc_dointvec_jiffies,
3180 }, 3275 },
3181 { 3276 {
3277 .procname = "gc_interval",
3278 .data = &ip_rt_gc_interval,
3279 .maxlen = sizeof(int),
3280 .mode = 0644,
3281 .proc_handler = proc_dointvec_jiffies,
3282 },
3283 {
3182 .procname = "redirect_load", 3284 .procname = "redirect_load",
3183 .data = &ip_rt_redirect_load, 3285 .data = &ip_rt_redirect_load,
3184 .maxlen = sizeof(int), 3286 .maxlen = sizeof(int),
@@ -3388,6 +3490,11 @@ int __init ip_rt_init(void)
3388 devinet_init(); 3490 devinet_init();
3389 ip_fib_init(); 3491 ip_fib_init();
3390 3492
3493 INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func);
3494 expires_ljiffies = jiffies;
3495 schedule_delayed_work(&expires_work,
3496 net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
3497
3391 if (ip_rt_proc_init()) 3498 if (ip_rt_proc_init())
3392 printk(KERN_ERR "Unable to create route proc files\n"); 3499 printk(KERN_ERR "Unable to create route proc files\n");
3393#ifdef CONFIG_XFRM 3500#ifdef CONFIG_XFRM