aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--net/ipv4/route.c112
1 files changed, 110 insertions, 2 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 46af62363b8c..94cdbc55ca7e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -91,6 +91,7 @@
91#include <linux/rcupdate.h> 91#include <linux/rcupdate.h>
92#include <linux/times.h> 92#include <linux/times.h>
93#include <linux/slab.h> 93#include <linux/slab.h>
94#include <linux/prefetch.h>
94#include <net/dst.h> 95#include <net/dst.h>
95#include <net/net_namespace.h> 96#include <net/net_namespace.h>
96#include <net/protocol.h> 97#include <net/protocol.h>
@@ -120,6 +121,7 @@
120 121
121static int ip_rt_max_size; 122static int ip_rt_max_size;
122static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; 123static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
124static int ip_rt_gc_interval __read_mostly = 60 * HZ;
123static int ip_rt_gc_min_interval __read_mostly = HZ / 2; 125static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
124static int ip_rt_redirect_number __read_mostly = 9; 126static int ip_rt_redirect_number __read_mostly = 9;
125static int ip_rt_redirect_load __read_mostly = HZ / 50; 127static int ip_rt_redirect_load __read_mostly = HZ / 50;
@@ -133,6 +135,9 @@ static int ip_rt_min_advmss __read_mostly = 256;
133static int rt_chain_length_max __read_mostly = 20; 135static int rt_chain_length_max __read_mostly = 20;
134static int redirect_genid; 136static int redirect_genid;
135 137
138static struct delayed_work expires_work;
139static unsigned long expires_ljiffies;
140
136/* 141/*
137 * Interface to generic destination cache. 142 * Interface to generic destination cache.
138 */ 143 */
@@ -830,6 +835,97 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
830 return ONE; 835 return ONE;
831} 836}
832 837
838static void rt_check_expire(void)
839{
840 static unsigned int rover;
841 unsigned int i = rover, goal;
842 struct rtable *rth;
843 struct rtable __rcu **rthp;
844 unsigned long samples = 0;
845 unsigned long sum = 0, sum2 = 0;
846 unsigned long delta;
847 u64 mult;
848
849 delta = jiffies - expires_ljiffies;
850 expires_ljiffies = jiffies;
851 mult = ((u64)delta) << rt_hash_log;
852 if (ip_rt_gc_timeout > 1)
853 do_div(mult, ip_rt_gc_timeout);
854 goal = (unsigned int)mult;
855 if (goal > rt_hash_mask)
856 goal = rt_hash_mask + 1;
857 for (; goal > 0; goal--) {
858 unsigned long tmo = ip_rt_gc_timeout;
859 unsigned long length;
860
861 i = (i + 1) & rt_hash_mask;
862 rthp = &rt_hash_table[i].chain;
863
864 if (need_resched())
865 cond_resched();
866
867 samples++;
868
869 if (rcu_dereference_raw(*rthp) == NULL)
870 continue;
871 length = 0;
872 spin_lock_bh(rt_hash_lock_addr(i));
873 while ((rth = rcu_dereference_protected(*rthp,
874 lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
875 prefetch(rth->dst.rt_next);
876 if (rt_is_expired(rth)) {
877 *rthp = rth->dst.rt_next;
878 rt_free(rth);
879 continue;
880 }
881 if (rth->dst.expires) {
882 /* Entry is expired even if it is in use */
883 if (time_before_eq(jiffies, rth->dst.expires)) {
884nofree:
885 tmo >>= 1;
886 rthp = &rth->dst.rt_next;
887 /*
888 * We only count entries on
889 * a chain with equal hash inputs once
890 * so that entries for different QOS
891 * levels, and other non-hash input
892 * attributes don't unfairly skew
893 * the length computation
894 */
895 length += has_noalias(rt_hash_table[i].chain, rth);
896 continue;
897 }
898 } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
899 goto nofree;
900
901 /* Cleanup aged off entries. */
902 *rthp = rth->dst.rt_next;
903 rt_free(rth);
904 }
905 spin_unlock_bh(rt_hash_lock_addr(i));
906 sum += length;
907 sum2 += length*length;
908 }
909 if (samples) {
910 unsigned long avg = sum / samples;
911 unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
912 rt_chain_length_max = max_t(unsigned long,
913 ip_rt_gc_elasticity,
914 (avg + 4*sd) >> FRACT_BITS);
915 }
916 rover = i;
917}
918
919/*
920 * rt_worker_func() is run in process context.
921 * we call rt_check_expire() to scan part of the hash table
922 */
923static void rt_worker_func(struct work_struct *work)
924{
925 rt_check_expire();
926 schedule_delayed_work(&expires_work, ip_rt_gc_interval);
927}
928
833/* 929/*
834 * Perturbation of rt_genid by a small quantity [1..256] 930 * Perturbation of rt_genid by a small quantity [1..256]
835 * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() 931 * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
@@ -1271,7 +1367,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
1271{ 1367{
1272 struct rtable *rt = (struct rtable *) dst; 1368 struct rtable *rt = (struct rtable *) dst;
1273 1369
1274 if (rt) { 1370 if (rt && !(rt->dst.flags & DST_NOPEER)) {
1275 if (rt->peer == NULL) 1371 if (rt->peer == NULL)
1276 rt_bind_peer(rt, rt->rt_dst, 1); 1372 rt_bind_peer(rt, rt->rt_dst, 1);
1277 1373
@@ -1282,7 +1378,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
1282 iph->id = htons(inet_getid(rt->peer, more)); 1378 iph->id = htons(inet_getid(rt->peer, more));
1283 return; 1379 return;
1284 } 1380 }
1285 } else 1381 } else if (!rt)
1286 printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", 1382 printk(KERN_DEBUG "rt_bind_peer(0) @%p\n",
1287 __builtin_return_address(0)); 1383 __builtin_return_address(0));
1288 1384
@@ -3179,6 +3275,13 @@ static ctl_table ipv4_route_table[] = {
3179 .proc_handler = proc_dointvec_jiffies, 3275 .proc_handler = proc_dointvec_jiffies,
3180 }, 3276 },
3181 { 3277 {
3278 .procname = "gc_interval",
3279 .data = &ip_rt_gc_interval,
3280 .maxlen = sizeof(int),
3281 .mode = 0644,
3282 .proc_handler = proc_dointvec_jiffies,
3283 },
3284 {
3182 .procname = "redirect_load", 3285 .procname = "redirect_load",
3183 .data = &ip_rt_redirect_load, 3286 .data = &ip_rt_redirect_load,
3184 .maxlen = sizeof(int), 3287 .maxlen = sizeof(int),
@@ -3388,6 +3491,11 @@ int __init ip_rt_init(void)
3388 devinet_init(); 3491 devinet_init();
3389 ip_fib_init(); 3492 ip_fib_init();
3390 3493
3494 INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func);
3495 expires_ljiffies = jiffies;
3496 schedule_delayed_work(&expires_work,
3497 net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
3498
3391 if (ip_rt_proc_init()) 3499 if (ip_rt_proc_init())
3392 printk(KERN_ERR "Unable to create route proc files\n"); 3500 printk(KERN_ERR "Unable to create route proc files\n");
3393#ifdef CONFIG_XFRM 3501#ifdef CONFIG_XFRM