diff options
author | Eric Dumazet <dada1@cosmosbay.com> | 2007-09-15 13:55:54 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-10-10 19:49:25 -0400 |
commit | 39c90ece7565f5c47110c2fa77409d7a9478bd5b (patch) | |
tree | 220bf734ed470024901226675550501d45192f0e | |
parent | dac24ab396fc92985060d5cb3c467d2d0ffc0c20 (diff) |
[IPV4]: Convert rt_check_expire() from softirq processing to workqueue.
On loaded/big hosts, rt_check_expire() is of little use, because it
generally breaks out of its main loop because of a jiffies change.
It can take a long time (read: many timer invocations) to actually
scan the whole hash table, freeing unused entries.
Converting it to use a workqueue instead of softirq is a nice
move because we can allow rt_check_expire() to do the scan
it is supposed to do, without hogging the CPU.
This has an impact on the average number of entries in the cache,
reducing RAM usage. The cache is also more responsive to parameter
changes (/proc/sys/net/ipv4/route/gc_timeout and
/proc/sys/net/ipv4/route/gc_interval).
Note: Maybe the default value of gc_interval (60 seconds)
is too high, since this means we actually need 5 (gc_timeout /
gc_interval = 300/60) invocations to scan the whole table.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/ipv4/route.c | 30 |
1 files changed, 12 insertions, 18 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 396c631166a4..006d6058a806 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -81,6 +81,7 @@ | |||
81 | #include <linux/netdevice.h> | 81 | #include <linux/netdevice.h> |
82 | #include <linux/proc_fs.h> | 82 | #include <linux/proc_fs.h> |
83 | #include <linux/init.h> | 83 | #include <linux/init.h> |
84 | #include <linux/workqueue.h> | ||
84 | #include <linux/skbuff.h> | 85 | #include <linux/skbuff.h> |
85 | #include <linux/inetdevice.h> | 86 | #include <linux/inetdevice.h> |
86 | #include <linux/igmp.h> | 87 | #include <linux/igmp.h> |
@@ -136,7 +137,8 @@ static unsigned long rt_deadline; | |||
136 | #define RTprint(a...) printk(KERN_DEBUG a) | 137 | #define RTprint(a...) printk(KERN_DEBUG a) |
137 | 138 | ||
138 | static struct timer_list rt_flush_timer; | 139 | static struct timer_list rt_flush_timer; |
139 | static struct timer_list rt_periodic_timer; | 140 | static void rt_check_expire(struct work_struct *work); |
141 | static DECLARE_DELAYED_WORK(expires_work, rt_check_expire); | ||
140 | static struct timer_list rt_secret_timer; | 142 | static struct timer_list rt_secret_timer; |
141 | 143 | ||
142 | /* | 144 | /* |
@@ -572,20 +574,19 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) | |||
572 | (fl1->iif ^ fl2->iif)) == 0; | 574 | (fl1->iif ^ fl2->iif)) == 0; |
573 | } | 575 | } |
574 | 576 | ||
575 | /* This runs via a timer and thus is always in BH context. */ | 577 | static void rt_check_expire(struct work_struct *work) |
576 | static void rt_check_expire(unsigned long dummy) | ||
577 | { | 578 | { |
578 | static unsigned int rover; | 579 | static unsigned int rover; |
579 | unsigned int i = rover, goal; | 580 | unsigned int i = rover, goal; |
580 | struct rtable *rth, **rthp; | 581 | struct rtable *rth, **rthp; |
581 | unsigned long now = jiffies; | ||
582 | u64 mult; | 582 | u64 mult; |
583 | 583 | ||
584 | mult = ((u64)ip_rt_gc_interval) << rt_hash_log; | 584 | mult = ((u64)ip_rt_gc_interval) << rt_hash_log; |
585 | if (ip_rt_gc_timeout > 1) | 585 | if (ip_rt_gc_timeout > 1) |
586 | do_div(mult, ip_rt_gc_timeout); | 586 | do_div(mult, ip_rt_gc_timeout); |
587 | goal = (unsigned int)mult; | 587 | goal = (unsigned int)mult; |
588 | if (goal > rt_hash_mask) goal = rt_hash_mask + 1; | 588 | if (goal > rt_hash_mask) |
589 | goal = rt_hash_mask + 1; | ||
589 | for (; goal > 0; goal--) { | 590 | for (; goal > 0; goal--) { |
590 | unsigned long tmo = ip_rt_gc_timeout; | 591 | unsigned long tmo = ip_rt_gc_timeout; |
591 | 592 | ||
@@ -594,11 +595,11 @@ static void rt_check_expire(unsigned long dummy) | |||
594 | 595 | ||
595 | if (*rthp == 0) | 596 | if (*rthp == 0) |
596 | continue; | 597 | continue; |
597 | spin_lock(rt_hash_lock_addr(i)); | 598 | spin_lock_bh(rt_hash_lock_addr(i)); |
598 | while ((rth = *rthp) != NULL) { | 599 | while ((rth = *rthp) != NULL) { |
599 | if (rth->u.dst.expires) { | 600 | if (rth->u.dst.expires) { |
600 | /* Entry is expired even if it is in use */ | 601 | /* Entry is expired even if it is in use */ |
601 | if (time_before_eq(now, rth->u.dst.expires)) { | 602 | if (time_before_eq(jiffies, rth->u.dst.expires)) { |
602 | tmo >>= 1; | 603 | tmo >>= 1; |
603 | rthp = &rth->u.dst.rt_next; | 604 | rthp = &rth->u.dst.rt_next; |
604 | continue; | 605 | continue; |
@@ -613,14 +614,10 @@ static void rt_check_expire(unsigned long dummy) | |||
613 | *rthp = rth->u.dst.rt_next; | 614 | *rthp = rth->u.dst.rt_next; |
614 | rt_free(rth); | 615 | rt_free(rth); |
615 | } | 616 | } |
616 | spin_unlock(rt_hash_lock_addr(i)); | 617 | spin_unlock_bh(rt_hash_lock_addr(i)); |
617 | |||
618 | /* Fallback loop breaker. */ | ||
619 | if (time_after(jiffies, now)) | ||
620 | break; | ||
621 | } | 618 | } |
622 | rover = i; | 619 | rover = i; |
623 | mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval); | 620 | schedule_delayed_work(&expires_work, ip_rt_gc_interval); |
624 | } | 621 | } |
625 | 622 | ||
626 | /* This can run from both BH and non-BH contexts, the latter | 623 | /* This can run from both BH and non-BH contexts, the latter |
@@ -2993,17 +2990,14 @@ int __init ip_rt_init(void) | |||
2993 | 2990 | ||
2994 | init_timer(&rt_flush_timer); | 2991 | init_timer(&rt_flush_timer); |
2995 | rt_flush_timer.function = rt_run_flush; | 2992 | rt_flush_timer.function = rt_run_flush; |
2996 | init_timer(&rt_periodic_timer); | ||
2997 | rt_periodic_timer.function = rt_check_expire; | ||
2998 | init_timer(&rt_secret_timer); | 2993 | init_timer(&rt_secret_timer); |
2999 | rt_secret_timer.function = rt_secret_rebuild; | 2994 | rt_secret_timer.function = rt_secret_rebuild; |
3000 | 2995 | ||
3001 | /* All the timers, started at system startup tend | 2996 | /* All the timers, started at system startup tend |
3002 | to synchronize. Perturb it a bit. | 2997 | to synchronize. Perturb it a bit. |
3003 | */ | 2998 | */ |
3004 | rt_periodic_timer.expires = jiffies + net_random() % ip_rt_gc_interval + | 2999 | schedule_delayed_work(&expires_work, |
3005 | ip_rt_gc_interval; | 3000 | net_random() % ip_rt_gc_interval + ip_rt_gc_interval); |
3006 | add_timer(&rt_periodic_timer); | ||
3007 | 3001 | ||
3008 | rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval + | 3002 | rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval + |
3009 | ip_rt_secret_interval; | 3003 | ip_rt_secret_interval; |