author     David S. Miller <davem@davemloft.net>   2010-12-20 00:11:20 -0500
committer  David S. Miller <davem@davemloft.net>   2010-12-20 13:37:19 -0500
commit     6561a3b12d62ed5317e6ac32182d87a03f62c8dc
tree       2b7318c3532a79dff8912ca4fdcd5d90aa0d8b39
parent     782615aea84e57dc7f2f922cea823df3de635a78
ipv4: Flush per-ns routing cache more sanely.
Flush only those routing cache entries that match the
network namespace in which the purge event occurred.
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
-rw-r--r--  include/net/route.h      |  2
-rw-r--r--  net/ipv4/fib_frontend.c  |  6
-rw-r--r--  net/ipv4/route.c         | 64
3 files changed, 30 insertions(+), 42 deletions(-)
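
The heart of the change is the rewritten chain walk in rt_do_flush() (net/ipv4/route.c below): while holding the per-bucket lock it unlinks every entry whose device belongs to the target namespace onto a private list, then calls rt_free() on that list only after the lock is dropped, and a NULL net acts as a wildcard matching every namespace. Below is a minimal userspace sketch of that pattern; the toy_* names are hypothetical, plain pointers stand in for the kernel's RCU accessors, and free() stands in for rt_free().

#include <stdio.h>
#include <stdlib.h>

struct toy_net { int id; };              /* stand-in for struct net */

struct toy_rtable {
	struct toy_net *net;             /* owning namespace */
	int dst;                         /* fake route payload */
	struct toy_rtable *next;         /* hash-chain link */
};

/* Unlink every entry of one hash chain that belongs to @net (or every
 * entry when @net is NULL, the wildcard used by the batch flush) and
 * collect the victims on a private list, so they can be freed after
 * the lock is dropped, as the kernel does after spin_unlock_bh(). */
static void flush_chain(struct toy_rtable **chain, struct toy_net *net)
{
	struct toy_rtable **pprev = chain; /* link that points at rth */
	struct toy_rtable *list = NULL;    /* victims, freed after "unlock" */
	struct toy_rtable *rth = *chain, *next;

	/* ... the per-bucket spin_lock_bh() would be taken here ... */
	while (rth) {
		next = rth->next;
		if (!net || rth->net == net) {   /* net_eq() in the kernel */
			*pprev = next;           /* unlink from the chain */
			rth->next = list;        /* push onto victim list */
			list = rth;
		} else {
			pprev = &rth->next;      /* keep entry, advance */
		}
		rth = next;
	}
	/* ... spin_unlock_bh() here; now free the victims at leisure ... */
	for (; list; list = next) {
		next = list->next;
		printf("freeing dst %d (ns %d)\n", list->dst, list->net->id);
		free(list);                      /* rt_free() in the kernel */
	}
}

int main(void)
{
	struct toy_net ns1 = { 1 }, ns2 = { 2 };
	struct toy_rtable *chain = NULL;

	/* Build a chain holding entries from two namespaces. */
	for (int i = 0; i < 4; i++) {
		struct toy_rtable *rt = malloc(sizeof(*rt));
		rt->net = (i % 2) ? &ns2 : &ns1;
		rt->dst = i;
		rt->next = chain;
		chain = rt;
	}

	flush_chain(&chain, &ns1);               /* per-ns flush */
	for (struct toy_rtable *rt = chain; rt; rt = rt->next)
		printf("kept dst %d (ns %d)\n", rt->dst, rt->net->id);

	flush_chain(&chain, NULL);               /* wildcard, like batch flush */
	return 0;
}

Keeping a pprev double pointer to the link that last pointed at a kept entry removes the old code's special-casing of the chain head, which is what lets the CONFIG_NET_NS and !CONFIG_NET_NS paths in the patch collapse into a single loop.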
diff --git a/include/net/route.h b/include/net/route.h
index 27002362944a..93e10c453f6b 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -114,7 +114,7 @@ extern int ip_rt_init(void);
 extern void ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw,
                            __be32 src, struct net_device *dev);
 extern void rt_cache_flush(struct net *net, int how);
-extern void rt_cache_flush_batch(void);
+extern void rt_cache_flush_batch(struct net *net);
 extern int __ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp);
 extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
 extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d3a1112b9d9c..9f8bb68911e4 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -987,7 +987,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
                 rt_cache_flush(dev_net(dev), 0);
                 break;
         case NETDEV_UNREGISTER_BATCH:
-                rt_cache_flush_batch();
+                /* The batch unregister is only called on the first
+                 * device in the list of devices being unregistered.
+                 * Therefore we should not pass dev_net(dev) in here.
+                 */
+                rt_cache_flush_batch(NULL);
                 break;
         }
         return NOTIFY_DONE;
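
As the new comment explains, NETDEV_UNREGISTER_BATCH fires only for the first device of a batch that may span several namespaces, so the caller passes NULL and rt_do_flush() treats that as "match everything". A standalone restatement of the match test follows; flushes_entry is a hypothetical name, and pointer comparison stands in for net_eq(), which is exactly a pointer comparison when CONFIG_NET_NS is enabled.

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

struct net { int id; };  /* stand-in; opaque to callers in the kernel */

/* Restates the test added to rt_do_flush(): a NULL filter matches
 * every entry, otherwise the entry's namespace must equal the filter. */
static bool flushes_entry(const struct net *entry_net, const struct net *filter)
{
	return filter == NULL || entry_net == filter;
}

int main(void)
{
	struct net ns1 = { 1 }, ns2 = { 2 };

	assert(flushes_entry(&ns1, &ns1));  /* per-ns flush hits own entries */
	assert(!flushes_entry(&ns1, &ns2)); /* ...but not another namespace's */
	assert(flushes_entry(&ns1, NULL));  /* NULL, as the batch path passes, */
	assert(flushes_entry(&ns2, NULL));  /* hits every namespace */
	return 0;
}

Passing dev_net(dev) here would wrongly limit the flush to the first device's namespace while cached routes of the other unregistering devices stayed behind.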
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ae520963540f..d8b4f4d0d66e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -717,13 +717,15 @@ static inline int rt_is_expired(struct rtable *rth)
  * Can be called by a softirq or a process.
  * In the later case, we want to be reschedule if necessary
  */
-static void rt_do_flush(int process_context)
+static void rt_do_flush(struct net *net, int process_context)
 {
         unsigned int i;
         struct rtable *rth, *next;
-        struct rtable * tail;
 
         for (i = 0; i <= rt_hash_mask; i++) {
+                struct rtable __rcu **pprev;
+                struct rtable *list;
+
                 if (process_context && need_resched())
                         cond_resched();
                 rth = rcu_dereference_raw(rt_hash_table[i].chain);
@@ -731,50 +733,32 @@ static void rt_do_flush(int process_context)
                         continue;
 
                 spin_lock_bh(rt_hash_lock_addr(i));
-#ifdef CONFIG_NET_NS
-                {
-                struct rtable __rcu **prev;
-                struct rtable *p;
 
-                rth = rcu_dereference_protected(rt_hash_table[i].chain,
+                list = NULL;
+                pprev = &rt_hash_table[i].chain;
+                rth = rcu_dereference_protected(*pprev,
                         lockdep_is_held(rt_hash_lock_addr(i)));
 
-                /* defer releasing the head of the list after spin_unlock */
-                for (tail = rth; tail;
-                     tail = rcu_dereference_protected(tail->dst.rt_next,
-                                lockdep_is_held(rt_hash_lock_addr(i))))
-                        if (!rt_is_expired(tail))
-                                break;
-                if (rth != tail)
-                        rt_hash_table[i].chain = tail;
-
-                /* call rt_free on entries after the tail requiring flush */
-                prev = &rt_hash_table[i].chain;
-                for (p = rcu_dereference_protected(*prev,
+                while (rth) {
+                        next = rcu_dereference_protected(rth->dst.rt_next,
                                 lockdep_is_held(rt_hash_lock_addr(i)));
-                     p != NULL;
-                     p = next) {
-                        next = rcu_dereference_protected(p->dst.rt_next,
-                                lockdep_is_held(rt_hash_lock_addr(i)));
-                        if (!rt_is_expired(p)) {
-                                prev = &p->dst.rt_next;
+
+                        if (!net ||
+                            net_eq(dev_net(rth->dst.dev), net)) {
+                                rcu_assign_pointer(*pprev, next);
+                                rcu_assign_pointer(rth->dst.rt_next, list);
+                                list = rth;
                         } else {
-                                *prev = next;
-                                rt_free(p);
+                                pprev = &rth->dst.rt_next;
                         }
+                        rth = next;
                 }
-                }
-#else
-                rth = rcu_dereference_protected(rt_hash_table[i].chain,
-                        lockdep_is_held(rt_hash_lock_addr(i)));
-                rcu_assign_pointer(rt_hash_table[i].chain, NULL);
-                tail = NULL;
-#endif
+
                 spin_unlock_bh(rt_hash_lock_addr(i));
 
-                for (; rth != tail; rth = next) {
-                        next = rcu_dereference_protected(rth->dst.rt_next, 1);
-                        rt_free(rth);
+                for (; list; list = next) {
+                        next = rcu_dereference_protected(list->dst.rt_next, 1);
+                        rt_free(list);
                 }
         }
 }
@@ -922,13 +906,13 @@ void rt_cache_flush(struct net *net, int delay)
 {
         rt_cache_invalidate(net);
         if (delay >= 0)
-                rt_do_flush(!in_softirq());
+                rt_do_flush(net, !in_softirq());
 }
 
 /* Flush previous cache invalidated entries from the cache */
-void rt_cache_flush_batch(void)
+void rt_cache_flush_batch(struct net *net)
 {
-        rt_do_flush(!in_softirq());
+        rt_do_flush(net, !in_softirq());
 }
 
 static void rt_emergency_hash_rebuild(struct net *net)