aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2010-03-07 22:20:00 -0500
committerDavid S. Miller <davem@davemloft.net>2010-03-08 13:45:31 -0500
commit9837638727488922727b0cfd438039fa73364183 (patch)
tree608b2cf6415553a06dfc09b6981f816b285622c8 /net/ipv4
parent1515faf2f995add976d4428bbc1583a4a0c81e5f (diff)
net: fix route cache rebuilds
We added an automatic route cache rebuilding in commit 1080d709fb9d8cd43 but had to correct few bugs. One of the assumption of original patch, was that entries where kept sorted in a given way. This assumption is known to be wrong (commit 1ddbcb005c395518 gave an explanation of this and corrected a leak) and expensive to respect. Paweł Staszewski reported to me one of his machine got its routing cache disabled after few messages like : [ 2677.850065] Route hash chain too long! [ 2677.850080] Adjust your secret_interval! [82839.662993] Route hash chain too long! [82839.662996] Adjust your secret_interval! [155843.731650] Route hash chain too long! [155843.731664] Adjust your secret_interval! [155843.811881] Route hash chain too long! [155843.811891] Adjust your secret_interval! [155843.858209] vlan0811: 5 rebuilds is over limit, route caching disabled [155843.858212] Route hash chain too long! [155843.858213] Adjust your secret_interval! This is because rt_intern_hash() might be fooled when computing a chain length, because multiple entries with same keys can differ because of TOS (or mark/oif) bits. In the rare case the fast algorithm see a too long chain, and before taking expensive path, we call a helper function in order to not count duplicates of same routes, that only differ with tos/mark/oif bits. This helper works with data already in cpu cache and is not be very expensive, despite its O(N^2) implementation. Paweł Staszewski sucessfully tested this patch on his loaded router. Reported-and-tested-by: Paweł Staszewski <pstaszewski@itcare.pl> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Acked-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/route.c50
1 files changed, 38 insertions, 12 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b2ba5581d2ae..d9b40248b97f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -146,7 +146,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
146static void ipv4_link_failure(struct sk_buff *skb); 146static void ipv4_link_failure(struct sk_buff *skb);
147static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 147static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
148static int rt_garbage_collect(struct dst_ops *ops); 148static int rt_garbage_collect(struct dst_ops *ops);
149static void rt_emergency_hash_rebuild(struct net *net);
150 149
151 150
152static struct dst_ops ipv4_dst_ops = { 151static struct dst_ops ipv4_dst_ops = {
@@ -780,11 +779,30 @@ static void rt_do_flush(int process_context)
780#define FRACT_BITS 3 779#define FRACT_BITS 3
781#define ONE (1UL << FRACT_BITS) 780#define ONE (1UL << FRACT_BITS)
782 781
782/*
783 * Given a hash chain and an item in this hash chain,
784 * find if a previous entry has the same hash_inputs
785 * (but differs on tos, mark or oif)
786 * Returns 0 if an alias is found.
787 * Returns ONE if rth has no alias before itself.
788 */
789static int has_noalias(const struct rtable *head, const struct rtable *rth)
790{
791 const struct rtable *aux = head;
792
793 while (aux != rth) {
794 if (compare_hash_inputs(&aux->fl, &rth->fl))
795 return 0;
796 aux = aux->u.dst.rt_next;
797 }
798 return ONE;
799}
800
783static void rt_check_expire(void) 801static void rt_check_expire(void)
784{ 802{
785 static unsigned int rover; 803 static unsigned int rover;
786 unsigned int i = rover, goal; 804 unsigned int i = rover, goal;
787 struct rtable *rth, *aux, **rthp; 805 struct rtable *rth, **rthp;
788 unsigned long samples = 0; 806 unsigned long samples = 0;
789 unsigned long sum = 0, sum2 = 0; 807 unsigned long sum = 0, sum2 = 0;
790 unsigned long delta; 808 unsigned long delta;
@@ -835,15 +853,7 @@ nofree:
835 * attributes don't unfairly skew 853 * attributes don't unfairly skew
836 * the length computation 854 * the length computation
837 */ 855 */
838 for (aux = rt_hash_table[i].chain;;) { 856 length += has_noalias(rt_hash_table[i].chain, rth);
839 if (aux == rth) {
840 length += ONE;
841 break;
842 }
843 if (compare_hash_inputs(&aux->fl, &rth->fl))
844 break;
845 aux = aux->u.dst.rt_next;
846 }
847 continue; 857 continue;
848 } 858 }
849 } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) 859 } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
@@ -1073,6 +1083,21 @@ work_done:
1073out: return 0; 1083out: return 0;
1074} 1084}
1075 1085
1086/*
1087 * Returns number of entries in a hash chain that have different hash_inputs
1088 */
1089static int slow_chain_length(const struct rtable *head)
1090{
1091 int length = 0;
1092 const struct rtable *rth = head;
1093
1094 while (rth) {
1095 length += has_noalias(head, rth);
1096 rth = rth->u.dst.rt_next;
1097 }
1098 return length >> FRACT_BITS;
1099}
1100
1076static int rt_intern_hash(unsigned hash, struct rtable *rt, 1101static int rt_intern_hash(unsigned hash, struct rtable *rt,
1077 struct rtable **rp, struct sk_buff *skb) 1102 struct rtable **rp, struct sk_buff *skb)
1078{ 1103{
@@ -1185,7 +1210,8 @@ restart:
1185 rt_free(cand); 1210 rt_free(cand);
1186 } 1211 }
1187 } else { 1212 } else {
1188 if (chain_length > rt_chain_length_max) { 1213 if (chain_length > rt_chain_length_max &&
1214 slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
1189 struct net *net = dev_net(rt->u.dst.dev); 1215 struct net *net = dev_net(rt->u.dst.dev);
1190 int num = ++net->ipv4.current_rt_cache_rebuild_count; 1216 int num = ++net->ipv4.current_rt_cache_rebuild_count;
1191 if (!rt_caching(dev_net(rt->u.dst.dev))) { 1217 if (!rt_caching(dev_net(rt->u.dst.dev))) {