diff options
author | Ilia Sotnikov <hostcc@gmail.com> | 2006-03-25 04:38:55 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2006-03-25 04:38:55 -0500 |
commit | cef2685e0053945ea0f3c02297386b040f486ea7 (patch) | |
tree | 869c11f64c5e2259342c228c80e93cbbbf8e4fc1 | |
parent | b8059eadf9f4dc24bd72da71daa832a9a9899fb4 (diff) |
[IPV4]: Aggregate route entries with different TOS values
When we get an ICMP need-to-frag message, the original TOS value in the
ICMP payload cannot be used as a key to look up the routes to update.
This is because the TOS field may have been modified by routers on the
way. Similarly, ip_rt_redirect should also ignore the TOS as the router
that gave us the message may have modified the TOS value.
The patch achieves this objective by aggregating entries that have different
TOS values (but are otherwise identical) into the same bucket. This
makes it easy to update them at the same time when an ICMP message is
received.
In future we should use a twin-hashing scheme where the aggregation
occurs at the entry level. That is, the TOS goes back into the hash
for normal lookups while ICMP lookups will end up with a node that
gives us a list that contains all other route entries that differ
only by TOS.
Signed-off-by: Ilia Sotnikov <hostcc@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/route.h | 2 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 2 | ||||
-rw-r--r-- | net/ipv4/route.c | 45 |
3 files changed, 20 insertions, 29 deletions
diff --git a/include/net/route.h b/include/net/route.h index 9c04f15090d2..98c915abdec8 100644 --- a/include/net/route.h +++ b/include/net/route.h | |||
@@ -110,7 +110,7 @@ extern struct ip_rt_acct *ip_rt_acct; | |||
110 | struct in_device; | 110 | struct in_device; |
111 | extern int ip_rt_init(void); | 111 | extern int ip_rt_init(void); |
112 | extern void ip_rt_redirect(u32 old_gw, u32 dst, u32 new_gw, | 112 | extern void ip_rt_redirect(u32 old_gw, u32 dst, u32 new_gw, |
113 | u32 src, u8 tos, struct net_device *dev); | 113 | u32 src, struct net_device *dev); |
114 | extern void ip_rt_advice(struct rtable **rp, int advice); | 114 | extern void ip_rt_advice(struct rtable **rp, int advice); |
115 | extern void rt_cache_flush(int how); | 115 | extern void rt_cache_flush(int how); |
116 | extern int __ip_route_output_key(struct rtable **, const struct flowi *flp); | 116 | extern int __ip_route_output_key(struct rtable **, const struct flowi *flp); |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e7bbff4340bb..9831fd2c73a0 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -753,7 +753,7 @@ static void icmp_redirect(struct sk_buff *skb) | |||
753 | case ICMP_REDIR_HOST: | 753 | case ICMP_REDIR_HOST: |
754 | case ICMP_REDIR_HOSTTOS: | 754 | case ICMP_REDIR_HOSTTOS: |
755 | ip_rt_redirect(skb->nh.iph->saddr, ip, skb->h.icmph->un.gateway, | 755 | ip_rt_redirect(skb->nh.iph->saddr, ip, skb->h.icmph->un.gateway, |
756 | iph->saddr, iph->tos, skb->dev); | 756 | iph->saddr, skb->dev); |
757 | break; | 757 | break; |
758 | } | 758 | } |
759 | out: | 759 | out: |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fca5fe0cf94a..94fcbc5e5a1b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -55,6 +55,8 @@ | |||
55 | * Robert Olsson : Added rt_cache statistics | 55 | * Robert Olsson : Added rt_cache statistics |
56 | * Arnaldo C. Melo : Convert proc stuff to seq_file | 56 | * Arnaldo C. Melo : Convert proc stuff to seq_file |
57 | * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes. | 57 | * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes. |
58 | * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect | ||
59 | * Ilia Sotnikov : Removed TOS from hash calculations | ||
58 | * | 60 | * |
59 | * This program is free software; you can redistribute it and/or | 61 | * This program is free software; you can redistribute it and/or |
60 | * modify it under the terms of the GNU General Public License | 62 | * modify it under the terms of the GNU General Public License |
@@ -247,9 +249,9 @@ static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); | |||
247 | static int rt_intern_hash(unsigned hash, struct rtable *rth, | 249 | static int rt_intern_hash(unsigned hash, struct rtable *rth, |
248 | struct rtable **res); | 250 | struct rtable **res); |
249 | 251 | ||
250 | static unsigned int rt_hash_code(u32 daddr, u32 saddr, u8 tos) | 252 | static unsigned int rt_hash_code(u32 daddr, u32 saddr) |
251 | { | 253 | { |
252 | return (jhash_3words(daddr, saddr, (u32) tos, rt_hash_rnd) | 254 | return (jhash_2words(daddr, saddr, rt_hash_rnd) |
253 | & rt_hash_mask); | 255 | & rt_hash_mask); |
254 | } | 256 | } |
255 | 257 | ||
@@ -1111,7 +1113,7 @@ static void rt_del(unsigned hash, struct rtable *rt) | |||
1111 | } | 1113 | } |
1112 | 1114 | ||
1113 | void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, | 1115 | void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, |
1114 | u32 saddr, u8 tos, struct net_device *dev) | 1116 | u32 saddr, struct net_device *dev) |
1115 | { | 1117 | { |
1116 | int i, k; | 1118 | int i, k; |
1117 | struct in_device *in_dev = in_dev_get(dev); | 1119 | struct in_device *in_dev = in_dev_get(dev); |
@@ -1119,8 +1121,6 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, | |||
1119 | u32 skeys[2] = { saddr, 0 }; | 1121 | u32 skeys[2] = { saddr, 0 }; |
1120 | int ikeys[2] = { dev->ifindex, 0 }; | 1122 | int ikeys[2] = { dev->ifindex, 0 }; |
1121 | 1123 | ||
1122 | tos &= IPTOS_RT_MASK; | ||
1123 | |||
1124 | if (!in_dev) | 1124 | if (!in_dev) |
1125 | return; | 1125 | return; |
1126 | 1126 | ||
@@ -1141,8 +1141,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, | |||
1141 | for (i = 0; i < 2; i++) { | 1141 | for (i = 0; i < 2; i++) { |
1142 | for (k = 0; k < 2; k++) { | 1142 | for (k = 0; k < 2; k++) { |
1143 | unsigned hash = rt_hash_code(daddr, | 1143 | unsigned hash = rt_hash_code(daddr, |
1144 | skeys[i] ^ (ikeys[k] << 5), | 1144 | skeys[i] ^ (ikeys[k] << 5)); |
1145 | tos); | ||
1146 | 1145 | ||
1147 | rthp=&rt_hash_table[hash].chain; | 1146 | rthp=&rt_hash_table[hash].chain; |
1148 | 1147 | ||
@@ -1152,7 +1151,6 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, | |||
1152 | 1151 | ||
1153 | if (rth->fl.fl4_dst != daddr || | 1152 | if (rth->fl.fl4_dst != daddr || |
1154 | rth->fl.fl4_src != skeys[i] || | 1153 | rth->fl.fl4_src != skeys[i] || |
1155 | rth->fl.fl4_tos != tos || | ||
1156 | rth->fl.oif != ikeys[k] || | 1154 | rth->fl.oif != ikeys[k] || |
1157 | rth->fl.iif != 0) { | 1155 | rth->fl.iif != 0) { |
1158 | rthp = &rth->u.rt_next; | 1156 | rthp = &rth->u.rt_next; |
@@ -1232,10 +1230,9 @@ reject_redirect: | |||
1232 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) | 1230 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) |
1233 | printk(KERN_INFO "Redirect from %u.%u.%u.%u on %s about " | 1231 | printk(KERN_INFO "Redirect from %u.%u.%u.%u on %s about " |
1234 | "%u.%u.%u.%u ignored.\n" | 1232 | "%u.%u.%u.%u ignored.\n" |
1235 | " Advised path = %u.%u.%u.%u -> %u.%u.%u.%u, " | 1233 | " Advised path = %u.%u.%u.%u -> %u.%u.%u.%u\n", |
1236 | "tos %02x\n", | ||
1237 | NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw), | 1234 | NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw), |
1238 | NIPQUAD(saddr), NIPQUAD(daddr), tos); | 1235 | NIPQUAD(saddr), NIPQUAD(daddr)); |
1239 | #endif | 1236 | #endif |
1240 | in_dev_put(in_dev); | 1237 | in_dev_put(in_dev); |
1241 | } | 1238 | } |
@@ -1253,8 +1250,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
1253 | rt->u.dst.expires) { | 1250 | rt->u.dst.expires) { |
1254 | unsigned hash = rt_hash_code(rt->fl.fl4_dst, | 1251 | unsigned hash = rt_hash_code(rt->fl.fl4_dst, |
1255 | rt->fl.fl4_src ^ | 1252 | rt->fl.fl4_src ^ |
1256 | (rt->fl.oif << 5), | 1253 | (rt->fl.oif << 5)); |
1257 | rt->fl.fl4_tos); | ||
1258 | #if RT_CACHE_DEBUG >= 1 | 1254 | #if RT_CACHE_DEBUG >= 1 |
1259 | printk(KERN_DEBUG "ip_rt_advice: redirect to " | 1255 | printk(KERN_DEBUG "ip_rt_advice: redirect to " |
1260 | "%u.%u.%u.%u/%02x dropped\n", | 1256 | "%u.%u.%u.%u/%02x dropped\n", |
@@ -1391,14 +1387,13 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu) | |||
1391 | struct rtable *rth; | 1387 | struct rtable *rth; |
1392 | u32 skeys[2] = { iph->saddr, 0, }; | 1388 | u32 skeys[2] = { iph->saddr, 0, }; |
1393 | u32 daddr = iph->daddr; | 1389 | u32 daddr = iph->daddr; |
1394 | u8 tos = iph->tos & IPTOS_RT_MASK; | ||
1395 | unsigned short est_mtu = 0; | 1390 | unsigned short est_mtu = 0; |
1396 | 1391 | ||
1397 | if (ipv4_config.no_pmtu_disc) | 1392 | if (ipv4_config.no_pmtu_disc) |
1398 | return 0; | 1393 | return 0; |
1399 | 1394 | ||
1400 | for (i = 0; i < 2; i++) { | 1395 | for (i = 0; i < 2; i++) { |
1401 | unsigned hash = rt_hash_code(daddr, skeys[i], tos); | 1396 | unsigned hash = rt_hash_code(daddr, skeys[i]); |
1402 | 1397 | ||
1403 | rcu_read_lock(); | 1398 | rcu_read_lock(); |
1404 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 1399 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; |
@@ -1407,7 +1402,6 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu) | |||
1407 | rth->fl.fl4_src == skeys[i] && | 1402 | rth->fl.fl4_src == skeys[i] && |
1408 | rth->rt_dst == daddr && | 1403 | rth->rt_dst == daddr && |
1409 | rth->rt_src == iph->saddr && | 1404 | rth->rt_src == iph->saddr && |
1410 | rth->fl.fl4_tos == tos && | ||
1411 | rth->fl.iif == 0 && | 1405 | rth->fl.iif == 0 && |
1412 | !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) { | 1406 | !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) { |
1413 | unsigned short mtu = new_mtu; | 1407 | unsigned short mtu = new_mtu; |
@@ -1658,7 +1652,7 @@ static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr, | |||
1658 | RT_CACHE_STAT_INC(in_slow_mc); | 1652 | RT_CACHE_STAT_INC(in_slow_mc); |
1659 | 1653 | ||
1660 | in_dev_put(in_dev); | 1654 | in_dev_put(in_dev); |
1661 | hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos); | 1655 | hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5)); |
1662 | return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst); | 1656 | return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst); |
1663 | 1657 | ||
1664 | e_nobufs: | 1658 | e_nobufs: |
@@ -1823,7 +1817,7 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb, | |||
1823 | return err; | 1817 | return err; |
1824 | 1818 | ||
1825 | /* put it into the cache */ | 1819 | /* put it into the cache */ |
1826 | hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos); | 1820 | hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5)); |
1827 | return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); | 1821 | return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); |
1828 | } | 1822 | } |
1829 | 1823 | ||
@@ -1864,7 +1858,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb, | |||
1864 | return err; | 1858 | return err; |
1865 | 1859 | ||
1866 | /* put it into the cache */ | 1860 | /* put it into the cache */ |
1867 | hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos); | 1861 | hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5)); |
1868 | err = rt_intern_hash(hash, rth, &rtres); | 1862 | err = rt_intern_hash(hash, rth, &rtres); |
1869 | if (err) | 1863 | if (err) |
1870 | return err; | 1864 | return err; |
@@ -2041,7 +2035,7 @@ local_input: | |||
2041 | rth->rt_flags &= ~RTCF_LOCAL; | 2035 | rth->rt_flags &= ~RTCF_LOCAL; |
2042 | } | 2036 | } |
2043 | rth->rt_type = res.type; | 2037 | rth->rt_type = res.type; |
2044 | hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5), tos); | 2038 | hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5)); |
2045 | err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); | 2039 | err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); |
2046 | goto done; | 2040 | goto done; |
2047 | 2041 | ||
@@ -2088,7 +2082,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, | |||
2088 | int iif = dev->ifindex; | 2082 | int iif = dev->ifindex; |
2089 | 2083 | ||
2090 | tos &= IPTOS_RT_MASK; | 2084 | tos &= IPTOS_RT_MASK; |
2091 | hash = rt_hash_code(daddr, saddr ^ (iif << 5), tos); | 2085 | hash = rt_hash_code(daddr, saddr ^ (iif << 5)); |
2092 | 2086 | ||
2093 | rcu_read_lock(); | 2087 | rcu_read_lock(); |
2094 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 2088 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; |
@@ -2286,10 +2280,8 @@ static inline int ip_mkroute_output_def(struct rtable **rp, | |||
2286 | int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); | 2280 | int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); |
2287 | unsigned hash; | 2281 | unsigned hash; |
2288 | if (err == 0) { | 2282 | if (err == 0) { |
2289 | u32 tos = RT_FL_TOS(oldflp); | ||
2290 | |||
2291 | hash = rt_hash_code(oldflp->fl4_dst, | 2283 | hash = rt_hash_code(oldflp->fl4_dst, |
2292 | oldflp->fl4_src ^ (oldflp->oif << 5), tos); | 2284 | oldflp->fl4_src ^ (oldflp->oif << 5)); |
2293 | err = rt_intern_hash(hash, rth, rp); | 2285 | err = rt_intern_hash(hash, rth, rp); |
2294 | } | 2286 | } |
2295 | 2287 | ||
@@ -2304,7 +2296,6 @@ static inline int ip_mkroute_output(struct rtable** rp, | |||
2304 | unsigned flags) | 2296 | unsigned flags) |
2305 | { | 2297 | { |
2306 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | 2298 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED |
2307 | u32 tos = RT_FL_TOS(oldflp); | ||
2308 | unsigned char hop; | 2299 | unsigned char hop; |
2309 | unsigned hash; | 2300 | unsigned hash; |
2310 | int err = -EINVAL; | 2301 | int err = -EINVAL; |
@@ -2334,7 +2325,7 @@ static inline int ip_mkroute_output(struct rtable** rp, | |||
2334 | 2325 | ||
2335 | hash = rt_hash_code(oldflp->fl4_dst, | 2326 | hash = rt_hash_code(oldflp->fl4_dst, |
2336 | oldflp->fl4_src ^ | 2327 | oldflp->fl4_src ^ |
2337 | (oldflp->oif << 5), tos); | 2328 | (oldflp->oif << 5)); |
2338 | err = rt_intern_hash(hash, rth, rp); | 2329 | err = rt_intern_hash(hash, rth, rp); |
2339 | 2330 | ||
2340 | /* forward hop information to multipath impl. */ | 2331 | /* forward hop information to multipath impl. */ |
@@ -2563,7 +2554,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) | |||
2563 | unsigned hash; | 2554 | unsigned hash; |
2564 | struct rtable *rth; | 2555 | struct rtable *rth; |
2565 | 2556 | ||
2566 | hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5), flp->fl4_tos); | 2557 | hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5)); |
2567 | 2558 | ||
2568 | rcu_read_lock_bh(); | 2559 | rcu_read_lock_bh(); |
2569 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 2560 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; |