aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorIlia Sotnikov <hostcc@gmail.com>2006-03-25 04:38:55 -0500
committerDavid S. Miller <davem@davemloft.net>2006-03-25 04:38:55 -0500
commitcef2685e0053945ea0f3c02297386b040f486ea7 (patch)
tree869c11f64c5e2259342c228c80e93cbbbf8e4fc1 /net
parentb8059eadf9f4dc24bd72da71daa832a9a9899fb4 (diff)
[IPV4]: Aggregate route entries with different TOS values
When we get an ICMP need-to-frag message, the original TOS value in the ICMP payload cannot be used as a key to look up the routes to update. This is because the TOS field may have been modified by routers on the way. Similarly, ip_rt_redirect should also ignore the TOS as the router that gave us the message may have modified the TOS value. The patch achieves this objective by aggregating entries with different TOS values (but are otherwise identical) into the same bucket. This makes it easy to update them at the same time when an ICMP message is received. In future we should use a twin-hashing scheme where teh aggregation occurs at the entry level. That is, the TOS goes back into the hash for normal lookups while ICMP lookups will end up with a node that gives us a list that contains all other route entries that differ only by TOS. Signed-off-by: Ilia Sotnikov <hostcc@gmail.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/icmp.c2
-rw-r--r--net/ipv4/route.c45
2 files changed, 19 insertions, 28 deletions
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e7bbff4340b..9831fd2c73a 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -753,7 +753,7 @@ static void icmp_redirect(struct sk_buff *skb)
753 case ICMP_REDIR_HOST: 753 case ICMP_REDIR_HOST:
754 case ICMP_REDIR_HOSTTOS: 754 case ICMP_REDIR_HOSTTOS:
755 ip_rt_redirect(skb->nh.iph->saddr, ip, skb->h.icmph->un.gateway, 755 ip_rt_redirect(skb->nh.iph->saddr, ip, skb->h.icmph->un.gateway,
756 iph->saddr, iph->tos, skb->dev); 756 iph->saddr, skb->dev);
757 break; 757 break;
758 } 758 }
759out: 759out:
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fca5fe0cf94..94fcbc5e5a1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -55,6 +55,8 @@
55 * Robert Olsson : Added rt_cache statistics 55 * Robert Olsson : Added rt_cache statistics
56 * Arnaldo C. Melo : Convert proc stuff to seq_file 56 * Arnaldo C. Melo : Convert proc stuff to seq_file
57 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes. 57 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
58 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
59 * Ilia Sotnikov : Removed TOS from hash calculations
58 * 60 *
59 * This program is free software; you can redistribute it and/or 61 * This program is free software; you can redistribute it and/or
60 * modify it under the terms of the GNU General Public License 62 * modify it under the terms of the GNU General Public License
@@ -247,9 +249,9 @@ static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
247static int rt_intern_hash(unsigned hash, struct rtable *rth, 249static int rt_intern_hash(unsigned hash, struct rtable *rth,
248 struct rtable **res); 250 struct rtable **res);
249 251
250static unsigned int rt_hash_code(u32 daddr, u32 saddr, u8 tos) 252static unsigned int rt_hash_code(u32 daddr, u32 saddr)
251{ 253{
252 return (jhash_3words(daddr, saddr, (u32) tos, rt_hash_rnd) 254 return (jhash_2words(daddr, saddr, rt_hash_rnd)
253 & rt_hash_mask); 255 & rt_hash_mask);
254} 256}
255 257
@@ -1111,7 +1113,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
1111} 1113}
1112 1114
1113void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, 1115void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
1114 u32 saddr, u8 tos, struct net_device *dev) 1116 u32 saddr, struct net_device *dev)
1115{ 1117{
1116 int i, k; 1118 int i, k;
1117 struct in_device *in_dev = in_dev_get(dev); 1119 struct in_device *in_dev = in_dev_get(dev);
@@ -1119,8 +1121,6 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
1119 u32 skeys[2] = { saddr, 0 }; 1121 u32 skeys[2] = { saddr, 0 };
1120 int ikeys[2] = { dev->ifindex, 0 }; 1122 int ikeys[2] = { dev->ifindex, 0 };
1121 1123
1122 tos &= IPTOS_RT_MASK;
1123
1124 if (!in_dev) 1124 if (!in_dev)
1125 return; 1125 return;
1126 1126
@@ -1141,8 +1141,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
1141 for (i = 0; i < 2; i++) { 1141 for (i = 0; i < 2; i++) {
1142 for (k = 0; k < 2; k++) { 1142 for (k = 0; k < 2; k++) {
1143 unsigned hash = rt_hash_code(daddr, 1143 unsigned hash = rt_hash_code(daddr,
1144 skeys[i] ^ (ikeys[k] << 5), 1144 skeys[i] ^ (ikeys[k] << 5));
1145 tos);
1146 1145
1147 rthp=&rt_hash_table[hash].chain; 1146 rthp=&rt_hash_table[hash].chain;
1148 1147
@@ -1152,7 +1151,6 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
1152 1151
1153 if (rth->fl.fl4_dst != daddr || 1152 if (rth->fl.fl4_dst != daddr ||
1154 rth->fl.fl4_src != skeys[i] || 1153 rth->fl.fl4_src != skeys[i] ||
1155 rth->fl.fl4_tos != tos ||
1156 rth->fl.oif != ikeys[k] || 1154 rth->fl.oif != ikeys[k] ||
1157 rth->fl.iif != 0) { 1155 rth->fl.iif != 0) {
1158 rthp = &rth->u.rt_next; 1156 rthp = &rth->u.rt_next;
@@ -1232,10 +1230,9 @@ reject_redirect:
1232 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) 1230 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
1233 printk(KERN_INFO "Redirect from %u.%u.%u.%u on %s about " 1231 printk(KERN_INFO "Redirect from %u.%u.%u.%u on %s about "
1234 "%u.%u.%u.%u ignored.\n" 1232 "%u.%u.%u.%u ignored.\n"
1235 " Advised path = %u.%u.%u.%u -> %u.%u.%u.%u, " 1233 " Advised path = %u.%u.%u.%u -> %u.%u.%u.%u\n",
1236 "tos %02x\n",
1237 NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw), 1234 NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw),
1238 NIPQUAD(saddr), NIPQUAD(daddr), tos); 1235 NIPQUAD(saddr), NIPQUAD(daddr));
1239#endif 1236#endif
1240 in_dev_put(in_dev); 1237 in_dev_put(in_dev);
1241} 1238}
@@ -1253,8 +1250,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1253 rt->u.dst.expires) { 1250 rt->u.dst.expires) {
1254 unsigned hash = rt_hash_code(rt->fl.fl4_dst, 1251 unsigned hash = rt_hash_code(rt->fl.fl4_dst,
1255 rt->fl.fl4_src ^ 1252 rt->fl.fl4_src ^
1256 (rt->fl.oif << 5), 1253 (rt->fl.oif << 5));
1257 rt->fl.fl4_tos);
1258#if RT_CACHE_DEBUG >= 1 1254#if RT_CACHE_DEBUG >= 1
1259 printk(KERN_DEBUG "ip_rt_advice: redirect to " 1255 printk(KERN_DEBUG "ip_rt_advice: redirect to "
1260 "%u.%u.%u.%u/%02x dropped\n", 1256 "%u.%u.%u.%u/%02x dropped\n",
@@ -1391,14 +1387,13 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
1391 struct rtable *rth; 1387 struct rtable *rth;
1392 u32 skeys[2] = { iph->saddr, 0, }; 1388 u32 skeys[2] = { iph->saddr, 0, };
1393 u32 daddr = iph->daddr; 1389 u32 daddr = iph->daddr;
1394 u8 tos = iph->tos & IPTOS_RT_MASK;
1395 unsigned short est_mtu = 0; 1390 unsigned short est_mtu = 0;
1396 1391
1397 if (ipv4_config.no_pmtu_disc) 1392 if (ipv4_config.no_pmtu_disc)
1398 return 0; 1393 return 0;
1399 1394
1400 for (i = 0; i < 2; i++) { 1395 for (i = 0; i < 2; i++) {
1401 unsigned hash = rt_hash_code(daddr, skeys[i], tos); 1396 unsigned hash = rt_hash_code(daddr, skeys[i]);
1402 1397
1403 rcu_read_lock(); 1398 rcu_read_lock();
1404 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 1399 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -1407,7 +1402,6 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
1407 rth->fl.fl4_src == skeys[i] && 1402 rth->fl.fl4_src == skeys[i] &&
1408 rth->rt_dst == daddr && 1403 rth->rt_dst == daddr &&
1409 rth->rt_src == iph->saddr && 1404 rth->rt_src == iph->saddr &&
1410 rth->fl.fl4_tos == tos &&
1411 rth->fl.iif == 0 && 1405 rth->fl.iif == 0 &&
1412 !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) { 1406 !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) {
1413 unsigned short mtu = new_mtu; 1407 unsigned short mtu = new_mtu;
@@ -1658,7 +1652,7 @@ static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr,
1658 RT_CACHE_STAT_INC(in_slow_mc); 1652 RT_CACHE_STAT_INC(in_slow_mc);
1659 1653
1660 in_dev_put(in_dev); 1654 in_dev_put(in_dev);
1661 hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos); 1655 hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5));
1662 return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst); 1656 return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst);
1663 1657
1664e_nobufs: 1658e_nobufs:
@@ -1823,7 +1817,7 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb,
1823 return err; 1817 return err;
1824 1818
1825 /* put it into the cache */ 1819 /* put it into the cache */
1826 hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos); 1820 hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5));
1827 return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); 1821 return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
1828} 1822}
1829 1823
@@ -1864,7 +1858,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
1864 return err; 1858 return err;
1865 1859
1866 /* put it into the cache */ 1860 /* put it into the cache */
1867 hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos); 1861 hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5));
1868 err = rt_intern_hash(hash, rth, &rtres); 1862 err = rt_intern_hash(hash, rth, &rtres);
1869 if (err) 1863 if (err)
1870 return err; 1864 return err;
@@ -2041,7 +2035,7 @@ local_input:
2041 rth->rt_flags &= ~RTCF_LOCAL; 2035 rth->rt_flags &= ~RTCF_LOCAL;
2042 } 2036 }
2043 rth->rt_type = res.type; 2037 rth->rt_type = res.type;
2044 hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5), tos); 2038 hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5));
2045 err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); 2039 err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
2046 goto done; 2040 goto done;
2047 2041
@@ -2088,7 +2082,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
2088 int iif = dev->ifindex; 2082 int iif = dev->ifindex;
2089 2083
2090 tos &= IPTOS_RT_MASK; 2084 tos &= IPTOS_RT_MASK;
2091 hash = rt_hash_code(daddr, saddr ^ (iif << 5), tos); 2085 hash = rt_hash_code(daddr, saddr ^ (iif << 5));
2092 2086
2093 rcu_read_lock(); 2087 rcu_read_lock();
2094 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2088 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2286,10 +2280,8 @@ static inline int ip_mkroute_output_def(struct rtable **rp,
2286 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); 2280 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
2287 unsigned hash; 2281 unsigned hash;
2288 if (err == 0) { 2282 if (err == 0) {
2289 u32 tos = RT_FL_TOS(oldflp);
2290
2291 hash = rt_hash_code(oldflp->fl4_dst, 2283 hash = rt_hash_code(oldflp->fl4_dst,
2292 oldflp->fl4_src ^ (oldflp->oif << 5), tos); 2284 oldflp->fl4_src ^ (oldflp->oif << 5));
2293 err = rt_intern_hash(hash, rth, rp); 2285 err = rt_intern_hash(hash, rth, rp);
2294 } 2286 }
2295 2287
@@ -2304,7 +2296,6 @@ static inline int ip_mkroute_output(struct rtable** rp,
2304 unsigned flags) 2296 unsigned flags)
2305{ 2297{
2306#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 2298#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
2307 u32 tos = RT_FL_TOS(oldflp);
2308 unsigned char hop; 2299 unsigned char hop;
2309 unsigned hash; 2300 unsigned hash;
2310 int err = -EINVAL; 2301 int err = -EINVAL;
@@ -2334,7 +2325,7 @@ static inline int ip_mkroute_output(struct rtable** rp,
2334 2325
2335 hash = rt_hash_code(oldflp->fl4_dst, 2326 hash = rt_hash_code(oldflp->fl4_dst,
2336 oldflp->fl4_src ^ 2327 oldflp->fl4_src ^
2337 (oldflp->oif << 5), tos); 2328 (oldflp->oif << 5));
2338 err = rt_intern_hash(hash, rth, rp); 2329 err = rt_intern_hash(hash, rth, rp);
2339 2330
2340 /* forward hop information to multipath impl. */ 2331 /* forward hop information to multipath impl. */
@@ -2563,7 +2554,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
2563 unsigned hash; 2554 unsigned hash;
2564 struct rtable *rth; 2555 struct rtable *rth;
2565 2556
2566 hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5), flp->fl4_tos); 2557 hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5));
2567 2558
2568 rcu_read_lock_bh(); 2559 rcu_read_lock_bh();
2569 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2560 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;