aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorNeil Horman <nhorman@tuxdriver.com>2009-06-22 06:18:53 -0400
committerDavid S. Miller <davem@davemloft.net>2009-06-23 19:36:26 -0400
commitb6280b47a7a42970d098a3059f4ebe7e55e90d8d (patch)
tree527607fdec0db21c6b49b97c7e8b19671bfe25bf /net
parentd55d87fdff8252d0e2f7c28c2d443aee17e9d70f (diff)
ipv4 routing: Ensure that route cache entries are usable and reclaimable with caching is off
When route caching is disabled (rt_caching returns false), We still use route cache entries that are created and passed into rt_intern_hash once. These routes need to be made usable for the one call path that holds a reference to them, and they need to be reclaimed when they're finished with their use. To be made usable, they need to be associated with a neighbor table entry (which they currently are not), otherwise iproute_finish2 just discards the packet, since we don't know which L2 peer to send the packet to. To do this binding, we need to follow the path a bit higher up in rt_intern_hash, which calls arp_bind_neighbour, but not assign the route entry to the hash table. Currently, if caching is off, we simply assign the route to the rp pointer and are reutrn success. This patch associates us with a neighbor entry first. Secondly, we need to make sure that any single use routes like this are known to the garbage collector when caching is off. If caching is off, and we try to hash in a route, it will leak when its refcount reaches zero. To avoid this, this patch calls rt_free on the route cache entry passed into rt_intern_hash. This places us on the gc list for the route cache garbage collector, so that when its refcount reaches zero, it will be reclaimed (Thanks to Alexey for this suggestion). I've tested this on a local system here, and with these patches in place, I'm able to maintain routed connectivity to remote systems, even if I set /proc/sys/net/ipv4/rt_cache_rebuild_count to -1, which forces rt_caching to return false. Signed-off-by: Neil Horman <nhorman@redhat.com> Reported-by: Jarek Poplawski <jarkao2@gmail.com> Reported-by: Maxime Bizon <mbizon@freebox.fr> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/route.c26
1 files changed, 23 insertions, 3 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 65b3a8b11a6c..278f46f5011b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1093,8 +1093,27 @@ restart:
1093 * If we drop it here, the callers have no way to resolve routes 1093 * If we drop it here, the callers have no way to resolve routes
1094 * when we're not caching. Instead, just point *rp at rt, so 1094 * when we're not caching. Instead, just point *rp at rt, so
1095 * the caller gets a single use out of the route 1095 * the caller gets a single use out of the route
1096 * Note that we do rt_free on this new route entry, so that
1097 * once its refcount hits zero, we are still able to reap it
1098 * (Thanks Alexey)
1099 * Note also the rt_free uses call_rcu. We don't actually
1100 * need rcu protection here, this is just our path to get
1101 * on the route gc list.
1096 */ 1102 */
1097 goto report_and_exit; 1103
1104 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
1105 int err = arp_bind_neighbour(&rt->u.dst);
1106 if (err) {
1107 if (net_ratelimit())
1108 printk(KERN_WARNING
1109 "Neighbour table failure & not caching routes.\n");
1110 rt_drop(rt);
1111 return err;
1112 }
1113 }
1114
1115 rt_free(rt);
1116 goto skip_hashing;
1098 } 1117 }
1099 1118
1100 rthp = &rt_hash_table[hash].chain; 1119 rthp = &rt_hash_table[hash].chain;
@@ -1211,7 +1230,8 @@ restart:
1211#if RT_CACHE_DEBUG >= 2 1230#if RT_CACHE_DEBUG >= 2
1212 if (rt->u.dst.rt_next) { 1231 if (rt->u.dst.rt_next) {
1213 struct rtable *trt; 1232 struct rtable *trt;
1214 printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst); 1233 printk(KERN_DEBUG "rt_cache @%02x: %pI4",
1234 hash, &rt->rt_dst);
1215 for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) 1235 for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next)
1216 printk(" . %pI4", &trt->rt_dst); 1236 printk(" . %pI4", &trt->rt_dst);
1217 printk("\n"); 1237 printk("\n");
@@ -1226,7 +1246,7 @@ restart:
1226 1246
1227 spin_unlock_bh(rt_hash_lock_addr(hash)); 1247 spin_unlock_bh(rt_hash_lock_addr(hash));
1228 1248
1229report_and_exit: 1249skip_hashing:
1230 if (rp) 1250 if (rp)
1231 *rp = rt; 1251 *rp = rt;
1232 else 1252 else