diff options
author | Neil Horman <nhorman@tuxdriver.com> | 2009-06-22 06:18:53 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-06-23 19:36:26 -0400 |
commit | b6280b47a7a42970d098a3059f4ebe7e55e90d8d (patch) | |
tree | 527607fdec0db21c6b49b97c7e8b19671bfe25bf /net | |
parent | d55d87fdff8252d0e2f7c28c2d443aee17e9d70f (diff) |
ipv4 routing: Ensure that route cache entries are usable and reclaimable when caching is off
When route caching is disabled (rt_caching returns false), we still use route
cache entries that are created and passed into rt_intern_hash once. These
routes need to be made usable for the one call path that holds a reference to
them, and they need to be reclaimed when they're finished with their use. To be
made usable, they need to be associated with a neighbor table entry (which they
currently are not), otherwise iproute_finish2 just discards the packet, since we
don't know which L2 peer to send the packet to. To do this binding, we need to
follow the path a bit higher up in rt_intern_hash, which calls
arp_bind_neighbour, but not assign the route entry to the hash table.
Currently, if caching is off, we simply assign the route to the rp pointer and
return success. This patch associates us with a neighbor entry first.
Secondly, we need to make sure that any single-use routes like this are known to
the garbage collector when caching is off. If caching is off, and we try to
hash in a route, it will leak when its refcount reaches zero. To avoid this,
this patch calls rt_free on the route cache entry passed into rt_intern_hash.
This places us on the gc list for the route cache garbage collector, so that
when its refcount reaches zero, it will be reclaimed (Thanks to Alexey for this
suggestion).
I've tested this on a local system here, and with these patches in place, I'm
able to maintain routed connectivity to remote systems, even if I set
/proc/sys/net/ipv4/rt_cache_rebuild_count to -1, which forces rt_caching to
return false.
Signed-off-by: Neil Horman <nhorman@redhat.com>
Reported-by: Jarek Poplawski <jarkao2@gmail.com>
Reported-by: Maxime Bizon <mbizon@freebox.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/route.c | 26 |
1 files changed, 23 insertions, 3 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 65b3a8b11a6c..278f46f5011b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -1093,8 +1093,27 @@ restart: | |||
1093 | * If we drop it here, the callers have no way to resolve routes | 1093 | * If we drop it here, the callers have no way to resolve routes |
1094 | * when we're not caching. Instead, just point *rp at rt, so | 1094 | * when we're not caching. Instead, just point *rp at rt, so |
1095 | * the caller gets a single use out of the route | 1095 | * the caller gets a single use out of the route |
1096 | * Note that we do rt_free on this new route entry, so that | ||
1097 | * once its refcount hits zero, we are still able to reap it | ||
1098 | * (Thanks Alexey) | ||
1099 | * Note also the rt_free uses call_rcu. We don't actually | ||
1100 | * need rcu protection here, this is just our path to get | ||
1101 | * on the route gc list. | ||
1096 | */ | 1102 | */ |
1097 | goto report_and_exit; | 1103 | |
1104 | if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { | ||
1105 | int err = arp_bind_neighbour(&rt->u.dst); | ||
1106 | if (err) { | ||
1107 | if (net_ratelimit()) | ||
1108 | printk(KERN_WARNING | ||
1109 | "Neighbour table failure & not caching routes.\n"); | ||
1110 | rt_drop(rt); | ||
1111 | return err; | ||
1112 | } | ||
1113 | } | ||
1114 | |||
1115 | rt_free(rt); | ||
1116 | goto skip_hashing; | ||
1098 | } | 1117 | } |
1099 | 1118 | ||
1100 | rthp = &rt_hash_table[hash].chain; | 1119 | rthp = &rt_hash_table[hash].chain; |
@@ -1211,7 +1230,8 @@ restart: | |||
1211 | #if RT_CACHE_DEBUG >= 2 | 1230 | #if RT_CACHE_DEBUG >= 2 |
1212 | if (rt->u.dst.rt_next) { | 1231 | if (rt->u.dst.rt_next) { |
1213 | struct rtable *trt; | 1232 | struct rtable *trt; |
1214 | printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst); | 1233 | printk(KERN_DEBUG "rt_cache @%02x: %pI4", |
1234 | hash, &rt->rt_dst); | ||
1215 | for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) | 1235 | for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) |
1216 | printk(" . %pI4", &trt->rt_dst); | 1236 | printk(" . %pI4", &trt->rt_dst); |
1217 | printk("\n"); | 1237 | printk("\n"); |
@@ -1226,7 +1246,7 @@ restart: | |||
1226 | 1246 | ||
1227 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1247 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1228 | 1248 | ||
1229 | report_and_exit: | 1249 | skip_hashing: |
1230 | if (rp) | 1250 | if (rp) |
1231 | *rp = rt; | 1251 | *rp = rt; |
1232 | else | 1252 | else |