diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2012-07-30 21:08:23 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-07-31 17:41:38 -0400 |
commit | 54764bb647b2e847c512acf8d443df965da35000 (patch) | |
tree | 3918f6f42679d0ebdcef230f28285f99d178be62 /net/ipv4/route.c | |
parent | 5a0d513b622ee41e117fc37e26e27e8ef42e8dae (diff) |
ipv4: Restore old dst_free() behavior.
commit 404e0a8b6a55 (net: ipv4: fix RCU races on dst refcounts) tried
to solve a race but added a problem at device/fib dismantle time :
We really want to call dst_free() as soon as possible, even if sockets
still have dst in their cache.
dst_release() calls in free_fib_info_rcu() are not welcomed.
Root of the problem was that now we also cache output routes (in
nh_rth_output), we must use call_rcu() instead of call_rcu_bh() in
rt_free(), because output route lookups are done in process context.
Based on feedback and initial patch from David Miller (adding another
call_rcu_bh() call in fib, but it appears it was not the right fix)
I left the inet_sk_rx_dst_set() helper and added __rcu attributes
to nh_rth_output and nh_rth_input to better document what is going on in
this code.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 26 |
1 files changed, 17 insertions, 9 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d6eabcfe8a90..2bd107477469 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -1199,23 +1199,31 @@ restart: | |||
1199 | fnhe->fnhe_stamp = jiffies; | 1199 | fnhe->fnhe_stamp = jiffies; |
1200 | } | 1200 | } |
1201 | 1201 | ||
1202 | static inline void rt_free(struct rtable *rt) | ||
1203 | { | ||
1204 | call_rcu(&rt->dst.rcu_head, dst_rcu_free); | ||
1205 | } | ||
1206 | |||
1202 | static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) | 1207 | static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) |
1203 | { | 1208 | { |
1204 | struct rtable *orig, *prev, **p = &nh->nh_rth_output; | 1209 | struct rtable *orig, *prev, **p = (struct rtable **)&nh->nh_rth_output; |
1205 | 1210 | ||
1206 | if (rt_is_input_route(rt)) | 1211 | if (rt_is_input_route(rt)) |
1207 | p = &nh->nh_rth_input; | 1212 | p = (struct rtable **)&nh->nh_rth_input; |
1208 | 1213 | ||
1209 | orig = *p; | 1214 | orig = *p; |
1210 | 1215 | ||
1211 | rt->dst.flags |= DST_RCU_FREE; | ||
1212 | dst_hold(&rt->dst); | ||
1213 | prev = cmpxchg(p, orig, rt); | 1216 | prev = cmpxchg(p, orig, rt); |
1214 | if (prev == orig) { | 1217 | if (prev == orig) { |
1215 | if (orig) | 1218 | if (orig) |
1216 | dst_release(&orig->dst); | 1219 | rt_free(orig); |
1217 | } else { | 1220 | } else { |
1218 | dst_release(&rt->dst); | 1221 | /* Routes we intend to cache in the FIB nexthop have |
1222 | * the DST_NOCACHE bit clear. However, if we are | ||
1223 | * unsuccessful at storing this route into the cache | ||
1224 | * we really need to set it. | ||
1225 | */ | ||
1226 | rt->dst.flags |= DST_NOCACHE; | ||
1219 | } | 1227 | } |
1220 | } | 1228 | } |
1221 | 1229 | ||
@@ -1412,7 +1420,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1412 | do_cache = false; | 1420 | do_cache = false; |
1413 | if (res->fi) { | 1421 | if (res->fi) { |
1414 | if (!itag) { | 1422 | if (!itag) { |
1415 | rth = FIB_RES_NH(*res).nh_rth_input; | 1423 | rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); |
1416 | if (rt_cache_valid(rth)) { | 1424 | if (rt_cache_valid(rth)) { |
1417 | skb_dst_set_noref(skb, &rth->dst); | 1425 | skb_dst_set_noref(skb, &rth->dst); |
1418 | goto out; | 1426 | goto out; |
@@ -1574,7 +1582,7 @@ local_input: | |||
1574 | do_cache = false; | 1582 | do_cache = false; |
1575 | if (res.fi) { | 1583 | if (res.fi) { |
1576 | if (!itag) { | 1584 | if (!itag) { |
1577 | rth = FIB_RES_NH(res).nh_rth_input; | 1585 | rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input); |
1578 | if (rt_cache_valid(rth)) { | 1586 | if (rt_cache_valid(rth)) { |
1579 | skb_dst_set_noref(skb, &rth->dst); | 1587 | skb_dst_set_noref(skb, &rth->dst); |
1580 | err = 0; | 1588 | err = 0; |
@@ -1742,7 +1750,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
1742 | if (fi) { | 1750 | if (fi) { |
1743 | fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); | 1751 | fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); |
1744 | if (!fnhe) { | 1752 | if (!fnhe) { |
1745 | rth = FIB_RES_NH(*res).nh_rth_output; | 1753 | rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_output); |
1746 | if (rt_cache_valid(rth)) { | 1754 | if (rt_cache_valid(rth)) { |
1747 | dst_hold(&rth->dst); | 1755 | dst_hold(&rth->dst); |
1748 | return rth; | 1756 | return rth; |