author     Eric Dumazet <eric.dumazet@gmail.com>    2011-05-26 13:27:11 -0400
committer  David S. Miller <davem@davemloft.net>    2011-05-27 13:39:11 -0400
commit     686a7e32ca7fdd819eb9606abd3db52b77d1479f (patch)
tree       409af64ba9a4685781e5cd6ed455b1927a13348d /net/ipv4/inetpeer.c
parent     e7a46b4d0839c2a3aa2e0ae0b145f293f6738498 (diff)
inetpeer: fix race in unused_list manipulations
Several crashes in cleanup_once() were reported in recent kernels.
Commit d6cc1d642de9 (inetpeer: various changes) added a race in
unlink_from_unused().
One way to avoid taking unused_peers.lock before doing the list_empty()
test is to catch the 0->1 refcnt transition, using the full-barrier
atomic operation variants (atomic_cmpxchg() and atomic_inc_return())
instead of the previous atomic_inc() and atomic_add_unless() calls.
We then call unlink_from_unused() only for the owner of the 0->1
transition.
Add a new atomic_add_unless_return() static helper.
With help from Arun Sharma.
Refs: https://bugzilla.kernel.org/show_bug.cgi?id=32772
Reported-by: Arun Sharma <asharma@fb.com>
Reported-by: Maximilian Engelhardt <maxi@daemonizer.de>
Reported-by: Yann Dupont <Yann.Dupont@univ-nantes.fr>
Reported-by: Denys Fedoryshchenko <denys@visp.net.lb>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
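
For context, here is a minimal userspace sketch of the idea the patch relies on: a compare-and-swap loop that increments the refcount and reports the value it produced, so that only the thread which performed the 0->1 transition goes on to unlink the entry from the unused list. The name add_unless_return() and the C11 <stdatomic.h> calls are illustrative stand-ins for the kernel's atomic_add_unless_return() helper and atomic_t API, not part of the patch itself.

/* Userspace analogue of the 0->1 refcnt transition detection
 * used by the patch; compile with any C11 compiler.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Add 'a' to *ptr unless it equals 'u'; report the resulting value
 * through *newv.  Returns false (and leaves *ptr untouched) when
 * *ptr == u, mirroring the kernel helper added by this patch.
 */
static bool add_unless_return(atomic_int *ptr, int a, int u, int *newv)
{
        int old = atomic_load(ptr);

        while (old != u) {
                *newv = old + a;
                /* On failure, 'old' is refreshed with the current
                 * value and the loop retries.
                 */
                if (atomic_compare_exchange_weak(ptr, &old, *newv))
                        return true;
        }
        return false;
}

int main(void)
{
        atomic_int refcnt = 0;  /* entry currently on the unused list */
        int newrefcnt = 0;

        /* -1 marks a deleted entry, as in lookup_rcu(); skip those. */
        if (add_unless_return(&refcnt, 1, -1, &newrefcnt) && newrefcnt == 1)
                printf("owner of the 0->1 transition: safe to unlink from unused list\n");

        return 0;
}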
Diffstat (limited to 'net/ipv4/inetpeer.c')
 net/ipv4/inetpeer.c | 42 +++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9df4e635fb5f..ce616d92cc54 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -154,11 +154,9 @@ void __init inet_initpeers(void)
 /* Called with or without local BH being disabled. */
 static void unlink_from_unused(struct inet_peer *p)
 {
-        if (!list_empty(&p->unused)) {
-                spin_lock_bh(&unused_peers.lock);
-                list_del_init(&p->unused);
-                spin_unlock_bh(&unused_peers.lock);
-        }
+        spin_lock_bh(&unused_peers.lock);
+        list_del_init(&p->unused);
+        spin_unlock_bh(&unused_peers.lock);
 }
 
 static int addr_compare(const struct inetpeer_addr *a,
@@ -205,6 +203,20 @@ static int addr_compare(const struct inetpeer_addr *a,
         u;                                                      \
 })
 
+static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv)
+{
+        int cur, old = atomic_read(ptr);
+
+        while (old != u) {
+                *newv = old + a;
+                cur = atomic_cmpxchg(ptr, old, *newv);
+                if (cur == old)
+                        return true;
+                old = cur;
+        }
+        return false;
+}
+
 /*
  * Called with rcu_read_lock()
  * Because we hold no lock against a writer, its quite possible we fall
@@ -213,7 +225,8 @@ static int addr_compare(const struct inetpeer_addr *a,
  * We exit from this function if number of links exceeds PEER_MAXDEPTH
  */
 static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
-                                    struct inet_peer_base *base)
+                                    struct inet_peer_base *base,
+                                    int *newrefcnt)
 {
         struct inet_peer *u = rcu_dereference(base->root);
         int count = 0;
@@ -226,7 +239,7 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
                          * distinction between an unused entry (refcnt=0) and
                          * a freed one.
                          */
-                        if (unlikely(!atomic_add_unless(&u->refcnt, 1, -1)))
+                        if (!atomic_add_unless_return(&u->refcnt, 1, -1, newrefcnt))
                                 u = NULL;
                         return u;
                 }
@@ -465,22 +478,23 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
         struct inet_peer_base *base = family_to_base(daddr->family);
         struct inet_peer *p;
         unsigned int sequence;
-        int invalidated;
+        int invalidated, newrefcnt = 0;
 
         /* Look up for the address quickly, lockless.
          * Because of a concurrent writer, we might not find an existing entry.
          */
         rcu_read_lock();
         sequence = read_seqbegin(&base->lock);
-        p = lookup_rcu(daddr, base);
+        p = lookup_rcu(daddr, base, &newrefcnt);
         invalidated = read_seqretry(&base->lock, sequence);
         rcu_read_unlock();
 
         if (p) {
-                /* The existing node has been found.
+found:          /* The existing node has been found.
                  * Remove the entry from unused list if it was there.
                  */
-                unlink_from_unused(p);
+                if (newrefcnt == 1)
+                        unlink_from_unused(p);
                 return p;
         }
 
@@ -494,11 +508,9 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
         write_seqlock_bh(&base->lock);
         p = lookup(daddr, stack, base);
         if (p != peer_avl_empty) {
-                atomic_inc(&p->refcnt);
+                newrefcnt = atomic_inc_return(&p->refcnt);
                 write_sequnlock_bh(&base->lock);
-                /* Remove the entry from unused list if it was there. */
-                unlink_from_unused(p);
-                return p;
+                goto found;
         }
         p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
         if (p) {